Saying that calls are cheap may be right in many cases, but it is definitely not true for me. Here is a tiny example I made by copying and pasting from my actual project, a compiler that uses TinyC as a backend. In this sample, using macros speeds the program up by 400%, i.e. it runs 4 times faster, on my AMD Ryzen 5. In a real compiler the difference is even worse.
#define USE_MACROS
#include <stdint.h>
#include <stdio.h>
#include <sys/time.h>
/*
 * Tagged 64-bit value used by this benchmark.
 *
 * A value whose HEAP_FLAG bit is clear is treated as an integer carried in
 * its low 32 bits (INT_MASK).  __nil is the value with only HEAP_FLAG set;
 * it is what __eq/__mod produce when an operand is not an integer.
 * PTR_MASK is not used by the code in this sample.
 *
 * The primitives exist in two interchangeable variants selected by
 * USE_MACROS: function-like macros, or ordinary functions.
 *
 * NOTE(review): identifiers beginning with "__" are reserved for the C
 * implementation; the names are kept only so existing callers still work.
 */
typedef uint64_t val;
#define HEAP_FLAG 0x0001000000000000ULL
#define INT_MASK 0xFFFFFFFFULL
#define PTR_MASK 0x0000FFFFFFFFFFFFULL
#define __nil HEAP_FLAG
#ifdef USE_MACROS
/*
 * Macro variant.  Every argument is parenthesized so that arbitrary
 * expressions can be passed.  __eq, __mod and __cout evaluate their
 * arguments more than once, so do not pass expressions with side effects.
 */
/* Non-zero when the HEAP_FLAG bit of `a` is clear. */
#define __is_int(a) (((a) & HEAP_FLAG) == 0)
/* Low 32 bits of `a` as an int (same result type as the function variant,
 * which also makes it a valid argument for printf's "%i"). */
#define __int(a) ((int)((a) & INT_MASK))
/* Builds a tagged integer from `a` by keeping its low 32 bits. */
#define __let(a) ((val)((a) & INT_MASK))
/* Post-decrements the value `a` points to; usable as an expression. */
#define __dec(a) ((*(a))--)
/* 1/0 when both operands are integers, otherwise __nil (non-zero). */
#define __eq(a, b) (__is_int(a) && __is_int(b) ? __int(a) == __int(b) : __nil)
/* Remainder of two integers; __nil when an operand is not an integer or
 * the divisor is 0 (the remainder would be undefined behaviour). */
#define __mod(a, b) \
    (__is_int(a) && __is_int(b) && __int(b) != 0 ? __int(a) % __int(b) : __nil)
/* Prints `a` with "%i" when it is an integer; prints nothing otherwise.
 * Wrapped in do/while(0) so it behaves as one statement next to if/else. */
#define __cout(a) do { if (__is_int(a)) printf("%i", __int(a)); } while (0)
#else
/*
 * Function variant.
 *
 * NOTE(review): __eq and __mod return int, but __nil has only bit 48 set
 * and cannot be represented in an int.  The conversion is
 * implementation-defined (0 on the usual two's-complement compilers), so a
 * "not an integer" result looks like 0 here, whereas the macro variant
 * yields the non-zero __nil.  Preserving nil would require widening the
 * return type to val, which changes the interface.
 */

/* Returns non-zero when the HEAP_FLAG bit of `a` is clear. */
int __is_int(val a)
{
    return (a & HEAP_FLAG) == 0;
}

/* Returns the low 32 bits of `a` converted to int. */
int __int(val a)
{
    return (int)(a & INT_MASK);
}

/* Builds a tagged integer from `a` by keeping its low 32 bits. */
val __let(int a)
{
    return (val)a & INT_MASK;
}

/* Decrements the value `x` points to by one. */
void __dec(val *x)
{
    (*x)--;
}

/* Returns 1/0 for two integers; the truncated __nil otherwise (see NOTE). */
int __eq(val a, val b)
{
    return __is_int(a) && __is_int(b) ? __int(a) == __int(b) : (int)__nil;
}

/*
 * Returns the remainder of two integers.  Yields the truncated __nil (see
 * NOTE) when an operand is not an integer or when the divisor is 0, since
 * `%` by zero is undefined behaviour.
 * NOTE(review): INT_MIN % -1 is still not guarded.
 */
int __mod(val a, val b)
{
    if (!__is_int(a) || !__is_int(b) || __int(b) == 0) {
        return (int)__nil;
    }
    return __int(a) % __int(b);
}

/* Prints `a` with "%i" when it is an integer; prints nothing otherwise. */
void __cout(val a)
{
    if (__is_int(a)) {
        printf("%i", __int(a));
    }
}
#endif
/*
 * Benchmark body: counts a tagged integer down from 3000000, one step at a
 * time, and stops as soon as it compares equal to 0.  Every value on the
 * way that leaves remainder 0 when divided by 100000 is printed, followed
 * by a single space.
 */
void test_loop() {
    val counter = __let(3000000);

    /* Decrement first, then test, exactly once per iteration. */
    for (__dec(&counter); !__eq(counter, __let(0)); __dec(&counter)) {
        if (__eq(__mod(counter, __let(100000)), __let(0))) {
            __cout(counter);
            printf(" ");
        }
    }
}
/*
 * Returns the current wall-clock time in milliseconds, computed from the
 * seconds and microseconds reported by gettimeofday().
 * Returns -1 if gettimeofday() reports failure.
 */
long time1000() {
    struct timeval now;

    if (gettimeofday(&now, NULL) != 0) {
        /* `now` was not filled in, so it must not be read. */
        return -1;
    }
    return now.tv_sec * 1000 + now.tv_usec / 1000;
}
/*
 * Runs test_loop() once and prints the elapsed wall-clock time in
 * milliseconds as "<N ms test_loop>".
 */
int main(void) {
    /* Keep the timestamps in long, the type time1000() returns: a
     * millisecond timestamp is not guaranteed to fit in an int. */
    long start = time1000();

    test_loop();

    long elapsed = time1000() - start;
    printf("<%i ms test_loop>\n", (int)elapsed);
    return 0;
}