From 34f5591a0d88cef6613752d6497dba337aa217a3 Mon Sep 17 00:00:00 2001 From: Zygmunt Krynicki Date: Sun, 4 Oct 2020 11:39:59 +0200 Subject: [PATCH 01/12] Initial benchmark API This patch lays the groundwork for supporting micro-benchmarks inside libzt. Test suites can now visit benchmarks, in addition to test cases and other test suites. A benchmark is a function taking one argument of type zt_b, similar to zt_t for test cases. The typedef zt_b is a pointer to struct zt_benchmark, holding one parameter, a 64-bit counter, n, holding the desired number of iterations to execute. Internally libzt executes all benchmarks at least once, to ensure they do not crash. In verbose mode, when invoked with the -v command line option, precise measurements are taken to compute the number of nanoseconds required to execute a single loop iteration. Timing is based on the portable, microsecond-accurate clock_t clock() function. There are several warm-up phases where the loop is executed enough times to take roughly ten milliseconds. In my crude measurements this stabilizes the result well enough to estimate the cost of a single iteration. Following that, benchmark.b.n is set to a value that should give about one second of execution. This is when final measurements are taken. I've experimented with several different ideas, and found significant noise in the early estimation phase, when the effective runtime was lower than 10ms. At 1ms, results were several orders of magnitude off the duration measured over 10ms. The duration of the complete test is currently excessive. I found no difference between a desired runtime of 1000ms and 100ms, suggesting there is some more room for improvement. There's a chance to improve accuracy by switching to non-portable, nanosecond-resolution APIs that internally fuel clock(), but this has not been attempted yet. The code is not tested yet and the manual pages are not complete, but there is a small example of the new functionality. 
Signed-off-by: Zygmunt Krynicki --- .gitignore | 2 ++ examples/GNUmakefile | 5 ++-- examples/bench-sqrt.c | 48 ++++++++++++++++++++++++++++++ libzt.def | 3 +- libzt.export_list | 1 + libzt.map | 5 ++++ man/zt_visitor.3.in | 9 +++--- zt.c | 69 +++++++++++++++++++++++++++++++++++++++++++ zt.h | 8 +++++ 9 files changed, 143 insertions(+), 7 deletions(-) create mode 100644 examples/bench-sqrt.c diff --git a/.gitignore b/.gitignore index 11dec65..5aaafb8 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,8 @@ examples/demo examples/demo.exe examples/test-root-user examples/test-root-user.exe +examples/sqrt-bench +examples/sqrt-bench.exe # Release archives libzt_*.tar.gz diff --git a/examples/GNUmakefile b/examples/GNUmakefile index 27fe7c5..7e415c6 100644 --- a/examples/GNUmakefile +++ b/examples/GNUmakefile @@ -3,6 +3,7 @@ CFLAGS ?= -Wall -Werror -O2 LDLIBS += -lzt -all: demo test-root-user +all: demo test-root-user bench-sqrt +bench-sqrt: LDLIBS += -lm clean: - rm -f *.o demo test-root-user + rm -f *.o demo test-root-user bench-sqrt diff --git a/examples/bench-sqrt.c b/examples/bench-sqrt.c new file mode 100644 index 0000000..e3c9076 --- /dev/null +++ b/examples/bench-sqrt.c @@ -0,0 +1,48 @@ +#include +#include +#include +#include + +#include + +static void bench_sqrtf(zt_b b) +{ + volatile float in = 2.0; + volatile float out; + for (uint64_t i = b->n; i != 0; --i) { + out = sqrtf(in); + } + (void)out; +} + +static void bench_sqrt(zt_b b) +{ + volatile double in = 2.0; + volatile double out; + for (uint64_t i = b->n; i != 0; --i) { + out = sqrt(in); + } + (void)out; +} + +static void bench_sqrtl(zt_b b) +{ + volatile long double in = 2.0; + volatile long double out; + for (uint64_t i = b->n; i != 0; --i) { + out = sqrtl(in); + } + (void)out; +} + +static void test_suite(zt_visitor v) +{ + ZT_VISIT_BENCHMARK(v, bench_sqrtf); + ZT_VISIT_BENCHMARK(v, bench_sqrt); + ZT_VISIT_BENCHMARK(v, bench_sqrtl); +} + +int main(int argc, char** argv, char** envp) +{ + return 
zt_main(argc, argv, envp, test_suite); +} diff --git a/libzt.def b/libzt.def index 7dd3a89..1f2f946 100644 --- a/libzt.def +++ b/libzt.def @@ -1,5 +1,5 @@ LIBRARY ZT -VERSION 0.3 +VERSION 0.4 EXPORTS zt_assert zt_check @@ -17,3 +17,4 @@ EXPORTS zt_true zt_visit_test_case zt_visit_test_suite + zt_visit_benchmark diff --git a/libzt.export_list b/libzt.export_list index 4e93738..f32c54d 100644 --- a/libzt.export_list +++ b/libzt.export_list @@ -14,3 +14,4 @@ _zt_pack_rune _zt_true _zt_visit_test_case _zt_visit_test_suite +_zt_visit_benchmark diff --git a/libzt.map b/libzt.map index 406efbe..6c0d143 100644 --- a/libzt.map +++ b/libzt.map @@ -26,3 +26,8 @@ VERS_0_3 { global: zt_cmp_ptr; } VERS_0_2; + +VERS_0_4 { + global: + zt_visit_benchmark; +} VERS_0_3; diff --git a/man/zt_visitor.3.in b/man/zt_visitor.3.in index 17893cb..a10c0f7 100644 --- a/man/zt_visitor.3.in +++ b/man/zt_visitor.3.in @@ -14,10 +14,10 @@ .El .Sh DESCRIPTION .Nm -is an interface for exploring test suites and test cases. The visitor type is -used as an argument to all test suites. Test suites can enumerate test cases -and other test suites. It is a part of the implementation and is not expected -to be implemented by library users. +is an interface for exploring test suites, test cases and benchmarks. The +visitor type is used as an argument to all test suites. Test suites can +enumerate test cases and other test suites. It is a part of the implementation +and is not expected to be implemented by library users. .Pp .Nm zt_visitor_vtab is an opaque type comprised of functions that define the interface. The @@ -32,6 +32,7 @@ be null as it defines the unique aspect of the implementation. 
.Sh SEE ALSO .Xr zt_visit_test_case 3 , .Xr zt_visit_test_suite 3 +.Xr zt_visit_benchmark 3 .Sh HISTORY .Nm first appeared in libzt 0.1 diff --git a/zt.c b/zt.c index 80eb185..fbec9f2 100644 --- a/zt.c +++ b/zt.c @@ -26,6 +26,7 @@ #include #include #include +#include #if !defined(__GNUC__) && !defined(__clang__) #define ZT_UNUSED @@ -259,9 +260,16 @@ typedef struct zt_test { zt_outcome outcome; } zt_test; +typedef struct zt_benchmark_internal { + zt_benchmark b; + const char* name; +} zt_benchmark_internal; +/* In reality zt_t is a pointer to zt_benchmark_internal. */ + typedef struct zt_visitor_vtab { void (*visit_case)(void*, zt_test_case_func, const char* name); void (*visit_suite)(void*, zt_test_suite_func, const char* name); + void (*visit_benchmark)(void*, zt_benchmark_func, const char* name); } zt_visitor_vtab; typedef struct zt_test_lister { @@ -337,6 +345,12 @@ void zt_visit_test_case(zt_visitor v, zt_test_case_func func, v.vtab->visit_case(v.id, func, name); } +void zt_visit_benchmark(zt_visitor v, zt_benchmark_func func, + const char* name) +{ + v.vtab->visit_benchmark(v.id, func, name); +} + /* Lister visitor */ static zt_visitor zt_visitor_from_test_lister(zt_test_lister* lister); @@ -360,9 +374,18 @@ static void zt_test_lister__visit_case(void* id, ZT_UNUSED zt_test_case_func fun fprintf(lister->stream, "%*c %s\n", lister->nesting * 3, '-', name); } +static void zt_test_lister__visit_benchmark(void* id, ZT_UNUSED zt_benchmark_func func, + const char* name) +{ + zt_test_lister* lister = (zt_test_lister*)id; + (void)func; + fprintf(lister->stream, "%*c %s (benchmark)\n", lister->nesting * 3, '-', name); +} + static const zt_visitor_vtab zt_test_lister__visitor_vtab = { /* .visit_case = */ zt_test_lister__visit_case, /* .visit_suite = */ zt_test_lister__visit_suite, + /* .visit_benchmark = */ zt_test_lister__visit_benchmark, }; static zt_visitor zt_visitor_from_test_lister(zt_test_lister* lister) @@ -447,9 +470,55 @@ static void 
zt_runner_visitor__visit_case(void* id, zt_test_case_func func, } } +static void zt_runner_visitor__visit_benchmark(void* id, zt_benchmark_func func, + const char* name) +{ + zt_test_runner* runner = (zt_test_runner*)id; + zt_benchmark_internal benchmark; + + memset(&benchmark, 0, sizeof benchmark); + + /* Run the benchmark function at least once. */ + if (!runner->verbose || runner->stream_out == NULL) { + benchmark.b.n = 1; + func(&benchmark.b); + return; + } + if (runner->verbose && runner->stream_out) { + fprintf(runner->stream_out, "%*c %s ", runner->nesting * 3, '-', name); + } + + clock_t start, end; + long double ns_per_loop; + + /* See if we can run for ten milliseconds. This is close to 100HZ default + * used for task switching on some systems. */ + start = end = clock(); + for (benchmark.b.n = 1; end - start < CLOCKS_PER_SEC / 100; benchmark.b.n <<= 1) { + func(&benchmark.b); + end = clock(); + } + ns_per_loop = (long double)(end - start); + ns_per_loop *= 1000000000 / CLOCKS_PER_SEC; + ns_per_loop /= (long double)benchmark.b.n; + + /* Run the benchmark for about one second. 
*/ + benchmark.b.n = (uint64_t)((1e9 / ns_per_loop)); + start = clock(); + func(&benchmark.b); + end = clock(); + ns_per_loop = (long double)(end - start); + ns_per_loop *= 1000000000 / CLOCKS_PER_SEC; + ns_per_loop /= (long double)benchmark.b.n; + if (runner->verbose && runner->stream_out) { + fprintf(runner->stream_out, "%.1Lf ns/loop\n", ns_per_loop); + } +} + static const zt_visitor_vtab zt_test_runner__visitor_vtab = { /* .visit_case = */ zt_runner_visitor__visit_case, /* .visit_suite = */ zt_runner_visitor__visit_suite, + /* .visit_benchmark = */ zt_runner_visitor__visit_benchmark, }; static zt_visitor zt_visitor_from_test_runner(zt_test_runner* runner) diff --git a/zt.h b/zt.h index 9ca2874..03c83e4 100644 --- a/zt.h +++ b/zt.h @@ -31,6 +31,11 @@ extern "C" { struct zt_test; typedef struct zt_test* zt_t; +typedef struct zt_benchmark { + uint64_t n; +} zt_benchmark; +typedef struct zt_benchmark* zt_b; + struct zt_visitor_vtab; typedef struct zt_visitor { void* id; @@ -39,14 +44,17 @@ typedef struct zt_visitor { typedef void (*zt_test_case_func)(zt_t); typedef void (*zt_test_suite_func)(zt_visitor); +typedef void (*zt_benchmark_func)(zt_b); int zt_main(int argc, char** argv, char** envp, zt_test_suite_func tsuite); void zt_visit_test_suite(zt_visitor v, zt_test_suite_func func, const char* name); void zt_visit_test_case(zt_visitor v, zt_test_case_func func, const char* name); +void zt_visit_benchmark(zt_visitor v, zt_benchmark_func func, const char* name); #define ZT_VISIT_TEST_SUITE(v, tsuite) zt_visit_test_suite(v, tsuite, #tsuite) #define ZT_VISIT_TEST_CASE(v, tcase) zt_visit_test_case(v, tcase, #tcase) +#define ZT_VISIT_BENCHMARK(v, bench) zt_visit_benchmark(v, bench, #bench) typedef enum zt_value_kind { ZT_NOTHING, From 5ccfdaff7363ac54a556b175f10edd42042bb98d Mon Sep 17 00:00:00 2001 From: Zygmunt Krynicki Date: Tue, 6 Oct 2020 17:52:10 +0200 Subject: [PATCH 02/12] Fix comment about zt_b Signed-off-by: Zygmunt Krynicki --- zt.c | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/zt.c b/zt.c index fbec9f2..3816396 100644 --- a/zt.c +++ b/zt.c @@ -264,7 +264,7 @@ typedef struct zt_benchmark_internal { zt_benchmark b; const char* name; } zt_benchmark_internal; -/* In reality zt_t is a pointer to zt_benchmark_internal. */ +/* In reality zt_b is a pointer to zt_benchmark_internal.b. */ typedef struct zt_visitor_vtab { void (*visit_case)(void*, zt_test_case_func, const char* name); From 4e257931f54631a141c12dfec3660c3d7cc53c9b Mon Sep 17 00:00:00 2001 From: Zygmunt Krynicki Date: Tue, 6 Oct 2020 17:52:32 +0200 Subject: [PATCH 03/12] Add baseline no-op benchmark Signed-off-by: Zygmunt Krynicki --- examples/bench-sqrt.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/examples/bench-sqrt.c b/examples/bench-sqrt.c index e3c9076..82d91ea 100644 --- a/examples/bench-sqrt.c +++ b/examples/bench-sqrt.c @@ -5,6 +5,12 @@ #include +static void bench_nothing(zt_b b) +{ + for (volatile uint64_t i = b->n; i != 0; --i) { + } +} + static void bench_sqrtf(zt_b b) { volatile float in = 2.0; @@ -37,6 +43,7 @@ static void bench_sqrtl(zt_b b) static void test_suite(zt_visitor v) { + ZT_VISIT_BENCHMARK(v, bench_nothing); ZT_VISIT_BENCHMARK(v, bench_sqrtf); ZT_VISIT_BENCHMARK(v, bench_sqrt); ZT_VISIT_BENCHMARK(v, bench_sqrtl); From a0c516b2b36aebb6e59976dcc1d72ef1e370c3f9 Mon Sep 17 00:00:00 2001 From: Zygmunt Krynicki Date: Wed, 7 Oct 2020 21:17:23 +0200 Subject: [PATCH 04/12] Use clock_gettime for nanosecond precision This is not portable, a more portable fallback will follow. Signed-off-by: Zygmunt Krynicki --- zt.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/zt.c b/zt.c index 3816396..ce4afac 100644 --- a/zt.c +++ b/zt.c @@ -17,6 +17,10 @@ * You should have received a copy of the GNU Lesser General Public License * along with Libzt. If not, see . 
*/ +#ifndef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 199309L +#endif + #include "zt.h" #include @@ -470,6 +474,14 @@ static void zt_runner_visitor__visit_case(void* id, zt_test_case_func func, } } +static int ns_delta_below(struct timespec start, struct timespec end, long delta_ns) +{ + if (difftime(end.tv_sec, start.tv_sec) >= 1) { + return 0; + } + return end.tv_nsec - start.tv_nsec < delta_ns; +} + static void zt_runner_visitor__visit_benchmark(void* id, zt_benchmark_func func, const char* name) { @@ -488,27 +500,28 @@ static void zt_runner_visitor__visit_benchmark(void* id, zt_benchmark_func func, fprintf(runner->stream_out, "%*c %s ", runner->nesting * 3, '-', name); } - clock_t start, end; + struct timespec start, end; long double ns_per_loop; /* See if we can run for ten milliseconds. This is close to 100HZ default * used for task switching on some systems. */ - start = end = clock(); - for (benchmark.b.n = 1; end - start < CLOCKS_PER_SEC / 100; benchmark.b.n <<= 1) { + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start); + end = start; + for (benchmark.b.n = 1; ns_delta_below(start, end, 10 * 1000 * 1000); benchmark.b.n <<= 1) { func(&benchmark.b); - end = clock(); + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end); } - ns_per_loop = (long double)(end - start); - ns_per_loop *= 1000000000 / CLOCKS_PER_SEC; + ns_per_loop = difftime(end.tv_sec, start.tv_sec) * 1e9; + ns_per_loop += (long double)(end.tv_nsec - start.tv_nsec); ns_per_loop /= (long double)benchmark.b.n; /* Run the benchmark for about one second. 
*/ benchmark.b.n = (uint64_t)((1e9 / ns_per_loop)); - start = clock(); + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start); func(&benchmark.b); - end = clock(); - ns_per_loop = (long double)(end - start); - ns_per_loop *= 1000000000 / CLOCKS_PER_SEC; + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end); + ns_per_loop = difftime(end.tv_sec, start.tv_sec) * 1e9; + ns_per_loop += (long double)(end.tv_nsec - start.tv_nsec); ns_per_loop /= (long double)benchmark.b.n; if (runner->verbose && runner->stream_out) { fprintf(runner->stream_out, "%.1Lf ns/loop\n", ns_per_loop); From 7ccf31714d03938df1388f0eae1c7ab24d19f6a0 Mon Sep 17 00:00:00 2001 From: Zygmunt Krynicki Date: Sun, 11 Oct 2020 20:33:32 +0200 Subject: [PATCH 05/12] Add redundant switch cases Those are identical to the default case but defining them silences a warning emitted by MSVC by default. --- zt.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/zt.c b/zt.c index ce4afac..0a77935 100644 --- a/zt.c +++ b/zt.c @@ -101,7 +101,15 @@ static void zt_promote_value(zt_value* v) v->as.uintmax = v->as.unsigned_integer; v->kind = ZT_UINTMAX; break; + case ZT_NOTHING: + case ZT_BOOLEAN: + case ZT_RUNE: + case ZT_STRING: + case ZT_POINTER: + case ZT_INTMAX: + case ZT_UINTMAX: default: + /* Nothing do to, all kinds listed to silence Microsoft compiler. 
*/ break; } } @@ -760,6 +768,11 @@ static bool zt_verify_boolean_relation(zt_test* test, zt_value left, zt_value re return true; } break; + case ZT_REL_INVALID: + case ZT_REL_LE: + case ZT_REL_GE: + case ZT_REL_LT: + case ZT_REL_GT: default: zt_logf(test->stream, test->location, "assertion %s %s %s uses unsupported relation", zt_source_of(left), rel.as.string, zt_source_of(right)); @@ -1275,6 +1288,11 @@ static bool zt_verify_pointer_relation(zt_test* test, zt_value left, zt_value re return true; } break; + case ZT_REL_INVALID: + case ZT_REL_LE: + case ZT_REL_GE: + case ZT_REL_LT: + case ZT_REL_GT: default: zt_logf(test->stream, test->location, "assertion %s %s %s uses unsupported relation", zt_source_of(left), zt_source_of(rel), zt_source_of(right)); From f637a3f3c3df2a2515568352a202cb36773b0713 Mon Sep 17 00:00:00 2001 From: Zygmunt Krynicki Date: Sun, 11 Oct 2020 20:35:06 +0200 Subject: [PATCH 06/12] Port benchmark to win32 The benchmark logic is implemented with QueryPerformanceFrequency and QueryPerformanceCounter --- zt.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/zt.c b/zt.c index 0a77935..c442225 100644 --- a/zt.c +++ b/zt.c @@ -30,7 +30,11 @@ #include #include #include +#ifndef _WIN32 #include +#else +#include +#endif #if !defined(__GNUC__) && !defined(__clang__) #define ZT_UNUSED @@ -482,6 +486,7 @@ static void zt_runner_visitor__visit_case(void* id, zt_test_case_func func, } } +#ifndef _WIN32 static int ns_delta_below(struct timespec start, struct timespec end, long delta_ns) { if (difftime(end.tv_sec, start.tv_sec) >= 1) { @@ -489,6 +494,7 @@ static int ns_delta_below(struct timespec start, struct timespec end, long delta } return end.tv_nsec - start.tv_nsec < delta_ns; } +#endif static void zt_runner_visitor__visit_benchmark(void* id, zt_benchmark_func func, const char* name) @@ -508,11 +514,11 @@ static void zt_runner_visitor__visit_benchmark(void* id, zt_benchmark_func func, 
fprintf(runner->stream_out, "%*c %s ", runner->nesting * 3, '-', name); } - struct timespec start, end; - long double ns_per_loop; - + long double ns_per_loop; +#ifndef _WIN32 /* See if we can run for ten milliseconds. This is close to 100HZ default * used for task switching on some systems. */ + struct timespec start, end; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start); end = start; for (benchmark.b.n = 1; ns_delta_below(start, end, 10 * 1000 * 1000); benchmark.b.n <<= 1) { @@ -522,7 +528,6 @@ static void zt_runner_visitor__visit_benchmark(void* id, zt_benchmark_func func, ns_per_loop = difftime(end.tv_sec, start.tv_sec) * 1e9; ns_per_loop += (long double)(end.tv_nsec - start.tv_nsec); ns_per_loop /= (long double)benchmark.b.n; - /* Run the benchmark for about one second. */ benchmark.b.n = (uint64_t)((1e9 / ns_per_loop)); clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start); @@ -531,6 +536,28 @@ static void zt_runner_visitor__visit_benchmark(void* id, zt_benchmark_func func, ns_per_loop = difftime(end.tv_sec, start.tv_sec) * 1e9; ns_per_loop += (long double)(end.tv_nsec - start.tv_nsec); ns_per_loop /= (long double)benchmark.b.n; +#else + LARGE_INTEGER start, end, freq; + QueryPerformanceFrequency(&freq); + QueryPerformanceCounter(&start); + end = start; + for (benchmark.b.n = 1; end.QuadPart - start.QuadPart < freq.QuadPart / 100; benchmark.b.n <<= 1) { + func(&benchmark.b); + QueryPerformanceCounter(&end); + } + ns_per_loop = (end.QuadPart - start.QuadPart) * 1e9/freq.QuadPart; + ns_per_loop /= (long double)benchmark.b.n; + /* Run the benchmark for about one second. 
*/ + benchmark.b.n = (uint64_t)((1e9 / ns_per_loop)); + QueryPerformanceCounter(&start); + func(&benchmark.b); + QueryPerformanceCounter(&end); + ns_per_loop = (end.QuadPart - start.QuadPart) * 1e9 / freq.QuadPart; + ns_per_loop /= (long double)benchmark.b.n; + if (runner->verbose && runner->stream_out) { + fprintf(runner->stream_out, "%.1Lf ns/loop ", ns_per_loop); + } +#endif if (runner->verbose && runner->stream_out) { fprintf(runner->stream_out, "%.1Lf ns/loop\n", ns_per_loop); } From 7db0065e5f4cb71e055f7f4dd89c12507c7f91b0 Mon Sep 17 00:00:00 2001 From: Zygmunt Krynicki Date: Sun, 11 Oct 2020 20:36:19 +0200 Subject: [PATCH 07/12] Fix ignore pattern for bench-sqrt --- .gitignore | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 5aaafb8..8d939e0 100644 --- a/.gitignore +++ b/.gitignore @@ -15,8 +15,8 @@ examples/demo examples/demo.exe examples/test-root-user examples/test-root-user.exe -examples/sqrt-bench -examples/sqrt-bench.exe +examples/bench-sqrt +examples/bench-sqrt.exe # Release archives libzt_*.tar.gz From 4a9415607731754360a1daedf0a04440caced1e1 Mon Sep 17 00:00:00 2001 From: Zygmunt Krynicki Date: Sun, 11 Oct 2020 20:56:58 +0200 Subject: [PATCH 08/12] Initialize out, silence warning There is no guarantee that the loop will have at least one iteration. 
--- examples/bench-sqrt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/bench-sqrt.c b/examples/bench-sqrt.c index 82d91ea..837240c 100644 --- a/examples/bench-sqrt.c +++ b/examples/bench-sqrt.c @@ -14,7 +14,7 @@ static void bench_nothing(zt_b b) static void bench_sqrtf(zt_b b) { volatile float in = 2.0; - volatile float out; + volatile float out = 0; for (uint64_t i = b->n; i != 0; --i) { out = sqrtf(in); } @@ -24,7 +24,7 @@ static void bench_sqrtf(zt_b b) static void bench_sqrt(zt_b b) { volatile double in = 2.0; - volatile double out; + volatile double out = 0; for (uint64_t i = b->n; i != 0; --i) { out = sqrt(in); } @@ -34,7 +34,7 @@ static void bench_sqrt(zt_b b) static void bench_sqrtl(zt_b b) { volatile long double in = 2.0; - volatile long double out; + volatile long double out = 0; for (uint64_t i = b->n; i != 0; --i) { out = sqrtl(in); } From 4ce670d4e8f3c67be5c9611df5f139dd2da3769e Mon Sep 17 00:00:00 2001 From: Zygmunt Krynicki Date: Sun, 11 Oct 2020 21:00:16 +0200 Subject: [PATCH 09/12] Optimize Windows build --- Makefile.nmake.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.nmake.mk b/Makefile.nmake.mk index 9ebe39e..c4cc7ed 100644 --- a/Makefile.nmake.mk +++ b/Makefile.nmake.mk @@ -1,4 +1,4 @@ -!if [set CL=/nologo /Wall /wd4820 /wd4100 /wd4996 /wd4710 /wd5045] +!if [set CL=/O2 /nologo /Wall /wd4820 /wd4100 /wd4996 /wd4710 /wd5045] !endif all: libzt-test.exe zt.dll From 8d42fd3482a0808b8d6292c57ad288fc6a370726 Mon Sep 17 00:00:00 2001 From: Zygmunt Krynicki Date: Sun, 13 Dec 2020 19:22:51 +0100 Subject: [PATCH 10/12] Trim spaces Signed-off-by: Zygmunt Krynicki --- zt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zt.c b/zt.c index c442225..6cd511e 100644 --- a/zt.c +++ b/zt.c @@ -514,7 +514,7 @@ static void zt_runner_visitor__visit_benchmark(void* id, zt_benchmark_func func, fprintf(runner->stream_out, "%*c %s ", runner->nesting * 3, '-', name); } - long double 
ns_per_loop; + long double ns_per_loop; #ifndef _WIN32 /* See if we can run for ten milliseconds. This is close to 100HZ default * used for task switching on some systems. */ From 568c5c2c3e75a333657e29af6c1aa1cc017aa34f Mon Sep 17 00:00:00 2001 From: Zygmunt Krynicki Date: Sun, 21 Nov 2021 22:48:44 +0100 Subject: [PATCH 11/12] Silence inline and macro warnings --- Makefile.nmake.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.nmake.mk b/Makefile.nmake.mk index c4cc7ed..adf2082 100644 --- a/Makefile.nmake.mk +++ b/Makefile.nmake.mk @@ -1,4 +1,4 @@ -!if [set CL=/O2 /nologo /Wall /wd4820 /wd4100 /wd4996 /wd4710 /wd5045] +!if [set CL=/O2 /nologo /Wall /wd4820 /wd4100 /wd4996 /wd4710 /wd5045 /wd4711 /wd4668] !endif all: libzt-test.exe zt.dll From fa6b1752b56d7a129a61a1d1f82e9f228a0ae539 Mon Sep 17 00:00:00 2001 From: Zygmunt Krynicki Date: Sun, 21 Nov 2021 22:49:20 +0100 Subject: [PATCH 12/12] Avoid printing benchmark twice when on Windows --- zt.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/zt.c b/zt.c index 6cd511e..7b66392 100644 --- a/zt.c +++ b/zt.c @@ -554,9 +554,6 @@ static void zt_runner_visitor__visit_benchmark(void* id, zt_benchmark_func func, QueryPerformanceCounter(&end); ns_per_loop = (end.QuadPart - start.QuadPart) * 1e9 / freq.QuadPart; ns_per_loop /= (long double)benchmark.b.n; - if (runner->verbose && runner->stream_out) { - fprintf(runner->stream_out, "%.1Lf ns/loop ", ns_per_loop); - } #endif if (runner->verbose && runner->stream_out) { fprintf(runner->stream_out, "%.1Lf ns/loop\n", ns_per_loop);