From b1f72be243f3a1afb659beb73c6c6bd7f32db093 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Mon, 18 Dec 2017 08:57:58 +1030 Subject: [PATCH] genann: Unroll loops via hoisting inner-loop conditions in genann_run() This gives a reduction of roughly 27 million instructions and 11 million branches in the execution trace of example4. On a Lenovo X1 Carbon Gen 3 machine (Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz) running Ubuntu 17.10 with GCC 7.2.0-8ubuntu3, using CFLAGS="-g -O3 -march=native -DNDEBUG" I see the following change in `perf stat`: Before: ``` Performance counter stats for './example4': 101.369081 task-clock (msec) # 0.998 CPUs utilized 1 context-switches # 0.010 K/sec 0 cpu-migrations # 0.000 K/sec 79 page-faults # 0.779 K/sec 320,197,883 cycles # 3.159 GHz 1,121,174,423 instructions # 3.50 insn per cycle 223,257,752 branches # 2202.425 M/sec 62,680 branch-misses # 0.03% of all branches 0.101595114 seconds time elapsed ``` After: ``` Performance counter stats for './example4': 98.988806 task-clock (msec) # 0.998 CPUs utilized 1 context-switches # 0.010 K/sec 0 cpu-migrations # 0.000 K/sec 79 page-faults # 0.798 K/sec 312,298,260 cycles # 3.155 GHz 1,094,183,752 instructions # 3.50 insn per cycle 212,007,732 branches # 2141.734 M/sec 62,774 branch-misses # 0.03% of all branches 0.099228100 seconds time elapsed ``` Signed-off-by: Andrew Jeffery --- genann.c | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/genann.c b/genann.c index 3fca1d4..8e549ab 100644 --- a/genann.c +++ b/genann.c @@ -203,18 +203,41 @@ double const *genann_run(genann const *ann, double const *inputs) { const genann_actfun act = ann->activation_hidden; const genann_actfun acto = ann->activation_output; + if (!ann->hidden_layers) { + double *ret = o; + for (j = 0; j < ann->outputs; ++j) { + double sum = *w++ * -1.0; + for (k = 0; k < ann->inputs; ++k) { + sum += *w++ * i[k]; + } + *o++ = acto(sum); + } + + return ret; + } + + /* Figure 
input layer */ + for (j = 0; j < ann->hidden; ++j) { + double sum = *w++ * -1.0; + for (k = 0; k < ann->inputs; ++k) { + sum += *w++ * i[k]; + } + *o++ = act(sum); + } + + i += ann->inputs; + /* Figure hidden layers, if any. */ - for (h = 0; h < ann->hidden_layers; ++h) { + for (h = 1; h < ann->hidden_layers; ++h) { for (j = 0; j < ann->hidden; ++j) { double sum = *w++ * -1.0; - for (k = 0; k < (h == 0 ? ann->inputs : ann->hidden); ++k) { + for (k = 0; k < ann->hidden; ++k) { sum += *w++ * i[k]; } *o++ = act(sum); } - - i += (h == 0 ? ann->inputs : ann->hidden); + i += ann->hidden; } double const *ret = o; @@ -222,7 +245,7 @@ double const *genann_run(genann const *ann, double const *inputs) { /* Figure output layer. */ for (j = 0; j < ann->outputs; ++j) { double sum = *w++ * -1.0; - for (k = 0; k < (ann->hidden_layers ? ann->hidden : ann->inputs); ++k) { + for (k = 0; k < ann->hidden; ++k) { sum += *w++ * i[k]; } *o++ = acto(sum);