From d21d0f301bd882cfbf3c91fcc6743b4308a7faf3 Mon Sep 17 00:00:00 2001
From: Andrew Jeffery
Date: Mon, 18 Dec 2017 17:39:25 +1030
Subject: [PATCH] genann: Remove branching from back-propagation inner-loop

This saves approximately 80 million instructions and 44 million branches
in the trace of example4, shaving off around 8ms:

Before:
```
 Performance counter stats for './example4':

         92.629610      task-clock (msec)         #    0.997 CPUs utilized
                 0      context-switches          #    0.000 K/sec
                 0      cpu-migrations            #    0.000 K/sec
                78      page-faults               #    0.842 K/sec
       291,863,801      cycles                    #    3.151 GHz
     1,000,931,204      instructions              #    3.43  insn per cycle
       202,465,800      branches                  # 2185.757 M/sec
            50,949      branch-misses             #    0.03% of all branches

       0.092889789 seconds time elapsed
```

After:
```
 Performance counter stats for './example4':

         84.473035      task-clock (msec)         #    0.997 CPUs utilized
                 3      context-switches          #    0.036 K/sec
                 0      cpu-migrations            #    0.000 K/sec
                81      page-faults               #    0.959 K/sec
       265,472,170      cycles                    #    3.143 GHz
       919,372,488      instructions              #    3.46  insn per cycle
       158,754,885      branches                  # 1879.356 M/sec
            65,337      branch-misses             #    0.04% of all branches

       0.084755458 seconds time elapsed
```

Signed-off-by: Andrew Jeffery
---
 genann.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/genann.c b/genann.c
index c3c0aca..08034b2 100644
--- a/genann.c
+++ b/genann.c
@@ -344,12 +344,9 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
 
         /* Set output layer weights. */
         for (j = 0; j < ann->outputs; ++j) {
-            for (k = 0; k < (ann->hidden_layers ? ann->hidden : ann->inputs) + 1; ++k) {
-                if (k == 0) {
-                    *w++ += *d * learning_rate * -1.0;
-                } else {
-                    *w++ += *d * learning_rate * i[k-1];
-                }
+            *w++ += *d * learning_rate * -1.0;
+            for (k = 1; k < (ann->hidden_layers ? ann->hidden : ann->inputs) + 1; ++k) {
+                *w++ += *d * learning_rate * i[k-1];
             }
 
             ++d;
@@ -377,12 +374,9 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
 
 
         for (j = 0; j < ann->hidden; ++j) {
-            for (k = 0; k < (h == 0 ? ann->inputs : ann->hidden) + 1; ++k) {
-                if (k == 0) {
-                    *w++ += *d * learning_rate * -1.0;
-                } else {
-                    *w++ += *d * learning_rate * i[k-1];
-                }
+            *w++ += *d * learning_rate * -1.0;
+            for (k = 1; k < (h == 0 ? ann->inputs : ann->hidden) + 1; ++k) {
+                *w++ += *d * learning_rate * i[k-1];
             }
             ++d;
         }