commit b80b1b78901154240bc5d2ab6f5ea4465bf7fa67
parent 5216fb4be01e88f5b4928a69287e8f7ee3b3af5b
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date: Fri, 18 Oct 2024 19:06:45 +0200
Fixed performance regression
Diffstat:
2 files changed, 28 insertions(+), 22 deletions(-)
diff --git a/src/solvers/h48/solve.h b/src/solvers/h48/solve.h
@@ -17,8 +17,8 @@ typedef struct {
uint8_t nissbranch;
int8_t npremoves;
uint8_t premoves[MAXLEN];
- _Atomic long long *nodes_visited;
- _Atomic long long *table_fallbacks;
+ long long nodes_visited;
+ long long table_fallbacks;
} dfsarg_solveh48_t;
STATIC uint32_t allowednextmove_h48(uint8_t *, uint8_t, uint8_t);
@@ -105,7 +105,7 @@ solve_h48_stop(dfsarg_solveh48_t *arg)
int8_t cbound, cbound_inv, h48bound, h48bound_inv;
int64_t coord, coord_inv;
- (*arg->nodes_visited)++;
+ arg->nodes_visited++;
arg->nissbranch = MM_NORMAL;
cbound = get_h48_cdata(arg->cube, arg->cocsepdata, &data);
@@ -124,7 +124,7 @@ solve_h48_stop(dfsarg_solveh48_t *arg)
if (arg->k == 2) {
if (h48bound == 0) {
- (*arg->table_fallbacks)++;
+ arg->table_fallbacks++;
h48bound = get_h48_pval(
arg->h48data_fallback, coord >> arg->h, 4);
} else {
@@ -141,7 +141,7 @@ solve_h48_stop(dfsarg_solveh48_t *arg)
h48bound_inv = get_h48_pval(arg->h48data, coord_inv, arg->k);
if (arg->k == 2) {
if (h48bound_inv == 0) {
- (*arg->table_fallbacks)++;
+ arg->table_fallbacks++;
h48bound_inv = get_h48_pval(
arg->h48data_fallback, coord_inv >> arg->h, 4);
} else {
@@ -203,6 +203,8 @@ solve_h48_dfs(dfsarg_solveh48_t *arg)
}
}
+ arg->nodes_visited = nextarg.nodes_visited;
+ arg->table_fallbacks = nextarg.table_fallbacks;
return ret;
}
@@ -220,14 +222,12 @@ solve_h48(
)
{
_Atomic int64_t nsols;
- _Atomic long long nodes, fallbacks;
dfsarg_solveh48_t arg;
tableinfo_t info, fbinfo;
if(readtableinfo_n(data_size, data, 2, &info) != NISSY_OK)
goto solve_h48_error_data;
- nodes = fallbacks = 0;
arg = (dfsarg_solveh48_t) {
.cube = cube,
.inverse = inverse(cube),
@@ -240,8 +240,8 @@ solve_h48(
.h48data = (uint8_t *)data + COCSEP_FULLSIZE + INFOSIZE,
.solutions_size = solutions_size,
.nextsol = &solutions,
- .nodes_visited = &nodes,
- .table_fallbacks = &fallbacks
+ .nodes_visited = 0,
+ .table_fallbacks = 0
};
if (info.bits == 2) {
@@ -268,9 +268,10 @@ solve_h48(
}
**arg.nextsol = '\0';
- stats[0] = nodes;
- stats[1] = fallbacks;
- LOG("Nodes visited: %lld\nTable fallbacks: %lld\n", nodes, fallbacks);
+ stats[0] = arg.nodes_visited;
+ stats[1] = arg.table_fallbacks;
+ LOG("Nodes visited: %lld\nTable fallbacks: %lld\n",
+ arg.nodes_visited, arg.table_fallbacks);
return nsols;
diff --git a/src/solvers/h48/solve_multithread.h b/src/solvers/h48/solve_multithread.h
@@ -11,6 +11,8 @@ typedef struct {
pthread_cond_t cond;
pthread_cond_t active_cond;
atomic_bool terminate;
+ _Atomic long long nodes_visited_global;
+ _Atomic long long table_fallbacks_global;
} task_queue_t;
STATIC void solve_h48_appendsolution_thread(dfsarg_solveh48_t *, task_queue_t *);
@@ -128,6 +130,8 @@ start_thread(void *arg)
pthread_mutex_unlock(&queue->mutex);
solve_h48_single(&task, queue);
+ queue->nodes_visited_global += task.nodes_visited;
+ queue->table_fallbacks_global += task.table_fallbacks;
pthread_mutex_lock(&queue->mutex);
queue->active--;
@@ -243,6 +247,9 @@ solve_h48_single(dfsarg_solveh48_t *arg, task_queue_t *tq)
}
}
}
+
+ arg->nodes_visited = nextarg.nodes_visited;
+ arg->table_fallbacks = nextarg.table_fallbacks;
return ret;
}
@@ -260,7 +267,6 @@ solve_h48_multithread(
)
{
_Atomic int64_t nsols = 0;
- _Atomic long long nodes, fallbacks;
int p_depth = 0;
dfsarg_solveh48_t arg;
tableinfo_t info, fbinfo;
@@ -269,7 +275,6 @@ solve_h48_multithread(
if (readtableinfo_n(data_size, data, 2, &info) != NISSY_OK)
goto solve_h48_multithread_error_data;
- nodes = fallbacks = 0;
arg = (dfsarg_solveh48_t){
.cube = cube,
.inverse = inverse(cube),
@@ -283,8 +288,8 @@ solve_h48_multithread(
.h48data = (uint8_t *)data + COCSEP_FULLSIZE + INFOSIZE,
.solutions_size = solutions_size,
.nextsol = &solutions,
- .nodes_visited = &nodes,
- .table_fallbacks = &fallbacks
+ .nodes_visited = 0,
+ .table_fallbacks = 0
};
if (info.bits == 2) {
@@ -306,6 +311,7 @@ solve_h48_multithread(
task_queue_t nq;
init_queue(&nq);
+ nq.nodes_visited_global = nq.table_fallbacks_global = 0;
for (int i = 0; i < THREADS; i++) {
pthread_create(&threads[i], NULL, &start_thread, &nq);
}
@@ -333,13 +339,12 @@ solve_h48_multithread(
}
**arg.nextsol = '\0';
- stats[0] = nodes;
- stats[1] = fallbacks;
- LOG("Nodes visited: %lld\nTable fallbacks: %lld\n", nodes, fallbacks);
+ stats[0] = nq.nodes_visited_global;
+ stats[1] = nq.table_fallbacks_global;
+ LOG("Nodes visited: %lld\nTable fallbacks: %lld\n",
+ nq.nodes_visited_global, nq.table_fallbacks_global);
return nsols;
-solve_h48_multithread_error_data:
- LOG("solve_h48: error reading table\n");
- return NISSY_ERROR_DATA;
+solve_h48_multithread_error_data: LOG("solve_h48: error reading table\n"); return NISSY_ERROR_DATA;
}