h48

A prototype for an optimal Rubik's cube solver, work in progress.
git clone https://git.tronto.net/h48
Download | Log | Files | Refs | README | LICENSE

commit b80b1b78901154240bc5d2ab6f5ea4465bf7fa67
parent 5216fb4be01e88f5b4928a69287e8f7ee3b3af5b
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Fri, 18 Oct 2024 19:06:45 +0200

Fixed performance regression

Diffstat:
Msrc/solvers/h48/solve.h | 25+++++++++++++------------
Msrc/solvers/h48/solve_multithread.h | 25+++++++++++++++----------
2 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/src/solvers/h48/solve.h b/src/solvers/h48/solve.h @@ -17,8 +17,8 @@ typedef struct { uint8_t nissbranch; int8_t npremoves; uint8_t premoves[MAXLEN]; - _Atomic long long *nodes_visited; - _Atomic long long *table_fallbacks; + long long nodes_visited; + long long table_fallbacks; } dfsarg_solveh48_t; STATIC uint32_t allowednextmove_h48(uint8_t *, uint8_t, uint8_t); @@ -105,7 +105,7 @@ solve_h48_stop(dfsarg_solveh48_t *arg) int8_t cbound, cbound_inv, h48bound, h48bound_inv; int64_t coord, coord_inv; - (*arg->nodes_visited)++; + arg->nodes_visited++; arg->nissbranch = MM_NORMAL; cbound = get_h48_cdata(arg->cube, arg->cocsepdata, &data); @@ -124,7 +124,7 @@ solve_h48_stop(dfsarg_solveh48_t *arg) if (arg->k == 2) { if (h48bound == 0) { - (*arg->table_fallbacks)++; + arg->table_fallbacks++; h48bound = get_h48_pval( arg->h48data_fallback, coord >> arg->h, 4); } else { @@ -141,7 +141,7 @@ solve_h48_stop(dfsarg_solveh48_t *arg) h48bound_inv = get_h48_pval(arg->h48data, coord_inv, arg->k); if (arg->k == 2) { if (h48bound_inv == 0) { - (*arg->table_fallbacks)++; + arg->table_fallbacks++; h48bound_inv = get_h48_pval( arg->h48data_fallback, coord_inv >> arg->h, 4); } else { @@ -203,6 +203,8 @@ solve_h48_dfs(dfsarg_solveh48_t *arg) } } + arg->nodes_visited = nextarg.nodes_visited; + arg->table_fallbacks = nextarg.table_fallbacks; return ret; } @@ -220,14 +222,12 @@ solve_h48( ) { _Atomic int64_t nsols; - _Atomic long long nodes, fallbacks; dfsarg_solveh48_t arg; tableinfo_t info, fbinfo; if(readtableinfo_n(data_size, data, 2, &info) != NISSY_OK) goto solve_h48_error_data; - nodes = fallbacks = 0; arg = (dfsarg_solveh48_t) { .cube = cube, .inverse = inverse(cube), @@ -240,8 +240,8 @@ solve_h48( .h48data = (uint8_t *)data + COCSEP_FULLSIZE + INFOSIZE, .solutions_size = solutions_size, .nextsol = &solutions, - .nodes_visited = &nodes, - .table_fallbacks = &fallbacks + .nodes_visited = 0, + .table_fallbacks = 0 }; if (info.bits == 2) { @@ -268,9 +268,10 @@ solve_h48( } **arg.nextsol = '\0'; - stats[0] = nodes; - stats[1] = fallbacks; - LOG("Nodes visited: %lld\nTable fallbacks: %lld\n", nodes, fallbacks); + stats[0] = arg.nodes_visited; + stats[1] = arg.table_fallbacks; + LOG("Nodes visited: %lld\nTable fallbacks: %lld\n", + arg.nodes_visited, arg.table_fallbacks); return nsols; diff --git a/src/solvers/h48/solve_multithread.h b/src/solvers/h48/solve_multithread.h @@ -11,6 +11,8 @@ typedef struct { pthread_cond_t cond; pthread_cond_t active_cond; atomic_bool terminate; + _Atomic long long nodes_visited_global; + _Atomic long long table_fallbacks_global; } task_queue_t; STATIC void solve_h48_appendsolution_thread(dfsarg_solveh48_t *, task_queue_t *); @@ -128,6 +130,8 @@ start_thread(void *arg) pthread_mutex_unlock(&queue->mutex); solve_h48_single(&task, queue); + queue->nodes_visited_global += task.nodes_visited; + queue->table_fallbacks_global += task.table_fallbacks; pthread_mutex_lock(&queue->mutex); queue->active--; @@ -243,6 +247,9 @@ solve_h48_single(dfsarg_solveh48_t *arg, task_queue_t *tq) } } } + + arg->nodes_visited = nextarg.nodes_visited; + arg->table_fallbacks = nextarg.table_fallbacks; return ret; } @@ -260,7 +267,6 @@ solve_h48_multithread( ) { _Atomic int64_t nsols = 0; - _Atomic long long nodes, fallbacks; int p_depth = 0; dfsarg_solveh48_t arg; tableinfo_t info, fbinfo; @@ -269,7 +275,6 @@ solve_h48_multithread( if (readtableinfo_n(data_size, data, 2, &info) != NISSY_OK) goto solve_h48_multithread_error_data; - nodes = fallbacks = 0; arg = (dfsarg_solveh48_t){ .cube = cube, .inverse = inverse(cube), @@ -283,8 +288,8 @@ solve_h48_multithread( .h48data = (uint8_t *)data + COCSEP_FULLSIZE + INFOSIZE, .solutions_size = solutions_size, .nextsol = &solutions, - .nodes_visited = &nodes, - .table_fallbacks = &fallbacks + .nodes_visited = 0, + .table_fallbacks = 0 }; if (info.bits == 2) { @@ -306,6 +311,7 @@ solve_h48_multithread( task_queue_t nq; init_queue(&nq); + nq.nodes_visited_global = nq.table_fallbacks_global = 0; for (int i = 0; i < THREADS; i++) { pthread_create(&threads[i], NULL, &start_thread, &nq); } @@ -333,13 +339,12 @@ solve_h48_multithread( } **arg.nextsol = '\0'; - stats[0] = nodes; - stats[1] = fallbacks; - LOG("Nodes visited: %lld\nTable fallbacks: %lld\n", nodes, fallbacks); + stats[0] = nq.nodes_visited_global; + stats[1] = nq.table_fallbacks_global; + LOG("Nodes visited: %lld\nTable fallbacks: %lld\n", + nq.nodes_visited_global, nq.table_fallbacks_global); return nsols; -solve_h48_multithread_error_data: - LOG("solve_h48: error reading table\n"); - return NISSY_ERROR_DATA; +solve_h48_multithread_error_data: LOG("solve_h48: error reading table\n"); return NISSY_ERROR_DATA; }