h48

A prototype for an optimal Rubik's cube solver, work in progress.
git clone https://git.tronto.net/h48
Download | Log | Files | Refs | README | LICENSE

commit 8fcfb3a33fe053ed2032d58ecc0b5d640c155931
parent 105259a58425a9ca1cec1b85339588410ae4fff7
Author: enricotenuti <tenutz_27@outlook.it>
Date:   Sun, 29 Sep 2024 12:38:43 +0200

minor changes for PR

Diffstat:
Msrc/nissy.c | 3++-
Msrc/solvers/h48/h48.h | 2+-
Msrc/solvers/h48/solve.h | 4++--
Asrc/solvers/h48/solve_multithread.h | 300+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dsrc/solvers/h48/thread.h | 301-------------------------------------------------------------------------------
Mtools/200_solve_small/solve_small.c | 2--
6 files changed, 305 insertions(+), 307 deletions(-)

diff --git a/src/nissy.c b/src/nissy.c @@ -1,5 +1,6 @@ #include <inttypes.h> #include <pthread.h> +#include <stdatomic.h> #include <stdarg.h> #include <stdbool.h> #include <string.h> @@ -333,7 +334,7 @@ nissy_solve( ret = -1; } else { ret = THREADS > 1 ? - solve_h48_parent(c, minmoves, maxmoves, maxsolutions, data, solutions) : + solve_h48_multithread(c, minmoves, maxmoves, maxsolutions, data, solutions) : solve_h48(c, minmoves, maxmoves, maxsolutions, data, solutions); } } else if (!strcmp(solver, "h48stats")) { diff --git a/src/solvers/h48/h48.h b/src/solvers/h48/h48.h @@ -4,4 +4,4 @@ #include "gendata_h48.h" #include "stats.h" #include "solve.h" -#include "thread.h" +#include "solve_multithread.h" diff --git a/src/solvers/h48/solve.h b/src/solvers/h48/solve.h @@ -4,7 +4,7 @@ typedef struct { int8_t nmoves; int8_t depth; uint8_t moves[MAXLEN]; - int64_t *nsols; + _Atomic int64_t *nsols; int64_t maxsolutions; uint8_t h; uint8_t k; @@ -167,7 +167,7 @@ solve_h48( char *solutions ) { - int64_t nsols; + _Atomic int64_t nsols; dfsarg_solveh48_t arg; tableinfo_t info; diff --git a/src/solvers/h48/solve_multithread.h b/src/solvers/h48/solve_multithread.h @@ -0,0 +1,300 @@ +#define MAX_QUEUE_SIZE 244 +#define BFS_DEPTH 2 + +typedef struct { + dfsarg_solveh48_t tasks[MAX_QUEUE_SIZE]; + int front; + int rear; + int tasks_count; + int active; + pthread_mutex_t mutex; + pthread_cond_t cond; + pthread_cond_t active_cond; + atomic_bool terminate; +} task_queue_t; + +STATIC void solve_h48_appendsolution_thread(dfsarg_solveh48_t *, task_queue_t *); +STATIC void init_queue(task_queue_t *); +STATIC void submit_task(task_queue_t *, dfsarg_solveh48_t); +STATIC void copy_queue(task_queue_t *, task_queue_t *, int, _Atomic int64_t *); +STATIC void *start_thread(void *); +STATIC int64_t solve_h48_bfs(dfsarg_solveh48_t *, task_queue_t *, int8_t); +STATIC int64_t solve_h48_single(dfsarg_solveh48_t *, task_queue_t *); +STATIC int64_t solve_h48_multithread(cube_t, int8_t, int8_t, int8_t, const void *, char *); + +STATIC void +solve_h48_appendsolution_thread(dfsarg_solveh48_t *arg, task_queue_t *tq) +{ + pthread_mutex_lock(&tq->mutex); + int strl = 0; + uint8_t invertedpremoves[MAXLEN]; + char *solution = *arg->nextsol; + + strl = writemoves(arg->moves, arg->nmoves, *arg->nextsol); + *arg->nextsol += strl; + + if (arg->npremoves) + { + **arg->nextsol = ' '; + (*arg->nextsol)++; + + invertmoves(arg->premoves, arg->npremoves, invertedpremoves); + strl = writemoves(invertedpremoves, arg->npremoves, *arg->nextsol); + *arg->nextsol += strl; + } + LOG("Solution found: %s\n", solution); + + **arg->nextsol = '\n'; + (*arg->nextsol)++; + (*arg->nsols)++; + pthread_mutex_unlock(&tq->mutex); +} + +STATIC void +init_queue(task_queue_t *queue) +{ + queue->front = 0; + queue->rear = 0; + queue->tasks_count = 0; + queue->active = 0; + queue->terminate = ATOMIC_VAR_INIT(false); + pthread_mutex_init(&queue->mutex, NULL); + pthread_cond_init(&queue->cond, NULL); + pthread_cond_init(&queue->active_cond, NULL); +} + +STATIC void +submit_task(task_queue_t *queue, dfsarg_solveh48_t task) +{ + pthread_mutex_lock(&queue->mutex); + queue->tasks[queue->rear] = task; + queue->rear = (queue->rear + 1) % MAX_QUEUE_SIZE; + queue->tasks_count++; + pthread_cond_broadcast(&queue->cond); + pthread_mutex_unlock(&queue->mutex); +} + +STATIC void +copy_queue(task_queue_t *src, task_queue_t *dest, int depth, _Atomic int64_t *nsols) +{ + pthread_mutex_lock(&dest->mutex); + for (int i = src->front; i != src->rear; i = (i + 1) % MAX_QUEUE_SIZE) + { + dest->tasks[i] = src->tasks[i]; + dest->tasks[i].depth = depth; + } + dest->front = src->front; + dest->rear = src->rear; + dest->tasks_count = src->tasks_count; + pthread_cond_broadcast(&dest->cond); + pthread_mutex_unlock(&dest->mutex); +} + +STATIC void * +start_thread(void *arg) +{ + task_queue_t *queue = (task_queue_t *)arg; + while (1) { + pthread_mutex_lock(&queue->mutex); + while (queue->tasks_count == 0 && !queue->terminate) { + pthread_cond_wait(&queue->cond, &queue->mutex); + } + if (queue->tasks_count == 0 && queue->terminate) { + pthread_mutex_unlock(&queue->mutex); + break; + } + + if (queue->tasks_count > 0) { + dfsarg_solveh48_t task = queue->tasks[queue->front]; + queue->front = (queue->front + 1) % MAX_QUEUE_SIZE; + queue->tasks_count--; + queue->active++; + pthread_mutex_unlock(&queue->mutex); + + solve_h48_single(&task, queue); + + pthread_mutex_lock(&queue->mutex); + queue->active--; + + if(queue->tasks_count == 0 && queue->active == 0) + pthread_cond_signal(&queue->active_cond); + } + pthread_mutex_unlock(&queue->mutex); + } + return NULL; +} + +STATIC int64_t +solve_h48_bfs(dfsarg_solveh48_t *arg_zero, task_queue_t *tq, int8_t maxmoves) +{ + dfsarg_solveh48_t queue[MAX_QUEUE_SIZE]; + int front = 0, rear = 0; + dfsarg_solveh48_t nextarg; + int depth = 0; + int nodes_at_current_depth = 1; + int nodes_at_next_depth = 0; + queue[rear++] = *arg_zero; + + dfsarg_solveh48_t task_pool[MAX_QUEUE_SIZE]; + + while (front < rear){ + dfsarg_solveh48_t arg = queue[front++]; + nodes_at_current_depth--; + + if (*arg.nsols == arg.maxsolutions) + return 1; + + if (issolved(arg.cube)){ + if (arg.nmoves + arg.npremoves >= arg.depth && arg.nmoves + arg.npremoves <= maxmoves) + solve_h48_appendsolution(&arg); + continue; + } + + arg.nissbranch = MM_NORMAL; + uint32_t allowed = allowednextmove_h48(arg.moves, arg.nmoves, arg.nissbranch); + + for (uint8_t m = 0; m < 18; m++){ + if (allowed & (1 << m)){ + nextarg = arg; + nextarg.nmoves = arg.nmoves + 1; + nextarg.moves[arg.nmoves] = m; + nextarg.cube = move(arg.cube, m); + nextarg.inverse = premove(arg.inverse, m); + + if (nextarg.nmoves == BFS_DEPTH){ + dfsarg_solveh48_t *task = &task_pool[rear % MAX_QUEUE_SIZE]; + *task = nextarg; + submit_task(tq, *task); + } else { + queue[rear++] = nextarg; + nodes_at_next_depth++; + } + } + } + if (nodes_at_current_depth == 0){ + nodes_at_current_depth = nodes_at_next_depth; + nodes_at_next_depth = 0; + LOG("Found %" PRId64 " solutions, searching at depth %" PRId8 "\n", *nextarg.nsols, depth++); + } + if (depth == BFS_DEPTH) return 0; + } + return 1; +} + +STATIC int64_t +solve_h48_single(dfsarg_solveh48_t *arg, task_queue_t *tq) +{ + dfsarg_solveh48_t nextarg; + int64_t ret; + uint8_t m; + + if (*arg->nsols == arg->maxsolutions) + return 0; + + if (solve_h48_stop(arg)) + return 0; + + if (issolved(arg->cube)){ + if (arg->nmoves + arg->npremoves != arg->depth) + return 0; + solve_h48_appendsolution_thread(arg, tq); + return 1; + } + + nextarg = *arg; + ret = 0; + uint32_t allowed; + if (arg->nissbranch & MM_INVERSE){ + allowed = allowednextmove_h48(arg->premoves, arg->npremoves, arg->nissbranch); + for (m = 0; m < 18; m++){ + if (allowed & (1 << m)){ + nextarg.npremoves = arg->npremoves + 1; + nextarg.premoves[arg->npremoves] = m; + nextarg.inverse = move(arg->inverse, m); + nextarg.cube = premove(arg->cube, m); + ret += solve_h48_single(&nextarg, tq); + } + } + } else { + allowed = allowednextmove_h48(arg->moves, arg->nmoves, arg->nissbranch); + for (m = 0; m < 18; m++){ + if (allowed & (1 << m)){ + nextarg.nmoves = arg->nmoves + 1; + nextarg.moves[arg->nmoves] = m; + nextarg.cube = move(arg->cube, m); + nextarg.inverse = premove(arg->inverse, m); + ret += solve_h48_single(&nextarg, tq); + } + } + } + return ret; +} + +STATIC int64_t +solve_h48_multithread( + cube_t cube, + int8_t minmoves, + int8_t maxmoves, + int8_t maxsolutions, + const void *data, + char *solutions) +{ + _Atomic int64_t nsols = 0; + int p_depth = 0; + dfsarg_solveh48_t arg; + tableinfo_t info; + pthread_t threads[THREADS]; + + if (!readtableinfo_n(data, 2, &info)){ + LOG("solve_h48: error reading table\n"); + return 0; + } + + arg = (dfsarg_solveh48_t){ + .cube = cube, + .inverse = inverse(cube), + .nsols = &nsols, + .depth = minmoves, + .maxsolutions = maxsolutions, + .h = info.h48h, + .k = info.bits, + .cocsepdata = get_cocsepdata_ptr(data), + .h48data = get_h48data_ptr(data), + .nextsol = &solutions}; + + task_queue_t q; + init_queue(&q); + if (solve_h48_bfs(&arg, &q, maxmoves)) + return nsols; + + task_queue_t nq; + init_queue(&nq); + + for (int i = 0; i < THREADS; i++){ + pthread_create(&threads[i], NULL, &start_thread, &nq); + } + + nsols = 0; + for (p_depth = minmoves > BFS_DEPTH ? minmoves : BFS_DEPTH; + p_depth <= maxmoves && nsols < maxsolutions; + p_depth++) + { + LOG("Found %" PRId64 " solutions, searching at depth %" PRId8 "\n", nsols, p_depth); + copy_queue(&q, &nq, p_depth, &nsols); + + pthread_mutex_lock(&nq.mutex); + while (nq.active > 0 || nq.tasks_count > 0) + pthread_cond_wait(&nq.active_cond, &nq.mutex); + pthread_mutex_unlock(&nq.mutex); + } + + atomic_store(&nq.terminate, true); + pthread_cond_broadcast(&nq.cond); + + for (int i = 0; i < THREADS; i++){ + pthread_join(threads[i], NULL); + } + **arg.nextsol = '\0'; + (*arg.nextsol)++; + return nsols; +} diff --git a/src/solvers/h48/thread.h b/src/solvers/h48/thread.h @@ -1,301 +0,0 @@ -#include <stdatomic.h> - -#define MAX_QUEUE_SIZE 244 -#define BFS_DEPTH 2 -typedef struct { - dfsarg_solveh48_t tasks[MAX_QUEUE_SIZE]; - int front; - int rear; - int tasks_count; - int active; - pthread_mutex_t mutex; - pthread_cond_t cond; - pthread_cond_t active_cond; - atomic_bool terminate; -} task_queue_t; - -STATIC void solve_h48_appendsolution_thread(dfsarg_solveh48_t *, task_queue_t *); -STATIC void init_queue(task_queue_t *); -STATIC void submit_task(task_queue_t *, dfsarg_solveh48_t); -STATIC void copy_queue(task_queue_t *, task_queue_t *, int, int64_t *); -STATIC void *start_thread(void *); -STATIC int64_t solve_h48_bfs(dfsarg_solveh48_t *, task_queue_t *); -STATIC int64_t solve_h48_single(dfsarg_solveh48_t *, task_queue_t *); -STATIC int64_t solve_h48_parent(cube_t, int8_t, int8_t, int8_t, const void *, char *); - -STATIC void -solve_h48_appendsolution_thread(dfsarg_solveh48_t *arg, task_queue_t *tq) -{ - pthread_mutex_lock(&tq->mutex); - int strl = 0; - uint8_t invertedpremoves[MAXLEN]; - char *solution = *arg->nextsol; - - strl = writemoves(arg->moves, arg->nmoves, *arg->nextsol); - *arg->nextsol += strl; - - if (arg->npremoves) - { - **arg->nextsol = ' '; - (*arg->nextsol)++; - - invertmoves(arg->premoves, arg->npremoves, invertedpremoves); - strl = writemoves(invertedpremoves, arg->npremoves, *arg->nextsol); - *arg->nextsol += strl; - } - LOG("Solution found: %s\n", solution); - - **arg->nextsol = '\n'; - (*arg->nextsol)++; - (*arg->nsols)++; - pthread_mutex_unlock(&tq->mutex); -} - -STATIC void -init_queue(task_queue_t *queue) -{ - queue->front = 0; - queue->rear = 0; - queue->tasks_count = 0; - queue->active = 0; - queue->terminate = ATOMIC_VAR_INIT(false); - pthread_mutex_init(&queue->mutex, NULL); - pthread_cond_init(&queue->cond, NULL); - pthread_cond_init(&queue->active_cond, NULL); -} - -STATIC void -submit_task(task_queue_t *queue, dfsarg_solveh48_t task) -{ - pthread_mutex_lock(&queue->mutex); - queue->tasks[queue->rear] = task; - queue->rear = (queue->rear + 1) % MAX_QUEUE_SIZE; - queue->tasks_count++; - pthread_cond_broadcast(&queue->cond); - pthread_mutex_unlock(&queue->mutex); -} - -STATIC void -copy_queue(task_queue_t *src, task_queue_t *dest, int depth, int64_t *nsols) -{ - pthread_mutex_lock(&dest->mutex); - for (int i = src->front; i != src->rear; i = (i + 1) % MAX_QUEUE_SIZE) - { - dest->tasks[i] = src->tasks[i]; - dest->tasks[i].depth = depth; - } - dest->front = src->front; - dest->rear = src->rear; - dest->tasks_count = src->tasks_count; - pthread_cond_broadcast(&dest->cond); - pthread_mutex_unlock(&dest->mutex); -} - -STATIC void * -start_thread(void *arg) -{ - task_queue_t *queue = (task_queue_t *)arg; - while (1) { - pthread_mutex_lock(&queue->mutex); - while (queue->tasks_count == 0 && !queue->terminate) { - pthread_cond_wait(&queue->cond, &queue->mutex); - } - if (queue->tasks_count == 0 && queue->terminate) { - pthread_mutex_unlock(&queue->mutex); - break; - } - - if (queue->tasks_count > 0) { - dfsarg_solveh48_t task = queue->tasks[queue->front]; - queue->front = (queue->front + 1) % MAX_QUEUE_SIZE; - queue->tasks_count--; - queue->active++; - pthread_mutex_unlock(&queue->mutex); - - solve_h48_single(&task, queue); - - pthread_mutex_lock(&queue->mutex); - queue->active--; - - if(queue->tasks_count == 0 && queue->active == 0) - pthread_cond_signal(&queue->active_cond); - } - pthread_mutex_unlock(&queue->mutex); - } - return NULL; -} - -STATIC int64_t -solve_h48_bfs(dfsarg_solveh48_t *arg_zero, task_queue_t *tq) -{ - dfsarg_solveh48_t queue[MAX_QUEUE_SIZE]; - int front = 0, rear = 0; - dfsarg_solveh48_t nextarg; - int depth = 0; - int nodes_at_current_depth = 1; - int nodes_at_next_depth = 0; - queue[rear++] = *arg_zero; - - dfsarg_solveh48_t task_pool[MAX_QUEUE_SIZE]; - - while (front < rear){ - dfsarg_solveh48_t arg = queue[front++]; - nodes_at_current_depth--; - - if (*arg.nsols == arg.maxsolutions) - return 1; - - if (issolved(arg.cube)){ - if (arg.nmoves + arg.npremoves != arg.depth) - continue; - solve_h48_appendsolution(&arg); - continue; - } - - arg.nissbranch = MM_NORMAL; - uint32_t allowed = allowednextmove_h48(arg.moves, arg.nmoves, arg.nissbranch); - - for (uint8_t m = 0; m < 18; m++){ - if (allowed & (1 << m)){ - nextarg = arg; - nextarg.nmoves = arg.nmoves + 1; - nextarg.moves[arg.nmoves] = m; - nextarg.cube = move(arg.cube, m); - nextarg.inverse = premove(arg.inverse, m); - - if (nextarg.nmoves == BFS_DEPTH){ - dfsarg_solveh48_t *task = &task_pool[rear % MAX_QUEUE_SIZE]; - *task = nextarg; - submit_task(tq, *task); - } else { - queue[rear++] = nextarg; - nodes_at_next_depth++; - } - } - } - if (nodes_at_current_depth == 0){ - nodes_at_current_depth = nodes_at_next_depth; - nodes_at_next_depth = 0; - LOG("Found %" PRId64 " solutions, searching at depth %" PRId8 "\n", *nextarg.nsols, depth++); - } - if (depth == BFS_DEPTH) return 0; - } - return 1; -} - -STATIC int64_t -solve_h48_single(dfsarg_solveh48_t *arg, task_queue_t *tq) -{ - dfsarg_solveh48_t nextarg; - int64_t ret; - uint8_t m; - - if (*arg->nsols == arg->maxsolutions) - return 0; - - if (solve_h48_stop(arg)) - return 0; - - if (issolved(arg->cube)){ - if (arg->nmoves + arg->npremoves != arg->depth) - return 0; - solve_h48_appendsolution_thread(arg, tq); - return 1; - } - - nextarg = *arg; - ret = 0; - uint32_t allowed; - if (arg->nissbranch & MM_INVERSE){ - allowed = allowednextmove_h48(arg->premoves, arg->npremoves, arg->nissbranch); - for (m = 0; m < 18; m++){ - if (allowed & (1 << m)){ - nextarg.npremoves = arg->npremoves + 1; - nextarg.premoves[arg->npremoves] = m; - nextarg.inverse = move(arg->inverse, m); - nextarg.cube = premove(arg->cube, m); - ret += solve_h48_single(&nextarg, tq); - } - } - } else { - allowed = allowednextmove_h48(arg->moves, arg->nmoves, arg->nissbranch); - for (m = 0; m < 18; m++){ - if (allowed & (1 << m)){ - nextarg.nmoves = arg->nmoves + 1; - nextarg.moves[arg->nmoves] = m; - nextarg.cube = move(arg->cube, m); - nextarg.inverse = premove(arg->inverse, m); - ret += solve_h48_single(&nextarg, tq); - } - } - } - return ret; -} - -STATIC int64_t -solve_h48_parent( - cube_t cube, - int8_t minmoves, - int8_t maxmoves, - int8_t maxsolutions, - const void *data, - char *solutions) -{ - int64_t nsols = 0; - int p_depth = 0; - dfsarg_solveh48_t arg; - tableinfo_t info; - pthread_t threads[THREADS]; - - if (!readtableinfo_n(data, 2, &info)){ - LOG("solve_h48: error reading table\n"); - return 0; - } - - arg = (dfsarg_solveh48_t){ - .cube = cube, - .inverse = inverse(cube), - .nsols = &nsols, - .maxsolutions = maxsolutions, - .h = info.h48h, - .k = info.bits, - .cocsepdata = get_cocsepdata_ptr(data), - .h48data = get_h48data_ptr(data), - .nextsol = &solutions}; - - task_queue_t q; - init_queue(&q); - if (solve_h48_bfs(&arg, &q)) - return nsols; - - task_queue_t nq; - init_queue(&nq); - - for (int i = 0; i < THREADS; i++){ - pthread_create(&threads[i], NULL, &start_thread, &nq); - } - - nsols = 0; - for (p_depth = minmoves > BFS_DEPTH ? minmoves : BFS_DEPTH; - p_depth <= maxmoves && nsols < maxsolutions; - p_depth++) - { - LOG("Found %" PRId64 " solutions, searching at depth %" PRId8 "\n", nsols, p_depth); - copy_queue(&q, &nq, p_depth, &nsols); - - pthread_mutex_lock(&nq.mutex); - while (nq.active > 0 || nq.tasks_count > 0) - pthread_cond_wait(&nq.active_cond, &nq.mutex); - pthread_mutex_unlock(&nq.mutex); - } - - atomic_store(&nq.terminate, true); - pthread_cond_broadcast(&nq.cond); - - for (int i = 0; i < THREADS; i++){ - pthread_join(threads[i], NULL); - } - **arg.nextsol = '\0'; - (*arg.nextsol)++; - return nsols; -} diff --git a/tools/200_solve_small/solve_small.c b/tools/200_solve_small/solve_small.c @@ -1,5 +1,3 @@ -#include <pthread.h> - #include "../tool.h" const char *solver = "h48";