h48

A prototype for an optimal Rubik's cube solver, work in progress.
git clone https://git.tronto.net/h48
Download | Log | Files | Refs | README | LICENSE

commit bf6e1c3516a4eb0509c12b00d9a1baf6ee119987
parent 774a824a6c80b5af495f4fb99d98758e3b9f6b81
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Sun, 29 Sep 2024 14:12:01 +0200

Merge pull request #3 from enricotenuti/master

Threaded solver
Diffstat:
M.gitignore | 4++++
Msrc/nissy.c | 6++++--
Msrc/solvers/h48/h48.h | 1+
Msrc/solvers/h48/solve.h | 7++++---
Asrc/solvers/h48/solve_multithread.h | 300+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtools/300_solve_small/solve_small.c | 4++--
6 files changed, 315 insertions(+), 7 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -1,14 +1,18 @@ +.DS_Store config.mk gen debuggen flamegraph.html +utils/.DS_Store perf.data perf.data.old run tables/* test/*/runtest +test/.DS_Store test/run test/run.DSYM +tools/.DS_Store run.DSYM run.core test/last.* diff --git a/src/nissy.c b/src/nissy.c @@ -1,5 +1,6 @@ #include <inttypes.h> #include <pthread.h> +#include <stdatomic.h> #include <stdarg.h> #include <stdbool.h> #include <string.h> @@ -401,8 +402,9 @@ nissy_solve( LOG("gendata: could not parse options\n"); ret = -1; } else { - ret = solve_h48(c, minmoves, maxmoves, maxsolutions, - data, solutions); + ret = THREADS > 1 ? + solve_h48_multithread(c, minmoves, maxmoves, maxsolutions, data, solutions) : + solve_h48(c, minmoves, maxmoves, maxsolutions, data, solutions); } } else if (!strcmp(solver, "h48stats")) { ret = solve_h48stats(c, maxmoves, data, solutions); diff --git a/src/solvers/h48/h48.h b/src/solvers/h48/h48.h @@ -4,3 +4,4 @@ #include "gendata_h48.h" #include "stats.h" #include "solve.h" +#include "solve_multithread.h" diff --git a/src/solvers/h48/solve.h b/src/solvers/h48/solve.h @@ -4,7 +4,7 @@ typedef struct { int8_t nmoves; int8_t depth; uint8_t moves[MAXLEN]; - int64_t *nsols; + _Atomic int64_t *nsols; int64_t maxsolutions; uint8_t h; uint8_t k; @@ -167,7 +167,7 @@ solve_h48( char *solutions ) { - int64_t nsols; + _Atomic int64_t nsols; dfsarg_solveh48_t arg; tableinfo_t info; @@ -199,6 +199,7 @@ solve_h48( arg.npremoves = 0; solve_h48_dfs(&arg); } - + **arg.nextsol = '\0'; + (*arg.nextsol)++; return nsols; } diff --git a/src/solvers/h48/solve_multithread.h b/src/solvers/h48/solve_multithread.h @@ -0,0 +1,300 @@ +#define MAX_QUEUE_SIZE 244 +#define BFS_DEPTH 2 + +typedef struct { + dfsarg_solveh48_t tasks[MAX_QUEUE_SIZE]; + int front; + int rear; + int tasks_count; + int active; + pthread_mutex_t mutex; + pthread_cond_t cond; + pthread_cond_t active_cond; + atomic_bool terminate; +} task_queue_t; + +STATIC void solve_h48_appendsolution_thread(dfsarg_solveh48_t *, task_queue_t *); +STATIC void init_queue(task_queue_t *); +STATIC void submit_task(task_queue_t *, dfsarg_solveh48_t); +STATIC void copy_queue(task_queue_t *, task_queue_t *, int, _Atomic int64_t *); +STATIC void *start_thread(void *); +STATIC int64_t solve_h48_bfs(dfsarg_solveh48_t *, task_queue_t *, int8_t); +STATIC int64_t solve_h48_single(dfsarg_solveh48_t *, task_queue_t *); +STATIC int64_t solve_h48_multithread(cube_t, int8_t, int8_t, int8_t, const void *, char *); + +STATIC void +solve_h48_appendsolution_thread(dfsarg_solveh48_t *arg, task_queue_t *tq) +{ + pthread_mutex_lock(&tq->mutex); + int strl = 0; + uint8_t invertedpremoves[MAXLEN]; + char *solution = *arg->nextsol; + + strl = writemoves(arg->moves, arg->nmoves, *arg->nextsol); + *arg->nextsol += strl; + + if (arg->npremoves) + { + **arg->nextsol = ' '; + (*arg->nextsol)++; + + invertmoves(arg->premoves, arg->npremoves, invertedpremoves); + strl = writemoves(invertedpremoves, arg->npremoves, *arg->nextsol); + *arg->nextsol += strl; + } + LOG("Solution found: %s\n", solution); + + **arg->nextsol = '\n'; + (*arg->nextsol)++; + (*arg->nsols)++; + pthread_mutex_unlock(&tq->mutex); +} + +STATIC void +init_queue(task_queue_t *queue) +{ + queue->front = 0; + queue->rear = 0; + queue->tasks_count = 0; + queue->active = 0; + queue->terminate = ATOMIC_VAR_INIT(false); + pthread_mutex_init(&queue->mutex, NULL); + pthread_cond_init(&queue->cond, NULL); + pthread_cond_init(&queue->active_cond, NULL); +} + +STATIC void +submit_task(task_queue_t *queue, dfsarg_solveh48_t task) +{ + pthread_mutex_lock(&queue->mutex); + queue->tasks[queue->rear] = task; + queue->rear = (queue->rear + 1) % MAX_QUEUE_SIZE; + queue->tasks_count++; + pthread_cond_broadcast(&queue->cond); + pthread_mutex_unlock(&queue->mutex); +} + +STATIC void +copy_queue(task_queue_t *src, task_queue_t *dest, int depth, _Atomic int64_t *nsols) +{ + pthread_mutex_lock(&dest->mutex); + for (int i = src->front; i != src->rear; i = (i + 1) % MAX_QUEUE_SIZE) + { + dest->tasks[i] = src->tasks[i]; + dest->tasks[i].depth = depth; + } + dest->front = src->front; + dest->rear = src->rear; + dest->tasks_count = src->tasks_count; + pthread_cond_broadcast(&dest->cond); + pthread_mutex_unlock(&dest->mutex); +} + +STATIC void * +start_thread(void *arg) +{ + task_queue_t *queue = (task_queue_t *)arg; + while (1) { + pthread_mutex_lock(&queue->mutex); + while (queue->tasks_count == 0 && !queue->terminate) { + pthread_cond_wait(&queue->cond, &queue->mutex); + } + if (queue->tasks_count == 0 && queue->terminate) { + pthread_mutex_unlock(&queue->mutex); + break; + } + + if (queue->tasks_count > 0) { + dfsarg_solveh48_t task = queue->tasks[queue->front]; + queue->front = (queue->front + 1) % MAX_QUEUE_SIZE; + queue->tasks_count--; + queue->active++; + pthread_mutex_unlock(&queue->mutex); + + solve_h48_single(&task, queue); + + pthread_mutex_lock(&queue->mutex); + queue->active--; + + if(queue->tasks_count == 0 && queue->active == 0) + pthread_cond_signal(&queue->active_cond); + } + pthread_mutex_unlock(&queue->mutex); + } + return NULL; +} + +STATIC int64_t +solve_h48_bfs(dfsarg_solveh48_t *arg_zero, task_queue_t *tq, int8_t maxmoves) +{ + dfsarg_solveh48_t queue[MAX_QUEUE_SIZE]; + int front = 0, rear = 0; + dfsarg_solveh48_t nextarg; + int depth = 0; + int nodes_at_current_depth = 1; + int nodes_at_next_depth = 0; + queue[rear++] = *arg_zero; + + dfsarg_solveh48_t task_pool[MAX_QUEUE_SIZE]; + + while (front < rear){ + dfsarg_solveh48_t arg = queue[front++]; + nodes_at_current_depth--; + + if (*arg.nsols == arg.maxsolutions) + return 1; + + if (issolved(arg.cube)){ + if (arg.nmoves + arg.npremoves >= arg.depth && arg.nmoves + arg.npremoves <= maxmoves) + solve_h48_appendsolution(&arg); + continue; + } + + arg.nissbranch = MM_NORMAL; + uint32_t allowed = allowednextmove_h48(arg.moves, arg.nmoves, arg.nissbranch); + + for (uint8_t m = 0; m < 18; m++){ + if (allowed & (1 << m)){ + nextarg = arg; + nextarg.nmoves = arg.nmoves + 1; + nextarg.moves[arg.nmoves] = m; + nextarg.cube = move(arg.cube, m); + nextarg.inverse = premove(arg.inverse, m); + + if (nextarg.nmoves == BFS_DEPTH){ + dfsarg_solveh48_t *task = &task_pool[rear % MAX_QUEUE_SIZE]; + *task = nextarg; + submit_task(tq, *task); + } else { + queue[rear++] = nextarg; + nodes_at_next_depth++; + } + } + } + if (nodes_at_current_depth == 0){ + nodes_at_current_depth = nodes_at_next_depth; + nodes_at_next_depth = 0; + LOG("Found %" PRId64 " solutions, searching at depth %" PRId8 "\n", *nextarg.nsols, depth++); + } + if (depth == BFS_DEPTH) return 0; + } + return 1; +} + +STATIC int64_t +solve_h48_single(dfsarg_solveh48_t *arg, task_queue_t *tq) +{ + dfsarg_solveh48_t nextarg; + int64_t ret; + uint8_t m; + + if (*arg->nsols == arg->maxsolutions) + return 0; + + if (solve_h48_stop(arg)) + return 0; + + if (issolved(arg->cube)){ + if (arg->nmoves + arg->npremoves != arg->depth) + return 0; + solve_h48_appendsolution_thread(arg, tq); + return 1; + } + + nextarg = *arg; + ret = 0; + uint32_t allowed; + if (arg->nissbranch & MM_INVERSE){ + allowed = allowednextmove_h48(arg->premoves, arg->npremoves, arg->nissbranch); + for (m = 0; m < 18; m++){ + if (allowed & (1 << m)){ + nextarg.npremoves = arg->npremoves + 1; + nextarg.premoves[arg->npremoves] = m; + nextarg.inverse = move(arg->inverse, m); + nextarg.cube = premove(arg->cube, m); + ret += solve_h48_single(&nextarg, tq); + } + } + } else { + allowed = allowednextmove_h48(arg->moves, arg->nmoves, arg->nissbranch); + for (m = 0; m < 18; m++){ + if (allowed & (1 << m)){ + nextarg.nmoves = arg->nmoves + 1; + nextarg.moves[arg->nmoves] = m; + nextarg.cube = move(arg->cube, m); + nextarg.inverse = premove(arg->inverse, m); + ret += solve_h48_single(&nextarg, tq); + } + } + } + return ret; +} + +STATIC int64_t +solve_h48_multithread( + cube_t cube, + int8_t minmoves, + int8_t maxmoves, + int8_t maxsolutions, + const void *data, + char *solutions) +{ + _Atomic int64_t nsols = 0; + int p_depth = 0; + dfsarg_solveh48_t arg; + tableinfo_t info; + pthread_t threads[THREADS]; + + if (!readtableinfo_n(data, 2, &info)){ + LOG("solve_h48: error reading table\n"); + return 0; + } + + arg = (dfsarg_solveh48_t){ + .cube = cube, + .inverse = inverse(cube), + .nsols = &nsols, + .depth = minmoves, + .maxsolutions = maxsolutions, + .h = info.h48h, + .k = info.bits, + .cocsepdata = get_cocsepdata_ptr(data), + .h48data = get_h48data_ptr(data), + .nextsol = &solutions}; + + task_queue_t q; + init_queue(&q); + if (solve_h48_bfs(&arg, &q, maxmoves)) + return nsols; + + task_queue_t nq; + init_queue(&nq); + + for (int i = 0; i < THREADS; i++){ + pthread_create(&threads[i], NULL, &start_thread, &nq); + } + + nsols = 0; + for (p_depth = minmoves > BFS_DEPTH ? minmoves : BFS_DEPTH; + p_depth <= maxmoves && nsols < maxsolutions; + p_depth++) + { + LOG("Found %" PRId64 " solutions, searching at depth %" PRId8 "\n", nsols, p_depth); + copy_queue(&q, &nq, p_depth, &nsols); + + pthread_mutex_lock(&nq.mutex); + while (nq.active > 0 || nq.tasks_count > 0) + pthread_cond_wait(&nq.active_cond, &nq.mutex); + pthread_mutex_unlock(&nq.mutex); + } + + atomic_store(&nq.terminate, true); + pthread_cond_broadcast(&nq.cond); + + for (int i = 0; i < THREADS; i++){ + pthread_join(threads[i], NULL); + } + **arg.nextsol = '\0'; + (*arg.nextsol)++; + return nsols; +} diff --git a/tools/300_solve_small/solve_small.c b/tools/300_solve_small/solve_small.c @@ -1,5 +1,3 @@ -#include <pthread.h> - #include "../tool.h" const char *solver = "h48"; @@ -10,6 +8,8 @@ char *buf; char *scrambles[] = { "R D' R2 D R U2 R' D' R U2 R D R'", /* 12 optimal */ "RLUD RLUD RLUD", /* 12 optimal */ + // "R' U' F D2 L2 F R2 U2 R2 B D2 L B2 D' B2 L' R' B D2 B U2 L U2 R' U' F", /* FMC2019 A1 - 16 optimal */ + // "R' U' F D R F2 D L F D2 F2 L' U R' L2 D' R2 F2 R2 D L2 U2 R' U' F", /* FMC2024 A1 - 19 optimal */ NULL };