h48

A prototype for an optimal Rubik's cube solver, work in progress.
git clone https://git.tronto.net/h48
Download | Log | Files | Refs | README | LICENSE

commit d0c592bdc3beb12074f085ab085dd90de9b8e643
parent 91afd406dca8dee4dfdb84ad025dd390f4e5a70c
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Sun, 12 Nov 2023 14:42:20 +0100

Simple solver: added tests, fixed

Diffstat:
MTODO.txt | 11++++++++---
Mcube.c | 111+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
Mcube.h | 5++++-
Atest/060_solve_simple/01_U_U3.in | 8++++++++
Atest/060_solve_simple/01_U_U3.out | 1+
Atest/060_solve_simple/02_MUMU_alloptimal.in | 8++++++++
Atest/060_solve_simple/02_MUMU_alloptimal.out | 4++++
Atest/060_solve_simple/solve_simple_tests.c | 58++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
8 files changed, 171 insertions(+), 35 deletions(-)

diff --git a/TODO.txt b/TODO.txt @@ -2,12 +2,14 @@ See the sections below for details -* Tests for simple solver -* Extend cube and moves to include centers +* Tests for multisolve +* More tests for simple solver? +* Benchmarks? * More complex optimal solvers, pruning tables -* Benchmarks +* (More) benchmarks * Multithreading (build-time option number of threads) * Other optimizations +* Extend cube and moves to include centers * NISS * Move manipulation utilities * Coordinate solvers and other steps @@ -76,6 +78,8 @@ What about symcoord? ### General things +* Moves: don't do full compose for U*, D*, *2 (I removed this because I + was using shuffle intructions wrong, should re-do it) * Trans: don't do full compose, for some trans composing perm is enough. Split out sumco() as a separate function and refactor, optimize. * Use multi-move (up to 4/5 moves at once) @@ -113,6 +117,7 @@ What about symcoord? * print ptables (or layout data in such a way that can be printed easily, e.g. first bytes are null-terminated strig and can be printed by user) +* remove writetrans? ## "Front-end" diff --git a/cube.c b/cube.c @@ -277,7 +277,7 @@ _move_U(cube_fast_t c) 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 1, 0, 3, 2, 4, 5 ); - return _mm256_shuffle_epi8(c, m); + return compose_fast(c, m); } static inline cube_fast_t @@ -288,7 +288,7 @@ _move_U2(cube_fast_t c) 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 4, 5, 3, 2, 0, 1 ); - return _mm256_shuffle_epi8(c, m); + return compose_fast(c, m); } static inline cube_fast_t @@ -299,7 +299,7 @@ _move_U3(cube_fast_t c) 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 0, 1, 3, 2, 5, 4 ); - return _mm256_shuffle_epi8(c, m); + return compose_fast(c, m); } static inline cube_fast_t @@ -310,7 +310,7 @@ _move_D(cube_fast_t c) 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 5, 4, 6, 7, 1, 0 ); - return _mm256_shuffle_epi8(c, m); + return compose_fast(c, m); } static inline cube_fast_t @@ -321,7 +321,7 @@ _move_D2(cube_fast_t c) 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 5, 4, 2, 3, 1, 0 ); - return _mm256_shuffle_epi8(c, m); + return compose_fast(c, m); } static inline cube_fast_t @@ -332,7 +332,7 @@ _move_D3(cube_fast_t c) 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 5, 4, 7, 6, 1, 0 ); - return _mm256_shuffle_epi8(c, m); + return compose_fast(c, m); } static inline cube_fast_t @@ -354,7 +354,7 @@ _move_R2(cube_fast_t c) 0, 0, 0, 0, 0, 0, 0, 0, 7, 5, 6, 4, 0, 2, 1, 3 ); - return _mm256_shuffle_epi8(c, m); + return compose_fast(c, m); } static inline cube_fast_t @@ -387,7 +387,7 @@ _move_L2(cube_fast_t c) 0, 0, 0, 0, 0, 0, 0, 0, 4, 6, 5, 7, 3, 1, 2, 0 ); - return _mm256_shuffle_epi8(c, m); + return compose_fast(c, m); } static inline cube_fast_t @@ -420,7 +420,7 @@ _move_F2(cube_fast_t c) 0, 0, 0, 0, 0, 0, 0, 0, 7, 4, 5, 6, 3, 0, 1, 2 ); - return _mm256_shuffle_epi8(c, m); + return compose_fast(c, m); } static inline cube_fast_t @@ -453,7 +453,7 @@ _move_B2(cube_fast_t c) 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, 4, 1, 2, 3, 0 ); - return _mm256_shuffle_epi8(c, m); + return compose_fast(c, m); } static inline cube_fast_t @@ -1479,13 +1479,13 @@ fasttocube(cube_fast_t c) static inline bool equal_fast(cube_fast_t c1, cube_fast_t c2) { - uint32_t mask; + int32_t mask; __m256i cmp; cmp = _mm256_cmpeq_epi8(c1, c2); mask = _mm256_movemask_epi8(cmp); - return mask == 0xffffffffU; + return mask == ~0; } static inline bool @@ -1511,6 +1511,19 @@ invertco_fast(cube_fast_t c) } static inline cube_fast_t +cleanaftershuffle(cube_fast_t c) +{ + __m256i b; + + b = _mm256_set_epi8( + ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0 + ); + + return _mm256_andnot_si256(b, c); +} + +static inline cube_fast_t inverse_fast(cube_fast_t c) { /* Method taken from Andrew Skalski's vcube[1]. The addition sequence @@ -1544,6 +1557,7 @@ inverse_fast(cube_fast_t c) vo = _mm256_shuffle_epi8(vo, vi); vp = _mm256_andnot_si256(_mm256_or_si256(_eo_avx2, _co2_avx2), vi); ret = _mm256_or_si256(vp, vo); + ret = cleanaftershuffle(ret); return invertco_fast(ret); } @@ -1557,6 +1571,7 @@ compose_fast(cube_fast_t c1, cube_fast_t c2) eo2 = _mm256_and_si256(c2, _eo_avx2); s = _mm256_shuffle_epi8(c1, c2); + s = cleanaftershuffle(s); ed = _mm256_xor_si256(s, eo2); co1 = _mm256_and_si256(s, _co2_avx2); co2 = _mm256_and_si256(c2, _co2_avx2); @@ -2939,6 +2954,8 @@ Some of these routines depend on the efficient functions implemented in the previous sections, while some other operate directly on the cube. ******************************************************************************/ +static inline uint8_t movebase(uint8_t); +static inline uint8_t moveaxis(uint8_t); static uint8_t readco(char *); static uint8_t readcp(char *); static uint8_t readeo(char *); @@ -2990,6 +3007,18 @@ iserror(cube_t cube) return equal(cube, zero); } +static inline uint8_t +movebase(uint8_t move) +{ + return move / 3; +} + +static inline uint8_t +moveaxis(uint8_t move) +{ + return move / 6; +} + static uint8_t readco(char *str) { @@ -3292,6 +3321,9 @@ writemoves(uint8_t *m, int n, char *buf) b += len; *b = ' '; } + + if (b != buf) + b--; /* Remove last space */ *b = '\0'; return b - buf; @@ -3653,8 +3685,8 @@ typedef struct { cube_fast_t cube; uint8_t depth; int64_t maxsols; - char *sols; - int64_t nsols; + char **nextsol; + int64_t *nsols; uint8_t nmoves; uint8_t moves[20]; uint8_t (*estimate)(cube_fast_t); @@ -3671,10 +3703,10 @@ allowednextmove(dfsarg_generic_t arg, uint8_t m) if (n == 0) return true; - mbase = m / 3; - maxis = mbase / 2; - l1base = arg.moves[n-1] / 3; - l1axis = l1base / 2; + mbase = movebase(m); + maxis = moveaxis(m); + l1base = movebase(arg.moves[n-1]); + l1axis = moveaxis(arg.moves[n-1]); if (mbase == l1base || (maxis == l1axis && mbase < l1base)) return false; @@ -3682,12 +3714,25 @@ allowednextmove(dfsarg_generic_t arg, uint8_t m) if (n == 1) return true; - l2base = arg.moves[n-2] / 3; - l2axis = l1base / 2; + l2base = movebase(arg.moves[n-2]); + l2axis = moveaxis(arg.moves[n-2]); return l1axis != l2axis || mbase != l2base; } +static void +solve_generic_appendsolution(dfsarg_generic_t arg) +{ + int strl; + + strl = writemoves(arg.moves, arg.depth, *arg.nextsol); + DBG_LOG("Solution found: %s\n", *arg.nextsol); + *arg.nextsol += strl; + **arg.nextsol = '\n'; + (*arg.nextsol)++; + (*arg.nsols)++; +} + static int solve_generic_dfs(dfsarg_generic_t arg) { @@ -3697,15 +3742,13 @@ solve_generic_dfs(dfsarg_generic_t arg) bound = arg.estimate(arg.cube); - if (arg.nsols == arg.maxsols || bound + arg.nmoves > arg.depth) + if (*arg.nsols == arg.maxsols || bound + arg.nmoves > arg.depth) return 0; if (bound == 0) { if (arg.nmoves != arg.depth) return 0; - arg.sols += writemoves(arg.moves, arg.depth, arg.sols); - *arg.sols = '\n'; - arg.sols++; + solve_generic_appendsolution(arg); return 1; } @@ -3746,6 +3789,13 @@ solve_generic( return -1; } + if (issolved(cube)) { + DBG_LOG("solve: cube is already solved\n"); + sols[0] = '\n'; + sols[1] = 0; + return 1; + } + DBG_WARN(!strcmp(nisstype, ""), "solve: NISS not implemented yet, 'nisstype' ignored\n"); @@ -3782,8 +3832,8 @@ solve_generic( arg = (dfsarg_generic_t) { .cube = cubetofast(cube), .maxsols = maxsols, - .sols = sols, - .nsols = 0, + .nextsol = &sols, + .nsols = &ret, .nmoves = 0, .moves = {0}, .estimate = estimate, @@ -3793,12 +3843,11 @@ solve_generic( first = -1; for (arg.depth = minmoves; arg.depth <= maxmoves; arg.depth++) { tmp = solve_generic_dfs(arg); - ret += tmp; if (tmp != 0) first = arg.depth; - DBG_LOG("Found %" PRId64 " solutions at depth %" PRIu8 "\n", - tmp, arg.depth); + DBG_LOG("Found %" PRId64 " solution%s at depth %" PRIu8 "\n", + tmp, tmp == 1 ? "" : "s", arg.depth); if (ret >= maxsols) break; @@ -3864,7 +3913,7 @@ solve( DBG_WARN(data == NULL, "solve: 'data' not implemented yet, ignoring\n"); - if (!strcmp(solver, "simple")) { + if (!strcmp(solver, "optimal") || !strcmp(solver, "simple")) { return solve_simple( cube, minmoves, @@ -3874,7 +3923,7 @@ solve( solutions ); } else { - DBG_LOG("solve: unknown solver\n"); + DBG_LOG("solve: unknown solver '%s'\n", solver); return -1; } diff --git a/cube.h b/cube.h @@ -120,7 +120,10 @@ could. int64_t solve( cube_t cube, /* The cube to solve. Must be solvable. */ - char *solver, /* The solver. Supported solvers: TODO. */ + char *solver, /* Supported solvers: + * "optimal" - currently the same as "simple" + * "simple" - a simple, slow solver using no tables + */ char *options, /* Some solvers accept extra options, * like "!filter". */ diff --git a/test/060_solve_simple/01_U_U3.in b/test/060_solve_simple/01_U_U3.in @@ -0,0 +1,8 @@ +UR0 UL0 DB0 DF0 UB0 UF0 DL0 DR0 FR0 FL0 BL0 BR0 UBR0 UFL0 DFL0 DBR0 UFR0 UBL0 DFR0 DBL0 +simple + +normal +0 +3 +3 +-1 diff --git a/test/060_solve_simple/01_U_U3.out b/test/060_solve_simple/01_U_U3.out @@ -0,0 +1 @@ +U' diff --git a/test/060_solve_simple/02_MUMU_alloptimal.in b/test/060_solve_simple/02_MUMU_alloptimal.in @@ -0,0 +1,8 @@ +UB0 DF0 DB0 UF0 UR0 UL0 DL0 DR0 FR0 FL0 BL0 BR0 UFR0 UBL0 DFL0 DBR0 UFL0 UBR0 DFR0 DBL0 +simple + +normal +0 +-1 +10 +0 diff --git a/test/060_solve_simple/02_MUMU_alloptimal.out b/test/060_solve_simple/02_MUMU_alloptimal.out @@ -0,0 +1,4 @@ +U2 R' L F2 R L' +R U2 R' L F2 L' +R L' U2 R' L F2 +L' U2 R' L F2 R diff --git a/test/060_solve_simple/solve_simple_tests.c b/test/060_solve_simple/solve_simple_tests.c @@ -0,0 +1,58 @@ +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifdef CUBE_AVX2 +#include <immintrin.h> +#endif + +#include "../../cube.h" + +#define S 10000 + +int main() { + char cubestr[S], solverstr[S], optionsstr[S], nisstypestr[S]; + char minmovesstr[S], maxmovesstr[S], maxsolsstr[S], optimalstr[S]; + char solutionsstr[S]; + cube_t cube; + int64_t maxsols; + int8_t minmoves, maxmoves, optimal; + + fgets(cubestr, S, stdin); + fgets(solverstr, S, stdin); + fgets(optionsstr, S, stdin); + fgets(nisstypestr, S, stdin); + fgets(minmovesstr, S, stdin); + fgets(maxmovesstr, S, stdin); + fgets(maxsolsstr, S, stdin); + fgets(optimalstr, S, stdin); + + solverstr[strcspn(solverstr, "\n")] = 0; + optionsstr[strcspn(optionsstr, "\n")] = 0; + nisstypestr[strcspn(nisstypestr, "\n")] = 0; + + cube = readcube("H48", cubestr); + minmoves = atoi(minmovesstr); + maxmoves = atoi(maxmovesstr); + maxsols = atoi(maxsolsstr); + optimal = atoi(optimalstr); + + solve( + cube, + solverstr, + optionsstr, + nisstypestr, + minmoves, + maxmoves, + maxsols, + optimal, + NULL, + solutionsstr + ); + + printf("%s", solutionsstr); + + return 0; +}