commit d0c592bdc3beb12074f085ab085dd90de9b8e643
parent 91afd406dca8dee4dfdb84ad025dd390f4e5a70c
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date: Sun, 12 Nov 2023 14:42:20 +0100
Simple solver: added tests, fixed
Diffstat:
8 files changed, 171 insertions(+), 35 deletions(-)
diff --git a/TODO.txt b/TODO.txt
@@ -2,12 +2,14 @@
See the sections below for details
-* Tests for simple solver
-* Extend cube and moves to include centers
+* Tests for multisolve
+* More tests for simple solver?
+* Benchmarks?
* More complex optimal solvers, pruning tables
-* Benchmarks
+* (More) benchmarks
* Multithreading (build-time option number of threads)
* Other optimizations
+* Extend cube and moves to include centers
* NISS
* Move manipulation utilities
* Coordinate solvers and other steps
@@ -76,6 +78,8 @@ What about symcoord?
### General things
+* Moves: don't do full compose for U*, D*, *2 (I removed this because I
+ was using shuffle intructions wrong, should re-do it)
* Trans: don't do full compose, for some trans composing perm is enough.
Split out sumco() as a separate function and refactor, optimize.
* Use multi-move (up to 4/5 moves at once)
@@ -113,6 +117,7 @@ What about symcoord?
* print ptables (or layout data in such a way that can be printed
easily, e.g. first bytes are null-terminated strig and can be
printed by user)
+* remove writetrans?
## "Front-end"
diff --git a/cube.c b/cube.c
@@ -277,7 +277,7 @@ _move_U(cube_fast_t c)
0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 1, 0, 3, 2, 4, 5
);
- return _mm256_shuffle_epi8(c, m);
+ return compose_fast(c, m);
}
static inline cube_fast_t
@@ -288,7 +288,7 @@ _move_U2(cube_fast_t c)
0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 4, 5, 3, 2, 0, 1
);
- return _mm256_shuffle_epi8(c, m);
+ return compose_fast(c, m);
}
static inline cube_fast_t
@@ -299,7 +299,7 @@ _move_U3(cube_fast_t c)
0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 0, 1, 3, 2, 5, 4
);
- return _mm256_shuffle_epi8(c, m);
+ return compose_fast(c, m);
}
static inline cube_fast_t
@@ -310,7 +310,7 @@ _move_D(cube_fast_t c)
0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 5, 4, 6, 7, 1, 0
);
- return _mm256_shuffle_epi8(c, m);
+ return compose_fast(c, m);
}
static inline cube_fast_t
@@ -321,7 +321,7 @@ _move_D2(cube_fast_t c)
0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 5, 4, 2, 3, 1, 0
);
- return _mm256_shuffle_epi8(c, m);
+ return compose_fast(c, m);
}
static inline cube_fast_t
@@ -332,7 +332,7 @@ _move_D3(cube_fast_t c)
0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 5, 4, 7, 6, 1, 0
);
- return _mm256_shuffle_epi8(c, m);
+ return compose_fast(c, m);
}
static inline cube_fast_t
@@ -354,7 +354,7 @@ _move_R2(cube_fast_t c)
0, 0, 0, 0, 0, 0, 0, 0, 7, 5, 6, 4, 0, 2, 1, 3
);
- return _mm256_shuffle_epi8(c, m);
+ return compose_fast(c, m);
}
static inline cube_fast_t
@@ -387,7 +387,7 @@ _move_L2(cube_fast_t c)
0, 0, 0, 0, 0, 0, 0, 0, 4, 6, 5, 7, 3, 1, 2, 0
);
- return _mm256_shuffle_epi8(c, m);
+ return compose_fast(c, m);
}
static inline cube_fast_t
@@ -420,7 +420,7 @@ _move_F2(cube_fast_t c)
0, 0, 0, 0, 0, 0, 0, 0, 7, 4, 5, 6, 3, 0, 1, 2
);
- return _mm256_shuffle_epi8(c, m);
+ return compose_fast(c, m);
}
static inline cube_fast_t
@@ -453,7 +453,7 @@ _move_B2(cube_fast_t c)
0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, 4, 1, 2, 3, 0
);
- return _mm256_shuffle_epi8(c, m);
+ return compose_fast(c, m);
}
static inline cube_fast_t
@@ -1479,13 +1479,13 @@ fasttocube(cube_fast_t c)
static inline bool
equal_fast(cube_fast_t c1, cube_fast_t c2)
{
- uint32_t mask;
+ int32_t mask;
__m256i cmp;
cmp = _mm256_cmpeq_epi8(c1, c2);
mask = _mm256_movemask_epi8(cmp);
- return mask == 0xffffffffU;
+ return mask == ~0;
}
static inline bool
@@ -1511,6 +1511,19 @@ invertco_fast(cube_fast_t c)
}
static inline cube_fast_t
+cleanaftershuffle(cube_fast_t c)
+{
+ __m256i b;
+
+ b = _mm256_set_epi8(
+ ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0
+ );
+
+ return _mm256_andnot_si256(b, c);
+}
+
+static inline cube_fast_t
inverse_fast(cube_fast_t c)
{
/* Method taken from Andrew Skalski's vcube[1]. The addition sequence
@@ -1544,6 +1557,7 @@ inverse_fast(cube_fast_t c)
vo = _mm256_shuffle_epi8(vo, vi);
vp = _mm256_andnot_si256(_mm256_or_si256(_eo_avx2, _co2_avx2), vi);
ret = _mm256_or_si256(vp, vo);
+ ret = cleanaftershuffle(ret);
return invertco_fast(ret);
}
@@ -1557,6 +1571,7 @@ compose_fast(cube_fast_t c1, cube_fast_t c2)
eo2 = _mm256_and_si256(c2, _eo_avx2);
s = _mm256_shuffle_epi8(c1, c2);
+ s = cleanaftershuffle(s);
ed = _mm256_xor_si256(s, eo2);
co1 = _mm256_and_si256(s, _co2_avx2);
co2 = _mm256_and_si256(c2, _co2_avx2);
@@ -2939,6 +2954,8 @@ Some of these routines depend on the efficient functions implemented in the
previous sections, while some other operate directly on the cube.
******************************************************************************/
+static inline uint8_t movebase(uint8_t);
+static inline uint8_t moveaxis(uint8_t);
static uint8_t readco(char *);
static uint8_t readcp(char *);
static uint8_t readeo(char *);
@@ -2990,6 +3007,18 @@ iserror(cube_t cube)
return equal(cube, zero);
}
+static inline uint8_t
+movebase(uint8_t move)
+{
+ return move / 3;
+}
+
+static inline uint8_t
+moveaxis(uint8_t move)
+{
+ return move / 6;
+}
+
static uint8_t
readco(char *str)
{
@@ -3292,6 +3321,9 @@ writemoves(uint8_t *m, int n, char *buf)
b += len;
*b = ' ';
}
+
+ if (b != buf)
+ b--; /* Remove last space */
*b = '\0';
return b - buf;
@@ -3653,8 +3685,8 @@ typedef struct {
cube_fast_t cube;
uint8_t depth;
int64_t maxsols;
- char *sols;
- int64_t nsols;
+ char **nextsol;
+ int64_t *nsols;
uint8_t nmoves;
uint8_t moves[20];
uint8_t (*estimate)(cube_fast_t);
@@ -3671,10 +3703,10 @@ allowednextmove(dfsarg_generic_t arg, uint8_t m)
if (n == 0)
return true;
- mbase = m / 3;
- maxis = mbase / 2;
- l1base = arg.moves[n-1] / 3;
- l1axis = l1base / 2;
+ mbase = movebase(m);
+ maxis = moveaxis(m);
+ l1base = movebase(arg.moves[n-1]);
+ l1axis = moveaxis(arg.moves[n-1]);
if (mbase == l1base || (maxis == l1axis && mbase < l1base))
return false;
@@ -3682,12 +3714,25 @@ allowednextmove(dfsarg_generic_t arg, uint8_t m)
if (n == 1)
return true;
- l2base = arg.moves[n-2] / 3;
- l2axis = l1base / 2;
+ l2base = movebase(arg.moves[n-2]);
+ l2axis = moveaxis(arg.moves[n-2]);
return l1axis != l2axis || mbase != l2base;
}
+static void
+solve_generic_appendsolution(dfsarg_generic_t arg)
+{
+ int strl;
+
+ strl = writemoves(arg.moves, arg.depth, *arg.nextsol);
+ DBG_LOG("Solution found: %s\n", *arg.nextsol);
+ *arg.nextsol += strl;
+ **arg.nextsol = '\n';
+ (*arg.nextsol)++;
+ (*arg.nsols)++;
+}
+
static int
solve_generic_dfs(dfsarg_generic_t arg)
{
@@ -3697,15 +3742,13 @@ solve_generic_dfs(dfsarg_generic_t arg)
bound = arg.estimate(arg.cube);
- if (arg.nsols == arg.maxsols || bound + arg.nmoves > arg.depth)
+ if (*arg.nsols == arg.maxsols || bound + arg.nmoves > arg.depth)
return 0;
if (bound == 0) {
if (arg.nmoves != arg.depth)
return 0;
- arg.sols += writemoves(arg.moves, arg.depth, arg.sols);
- *arg.sols = '\n';
- arg.sols++;
+ solve_generic_appendsolution(arg);
return 1;
}
@@ -3746,6 +3789,13 @@ solve_generic(
return -1;
}
+ if (issolved(cube)) {
+ DBG_LOG("solve: cube is already solved\n");
+ sols[0] = '\n';
+ sols[1] = 0;
+ return 1;
+ }
+
DBG_WARN(!strcmp(nisstype, ""),
"solve: NISS not implemented yet, 'nisstype' ignored\n");
@@ -3782,8 +3832,8 @@ solve_generic(
arg = (dfsarg_generic_t) {
.cube = cubetofast(cube),
.maxsols = maxsols,
- .sols = sols,
- .nsols = 0,
+ .nextsol = &sols,
+ .nsols = &ret,
.nmoves = 0,
.moves = {0},
.estimate = estimate,
@@ -3793,12 +3843,11 @@ solve_generic(
first = -1;
for (arg.depth = minmoves; arg.depth <= maxmoves; arg.depth++) {
tmp = solve_generic_dfs(arg);
- ret += tmp;
if (tmp != 0)
first = arg.depth;
- DBG_LOG("Found %" PRId64 " solutions at depth %" PRIu8 "\n",
- tmp, arg.depth);
+ DBG_LOG("Found %" PRId64 " solution%s at depth %" PRIu8 "\n",
+ tmp, tmp == 1 ? "" : "s", arg.depth);
if (ret >= maxsols)
break;
@@ -3864,7 +3913,7 @@ solve(
DBG_WARN(data == NULL,
"solve: 'data' not implemented yet, ignoring\n");
- if (!strcmp(solver, "simple")) {
+ if (!strcmp(solver, "optimal") || !strcmp(solver, "simple")) {
return solve_simple(
cube,
minmoves,
@@ -3874,7 +3923,7 @@ solve(
solutions
);
} else {
- DBG_LOG("solve: unknown solver\n");
+ DBG_LOG("solve: unknown solver '%s'\n", solver);
return -1;
}
diff --git a/cube.h b/cube.h
@@ -120,7 +120,10 @@ could.
int64_t solve(
cube_t cube, /* The cube to solve. Must be solvable. */
- char *solver, /* The solver. Supported solvers: TODO. */
+ char *solver, /* Supported solvers:
+ * "optimal" - currently the same as "simple"
+ * "simple" - a simple, slow solver using no tables
+ */
char *options, /* Some solvers accept extra options,
* like "!filter".
*/
diff --git a/test/060_solve_simple/01_U_U3.in b/test/060_solve_simple/01_U_U3.in
@@ -0,0 +1,8 @@
+UR0 UL0 DB0 DF0 UB0 UF0 DL0 DR0 FR0 FL0 BL0 BR0 UBR0 UFL0 DFL0 DBR0 UFR0 UBL0 DFR0 DBL0
+simple
+
+normal
+0
+3
+3
+-1
diff --git a/test/060_solve_simple/01_U_U3.out b/test/060_solve_simple/01_U_U3.out
@@ -0,0 +1 @@
+U'
diff --git a/test/060_solve_simple/02_MUMU_alloptimal.in b/test/060_solve_simple/02_MUMU_alloptimal.in
@@ -0,0 +1,8 @@
+UB0 DF0 DB0 UF0 UR0 UL0 DL0 DR0 FR0 FL0 BL0 BR0 UFR0 UBL0 DFL0 DBR0 UFL0 UBR0 DFR0 DBL0
+simple
+
+normal
+0
+-1
+10
+0
diff --git a/test/060_solve_simple/02_MUMU_alloptimal.out b/test/060_solve_simple/02_MUMU_alloptimal.out
@@ -0,0 +1,4 @@
+U2 R' L F2 R L'
+R U2 R' L F2 L'
+R L' U2 R' L F2
+L' U2 R' L F2 R
diff --git a/test/060_solve_simple/solve_simple_tests.c b/test/060_solve_simple/solve_simple_tests.c
@@ -0,0 +1,58 @@
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef CUBE_AVX2
+#include <immintrin.h>
+#endif
+
+#include "../../cube.h"
+
+#define S 10000
+
+int main() {
+ char cubestr[S], solverstr[S], optionsstr[S], nisstypestr[S];
+ char minmovesstr[S], maxmovesstr[S], maxsolsstr[S], optimalstr[S];
+ char solutionsstr[S];
+ cube_t cube;
+ int64_t maxsols;
+ int8_t minmoves, maxmoves, optimal;
+
+ fgets(cubestr, S, stdin);
+ fgets(solverstr, S, stdin);
+ fgets(optionsstr, S, stdin);
+ fgets(nisstypestr, S, stdin);
+ fgets(minmovesstr, S, stdin);
+ fgets(maxmovesstr, S, stdin);
+ fgets(maxsolsstr, S, stdin);
+ fgets(optimalstr, S, stdin);
+
+ solverstr[strcspn(solverstr, "\n")] = 0;
+ optionsstr[strcspn(optionsstr, "\n")] = 0;
+ nisstypestr[strcspn(nisstypestr, "\n")] = 0;
+
+ cube = readcube("H48", cubestr);
+ minmoves = atoi(minmovesstr);
+ maxmoves = atoi(maxmovesstr);
+ maxsols = atoi(maxsolsstr);
+ optimal = atoi(optimalstr);
+
+ solve(
+ cube,
+ solverstr,
+ optionsstr,
+ nisstypestr,
+ minmoves,
+ maxmoves,
+ maxsols,
+ optimal,
+ NULL,
+ solutionsstr
+ );
+
+ printf("%s", solutionsstr);
+
+ return 0;
+}