h48

A prototype for an optimal Rubik's cube solver, work in progress.
git clone https://git.tronto.net/h48
Download | Log | Files | Refs | README | LICENSE

commit 2cff8fe8f8d18d0d6ed51d5a25b43b20ddf20901
parent d46f5be9da49b4f353ac1aa09cc54c223f4fe5b5
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Mon,  1 Apr 2024 10:11:50 +0200

Performance improvement for gendata_cocsep

Diffstat:
M.gitignore | 1+
MMakefile | 3+++
MTODO.txt | 1-
Mcube.c | 50++++++++++++++++++++++++++++----------------------
Aold/gendata_bfs_attempt.c | 153+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 185 insertions(+), 23 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -7,3 +7,4 @@ test/*/runtest test/run test/last.* *.o +*.s diff --git a/Makefile b/Makefile @@ -2,6 +2,9 @@ include config.mk all: cube.o debugcube.o +cube.s: clean + ${CC} -D${CUBETYPE} ${CFLAGS} -c -S -o cube.s cube.c + cube.o: clean ${CC} -D${CUBETYPE} ${CFLAGS} -c -o cube.o cube.c diff --git a/TODO.txt b/TODO.txt @@ -1,4 +1,3 @@ -TODO optimize cocsep generation (very slow!) TODO check which is faster: foreach_move or simple for loop? same for transformations TODO implement big pruning table for H48 solver diff --git a/cube.c b/cube.c @@ -1075,6 +1075,7 @@ previous sections, while some other operate directly on the cube. invertco_fast(compose_fast(compose_fast(_trans_cube_ ## T, c), \ _trans_cube_ ## T ## _inverse)) +/* #define _foreach_move(_m, _c, _d, instruction) \ _m = U; _d = _move(U, _c); instruction \ _m = U2; _d = _move(U2, _c); instruction \ @@ -1094,10 +1095,9 @@ previous sections, while some other operate directly on the cube. _m = B; _d = _move(B, _c); instruction \ _m = B2; _d = _move(B2, _c); instruction \ _m = B3; _d = _move(B3, _c); instruction -/* +*/ #define _foreach_move(_m, _c, _d, instruction) \ for (_m = 0; _m < 18; _m++) { _d = move(_c, _m); instruction } -*/ cube_t solvedcube(void); bool isconsistent(cube_t); @@ -1867,7 +1867,8 @@ Section: auxiliary procedures for H48 optimal solver (temporary) ******************************************************************************/ _static size_t gendata_cocsep(void *); -_static uint32_t dfs_cocsep(cube_fast_t, uint8_t, uint8_t, uint16_t *, uint32_t *); +_static uint32_t dfs_cocsep( + cube_fast_t, uint8_t, uint8_t, uint16_t *, uint32_t *, bool *); /* Each element of the cocsep table is a uint32_t used as follows: @@ -1883,33 +1884,36 @@ After the data as described above, more auxiliary information is appended: _static size_t gendata_cocsep(void *buf) { + static size_t tablesize = _3p7 << 7U; + + cube_fast_t solved; uint32_t *buf32, cc; uint16_t n; uint8_t i, j; - size_t tablesize; - - tablesize = _3p7 << 7U; + bool visited[tablesize]; buf32 = (uint32_t *)buf; memset(buf32, 0xFFU, 4*tablesize); memset(buf32 + tablesize, 0, 21*4); - for (i = 0, n = 0, cc = 0; cc != 0 || i == 0; i++) { + solved = cubetofast(solvedcube()); + buf32[tablesize+1] = 9U; /* Known max pruning value */ + for (i = 0, n = 0, cc = 0; i < 10; i++) { + memset(visited, 0, tablesize); /* Set visited bit for dfs */ DBG_LOG("gendata_cocsep: generating depth %" PRIu8 "\n", i); - cc = dfs_cocsep(cubetofast(solvedcube()), 0, i, &n, buf32); + cc = dfs_cocsep(solved, 0, i, &n, buf32, visited); buf32[tablesize+i+2] = cc; DBG_LOG("found %" PRIu32 "\n", cc); } buf32[tablesize] = (uint32_t)n; - buf32[tablesize+1] = (uint32_t)(i-2); DBG_LOG("cocsep data computed, %" PRIu32 " symmetry classes\n", n); DBG_LOG("Maximum pruning value: %" PRIu32 "\n", buf32[tablesize+1]); DBG_LOG("Pruning value distribution:\n"); - for (j = 0; j < i-1; j++) + for (j = 0; j < 10; j++) DBG_LOG("%" PRIu8 ":\t%" PRIu32 "\n", j, buf32[tablesize+j+2]); - return 4*(tablesize + i + 1); + return 4*(tablesize + 12); } _static uint32_t @@ -1918,25 +1922,31 @@ dfs_cocsep( uint8_t depth, uint8_t maxdepth, uint16_t *n, - uint32_t *buf32 + uint32_t *buf32, + bool *visited ) { uint8_t m, t, tinv, olddepth; - uint32_t cc, oldvalue; + uint32_t cc; uint64_t i; cube_fast_t d; - oldvalue = buf32[coord_fast_cocsep(c)]; + i = coord_fast_cocsep(c); + olddepth = (uint8_t)(buf32[i] & 0xFFU); + if (olddepth < depth || visited[i]) + return 0; + visited[i] = true; + if (depth == maxdepth) { - if ((oldvalue & 0xFFU) != 0xFFU) + if ((buf32[i] & 0xFFU) != 0xFFU) return 0; for (t = 0, cc = 0; t < 48; t++) { d = transform(c, t); i = coord_fast_cocsep(d); + visited[i] = true; tinv = inverse_trans(t); - if ((buf32[i] & 0xFFU) == 0xFFU) - cc++; + cc += (buf32[i] & 0xFFU) == 0xFFU; buf32[i] = (*n << 16U) | (tinv << 8U) | depth; } (*n)++; @@ -1944,13 +1954,9 @@ dfs_cocsep( return cc; } - olddepth = (uint8_t)(oldvalue & 0xFFU); - if (olddepth != depth) - return 0; - cc = 0; _foreach_move(m, c, d, - cc += dfs_cocsep(d, depth+1, maxdepth, n, buf32); + cc += dfs_cocsep(d, depth+1, maxdepth, n, buf32, visited); ) return cc; diff --git a/old/gendata_bfs_attempt.c b/old/gendata_bfs_attempt.c @@ -0,0 +1,153 @@ +_static size_t gendata_cocsep(void *); +_static uint32_t dfs_cocsep(cube_fast_t, uint8_t, uint8_t, uint32_t *); + +/* +Each element of the cocsep table is a uint32_t used as follows: + - Lowest 8-bit block: pruning value + - Second-lower 8-bit block: "ttrep" (transformation to representative) + - Top 16-bit block: symcoord value +After the data as described above, more auxiliary information is appended: + - A uint32_t representing the number of symmetry classes + - A uint32_t representing the highest value of the pruning table + - One uint32_t for each "line" of the pruning table, representing the number + of positions having that pruning value. +*/ +_static size_t +gendata_cocsep(void *buf) +{ + uint32_t *buf32, cc; + uint64_t i64; + uint16_t n; + uint8_t i, j; + size_t tablesize; + + tablesize = _3p7 << 7U; + + buf32 = (uint32_t *)buf; + memset(buf32, 0xFFU, 4*tablesize); + memset(buf32 + tablesize, 0, 21*4); + +/* New impl BFS + + uint32_t nold = 0, nnew = 0; + uint64_t coord; + uint8_t m, olddepth; + cube_fast_t c, d, oldlevel[100000], newlevel[100000]; + newlevel[0] = cubetofast(solvedcube()); + nnew = 1; + buf32[coord_fast_cocsep(newlevel[0])] = UFr << 8U; + n = 1; + DBG_LOG("gendata_cocsep: found 1 position at depth 0\n"); + for (i = 1; i < 10; i++) { + DBG_LOG("gendata_cocsep: generating depth %" PRIu8 "\n", i); + memcpy(oldlevel, newlevel, nnew * sizeof(cube_fast_t)); + nold = nnew; + nnew = 0; + for (j = 0; j < nold; j++) { + _foreach_move(m, oldlevel[j], newlevel[nnew], + coord = coord_fast_cocsep(newlevel[nnew]); + olddepth = buf32[coord] & 0xFFU; + buf32[coord] = i; + nnew += olddepth > i; + ) + } + DBG_LOG("found %" PRIu32 "\n", nnew); + } + +End new impl BFS */ + + /* Pruning values */ + buf32[tablesize+1] = 9U; /* Known max pruning value */ + for (i = 0, cc = 0; i < 10; i++) { + DBG_LOG("gendata_cocsep: generating depth %" PRIu8 "\n", i); + cc = dfs_cocsep(cubetofast(solvedcube()), 0, i, buf32); + buf32[tablesize+i+2] = cc; + DBG_LOG("found %" PRIu32 "\n", cc); + } + + /* Symmetries */ + for (i64 = 0, n = 0; i64 < tablesize; i64++) { + } + buf32[tablesize] = (uint32_t)n; + + DBG_LOG("cocsep data computed, %" PRIu32 " symmetry classes\n", n); + DBG_LOG("Maximum pruning value: %" PRIu32 "\n", buf32[tablesize+1]); + DBG_LOG("Pruning value distribution:\n"); + for (j = 0; j < 10; j++) + DBG_LOG("%" PRIu8 ":\t%" PRIu32 "\n", j, buf32[tablesize+j+2]); + + return 4*(tablesize + 11); +} + +_static uint32_t +dfs_cocsep(cube_fast_t c, uint8_t depth, uint8_t maxdepth, uint32_t *buf32) +{ + uint8_t m, olddepth; + uint32_t update, cc; + uint64_t i; + cube_fast_t d; + + i = coord_fast_cocsep(c); + olddepth = (uint8_t)(buf32[i] & 0xFFU); + if (olddepth < depth) + return 0; + + if (depth == maxdepth) { + update = (buf32[i] & 0xFFU) == 0xFFU; + buf32[i] = depth; + return update; + } + + cc = 0; + _foreach_move(m, c, d, + cc += dfs_cocsep(d, depth+1, maxdepth, buf32); + ) + + return cc; +} + +/* +_static uint32_t +dfs_cocsep( + cube_fast_t c, + uint8_t depth, + uint8_t maxdepth, + uint16_t *n, + uint32_t *buf32 +) +{ + uint8_t m, t, tinv, olddepth; + uint32_t cc, oldvalue; + uint64_t i; + cube_fast_t d; + + oldvalue = buf32[coord_fast_cocsep(c)]; + if (depth == maxdepth) { + if ((oldvalue & 0xFFU) != 0xFFU) + return 0; + + for (t = 0, cc = 0; t < 48; t++) { + d = transform(c, t); + i = coord_fast_cocsep(d); + tinv = inverse_trans(t); + if ((buf32[i] & 0xFFU) == 0xFFU) + cc++; + buf32[i] = (*n << 16U) | (tinv << 8U) | depth; + } + (*n)++; + + return cc; + } + + olddepth = (uint8_t)(oldvalue & 0xFFU); + if (olddepth != depth) + return 0; + + cc = 0; + _foreach_move(m, c, d, + cc += dfs_cocsep(d, depth+1, maxdepth, n, buf32); + ) + + return cc; +} +*/