h48

A prototype for an optimal Rubik's cube solver, work in progress.
git clone https://git.tronto.net/h48
Download | Log | Files | Refs | README | LICENSE

commit d26d9e46d305b3ab3c6ae3b55f1859cf413a1c94
parent 88577f1ad03bde81ff56914fd6b05c3cec3aba19
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Fri, 10 May 2024 12:52:50 +0200

Started implementing new table generation

Diffstat:
MTODO.txt | 17++++++++++++-----
Msrc/cube_avx2.h | 7+++++++
Msrc/cube_portable.h | 7+++++++
Msrc/solve_h48.h | 101++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
4 files changed, 95 insertions(+), 37 deletions(-)

diff --git a/TODO.txt b/TODO.txt @@ -1,9 +1,15 @@ +In progress: go back to nissy-style BFS for eosep data computation + - (done) compute selfsim and cocsep representatives + - implement set_eo_fast for invcoord_h48 (with tests for set_eo_fast) + also: implement other set_stuff methods for fast cube representation + - in gendata_esep, compute representatives for esep inverse coordinate + - change to BFS + TODO pruning tables: - - go back to nissy-style BFS for both cocsep and eoesep - - compute selfsim and keep list of representatives - four different ruotines for k=4,2,1 for eoesep - try: do not compute CO, but use its binary representation (x8 memory for cocsep) + (isn't this already done?) numbers so far (eoesep h=0, k=0): 0 1 @@ -64,12 +70,13 @@ switch. Here NISS may be useful. ## Optimizations +* Moves: don't do full compose for U*, D*, *2 (I removed this because I + was using shuffle intructions wrong, should re-do it) +* transform edges only for h48 coord calculation +* ptable: since it is fully symmetric, do only U or U2 at depth 1 * use threads: how to detect at runtime? what is sane number to default to? pthreads or threads.h? * multisolve with adaptive threading -* transform edges only for h48 coord calculation -* Moves: don't do full compose for U*, D*, *2 (I removed this because I - was using shuffle intructions wrong, should re-do it) * Trans: don't do full compose, for some trans composing perm is enough. Split out sumco() as a separate function and refactor, optimize. * Use multi-move (up to 4/5 moves at once) diff --git a/src/cube_avx2.h b/src/cube_avx2.h @@ -23,6 +23,7 @@ _static_inline int64_t coord_fast_co(cube_fast_t); _static_inline int64_t coord_fast_csep(cube_fast_t); _static_inline int64_t coord_fast_cocsep(cube_fast_t); _static_inline int64_t coord_fast_eo(cube_fast_t); +_static_inline void set_eo_fast(cube_fast_t *, int64_t); _static_inline int64_t coord_fast_esep(cube_fast_t); _static_inline cube_fast_t @@ -198,6 +199,12 @@ coord_fast_eo(cube_fast_t c) return mask >> 17; } +_static_inline void +set_eo_fast(cube_fast_t *c, int64_t eo) +{ + /* TODO */ +} + _static_inline int64_t coord_fast_esep(cube_fast_t c) { diff --git a/src/cube_portable.h b/src/cube_portable.h @@ -17,6 +17,7 @@ _static_inline int64_t coord_fast_co(cube_fast_t); _static_inline int64_t coord_fast_csep(cube_fast_t); _static_inline int64_t coord_fast_cocsep(cube_fast_t); _static_inline int64_t coord_fast_eo(cube_fast_t); +_static_inline void set_eo_fast(cube_fast_t *, int64_t eo); _static_inline int64_t coord_fast_esep(cube_fast_t); _static_inline cube_fast_t @@ -188,6 +189,12 @@ coord_fast_eo(cube_fast_t c) return ret; } +_static_inline void +_set_eo_fast(cube_fast_t *cube, int64_t eo) +{ + /* TODO */ +} + /* We encode the edge separation as a number from 0 to C(12,4)*C(8,4). It can be seen as the composition of two "subset index" coordinates. diff --git a/src/solve_h48.h b/src/solve_h48.h @@ -1,11 +1,21 @@ +#define COCSEP_CLASSES 3393U +#define COCSEP_TABLESIZE (_3p7 << 7U) +#define COCSEP_VISITEDSIZE ((COCSEP_TABLESIZE + 7U) / 8U) +#define COCSEP_INFOSIZE 12U +#define COCSEP_FULLSIZE (4 * (COCSEP_TABLESIZE + COCSEP_INFOSIZE)) + +#define ESEP_TABLESIZE ((COCSEP_CLASSES * _12c4 * _8c4) / 2U) +#define ESEP_VISITEDSIZE ((ESEP_TABLESIZE * 2U + 7U) / 8U) +#define ESEP_INFOSIZE 25 /* TODO unknown yet */ + +#define H48_ESIZE ((_12c4 * _8c4) << h) + #define _esep_ind(i) (i / 8U) #define _esep_shift(i) (4U * (i % 8U)) #define _esep_mask(i) (((1U << 4U) - 1U) << _esep_shift(i)) #define _visited_ind(i) (i / 8U) #define _visited_mask(i) (1U << (i % 8U)) -#define COCSEP_CLASSES 3393U - typedef struct { cube_fast_t cube; uint8_t depth; @@ -28,11 +38,13 @@ typedef struct { uint32_t *buf32; } dfsarg_esep_t; -_static_inline int64_t coord_h48(cube_fast_t, uint32_t *, uint8_t); +_static_inline int64_t coord_h48(cube_fast_t, const uint32_t *, uint8_t); +_static_inline cube_fast_t invcoord_h48(int64_t, const uint32_t *, + const cube_fast_t *, const cube_fast_t *, uint8_t); _static size_t gendata_cocsep(void *, uint64_t *, cube_fast_t *); _static uint32_t gendata_cocsep_dfs(dfsarg_cocsep_t *); -_static size_t gendata_esep(const void *, void *); +_static size_t gendata_esep(void *); _static uint32_t gendata_esep_dfs(dfsarg_esep_t *); _static_inline bool get_visited(const uint8_t *, int64_t); @@ -41,10 +53,10 @@ _static_inline uint8_t get_esep_pval(const uint32_t *, int64_t); _static_inline void set_esep_pval(uint32_t *, int64_t, uint8_t); _static_inline int64_t -coord_h48(cube_fast_t c, uint32_t *cocsepdata, uint8_t h) +coord_h48(cube_fast_t c, const uint32_t *cocsepdata, uint8_t h) { cube_fast_t d; - int64_t cocsep, coclass, esep, eo, esize, ret; + int64_t cocsep, coclass, esep, eo, ret; uint32_t data; uint8_t ttrep; @@ -59,8 +71,33 @@ coord_h48(cube_fast_t c, uint32_t *cocsepdata, uint8_t h) esep = coord_fast_esep(d); eo = coord_fast_eo(d); - esize = (_12c4 * _8c4) << h; - ret = (coclass * esize) + (esep << h) + (eo >> (11-h)); + ret = (coclass * H48_ESIZE) + (esep << h) + (eo >> (11-h)); + + return ret; +} + +_static_inline cube_fast_t +invcoord_h48( + int64_t i, + const uint32_t *cocsepdata, + const cube_fast_t *crep, + const cube_fast_t *erep, + uint8_t h +) +{ + cube_fast_t ret; + int64_t coclass, ee, esep, eo; + + coclass = i / H48_ESIZE; + ee = i % H48_ESIZE; + esep = ee >> h; + eo = (ee & ((1<<h)-1)) << (11-h); + +/* TODO: implement set_stuff methods + ret.c = crep[coclass]; + ret.e = erep[esep]; + set_eo_fast(&ret, eo); +*/ return ret; } @@ -79,19 +116,15 @@ After the data as described above, more auxiliary information is appended: _static size_t gendata_cocsep(void *buf, uint64_t *selfsim, cube_fast_t *rep) { - size_t tablesize = _3p7 << 7U; - size_t visitedsize = (tablesize + 7U) / 8U; - size_t infosize = 12; - uint32_t *buf32, *info, cc; uint16_t n; - uint8_t i, j, visited[visitedsize]; + uint8_t i, j, visited[COCSEP_VISITEDSIZE]; dfsarg_cocsep_t arg; buf32 = (uint32_t *)buf; - info = buf32 + tablesize; - memset(buf32, 0xFFU, sizeof(uint32_t) * tablesize); - memset(info, 0, sizeof(uint32_t) * infosize); + info = buf32 + COCSEP_TABLESIZE; + memset(buf32, 0xFFU, sizeof(uint32_t) * COCSEP_TABLESIZE); + memset(info, 0, sizeof(uint32_t) * COCSEP_INFOSIZE); memset(selfsim, 0, sizeof(uint64_t) * COCSEP_CLASSES); arg = (dfsarg_cocsep_t) { @@ -104,7 +137,7 @@ gendata_cocsep(void *buf, uint64_t *selfsim, cube_fast_t *rep) }; for (i = 0, n = 0, cc = 0; i < 10; i++) { DBG_LOG("cocsep: generating depth %" PRIu8 "\n", i); - memset(visited, 0, visitedsize); + memset(visited, 0, COCSEP_VISITEDSIZE); arg.depth = 0; arg.maxdepth = i; cc = gendata_cocsep_dfs(&arg); @@ -125,7 +158,7 @@ gendata_cocsep(void *buf, uint64_t *selfsim, cube_fast_t *rep) for (j = 0; j < 10; j++) DBG_LOG("%" PRIu8 ":\t%" PRIu32 "\n", j, info[j+2]); - return 4*(tablesize + infosize); + return COCSEP_FULLSIZE; } _static uint32_t @@ -134,6 +167,7 @@ gendata_cocsep_dfs(dfsarg_cocsep_t *arg) uint8_t m, t, tinv, olddepth; uint32_t cc; int64_t i; + uint64_t sim; cube_fast_t d; dfsarg_cocsep_t nextarg; @@ -149,6 +183,8 @@ gendata_cocsep_dfs(dfsarg_cocsep_t *arg) for (t = 0, cc = 0; t < 48; t++) { d = transform(arg->cube, t); + sim = equal_fast(arg->cube, d); + arg->selfsim[*arg->n] |= sim << t; i = coord_fast_cocsep(d); set_visited(arg->visited, i); tinv = inverse_trans(t); @@ -156,6 +192,7 @@ gendata_cocsep_dfs(dfsarg_cocsep_t *arg) arg->buf32[i] = (*arg->n << 16U) | (tinv << 8U) | arg->depth; } + arg->rep[*arg->n] = arg->cube; (*arg->n)++; return cc; @@ -176,38 +213,38 @@ TODO description generating fixed table with h=0, k=4 */ _static size_t -gendata_esep(const void *cocsepdata, void *buf) +gendata_esep(void *buf) { - size_t tablesize = (COCSEP_CLASSES * _12c4 * _8c4) / 2U; - size_t visitedsize = (tablesize * 2U + 7U) / 8U; - size_t infosize = 25; /* TODO unknown yet */ - - uint32_t *buf32, *info, cc; + uint32_t *buf32, *info, *cocsepdata, cc; uint8_t moves[20]; dfsarg_esep_t arg; - arg.visited = malloc(visitedsize); - buf32 = (uint32_t *)buf; - info = buf32 + tablesize; - memset(buf32, 0xFFU, 4*tablesize); - memset(info, 0, 4*infosize); + arg.visited = malloc(ESEP_TABLESIZE); + + cocsepdata = (uint32_t *)buf; + buf32 = cocsepdata + COCSEP_FULLSIZE; + info = buf32 + ESEP_TABLESIZE; + memset(buf32, 0xFFU, sizeof(uint32_t) * ESEP_TABLESIZE); + memset(info, 0, sizeof(uint32_t) * ESEP_INFOSIZE); arg.cube = cubetofast(solvedcube()); arg.moves = moves; arg.nmoves = 0; - arg.cocsepdata = (uint32_t *)cocsepdata; + arg.cocsepdata = cocsepdata; arg.buf32 = buf32; /* TODO loop until no more is done, not until 12! (or hardcode limits)*/ + /* TODO use bfs instead + how to work with different k? k=1 is the easiest */ for (arg.depth = 0, cc = 0; arg.depth < 12; arg.depth++) { DBG_LOG("esep: generating depth %" PRIu8 "\n", arg.depth); - memset(arg.visited, 0, visitedsize); + memset(arg.visited, 0, ESEP_VISITEDSIZE); cc = gendata_esep_dfs(&arg); info[arg.depth+1] = cc; DBG_LOG("found %" PRIu32 "\n", cc); } free(arg.visited); - return cc; + return COCSEP_FULLSIZE + cc; } _static uint32_t