h48

A prototype for an optimal Rubik's cube solver, work in progress.
git clone https://git.tronto.net/h48
Download | Log | Files | Refs | README | LICENSE

commit 561b7521370fc0f3c97b86aa71005baa9fea08ce
parent 8f6ac739c35c1d94adc504eb74a41b64d94a953f
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Sun,  6 Oct 2024 08:49:30 +0200

Fix concurrency issues

So I did not have any misconception about Atomics, I just could
not apply it where I did because the logic doesn't allow for it.

Diffstat:
Msrc/nissy.c | 2+-
Msrc/solvers/h48/gendata_h48.h | 91+++++++++++++++++++++++++++++++++++++++++++------------------------------------
Msrc/solvers/h48/gendata_types_macros.h | 24++++++++++++++++++++++--
3 files changed, 73 insertions(+), 44 deletions(-)

diff --git a/src/nissy.c b/src/nissy.c @@ -75,7 +75,7 @@ checkdata(const void *buf, const tableinfo_t *info) getdistribution_cocsep( (uint32_t *)((char *)buf + INFOSIZE), distr); } else if (!strncmp(info->solver, "h48", 3)) { - getdistribution_h48((_Atomic uint8_t *)buf + INFOSIZE, distr, + getdistribution_h48((uint8_t *)buf + INFOSIZE, distr, info->h48h, info->bits); } else { LOG("checkdata: unknown solver %s\n", info->solver); diff --git a/src/solvers/h48/gendata_h48.h b/src/solvers/h48/gendata_h48.h @@ -1,43 +1,27 @@ -/* -TODO: This loop over similar h48 coordinates can be improved by only -transforming edges, but we need to compose transformations (i.e. conjugate -_t by _ttrep). -*/ -#define FOREACH_H48SIM(ARG_CUBE, ARG_COCSEPDATA, ARG_SELFSIM, ARG_ACTION) \ - int64_t VAR_COCSEP = coord_cocsep(ARG_CUBE); \ - uint8_t VAR_TTREP = TTREP(ARG_COCSEPDATA[VAR_COCSEP]); \ - uint8_t VAR_INVERSE_TTREP = inverse_trans(VAR_TTREP); \ - int64_t VAR_COCLASS = COCLASS(ARG_COCSEPDATA[VAR_COCSEP]); \ - cube_t VAR_REP = transform(ARG_CUBE, VAR_TTREP); \ - uint64_t VAR_S = ARG_SELFSIM[VAR_COCLASS]; \ - for (uint8_t VAR_T = 0; VAR_T < 48 && VAR_S; VAR_T++, VAR_S >>= 1) { \ - if (!(VAR_S & 1)) continue; \ - ARG_CUBE = transform(VAR_REP, VAR_T); \ - ARG_CUBE = transform(ARG_CUBE, VAR_INVERSE_TTREP); \ - ARG_ACTION \ - } - STATIC uint64_t gendata_h48short(gendata_h48short_arg_t *); STATIC size_t gendata_h48(gendata_h48_arg_t *); STATIC size_t gendata_h48h0k4(gendata_h48_arg_t *); STATIC size_t gendata_h48k2(gendata_h48_arg_t *); STATIC void * gendata_h48h0k4_runthread(void *); +STATIC_INLINE void gendata_h48_mark_atomic(gendata_h48_mark_t *); STATIC_INLINE void gendata_h48_mark(gendata_h48_mark_t *); STATIC_INLINE bool gendata_h48k2_dfs_stop(cube_t, int8_t, h48k2_dfs_arg_t *); STATIC size_t gendata_h48k2_realcoord(gendata_h48_arg_t *); STATIC void gendata_h48k2_dfs(h48k2_dfs_arg_t *arg); STATIC void * gendata_h48k2_runthread(void *); STATIC tableinfo_t makeinfo_h48k2(gendata_h48_arg_t *); -STATIC void getdistribution_h48(_Atomic const uint8_t *, +STATIC void getdistribution_h48(const uint8_t *, uint64_t [static INFO_DISTRIBUTION_LEN], uint8_t, uint8_t); STATIC const uint32_t *get_cocsepdata_constptr(const void *); STATIC const uint8_t *get_h48data_constptr(const void *); +STATIC_INLINE uint8_t get_h48_pval(const uint8_t *, int64_t, uint8_t); +STATIC_INLINE void set_h48_pval(uint8_t *, int64_t, uint8_t, uint8_t); STATIC_INLINE uint8_t get_h48_pval_atomic( _Atomic const uint8_t *, int64_t, uint8_t); -STATIC_INLINE uint8_t get_h48_pval(const uint8_t *, int64_t, uint8_t); -STATIC_INLINE void set_h48_pval(_Atomic uint8_t *, int64_t, uint8_t, uint8_t); +STATIC_INLINE void set_h48_pval_atomic( + _Atomic uint8_t *, int64_t, uint8_t, uint8_t); STATIC_INLINE uint8_t get_h48_bound( cube_t, uint32_t, uint8_t, uint8_t, const uint8_t *); @@ -166,7 +150,7 @@ gendata_h48h0k4(gendata_h48_arg_t *arg) h48max = (int64_t)H48_COORDMAX(0); sc = coord_h48(SOLVED_CUBE, arg->cocsepdata, 0); - set_h48_pval(table, sc, 4, 0); + set_h48_pval_atomic(table, sc, 4, 0); arg->info.distribution[0] = 1; isize = h48max / THREADS; @@ -235,7 +219,7 @@ gendata_h48h0k4_runthread(void *arg) .k = 4, .cocsepdata = bfsarg->cocsepdata, .selfsim = bfsarg->selfsim, - .table = bfsarg->table, + .table_atomic = bfsarg->table, .table_mutex = bfsarg->table_mutex, }; @@ -259,12 +243,12 @@ gendata_h48h0k4_runthread(void *arg) if (c <= bfsarg->depth) continue; markarg.cube = moved; - gendata_h48_mark(&markarg); + gendata_h48_mark_atomic(&markarg); } else { if (c >= bfsarg->depth) continue; markarg.cube = cube; - gendata_h48_mark(&markarg); + gendata_h48_mark_atomic(&markarg); break; /* Enough to find one, skip the rest */ } } @@ -326,7 +310,7 @@ gendata_h48k2(gendata_h48_arg_t *arg) }; uint8_t t; - _Atomic uint8_t *table; + uint8_t *table; int64_t j; uint64_t i, ii, inext, count; h48map_t shortcubes; @@ -338,7 +322,7 @@ gendata_h48k2(gendata_h48_arg_t *arg) if (arg->buf == NULL) goto gendata_h48k2_return_size; - table = (_Atomic uint8_t *)arg->h48buf + INFOSIZE; + table = (uint8_t *)arg->h48buf + INFOSIZE; if (arg->buf != NULL) memset(table, 0xFF, H48_TABLESIZE(arg->h, arg->k)); @@ -389,7 +373,7 @@ gendata_h48k2(gendata_h48_arg_t *arg) h48map_destroy(&shortcubes); for (j = 0; j < H48_COORDMAX(arg->h); j++) { - t = get_h48_pval_atomic(table, j, 2); + t = get_h48_pval(table, j, 2); arg->info.distribution[t]++; } @@ -517,24 +501,42 @@ gendata_h48k2_dfs(h48k2_dfs_arg_t *arg) } STATIC_INLINE void -gendata_h48_mark(gendata_h48_mark_t *arg) +gendata_h48_mark_atomic(gendata_h48_mark_t *arg) { uint8_t oldval, newval; int64_t coord, mutex; FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim, coord = coord_h48(arg->cube, arg->cocsepdata, arg->h); - oldval = get_h48_pval_atomic(arg->table, coord, arg->k); + oldval = get_h48_pval_atomic(arg->table_atomic, coord, arg->k); newval = (uint8_t)MAX(arg->depth, 0); if (newval < oldval) { mutex = H48_INDEX(coord, arg->k) % CHUNKS; pthread_mutex_lock(arg->table_mutex[mutex]); - set_h48_pval(arg->table, coord, arg->k, newval); + set_h48_pval_atomic( + arg->table_atomic, coord, arg->k, newval); pthread_mutex_unlock(arg->table_mutex[mutex]); } ) } +STATIC_INLINE void +gendata_h48_mark(gendata_h48_mark_t *arg) +{ + uint8_t oldval, newval; + int64_t coord, mutex; + + FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim, + coord = coord_h48(arg->cube, arg->cocsepdata, arg->h); + mutex = H48_INDEX(coord, arg->k) % CHUNKS; + pthread_mutex_lock(arg->table_mutex[mutex]); + oldval = get_h48_pval(arg->table, coord, arg->k); + newval = (uint8_t)MAX(arg->depth, 0); + set_h48_pval(arg->table, coord, arg->k, MIN(newval, oldval)); + pthread_mutex_unlock(arg->table_mutex[mutex]); + ) +} + STATIC_INLINE bool gendata_h48k2_dfs_stop(cube_t cube, int8_t depth, h48k2_dfs_arg_t *arg) { @@ -548,7 +550,7 @@ gendata_h48k2_dfs_stop(cube_t cube, int8_t depth, h48k2_dfs_arg_t *arg) coord = coord_h48(cube, arg->cocsepdata, arg->h); mutex = H48_INDEX(coord, arg->k) % CHUNKS; pthread_mutex_lock(arg->table_mutex[mutex]); - oldval = get_h48_pval_atomic(arg->table, coord, arg->k); + oldval = get_h48_pval(arg->table, coord, arg->k); pthread_mutex_unlock(arg->table_mutex[mutex]); return oldval <= depth; } else { @@ -603,7 +605,7 @@ makeinfo_h48k2(gendata_h48_arg_t *arg) STATIC void getdistribution_h48( - _Atomic const uint8_t *table, + const uint8_t *table, uint64_t distr[static INFO_DISTRIBUTION_LEN], uint8_t h, uint8_t k @@ -615,7 +617,7 @@ getdistribution_h48( h48max = H48_COORDMAX(h); for (i = 0; i < h48max; i++) { - val = get_h48_pval_atomic(table, i, k); + val = get_h48_pval(table, i, k); distr[val]++; } } @@ -633,19 +635,26 @@ get_h48data_constptr(const void *data) } STATIC_INLINE uint8_t -get_h48_pval_atomic(_Atomic const uint8_t *table, int64_t i, uint8_t k) +get_h48_pval(const uint8_t *table, int64_t i, uint8_t k) { return (table[H48_INDEX(i, k)] & H48_MASK(i, k)) >> H48_SHIFT(i, k); } STATIC_INLINE uint8_t -get_h48_pval(const uint8_t *table, int64_t i, uint8_t k) +get_h48_pval_atomic(_Atomic const uint8_t *table, int64_t i, uint8_t k) { return (table[H48_INDEX(i, k)] & H48_MASK(i, k)) >> H48_SHIFT(i, k); } STATIC_INLINE void -set_h48_pval(_Atomic uint8_t *table, int64_t i, uint8_t k, uint8_t val) +set_h48_pval(uint8_t *table, int64_t i, uint8_t k, uint8_t val) +{ + table[H48_INDEX(i, k)] = (table[H48_INDEX(i, k)] & (~H48_MASK(i, k))) + | (val << H48_SHIFT(i, k)); +} + +STATIC_INLINE void +set_h48_pval_atomic(_Atomic uint8_t *table, int64_t i, uint8_t k, uint8_t val) { table[H48_INDEX(i, k)] = (table[H48_INDEX(i, k)] & (~H48_MASK(i, k))) | (val << H48_SHIFT(i, k)); @@ -671,7 +680,7 @@ gendata_h48_derive(uint8_t h, const void *fulltable, void *buf) size_t cocsepsize, h48size; uint8_t val_full, val_derive; const uint8_t *h48full; - _Atomic uint8_t *h48derive; + uint8_t *h48derive; int64_t i, j, h48max; gendata_h48_arg_t arg; tableinfo_t cocsepinfo, fulltableinfo; @@ -712,7 +721,7 @@ gendata_h48_derive(uint8_t h, const void *fulltable, void *buf) } h48full = (const uint8_t *)fulltable + cocsepsize + INFOSIZE; - h48derive = (_Atomic uint8_t *)arg.h48buf + INFOSIZE; + h48derive = (uint8_t *)arg.h48buf + INFOSIZE; memset(h48derive, 0xFF, H48_TABLESIZE(h, arg.k)); memset(arg.info.distribution, 0, INFO_DISTRIBUTION_LEN * sizeof(uint64_t)); @@ -723,7 +732,7 @@ gendata_h48_derive(uint8_t h, const void *fulltable, void *buf) LOG("Processing %" PRId64 "th coordinate\n", i); j = i >> (int64_t)(fulltableinfo.h48h - h); val_full = get_h48_pval(h48full, i, arg.k); - val_derive = get_h48_pval_atomic(h48derive, j, arg.k); + val_derive = get_h48_pval(h48derive, j, arg.k); set_h48_pval( h48derive, j, arg.k, MIN(val_full, val_derive)); } diff --git a/src/solvers/h48/gendata_types_macros.h b/src/solvers/h48/gendata_types_macros.h @@ -22,6 +22,25 @@ #define MAXLEN 20 #define CHUNKS COCSEP_CLASSES +/* +TODO: This loop over similar h48 coordinates can be improved by only +transforming edges, but we need to compose transformations (i.e. conjugate +_t by _ttrep). +*/ +#define FOREACH_H48SIM(ARG_CUBE, ARG_COCSEPDATA, ARG_SELFSIM, ARG_ACTION) \ + int64_t VAR_COCSEP = coord_cocsep(ARG_CUBE); \ + uint8_t VAR_TTREP = TTREP(ARG_COCSEPDATA[VAR_COCSEP]); \ + uint8_t VAR_INVERSE_TTREP = inverse_trans(VAR_TTREP); \ + int64_t VAR_COCLASS = COCLASS(ARG_COCSEPDATA[VAR_COCSEP]); \ + cube_t VAR_REP = transform(ARG_CUBE, VAR_TTREP); \ + uint64_t VAR_S = ARG_SELFSIM[VAR_COCLASS]; \ + for (uint8_t VAR_T = 0; VAR_T < 48 && VAR_S; VAR_T++, VAR_S >>= 1) { \ + if (!(VAR_S & 1)) continue; \ + ARG_CUBE = transform(VAR_REP, VAR_T); \ + ARG_CUBE = transform(ARG_CUBE, VAR_INVERSE_TTREP); \ + ARG_ACTION \ + } + typedef struct { cube_t cube; uint8_t depth; @@ -72,7 +91,7 @@ typedef struct { uint8_t base; uint8_t shortdepth; uint32_t *cocsepdata; - _Atomic uint8_t *table; + uint8_t *table; uint64_t *selfsim; cube_t *crep; h48map_t *shortcubes; @@ -89,6 +108,7 @@ typedef struct { uint8_t k; uint32_t *cocsepdata; uint64_t *selfsim; - _Atomic uint8_t *table; + uint8_t *table; + _Atomic uint8_t *table_atomic; pthread_mutex_t **table_mutex; } gendata_h48_mark_t;