nissy-core

The "engine" of nissy, including the H48 optimal solver.
git clone https://git.tronto.net/nissy-core
Download | Log | Files | Refs | README | LICENSE

commit 21fdf9697ddfab0a9c082a91ae0a79b1b574774c
parent b25989e47adfadabcf0dabfd58d615622887cee0
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Sun, 14 Dec 2025 12:20:37 +0100

Bye bye h0k4

Diffstat:
Msrc/solvers/h48/checkdata.h | 4++++
Msrc/solvers/h48/distribution_h48.h | 6++----
Msrc/solvers/h48/gendata_h48.h | 255+++----------------------------------------------------------------------------
Msrc/solvers/h48/gendata_types_macros.h | 11-----------
Msrc/solvers/h48/solve.h | 61+++++++++++++------------------------------------------------
5 files changed, 27 insertions(+), 310 deletions(-)

diff --git a/src/solvers/h48/checkdata.h b/src/solvers/h48/checkdata.h @@ -171,6 +171,10 @@ struct { }, }; +/* +TODO: this function can be simplified, now we support only one h48 table + +eoesep. The loop can be just two checks. +*/ STATIC long long checkdata_h48( const char *solver, diff --git a/src/solvers/h48/distribution_h48.h b/src/solvers/h48/distribution_h48.h @@ -44,11 +44,9 @@ getdistribution_h48( ) { getdistribution_data_t targ[THREADS]; wrapthread_define_var_thread_t(thread[THREADS]); - uint8_t pval, k; uint64_t local_distr[THREADS][INFO_DISTRIBUTION_LEN]; - uint64_t i, j, lines, lines_per_thread, c, cc; + uint64_t i, j, lines, lines_per_thread, c; - k = info->bits; lines = H48_LINES(info->h48h); lines_per_thread = DIV_ROUND_UP(lines, THREADS); @@ -56,7 +54,7 @@ getdistribution_h48( targ[i] = (getdistribution_data_t) { .min = i * lines_per_thread, .max = MIN((i+1) * lines_per_thread, lines), - .bits = k, + .bits = info->bits, .distr = local_distr[i], .table = table, }; diff --git a/src/solvers/h48/gendata_h48.h b/src/solvers/h48/gendata_h48.h @@ -2,13 +2,9 @@ STATIC long long gendata_h48_dispatch( const char *, unsigned long long, unsigned char *); STATIC uint64_t gendata_h48short(gendata_h48short_arg_t [static 1]); STATIC int64_t gendata_h48(gendata_h48_arg_t [static 1]); -STATIC void gendata_h48h0k4(gendata_h48_arg_t [static 1]); STATIC void gendata_h48k2(gendata_h48_arg_t [static 1]); - -STATIC void *gendata_h48h0k4_runthread(void *); STATIC void *gendata_h48k2_runthread(void *); -STATIC_INLINE void gendata_h48_mark_atomic(gendata_h48_mark_t [static 1]); STATIC_INLINE void gendata_h48_mark(gendata_h48_mark_t [static 1]); STATIC_INLINE bool gendata_h48k2_dfs_stop( cube_t, int8_t, h48k2_dfs_arg_t [static 1]); @@ -24,10 +20,6 @@ STATIC_INLINE uint8_t get_h48_pvalmin( const unsigned char *, uint64_t, uint8_t); STATIC_INLINE void set_h48_pvalmin( unsigned char *, uint64_t, uint8_t, uint8_t); -STATIC_INLINE uint8_t get_h48_pval_atomic( - wrapthread_atomic const unsigned char *, uint64_t, uint8_t); -STATIC_INLINE void set_h48_pval_atomic( - wrapthread_atomic unsigned char *, uint64_t, uint8_t, uint8_t); STATIC long long gendata_h48_dispatch( @@ -87,23 +79,16 @@ gendata_h48short(gendata_h48short_arg_t arg[static 1]) STATIC int64_t gendata_h48(gendata_h48_arg_t arg[static 1]) { - uint64_t size, cocsepsize, h48size, fallbacksize, fallback2size, of; + uint64_t size, cocsepsize, h48size, eoesepsize; long long r; unsigned char *cocsepdata_offset; - tableinfo_t cocsepinfo, h48info, fallbackinfo; - gendata_h48_arg_t arg_h0k4; + tableinfo_t cocsepinfo, h48info; cocsepsize = COCSEP_FULLSIZE; h48size = INFOSIZE + H48_TABLESIZE(arg->h, arg->k); - fallbacksize = arg->k == 2 ? INFOSIZE + H48_TABLESIZE(0, 4) : 0; - fallback2size = EOESEP_FULLSIZE; - - /* Add padding for 8-bit alignment */ - h48size = 8 * DIV_ROUND_UP(h48size, 8); - fallbacksize = 8 * DIV_ROUND_UP(fallbacksize, 8); - fallback2size = 8 * DIV_ROUND_UP(fallback2size, 8); + eoesepsize = EOESEP_FULLSIZE; - size = cocsepsize + h48size + fallbacksize + fallback2size; + size = cocsepsize + h48size + eoesepsize; if (arg->buf == NULL) return size; /* Dry-run */ @@ -121,18 +106,11 @@ gendata_h48(gendata_h48_arg_t arg[static 1]) arg->cocsepdata = (uint32_t *)cocsepdata_offset; arg->h48buf = (wrapthread_atomic unsigned char*)arg->buf + cocsepsize; +/* TODO this can probably be removed now? */ arg->base = 99; +/* */ - if (arg->h == 0 && arg->k == 4) { - gendata_h48h0k4(arg); - } else if (arg->k == 2) { - gendata_h48k2(arg); - } else { - LOG("[H48 gendata] Error: cannot generate data for h = %" PRIu8 - " and k = %" PRIu8 " (not implemented yet)\n", - arg->h, arg->k); - return NISSY_ERROR_INVALID_SOLVER; - } + gendata_h48k2(arg); r = readtableinfo(arg->buf_size, arg->buf, &cocsepinfo); if (r != NISSY_OK) { @@ -149,25 +127,9 @@ gendata_h48(gendata_h48_arg_t arg[static 1]) return NISSY_ERROR_UNKNOWN; } - /* Add h0k4 fallback table */ - - if (arg->k == 2) { - arg_h0k4 = *arg; - arg_h0k4.h = 0; - arg_h0k4.k = 4; - arg_h0k4.base = 0; - arg_h0k4.maxdepth = 20; - arg_h0k4.buf_size = arg->buf_size - h48size; - arg_h0k4.buf = arg->buf + cocsepsize + h48size; - arg_h0k4.h48buf = arg->h48buf + h48size; - - gendata_h48h0k4(&arg_h0k4); - - } - /* Add eoesep fallback table */ - gendata_eoesep(arg->buf + (size - fallback2size), 20); + gendata_eoesep(arg->buf + (size - eoesepsize), 20); /* Update tableinfo with correct next values */ @@ -186,166 +148,10 @@ gendata_h48(gendata_h48_arg_t arg[static 1]) return NISSY_ERROR_UNKNOWN; } - if (arg->k == 2) { - r = readtableinfo_n(arg->buf_size, arg->buf, 3, &fallbackinfo); - if (r != NISSY_OK) { - LOG("[H48 gendata] Error: could not read info for h48 " - "fallback table\n"); - return NISSY_ERROR_UNKNOWN; - } - - of = cocsepsize + h48size; - fallbackinfo.next = fallbacksize; - r = writetableinfo( - &fallbackinfo, arg->buf_size - of, arg->buf + of); - if (r != NISSY_OK) { - LOG("[H48 gendata] Error: could not write info for " - "h48 fallback table\n"); - return NISSY_ERROR_UNKNOWN; - } - } - return size; } STATIC void -gendata_h48h0k4(gendata_h48_arg_t arg[static 1]) -{ - wrapthread_atomic unsigned char *table; - uint8_t val; - uint64_t i, sc, done, d, h48max; - uint64_t t, tt, isize, cc, bufsize; - h48h0k4_bfs_arg_t bfsarg[THREADS]; - wrapthread_define_var_thread_t(thread[THREADS]); - wrapthread_define_var_mutex_t(table_mutex[CHUNKS]); - - arg->info = (tableinfo_t) { - .solver = "h48 solver h = 0, k = 4", - .type = TABLETYPE_PRUNING, - .infosize = INFOSIZE, - .fullsize = H48_TABLESIZE(0, 4) + INFOSIZE, - .hash = 0, - .entries = H48_COORDMAX(0), - .classes = 0, - .h48h = 0, - .bits = 4, - .base = 0, - .maxvalue = 0, - .next = 0, - }; - - table = arg->h48buf + INFOSIZE; - memset(table, 0xFF, H48_TABLESIZE(0, 4)); - - h48max = H48_COORDMAX(0); - sc = coord_h48(SOLVED_CUBE, arg->cocsepdata, 0); - set_h48_pval_atomic(table, sc, 4, 0); - arg->info.distribution[0] = 1; - - isize = h48max / THREADS; - isize = (isize / H48_COEFF(arg->k)) * H48_COEFF(arg->k); - for (t = 0; t < CHUNKS; t++) - wrapthread_mutex_init(&table_mutex[t], NULL); - for (t = 0; t < THREADS; t++) { - bfsarg[t] = (h48h0k4_bfs_arg_t) { - .cocsepdata = arg->cocsepdata, - .table = table, - .selfsim = arg->selfsim, - .crep = arg->crep, - .start = isize * t, - .end = t == THREADS-1 ? h48max : isize * (t+1), - }; - for (tt = 0; tt < CHUNKS; tt++) - bfsarg[t].table_mutex[tt] = &table_mutex[tt]; - } - for (done = 1, d = 1; done < h48max && d <= arg->maxdepth; d++) { - LOG("[H48 gendata] Generating depth %" PRIu64 "\n", d); - - for (t = 0; t < THREADS; t++) { - bfsarg[t].depth = d; - wrapthread_create(&thread[t], NULL, - gendata_h48h0k4_runthread, &bfsarg[t]); - } - - for (t = 0; t < THREADS; t++) - wrapthread_join(thread[t], NULL); - - for (i = 0, cc = 0; i < h48max; i++) { - val = get_h48_pval_atomic(table, i, 4); - cc += val == d; - } - - done += cc; - arg->info.distribution[d] = cc; - - LOG("[H48 gendata] Found %" PRIu64 "\n", cc); - } - - arg->info.maxvalue = d - 1; - bufsize = arg->buf_size - COCSEP_FULLSIZE; - writetableinfo(&arg->info, bufsize, (unsigned char *)arg->h48buf); -} - -STATIC void * -gendata_h48h0k4_runthread(void *arg) -{ - static const uint8_t breakpoint = 10; /* Hand-picked optimal */ - - uint8_t c, m; - uint64_t i; - uint64_t j; - cube_t cube, moved; - gendata_h48_mark_t markarg; - h48h0k4_bfs_arg_t *bfsarg; - - bfsarg = (h48h0k4_bfs_arg_t *)arg; - - markarg = (gendata_h48_mark_t) { - .depth = bfsarg->depth, - .h = 0, - .k = 4, - .base = 0, /* Unused */ - .cocsepdata = bfsarg->cocsepdata, - .selfsim = bfsarg->selfsim, - .table_atomic = bfsarg->table, - .table_mutex = bfsarg->table_mutex, - }; - - /* - * If depth < breakpoint, scan all neighbors of coordinates at depth-1. - * Otherwise, scan all neighbors of unvisited coordinates. - */ - for (i = bfsarg->start; i < bfsarg->end; i++) { - c = get_h48_pval_atomic(bfsarg->table, i, 4); - - if ((bfsarg->depth < breakpoint && c != bfsarg->depth - 1) || - (bfsarg->depth >= breakpoint && c != 0xF)) - continue; - - cube = invcoord_h48(i, bfsarg->crep, 0); - for (m = 0; m < 18; m++) { - moved = move(cube, m); - j = coord_h48(moved, bfsarg->cocsepdata, 0); - c = get_h48_pval_atomic(bfsarg->table, j, 4); - if (bfsarg->depth < breakpoint) { - if (c <= bfsarg->depth) - continue; - markarg.cube = moved; - gendata_h48_mark_atomic(&markarg); - } else { - if (c >= bfsarg->depth) - continue; - markarg.cube = cube; - gendata_h48_mark_atomic(&markarg); - break; /* Enough to find one, skip the rest */ - } - } - } - - return NULL; -} - -STATIC void gendata_h48k2(gendata_h48_arg_t arg[static 1]) { /* @@ -396,10 +202,8 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1]) static const uint64_t capacity = 10000019; static const uint64_t randomizer = 10000079; - uint8_t t; int sleeptime; unsigned char *table; - uint64_t j; wrapthread_atomic uint64_t count; uint64_t i, ii, inext, bufsize, done, nshort, velocity; h48map_t shortcubes; @@ -613,27 +417,6 @@ gendata_h48k2_dfs(h48k2_dfs_arg_t arg[static 1]) } STATIC_INLINE void -gendata_h48_mark_atomic(gendata_h48_mark_t arg[static 1]) -{ - uint8_t oldval, newval; - uint64_t coord; - wrapthread_define_if_threads(uint64_t, mutex); - - FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim, - coord = coord_h48(arg->cube, arg->cocsepdata, arg->h); - oldval = get_h48_pval_atomic(arg->table_atomic, coord, arg->k); - newval = (uint8_t)MAX(arg->depth, 0); - if (newval < oldval) { - mutex = H48_INDEX(coord, arg->k) % CHUNKS; - wrapthread_mutex_lock(arg->table_mutex[mutex]); - set_h48_pval_atomic( - arg->table_atomic, coord, arg->k, newval); - wrapthread_mutex_unlock(arg->table_mutex[mutex]); - } - ) -} - -STATIC_INLINE void gendata_h48_mark(gendata_h48_mark_t arg[static 1]) { uint8_t oldval, newval, v; @@ -736,16 +519,6 @@ get_h48_pvalmin(const unsigned char *table, uint64_t i, uint8_t k) get_h48_pval(table, i+UINT64_C(1), k); } -STATIC_INLINE uint8_t -get_h48_pval_atomic( - wrapthread_atomic const unsigned char *table, - uint64_t i, - uint8_t k -) -{ - return (table[H48_INDEX(i, k)] & H48_MASK(i, k)) >> H48_SHIFT(i, k); -} - STATIC_INLINE void set_h48_pval(unsigned char *table, uint64_t i, uint8_t k, uint8_t val) { @@ -762,15 +535,3 @@ set_h48_pvalmin(unsigned char *table, uint64_t i, uint8_t k, uint8_t val) set_h48_pval(table, i, k, v >> UINT8_C(2)); set_h48_pval(table, i+UINT64_C(1), k, v % UINT8_C(4)); } - -STATIC_INLINE void -set_h48_pval_atomic( - wrapthread_atomic unsigned char *table, - uint64_t i, - uint8_t k, - uint8_t val -) -{ - table[H48_INDEX(i, k)] = (table[H48_INDEX(i, k)] & (~H48_MASK(i, k))) - | (val << H48_SHIFT(i, k)); -} diff --git a/src/solvers/h48/gendata_types_macros.h b/src/solvers/h48/gendata_types_macros.h @@ -97,17 +97,6 @@ typedef struct { } gendata_h48short_arg_t; typedef struct { - uint8_t depth; - uint32_t *cocsepdata; - wrapthread_atomic unsigned char *table; - uint64_t *selfsim; - cube_t *crep; - uint64_t start; - uint64_t end; - wrapthread_define_struct_mutex_t(*table_mutex[CHUNKS]); -} h48h0k4_bfs_arg_t; - -typedef struct { cube_t cube; uint8_t h; uint8_t k; diff --git a/src/solvers/h48/solve.h b/src/solvers/h48/solve.h @@ -34,7 +34,6 @@ typedef struct { uint8_t base; const uint32_t *cocsepdata; const unsigned char *h48data; - const unsigned char *h48data_fallback_h0k4; const unsigned char *h48data_fallback_eoesep; uint64_t movemask_normal; uint64_t movemask_inverse; @@ -112,7 +111,7 @@ solve_h48_stop(dfsarg_solve_h48_t arg[static 1]) uint32_t data, data_inv; int64_t coord, coordext, coordmin; int8_t target, nh, n; - uint8_t pval_cocsep, pval_eoesep; + uint8_t pval_min, pval_eoesep; n = arg->solution_moves->nmoves + arg->solution_moves->npremoves; target = arg->target_depth - n; @@ -149,22 +148,12 @@ solve_h48_stop(dfsarg_solve_h48_t arg[static 1]) if (arg->k == 2 && arg->lb_inverse == 0) { arg->table_fallbacks++; - /* - pval_cocsep = get_h48_pval( - arg->h48data_fallback_h0k4, coord >> arg->h, 4); - */ -#if 1 coordmin = H48_LINE_MIN(coord); - pval_cocsep = get_h48_pvalmin( + pval_min = get_h48_pvalmin( arg->h48data, coordmin, arg->k); pval_eoesep = get_eoesep_pval_cube( arg->h48data_fallback_eoesep, arg->inverse); - arg->lb_inverse = MAX(pval_cocsep, pval_eoesep); -#else - coordmin = H48_LINE_MIN(coord); - arg->lb_inverse = get_h48_pvalmin( - arg->h48data, coordmin, arg->k); -#endif + arg->lb_inverse = MAX(pval_min, pval_eoesep); } else { arg->lb_inverse += arg->base; } @@ -189,22 +178,12 @@ solve_h48_stop(dfsarg_solve_h48_t arg[static 1]) if (arg->k == 2 && arg->lb_normal == 0) { arg->table_fallbacks++; - /* - pval_cocsep = get_h48_pval( - arg->h48data_fallback_h0k4, coord >> arg->h, 4); - */ -#if 1 coordmin = H48_LINE_MIN(coord); - pval_cocsep = get_h48_pval( + pval_min = get_h48_pval( arg->h48data, coordmin, arg->k); pval_eoesep = get_eoesep_pval_cube( arg->h48data_fallback_eoesep, arg->cube); - arg->lb_normal = MAX(pval_cocsep, pval_eoesep); -#else - coordmin = H48_LINE_MIN(coord); - arg->lb_normal = get_h48_pval( - arg->h48data, coordmin, arg->k); -#endif + arg->lb_normal = MAX(pval_min, pval_eoesep); } else { arg->lb_normal += arg->base; } @@ -482,7 +461,7 @@ solve_h48( void *poll_status_data ) { - int i, ntasks, eoesep_table_index; + int i, ntasks; bool td; wrapthread_atomic int status, prev_status; size_t lastused; @@ -493,10 +472,10 @@ solve_h48( long double fallback_rate, lookups_per_node; uint64_t offset; uint64_t nodes_visited, table_lookups, table_fallbacks; - tableinfo_t info, fbinfo, fbinfo2; + tableinfo_t info, fbinfo2; const uint32_t *cocsepdata; - const unsigned char *fallback, *h48data; - const unsigned char *fallback2; + const unsigned char *h48data; + const unsigned char *eoesep; solution_moves_t solution_moves[THREADS]; solution_settings_t settings; solution_list_t sollist; @@ -512,29 +491,16 @@ solve_h48( cocsepdata = (uint32_t *)(data + INFOSIZE); h48data = data + COCSEP_FULLSIZE + INFOSIZE; - /* Read fallback table(s) */ - fallback = NULL; - if (readtableinfo_n(data_size, data, 3, &fbinfo) != NISSY_OK) - goto solve_h48_error_data; + /* Read additional eoesep table */ offset = info.next; - eoesep_table_index = 3; - if (info.bits == 2) { - /* We only support h0k4 as fallback table */ - if (fbinfo.h48h != 0 || fbinfo.bits != 4) - goto solve_h48_error_data; - fallback = h48data + offset; - offset += fbinfo.next; - eoesep_table_index++; - } - - if (readtableinfo_n(data_size, data, eoesep_table_index, &fbinfo2) + if (readtableinfo_n(data_size, data, 3, &fbinfo2) != NISSY_OK) goto solve_h48_error_data; /* Some heuristic check to see that it is eoesep */ if (fbinfo2.bits != 4 || fbinfo2.type != TABLETYPE_SPECIAL) goto solve_h48_error_data; - fallback2 = h48data + offset; + eoesep = h48data + offset; settings = (solution_settings_t) { .unniss = true, @@ -553,8 +519,7 @@ solve_h48( .base = info.base, .cocsepdata = cocsepdata, .h48data = h48data, - .h48data_fallback_h0k4 = fallback, - .h48data_fallback_eoesep = fallback2, + .h48data_fallback_eoesep = eoesep, .solution_moves = &solution_moves[i], .solution_settings = &settings, .solution_list = &sollist,