nissy-core

The "engine" of nissy, including the H48 optimal solver.
git clone https://git.tronto.net/nissy-core
Download | Log | Files | Refs | README | LICENSE

commit b25989e47adfadabcf0dabfd58d615622887cee0
parent 6c42463b800bbab4583e90aeb748a79037c65a9e
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Sat, 13 Dec 2025 19:16:32 +0100

Intertwined table seems to work

Diffstat:
Msrc/solvers/dispatch.h | 2+-
Msrc/solvers/distribution.h | 6------
Msrc/solvers/h48/checkdata.h | 2+-
Asrc/solvers/h48/distribution_h48.h | 78++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/solvers/h48/gendata_h48.h | 71+++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
Msrc/solvers/h48/gendata_types_macros.h | 19+++++++++++++++++--
Msrc/solvers/h48/h48.h | 1+
Msrc/solvers/h48/solve.h | 30+++++++++++++++++++++++++++---
Msrc/solvers/h48/utils.h | 3++-
9 files changed, 178 insertions(+), 34 deletions(-)

diff --git a/src/solvers/dispatch.h b/src/solvers/dispatch.h @@ -43,7 +43,7 @@ solver_dispatch_t solver_dispatchers[] = { }; const char *solver_aliases[][2] = { - { "optimal", "h48h7k2" }, + { "optimal", "h48h7i" }, { "eofb", "coord_EO_UF" }, { "eorl", "coord_EO_UR" }, { "eoud", "coord_EO_FD" }, diff --git a/src/solvers/distribution.h b/src/solvers/distribution.h @@ -95,12 +95,6 @@ distribution_equal( ": expected %" PRIu64 ", found %" PRIu64 "\n", i, expected[i], actual[i]); } - /* - else { - LOG("[checkdata] Value for depth %" PRIu8 - " is correct (%" PRIu64 ")\n", i, actual[i]); - } - */ } return wrong == 0; diff --git a/src/solvers/h48/checkdata.h b/src/solvers/h48/checkdata.h @@ -219,7 +219,7 @@ checkdata_h48( LOG("[checkdata] Checking distribution for '%s' from " "actual table\n", info.solver); - getdistribution(table, actual_distribution, &info); + getdistribution_h48(table, actual_distribution, &info); if (!distribution_equal(ed, actual_distribution, em)) { LOG("[checkdata] Distribution from the actual table " "does not match the expected one\n"); diff --git a/src/solvers/h48/distribution_h48.h b/src/solvers/h48/distribution_h48.h @@ -0,0 +1,78 @@ +/* +This file is very similar to ../distibution.h, but some adaptations are +needed for H48 because of the intertwined fallback table, and it is easier +to have some duplication than to make these functions needlessly generic. +*/ + +STATIC void *getdistribution_h48_runthread(void *); +STATIC void getdistribution_h48(const unsigned char *, + uint64_t [static INFO_DISTRIBUTION_LEN], const tableinfo_t [static 1]); + +STATIC void * +getdistribution_h48_runthread(void *arg) +{ + getdistribution_data_t *data = (getdistribution_data_t *)arg; + const unsigned char *table; + uint8_t j, k, m; + uint64_t line, d; + unsigned char t; + + memset(data->distr, 0, INFO_DISTRIBUTION_LEN * sizeof(uint64_t)); + + k = data->bits; + table = data->table; + m = TABLE_MASK(0, k); + for (line = data->min; line < data->max; line++) { + for (d = 0; d < H48_LINE_BYTES; d++) { + t = table[d + line * H48_LINE_BYTES]; + for (j = 0; j < ENTRIES_PER_BYTE(k); j++) + data->distr[(t & (m << (j*k))) >> (j*k)]++; + } + t = table[(line+1) * H48_LINE_BYTES - 1]; + data->distr[(t & (m << (2*k))) >> (2*k)]--; + data->distr[(t & (m << (3*k))) >> (3*k)]--; + } + + return NULL; +} + +STATIC void +getdistribution_h48( + const unsigned char *table, + uint64_t distr[static INFO_DISTRIBUTION_LEN], + const tableinfo_t info[static 1] +) { + getdistribution_data_t targ[THREADS]; + wrapthread_define_var_thread_t(thread[THREADS]); + uint8_t pval, k; + uint64_t local_distr[THREADS][INFO_DISTRIBUTION_LEN]; + uint64_t i, j, lines, lines_per_thread, c, cc; + + k = info->bits; + lines = H48_LINES(info->h48h); + lines_per_thread = DIV_ROUND_UP(lines, THREADS); + + for (i = 0; i < THREADS; i++) { + targ[i] = (getdistribution_data_t) { + .min = i * lines_per_thread, + .max = MIN((i+1) * lines_per_thread, lines), + .bits = k, + .distr = local_distr[i], + .table = table, + }; + wrapthread_create(&thread[i], NULL, + getdistribution_h48_runthread, &targ[i]); + } + + for (i = 0; i < THREADS; i++) + wrapthread_join(thread[i], NULL); + + memset(distr, 0, INFO_DISTRIBUTION_LEN * sizeof(uint64_t)); + for (i = 0; i < THREADS; i++) + for (j = 0; j < INFO_DISTRIBUTION_LEN; j++) + distr[j] += local_distr[i][j]; + + /* Clean up excess values */ + c = H48_LINE_EXT(H48_COORDMAX(info->h48h)) % H48_LINE_ALLCOORDS; + distr[3] -= H48_LINE_COORDS - c; +} diff --git a/src/solvers/h48/gendata_h48.h b/src/solvers/h48/gendata_h48.h @@ -20,6 +20,10 @@ STATIC const unsigned char *get_h48data_constptr(const unsigned char *); STATIC_INLINE uint8_t get_h48_pval(const unsigned char *, uint64_t, uint8_t); STATIC_INLINE void set_h48_pval(unsigned char *, uint64_t, uint8_t, uint8_t); +STATIC_INLINE uint8_t get_h48_pvalmin( + const unsigned char *, uint64_t, uint8_t); +STATIC_INLINE void set_h48_pvalmin( + unsigned char *, uint64_t, uint8_t, uint8_t); STATIC_INLINE uint8_t get_h48_pval_atomic( wrapthread_atomic const unsigned char *, uint64_t, uint8_t); STATIC_INLINE void set_h48_pval_atomic( @@ -300,6 +304,7 @@ gendata_h48h0k4_runthread(void *arg) .depth = bfsarg->depth, .h = 0, .k = 4, + .base = 0, /* Unused */ .cocsepdata = bfsarg->cocsepdata, .selfsim = bfsarg->selfsim, .table_atomic = bfsarg->table, @@ -343,10 +348,6 @@ gendata_h48h0k4_runthread(void *arg) STATIC void gendata_h48k2(gendata_h48_arg_t arg[static 1]) { - static const uint8_t shortdepth = 8; - static const uint64_t capacity = 10000019; - static const uint64_t randomizer = 10000079; - /* * A good base value for the k=2 tables have few positions with value * 0, because those are treated as lower bound 0 and require a second @@ -391,6 +392,10 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1]) [11] = 10 }; + static const uint8_t shortdepth = 8; + static const uint64_t capacity = 10000019; + static const uint64_t randomizer = 10000079; + uint8_t t; int sleeptime; unsigned char *table; @@ -481,10 +486,7 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1]) h48map_destroy(&shortcubes); - for (j = 0; j < H48_COORDMAX(arg->h); j++) { - t = get_h48_pval(table, j, 2); - arg->info.distribution[t]++; - } + getdistribution_h48(table, arg->info.distribution, &arg->info); bufsize = arg->buf_size - COCSEP_FULLSIZE; writetableinfo(&arg->info, bufsize, (unsigned char *)arg->h48buf); @@ -493,7 +495,7 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1]) STATIC void * gendata_h48k2_runthread(void *arg) { - uint64_t coord; + uint64_t coord, coordext, coordmin; kvpair_t kv; h48k2_dfs_arg_t *dfsarg; wrapthread_define_if_threads(uint64_t, mutex); @@ -513,9 +515,13 @@ gendata_h48k2_runthread(void *arg) if (kv.val < dfsarg->shortdepth) { coord = kv.key >> (uint64_t)(11 - dfsarg->h); - mutex = H48_INDEX(coord, dfsarg->k) % CHUNKS; + coordext = H48_LINE_EXT(coord); + coordmin = H48_LINE_MIN(coord); + + mutex = H48_LINE(coord) % CHUNKS; wrapthread_mutex_lock(dfsarg->table_mutex[mutex]); - set_h48_pval(dfsarg->table, coord, dfsarg->k, 0); + set_h48_pval(dfsarg->table, coordext, dfsarg->k, 0); + set_h48_pvalmin(dfsarg->table, coordmin, dfsarg->k, kv.val); wrapthread_mutex_unlock(dfsarg->table_mutex[mutex]); } else { dfsarg->cube = invcoord_h48(kv.key, dfsarg->crep, 11); @@ -537,6 +543,7 @@ gendata_h48k2_dfs(h48k2_dfs_arg_t arg[static 1]) markarg = (gendata_h48_mark_t) { .h = arg->h, .k = arg->k, + .base = arg->base, .cocsepdata = arg->cocsepdata, .selfsim = arg->selfsim, .table = arg->table, @@ -629,17 +636,23 @@ gendata_h48_mark_atomic(gendata_h48_mark_t arg[static 1]) STATIC_INLINE void gendata_h48_mark(gendata_h48_mark_t arg[static 1]) { - uint8_t oldval, newval; - uint64_t coord; + uint8_t oldval, newval, v; + uint64_t coord, coordext, coordmin; wrapthread_define_if_threads(uint64_t, mutex); FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim, coord = coord_h48(arg->cube, arg->cocsepdata, arg->h); - mutex = H48_INDEX(coord, arg->k) % CHUNKS; + coordext = H48_LINE_EXT(coord); + coordmin = H48_LINE_MIN(coord); + + mutex = H48_LINE(coord) % CHUNKS; wrapthread_mutex_lock(arg->table_mutex[mutex]); - oldval = get_h48_pval(arg->table, coord, arg->k); + oldval = get_h48_pval(arg->table, coordext, arg->k); newval = (uint8_t)MAX(arg->depth, 0); - set_h48_pval(arg->table, coord, arg->k, MIN(newval, oldval)); + v = MIN(newval, oldval); + set_h48_pval(arg->table, coordext, arg->k, v); + v = arg->depth + arg->base; + set_h48_pvalmin(arg->table, coordmin, arg->k, v); wrapthread_mutex_unlock(arg->table_mutex[mutex]); ) } @@ -648,7 +661,7 @@ STATIC_INLINE bool gendata_h48k2_dfs_stop(cube_t cube, int8_t d, h48k2_dfs_arg_t arg[static 1]) { uint64_t val; - uint64_t coord; + uint64_t coord, coordext; wrapthread_define_if_threads(uint64_t, mutex); int8_t oldval; @@ -656,9 +669,10 @@ gendata_h48k2_dfs_stop(cube_t cube, int8_t d, h48k2_dfs_arg_t arg[static 1]) /* We are in the "real coordinate" case, we can stop if this coordinate has already been visited */ coord = coord_h48(cube, arg->cocsepdata, arg->h); - mutex = H48_INDEX(coord, arg->k) % CHUNKS; + coordext = H48_LINE_EXT(coord); + mutex = H48_LINE(coord) % CHUNKS; wrapthread_mutex_lock(arg->table_mutex[mutex]); - oldval = get_h48_pval(arg->table, coord, arg->k); + oldval = get_h48_pval(arg->table, coordext, arg->k); wrapthread_mutex_unlock(arg->table_mutex[mutex]); return oldval <= d; } else { @@ -682,7 +696,7 @@ makeinfo_h48k2(gendata_h48_arg_t arg[static 1]) .infosize = INFOSIZE, .fullsize = H48_TABLESIZE(arg->h, 2) + INFOSIZE, .hash = 0, - .entries = H48_COORDMAX(arg->h), + .entries = H48_COORDMAX(arg->h) + 2 * H48_LINES(arg->h), .classes = 0, .h48h = arg->h, .bits = 2, @@ -716,6 +730,13 @@ get_h48_pval(const unsigned char *table, uint64_t i, uint8_t k) } STATIC_INLINE uint8_t +get_h48_pvalmin(const unsigned char *table, uint64_t i, uint8_t k) +{ + return (get_h48_pval(table, i, k) << UINT8_C(2)) + + get_h48_pval(table, i+UINT64_C(1), k); +} + +STATIC_INLINE uint8_t get_h48_pval_atomic( wrapthread_atomic const unsigned char *table, uint64_t i, @@ -733,6 +754,16 @@ set_h48_pval(unsigned char *table, uint64_t i, uint8_t k, uint8_t val) } STATIC_INLINE void +set_h48_pvalmin(unsigned char *table, uint64_t i, uint8_t k, uint8_t val) +{ + uint8_t v; + + v = MIN(val, get_h48_pvalmin(table, i, k)); + set_h48_pval(table, i, k, v >> UINT8_C(2)); + set_h48_pval(table, i+UINT64_C(1), k, v % UINT8_C(4)); +} + +STATIC_INLINE void set_h48_pval_atomic( wrapthread_atomic unsigned char *table, uint64_t i, diff --git a/src/solvers/h48/gendata_types_macros.h b/src/solvers/h48/gendata_types_macros.h @@ -21,14 +21,28 @@ #define H48_COORDMAX_NOEO (COCSEP_CLASSES * ESEP_MAX) #define H48_COORDMAX(h) (H48_COORDMAX_NOEO << (uint64_t)(h)) #define H48_DIV(k) ((size_t)8 / (size_t)(k)) -#define H48_TABLESIZE(h, k) DIV_ROUND_UP((size_t)H48_COORDMAX((h)), H48_DIV(k)) +#define H48_TABLESIZE_K4(h) DIV_ROUND_UP((size_t)H48_COORDMAX(h), H48_DIV(4)) #define H48_COEFF(k) (UINT64_C(8) / (uint64_t)(k)) #define H48_INDEX(i, k) ((i) / H48_COEFF(k)) #define H48_SHIFT(i, k) ((uint8_t)(k) * (uint8_t)((i) % H48_COEFF(k))) #define H48_MASK(i, k) ((UINT8_BIT(k) - UINT8_C(1)) << H48_SHIFT(i, k)) -#define CHUNKS COCSEP_CLASSES +#define H48_LINE_BITS UINT64_C(512) +#define H48_LINE_BYTES (H48_LINE_BITS >> UINT64_C(3)) +#define H48_LINE_ALLCOORDS (H48_LINE_BITS / UINT64_C(2)) +#define H48_LINE_COORDS ((H48_LINE_BITS - UINT64_C(4)) / UINT64_C(2)) +#define H48_LINE(i) ((i) / H48_LINE_COORDS) +#define H48_LINE_EXT(i) ((i) + UINT64_C(2) * H48_LINE(i)) +#define H48_LINE_MIN(i) \ + ((H48_LINE(i) + UINT64_C(1)) * H48_LINE_ALLCOORDS - UINT64_C(2)) +#define H48_LINES(h) DIV_ROUND_UP(H48_COORDMAX(h), H48_LINE_COORDS) +#define H48_TABLESIZE_K2(h) ((size_t)(H48_LINE_BYTES * H48_LINES(h))) + +#define H48_TABLESIZE(h, k) \ + ((k) == 4 ? H48_TABLESIZE_K4(h) : H48_TABLESIZE_K2(h)) + +#define CHUNKS 2000 /* TODO: This loop over similar h48 coordinates can be improved by only @@ -115,6 +129,7 @@ typedef struct { int8_t depth; uint8_t h; uint8_t k; + uint8_t base; uint32_t *cocsepdata; uint64_t *selfsim; unsigned char *table; diff --git a/src/solvers/h48/h48.h b/src/solvers/h48/h48.h @@ -6,6 +6,7 @@ #include "map.h" #include "gendata_cocsep.h" #include "gendata_eoesep.h" +#include "distribution_h48.h" #include "gendata_h48.h" #include "checkdata.h" #include "solve.h" diff --git a/src/solvers/h48/solve.h b/src/solvers/h48/solve.h @@ -110,7 +110,7 @@ STATIC_INLINE bool solve_h48_stop(dfsarg_solve_h48_t arg[static 1]) { uint32_t data, data_inv; - int64_t coord; + int64_t coord, coordext, coordmin; int8_t target, nh, n; uint8_t pval_cocsep, pval_eoesep; @@ -142,17 +142,29 @@ solve_h48_stop(dfsarg_solve_h48_t arg[static 1]) if (!arg->use_lb_inverse) { coord = coord_h48_edges( arg->inverse, COCLASS(data_inv), TTREP(data_inv), arg->h); - arg->lb_inverse = get_h48_pval(arg->h48data, coord, arg->k); + coordext = H48_LINE_EXT(coord); + arg->lb_inverse = get_h48_pval(arg->h48data, coordext, arg->k); arg->table_lookups++; if (arg->k == 2 && arg->lb_inverse == 0) { arg->table_fallbacks++; + /* pval_cocsep = get_h48_pval( arg->h48data_fallback_h0k4, coord >> arg->h, 4); + */ +#if 1 + coordmin = H48_LINE_MIN(coord); + pval_cocsep = get_h48_pvalmin( + arg->h48data, coordmin, arg->k); pval_eoesep = get_eoesep_pval_cube( arg->h48data_fallback_eoesep, arg->inverse); arg->lb_inverse = MAX(pval_cocsep, pval_eoesep); +#else + coordmin = H48_LINE_MIN(coord); + arg->lb_inverse = get_h48_pvalmin( + arg->h48data, coordmin, arg->k); +#endif } else { arg->lb_inverse += arg->base; } @@ -170,17 +182,29 @@ solve_h48_stop(dfsarg_solve_h48_t arg[static 1]) if (!arg->use_lb_normal) { coord = coord_h48_edges( arg->cube, COCLASS(data), TTREP(data), arg->h); - arg->lb_normal = get_h48_pval(arg->h48data, coord, arg->k); + coordext = H48_LINE_EXT(coord); + arg->lb_normal = get_h48_pval(arg->h48data, coordext, arg->k); arg->table_lookups++; if (arg->k == 2 && arg->lb_normal == 0) { arg->table_fallbacks++; + /* pval_cocsep = get_h48_pval( arg->h48data_fallback_h0k4, coord >> arg->h, 4); + */ +#if 1 + coordmin = H48_LINE_MIN(coord); + pval_cocsep = get_h48_pval( + arg->h48data, coordmin, arg->k); pval_eoesep = get_eoesep_pval_cube( arg->h48data_fallback_eoesep, arg->cube); arg->lb_normal = MAX(pval_cocsep, pval_eoesep); +#else + coordmin = H48_LINE_MIN(coord); + arg->lb_normal = get_h48_pval( + arg->h48data, coordmin, arg->k); +#endif } else { arg->lb_normal += arg->base; } diff --git a/src/solvers/h48/utils.h b/src/solvers/h48/utils.h @@ -68,6 +68,7 @@ dataid_h48(const char *hk, char buf[static NISSY_SIZE_DATAID]) if (err < 0) return err; - sprintf(buf, "h48h%" PRIu8 "k%" PRIu8, h, k); + sprintf(buf, "h48h%" PRIu8 "k%" PRIu8 "%s", h, k, + k == 2 ? "i" : ""); return NISSY_OK; }