commit b25989e47adfadabcf0dabfd58d615622887cee0
parent 6c42463b800bbab4583e90aeb748a79037c65a9e
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date: Sat, 13 Dec 2025 19:16:32 +0100
Intertwined table seems to work
Diffstat:
9 files changed, 178 insertions(+), 34 deletions(-)
diff --git a/src/solvers/dispatch.h b/src/solvers/dispatch.h
@@ -43,7 +43,7 @@ solver_dispatch_t solver_dispatchers[] = {
};
const char *solver_aliases[][2] = {
- { "optimal", "h48h7k2" },
+ { "optimal", "h48h7i" },
{ "eofb", "coord_EO_UF" },
{ "eorl", "coord_EO_UR" },
{ "eoud", "coord_EO_FD" },
diff --git a/src/solvers/distribution.h b/src/solvers/distribution.h
@@ -95,12 +95,6 @@ distribution_equal(
": expected %" PRIu64 ", found %" PRIu64 "\n",
i, expected[i], actual[i]);
}
- /*
- else {
- LOG("[checkdata] Value for depth %" PRIu8
- " is correct (%" PRIu64 ")\n", i, actual[i]);
- }
- */
}
return wrong == 0;
diff --git a/src/solvers/h48/checkdata.h b/src/solvers/h48/checkdata.h
@@ -219,7 +219,7 @@ checkdata_h48(
LOG("[checkdata] Checking distribution for '%s' from "
"actual table\n", info.solver);
- getdistribution(table, actual_distribution, &info);
+ getdistribution_h48(table, actual_distribution, &info);
if (!distribution_equal(ed, actual_distribution, em)) {
LOG("[checkdata] Distribution from the actual table "
"does not match the expected one\n");
diff --git a/src/solvers/h48/distribution_h48.h b/src/solvers/h48/distribution_h48.h
@@ -0,0 +1,78 @@
+/*
+This file is very similar to ../distibution.h, but some adaptations are
+needed for H48 because of the intertwined fallback table, and it is easier
+to have some duplication than to make these functions needlessly generic.
+*/
+
+STATIC void *getdistribution_h48_runthread(void *);
+STATIC void getdistribution_h48(const unsigned char *,
+ uint64_t [static INFO_DISTRIBUTION_LEN], const tableinfo_t [static 1]);
+
+STATIC void *
+getdistribution_h48_runthread(void *arg)
+{
+ getdistribution_data_t *data = (getdistribution_data_t *)arg;
+ const unsigned char *table;
+ uint8_t j, k, m;
+ uint64_t line, d;
+ unsigned char t;
+
+ memset(data->distr, 0, INFO_DISTRIBUTION_LEN * sizeof(uint64_t));
+
+ k = data->bits;
+ table = data->table;
+ m = TABLE_MASK(0, k);
+ for (line = data->min; line < data->max; line++) {
+ for (d = 0; d < H48_LINE_BYTES; d++) {
+ t = table[d + line * H48_LINE_BYTES];
+ for (j = 0; j < ENTRIES_PER_BYTE(k); j++)
+ data->distr[(t & (m << (j*k))) >> (j*k)]++;
+ }
+ t = table[(line+1) * H48_LINE_BYTES - 1];
+ data->distr[(t & (m << (2*k))) >> (2*k)]--;
+ data->distr[(t & (m << (3*k))) >> (3*k)]--;
+ }
+
+ return NULL;
+}
+
+STATIC void
+getdistribution_h48(
+ const unsigned char *table,
+ uint64_t distr[static INFO_DISTRIBUTION_LEN],
+ const tableinfo_t info[static 1]
+) {
+ getdistribution_data_t targ[THREADS];
+ wrapthread_define_var_thread_t(thread[THREADS]);
+ uint8_t pval, k;
+ uint64_t local_distr[THREADS][INFO_DISTRIBUTION_LEN];
+ uint64_t i, j, lines, lines_per_thread, c, cc;
+
+ k = info->bits;
+ lines = H48_LINES(info->h48h);
+ lines_per_thread = DIV_ROUND_UP(lines, THREADS);
+
+ for (i = 0; i < THREADS; i++) {
+ targ[i] = (getdistribution_data_t) {
+ .min = i * lines_per_thread,
+ .max = MIN((i+1) * lines_per_thread, lines),
+ .bits = k,
+ .distr = local_distr[i],
+ .table = table,
+ };
+ wrapthread_create(&thread[i], NULL,
+ getdistribution_h48_runthread, &targ[i]);
+ }
+
+ for (i = 0; i < THREADS; i++)
+ wrapthread_join(thread[i], NULL);
+
+ memset(distr, 0, INFO_DISTRIBUTION_LEN * sizeof(uint64_t));
+ for (i = 0; i < THREADS; i++)
+ for (j = 0; j < INFO_DISTRIBUTION_LEN; j++)
+ distr[j] += local_distr[i][j];
+
+ /* Clean up excess values */
+ c = H48_LINE_EXT(H48_COORDMAX(info->h48h)) % H48_LINE_ALLCOORDS;
+ distr[3] -= H48_LINE_COORDS - c;
+}
diff --git a/src/solvers/h48/gendata_h48.h b/src/solvers/h48/gendata_h48.h
@@ -20,6 +20,10 @@ STATIC const unsigned char *get_h48data_constptr(const unsigned char *);
STATIC_INLINE uint8_t get_h48_pval(const unsigned char *, uint64_t, uint8_t);
STATIC_INLINE void set_h48_pval(unsigned char *, uint64_t, uint8_t, uint8_t);
+STATIC_INLINE uint8_t get_h48_pvalmin(
+ const unsigned char *, uint64_t, uint8_t);
+STATIC_INLINE void set_h48_pvalmin(
+ unsigned char *, uint64_t, uint8_t, uint8_t);
STATIC_INLINE uint8_t get_h48_pval_atomic(
wrapthread_atomic const unsigned char *, uint64_t, uint8_t);
STATIC_INLINE void set_h48_pval_atomic(
@@ -300,6 +304,7 @@ gendata_h48h0k4_runthread(void *arg)
.depth = bfsarg->depth,
.h = 0,
.k = 4,
+ .base = 0, /* Unused */
.cocsepdata = bfsarg->cocsepdata,
.selfsim = bfsarg->selfsim,
.table_atomic = bfsarg->table,
@@ -343,10 +348,6 @@ gendata_h48h0k4_runthread(void *arg)
STATIC void
gendata_h48k2(gendata_h48_arg_t arg[static 1])
{
- static const uint8_t shortdepth = 8;
- static const uint64_t capacity = 10000019;
- static const uint64_t randomizer = 10000079;
-
/*
* A good base value for the k=2 tables have few positions with value
* 0, because those are treated as lower bound 0 and require a second
@@ -391,6 +392,10 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1])
[11] = 10
};
+ static const uint8_t shortdepth = 8;
+ static const uint64_t capacity = 10000019;
+ static const uint64_t randomizer = 10000079;
+
uint8_t t;
int sleeptime;
unsigned char *table;
@@ -481,10 +486,7 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1])
h48map_destroy(&shortcubes);
- for (j = 0; j < H48_COORDMAX(arg->h); j++) {
- t = get_h48_pval(table, j, 2);
- arg->info.distribution[t]++;
- }
+ getdistribution_h48(table, arg->info.distribution, &arg->info);
bufsize = arg->buf_size - COCSEP_FULLSIZE;
writetableinfo(&arg->info, bufsize, (unsigned char *)arg->h48buf);
@@ -493,7 +495,7 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1])
STATIC void *
gendata_h48k2_runthread(void *arg)
{
- uint64_t coord;
+ uint64_t coord, coordext, coordmin;
kvpair_t kv;
h48k2_dfs_arg_t *dfsarg;
wrapthread_define_if_threads(uint64_t, mutex);
@@ -513,9 +515,13 @@ gendata_h48k2_runthread(void *arg)
if (kv.val < dfsarg->shortdepth) {
coord = kv.key >> (uint64_t)(11 - dfsarg->h);
- mutex = H48_INDEX(coord, dfsarg->k) % CHUNKS;
+ coordext = H48_LINE_EXT(coord);
+ coordmin = H48_LINE_MIN(coord);
+
+ mutex = H48_LINE(coord) % CHUNKS;
wrapthread_mutex_lock(dfsarg->table_mutex[mutex]);
- set_h48_pval(dfsarg->table, coord, dfsarg->k, 0);
+ set_h48_pval(dfsarg->table, coordext, dfsarg->k, 0);
+ set_h48_pvalmin(dfsarg->table, coordmin, dfsarg->k, kv.val);
wrapthread_mutex_unlock(dfsarg->table_mutex[mutex]);
} else {
dfsarg->cube = invcoord_h48(kv.key, dfsarg->crep, 11);
@@ -537,6 +543,7 @@ gendata_h48k2_dfs(h48k2_dfs_arg_t arg[static 1])
markarg = (gendata_h48_mark_t) {
.h = arg->h,
.k = arg->k,
+ .base = arg->base,
.cocsepdata = arg->cocsepdata,
.selfsim = arg->selfsim,
.table = arg->table,
@@ -629,17 +636,23 @@ gendata_h48_mark_atomic(gendata_h48_mark_t arg[static 1])
STATIC_INLINE void
gendata_h48_mark(gendata_h48_mark_t arg[static 1])
{
- uint8_t oldval, newval;
- uint64_t coord;
+ uint8_t oldval, newval, v;
+ uint64_t coord, coordext, coordmin;
wrapthread_define_if_threads(uint64_t, mutex);
FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim,
coord = coord_h48(arg->cube, arg->cocsepdata, arg->h);
- mutex = H48_INDEX(coord, arg->k) % CHUNKS;
+ coordext = H48_LINE_EXT(coord);
+ coordmin = H48_LINE_MIN(coord);
+
+ mutex = H48_LINE(coord) % CHUNKS;
wrapthread_mutex_lock(arg->table_mutex[mutex]);
- oldval = get_h48_pval(arg->table, coord, arg->k);
+ oldval = get_h48_pval(arg->table, coordext, arg->k);
newval = (uint8_t)MAX(arg->depth, 0);
- set_h48_pval(arg->table, coord, arg->k, MIN(newval, oldval));
+ v = MIN(newval, oldval);
+ set_h48_pval(arg->table, coordext, arg->k, v);
+ v = arg->depth + arg->base;
+ set_h48_pvalmin(arg->table, coordmin, arg->k, v);
wrapthread_mutex_unlock(arg->table_mutex[mutex]);
)
}
@@ -648,7 +661,7 @@ STATIC_INLINE bool
gendata_h48k2_dfs_stop(cube_t cube, int8_t d, h48k2_dfs_arg_t arg[static 1])
{
uint64_t val;
- uint64_t coord;
+ uint64_t coord, coordext;
wrapthread_define_if_threads(uint64_t, mutex);
int8_t oldval;
@@ -656,9 +669,10 @@ gendata_h48k2_dfs_stop(cube_t cube, int8_t d, h48k2_dfs_arg_t arg[static 1])
/* We are in the "real coordinate" case, we can stop
if this coordinate has already been visited */
coord = coord_h48(cube, arg->cocsepdata, arg->h);
- mutex = H48_INDEX(coord, arg->k) % CHUNKS;
+ coordext = H48_LINE_EXT(coord);
+ mutex = H48_LINE(coord) % CHUNKS;
wrapthread_mutex_lock(arg->table_mutex[mutex]);
- oldval = get_h48_pval(arg->table, coord, arg->k);
+ oldval = get_h48_pval(arg->table, coordext, arg->k);
wrapthread_mutex_unlock(arg->table_mutex[mutex]);
return oldval <= d;
} else {
@@ -682,7 +696,7 @@ makeinfo_h48k2(gendata_h48_arg_t arg[static 1])
.infosize = INFOSIZE,
.fullsize = H48_TABLESIZE(arg->h, 2) + INFOSIZE,
.hash = 0,
- .entries = H48_COORDMAX(arg->h),
+ .entries = H48_COORDMAX(arg->h) + 2 * H48_LINES(arg->h),
.classes = 0,
.h48h = arg->h,
.bits = 2,
@@ -716,6 +730,13 @@ get_h48_pval(const unsigned char *table, uint64_t i, uint8_t k)
}
STATIC_INLINE uint8_t
+get_h48_pvalmin(const unsigned char *table, uint64_t i, uint8_t k)
+{
+ return (get_h48_pval(table, i, k) << UINT8_C(2)) +
+ get_h48_pval(table, i+UINT64_C(1), k);
+}
+
+STATIC_INLINE uint8_t
get_h48_pval_atomic(
wrapthread_atomic const unsigned char *table,
uint64_t i,
@@ -733,6 +754,16 @@ set_h48_pval(unsigned char *table, uint64_t i, uint8_t k, uint8_t val)
}
STATIC_INLINE void
+set_h48_pvalmin(unsigned char *table, uint64_t i, uint8_t k, uint8_t val)
+{
+ uint8_t v;
+
+ v = MIN(val, get_h48_pvalmin(table, i, k));
+ set_h48_pval(table, i, k, v >> UINT8_C(2));
+ set_h48_pval(table, i+UINT64_C(1), k, v % UINT8_C(4));
+}
+
+STATIC_INLINE void
set_h48_pval_atomic(
wrapthread_atomic unsigned char *table,
uint64_t i,
diff --git a/src/solvers/h48/gendata_types_macros.h b/src/solvers/h48/gendata_types_macros.h
@@ -21,14 +21,28 @@
#define H48_COORDMAX_NOEO (COCSEP_CLASSES * ESEP_MAX)
#define H48_COORDMAX(h) (H48_COORDMAX_NOEO << (uint64_t)(h))
#define H48_DIV(k) ((size_t)8 / (size_t)(k))
-#define H48_TABLESIZE(h, k) DIV_ROUND_UP((size_t)H48_COORDMAX((h)), H48_DIV(k))
+#define H48_TABLESIZE_K4(h) DIV_ROUND_UP((size_t)H48_COORDMAX(h), H48_DIV(4))
#define H48_COEFF(k) (UINT64_C(8) / (uint64_t)(k))
#define H48_INDEX(i, k) ((i) / H48_COEFF(k))
#define H48_SHIFT(i, k) ((uint8_t)(k) * (uint8_t)((i) % H48_COEFF(k)))
#define H48_MASK(i, k) ((UINT8_BIT(k) - UINT8_C(1)) << H48_SHIFT(i, k))
-#define CHUNKS COCSEP_CLASSES
+#define H48_LINE_BITS UINT64_C(512)
+#define H48_LINE_BYTES (H48_LINE_BITS >> UINT64_C(3))
+#define H48_LINE_ALLCOORDS (H48_LINE_BITS / UINT64_C(2))
+#define H48_LINE_COORDS ((H48_LINE_BITS - UINT64_C(4)) / UINT64_C(2))
+#define H48_LINE(i) ((i) / H48_LINE_COORDS)
+#define H48_LINE_EXT(i) ((i) + UINT64_C(2) * H48_LINE(i))
+#define H48_LINE_MIN(i) \
+ ((H48_LINE(i) + UINT64_C(1)) * H48_LINE_ALLCOORDS - UINT64_C(2))
+#define H48_LINES(h) DIV_ROUND_UP(H48_COORDMAX(h), H48_LINE_COORDS)
+#define H48_TABLESIZE_K2(h) ((size_t)(H48_LINE_BYTES * H48_LINES(h)))
+
+#define H48_TABLESIZE(h, k) \
+ ((k) == 4 ? H48_TABLESIZE_K4(h) : H48_TABLESIZE_K2(h))
+
+#define CHUNKS 2000
/*
TODO: This loop over similar h48 coordinates can be improved by only
@@ -115,6 +129,7 @@ typedef struct {
int8_t depth;
uint8_t h;
uint8_t k;
+ uint8_t base;
uint32_t *cocsepdata;
uint64_t *selfsim;
unsigned char *table;
diff --git a/src/solvers/h48/h48.h b/src/solvers/h48/h48.h
@@ -6,6 +6,7 @@
#include "map.h"
#include "gendata_cocsep.h"
#include "gendata_eoesep.h"
+#include "distribution_h48.h"
#include "gendata_h48.h"
#include "checkdata.h"
#include "solve.h"
diff --git a/src/solvers/h48/solve.h b/src/solvers/h48/solve.h
@@ -110,7 +110,7 @@ STATIC_INLINE bool
solve_h48_stop(dfsarg_solve_h48_t arg[static 1])
{
uint32_t data, data_inv;
- int64_t coord;
+ int64_t coord, coordext, coordmin;
int8_t target, nh, n;
uint8_t pval_cocsep, pval_eoesep;
@@ -142,17 +142,29 @@ solve_h48_stop(dfsarg_solve_h48_t arg[static 1])
if (!arg->use_lb_inverse) {
coord = coord_h48_edges(
arg->inverse, COCLASS(data_inv), TTREP(data_inv), arg->h);
- arg->lb_inverse = get_h48_pval(arg->h48data, coord, arg->k);
+ coordext = H48_LINE_EXT(coord);
+ arg->lb_inverse = get_h48_pval(arg->h48data, coordext, arg->k);
arg->table_lookups++;
if (arg->k == 2 && arg->lb_inverse == 0) {
arg->table_fallbacks++;
+ /*
pval_cocsep = get_h48_pval(
arg->h48data_fallback_h0k4, coord >> arg->h, 4);
+ */
+#if 1
+ coordmin = H48_LINE_MIN(coord);
+ pval_cocsep = get_h48_pvalmin(
+ arg->h48data, coordmin, arg->k);
pval_eoesep = get_eoesep_pval_cube(
arg->h48data_fallback_eoesep, arg->inverse);
arg->lb_inverse = MAX(pval_cocsep, pval_eoesep);
+#else
+ coordmin = H48_LINE_MIN(coord);
+ arg->lb_inverse = get_h48_pvalmin(
+ arg->h48data, coordmin, arg->k);
+#endif
} else {
arg->lb_inverse += arg->base;
}
@@ -170,17 +182,29 @@ solve_h48_stop(dfsarg_solve_h48_t arg[static 1])
if (!arg->use_lb_normal) {
coord = coord_h48_edges(
arg->cube, COCLASS(data), TTREP(data), arg->h);
- arg->lb_normal = get_h48_pval(arg->h48data, coord, arg->k);
+ coordext = H48_LINE_EXT(coord);
+ arg->lb_normal = get_h48_pval(arg->h48data, coordext, arg->k);
arg->table_lookups++;
if (arg->k == 2 && arg->lb_normal == 0) {
arg->table_fallbacks++;
+ /*
pval_cocsep = get_h48_pval(
arg->h48data_fallback_h0k4, coord >> arg->h, 4);
+ */
+#if 1
+ coordmin = H48_LINE_MIN(coord);
+ pval_cocsep = get_h48_pval(
+ arg->h48data, coordmin, arg->k);
pval_eoesep = get_eoesep_pval_cube(
arg->h48data_fallback_eoesep, arg->cube);
arg->lb_normal = MAX(pval_cocsep, pval_eoesep);
+#else
+ coordmin = H48_LINE_MIN(coord);
+ arg->lb_normal = get_h48_pval(
+ arg->h48data, coordmin, arg->k);
+#endif
} else {
arg->lb_normal += arg->base;
}
diff --git a/src/solvers/h48/utils.h b/src/solvers/h48/utils.h
@@ -68,6 +68,7 @@ dataid_h48(const char *hk, char buf[static NISSY_SIZE_DATAID])
if (err < 0)
return err;
- sprintf(buf, "h48h%" PRIu8 "k%" PRIu8, h, k);
+ sprintf(buf, "h48h%" PRIu8 "k%" PRIu8 "%s", h, k,
+ k == 2 ? "i" : "");
return NISSY_OK;
}