commit 1a092414735f244bffa3c6878e57463b7dd1e9ba
parent be2d3fe10fd8adbf3fa4ce7cd436e2ad33363fcc
Author: Enrico Tenuti <123324975+enricotenuti@users.noreply.github.com>
Date: Tue, 24 Sep 2024 07:13:13 +0200
Merge branch 'sebastianotronto:master' into master
Diffstat:
2 files changed, 232 insertions(+), 139 deletions(-)
diff --git a/src/solvers/h48/gendata_h48.h b/src/solvers/h48/gendata_h48.h
@@ -33,6 +33,7 @@ _t by _ttrep).
typedef struct {
uint8_t h;
uint8_t k;
+ uint8_t base;
uint8_t maxdepth;
tableinfo_t info;
void *buf;
@@ -56,6 +57,12 @@ typedef struct {
uint8_t *table;
uint64_t *selfsim;
cube_t *crep;
+ uint64_t start;
+ uint64_t end;
+ uint64_t count;
+ uint64_t *distribution;
+ pthread_mutex_t *distribution_mutex;
+ pthread_mutex_t *table_mutex[CHUNKS];
} h48h0k4_bfs_arg_t;
typedef struct {
@@ -75,17 +82,28 @@ typedef struct {
uint64_t *count;
} h48k2_dfs_arg_t;
+typedef struct {
+ cube_t cube;
+ int8_t depth;
+ uint8_t h;
+ uint8_t k;
+ uint32_t *cocsepdata;
+ uint64_t *selfsim;
+ uint8_t *table;
+ pthread_mutex_t **table_mutex;
+} gendata_h48_mark_t;
+
STATIC uint64_t gendata_h48short(gendata_h48short_arg_t *);
STATIC size_t gendata_h48(gendata_h48_arg_t *);
STATIC size_t gendata_h48h0k4(gendata_h48_arg_t *);
-STATIC int64_t gendata_h48h0k4_bfs(h48h0k4_bfs_arg_t *);
-STATIC int64_t gendata_h48h0k4_bfs_fromdone(h48h0k4_bfs_arg_t *);
-STATIC int64_t gendata_h48h0k4_bfs_fromnew(h48h0k4_bfs_arg_t *);
STATIC size_t gendata_h48k2(gendata_h48_arg_t *);
-STATIC void * gendata_h48k2_runthread(void *);
-STATIC_INLINE void gendata_h48k2_mark(cube_t, int8_t, h48k2_dfs_arg_t *);
-STATIC_INLINE bool gendata_h48k2_dfs_stop(cube_t, uint8_t, h48k2_dfs_arg_t *);
+STATIC void * gendata_h48h0k4_runthread(void *);
+STATIC_INLINE uint64_t gendata_h48_mark(gendata_h48_mark_t *);
+STATIC_INLINE bool gendata_h48k2_dfs_stop(cube_t, int8_t, h48k2_dfs_arg_t *);
+STATIC size_t gendata_h48k2_realcoord(gendata_h48_arg_t *);
STATIC void gendata_h48k2_dfs(h48k2_dfs_arg_t *arg);
+STATIC void * gendata_h48k2_runthread(void *);
+STATIC tableinfo_t makeinfo_h48k2(gendata_h48_arg_t *, uint8_t);
STATIC uint32_t *get_cocsepdata_ptr(const void *);
STATIC uint8_t *get_h48data_ptr(const void *);
@@ -147,8 +165,12 @@ gendata_h48(gendata_h48_arg_t *arg)
arg->cocsepdata = (uint32_t *)cocsepdata_offset;
arg->h48buf = (char *)arg->buf + cocsepsize;
+ arg->base = 99; // TODO: set this somewhere else
+
if (arg->h == 0 && arg->k == 4) {
h48size = gendata_h48h0k4(arg);
+ } else if ((arg->h == 0 || arg->h == 11) && arg->k == 2) {
+ h48size = gendata_h48k2_realcoord(arg);
} else if (arg->k == 2) {
h48size = gendata_h48k2(arg);
} else {
@@ -179,17 +201,15 @@ gendata_h48_error:
return 0;
}
-/*
-TODO description
-generating fixed table with h=0, k=4
-*/
STATIC size_t
gendata_h48h0k4(gendata_h48_arg_t *arg)
{
- uint32_t j;
uint8_t *table;
- h48h0k4_bfs_arg_t bfsarg;
- int64_t sc, cc, done, h48max;
+ int64_t sc, done, d, h48max;
+ uint64_t t, tt, isize, cc;
+ h48h0k4_bfs_arg_t bfsarg[THREADS];
+ pthread_t thread[THREADS];
+ pthread_mutex_t distribution_mutex, table_mutex[CHUNKS];
if (arg->buf == NULL)
goto gendata_h48h0k4_return_size;
@@ -216,109 +236,134 @@ gendata_h48h0k4(gendata_h48_arg_t *arg)
sc = coord_h48(SOLVED_CUBE, arg->cocsepdata, 0);
set_h48_pval(table, sc, 4, 0);
arg->info.distribution[0] = 1;
- bfsarg = (h48h0k4_bfs_arg_t) {
- .cocsepdata = arg->cocsepdata,
- .table = table,
- .selfsim = arg->selfsim,
- .crep = arg->crep
- };
- for (
- done = 1, bfsarg.depth = 1, cc = 0;
- done < h48max && bfsarg.depth <= arg->maxdepth;
- bfsarg.depth++
- ) {
- LOG("h48: generating depth %" PRIu8 "\n", bfsarg.depth);
- cc = gendata_h48h0k4_bfs(&bfsarg);
- done += cc;
- arg->info.distribution[bfsarg.depth] = cc;
- LOG("found %" PRId64 "\n", cc);
+
+ isize = h48max / THREADS;
+ isize = (isize / H48_COEFF(arg->k)) * H48_COEFF(arg->k);
+ pthread_mutex_init(&distribution_mutex, NULL);
+ for (t = 0; t < CHUNKS; t++)
+ pthread_mutex_init(&table_mutex[t], NULL);
+ for (t = 0; t < THREADS; t++) {
+ bfsarg[t] = (h48h0k4_bfs_arg_t) {
+ .cocsepdata = arg->cocsepdata,
+ .table = table,
+ .selfsim = arg->selfsim,
+ .crep = arg->crep,
+ .start = isize * t,
+ .end = t == THREADS-1 ? (uint64_t)h48max : isize * (t+1),
+ .distribution = arg->info.distribution,
+ .distribution_mutex = &distribution_mutex,
+ };
+ for (tt = 0; tt < CHUNKS; tt++)
+ bfsarg[t].table_mutex[tt] = &table_mutex[tt];
}
+ for (done = 1, d = 1; done < h48max && d <= arg->maxdepth; d++) {
+ LOG("h48: generating depth %" PRIu8 "\n", d);
+
+ for (t = 0; t < THREADS; t++) {
+ bfsarg[t].depth = d;
+ bfsarg[t].count = 0;
+ pthread_create(&thread[t], NULL,
+ gendata_h48h0k4_runthread, &bfsarg[t]);
+ }
+
+ for (t = 0, cc = 0; t < THREADS; t++) {
+ pthread_join(thread[t], NULL);
+ cc += bfsarg[t].count;
+ }
- arg->info.maxvalue = bfsarg.depth-1;
+ done += cc;
+ arg->info.distribution[d] = cc;
- LOG("h48 pruning table computed\n");
- LOG("Maximum pruning value: %" PRIu32 "\n", arg->info.maxvalue);
- LOG("Pruning value distribution:\n");
- for (j = 0; j <= arg->info.maxvalue; j++)
- LOG("%" PRIu8 ":\t%" PRIu32 "\n", j, arg->info.distribution[j]);
+ LOG("found %" PRId64 "\n", cc);
+ }
+ arg->info.maxvalue = d - 1;
writetableinfo(&arg->info, arg->h48buf);
gendata_h48h0k4_return_size:
return H48_TABLESIZE(0, 4) + INFOSIZE;
}
-STATIC int64_t
-gendata_h48h0k4_bfs(h48h0k4_bfs_arg_t *arg)
-{
- const uint8_t breakpoint = 10; /* Hand-picked optimal */
+/*
+TODO: the following function suffers from a big performance loss (about 40%)
+because of all the mutex locking. It is possible to reduce the first lock
+(at the beginning of the outermost for loop) by caching the next few items at
+once and then looping over the cached elements. But for the second one (inner
+loop) the access is non-sequential, so this cannot be done.
- if (arg->depth < breakpoint)
- return gendata_h48h0k4_bfs_fromdone(arg);
- else
- return gendata_h48h0k4_bfs_fromnew(arg);
-}
+However, in both cases in theory we do not care if we are reading the old
+value or the updated one. Depending on the compiler guarantees on concurrent
+memory access, we may be fine just removing the locks altogether.
-STATIC int64_t
-gendata_h48h0k4_bfs_fromdone(h48h0k4_bfs_arg_t *arg)
+It is probably possible to solve part of this by using atomics.
+*/
+STATIC void *
+gendata_h48h0k4_runthread(void *arg)
{
- uint8_t c, m, x;
- uint32_t cc;
- int64_t i, j, k;
- cube_t cube, moved;
-
- for (i = 0, cc = 0; i < (int64_t)H48_COORDMAX(0); i++) {
- c = get_h48_pval(arg->table, i, 4);
- if (c != arg->depth - 1)
- continue;
- cube = invcoord_h48(i, arg->crep, 0);
- for (m = 0; m < 18; m++) {
- moved = move(cube, m);
- j = coord_h48(moved, arg->cocsepdata, 0);
- if (get_h48_pval(arg->table, j, 4) <= arg->depth)
- continue;
- FOREACH_H48SIM(moved, arg->cocsepdata, arg->selfsim,
- k = coord_h48(moved, arg->cocsepdata, 0);
- x = get_h48_pval(arg->table, k, 4);
- set_h48_pval(arg->table, k, 4, arg->depth);
- cc += x != arg->depth;
- )
- }
- }
-
- return cc;
-}
+ static const uint8_t breakpoint = 10; /* Hand-picked optimal */
-STATIC int64_t
-gendata_h48h0k4_bfs_fromnew(h48h0k4_bfs_arg_t *arg)
-{
- uint8_t c, m, x;
- uint32_t cc;
- int64_t i, j;
+ uint8_t c, m;
+ uint64_t i, d, mutex;
+ int64_t j;
cube_t cube, moved;
+ gendata_h48_mark_t markarg;
+ h48h0k4_bfs_arg_t *bfsarg;
+
+ bfsarg = (h48h0k4_bfs_arg_t *)arg;
+
+ markarg = (gendata_h48_mark_t) {
+ .depth = bfsarg->depth,
+ .h = 0,
+ .k = 4,
+ .cocsepdata = bfsarg->cocsepdata,
+ .selfsim = bfsarg->selfsim,
+ .table = bfsarg->table,
+ .table_mutex = bfsarg->table_mutex,
+ };
- for (i = 0, cc = 0; i < (int64_t)H48_COORDMAX(0); i++) {
- c = get_h48_pval(arg->table, i, 4);
- if (c != 0xF)
+ /*
+ * If depth < breakpoint, scan all neighbors of coordinates at depth-1.
+ * Otherwise, scan all neighbors of unvisited coordinates.
+ */
+ for (i = bfsarg->start, d = 0; i < bfsarg->end; i++) {
+ mutex = H48_INDEX(i, 4) % CHUNKS;
+ pthread_mutex_lock(bfsarg->table_mutex[mutex]);
+ c = get_h48_pval(bfsarg->table, i, 4);
+ pthread_mutex_unlock(bfsarg->table_mutex[mutex]);
+
+ if ((bfsarg->depth < breakpoint && c != bfsarg->depth - 1) ||
+ (bfsarg->depth >= breakpoint && c != 0xF))
continue;
- cube = invcoord_h48(i, arg->crep, 0);
+
+ cube = invcoord_h48(i, bfsarg->crep, 0);
for (m = 0; m < 18; m++) {
moved = move(cube, m);
- j = coord_h48(moved, arg->cocsepdata, 0);
- x = get_h48_pval(arg->table, j, 4);
- if (x >= arg->depth)
- continue;
- FOREACH_H48SIM(cube, arg->cocsepdata, arg->selfsim,
- j = coord_h48(cube, arg->cocsepdata, 0);
- x = get_h48_pval(arg->table, j, 4);
- set_h48_pval(arg->table, j, 4, arg->depth);
- cc += x == 0xF;
- )
- break; /* Enough to find one, skip the rest */
+ j = coord_h48(moved, bfsarg->cocsepdata, 0);
+ mutex = H48_INDEX(j, 4) % CHUNKS;
+ pthread_mutex_lock(bfsarg->table_mutex[mutex]);
+ c = get_h48_pval(bfsarg->table, j, 4);
+ pthread_mutex_unlock(bfsarg->table_mutex[mutex]);
+ if (bfsarg->depth < breakpoint) {
+ if (c <= bfsarg->depth)
+ continue;
+ markarg.cube = moved;
+ d += gendata_h48_mark(&markarg);
+ } else {
+ if (c >= bfsarg->depth)
+ continue;
+ markarg.cube = cube;
+ d += gendata_h48_mark(&markarg);
+ break; /* Enough to find one, skip the rest */
+ }
}
}
- return cc;
+ pthread_mutex_lock(bfsarg->distribution_mutex);
+ bfsarg->count += d;
+ bfsarg->distribution[bfsarg->depth] += d;
+ pthread_mutex_unlock(bfsarg->distribution_mutex);
+
+ return NULL;
}
STATIC size_t
@@ -369,24 +414,8 @@ gendata_h48k2(gendata_h48_arg_t *arg)
};
gendata_h48short(&shortarg);
- selectedbase = base[arg->h];
- arg->info = (tableinfo_t) {
- .solver = "h48 solver h = , k = 2",
- .type = TABLETYPE_PRUNING,
- .infosize = INFOSIZE,
- .fullsize = H48_TABLESIZE(arg->h, 2) + INFOSIZE,
- .hash = 0, /* TODO */
- .entries = H48_COORDMAX(arg->h),
- .classes = 0,
- .h48h = arg->h,
- .bits = 2,
- .base = selectedbase,
- .maxvalue = 3,
- .next = 0,
- };
- arg->info.solver[15] = (arg->h % 10) + '0';
- if (arg->h >= 10)
- arg->info.solver[14] = (arg->h / 10) + '0';
+ selectedbase = arg->base < 20 ? arg->base : base[arg->h];
+ arg->info = makeinfo_h48k2(arg, selectedbase);
inext = count = 0;
pthread_mutex_init(&shortcubes_mutex, NULL);
@@ -419,6 +448,7 @@ gendata_h48k2(gendata_h48_arg_t *arg)
h48map_destroy(&shortcubes);
+ /* TODO: inline into mark */
for (j = 0; j < H48_COORDMAX(arg->h); j++) {
t = get_h48_pval(table, j, 2);
arg->info.distribution[t]++;
@@ -474,21 +504,36 @@ gendata_h48k2_dfs(h48k2_dfs_arg_t *arg)
int8_t d;
uint8_t m[4];
cube_t cube[4];
+ gendata_h48_mark_t markarg;
+
+ markarg = (gendata_h48_mark_t) {
+ .h = arg->h,
+ .k = arg->k,
+ .cocsepdata = arg->cocsepdata,
+ .selfsim = arg->selfsim,
+ .table = arg->table,
+ .table_mutex = arg->table_mutex,
+ };
d = (int8_t)arg->shortdepth - (int8_t)arg->base;
/* Depth d+0 (shortcubes) */
- gendata_h48k2_mark(arg->cube, d, arg);
+ markarg.depth = d;
+ markarg.cube = arg->cube;
+ gendata_h48_mark(&markarg);
/* Depth d+1 */
for (m[0] = 0; m[0] < 18; m[0]++) {
+ markarg.depth = d+1;
cube[0] = move(arg->cube, m[0]);
if (gendata_h48k2_dfs_stop(cube[0], d+1, arg))
continue;
- gendata_h48k2_mark(cube[0], d+1, arg);
+ markarg.cube = cube[0];
+ gendata_h48_mark(&markarg);
/* Depth d+2 */
for (m[1] = 0; m[1] < 18; m[1]++) {
+ markarg.depth = d+2;
if (m[0] / 3 == m[1] / 3) {
m[1] += 2;
continue;
@@ -496,12 +541,14 @@ gendata_h48k2_dfs(h48k2_dfs_arg_t *arg)
cube[1] = move(cube[0], m[1]);
if (gendata_h48k2_dfs_stop(cube[1], d+2, arg))
continue;
- gendata_h48k2_mark(cube[1], d+2, arg);
+ markarg.cube = cube[1];
+ gendata_h48_mark(&markarg);
if (d >= 0)
continue;
/* Depth d+3 */
for (m[2] = 0; m[2] < 18; m[2]++) {
+ markarg.depth = d+3;
if (!allowednextmove(m, 3)) {
m[2] += 2;
continue;
@@ -509,48 +556,55 @@ gendata_h48k2_dfs(h48k2_dfs_arg_t *arg)
cube[2] = move(cube[1], m[2]);
if (gendata_h48k2_dfs_stop(cube[2], d+3, arg))
continue;
- gendata_h48k2_mark(cube[2], d+3, arg);
+ markarg.cube = cube[2];
+ gendata_h48_mark(&markarg);
if (d >= -1)
continue;
/* Depth d+4 */
for (m[3] = 0; m[3] < 18; m[3]++) {
+ markarg.depth = d+4;
if (!allowednextmove(m, 4)) {
m[3] += 2;
continue;
}
cube[3] = move(cube[2], m[3]);
- gendata_h48k2_mark(cube[3], d+4, arg);
+ markarg.cube = cube[3];
+ gendata_h48_mark(&markarg);
}
}
}
}
}
-STATIC_INLINE void
-gendata_h48k2_mark(cube_t cube, int8_t depth, h48k2_dfs_arg_t *arg)
+STATIC_INLINE uint64_t
+gendata_h48_mark(gendata_h48_mark_t *arg)
{
uint8_t oldval, newval;
- int64_t coord, fullcoord, mutex;
+ uint64_t d;
+ int64_t coord, mutex;
- FOREACH_H48SIM(cube, arg->cocsepdata, arg->selfsim,
- fullcoord = coord_h48(cube, arg->cocsepdata, 11);
- coord = fullcoord >> (int64_t)(11 - arg->h);
+ d = 0;
+ FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim,
+ coord = coord_h48(arg->cube, arg->cocsepdata, arg->h);
mutex = H48_INDEX(coord, arg->k) % CHUNKS;
pthread_mutex_lock(arg->table_mutex[mutex]);
oldval = get_h48_pval(arg->table, coord, arg->k);
- newval = (uint8_t)MAX(depth, 0);
+ newval = (uint8_t)MAX(arg->depth, 0);
+ d += newval < oldval;
set_h48_pval(arg->table, coord, arg->k, MIN(oldval, newval));
pthread_mutex_unlock(arg->table_mutex[mutex]);
)
+
+ return d;
}
STATIC_INLINE bool
-gendata_h48k2_dfs_stop(cube_t cube, uint8_t depth, h48k2_dfs_arg_t *arg)
+gendata_h48k2_dfs_stop(cube_t cube, int8_t depth, h48k2_dfs_arg_t *arg)
{
uint64_t val;
int64_t coord;
- uint8_t oldval;
+ int8_t oldval;
if (arg->h == 0 || arg->h == 11) {
/* We are in the "real coordinate" case, we can stop
@@ -568,26 +622,44 @@ gendata_h48k2_dfs_stop(cube_t cube, uint8_t depth, h48k2_dfs_arg_t *arg)
}
}
-STATIC_INLINE uint8_t
-get_h48_pval(const uint8_t *table, int64_t i, uint8_t k)
+STATIC size_t
+gendata_h48k2_realcoord(gendata_h48_arg_t *arg)
{
- return (table[H48_INDEX(i, k)] & H48_MASK(i, k)) >> H48_SHIFT(i, k);
+ /* TODO */
+ return gendata_h48k2(arg);
}
-STATIC_INLINE void
-set_h48_pval(uint8_t *table, int64_t i, uint8_t k, uint8_t val)
+STATIC void *
+gendata_h48k2_realcoord_runthread(void *arg)
{
- table[H48_INDEX(i, k)] = (table[H48_INDEX(i, k)] & (~H48_MASK(i, k)))
- | (val << H48_SHIFT(i, k));
+ /* TODO */
+ return NULL;
}
-STATIC_INLINE uint8_t
-get_h48_bound(cube_t cube, uint32_t cdata, uint8_t h, uint8_t k, uint8_t *table)
+STATIC tableinfo_t
+makeinfo_h48k2(gendata_h48_arg_t *arg, uint8_t base)
{
- int64_t coord;
+ tableinfo_t info;
- coord = coord_h48_edges(cube, COCLASS(cdata), TTREP(cdata), h);
- return get_h48_pval(table, coord, k);
+ info = (tableinfo_t) {
+ .solver = "h48 solver h = , k = 2",
+ .type = TABLETYPE_PRUNING,
+ .infosize = INFOSIZE,
+ .fullsize = H48_TABLESIZE(arg->h, 2) + INFOSIZE,
+ .hash = 0, /* TODO */
+ .entries = H48_COORDMAX(arg->h),
+ .classes = 0,
+ .h48h = arg->h,
+ .bits = 2,
+ .base = base,
+ .maxvalue = 3,
+ .next = 0,
+ };
+ info.solver[15] = (arg->h % 10) + '0';
+ if (arg->h >= 10)
+ info.solver[14] = (arg->h / 10) + '0';
+
+ return info;
}
STATIC uint32_t *
@@ -601,3 +673,25 @@ get_h48data_ptr(const void *data)
{
return (uint8_t *)data + COCSEP_FULLSIZE + INFOSIZE;
}
+
+STATIC_INLINE uint8_t
+get_h48_pval(const uint8_t *table, int64_t i, uint8_t k)
+{
+ return (table[H48_INDEX(i, k)] & H48_MASK(i, k)) >> H48_SHIFT(i, k);
+}
+
+STATIC_INLINE void
+set_h48_pval(uint8_t *table, int64_t i, uint8_t k, uint8_t val)
+{
+ table[H48_INDEX(i, k)] = (table[H48_INDEX(i, k)] & (~H48_MASK(i, k)))
+ | (val << H48_SHIFT(i, k));
+}
+
+STATIC_INLINE uint8_t
+get_h48_bound(cube_t cube, uint32_t cdata, uint8_t h, uint8_t k, uint8_t *table)
+{
+ int64_t coord;
+
+ coord = coord_h48_edges(cube, COCLASS(cdata), TTREP(cdata), h);
+ return get_h48_pval(table, coord, k);
+}
diff --git a/tools/tool.h b/tools/tool.h
@@ -4,7 +4,6 @@
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
-#include <time.h>
#include "../src/nissy.h"