commit d25f23805c6d7128b508a359d47af136dd2fd0db
parent 9c5c7af7abe9e444210ac86b903eee775fc76a84
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date: Mon, 11 Aug 2025 10:22:56 +0200
Wrapped pthread use in custom API
Diffstat:
9 files changed, 121 insertions(+), 65 deletions(-)
diff --git a/build b/build
@@ -117,15 +117,23 @@ parsesanitize() {
done
}
+maybe_pthread() { # prints "-pthread" if THREADS > 1, else an empty line
+	if ! [ "$THREADS" -gt 1 ]; then
+		echo ""
+		return
+	fi
+	echo "-pthread"
+}
+
# Build flags
-CFLAGS="-std=c11 -fPIC -D_POSIX_C_SOURCE=199309L -pthread"
-PYCFLAGS="-std=c11 -fPIC -pthread"
+CFLAGS="-std=c11 -fPIC -D_POSIX_C_SOURCE=199309L $(maybe_pthread)"
+PYCFLAGS="-std=c11 -fPIC $(maybe_pthread)"
[ "$ARCH" = "AVX2" ] && CFLAGS="$CFLAGS -mavx2"
WFLAGS="-pedantic -Wall -Wextra -Wno-unused-parameter -Wno-unused-function"
OFLAGS="$OPTIMIZE"
DFLAGS="-DDEBUG -g3 $(parsesanitize "$SANITIZE")"
MFLAGS="-DTHREADS=$THREADS -D$ARCH"
-CPPFLAGS="-std=c++20 -pthread"
+CPPFLAGS="-std=c++20 $(maybe_pthread)"
# TODO:
# MEMORY64 is supported on Firefox (from version 134) and Chrome (from 133),
@@ -140,9 +148,9 @@ CPPFLAGS="-std=c++20 -pthread"
# The options below have to be adjusted when native WASM_SIMD is implemented.
# Build flags for emscripten (WASM target)
-WASMCFLAGS="-std=c11 -fPIC -D_POSIX_C_SOURCE=199309L -pthread
+WASMCFLAGS="-std=c11 -fPIC -D_POSIX_C_SOURCE=199309L $(maybe_pthread)
-mfpu=neon -mrelaxed-simd"
-WASMCPPFLAGS="-std=c++20 -pthread"
+WASMCPPFLAGS="-std=c++20 $(maybe_pthread)"
WASMDBGFLAGS="-sASSERTIONS"
WASMMFLAGS="-DTHREADS=$THREADS -DNEON"
WASMLINKFLAGS="--no-entry -sEXPORT_NAME='Nissy' -sMODULARIZE
diff --git a/src/solvers/distribution.h b/src/solvers/distribution.h
@@ -43,7 +43,7 @@ getdistribution(
const tableinfo_t info[static 1]
) {
getdistribution_data_t targ[THREADS];
- pthread_t thread[THREADS];
+ wrapthread_define_var_thread_t(thread[THREADS]);
uint8_t pval, k;
uint64_t local_distr[THREADS][INFO_DISTRIBUTION_LEN];
uint64_t i, j, nbytes, sz, epb;
@@ -60,12 +60,12 @@ getdistribution(
.distr = local_distr[i],
.table = table,
};
- pthread_create(&thread[i], NULL,
+ wrapthread_create(&thread[i], NULL,
getdistribution_runthread, &targ[i]);
}
for (i = 0; i < THREADS; i++)
- pthread_join(thread[i], NULL);
+ wrapthread_join(thread[i], NULL);
memset(distr, 0, INFO_DISTRIBUTION_LEN * sizeof(uint64_t));
for (i = 0; i < THREADS; i++)
diff --git a/src/solvers/h48/gendata_h48.h b/src/solvers/h48/gendata_h48.h
@@ -21,9 +21,9 @@ STATIC const unsigned char *get_h48data_constptr(const unsigned char *);
STATIC_INLINE uint8_t get_h48_pval(const unsigned char *, uint64_t, uint8_t);
STATIC_INLINE void set_h48_pval(unsigned char *, uint64_t, uint8_t, uint8_t);
STATIC_INLINE uint8_t get_h48_pval_atomic(
- _Atomic const unsigned char *, uint64_t, uint8_t);
+ wrapthread_atomic const unsigned char *, uint64_t, uint8_t);
STATIC_INLINE void set_h48_pval_atomic(
- _Atomic unsigned char *, uint64_t, uint8_t, uint8_t);
+ wrapthread_atomic unsigned char *, uint64_t, uint8_t, uint8_t);
STATIC long long
gendata_h48_dispatch(
@@ -115,7 +115,7 @@ gendata_h48(gendata_h48_arg_t arg[static 1])
cocsepdata_offset = arg->buf + INFOSIZE;
arg->cocsepdata = (uint32_t *)cocsepdata_offset;
- arg->h48buf = (_Atomic unsigned char*)arg->buf + cocsepsize;
+ arg->h48buf = (wrapthread_atomic unsigned char*)arg->buf + cocsepsize;
arg->base = 99;
@@ -207,13 +207,13 @@ gendata_h48(gendata_h48_arg_t arg[static 1])
STATIC void
gendata_h48h0k4(gendata_h48_arg_t arg[static 1])
{
- _Atomic unsigned char *table;
+ wrapthread_atomic unsigned char *table;
uint8_t val;
uint64_t i, sc, done, d, h48max;
uint64_t t, tt, isize, cc, bufsize;
h48h0k4_bfs_arg_t bfsarg[THREADS];
- pthread_t thread[THREADS];
- pthread_mutex_t table_mutex[CHUNKS];
+ wrapthread_define_var_thread_t(thread[THREADS]);
+ wrapthread_define_var_mutex_t(table_mutex[CHUNKS]);
arg->info = (tableinfo_t) {
.solver = "h48 solver h = 0, k = 4",
@@ -241,7 +241,7 @@ gendata_h48h0k4(gendata_h48_arg_t arg[static 1])
isize = h48max / THREADS;
isize = (isize / H48_COEFF(arg->k)) * H48_COEFF(arg->k);
for (t = 0; t < CHUNKS; t++)
- pthread_mutex_init(&table_mutex[t], NULL);
+ wrapthread_mutex_init(&table_mutex[t], NULL);
for (t = 0; t < THREADS; t++) {
bfsarg[t] = (h48h0k4_bfs_arg_t) {
.cocsepdata = arg->cocsepdata,
@@ -259,12 +259,12 @@ gendata_h48h0k4(gendata_h48_arg_t arg[static 1])
for (t = 0; t < THREADS; t++) {
bfsarg[t].depth = d;
- pthread_create(&thread[t], NULL,
+ wrapthread_create(&thread[t], NULL,
gendata_h48h0k4_runthread, &bfsarg[t]);
}
for (t = 0; t < THREADS; t++)
- pthread_join(thread[t], NULL);
+ wrapthread_join(thread[t], NULL);
for (i = 0, cc = 0; i < h48max; i++) {
val = get_h48_pval_atomic(table, i, 4);
@@ -395,13 +395,14 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1])
int sleeptime;
unsigned char *table;
uint64_t j;
- _Atomic uint64_t count;
+ wrapthread_atomic uint64_t count;
uint64_t i, ii, inext, bufsize, done, nshort, velocity;
h48map_t shortcubes;
gendata_h48short_arg_t shortarg;
h48k2_dfs_arg_t dfsarg[THREADS];
- pthread_t thread[THREADS];
- pthread_mutex_t shortcubes_mutex, table_mutex[CHUNKS];
+ wrapthread_define_var_thread_t(thread[THREADS]);
+ wrapthread_define_var_mutex_t(shortcubes_mutex);
+ wrapthread_define_var_mutex_t(table_mutex[CHUNKS]);
table = (unsigned char *)arg->h48buf + INFOSIZE;
memset(table, 0xFF, H48_TABLESIZE(arg->h, arg->k));
@@ -425,9 +426,9 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1])
inext = 0;
count = 0;
- pthread_mutex_init(&shortcubes_mutex, NULL);
+ wrapthread_mutex_init(&shortcubes_mutex, NULL);
for (i = 0; i < CHUNKS; i++)
- pthread_mutex_init(&table_mutex[i], NULL);
+ wrapthread_mutex_init(&table_mutex[i], NULL);
for (i = 0; i < THREADS; i++) {
dfsarg[i] = (h48k2_dfs_arg_t){
.h = arg->h,
@@ -446,7 +447,7 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1])
for (ii = 0; ii < CHUNKS; ii++)
dfsarg[i].table_mutex[ii] = &table_mutex[ii];
- pthread_create(
+ wrapthread_create(
&thread[i], NULL, gendata_h48k2_runthread, &dfsarg[i]);
}
@@ -464,9 +465,9 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1])
done = count;
while (nshort - done > (velocity * sleeptime) / 1000) {
msleep(sleeptime);
- pthread_mutex_lock(&shortcubes_mutex);
+ wrapthread_mutex_lock(&shortcubes_mutex);
done = count;
- pthread_mutex_unlock(&shortcubes_mutex);
+ wrapthread_mutex_unlock(&shortcubes_mutex);
LOG("Processed %" PRIu64 " / %" PRIu64 " cubes\n",
(done / 1000) * 1000, nshort);
}
@@ -476,7 +477,7 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1])
}
for (i = 0; i < THREADS; i++)
- pthread_join(thread[i], NULL);
+ wrapthread_join(thread[i], NULL);
h48map_destroy(&shortcubes);
@@ -492,29 +493,30 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1])
STATIC void *
gendata_h48k2_runthread(void *arg)
{
- uint64_t coord, mutex;
+ uint64_t coord;
kvpair_t kv;
h48k2_dfs_arg_t *dfsarg;
+ wrapthread_define_if_threads(uint64_t, mutex);
dfsarg = (h48k2_dfs_arg_t *)arg;
while (true) {
- pthread_mutex_lock(dfsarg->shortcubes_mutex);
+ wrapthread_mutex_lock(dfsarg->shortcubes_mutex);
kv = h48map_nextkvpair(dfsarg->shortcubes, dfsarg->next);
if (*dfsarg->next == dfsarg->shortcubes->capacity) {
- pthread_mutex_unlock(dfsarg->shortcubes_mutex);
+ wrapthread_mutex_unlock(dfsarg->shortcubes_mutex);
break;
}
(*dfsarg->count)++;
- pthread_mutex_unlock(dfsarg->shortcubes_mutex);
+ wrapthread_mutex_unlock(dfsarg->shortcubes_mutex);
if (kv.val < dfsarg->shortdepth) {
coord = kv.key >> (uint64_t)(11 - dfsarg->h);
mutex = H48_INDEX(coord, dfsarg->k) % CHUNKS;
- pthread_mutex_lock(dfsarg->table_mutex[mutex]);
+ wrapthread_mutex_lock(dfsarg->table_mutex[mutex]);
set_h48_pval(dfsarg->table, coord, dfsarg->k, 0);
- pthread_mutex_unlock(dfsarg->table_mutex[mutex]);
+ wrapthread_mutex_unlock(dfsarg->table_mutex[mutex]);
} else {
dfsarg->cube = invcoord_h48(kv.key, dfsarg->crep, 11);
gendata_h48k2_dfs(dfsarg);
@@ -607,7 +609,8 @@ STATIC_INLINE void
gendata_h48_mark_atomic(gendata_h48_mark_t arg[static 1])
{
uint8_t oldval, newval;
- uint64_t coord, mutex;
+ uint64_t coord;
+ wrapthread_define_if_threads(uint64_t, mutex);
FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim,
coord = coord_h48(arg->cube, arg->cocsepdata, arg->h);
@@ -615,10 +618,10 @@ gendata_h48_mark_atomic(gendata_h48_mark_t arg[static 1])
newval = (uint8_t)MAX(arg->depth, 0);
if (newval < oldval) {
mutex = H48_INDEX(coord, arg->k) % CHUNKS;
- pthread_mutex_lock(arg->table_mutex[mutex]);
+ wrapthread_mutex_lock(arg->table_mutex[mutex]);
set_h48_pval_atomic(
arg->table_atomic, coord, arg->k, newval);
- pthread_mutex_unlock(arg->table_mutex[mutex]);
+ wrapthread_mutex_unlock(arg->table_mutex[mutex]);
}
)
}
@@ -627,16 +630,17 @@ STATIC_INLINE void
gendata_h48_mark(gendata_h48_mark_t arg[static 1])
{
uint8_t oldval, newval;
- uint64_t coord, mutex;
+ uint64_t coord;
+ wrapthread_define_if_threads(uint64_t, mutex);
FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim,
coord = coord_h48(arg->cube, arg->cocsepdata, arg->h);
mutex = H48_INDEX(coord, arg->k) % CHUNKS;
- pthread_mutex_lock(arg->table_mutex[mutex]);
+ wrapthread_mutex_lock(arg->table_mutex[mutex]);
oldval = get_h48_pval(arg->table, coord, arg->k);
newval = (uint8_t)MAX(arg->depth, 0);
set_h48_pval(arg->table, coord, arg->k, MIN(newval, oldval));
- pthread_mutex_unlock(arg->table_mutex[mutex]);
+ wrapthread_mutex_unlock(arg->table_mutex[mutex]);
)
}
@@ -644,7 +648,8 @@ STATIC_INLINE bool
gendata_h48k2_dfs_stop(cube_t cube, int8_t d, h48k2_dfs_arg_t arg[static 1])
{
uint64_t val;
- uint64_t coord, mutex;
+ uint64_t coord;
+ wrapthread_define_if_threads(uint64_t, mutex);
int8_t oldval;
if (arg->h == 0 || arg->h == 11) {
@@ -652,9 +657,9 @@ gendata_h48k2_dfs_stop(cube_t cube, int8_t d, h48k2_dfs_arg_t arg[static 1])
if this coordinate has already been visited */
coord = coord_h48(cube, arg->cocsepdata, arg->h);
mutex = H48_INDEX(coord, arg->k) % CHUNKS;
- pthread_mutex_lock(arg->table_mutex[mutex]);
+ wrapthread_mutex_lock(arg->table_mutex[mutex]);
oldval = get_h48_pval(arg->table, coord, arg->k);
- pthread_mutex_unlock(arg->table_mutex[mutex]);
+ wrapthread_mutex_unlock(arg->table_mutex[mutex]);
return oldval <= d;
} else {
/* With 0 < k < 11 we do not have a "real coordinate".
@@ -711,7 +716,11 @@ get_h48_pval(const unsigned char *table, uint64_t i, uint8_t k)
}
STATIC_INLINE uint8_t
-get_h48_pval_atomic(_Atomic const unsigned char *table, uint64_t i, uint8_t k)
+get_h48_pval_atomic(
+ wrapthread_atomic const unsigned char *table,
+ uint64_t i,
+ uint8_t k
+)
{
return (table[H48_INDEX(i, k)] & H48_MASK(i, k)) >> H48_SHIFT(i, k);
}
@@ -725,7 +734,7 @@ set_h48_pval(unsigned char *table, uint64_t i, uint8_t k, uint8_t val)
STATIC_INLINE void
set_h48_pval_atomic(
- _Atomic unsigned char *table,
+ wrapthread_atomic unsigned char *table,
uint64_t i,
uint8_t k,
uint8_t val
diff --git a/src/solvers/h48/gendata_types_macros.h b/src/solvers/h48/gendata_types_macros.h
@@ -68,7 +68,7 @@ typedef struct {
tableinfo_t info;
uint64_t buf_size;
unsigned char *buf;
- _Atomic unsigned char *h48buf;
+ wrapthread_atomic unsigned char *h48buf;
uint32_t *cocsepdata;
uint64_t selfsim[COCSEP_CLASSES];
cube_t crep[COCSEP_CLASSES];
@@ -85,12 +85,12 @@ typedef struct {
typedef struct {
uint8_t depth;
uint32_t *cocsepdata;
- _Atomic unsigned char *table;
+ wrapthread_atomic unsigned char *table;
uint64_t *selfsim;
cube_t *crep;
uint64_t start;
uint64_t end;
- pthread_mutex_t *table_mutex[CHUNKS];
+ wrapthread_define_struct_mutex_t(*table_mutex[CHUNKS]);
} h48h0k4_bfs_arg_t;
typedef struct {
@@ -104,10 +104,10 @@ typedef struct {
uint64_t *selfsim;
cube_t *crep;
h48map_t *shortcubes;
- pthread_mutex_t *shortcubes_mutex;
- pthread_mutex_t *table_mutex[CHUNKS];
+ wrapthread_define_struct_mutex_t(*shortcubes_mutex);
+ wrapthread_define_struct_mutex_t(*table_mutex[CHUNKS]);
uint64_t *next;
- _Atomic uint64_t *count;
+ wrapthread_atomic uint64_t *count;
} h48k2_dfs_arg_t;
typedef struct {
@@ -118,6 +118,6 @@ typedef struct {
uint32_t *cocsepdata;
uint64_t *selfsim;
unsigned char *table;
- _Atomic unsigned char *table_atomic;
- pthread_mutex_t **table_mutex;
+ wrapthread_atomic unsigned char *table_atomic;
+ wrapthread_define_struct_mutex_t(**table_mutex);
} gendata_h48_mark_t;
diff --git a/src/solvers/h48/solve.h b/src/solvers/h48/solve.h
@@ -43,9 +43,9 @@ typedef struct {
int ntasks;
solve_h48_task_t *tasks;
int thread_id;
- pthread_mutex_t *solutions_mutex;
- _Atomic int *status;
- _Atomic bool thread_done;
+ wrapthread_define_struct_mutex_t(*solutions_mutex);
+ wrapthread_atomic int *status;
+ wrapthread_atomic bool thread_done;
} dfsarg_solve_h48_t;
typedef struct {
@@ -207,10 +207,10 @@ solve_h48_dfs(dfsarg_solve_h48_t arg[static 1])
+ arg->solution_moves->npremoves;
if (arg->target_depth != nm)
return 0;
- pthread_mutex_lock(arg->solutions_mutex);
+ wrapthread_mutex_lock(arg->solutions_mutex);
ret = appendsolution(arg->solution_moves,
arg->solution_settings, arg->solution_list);
- pthread_mutex_unlock(arg->solutions_mutex);
+ wrapthread_mutex_unlock(arg->solutions_mutex);
return ret;
}
@@ -455,7 +455,7 @@ solve_h48(
{
int i, ntasks, eoesep_table_index;
bool td;
- _Atomic int status, prev_status;
+ wrapthread_atomic int status, prev_status;
size_t lastused;
int8_t d;
dfsarg_solve_h48_t arg[THREADS];
@@ -471,8 +471,8 @@ solve_h48(
solution_moves_t solution_moves[THREADS];
solution_settings_t settings;
solution_list_t sollist;
- pthread_t thread[THREADS];
- pthread_mutex_t solutions_mutex;
+ wrapthread_define_var_thread_t(thread[THREADS]);
+ wrapthread_define_var_mutex_t(solutions_mutex);
if (!solution_list_init(&sollist, solutions_size, solutions))
goto solve_h48_error_solutions_buffer;
@@ -541,7 +541,7 @@ solve_h48(
}
- pthread_mutex_init(&solutions_mutex, NULL);
+ wrapthread_mutex_init(&solutions_mutex, NULL);
maketasks_arg = (dfsarg_solve_h48_maketasks_t) {
.cube = oc.cube,
@@ -591,7 +591,7 @@ solve_h48(
for (i = 0; i < threads; i++) {
arg[i].target_depth = d;
arg[i].thread_done = false;
- pthread_create(
+ wrapthread_create(
&thread[i], NULL, solve_h48_runthread, &arg[i]);
}
@@ -601,10 +601,10 @@ solve_h48(
while (!td && status != NISSY_STATUS_STOP) {
msleep(BASE_SLEEP_TIME);
- pthread_mutex_lock(&solutions_mutex);
+ wrapthread_mutex_lock(&solutions_mutex);
solve_h48_log_solutions(&sollist, lastused);
lastused = sollist.used;
- pthread_mutex_unlock(&solutions_mutex);
+ wrapthread_mutex_unlock(&solutions_mutex);
prev_status = status;
status = poll_status(poll_status_data);
@@ -621,7 +621,7 @@ solve_h48(
}
for (i = 0; i < threads; i++)
- pthread_join(thread[i], NULL);
+ wrapthread_join(thread[i], NULL);
solve_h48_log_solutions(&sollist, lastused);
lastused = sollist.used;
diff --git a/src/solvers/solutions_types_macros.h b/src/solvers/solutions_types_macros.h
@@ -17,7 +17,7 @@ typedef struct {
} solution_settings_t;
typedef struct {
- _Atomic uint64_t nsols;
+ wrapthread_atomic uint64_t nsols;
uint8_t shortest_sol;
size_t size;
size_t used;
diff --git a/src/utils/utils.h b/src/utils/utils.h
@@ -2,3 +2,4 @@
#include "constants.h"
#include "math.h"
#include "sleep.h"
+#include "wrapthread.h"
diff --git a/src/utils/wrapthread.h b/src/utils/wrapthread.h
@@ -0,0 +1,37 @@
+#if THREADS == 1
+/* Single-threaded build: no pthread dependency, every wrapper is a stub. */
+#define wrapthread_atomic
+/* Placeholder objects; sizeof marks x as used without evaluating it. */
+#define wrapthread_define_var_thread_t(x) char x; (void)sizeof(x)
+#define wrapthread_define_var_mutex_t(x) char x; (void)sizeof(x)
+#define wrapthread_define_struct_thread_t(x) char x
+#define wrapthread_define_struct_mutex_t(x) char x
+/* Real variable in both builds; the sizeof is a no-op use of x. */
+#define wrapthread_define_if_threads(T, x) T x; (void)sizeof(x)
+/* "Creating" a thread calls routine c on argument d synchronously. */
+#define wrapthread_create(a, b, c, d) ((void)((c)(d)))
+#define wrapthread_join(a, b) ((void)0)
+#define wrapthread_mutex_init(a, b) ((void)0)
+#define wrapthread_mutex_lock(a) ((void)0)
+#define wrapthread_mutex_unlock(a) ((void)0)
+
+#else
+/* Multi-threaded build: wrappers map one-to-one onto pthread and _Atomic. */
+#include <pthread.h>
+
+#define wrapthread_atomic _Atomic
+
+#define wrapthread_define_var_thread_t(x) pthread_t x
+#define wrapthread_define_var_mutex_t(x) pthread_mutex_t x
+#define wrapthread_define_struct_thread_t(x) pthread_t x
+#define wrapthread_define_struct_mutex_t(x) pthread_mutex_t x
+
+#define wrapthread_define_if_threads(T, x) T x
+
+#define wrapthread_create(a, b, c, d) pthread_create(a, b, c, d)
+#define wrapthread_join(a, b) pthread_join(a, b)
+#define wrapthread_mutex_init(a, b) pthread_mutex_init(a, b)
+#define wrapthread_mutex_lock(a) pthread_mutex_lock(a)
+#define wrapthread_mutex_unlock(a) pthread_mutex_unlock(a)
+
+#endif
diff --git a/test/test.h b/test/test.h
@@ -8,6 +8,7 @@
#include <stdlib.h>
#include <string.h>
+#include "../src/utils/wrapthread.h"
#include "../src/nissy.h"
#include "../src/arch/arch.h"
#include "../src/core/core_types.h"