commit a13d749f7e19eb1b1fc8d178610ba269df6cfe84
parent ed304cbf68b60c982f9f204b5956dae5672c946a
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date: Wed, 1 Jun 2022 14:54:18 +0200
parallelized genptable_fixnasty
Diffstat:
5 files changed, 72 insertions(+), 22 deletions(-)
diff --git a/TODO.md b/TODO.md
@@ -5,7 +5,6 @@ It's more of a personal reminder than anything else.
## For version 2.1
### Moving coordinates
-* parallelize genptable_fixnasty
* general cleanup
### Changes to Step and Solve
* add a list of "helper" coordinates to every step
diff --git a/nissy b/nissy
Binary files differ.
diff --git a/src/cubetypes.h b/src/cubetypes.h
@@ -173,6 +173,7 @@ coordinate
CoordTransformer transform;
SymData * sd;
TransFinder tfind; /* TODO: should be easy to remove */
+ Coordinate * base; /* base->max sets the block size used by instance_fixnasty; TODO: part of refactor */
};
struct
diff --git a/src/pruning.c b/src/pruning.c
@@ -6,9 +6,10 @@
static int findchunk(PruneData *pd, int nchunks, uint64_t i);
static void genptable_bfs(PruneData *pd, int d, int nt, int nc);
static void genptable_compress(PruneData *pd);
-static void genptable_fixnasty(PruneData *pd, int d);
+static void genptable_fixnasty(PruneData *pd, int d, int nthreads);
static void genptable_setbase(PruneData *pd);
static void * instance_bfs(void *arg);
+static void * instance_fixnasty(void *arg);
static void ptable_update(PruneData *pd, Cube cube, int m);
static void ptable_update_index(PruneData *pd, uint64_t ind, int m);
static int ptableval_index(PruneData *pd, uint64_t ind);
@@ -171,7 +172,7 @@ genptable(PruneData *pd, int nthreads)
ptable_update(pd, (Cube){0}, 0);
pd->n = 1;
oldn = 0;
- genptable_fixnasty(pd, 0);
+ genptable_fixnasty(pd, 0, nthreads);
fprintf(stderr, "Depth %d done, generated %"
PRIu64 "\t(%" PRIu64 "/%" PRIu64 ")\n",
0, pd->n - oldn, pd->n, pd->coord->max);
@@ -179,7 +180,7 @@ genptable(PruneData *pd, int nthreads)
pd->count[0] = pd->n;
for (d = 0; d < 15 && pd->n < pd->coord->max; d++) {
genptable_bfs(pd, d, nthreads, nchunks);
- genptable_fixnasty(pd, d+1);
+ genptable_fixnasty(pd, d+1, nthreads);
fprintf(stderr, "Depth %d done, generated %"
PRIu64 "\t(%" PRIu64 "/%" PRIu64 ")\n",
d+1, pd->n - oldn, pd->n, pd->coord->max);
@@ -256,30 +257,31 @@ genptable_compress(PruneData *pd)
}
static void
-genptable_fixnasty(PruneData *pd, int d)
+genptable_fixnasty(PruneData *pd, int d, int nthreads)
{
- uint64_t i, ii;
- int j, n;
- Trans t, aux[NTRANS];
+ int i;
+ pthread_t t[nthreads];
+ ThreadDataGenpt td[nthreads];
+ pthread_mutex_t *upmtx;
if (pd->coord->tfind == NULL)
return;
- for (i = 0; i < pd->coord->max; i++) {
- if (ptableval_index(pd, i) == d) {
- if ((n = pd->coord->tfind(i, aux)) == 1)
- continue;
-
- for (j = 0; j < n; j++) {
- t = aux[j];
- ii = pd->coord->transform(t, i);
- if (ptableval_index(pd, ii) > d) {
- ptable_update_index(pd, ii, d);
- pd->n++;
- }
- }
- }
+ upmtx = malloc(sizeof(pthread_mutex_t));
+ pthread_mutex_init(upmtx, NULL);
+ for (i = 0; i < nthreads; i++) {
+ td[i].thid = i;
+ td[i].nthreads = nthreads;
+ td[i].pd = pd;
+ td[i].d = d;
+ td[i].upmutex = upmtx;
+ pthread_create(&t[i], NULL, instance_fixnasty, &td[i]);
}
+
+ for (i = 0; i < nthreads; i++)
+ pthread_join(t[i], NULL);
+
+ free(upmtx);
}
static void
@@ -335,6 +337,51 @@ instance_bfs(void *arg)
}
}
}
+
+ pthread_mutex_lock(td->upmutex);
+ td->pd->n += updated;
+ pthread_mutex_unlock(td->upmutex);
+
+ return NULL;
+}
+
+static void *
+instance_fixnasty(void *arg)
+{
+ ThreadDataGenpt *td;
+ uint64_t i, ii, nb, blocksize, rmin, rmax, updated;
+ int j, n;
+ Trans t, aux[NTRANS];
+
+ td = (ThreadDataGenpt *)arg;
+ nb = td->pd->coord->max / td->pd->coord->base->max;
+ blocksize = (uint64_t)((nb / td->nthreads) * td->pd->coord->base->max);
+ rmin = ((uint64_t)td->thid) * blocksize;
+ rmax = td->thid == td->nthreads - 1 ?
+ td->pd->coord->max :
+ ((uint64_t)td->thid + 1) * blocksize;
+
+ updated = 0;
+ for (i = rmin; i < rmax; i++) {
+ if (ptableval_index(td->pd, i) == td->d) {
+ if ((n = td->pd->coord->tfind(i, aux)) == 1)
+ continue;
+
+ for (j = 0; j < n; j++) {
+ if ((t = aux[j]) == uf)
+ continue;
+ ii = td->pd->coord->transform(t, i);
+ if (ii < rmin || ii >= rmax)
+ fprintf(stderr,
+ "Error: transformed out of bound!\n");
+ if (ptableval_index(td->pd, ii) > td->d) {
+ ptable_update_index(td->pd, ii, td->d);
+ updated++;
+ }
+ }
+ }
+ }
+
pthread_mutex_lock(td->upmutex);
td->pd->n += updated;
pthread_mutex_unlock(td->upmutex);
diff --git a/src/symcoord.c b/src/symcoord.c
@@ -106,6 +106,7 @@ coord_drud_sym16 = {
.index = index_drud_sym16,
.move = move_drud_sym16,
.max = POW3TO7 * CLASSES_EOFBEPOS_16,
+ .base = &coord_eofbepos_sym16,
.transform = transform_drud_sym16,
.tfind = tfind_drud_sym16,
};
@@ -115,6 +116,7 @@ coord_drudfin_noE_sym16 = {
.index = index_drudfin_noE_sym16,
.move = move_drudfin_noE_sym16,
.max = FACTORIAL8 * CLASSES_CP_16,
+ .base = &coord_cp_sym16,
.transform = transform_drudfin_noE_sym16,
.tfind = tfind_drudfin_noE_sym16,
};
@@ -124,6 +126,7 @@ coord_nxopt31 = {
.index = index_nxopt31,
.move = move_nxopt31,
.max = POW3TO7 * BINOM8ON4 * CLASSES_EOFBEPOS_16,
+ .base = &coord_eofbepos_sym16,
.transform = transform_nxopt31,
.tfind = tfind_nxopt31,
};