commit a13d749f7e19eb1b1fc8d178610ba269df6cfe84
parent ed304cbf68b60c982f9f204b5956dae5672c946a
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Wed,  1 Jun 2022 14:54:18 +0200
parallelized genptable_fixnasty
Diffstat:
5 files changed, 72 insertions(+), 22 deletions(-)
diff --git a/TODO.md b/TODO.md
@@ -5,7 +5,6 @@ It's more of a personal reminder than anything else.
 
 ## For version 2.1
 ### Moving coordinates
-* parallelize genptable_fixnasty
 * general cleanup
 ### Changes to Step and Solve
 * add a list of "helper" coordinates to every step
diff --git a/nissy b/nissy
Binary files differ.
diff --git a/src/cubetypes.h b/src/cubetypes.h
@@ -173,6 +173,7 @@ coordinate
 	CoordTransformer          transform;
 	SymData *                 sd;
 	TransFinder               tfind; /* TODO: should be easy to remove */
+	Coordinate *              base; /* TODO: part of refactor */
 };
 
 struct
diff --git a/src/pruning.c b/src/pruning.c
@@ -6,9 +6,10 @@
 static int         findchunk(PruneData *pd, int nchunks, uint64_t i);
 static void        genptable_bfs(PruneData *pd, int d, int nt, int nc);
 static void        genptable_compress(PruneData *pd);
-static void        genptable_fixnasty(PruneData *pd, int d);
+static void        genptable_fixnasty(PruneData *pd, int d, int nthreads);
 static void        genptable_setbase(PruneData *pd);
 static void *      instance_bfs(void *arg);
+static void *      instance_fixnasty(void *arg);
 static void        ptable_update(PruneData *pd, Cube cube, int m);
 static void        ptable_update_index(PruneData *pd, uint64_t ind, int m);
 static int         ptableval_index(PruneData *pd, uint64_t ind);
@@ -171,7 +172,7 @@ genptable(PruneData *pd, int nthreads)
 	ptable_update(pd, (Cube){0}, 0);
 	pd->n = 1;
 	oldn = 0;
-	genptable_fixnasty(pd, 0);
+	genptable_fixnasty(pd, 0, nthreads);
 	fprintf(stderr, "Depth %d done, generated %"
 		PRIu64 "\t(%" PRIu64 "/%" PRIu64 ")\n",
 		0, pd->n - oldn, pd->n, pd->coord->max);
@@ -179,7 +180,7 @@ genptable(PruneData *pd, int nthreads)
 	pd->count[0] = pd->n;
 	for (d = 0; d < 15 && pd->n < pd->coord->max; d++) {
 		genptable_bfs(pd, d, nthreads, nchunks);
-		genptable_fixnasty(pd, d+1);
+		genptable_fixnasty(pd, d+1, nthreads);
 		fprintf(stderr, "Depth %d done, generated %"
 			PRIu64 "\t(%" PRIu64 "/%" PRIu64 ")\n",
 			d+1, pd->n - oldn, pd->n, pd->coord->max);
@@ -256,30 +257,31 @@ genptable_compress(PruneData *pd)
 }
 
 static void
-genptable_fixnasty(PruneData *pd, int d)
+genptable_fixnasty(PruneData *pd, int d, int nthreads)
 {
-	uint64_t i, ii;
-	int j, n;
-	Trans t, aux[NTRANS];
+	int i;
+	pthread_t t[nthreads];
+	ThreadDataGenpt td[nthreads];
+	pthread_mutex_t *upmtx;
 
 	if (pd->coord->tfind == NULL)
 		return;
 
-	for (i = 0; i < pd->coord->max; i++) {
-		if (ptableval_index(pd, i) == d) {
-			if ((n = pd->coord->tfind(i, aux)) == 1)
-				continue;
-
-			for (j = 0; j < n; j++) {
-				t = aux[j];
-				ii = pd->coord->transform(t, i);
-				if (ptableval_index(pd, ii) > d) {
-					ptable_update_index(pd, ii, d);
-					pd->n++;
-				}
-			}
-		}
+	upmtx = malloc(sizeof(pthread_mutex_t));
+	pthread_mutex_init(upmtx, NULL);
+	for (i = 0; i < nthreads; i++) {
+		td[i].thid     = i;
+		td[i].nthreads = nthreads;
+		td[i].pd       = pd;
+		td[i].d        = d;
+		td[i].upmutex  = upmtx;
+		pthread_create(&t[i], NULL, instance_fixnasty, &td[i]);
 	}
+
+	for (i = 0; i < nthreads; i++)
+		pthread_join(t[i], NULL);
+
+	free(upmtx);
 }
 
 static void
@@ -335,6 +337,51 @@ instance_bfs(void *arg)
 			}
 		}
 	}
+
+	pthread_mutex_lock(td->upmutex);
+	td->pd->n += updated;
+	pthread_mutex_unlock(td->upmutex);
+
+	return NULL;
+}
+
+static void *
+instance_fixnasty(void *arg)
+{
+	ThreadDataGenpt *td;
+	uint64_t i, ii, nb, blocksize, rmin, rmax, updated;
+	int j, n;
+	Trans t, aux[NTRANS];
+
+	td = (ThreadDataGenpt *)arg;
+	nb = td->pd->coord->max / td->pd->coord->base->max;
+	blocksize = (uint64_t)((nb / td->nthreads) * td->pd->coord->base->max);
+	rmin = ((uint64_t)td->thid) * blocksize;
+	rmax = td->thid == td->nthreads - 1 ?
+	       td->pd->coord->max :
+	       ((uint64_t)td->thid + 1) * blocksize;
+
+	updated = 0;
+	for (i = rmin; i < rmax; i++) {
+		if (ptableval_index(td->pd, i) == td->d) {
+			if ((n = td->pd->coord->tfind(i, aux)) == 1)
+				continue;
+
+			for (j = 0; j < n; j++) {
+				if ((t = aux[j]) == uf)
+					continue;
+				ii = td->pd->coord->transform(t, i);
+				if (ii < rmin || ii >= rmax)
+					fprintf(stderr,
+					    "Error: transformed out of bound!\n");
+				if (ptableval_index(td->pd, ii) > td->d) {
+					ptable_update_index(td->pd, ii, td->d);
+					updated++;
+				}
+			}
+		}
+	}
+
 	pthread_mutex_lock(td->upmutex);
 	td->pd->n += updated;
 	pthread_mutex_unlock(td->upmutex);
diff --git a/src/symcoord.c b/src/symcoord.c
@@ -106,6 +106,7 @@ coord_drud_sym16 = {
 	.index     = index_drud_sym16,
 	.move      = move_drud_sym16,
 	.max       = POW3TO7 * CLASSES_EOFBEPOS_16,
+	.base      = &coord_eofbepos_sym16,
 	.transform = transform_drud_sym16,
 	.tfind     = tfind_drud_sym16,
 };
@@ -115,6 +116,7 @@ coord_drudfin_noE_sym16 = {
 	.index     = index_drudfin_noE_sym16,
 	.move      = move_drudfin_noE_sym16,
 	.max       = FACTORIAL8 * CLASSES_CP_16,
+	.base      = &coord_cp_sym16,
 	.transform = transform_drudfin_noE_sym16,
 	.tfind     = tfind_drudfin_noE_sym16,
 };
@@ -124,6 +126,7 @@ coord_nxopt31 = {
 	.index     = index_nxopt31,
 	.move      = move_nxopt31,
 	.max       = POW3TO7 * BINOM8ON4 * CLASSES_EOFBEPOS_16,
+	.base      = &coord_eofbepos_sym16,
 	.transform = transform_nxopt31,
 	.tfind     = tfind_nxopt31,
 };