nissy-classic

Stable branch of nissy
git clone https://git.tronto.net/nissy-classic
Download | Log | Files | Refs | README | LICENSE

commit a13d749f7e19eb1b1fc8d178610ba269df6cfe84
parent ed304cbf68b60c982f9f204b5956dae5672c946a
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Wed,  1 Jun 2022 14:54:18 +0200

parallelized genptable_fixnasty

Diffstat:
MTODO.md | 1-
Mnissy | 0
Msrc/cubetypes.h | 1+
Msrc/pruning.c | 89++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
Msrc/symcoord.c | 3+++
5 files changed, 72 insertions(+), 22 deletions(-)

diff --git a/TODO.md b/TODO.md @@ -5,7 +5,6 @@ It's more of a personal reminder than anything else. ## For version 2.1 ### Moving coordinates -* parallelize genptable_fixnasty * general cleanup ### Changes to Step and Solve * add a list of "helper" coordinates to every step diff --git a/nissy b/nissy Binary files differ. diff --git a/src/cubetypes.h b/src/cubetypes.h @@ -173,6 +173,7 @@ coordinate CoordTransformer transform; SymData * sd; TransFinder tfind; /* TODO: should be easy to remove */ + Coordinate * base; /* TODO: part of refactor */ }; struct diff --git a/src/pruning.c b/src/pruning.c @@ -6,9 +6,10 @@ static int findchunk(PruneData *pd, int nchunks, uint64_t i); static void genptable_bfs(PruneData *pd, int d, int nt, int nc); static void genptable_compress(PruneData *pd); -static void genptable_fixnasty(PruneData *pd, int d); +static void genptable_fixnasty(PruneData *pd, int d, int nthreads); static void genptable_setbase(PruneData *pd); static void * instance_bfs(void *arg); +static void * instance_fixnasty(void *arg); static void ptable_update(PruneData *pd, Cube cube, int m); static void ptable_update_index(PruneData *pd, uint64_t ind, int m); static int ptableval_index(PruneData *pd, uint64_t ind); @@ -171,7 +172,7 @@ genptable(PruneData *pd, int nthreads) ptable_update(pd, (Cube){0}, 0); pd->n = 1; oldn = 0; - genptable_fixnasty(pd, 0); + genptable_fixnasty(pd, 0, nthreads); fprintf(stderr, "Depth %d done, generated %" PRIu64 "\t(%" PRIu64 "/%" PRIu64 ")\n", 0, pd->n - oldn, pd->n, pd->coord->max); @@ -179,7 +180,7 @@ genptable(PruneData *pd, int nthreads) pd->count[0] = pd->n; for (d = 0; d < 15 && pd->n < pd->coord->max; d++) { genptable_bfs(pd, d, nthreads, nchunks); - genptable_fixnasty(pd, d+1); + genptable_fixnasty(pd, d+1, nthreads); fprintf(stderr, "Depth %d done, generated %" PRIu64 "\t(%" PRIu64 "/%" PRIu64 ")\n", d+1, pd->n - oldn, pd->n, pd->coord->max); @@ -256,30 +257,31 @@ genptable_compress(PruneData *pd) } static void -genptable_fixnasty(PruneData *pd, int d) +genptable_fixnasty(PruneData *pd, int d, int nthreads) { - uint64_t i, ii; - int j, n; - Trans t, aux[NTRANS]; + int i; + pthread_t t[nthreads]; + ThreadDataGenpt td[nthreads]; + pthread_mutex_t *upmtx; if (pd->coord->tfind == NULL) return; - for (i = 0; i < pd->coord->max; i++) { - if (ptableval_index(pd, i) == d) { - if ((n = pd->coord->tfind(i, aux)) == 1) - continue; - - for (j = 0; j < n; j++) { - t = aux[j]; - ii = pd->coord->transform(t, i); - if (ptableval_index(pd, ii) > d) { - ptable_update_index(pd, ii, d); - pd->n++; - } - } - } + upmtx = malloc(sizeof(pthread_mutex_t)); + pthread_mutex_init(upmtx, NULL); + for (i = 0; i < nthreads; i++) { + td[i].thid = i; + td[i].nthreads = nthreads; + td[i].pd = pd; + td[i].d = d; + td[i].upmutex = upmtx; + pthread_create(&t[i], NULL, instance_fixnasty, &td[i]); } + + for (i = 0; i < nthreads; i++) + pthread_join(t[i], NULL); + + free(upmtx); } static void @@ -335,6 +337,51 @@ instance_bfs(void *arg) } } } + + pthread_mutex_lock(td->upmutex); + td->pd->n += updated; + pthread_mutex_unlock(td->upmutex); + + return NULL; +} + +static void * +instance_fixnasty(void *arg) +{ + ThreadDataGenpt *td; + uint64_t i, ii, nb, blocksize, rmin, rmax, updated; + int j, n; + Trans t, aux[NTRANS]; + + td = (ThreadDataGenpt *)arg; + nb = td->pd->coord->max / td->pd->coord->base->max; + blocksize = (uint64_t)((nb / td->nthreads) * td->pd->coord->base->max); + rmin = ((uint64_t)td->thid) * blocksize; + rmax = td->thid == td->nthreads - 1 ? + td->pd->coord->max : + ((uint64_t)td->thid + 1) * blocksize; + + updated = 0; + for (i = rmin; i < rmax; i++) { + if (ptableval_index(td->pd, i) == td->d) { + if ((n = td->pd->coord->tfind(i, aux)) == 1) + continue; + + for (j = 0; j < n; j++) { + if ((t = aux[j]) == uf) + continue; + ii = td->pd->coord->transform(t, i); + if (ii < rmin || ii >= rmax) + fprintf(stderr, + "Error: transformed out of bound!\n"); + if (ptableval_index(td->pd, ii) > td->d) { + ptable_update_index(td->pd, ii, td->d); + updated++; + } + } + } + } + pthread_mutex_lock(td->upmutex); td->pd->n += updated; pthread_mutex_unlock(td->upmutex); diff --git a/src/symcoord.c b/src/symcoord.c @@ -106,6 +106,7 @@ coord_drud_sym16 = { .index = index_drud_sym16, .move = move_drud_sym16, .max = POW3TO7 * CLASSES_EOFBEPOS_16, + .base = &coord_eofbepos_sym16, .transform = transform_drud_sym16, .tfind = tfind_drud_sym16, }; @@ -115,6 +116,7 @@ coord_drudfin_noE_sym16 = { .index = index_drudfin_noE_sym16, .move = move_drudfin_noE_sym16, .max = FACTORIAL8 * CLASSES_CP_16, + .base = &coord_cp_sym16, .transform = transform_drudfin_noE_sym16, .tfind = tfind_drudfin_noE_sym16, }; @@ -124,6 +126,7 @@ coord_nxopt31 = { .index = index_nxopt31, .move = move_nxopt31, .max = POW3TO7 * BINOM8ON4 * CLASSES_EOFBEPOS_16, + .base = &coord_eofbepos_sym16, .transform = transform_nxopt31, .tfind = tfind_nxopt31, };