commit a8c4da5b955eab2eed9ebb03ee4b1212ec6fe042
parent 0df4f6f98101bb3be192ce892aa1452f2c6de1a4
Author: Sebastiano Tronto <sebastiano.tronto@gmail.com>
Date: Sat, 20 Nov 2021 16:08:43 +0100
Multithreading seems to be working now, it was easier than expected!
Diffstat:
10 files changed, 349 insertions(+), 163 deletions(-)
diff --git a/Makefile b/Makefile
@@ -1,13 +1,13 @@
# See LICENSE file for copyright and license details.
-VERSION = 2.0beta2
+VERSION = 2.0beta3
PREFIX = /usr/local
MANPREFIX = ${PREFIX}/share/man
CPPFLAGS = -DVERSION=\"${VERSION}\"
-CFLAGS = -std=c99 -pedantic -Wall -Wextra -Wno-unused-parameter -O3 ${CPPFLAGS}
-DBGFLAGS = -std=c99 -pedantic -Wall -Wextra -Wno-unused-parameter -g ${CPPFLAGS}
+CFLAGS = -std=c99 -pthread -pedantic -Wall -Wextra -Wno-unused-parameter -O3 ${CPPFLAGS}
+DBGFLAGS = -std=c99 -pthread -pedantic -Wall -Wextra -Wno-unused-parameter -g ${CPPFLAGS}
CC = cc
diff --git a/TODO.md b/TODO.md
@@ -3,6 +3,9 @@
This is a list of things that I would like to add or change at some point.
It's more of a personal reminder than anything else.
+## Bugs
+* Segfault on "nissy solve -s 10U" (no space between 10 and U)
+
## Commands
### Commands that are available in nissy 1.0, but not in this version (yet):
@@ -37,22 +40,14 @@ It's more of a personal reminder than anything else.
* default to current directory for tables; this will work on any OS, up to
using the correct #ifdef guards to avoid checking for posix directories
in non-posix systems
-* better internal help page for each command (take it from man page)
* better man page
* find a better way to distribute the large tables, especially khuge
## Technical stuff
-### Small fixes
-* printf with stdint.h: use proper macros instead of %llu
-
### Better pruning tables
* Use pruning values mod 4 instead of mod 16
-### Code simplification
-* Remove anti-indeces. I think I can do this by using an iterative deepening
- dfs method for generating pruning tables, like I do in solve()
-
### Memory management
* fail gracefully when there is not enough memory to load a large table
* free tables from memory when not used
diff --git a/doc/nissy.1 b/doc/nissy.1
@@ -93,6 +93,14 @@ Only find solutions that require the minimum number of moves.
.It Fl p
Plain style: do not print the number of moves.
.
+.It Fl t Ar n
+Use
+.Ar n
+CPU threads. By default nissy uses only 1 thread. Using more than one
+thread will improve performance, but the optimal number depends on your
+machine and operating system. Generally, using one less than the number
+of threads of your CPU works quite well.
+.
.It Fl v
Verbose mode: print some information during the search and print each solution
as it is found instead of only printing them all together at the end.
diff --git a/nissy-2.0beta3.tar.gz b/nissy-2.0beta3.tar.gz
Binary files differ.
diff --git a/src/commands.c b/src/commands.c
@@ -28,7 +28,7 @@ Command
solve_cmd = {
.name = "solve",
.usage = "solve STEP [OPTIONS] SCRAMBLE",
- .description = "Solve a step",
+ .description = "Solve a step; see command steps for a list of steps",
.parse_args = solve_parse_args,
.exec = solve_exec
};
@@ -116,6 +116,7 @@ solve_parse_args(int c, char **v)
a->opts->min_moves = 0;
a->opts->max_moves = 20;
a->opts->max_solutions = 1;
+ a->opts->nthreads = 1;
a->opts->optimal_only = false;
a->opts->can_niss = false;
a->opts->verbose = false;
@@ -127,7 +128,8 @@ solve_parse_args(int c, char **v)
val = strtol(v[++i], NULL, 10);
if (val < 0 || val > 100) {
fprintf(stderr,
- "Invalid min number of moves.\n");
+ "Invalid min number of moves"
+ "(0 <= m <= 100).\n");
return a;
}
a->opts->min_moves = val;
@@ -135,10 +137,20 @@ solve_parse_args(int c, char **v)
val = strtol(v[++i], NULL, 10);
if (val < 0 || val > 100) {
fprintf(stderr,
- "Invalid max number of moves.\n");
+ "Invalid max number of moves"
+ "(0 <= M <= 100).\n");
return a;
}
a->opts->max_moves = val;
+ } else if (!strcmp(v[i], "-t")) {
+ val = strtol(v[++i], NULL, 10);
+ if (val < 1 || val > 64) {
+ fprintf(stderr,
+ "Invalid number of threads."
+ "1 <= t <= 64\n");
+ return a;
+ }
+ a->opts->nthreads = val;
} else if (!strcmp(v[i], "-s")) {
val = strtol(v[++i], NULL, 10);
if (val < 1 || val > 1000000) {
@@ -255,11 +267,19 @@ print_exec(CommandArgs *args)
static void
help_exec(CommandArgs *args)
{
- /* TODO: print full nissy manpage */
if (args->command == NULL) {
- printf("Type help COMMAND for information on a ");
- printf("specific command.\n");
- printf("A more complete manual page is work in progress.\n");
+ printf(
+ "Use the nissy command \"help COMMAND\" for a short "
+ "description of a specific command.\n"
+ "Use the nissy command \"commands\" for a list of "
+ "available commands.\n"
+ "See the manual page for more details. The manual"
+ " page is available with \"man nissy\" on a UNIX"
+ " system (such a Linux or MacOS) or in pdf and html"
+ " format in the docs folder.\n"
+ "Nissy is available for free at "
+ "https://github.com/sebastianotronto/nissy"
+ );
} else {
printf("Command %s: %s\nusage: %s\n", args->command->name,
args->command->description, args->command->usage);
diff --git a/src/cubetypes.h b/src/cubetypes.h
@@ -3,6 +3,7 @@
#include <stdbool.h>
#include <inttypes.h>
+#include <pthread.h>
#define NMOVES 55 /* Actually 55, but one is NULLMOVE */
#define NTRANS 48
@@ -87,6 +88,7 @@ typedef struct prunedata PruneData;
typedef struct solveoptions SolveOptions;
typedef struct step Step;
typedef struct symdata SymData;
+typedef struct threaddata ThreadData;
typedef Cube (*AntiIndexer) (uint64_t);
typedef bool (*Checker) (Cube);
@@ -104,186 +106,205 @@ typedef Trans (*TransDetector) (Cube);
struct
alg
{
- Move * move;
- bool * inv;
- int len;
- int allocated;
+ Move * move;
+ bool * inv;
+ int len;
+ int allocated;
};
struct
alglist
{
- AlgListNode * first;
- AlgListNode * last;
- int len;
+ AlgListNode * first;
+ AlgListNode * last;
+ int len;
};
struct
alglistnode
{
- Alg * alg;
- AlgListNode * next;
+ Alg * alg;
+ AlgListNode * next;
};
struct
block
{
- bool edge[12];
- bool corner[8];
- bool center[6];
+ bool edge[12];
+ bool corner[8];
+ bool center[6];
};
struct
command
{
- char * name;
- char * usage;
- char * description;
- ArgParser parse_args;
- Exec exec;
+ char * name;
+ char * usage;
+ char * description;
+ ArgParser parse_args;
+ Exec exec;
};
struct
commandargs
{
- bool success;
- Alg * scramble;
- SolveOptions * opts;
- Step * step;
- Command * command; /* For help */
+ bool success;
+ Alg * scramble;
+ SolveOptions * opts;
+ Step * step;
+ Command * command; /* For help */
};
struct
coordinate
{
- Indexer index;
- AntiIndexer cube;
- uint64_t max;
- int ntrans;
- Trans * trans;
+ Indexer index;
+ AntiIndexer cube;
+ uint64_t max;
+ int ntrans;
+ Trans * trans;
};
struct
cube
{
- int epose;
- int eposs;
- int eposm;
- int eofb;
- int eorl;
- int eoud;
- int cp;
- int coud;
- int cofb;
- int corl;
- int cpos;
+ int epose;
+ int eposs;
+ int eposm;
+ int eofb;
+ int eorl;
+ int eoud;
+ int cp;
+ int coud;
+ int cofb;
+ int corl;
+ int cpos;
};
struct
cubearray
{
- int * ep;
- int * eofb;
- int * eorl;
- int * eoud;
- int * cp;
- int * coud;
- int * corl;
- int * cofb;
- int * cpos;
+ int * ep;
+ int * eofb;
+ int * eorl;
+ int * eoud;
+ int * cp;
+ int * coud;
+ int * corl;
+ int * cofb;
+ int * cpos;
};
struct
cubetarget
{
- Cube cube;
- int target;
+ Cube cube;
+ int target;
};
struct
dfsdata
{
- int d;
- int m;
- int lb;
- bool niss;
- Move last1;
- Move last2;
- AlgList * sols;
- Alg * current_alg;
- Move sorted_moves[NMOVES];
- int move_position[NMOVES];
- uint8_t * visited;
+ int d;
+ int m;
+ int lb;
+ bool niss;
+ Move last1;
+ Move last2;
+ AlgList * sols;
+ pthread_mutex_t * sols_mutex;
+ Alg * current_alg;
+ Move * sorted_moves;
+ int * move_position;
+ uint8_t * visited;
};
struct
piecefilter
{
- bool epose;
- bool eposs;
- bool eposm;
- bool eofb;
- bool eorl;
- bool eoud;
- bool cp;
- bool coud;
- bool cofb;
- bool corl;
- bool cpos;
+ bool epose;
+ bool eposs;
+ bool eposm;
+ bool eofb;
+ bool eorl;
+ bool eoud;
+ bool cp;
+ bool coud;
+ bool cofb;
+ bool corl;
+ bool cpos;
};
struct
prunedata
{
- char * filename;
- uint8_t * ptable;
- bool generated;
- uint64_t n;
- Coordinate * coord;
- Moveset moveset;
+ char * filename;
+ uint8_t * ptable;
+ bool generated;
+ uint64_t n;
+ Coordinate * coord;
+ Moveset moveset;
};
struct
solveoptions
{
- int min_moves;
- int max_moves;
- int max_solutions;
- bool optimal_only;
- bool can_niss;
- bool verbose;
- bool all;
- bool print_number;
+ int min_moves;
+ int max_moves;
+ int max_solutions;
+ int nthreads;
+ bool optimal_only;
+ bool can_niss;
+ bool verbose;
+ bool all;
+ bool print_number;
};
struct
step
{
- char * shortname;
- char * name;
- Estimator estimate;
- Checker ready;
- char * ready_msg;
- Validator is_valid;
- Moveset moveset;
- Trans pre_trans;
- TransDetector detect;
- int ntables;
- PruneData * tables[10];
+ char * shortname;
+ char * name;
+ Estimator estimate;
+ Checker ready;
+ char * ready_msg;
+ Validator is_valid;
+ Moveset moveset;
+ Trans pre_trans;
+ TransDetector detect;
+ int ntables;
+ PruneData * tables[10];
};
struct
symdata
{
- char * filename;
- bool generated;
- Coordinate * coord;
- Coordinate * sym_coord;
- int ntrans;
- Trans * trans;
- uint64_t * class;
- Cube * rep;
- Trans * transtorep;
+ char * filename;
+ bool generated;
+ Coordinate * coord;
+ Coordinate * sym_coord;
+ int ntrans;
+ Trans * trans;
+ uint64_t * class;
+ Cube * rep;
+ Trans * transtorep;
+};
+
+struct
+threaddata
+{
+ int thid;
+ Cube cube;
+ Step * step;
+ int depth;
+ Move * sorted_moves;
+ int * move_position;
+ SolveOptions * opts;
+ AlgList * start;
+ AlgListNode ** node;
+ AlgList * sols;
+ pthread_mutex_t * start_mutex;
+ pthread_mutex_t * sols_mutex;
};
#endif
diff --git a/src/pruning.c b/src/pruning.c
@@ -95,7 +95,7 @@ pd_khuge_HTM = {
void
genptable(PruneData *pd)
{
- Move ms[NMOVES];
+ Move *ms;
uint64_t j, oldn;
DfsData dd;
@@ -112,6 +112,7 @@ genptable(PruneData *pd)
fprintf(stderr, "Cannot load %s, generating it\n", pd->filename);
+ ms = malloc(NMOVES * sizeof(Move));
moveset_to_list(pd->moveset, ms);
for (j = 0; j < pd->coord->max; j++)
@@ -119,6 +120,7 @@ genptable(PruneData *pd)
dd = (DfsData) { .m = 0 };
dd.visited = malloc((ptablesize(pd)/4 + 1) * sizeof(uint8_t));
+ dd.sorted_moves = malloc(NMOVES * sizeof(Move));
moveset_to_list(pd->moveset, dd.sorted_moves);
oldn = 0;
pd->n = 0;
@@ -136,14 +138,16 @@ genptable(PruneData *pd)
if (!write_ptable_file(pd))
fprintf(stderr, "Error writing ptable file\n");
+ free(ms);
free(dd.visited);
+ free(dd.sorted_moves);
}
*/
void
genptable(PruneData *pd)
{
- Move ms[NMOVES];
+ Move *ms;
int d;
uint64_t j, oldn;
@@ -161,6 +165,7 @@ genptable(PruneData *pd)
fprintf(stderr, "Cannot load %s, generating it\n", pd->filename);
+ ms = malloc(NMOVES * sizeof(Move));
moveset_to_list(pd->moveset, ms);
/* We use 4 bits per value, so any distance >= 15 is set to 15 */
@@ -186,6 +191,7 @@ genptable(PruneData *pd)
if (!write_ptable_file(pd))
fprintf(stderr, "Error writing ptable file\n");
+ free(ms);
}
/*
diff --git a/src/solve.c b/src/solve.c
@@ -8,6 +8,8 @@ static void dfs_branch(Cube c, Step *s, SolveOptions *os, DfsData *dd);
static bool dfs_check_solved(Step *s, SolveOptions *opts, DfsData *dd);
static void dfs_niss(Cube c, Step *s, SolveOptions *opts, DfsData *dd);
static bool dfs_stop(Cube c, Step *s, SolveOptions *opts, DfsData *dd);
+static void * instance_thread(void *arg);
+static void multidfs(Cube c, Step *s, SolveOptions *opts, AlgList *sols, int d);
/* Local functions ***********************************************************/
@@ -41,12 +43,24 @@ dfs(Cube c, Step *s, SolveOptions *opts, DfsData *dd)
static void
dfs_branch(Cube c, Step *s, SolveOptions *opts, DfsData *dd)
{
- Move m, l1 = dd->last1, l2 = dd->last2, *moves = dd->sorted_moves;
-
- int i, maxnsol = opts->max_solutions;
-
- for (i = 0; moves[i] != NULLMOVE && dd->sols->len < maxnsol; i++) {
- m = moves[i];
+ bool b = false;
+ int i;
+ Move m, l1, l2;
+
+ l1 = dd->last1;
+ l2 = dd->last2;
+
+ for (i = 0; dd->sorted_moves[i] != NULLMOVE; i++) {
+ /*
+ pthread_mutex_lock(dd->sols_mutex);
+ b = dd->sols->len >= opts->max_solutions;
+ pthread_mutex_unlock(dd->sols_mutex);
+ */
+
+ if (b)
+ break;
+
+ m = dd->sorted_moves[i];
if (allowed_next(m, dd)) {
dd->last2 = dd->last1;
dd->last1 = m;
@@ -68,8 +82,12 @@ dfs_check_solved(Step *s, SolveOptions *opts, DfsData *dd)
return false;
if (dd->current_alg->len == dd->d) {
- if (s->is_valid(dd->current_alg) || opts->all)
- append_alg(dd->sols, dd->current_alg);
+ if (s->is_valid(dd->current_alg) || opts->all) {
+ pthread_mutex_lock(dd->sols_mutex);
+ if (dd->sols->len < opts->max_solutions)
+ append_alg(dd->sols, dd->current_alg);
+ pthread_mutex_unlock(dd->sols_mutex);
+ }
if (opts->verbose)
print_alg(dd->current_alg, false);
@@ -103,14 +121,13 @@ dfs_niss(Cube c, Step *s, SolveOptions *opts, DfsData *dd)
static bool
dfs_stop(Cube c, Step *s, SolveOptions *opts, DfsData *dd)
{
+ bool b = false;
+
CubeTarget ct = {
.cube = c,
.target = dd->d - dd->current_alg->len
};
- if (dd->sols->len >= opts->max_solutions)
- return true;
-
dd->lb = s->estimate(ct);
if (opts->can_niss && !dd->niss)
dd->lb = MIN(1, dd->lb);
@@ -118,7 +135,131 @@ dfs_stop(Cube c, Step *s, SolveOptions *opts, DfsData *dd)
if (dd->current_alg->len + dd->lb > dd->d)
return true;
- return false;
+ pthread_mutex_lock(dd->sols_mutex);
+ b = dd->sols->len >= opts->max_solutions;
+ pthread_mutex_unlock(dd->sols_mutex);
+
+ return b;
+}
+
+static void *
+instance_thread(void *arg)
+{
+ bool b;
+ Cube c;
+ ThreadData *td;
+ AlgListNode *node;
+ DfsData dd;
+
+ td = (ThreadData *)arg;
+
+ while (1) {
+ b = false;
+
+ pthread_mutex_lock(td->start_mutex);
+ if ((node = *(td->node)) == NULL)
+ b = true;
+ else
+ *(td->node) = (*(td->node))->next;
+ pthread_mutex_unlock(td->start_mutex);
+
+ if (b)
+ break;
+
+ c = node->alg->inv[0] ?
+ apply_move(node->alg->move[0], inverse_cube(td->cube)) :
+ apply_move(node->alg->move[0], td->cube);
+
+ dd.d = td->depth;
+ dd.m = 1;
+ dd.niss = node->alg->inv[0];
+ dd.lb = -1;
+ dd.last1 = node->alg->move[0];
+ dd.last2 = NULLMOVE;
+ dd.sols = td->sols;
+ dd.sols_mutex = td->sols_mutex;
+ dd.current_alg = new_alg("");
+ append_move(dd.current_alg, node->alg->move[0],
+ node->alg->inv[0]);
+ dd.sorted_moves = td->sorted_moves;
+ dd.move_position = td->move_position;
+
+/*
+ pthread_mutex_lock(td->sols_mutex);
+ printf("Starting thread %d with move: ", td->thid);
+ print_alg(dd.current_alg, false);
+ pthread_mutex_unlock(td->sols_mutex);
+*/
+
+ dfs(c, td->step, td->opts, &dd);
+
+ free_alg(dd.current_alg);
+ }
+
+ return NULL;
+}
+
+static void
+multidfs(Cube c, Step *s, SolveOptions *opts, AlgList *sols, int d)
+{
+ int i, *move_position;
+ Move *sorted_moves;
+ Alg *alg;
+ AlgList *start;
+ AlgListNode **node;
+ pthread_t t[opts->nthreads];
+ ThreadData td[opts->nthreads];
+ pthread_mutex_t *start_mutex, *sols_mutex;
+
+ move_position = malloc(NMOVES * sizeof(int));
+ sorted_moves = malloc(NMOVES * sizeof(Move));
+ node = malloc(sizeof(AlgListNode *));
+ start_mutex = malloc(sizeof(pthread_mutex_t));
+ sols_mutex = malloc(sizeof(pthread_mutex_t));
+
+ start = new_alglist();
+ pthread_mutex_init(start_mutex, NULL);
+ pthread_mutex_init(sols_mutex, NULL);
+
+ moveset_to_list(s->moveset, sorted_moves);
+ movelist_to_position(sorted_moves, move_position);
+ for (i = 0; sorted_moves[i] != NULLMOVE; i++) {
+ alg = new_alg("");
+ append_move(alg, sorted_moves[i], false);
+ append_alg(start, alg);
+ if (opts->can_niss) {
+ alg->inv[0] = true;
+ append_alg(start, alg);
+ }
+ free_alg(alg);
+ }
+ *node = start->first;
+
+ for (i = 0; i < opts->nthreads; i++) {
+ td[i].thid = i;
+ td[i].cube = c;
+ td[i].step = s;
+ td[i].depth = d;
+ td[i].sorted_moves = sorted_moves;
+ td[i].move_position = move_position;
+ td[i].opts = opts;
+ td[i].start = start;
+ td[i].node = node;
+ td[i].sols = sols;
+ td[i].start_mutex = start_mutex;
+ td[i].sols_mutex = sols_mutex;
+ pthread_create(&t[i], NULL, instance_thread, &td[i]);
+ }
+
+ for (i = 0; i < opts->nthreads; i++)
+ pthread_join(t[i], NULL);
+
+ free_alglist(start);
+ free(node);
+ free(start_mutex);
+ free(sols_mutex);
+ free(move_position);
+ free(sorted_moves);
}
/* Public functions **********************************************************/
@@ -126,11 +267,12 @@ dfs_stop(Cube c, Step *s, SolveOptions *opts, DfsData *dd)
AlgList *
solve(Cube cube, Step *step, SolveOptions *opts)
{
+ int d;
+ AlgList *sols = new_alglist();
AlgListNode *node;
- DfsData dd;
Cube c;
- prepare_step(step, &dd);
+ prepare_step(step);
if (step->detect != NULL)
step->pre_trans = step->detect(cube);
@@ -139,24 +281,29 @@ solve(Cube cube, Step *step, SolveOptions *opts)
if (step->ready != NULL && !step->ready(c)) {
fprintf(stderr, "Cube not ready for solving step: ");
fprintf(stderr, "%s\n", step->ready_msg);
- return dd.sols;
+ return sols;
+ }
+
+ if (step->estimate((CubeTarget){.cube = c, .target = 0}) == 0 &&
+ opts->min_moves == 0) {
+ append_alg(sols, new_alg(""));
+ return sols;
}
- for (dd.d = opts->min_moves;
- dd.d <= opts->max_moves &&
- !(dd.sols->len && opts->optimal_only) &&
- dd.sols->len < opts->max_solutions;
- dd.d++) {
+ for (d = MAX(1, opts->min_moves);
+ d <= opts->max_moves &&
+ !(sols->len && opts->optimal_only) &&
+ sols->len < opts->max_solutions;
+ d++) {
if (opts->verbose)
fprintf(stderr,
"Found %d solutions, searching depth %d...\n",
- dd.sols->len, dd.d);
- dfs(c, step, opts, &dd);
+ sols->len, d);
+ multidfs(c, step, opts, sols, d);
}
- for (node = dd.sols->first; node != NULL; node = node->next)
+ for (node = sols->first; node != NULL; node = node->next)
transform_alg(inverse_trans(step->pre_trans), node->alg);
- free_alg(dd.current_alg);
- return dd.sols;
+ return sols;
}
diff --git a/src/steps.c b/src/steps.c
@@ -1063,21 +1063,10 @@ detect_pretrans_drud(Cube cube)
/* Public functions **********************************************************/
void
-prepare_step(Step *step, DfsData *dd)
+prepare_step(Step *step)
{
int i;
- dd->m = 0;
- dd->niss = false;
- dd->lb = -1;
- dd->last1 = NULLMOVE;
- dd->last2 = NULLMOVE;
- dd->sols = new_alglist();
- dd->current_alg = new_alg("");
-
- moveset_to_list(step->moveset, dd->sorted_moves);
- movelist_to_position(dd->sorted_moves, dd->move_position);
-
for (i = 0; i < step->ntables; i++)
genptable(step->tables[i]);
}
diff --git a/src/steps.h b/src/steps.h
@@ -7,6 +7,6 @@
extern Step * steps[NSTEPS];
-void prepare_step(Step *step, DfsData *dd);
+void prepare_step(Step *step);
#endif