commit b6c1ff6cfdfe0f4ce602fe83e54c3112a1c95690
parent 0f23987edbbabdf88fbe175f695b4fdb727586fd
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date: Sat, 29 Mar 2025 10:04:16 +0100
Optimize genptable for coordinate solve (20x speedup)
Diffstat:
12 files changed, 170 insertions(+), 20 deletions(-)
diff --git a/shell/shell.c b/shell/shell.c
@@ -641,7 +641,7 @@ parse_uint(const char *argv, unsigned *result)
static uint8_t
parse_nisstype(const char *arg)
{
- if (!strcmp("normal", arg))
+ if (!strcmp("normal", arg) || !strcmp("", arg))
return NISSY_NISSFLAG_NORMAL;
if (!strcmp("inverse", arg))
diff --git a/src/solvers/coord/dr.h b/src/solvers/coord/dr.h
@@ -102,18 +102,21 @@ STATIC bool
coordinate_dr_isnasty(uint64_t coord, const void *data)
{
const char *datanoinfo;
- const uint32_t *classttrep;
+ const uint32_t *classttrep, *rep32;
+ uint32_t r;
datanoinfo = (const char *)data + INFOSIZE;
classttrep = (const uint32_t *)datanoinfo;
+ rep32 = (const uint32_t *)(datanoinfo + DR_CLASS_TABLESIZE);
+ r = rep32[coord / POW_3_7];
- return DR_ISNASTY(classttrep[coord]);
+ return DR_ISNASTY(classttrep[r]);
}
STATIC size_t
coordinate_dr_gendata(void *data)
{
- uint64_t i, ii, j, n, t, nasty;
+ uint64_t i, j, n, t, nasty;
char *datanoinfo;
uint32_t *classttrep, *rep;
cube_t c;
@@ -146,12 +149,11 @@ coordinate_dr_gendata(void *data)
continue;
c = invcoord_dreoesep_nosym(i);
- ii = coord_dreoesep_nosym(c);
- for (t = 0, nasty = 0; t < NTRANS && !nasty; t++) {
+ for (t = 1, nasty = 0; t < NTRANS && !nasty; t++) {
if (!((UINT64_C(1) << t) & coordinate_dr.trans_mask))
continue;
- nasty = ii == coord_dreoesep_nosym(transform(c, t));
+ nasty = i == coord_dreoesep_nosym(transform(c, t));
}
for (t = 0; t < NTRANS; t++) {
diff --git a/src/solvers/coord/gendata.h b/src/solvers/coord/gendata.h
@@ -2,8 +2,11 @@ STATIC size_t gendata_coord(const coord_t [static 1], void *);
STATIC int64_t gendata_coord_dispatch(const char *, void *);
STATIC tableinfo_t genptable_coord(
const coord_t [static 1], const void *, uint8_t *);
+STATIC bool switch_to_fromnew(uint64_t, uint64_t, uint64_t);
STATIC uint64_t genptable_coord_fillneighbors(
const coord_t [static 1], const void *, uint64_t, uint8_t, uint8_t *);
+STATIC uint64_t genptable_coord_fillfromnew(
+ const coord_t [static 1], const void *, uint64_t, uint8_t, uint8_t *);
STATIC void getdistribution_coord(
const uint8_t *, const char *, uint64_t [static INFO_DISTRIBUTION_LEN]);
STATIC uint8_t get_coord_pval(
@@ -83,7 +86,7 @@ genptable_coord(
uint8_t *table
)
{
- uint64_t tablesize, i, d, tot, t;
+ uint64_t tablesize, i, d, tot, t, nm;
tableinfo_t info;
tablesize = DIV_ROUND_UP(coord->max, 2);
@@ -107,26 +110,46 @@ genptable_coord(
memset(info.distribution, 0, INFO_DISTRIBUTION_LEN * sizeof(uint64_t));
append_coord_name(coord, info.solver);
+ nm = popcount_u32(coord->moves_mask);
i = coord->coord(SOLVED_CUBE, data);
set_coord_pval(coord, table, i, 0);
info.distribution[0] = 1;
- for (d = 1, tot = 1; tot < coord->max; d++) {
- for (i = 0; i < coord->max; i++) {
- if (get_coord_pval(coord, table, i) == d-1) {
- t = genptable_coord_fillneighbors(
- coord, data, i, d, table);
- tot += t;
- info.distribution[d] += t;
- }
+ for (d = 1, tot = 1; tot < coord->max && d < 20; d++) {
+ t = 0;
+ if (switch_to_fromnew(tot, coord->max, nm)) {
+ for (i = 0; i < coord->max; i++)
+ if (get_coord_pval(coord, table, i) > d)
+ t += genptable_coord_fillfromnew(
+ coord, data, i, d, table);
+ } else {
+ for (i = 0; i < coord->max; i++)
+ if (get_coord_pval(coord, table, i) == d-1)
+ t += genptable_coord_fillneighbors(
+ coord, data, i, d, table);
}
- LOG("Depth %" PRIu64 ": %" PRIu64 " of %" PRIu64 "\n",
- d, tot, coord->max);
+ tot += t;
+ info.distribution[d] = t;
+ LOG("Depth %" PRIu64 ": found %" PRIu64 " (%" PRIu64 " of %"
+ PRIu64 ")\n", d, t, tot, coord->max);
}
info.maxvalue = d-1;
return info;
}
+STATIC bool
+switch_to_fromnew(uint64_t done, uint64_t max, uint64_t nm)
+{
+ /*
+ Heuristic to determine if it is more conveniente to loop over
+ done coordinates and fill the new discovered, or to loop from new
+ coordinates and fill them if they have a done neighbor.
+ */
+
+ double r = (double)done / (double)max;
+ return 1.0 - intpow(1.0-r, nm) > nm * (1.0-r);
+}
+
STATIC uint64_t
genptable_coord_fillneighbors(
const coord_t coord[static 1],
@@ -136,15 +159,18 @@ genptable_coord_fillneighbors(
uint8_t *table
)
{
+ bool isnasty;
uint8_t m;
- uint64_t j, t, tot;
+ uint64_t ii, j, t, tot;
cube_t c, moved;
c = coord->cube(i, data);
tot = 0;
for (m = 0; m < NMOVES; m++) {
moved = move(c, m);
- for (t = 0; t < NTRANS; t++) {
+ ii = coord->coord(moved, data);
+ isnasty = coord->isnasty(ii, data);
+ for (t = 0; t < NTRANS && (t == 0 || isnasty); t++) {
if (!((UINT64_C(1) << t) & coord->trans_mask))
continue;
@@ -159,6 +185,58 @@ genptable_coord_fillneighbors(
return tot;
}
+STATIC uint64_t
+genptable_coord_fillfromnew(
+ const coord_t coord[static 1],
+ const void *data,
+ uint64_t i,
+ uint8_t d,
+ uint8_t *table
+)
+{
+ bool found;
+ uint8_t m;
+ uint64_t tot, t, ii, j, nsim, sim[NTRANS];
+ cube_t c;
+
+ tot = 0;
+ c = coord->cube(i, data);
+
+ for (t = 0, nsim = 0; t < NTRANS; t++) {
+ if (!((UINT64_C(1) << t) & coord->trans_mask))
+ continue;
+
+ ii = coord->coord(transform(c, t), data);
+ for (j = 0, found = false; j < nsim && !found; j++)
+ found = sim[j] == ii;
+ if (!found)
+ sim[nsim++] = ii;
+ }
+
+ for (j = 0, found = false; j < nsim && !found; j++) {
+ c = coord->cube(sim[j], data);
+ for (m = 0; m < NMOVES; m++) {
+ ii = coord->coord(move(c, m), data);
+ if (get_coord_pval(coord, table, ii) < d) {
+ found = true;
+ break;
+ }
+ }
+ }
+
+ tot = 0;
+ if (found) {
+ for (j = 0; j < nsim; j++) {
+ if (get_coord_pval(coord, table, sim[j]) > d) {
+ set_coord_pval(coord, table, sim[j], d);
+ tot++;
+ }
+ }
+ }
+
+ return tot;
+}
+
STATIC void
getdistribution_coord(
const uint8_t *table,
diff --git a/src/utils/math.h b/src/utils/math.h
@@ -10,6 +10,7 @@ STATIC void indextoperm(int64_t, size_t n, uint8_t [n]);
STATIC int permsign(size_t n, const uint8_t [n]);
STATIC int64_t digitstosumzero(size_t n, const uint8_t [n], uint8_t);
STATIC void sumzerotodigits(int64_t, size_t n, uint8_t, uint8_t [n]);
+STATIC double intpow(double, uint64_t);
STATIC int64_t
factorial(int64_t n)
@@ -183,3 +184,16 @@ sumzerotodigits(int64_t d, size_t n, uint8_t b, uint8_t a[n])
sumzerotodigits_error:
memset(a, UINT8_ERROR, n);
}
+
+STATIC double
+intpow(double b, uint64_t e)
+{
+ double r;
+
+ if (e == 0)
+ return 1;
+
+ r = intpow(b, e/2);
+
+ return e % 2 == 0 ? r * r : b * r * r;
+}
diff --git a/test/015_math_intpow/00_1_0.in b/test/015_math_intpow/00_1_0.in
@@ -0,0 +1,3 @@
+1.0
+0
+1.0
diff --git a/test/015_math_intpow/00_1_0.out b/test/015_math_intpow/00_1_0.out
@@ -0,0 +1 @@
+Ok
diff --git a/test/015_math_intpow/01_x_0.in b/test/015_math_intpow/01_x_0.in
@@ -0,0 +1,3 @@
+234324.52324234
+0
+1.0
diff --git a/test/015_math_intpow/01_x_0.out b/test/015_math_intpow/01_x_0.out
@@ -0,0 +1 @@
+Ok
diff --git a/test/015_math_intpow/02_3_2.in b/test/015_math_intpow/02_3_2.in
@@ -0,0 +1,3 @@
+3.0
+2
+9.0
diff --git a/test/015_math_intpow/02_3_2.out b/test/015_math_intpow/02_3_2.out
@@ -0,0 +1 @@
+Ok
diff --git a/test/015_math_intpow/intpow_tests.c b/test/015_math_intpow/intpow_tests.c
@@ -0,0 +1,26 @@
+#include "../test.h"
+
+#define ABS(x) ((x) < 0 ? (-(x)) : (x))
+
+double intpow(double, uint64_t);
+
+const double tolerance = 1e-6;
+
+void run(void) {
+ char str[STRLENMAX];
+ double b, r, c;
+ uint64_t e;
+
+ fgets(str, STRLENMAX, stdin);
+ b = atof(str);
+ fgets(str, STRLENMAX, stdin);
+ e = atoll(str);
+ fgets(str, STRLENMAX, stdin);
+ r = atof(str);
+
+ c = intpow(b, e);
+ if (ABS(r - c) > tolerance)
+ printf("Error! Expected %lf but got %lf\n", r, c);
+ else
+ printf("Ok\n");
+}
diff --git a/tools/expected_distributions.h b/tools/expected_distributions.h
@@ -136,6 +136,22 @@ uint64_t expected_eo[21] = {
[7] = 13,
};
+uint64_t expected_dr[21] = {
+ [0] = 1,
+ [1] = 1,
+ [2] = 5,
+ [3] = 44,
+ [4] = 487,
+ [5] = 5841,
+ [6] = 68364,
+ [7] = 776568,
+ [8] = 7950748,
+ [9] = 52098876,
+ [10] = 76236234,
+ [11] = 3771112,
+ [12] = 129,
+};
+
static bool
distribution_equal(const uint64_t *expected, const uint64_t *actual, int n)
{
@@ -218,6 +234,8 @@ check_distribution(const char *solver, size_t data_size, const void *data)
str = info.solver + 22; /* "coordinate solver for COORD" */
if (!strcmp(str, "EO")) {
return check_table(expected_eo, &info);
+ } else if (!strcmp(str, "DR")) {
+ return check_table(expected_dr, &info);
} else {
goto check_distribution_unknown;
}