commit bcd52547af58b15868e24f3904e307548d1fd505
parent 21fdf9697ddfab0a9c082a91ae0a79b1b574774c
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date: Sun, 14 Dec 2025 15:12:36 +0100
Cleanup, update documentation, fix examples
Diffstat:
14 files changed, 314 insertions(+), 417 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -12,6 +12,7 @@ run
run.exe
debugrun
tables/*
+tables-old/*
test/*/runtest
test/.DS_Store
test/run
diff --git a/cpp/examples/solve.cpp b/cpp/examples/solve.cpp
@@ -0,0 +1,89 @@
+/* Simple demo for an H48 solver */
+
+#include "../nissy.h"
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <string>
+
+int main() {
+ // Get verbose output
+ nissy::set_logger([](const char* s, void *u) { std::cout << s; }, NULL);
+
+ // Get the scramble from the user
+ std::cout << "Enter scramble: ";
+ std::string scramble;
+ std::getline(std::cin, scramble);
+
+ // Apply scramble to a solved cube
+ nissy::cube c;
+ if (!c.move(scramble).ok()) {
+ std::cout << "Invalid scramble!" << std::endl;
+ return 1;
+ }
+
+ // Ask for a limit on the solution's length
+ int maxmoves;
+ std::cout << "Maximum number of moves: ";
+ std::cin >> maxmoves;
+
+ // Load the solver
+ auto h48h3 = std::get<nissy::solver>(nissy::solver::get("h48h3"));
+ std::filesystem::path tableDir("tables");
+ std::filesystem::create_directories(tableDir); // Ignored if dir exists
+ std::filesystem::path tableFile("tables/" + h48h3.id);
+
+ // If the table is not present, generate it
+ if (!std::filesystem::exists(tableFile)) {
+ std::cout << "Data for h48h3 solver was not found, "
+ << "generating it..." << std::endl;
+ auto err = h48h3.generate_data();
+ if (!err.ok()) {
+ std::cout << "Unexpected error! Error code: "
+ << err.value << std::endl;
+ return 1;
+ }
+ std::ofstream ofs(tableFile, std::ios::binary);
+ ofs.write(reinterpret_cast<char *>(h48h3.data.data()),
+ h48h3.size);
+ std::cout << "Table generated and written to "
+ << tableFile << std::endl;
+ ofs.close();
+ }
+ // Otherwise read it from file
+ else {
+ std::cout << "Loading data table from file" << std::endl;
+ std::ifstream ifs(tableFile, std::ios::binary);
+ h48h3.read_data(ifs);
+ ifs.close();
+ std::cout << "Data loaded, checking table..." << std::endl;
+ auto err = h48h3.check_data();
+ if (!err.ok()) {
+ std::cout << "Error reading data table from file! "
+ << "Error code: " << err.value << std::endl;
+ return 1;
+ }
+ std::cout << "Data table loaded" << std::endl;
+ }
+
+ // Solve
+ auto solve_result = h48h3.solve(c, nissy::nissflag::NORMAL,
+ 0, maxmoves, 1, 20, 8, NULL, NULL);
+
+ // Write the result
+ if (!solve_result.err.ok()) {
+ std::cout << "Error solving! Error code: " <<
+ solve_result.err.value << std::endl;
+ return 1;
+ }
+
+ if (solve_result.solutions.size() == 0) {
+ std::cout << "No solution found!" << std::endl;
+ } else {
+ auto len = nissy::count_moves(solve_result.solutions).value;
+ std::cout << "Solution (" << len << " moves): "
+ << solve_result.solutions << std::endl;
+ }
+
+ return 0;
+}
diff --git a/cpp/examples/solve_h48h3k2.cpp b/cpp/examples/solve_h48h3k2.cpp
@@ -1,89 +0,0 @@
-/* Simple demo for an H48 solver */
-
-#include "../nissy.h"
-#include <filesystem>
-#include <fstream>
-#include <iostream>
-#include <string>
-
-int main() {
- // Get verbose output
- nissy::set_logger([](const char* s, void *u) { std::cout << s; }, NULL);
-
- // Get the scramble from the user
- std::cout << "Enter scramble: ";
- std::string scramble;
- std::getline(std::cin, scramble);
-
- // Apply scramble to a solved cube
- nissy::cube c;
- if (!c.move(scramble).ok()) {
- std::cout << "Invalid scramble!" << std::endl;
- return 1;
- }
-
- // Ask for a limit on the solution's length
- int maxmoves;
- std::cout << "Maximum number of moves: ";
- std::cin >> maxmoves;
-
- // Load the solver
- auto h48h3k2 = std::get<nissy::solver>(nissy::solver::get("h48h3k2"));
- std::filesystem::path tableDir("tables");
- std::filesystem::create_directories(tableDir); // Ignored if dir exists
- std::filesystem::path tableFile("tables/" + h48h3k2.id);
-
- // If the table is not present, generate it
- if (!std::filesystem::exists(tableFile)) {
- std::cout << "Data for h48h3k2 solver was not found, "
- << "generating it..." << std::endl;
- auto err = h48h3k2.generate_data();
- if (!err.ok()) {
- std::cout << "Unexpected error! Error code: "
- << err.value << std::endl;
- return 1;
- }
- std::ofstream ofs(tableFile, std::ios::binary);
- ofs.write(reinterpret_cast<char *>(h48h3k2.data.data()),
- h48h3k2.size);
- std::cout << "Table generated and written to "
- << tableFile << std::endl;
- ofs.close();
- }
- // Otherwise read it from file
- else {
- std::cout << "Loading data table from file" << std::endl;
- std::ifstream ifs(tableFile, std::ios::binary);
- h48h3k2.read_data(ifs);
- ifs.close();
- std::cout << "Data loaded, checking table..." << std::endl;
- auto err = h48h3k2.check_data();
- if (!err.ok()) {
- std::cout << "Error reading data table from file! "
- << "Error code: " << err.value << std::endl;
- return 1;
- }
- std::cout << "Data table loaded" << std::endl;
- }
-
- // Solve
- auto solve_result = h48h3k2.solve(c, nissy::nissflag::NORMAL,
- 0, maxmoves, 1, 20, 8, NULL, NULL);
-
- // Write the result
- if (!solve_result.err.ok()) {
- std::cout << "Error solving! Error code: " <<
- solve_result.err.value << std::endl;
- return 1;
- }
-
- if (solve_result.solutions.size() == 0) {
- std::cout << "No solution found!" << std::endl;
- } else {
- auto len = nissy::count_moves(solve_result.solutions).value;
- std::cout << "Solution (" << len << " moves): "
- << solve_result.solutions << std::endl;
- }
-
- return 0;
-}
diff --git a/doc/h48.md b/doc/h48.md
@@ -1,9 +1,8 @@
# The H48 optimal solver
This document contains information on the H48 Rubik's Cube optimal solver.
-The implementation of the solver is still in progress. This document
-partly describes ideas that have not been implemented yet, and it will
-be updated to reflect the actual implementation.
+This solver is occasionally improved and optimized, and this document
+is updated accordingly to reflect the current implementation.
I highly encourage the reader to check out Jaap Scherphuis'
[Computer Puzzling page](https://www.jaapsch.net/puzzles/compcube.htm)
@@ -215,46 +214,40 @@ stored in a single 32 bit integer, so that the table uses less than 12MB.
### Getting the pruning value
-Once we have computed the full h0 coordinate, we can access the correct
-entry in the full pruning table. As mentioned above, the pruning table
-can be one of three kinds:
-
-* 4 bits per entry, or `k4`: In this case the pruning value (between 0
- and 15) can be simply read off the table.
-* 2 bits per entry, or `k2`: Tables of this kind work as described by
- Rokicki in the
- [nxopt document](https://github.com/rokicki/cube20src/blob/master/nxopt.md).
- In this case the pruning table also has a *base value*, that determines
- the offset to be added to each entry (each entry can only be 0, 1, 2 or 3).
- If the base value is `b`, a pruning value of 1, 2 or 3 can be used directly
- as a lower bound of b+1, b+2 and b+3 respectively. However, a value of 0
- could mean that the actual lower bound is anything between 0 and b, so we
- cannot take b as a lower bound. Instead we have to use a pruning value from
- another table - see the section "Fallback tables" below.
-* 1 bit per entry, or `k1`: With one bit per entry, the only information we
- can get from the pruning table is wether or not the current position
- requires more or fewer moves than a fixed base value b. This can still be
- valuable if most positions are more or less equally split between two
- pruning values.
- (Work in progress - `k1` tables not available in the code yet)
-
-### Fallback tables
-
-When a pruning table does not store the exact lower bound value, for
-example in the case of `k2` table as described above, we need to refine
-our estimate using another table, which we call *fallback table*.
-In the current implementation, we actually use two different tables:
-
-* **h0** table: for larger `k2` tables we get a fallback value from the full
- table for the **h0** coordinate.
-* edges-only table: to improve the pruning value for certain specific
- scrambles, namely whose with solved corners such as the superflip,
- we employ a second table that takes into account the edges of a full
- **h11** coordinate. This table is small (around 1MB).
-
-More tables could be used to refine the fallback estimate, but each
-additional table leads to longer lookup times, especially if it is
-too large to fit in cache.
+Once we have computed the full h0 coordinate, we can access
+the correct entry in the full pruning table. We chose to
+use 2 bits per entry, as described by Rokicki in the [nxopt
+document](https://github.com/rokicki/cube20src/blob/master/nxopt.md).
+This means that every pruning table needs to have an associated *base
+value*, that determines the offset to be added to each entry (each entry
+can only be 0, 1, 2 or 3). If the base value is `b`, a pruning value
+of 1, 2 or 3 can be used directly as a lower bound of b+1, b+2 and b+3
+respectively. However, a value of 0 could mean that the actual lower
+bound is anything between 0 and b, so we cannot take b as a lower bound.
+
+To be able to still use some sort of pruning value even when we get a
+0 read, we use a **fallback table**. Inspired by nxopt, this table is
+interleaved with the main table for cache efficiency: every 254 entries
+(508 bits), we store in 4 bits the minimum of the pruning values of these
+254 entries as a number from 0 to 16, without subtracting the table's
+base. Since a cache line is 512 bits long, we never get a cache miss
+when looking up the minimum value in the fallback table after a 0 read.
+Smaller lines (of 256 or 128 bits) have been tried, but they do not
+give any significant improvement over 512 bit lines.
+
+Moreover, as an additional heuristic, in case of a 0 read we also look
+up another pruning value in a table that takes into account only the
+position of the edges. This table is small (around 1MB), so repeated
+accesses to it are not too slow. In practice, this gives a small speed up
+of around 5%. More tables could be used to refine the fallback estimate,
+but each additional table leads to longer lookup times, especially if
+it is too large to fit in cache.
+
+Previous versions of this implementation (up to commit 6c42463, or
+to version 0.2) also included the possibility of a table with 4 bits
+per entry, at least for the `h0` case. Such tables did not require
+a fallback lookup, but due to their large size they were not less
+efficient. Therefore, they have been removed.
### Estimation refinements
@@ -368,20 +361,20 @@ they are obviously inexistent when looking for *all* optimal solutions.
## Pruning table computation
-### 4 bits tables for h0 and h11
+We first describe how the pruning table would be computed if we stored
+each pruning value fully, using 4 bits per entry. This algorithm used
+to be implemented (up to commit 6c42463), but has since been removed.
-Computing the pruning table for a "real" h48 coordinate, that **h0**
-or **h11**, using 4 bits per entry is quite simple. The method currently
-implemented works as follows.
+### Full tables for h0 and h11 (not implemented)
-First, we set the value of the solved cube's coordinate to 0 and every
-other entry to 15, the largest 4-bit integer. Then we iteratively scan
-through the table and compute the coordinates at depth n+1 from those at
-depth n as follows: for each coordinate at depth n, we compute a valid
-representative for it, we apply each of the possible 18 moves to it, and
-we set their value to n+1. Once the table is filled or we have reached
-depth 15, we stop (there are no **h0** coordinates at depth 16 or more,
-but I currently don't know if this is the case for **h11**).
+If we are allowed to store the full pruning value for each entry,
+computing the pruning tables is not that difficult. First, we set the
+value of the solved cube's coordinate to 0 and every other entry to 15,
+the largest 4-bit integer. Then we iteratively scan through the table and
+compute the coordinates at depth n+1 from those at depth n as follows:
+for each coordinate at depth n, we compute a valid representative for it,
+we apply each of the possible 18 moves to it, and we set their value to
+n+1. Once the table is filled or we have reached depth 15, we stop.
Unfortunately what I described above is an oversimplification: one
also must take into account the case where the corner coordinate is
@@ -400,20 +393,7 @@ Finally, this algorithm can be easily parallelized by dividing the set
of coordinates into separate sections, but one must take into account
that a coordinate and its neighbors are usually not in the same section.
-This method is currently implemented only for **h0**, since the 4 bits
-table for **h11** would require around 115GB of RAM to compute, and I
-don't have this much memory.
-
-### 2 bits tables with for h0 and h11
-
-(Work in progress - currently there is no specialized routine for
-computing 2 bits table for "real" coordinates; instead, an optimized
-version of the generic method explained below is used)
-
-### A generic method for intermediate coordinates (from h1 to h10)
-
-(Work in progress - this method is quite slow and it may be replaced
-by a better algorithm in the future)
+### 2 bit tables
Computing the pruning tables for intermediate coordinates is not as
simple. The reason is that these coordinates are not invariant under
@@ -441,14 +421,13 @@ map indexed by their **h11** coordinate value. This way we do not have
to brute-force our way through from depth 0, and it make this method
considerably faster. Unfortunately, since the number of coordinates
at a certain depth increases exponentially with the depth, we are for
-now limited at storing the coordinates at depth 8. (Work in progress -
-we may experiment with depth 9 in the future)
+now limited at storing the coordinates at depth 8.
This method works for the "real" coordinates **h0** and **h11** as well.
Moreover, in this case one can optimize it further by avoiding to repeat
-the search from a coordinate that has already been visited. (Work
-in progress - this method will be replaced in the future by a more
-efficient one)
+the search from a coordinate that has already been visited. Further
+optimization are possible for **h0** and **h11**, and we may implement
+them in the future.
## Possible future improvements
@@ -458,9 +437,6 @@ notes rather than a description of the solver.*
There are some areas where this implementation of the H48 optimal solver
can be improved:
-* Intertwining fallback tables to the main table. This is a trick that
- nxopt uses to reduce the number of cache misses, but we have not
- implemented in H48 yet.
* Faster pruning table generation for **h11** and **h0**. Since these
two coordinates are "real" coordinates, we can use a different technique
to generate their tables faster. This won't affect the solver speed.
diff --git a/doc/solvers.md b/doc/solvers.md
@@ -10,12 +10,11 @@ An HTM-optimal solver using fully-symmetric pruning tables. For details
about how this solver works, see [h48.md](./h48.md). For benchmarks see
[benchmarks/benchmarks.md](../benchmarks/benchmarks.md).
-* Name: of the form `h8hXk2` for `X` from 0 to 11, or `h48h0k4`. The name
- `optimal` is an alias for `h48h7k2`.
+* Name: of the form `h8hX` for `X` from 0 to 11. The name `optimal` is
+ an alias for `h48h7`.
* Requisites: none.
* Moveset: HTM (all 18 basic moves).
-* Data size: 59MB for `h48h0k4`, from 115MB to 59GB for `h48hXk2`
- (roughly 59MB + 2<sup>X</sup>*56MB).
+* From 115MB to 59GB (roughly 2<sup>X</sup>*56MB).
## Coordinate solvers
diff --git a/python/examples/solve.py b/python/examples/solve.py
@@ -12,14 +12,14 @@ sys.path.append(os.getcwd() + os.path.sep + "python")
import nissy
# Choose the solver you prefer
-solver = "h48h0k4"
+solver = "h48h3"
# Load the pruning table from file, generate it if needed
datapath = "tables" + os.path.sep + solver
if os.path.exists(datapath):
data = bytearray(open(datapath, "rb").read())
else:
- data = nissy.gendata("h48h0k4")
+ data = nissy.gendata(solver)
print("Generated data will NOT be persisted")
# Get a scrambled cube
diff --git a/src/solvers/dispatch.h b/src/solvers/dispatch.h
@@ -43,7 +43,7 @@ solver_dispatch_t solver_dispatchers[] = {
};
const char *solver_aliases[][2] = {
- { "optimal", "h48h7i" },
+ { "optimal", "h48h7" },
{ "eofb", "coord_EO_UF" },
{ "eorl", "coord_EO_UR" },
{ "eoud", "coord_EO_FD" },
diff --git a/src/solvers/h48/checkdata.h b/src/solvers/h48/checkdata.h
@@ -18,155 +18,113 @@ uint64_t expected_cocsep[21] = {
struct {
uint8_t max;
uint64_t table[21];
-} expected_h48[12][9] = {
+} expected_h48[12] = {
[0] = {
- [2] = {
- .max = 3,
- .table = {
- [0] = 5473562,
- [1] = 34776317,
- [2] = 68566704,
- [3] = 8750867,
- },
- },
- [4] = {
- .max = 12,
- .table = {
- [0] = 1,
- [1] = 1,
- [2] = 4,
- [3] = 34,
- [4] = 331,
- [5] = 3612,
- [6] = 41605,
- [7] = 474128,
- [8] = 4953846,
- [9] = 34776317,
- [10] = 68566704,
- [11] = 8749194,
- [12] = 1673,
- },
+ .max = 3,
+ .table = {
+ [0] = 5473562,
+ [1] = 34776317,
+ [2] = 68566704,
+ [3] = 8750867,
},
},
[1] = {
- [2] = {
- .max = 3,
- .table = {
- [0] = 6012079,
- [1] = 45822302,
- [2] = 142018732,
- [3] = 41281787,
- },
+ .max = 3,
+ .table = {
+ [0] = 6012079,
+ [1] = 45822302,
+ [2] = 142018732,
+ [3] = 41281787,
},
},
[2] = {
- [2] = {
- .max = 3,
- .table = {
- [0] = 6391286,
- [1] = 55494785,
- [2] = 252389935,
- [3] = 155993794,
- },
+ .max = 3,
+ .table = {
+ [0] = 6391286,
+ [1] = 55494785,
+ [2] = 252389935,
+ [3] = 155993794,
},
},
[3] = {
- [2] = {
- .max = 3,
- .table = {
- [0] = 6686828,
- [1] = 63867852,
- [2] = 392789689,
- [3] = 477195231,
- },
+ .max = 3,
+ .table = {
+ [0] = 6686828,
+ [1] = 63867852,
+ [2] = 392789689,
+ [3] = 477195231,
},
},
[4] = {
- [2] = {
- .max = 3,
- .table = {
- [0] = 77147213,
- [1] = 543379415,
- [2] = 1139570251,
- [3] = 120982321,
- },
+ .max = 3,
+ .table = {
+ [0] = 77147213,
+ [1] = 543379415,
+ [2] = 1139570251,
+ [3] = 120982321,
},
},
[5] = {
- [2] = {
- .max = 3,
- .table = {
- [0] = 82471284,
- [1] = 687850732,
- [2] = 2345840746,
- [3] = 645995638,
- },
+ .max = 3,
+ .table = {
+ [0] = 82471284,
+ [1] = 687850732,
+ [2] = 2345840746,
+ [3] = 645995638,
},
},
[6] = {
- [2] = {
- .max = 3,
- .table = {
- [0] = 85941099,
- [1] = 804752968,
- [2] = 4077248182,
- [3] = 2556374551,
- },
+ .max = 3,
+ .table = {
+ [0] = 85941099,
+ [1] = 804752968,
+ [2] = 4077248182,
+ [3] = 2556374551,
},
},
[7] = {
- [2] = {
- .max = 3,
- .table = {
- [0] = 88529761,
- [1] = 897323475,
- [2] = 6126260791,
- [3] = 7936519573,
- },
+ .max = 3,
+ .table = {
+ [0] = 88529761,
+ [1] = 897323475,
+ [2] = 6126260791,
+ [3] = 7936519573,
},
},
[8] = {
- [2] = {
- .max = 3,
- .table = {
- [0] = 1051579940,
- [1] = 8136021316,
- [2] = 19024479822,
- [3] = 1885186122,
- },
+ .max = 3,
+ .table = {
+ [0] = 1051579940,
+ [1] = 8136021316,
+ [2] = 19024479822,
+ [3] = 1885186122,
},
},
[9] = {
- [2] = {
- .max = 3,
- .table = {
- [0] = 1102038189,
- [1] = 9888265242,
- [2] = 38299375805,
- [3] = 10904855164,
- },
+ .max = 3,
+ .table = {
+ [0] = 1102038189,
+ [1] = 9888265242,
+ [2] = 38299375805,
+ [3] = 10904855164,
},
},
[10] = {
- [2] = {
- .max = 3,
- .table = {
- [0] = 1133240039,
- [1] = 11196285614,
- [2] = 64164702961,
- [3] = 43894840186,
- },
+ .max = 3,
+ .table = {
+ [0] = 1133240039,
+ [1] = 11196285614,
+ [2] = 64164702961,
+ [3] = 43894840186,
},
},
[11] = {
- [2] = {
- .max = 3,
- .table = {
- [0] = 1150763161,
- [1] = 12045845660,
- [2] = 91163433330,
- [3] = 136418095449,
- },
+ .max = 3,
+ .table = {
+ [0] = 1150763161,
+ [1] = 12045845660,
+ [2] = 91163433330,
+ [3] = 136418095449,
},
},
};
@@ -210,8 +168,8 @@ checkdata_h48(
continue;
}
- ed = expected_h48[info.h48h][info.bits].table;
- em = expected_h48[info.h48h][info.bits].max;
+ ed = expected_h48[info.h48h].table;
+ em = expected_h48[info.h48h].max;
LOG("[checkdata] Checking distribution for '%s' from "
"table preamble\n", info.solver);
diff --git a/src/solvers/h48/gendata_h48.h b/src/solvers/h48/gendata_h48.h
@@ -2,24 +2,22 @@ STATIC long long gendata_h48_dispatch(
const char *, unsigned long long, unsigned char *);
STATIC uint64_t gendata_h48short(gendata_h48short_arg_t [static 1]);
STATIC int64_t gendata_h48(gendata_h48_arg_t [static 1]);
-STATIC void gendata_h48k2(gendata_h48_arg_t [static 1]);
-STATIC void *gendata_h48k2_runthread(void *);
+STATIC void gendata_h48_maintable(gendata_h48_arg_t [static 1]);
+STATIC void *gendata_h48_runthread(void *);
STATIC_INLINE void gendata_h48_mark(gendata_h48_mark_t [static 1]);
-STATIC_INLINE bool gendata_h48k2_dfs_stop(
- cube_t, int8_t, h48k2_dfs_arg_t [static 1]);
-STATIC void gendata_h48k2_dfs(h48k2_dfs_arg_t [static 1]);
-STATIC tableinfo_t makeinfo_h48k2(gendata_h48_arg_t [static 1]);
+STATIC_INLINE bool gendata_h48_dfs_stop(
+ cube_t, int8_t, h48_dfs_arg_t [static 1]);
+STATIC void gendata_h48_dfs(h48_dfs_arg_t [static 1]);
+STATIC tableinfo_t makeinfo_h48(gendata_h48_arg_t [static 1]);
STATIC const uint32_t *get_cocsepdata_constptr(const unsigned char *);
STATIC const unsigned char *get_h48data_constptr(const unsigned char *);
-STATIC_INLINE uint8_t get_h48_pval(const unsigned char *, uint64_t, uint8_t);
-STATIC_INLINE void set_h48_pval(unsigned char *, uint64_t, uint8_t, uint8_t);
-STATIC_INLINE uint8_t get_h48_pvalmin(
- const unsigned char *, uint64_t, uint8_t);
-STATIC_INLINE void set_h48_pvalmin(
- unsigned char *, uint64_t, uint8_t, uint8_t);
+STATIC_INLINE uint8_t get_h48_pval(const unsigned char *, uint64_t);
+STATIC_INLINE void set_h48_pval(unsigned char *, uint64_t, uint8_t);
+STATIC_INLINE uint8_t get_h48_pvalmin(const unsigned char *, uint64_t);
+STATIC_INLINE void set_h48_pvalmin(unsigned char *, uint64_t, uint8_t);
STATIC long long
gendata_h48_dispatch(
@@ -31,7 +29,7 @@ gendata_h48_dispatch(
long long err;
gendata_h48_arg_t arg;
- err = parse_h48_hk(solver, &arg.h, &arg.k);
+ err = parse_h48h(solver, &arg.h);
if (err != NISSY_OK)
return err;
@@ -85,7 +83,7 @@ gendata_h48(gendata_h48_arg_t arg[static 1])
tableinfo_t cocsepinfo, h48info;
cocsepsize = COCSEP_FULLSIZE;
- h48size = INFOSIZE + H48_TABLESIZE(arg->h, arg->k);
+ h48size = INFOSIZE + H48_TABLESIZE(arg->h);
eoesepsize = EOESEP_FULLSIZE;
size = cocsepsize + h48size + eoesepsize;
@@ -106,11 +104,8 @@ gendata_h48(gendata_h48_arg_t arg[static 1])
arg->cocsepdata = (uint32_t *)cocsepdata_offset;
arg->h48buf = (wrapthread_atomic unsigned char*)arg->buf + cocsepsize;
-/* TODO this can probably be removed now? */
- arg->base = 99;
-/* */
-
- gendata_h48k2(arg);
+ /* Compute the main H48 table with two bits per entry */
+ gendata_h48_maintable(arg);
r = readtableinfo(arg->buf_size, arg->buf, &cocsepinfo);
if (r != NISSY_OK) {
@@ -152,10 +147,10 @@ gendata_h48(gendata_h48_arg_t arg[static 1])
}
STATIC void
-gendata_h48k2(gendata_h48_arg_t arg[static 1])
+gendata_h48_maintable(gendata_h48_arg_t arg[static 1])
{
/*
- * A good base value for the k=2 tables have few positions with value
+ * A good base value for the h48 tables have few positions with value
* 0, because those are treated as lower bound 0 and require a second
* lookup in another table, and at the same time not too many positions
* with value 3, because some of those are under-estimates.
@@ -208,13 +203,13 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1])
uint64_t i, ii, inext, bufsize, done, nshort, velocity;
h48map_t shortcubes;
gendata_h48short_arg_t shortarg;
- h48k2_dfs_arg_t dfsarg[THREADS];
+ h48_dfs_arg_t dfsarg[THREADS];
wrapthread_define_var_thread_t(thread[THREADS]);
wrapthread_define_var_mutex_t(shortcubes_mutex);
wrapthread_define_var_mutex_t(table_mutex[CHUNKS]);
table = (unsigned char *)arg->h48buf + INFOSIZE;
- memset(table, 0xFF, H48_TABLESIZE(arg->h, arg->k));
+ memset(table, 0xFF, H48_TABLESIZE(arg->h));
LOG("[H48 gendata] Computing depth <=%" PRIu8 "\n", shortdepth)
h48map_create(&shortcubes, capacity, randomizer);
@@ -229,9 +224,8 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1])
nshort = shortarg.map->n;
LOG("[H48 gendata] Computed %" PRIu64 " positions\n", nshort);
- if (arg->base >= 20)
- arg->base = base[arg->h];
- arg->info = makeinfo_h48k2(arg);
+ arg->base = base[arg->h];
+ arg->info = makeinfo_h48(arg);
inext = 0;
count = 0;
@@ -239,9 +233,8 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1])
for (i = 0; i < CHUNKS; i++)
wrapthread_mutex_init(&table_mutex[i], NULL);
for (i = 0; i < THREADS; i++) {
- dfsarg[i] = (h48k2_dfs_arg_t){
+ dfsarg[i] = (h48_dfs_arg_t){
.h = arg->h,
- .k = arg->k,
.base = arg->base,
.shortdepth = shortdepth,
.cocsepdata = arg->cocsepdata,
@@ -257,12 +250,13 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1])
dfsarg[i].table_mutex[ii] = &table_mutex[ii];
wrapthread_create(
- &thread[i], NULL, gendata_h48k2_runthread, &dfsarg[i]);
+ &thread[i], NULL, gendata_h48_runthread, &dfsarg[i]);
}
if (NISSY_CANSLEEP) {
/* Log the progress periodically */
- LOG("Processing 'short cubes'. This will take a while.\n");
+ LOG("[H48 gendata] Processing 'short cubes'. "
+ "This will take a while.\n");
/* Estimate velocity by checking how much is done after 1s */
msleep(1000);
@@ -277,8 +271,8 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1])
wrapthread_mutex_lock(&shortcubes_mutex);
done = count;
wrapthread_mutex_unlock(&shortcubes_mutex);
- LOG("Processed %" PRIu64 " / %" PRIu64 " cubes\n",
- (done / 1000) * 1000, nshort);
+ LOG("[H48 gendata] Processed %" PRIu64 " / %" PRIu64
+ " cubes\n", (done / 1000) * 1000, nshort);
}
} else {
LOG("Status updates won't be available because the sleep() "
@@ -297,14 +291,14 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1])
}
STATIC void *
-gendata_h48k2_runthread(void *arg)
+gendata_h48_runthread(void *arg)
{
uint64_t coord, coordext, coordmin;
kvpair_t kv;
- h48k2_dfs_arg_t *dfsarg;
+ h48_dfs_arg_t *dfsarg;
wrapthread_define_if_threads(uint64_t, mutex);
- dfsarg = (h48k2_dfs_arg_t *)arg;
+ dfsarg = (h48_dfs_arg_t *)arg;
while (true) {
wrapthread_mutex_lock(dfsarg->shortcubes_mutex);
@@ -324,12 +318,12 @@ gendata_h48k2_runthread(void *arg)
mutex = H48_LINE(coord) % CHUNKS;
wrapthread_mutex_lock(dfsarg->table_mutex[mutex]);
- set_h48_pval(dfsarg->table, coordext, dfsarg->k, 0);
- set_h48_pvalmin(dfsarg->table, coordmin, dfsarg->k, kv.val);
+ set_h48_pval(dfsarg->table, coordext, 0);
+ set_h48_pvalmin(dfsarg->table, coordmin, kv.val);
wrapthread_mutex_unlock(dfsarg->table_mutex[mutex]);
} else {
dfsarg->cube = invcoord_h48(kv.key, dfsarg->crep, 11);
- gendata_h48k2_dfs(dfsarg);
+ gendata_h48_dfs(dfsarg);
}
}
@@ -337,7 +331,7 @@ gendata_h48k2_runthread(void *arg)
}
STATIC void
-gendata_h48k2_dfs(h48k2_dfs_arg_t arg[static 1])
+gendata_h48_dfs(h48_dfs_arg_t arg[static 1])
{
int8_t d;
uint8_t m[4];
@@ -346,7 +340,6 @@ gendata_h48k2_dfs(h48k2_dfs_arg_t arg[static 1])
markarg = (gendata_h48_mark_t) {
.h = arg->h,
- .k = arg->k,
.base = arg->base,
.cocsepdata = arg->cocsepdata,
.selfsim = arg->selfsim,
@@ -365,7 +358,7 @@ gendata_h48k2_dfs(h48k2_dfs_arg_t arg[static 1])
for (m[0] = 0; m[0] < 18; m[0]++) {
markarg.depth = d+1;
cube[0] = move(arg->cube, m[0]);
- if (gendata_h48k2_dfs_stop(cube[0], d+1, arg))
+ if (gendata_h48_dfs_stop(cube[0], d+1, arg))
continue;
markarg.cube = cube[0];
gendata_h48_mark(&markarg);
@@ -378,7 +371,7 @@ gendata_h48k2_dfs(h48k2_dfs_arg_t arg[static 1])
continue;
}
cube[1] = move(cube[0], m[1]);
- if (gendata_h48k2_dfs_stop(cube[1], d+2, arg))
+ if (gendata_h48_dfs_stop(cube[1], d+2, arg))
continue;
markarg.cube = cube[1];
gendata_h48_mark(&markarg);
@@ -393,7 +386,7 @@ gendata_h48k2_dfs(h48k2_dfs_arg_t arg[static 1])
continue;
}
cube[2] = move(cube[1], m[2]);
- if (gendata_h48k2_dfs_stop(cube[2], d+3, arg))
+ if (gendata_h48_dfs_stop(cube[2], d+3, arg))
continue;
markarg.cube = cube[2];
gendata_h48_mark(&markarg);
@@ -430,18 +423,18 @@ gendata_h48_mark(gendata_h48_mark_t arg[static 1])
mutex = H48_LINE(coord) % CHUNKS;
wrapthread_mutex_lock(arg->table_mutex[mutex]);
- oldval = get_h48_pval(arg->table, coordext, arg->k);
+ oldval = get_h48_pval(arg->table, coordext);
newval = (uint8_t)MAX(arg->depth, 0);
v = MIN(newval, oldval);
- set_h48_pval(arg->table, coordext, arg->k, v);
+ set_h48_pval(arg->table, coordext, v);
v = arg->depth + arg->base;
- set_h48_pvalmin(arg->table, coordmin, arg->k, v);
+ set_h48_pvalmin(arg->table, coordmin, v);
wrapthread_mutex_unlock(arg->table_mutex[mutex]);
)
}
STATIC_INLINE bool
-gendata_h48k2_dfs_stop(cube_t cube, int8_t d, h48k2_dfs_arg_t arg[static 1])
+gendata_h48_dfs_stop(cube_t cube, int8_t d, h48_dfs_arg_t arg[static 1])
{
uint64_t val;
uint64_t coord, coordext;
@@ -455,11 +448,11 @@ gendata_h48k2_dfs_stop(cube_t cube, int8_t d, h48k2_dfs_arg_t arg[static 1])
coordext = H48_LINE_EXT(coord);
mutex = H48_LINE(coord) % CHUNKS;
wrapthread_mutex_lock(arg->table_mutex[mutex]);
- oldval = get_h48_pval(arg->table, coordext, arg->k);
+ oldval = get_h48_pval(arg->table, coordext);
wrapthread_mutex_unlock(arg->table_mutex[mutex]);
return oldval <= d;
} else {
- /* With 0 < k < 11 we do not have a "real coordinate".
+ /* With 0 < h < 11 we do not have a "real coordinate".
The best we can do is checking if we backtracked to
one of the "short cubes". */
coord = coord_h48(cube, arg->cocsepdata, 11);
@@ -469,15 +462,15 @@ gendata_h48k2_dfs_stop(cube_t cube, int8_t d, h48k2_dfs_arg_t arg[static 1])
}
STATIC tableinfo_t
-makeinfo_h48k2(gendata_h48_arg_t arg[static 1])
+makeinfo_h48(gendata_h48_arg_t arg[static 1])
{
tableinfo_t info;
info = (tableinfo_t) {
- .solver = "h48 solver h = , k = 2",
+ .solver = "h48 solver h = ",
.type = TABLETYPE_PRUNING,
.infosize = INFOSIZE,
- .fullsize = H48_TABLESIZE(arg->h, 2) + INFOSIZE,
+ .fullsize = H48_TABLESIZE(arg->h) + INFOSIZE,
.hash = 0,
.entries = H48_COORDMAX(arg->h) + 2 * H48_LINES(arg->h),
.classes = 0,
@@ -507,31 +500,31 @@ get_h48data_constptr(const unsigned char *data)
}
STATIC_INLINE uint8_t
-get_h48_pval(const unsigned char *table, uint64_t i, uint8_t k)
+get_h48_pval(const unsigned char *table, uint64_t i)
{
- return (table[H48_INDEX(i, k)] & H48_MASK(i, k)) >> H48_SHIFT(i, k);
+ return (table[H48_INDEX(i)] & H48_MASK(i)) >> H48_SHIFT(i);
}
STATIC_INLINE uint8_t
-get_h48_pvalmin(const unsigned char *table, uint64_t i, uint8_t k)
+get_h48_pvalmin(const unsigned char *table, uint64_t i)
{
- return (get_h48_pval(table, i, k) << UINT8_C(2)) +
- get_h48_pval(table, i+UINT64_C(1), k);
+ return (get_h48_pval(table, i) << UINT8_C(2)) +
+ get_h48_pval(table, i+UINT64_C(1));
}
STATIC_INLINE void
-set_h48_pval(unsigned char *table, uint64_t i, uint8_t k, uint8_t val)
+set_h48_pval(unsigned char *table, uint64_t i, uint8_t val)
{
- table[H48_INDEX(i, k)] = (table[H48_INDEX(i, k)] & (~H48_MASK(i, k)))
- | (val << H48_SHIFT(i, k));
+ table[H48_INDEX(i)] = (table[H48_INDEX(i)] & (~H48_MASK(i)))
+ | (val << H48_SHIFT(i));
}
STATIC_INLINE void
-set_h48_pvalmin(unsigned char *table, uint64_t i, uint8_t k, uint8_t val)
+set_h48_pvalmin(unsigned char *table, uint64_t i, uint8_t val)
{
uint8_t v;
- v = MIN(val, get_h48_pvalmin(table, i, k));
- set_h48_pval(table, i, k, v >> UINT8_C(2));
- set_h48_pval(table, i+UINT64_C(1), k, v % UINT8_C(4));
+ v = MIN(val, get_h48_pvalmin(table, i));
+ set_h48_pval(table, i, v >> UINT8_C(2));
+ set_h48_pval(table, i+UINT64_C(1), v % UINT8_C(4));
}
diff --git a/src/solvers/h48/gendata_types_macros.h b/src/solvers/h48/gendata_types_macros.h
@@ -20,13 +20,10 @@
#define H48_COORDMAX_NOEO (COCSEP_CLASSES * ESEP_MAX)
#define H48_COORDMAX(h) (H48_COORDMAX_NOEO << (uint64_t)(h))
-#define H48_DIV(k) ((size_t)8 / (size_t)(k))
-#define H48_TABLESIZE_K4(h) DIV_ROUND_UP((size_t)H48_COORDMAX(h), H48_DIV(4))
-#define H48_COEFF(k) (UINT64_C(8) / (uint64_t)(k))
-#define H48_INDEX(i, k) ((i) / H48_COEFF(k))
-#define H48_SHIFT(i, k) ((uint8_t)(k) * (uint8_t)((i) % H48_COEFF(k)))
-#define H48_MASK(i, k) ((UINT8_BIT(k) - UINT8_C(1)) << H48_SHIFT(i, k))
+#define H48_INDEX(i) ((i) / UINT64_C(4))
+#define H48_SHIFT(i) (UINT8_C(2) * (uint8_t)((i) % UINT64_C(4)))
+#define H48_MASK(i) (UINT8_C(3) << H48_SHIFT(i))
#define H48_LINE_BITS UINT64_C(512)
#define H48_LINE_BYTES (H48_LINE_BITS >> UINT64_C(3))
@@ -37,10 +34,7 @@
#define H48_LINE_MIN(i) \
((H48_LINE(i) + UINT64_C(1)) * H48_LINE_ALLCOORDS - UINT64_C(2))
#define H48_LINES(h) DIV_ROUND_UP(H48_COORDMAX(h), H48_LINE_COORDS)
-#define H48_TABLESIZE_K2(h) ((size_t)(H48_LINE_BYTES * H48_LINES(h)))
-
-#define H48_TABLESIZE(h, k) \
- ((k) == 4 ? H48_TABLESIZE_K4(h) : H48_TABLESIZE_K2(h))
+#define H48_TABLESIZE(h) ((size_t)(H48_LINE_BYTES * H48_LINES(h)))
#define CHUNKS 2000
@@ -76,7 +70,6 @@ typedef struct {
typedef struct {
uint8_t h;
- uint8_t k;
uint8_t base;
uint8_t maxdepth;
tableinfo_t info;
@@ -99,7 +92,6 @@ typedef struct {
typedef struct {
cube_t cube;
uint8_t h;
- uint8_t k;
uint8_t base;
uint8_t shortdepth;
uint32_t *cocsepdata;
@@ -111,13 +103,12 @@ typedef struct {
wrapthread_define_struct_mutex_t(*table_mutex[CHUNKS]);
uint64_t *next;
wrapthread_atomic uint64_t *count;
-} h48k2_dfs_arg_t;
+} h48_dfs_arg_t;
typedef struct {
cube_t cube;
int8_t depth;
uint8_t h;
- uint8_t k;
uint8_t base;
uint32_t *cocsepdata;
uint64_t *selfsim;
diff --git a/src/solvers/h48/solve.h b/src/solvers/h48/solve.h
@@ -30,7 +30,6 @@ typedef struct {
bool use_lb_normal;
bool use_lb_inverse;
uint8_t h;
- uint8_t k;
uint8_t base;
const uint32_t *cocsepdata;
const unsigned char *h48data;
@@ -93,10 +92,10 @@ STATIC long long solve_h48_dispatch(
void *poll_status_data
)
{
- uint8_t h, k;
+ uint8_t h;
long long err;
- err = parse_h48_hk(solver, &h, &k);
+ err = parse_h48h(solver, &h);
if (err != NISSY_OK)
return err;
@@ -142,15 +141,14 @@ solve_h48_stop(dfsarg_solve_h48_t arg[static 1])
coord = coord_h48_edges(
arg->inverse, COCLASS(data_inv), TTREP(data_inv), arg->h);
coordext = H48_LINE_EXT(coord);
- arg->lb_inverse = get_h48_pval(arg->h48data, coordext, arg->k);
+ arg->lb_inverse = get_h48_pval(arg->h48data, coordext);
arg->table_lookups++;
- if (arg->k == 2 && arg->lb_inverse == 0) {
+ if (arg->lb_inverse == 0) {
arg->table_fallbacks++;
coordmin = H48_LINE_MIN(coord);
- pval_min = get_h48_pvalmin(
- arg->h48data, coordmin, arg->k);
+ pval_min = get_h48_pvalmin(arg->h48data, coordmin);
pval_eoesep = get_eoesep_pval_cube(
arg->h48data_fallback_eoesep, arg->inverse);
arg->lb_inverse = MAX(pval_min, pval_eoesep);
@@ -172,15 +170,14 @@ solve_h48_stop(dfsarg_solve_h48_t arg[static 1])
coord = coord_h48_edges(
arg->cube, COCLASS(data), TTREP(data), arg->h);
coordext = H48_LINE_EXT(coord);
- arg->lb_normal = get_h48_pval(arg->h48data, coordext, arg->k);
+ arg->lb_normal = get_h48_pval(arg->h48data, coordext);
arg->table_lookups++;
- if (arg->k == 2 && arg->lb_normal == 0) {
+ if (arg->lb_normal == 0) {
arg->table_fallbacks++;
coordmin = H48_LINE_MIN(coord);
- pval_min = get_h48_pval(
- arg->h48data, coordmin, arg->k);
+ pval_min = get_h48_pval(arg->h48data, coordmin);
pval_eoesep = get_eoesep_pval_cube(
arg->h48data_fallback_eoesep, arg->cube);
arg->lb_normal = MAX(pval_min, pval_eoesep);
@@ -515,7 +512,6 @@ solve_h48(
.start_cube = oc.cube,
.cube = oc.cube,
.h = info.h48h,
- .k = info.bits,
.base = info.base,
.cocsepdata = cocsepdata,
.h48data = h48data,
diff --git a/src/solvers/h48/utils.h b/src/solvers/h48/utils.h
@@ -4,22 +4,21 @@
#define H48_HMAX UINT8_C(7)
#endif
-long long parse_h48_hk(
- const char *, uint8_t [static 1], uint8_t [static 1]);
+long long parse_h48h(const char *, uint8_t [static 1]);
STATIC long long dataid_h48(const char *, char [static NISSY_SIZE_DATAID]);
long long
-parse_h48_hk(const char *buf, uint8_t h[static 1], uint8_t k[static 1])
+parse_h48h(const char *buf, uint8_t h[static 1])
{
char format_error_msg[100];
sprintf(format_error_msg, "[H48] Error parsing H48 solver: must be in "
- "'h48h*k*' format, but got '%s'\n", buf);
+ "'h48h*' format, but got '%s'\n", buf);
buf += 3;
if (*buf != 'h') {
LOG(format_error_msg);
- goto parse_h48_hk_error;
+ goto parse_h48h_error;
}
buf++;
@@ -27,48 +26,33 @@ parse_h48_hk(const char *buf, uint8_t h[static 1], uint8_t k[static 1])
if (*h > H48_HMAX) {
LOG("[H48] Invalid value %" PRIu8 " for parameter h (must be "
"at most %" PRIu8 ")\n", *h, H48_HMAX);
- goto parse_h48_hk_error;
+ goto parse_h48h_error;
}
for ( ; *buf >= 0 + '0' && *buf <= 9 + '0'; buf++) {
if (*buf == 0) {
LOG(format_error_msg);
- goto parse_h48_hk_error;
+ goto parse_h48h_error;
}
}
- if (*buf != 'k') {
- LOG(format_error_msg);
- goto parse_h48_hk_error;
- }
- buf++;
-
- *k = atoi(buf);
- if (!(*k == 2 || (*k == 4 && *h == 0))) {
- LOG("[H48] Invalid combinations of values h=%" PRIu8 " and k=%"
- PRIu8 " for parameters h and k\n", *h, *k);
- goto parse_h48_hk_error;
- }
-
return NISSY_OK;
-parse_h48_hk_error:
+parse_h48h_error:
*h = 0;
- *k = 0;
return NISSY_ERROR_INVALID_SOLVER;
}
STATIC long long
-dataid_h48(const char *hk, char buf[static NISSY_SIZE_DATAID])
+dataid_h48(const char *str, char buf[static NISSY_SIZE_DATAID])
{
- uint8_t h, k;
+ uint8_t h;
long long err;
- err = parse_h48_hk(hk, &h, &k);
+ err = parse_h48h(str, &h);
if (err < 0)
return err;
- sprintf(buf, "h48h%" PRIu8 "k%" PRIu8 "%s", h, k,
- k == 2 ? "i" : "");
+ sprintf(buf, "h48h%" PRIu8, h);
return NISSY_OK;
}
diff --git a/web/adapter.cpp b/web/adapter.cpp
@@ -16,12 +16,11 @@ std::map<std::string, nissy::solver> loaded_solvers;
const std::set<std::string> available_solvers
{
- "h48h0k4",
- "h48h1k2",
- "h48h2k2",
- "h48h3k2",
- "h48h4k2",
- "h48h5k2",
+ "h48h1",
+ "h48h2",
+ "h48h3",
+ "h48h4",
+ "h48h5",
};
bool is_solver_available(const std::string& name)
diff --git a/web/http/index.html b/web/http/index.html
@@ -12,11 +12,11 @@
<div id="scrambleRaw">
<input id="scrambleText" placeholder="Type the scramble here...">
<select id="solverSelector">
- <option value="h48h3k2" selected="selected">
- h48 h=4 k=2 (300 Mb) - light
+ <option value="h48h3" selected="selected">
+ h48 h=4 (500 Mb) - light
</option>
- <option value="h48h5k2" selected="selected">
- h48 h=5 k=2 (1 Gb) - fastest
+ <option value="h48h5" selected="selected">
+ h48 h=5 (1 Gb) - fastest
</option>
</select>
<button id="solveButton">Solve</button>