nissy-core

The "engine" of nissy, including the H48 optimal solver.
git clone https://git.tronto.net/nissy-core
Download | Log | Files | Refs | README | LICENSE

commit bcd52547af58b15868e24f3904e307548d1fd505
parent 21fdf9697ddfab0a9c082a91ae0a79b1b574774c
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Sun, 14 Dec 2025 15:12:36 +0100

Cleanup, update documentation, fix examples

Diffstat:
M.gitignore | 1+
Acpp/examples/solve.cpp | 89+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dcpp/examples/solve_h48h3k2.cpp | 89-------------------------------------------------------------------------------
Mdoc/h48.md | 130++++++++++++++++++++++++++++++++-----------------------------------------------
Mdoc/solvers.md | 7+++----
Mpython/examples/solve.py | 4++--
Msrc/solvers/dispatch.h | 2+-
Msrc/solvers/h48/checkdata.h | 192+++++++++++++++++++++++++++++++------------------------------------------------
Msrc/solvers/h48/gendata_h48.h | 121+++++++++++++++++++++++++++++++++++++------------------------------------------
Msrc/solvers/h48/gendata_types_macros.h | 19+++++--------------
Msrc/solvers/h48/solve.h | 20++++++++------------
Msrc/solvers/h48/utils.h | 38+++++++++++---------------------------
Mweb/adapter.cpp | 11+++++------
Mweb/http/index.html | 8++++----
14 files changed, 314 insertions(+), 417 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -12,6 +12,7 @@ run run.exe debugrun tables/* +tables-old/* test/*/runtest test/.DS_Store test/run diff --git a/cpp/examples/solve.cpp b/cpp/examples/solve.cpp @@ -0,0 +1,89 @@ +/* Simple demo for an H48 solver */ + +#include "../nissy.h" +#include <filesystem> +#include <fstream> +#include <iostream> +#include <string> + +int main() { + // Get verbose output + nissy::set_logger([](const char* s, void *u) { std::cout << s; }, NULL); + + // Get the scramble from the user + std::cout << "Enter scramble: "; + std::string scramble; + std::getline(std::cin, scramble); + + // Apply scramble to a solved cube + nissy::cube c; + if (!c.move(scramble).ok()) { + std::cout << "Invalid scramble!" << std::endl; + return 1; + } + + // Ask for a limit on the solution's length + int maxmoves; + std::cout << "Maximum number of moves: "; + std::cin >> maxmoves; + + // Load the solver + auto h48h3 = std::get<nissy::solver>(nissy::solver::get("h48h3")); + std::filesystem::path tableDir("tables"); + std::filesystem::create_directories(tableDir); // Ignored if dir exists + std::filesystem::path tableFile("tables/" + h48h3.id); + + // If the table is not present, generate it + if (!std::filesystem::exists(tableFile)) { + std::cout << "Data for h48h3 solver was not found, " + << "generating it..." << std::endl; + auto err = h48h3.generate_data(); + if (!err.ok()) { + std::cout << "Unexpected error! Error code: " + << err.value << std::endl; + return 1; + } + std::ofstream ofs(tableFile, std::ios::binary); + ofs.write(reinterpret_cast<char *>(h48h3.data.data()), + h48h3.size); + std::cout << "Table generated and written to " + << tableFile << std::endl; + ofs.close(); + } + // Otherwise read it from file + else { + std::cout << "Loading data table from file" << std::endl; + std::ifstream ifs(tableFile, std::ios::binary); + h48h3.read_data(ifs); + ifs.close(); + std::cout << "Data loaded, checking table..." << std::endl; + auto err = h48h3.check_data(); + if (!err.ok()) { + std::cout << "Error reading data table from file! " + << "Error code: " << err.value << std::endl; + return 1; + } + std::cout << "Data table loaded" << std::endl; + } + + // Solve + auto solve_result = h48h3.solve(c, nissy::nissflag::NORMAL, + 0, maxmoves, 1, 20, 8, NULL, NULL); + + // Write the result + if (!solve_result.err.ok()) { + std::cout << "Error solving! Error code: " << + solve_result.err.value << std::endl; + return 1; + } + + if (solve_result.solutions.size() == 0) { + std::cout << "No solution found!" << std::endl; + } else { + auto len = nissy::count_moves(solve_result.solutions).value; + std::cout << "Solution (" << len << " moves): " + << solve_result.solutions << std::endl; + } + + return 0; +} diff --git a/cpp/examples/solve_h48h3k2.cpp b/cpp/examples/solve_h48h3k2.cpp @@ -1,89 +0,0 @@ -/* Simple demo for an H48 solver */ - -#include "../nissy.h" -#include <filesystem> -#include <fstream> -#include <iostream> -#include <string> - -int main() { - // Get verbose output - nissy::set_logger([](const char* s, void *u) { std::cout << s; }, NULL); - - // Get the scramble from the user - std::cout << "Enter scramble: "; - std::string scramble; - std::getline(std::cin, scramble); - - // Apply scramble to a solved cube - nissy::cube c; - if (!c.move(scramble).ok()) { - std::cout << "Invalid scramble!" << std::endl; - return 1; - } - - // Ask for a limit on the solution's length - int maxmoves; - std::cout << "Maximum number of moves: "; - std::cin >> maxmoves; - - // Load the solver - auto h48h3k2 = std::get<nissy::solver>(nissy::solver::get("h48h3k2")); - std::filesystem::path tableDir("tables"); - std::filesystem::create_directories(tableDir); // Ignored if dir exists - std::filesystem::path tableFile("tables/" + h48h3k2.id); - - // If the table is not present, generate it - if (!std::filesystem::exists(tableFile)) { - std::cout << "Data for h48h3k2 solver was not found, " - << "generating it..." << std::endl; - auto err = h48h3k2.generate_data(); - if (!err.ok()) { - std::cout << "Unexpected error! Error code: " - << err.value << std::endl; - return 1; - } - std::ofstream ofs(tableFile, std::ios::binary); - ofs.write(reinterpret_cast<char *>(h48h3k2.data.data()), - h48h3k2.size); - std::cout << "Table generated and written to " - << tableFile << std::endl; - ofs.close(); - } - // Otherwise read it from file - else { - std::cout << "Loading data table from file" << std::endl; - std::ifstream ifs(tableFile, std::ios::binary); - h48h3k2.read_data(ifs); - ifs.close(); - std::cout << "Data loaded, checking table..." << std::endl; - auto err = h48h3k2.check_data(); - if (!err.ok()) { - std::cout << "Error reading data table from file! " - << "Error code: " << err.value << std::endl; - return 1; - } - std::cout << "Data table loaded" << std::endl; - } - - // Solve - auto solve_result = h48h3k2.solve(c, nissy::nissflag::NORMAL, - 0, maxmoves, 1, 20, 8, NULL, NULL); - - // Write the result - if (!solve_result.err.ok()) { - std::cout << "Error solving! Error code: " << - solve_result.err.value << std::endl; - return 1; - } - - if (solve_result.solutions.size() == 0) { - std::cout << "No solution found!" << std::endl; - } else { - auto len = nissy::count_moves(solve_result.solutions).value; - std::cout << "Solution (" << len << " moves): " - << solve_result.solutions << std::endl; - } - - return 0; -} diff --git a/doc/h48.md b/doc/h48.md @@ -1,9 +1,8 @@ # The H48 optimal solver This document contains information on the H48 Rubik's Cube optimal solver. -The implementation of the solver is still in progress. This document -partly describes ideas that have not been implemented yet, and it will -be updated to reflect the actual implementation. +This solver is occasionally improved and optimized, and this document +is updated accordingly to reflect the current implementation. I highly encourage the reader to check out Jaap Scherphuis' [Computer Puzzling page](https://www.jaapsch.net/puzzles/compcube.htm) @@ -215,46 +214,40 @@ stored in a single 32 bit integer, so that the table uses less than 12MB. ### Getting the pruning value -Once we have computed the full h0 coordinate, we can access the correct -entry in the full pruning table. As mentioned above, the pruning table -can be one of three kinds: - -* 4 bits per entry, or `k4`: In this case the pruning value (between 0 - and 15) can be simply read off the table. -* 2 bits per entry, or `k2`: Tables of this kind work as described by - Rokicki in the - [nxopt document](https://github.com/rokicki/cube20src/blob/master/nxopt.md). - In this case the pruning table also has a *base value*, that determines - the offset to be added to each entry (each entry can only be 0, 1, 2 or 3). - If the base value is `b`, a pruning value of 1, 2 or 3 can be used directly - as a lower bound of b+1, b+2 and b+3 respectively. However, a value of 0 - could mean that the actual lower bound is anything between 0 and b, so we - cannot take b as a lower bound. Instead we have to use a pruning value from - another table - see the section "Fallback tables" below. -* 1 bit per entry, or `k1`: With one bit per entry, the only information we - can get from the pruning table is wether or not the current position - requires more or fewer moves than a fixed base value b. This can still be - valuable if most positions are more or less equally split between two - pruning values. - (Work in progress - `k1` tables not available in the code yet) - -### Fallback tables - -When a pruning table does not store the exact lower bound value, for -example in the case of `k2` table as described above, we need to refine -our estimate using another table, which we call *fallback table*. -In the current implementation, we actually use two different tables: - -* **h0** table: for larger `k2` tables we get a fallback value from the full - table for the **h0** coordinate. -* edges-only table: to improve the pruning value for certain specific - scrambles, namely whose with solved corners such as the superflip, - we employ a second table that takes into account the edges of a full - **h11** coordinate. This table is small (around 1MB). - -More tables could be used to refine the fallback estimate, but each -additional table leads to longer lookup times, especially if it is -too large to fit in cache. +Once we have computed the full h0 coordinate, we can access +the correct entry in the full pruning table. We chose to +use 2 bits per entry, as described by Rokicki in the [nxopt +document](https://github.com/rokicki/cube20src/blob/master/nxopt.md). +This means that every pruning table needs to have an associated *base +value*, that determines the offset to be added to each entry (each entry +can only be 0, 1, 2 or 3). If the base value is `b`, a pruning value +of 1, 2 or 3 can be used directly as a lower bound of b+1, b+2 and b+3 +respectively. However, a value of 0 could mean that the actual lower +bound is anything between 0 and b, so we cannot take b as a lower bound. + +To be able to still use some sort of pruning value even when we get a +0 read, we use a **fallback table**. Inspired by nxopt, this table is +interleaved with the main table for cache efficiency: every 254 entries +(508 bits), we store in 4 bits the minimum of the pruning values of these +254 entries as a number from 0 to 16, without subtracting the table's +base. Since a cache line is 512 bits long, we never get a cache miss +when looking up the minimum value in the fallback table after a 0 read. +Smaller lines (of 256 or 128 bits) have been tried, but they do not +give any significant improvement over 512 bit lines. + +Moreover, as an additional heuristic, in case of a 0 read we also look +up another pruning value in a table that takes into account only the +position of the edges. This table is small (around 1MB), so repeated +accesses to it are not too slow. In practice, this gives a small speed up +of around 5%. More tables could be used to refine the fallback estimate, +but each additional table leads to longer lookup times, especially if +it is too large to fit in cache. + +Previous versions of this implementation (up to commit 6c42463, or +to version 0.2) also included the possibility of a table with 4 bits +per entry, at least for the `h0` case. Such tables did not require +a fallback lookup, but due to their large size they were not less +efficient. Therefore, they have been removed. ### Estimation refinements @@ -368,20 +361,20 @@ they are obviously inexistent when looking for *all* optimal solutions. ## Pruning table computation -### 4 bits tables for h0 and h11 +We first describe how the pruning table would be computed if we stored +each pruning value fully, using 4 bits per entry. This algorithm used +to be implemented (up to commit 6c42463), but has since been removed. -Computing the pruning table for a "real" h48 coordinate, that **h0** -or **h11**, using 4 bits per entry is quite simple. The method currently -implemented works as follows. +### Full tables for h0 and h11 (not implemented) -First, we set the value of the solved cube's coordinate to 0 and every -other entry to 15, the largest 4-bit integer. Then we iteratively scan -through the table and compute the coordinates at depth n+1 from those at -depth n as follows: for each coordinate at depth n, we compute a valid -representative for it, we apply each of the possible 18 moves to it, and -we set their value to n+1. Once the table is filled or we have reached -depth 15, we stop (there are no **h0** coordinates at depth 16 or more, -but I currently don't know if this is the case for **h11**). +If we are allowed to store the full pruning value for each entry, +computing the pruning tables is not that difficult. First, we set the +value of the solved cube's coordinate to 0 and every other entry to 15, +the largest 4-bit integer. Then we iteratively scan through the table and +compute the coordinates at depth n+1 from those at depth n as follows: +for each coordinate at depth n, we compute a valid representative for it, +we apply each of the possible 18 moves to it, and we set their value to +n+1. Once the table is filled or we have reached depth 15, we stop. Unfortunately what I described above is an oversimplification: one also must take into account the case where the corner coordinate is @@ -400,20 +393,7 @@ Finally, this algorithm can be easily parallelized by dividing the set of coordinates into separate sections, but one must take into account that a coordinate and its neighbors are usually not in the same section. -This method is currently implemented only for **h0**, since the 4 bits -table for **h11** would require around 115GB of RAM to compute, and I -don't have this much memory. - -### 2 bits tables with for h0 and h11 - -(Work in progress - currently there is no specialized routine for -computing 2 bits table for "real" coordinates; instead, an optimized -version of the generic method explained below is used) - -### A generic method for intermediate coordinates (from h1 to h10) - -(Work in progress - this method is quite slow and it may be replaced -by a better algorithm in the future) +### 2 bit tables Computing the pruning tables for intermediate coordinates is not as simple. The reason is that these coordinates are not invariant under @@ -441,14 +421,13 @@ map indexed by their **h11** coordinate value. This way we do not have to brute-force our way through from depth 0, and it make this method considerably faster. Unfortunately, since the number of coordinates at a certain depth increases exponentially with the depth, we are for -now limited at storing the coordinates at depth 8. (Work in progress - -we may experiment with depth 9 in the future) +now limited at storing the coordinates at depth 8. This method works for the "real" coordinates **h0** and **h11** as well. Moreover, in this case one can optimize it further by avoiding to repeat -the search from a coordinate that has already been visited. (Work -in progress - this method will be replaced in the future by a more -efficient one) +the search from a coordinate that has already been visited. Further +optimization are possible for **h0** and **h11**, and we may implement +them in the future. ## Possible future improvements @@ -458,9 +437,6 @@ notes rather than a description of the solver.* There are some areas where this implementation of the H48 optimal solver can be improved: -* Intertwining fallback tables to the main table. This is a trick that - nxopt uses to reduce the number of cache misses, but we have not - implemented in H48 yet. * Faster pruning table generation for **h11** and **h0**. Since these two coordinates are "real" coordinates, we can use a different technique to generate their tables faster. This won't affect the solver speed. diff --git a/doc/solvers.md b/doc/solvers.md @@ -10,12 +10,11 @@ An HTM-optimal solver using fully-symmetric pruning tables. For details about how this solver works, see [h48.md](./h48.md). For benchmarks see [benchmarks/benchmarks.md](../benchmarks/benchmarks.md). -* Name: of the form `h8hXk2` for `X` from 0 to 11, or `h48h0k4`. The name - `optimal` is an alias for `h48h7k2`. +* Name: of the form `h8hX` for `X` from 0 to 11. The name `optimal` is + an alias for `h48h7`. * Requisites: none. * Moveset: HTM (all 18 basic moves). -* Data size: 59MB for `h48h0k4`, from 115MB to 59GB for `h48hXk2` - (roughly 59MB + 2<sup>X</sup>*56MB). +* From 115MB to 59GB (roughly 2<sup>X</sup>*56MB). ## Coordinate solvers diff --git a/python/examples/solve.py b/python/examples/solve.py @@ -12,14 +12,14 @@ sys.path.append(os.getcwd() + os.path.sep + "python") import nissy # Choose the solver you prefer -solver = "h48h0k4" +solver = "h48h3" # Load the pruning table from file, generate it if needed datapath = "tables" + os.path.sep + solver if os.path.exists(datapath): data = bytearray(open(datapath, "rb").read()) else: - data = nissy.gendata("h48h0k4") + data = nissy.gendata(solver) print("Generated data will NOT be persisted") # Get a scrambled cube diff --git a/src/solvers/dispatch.h b/src/solvers/dispatch.h @@ -43,7 +43,7 @@ solver_dispatch_t solver_dispatchers[] = { }; const char *solver_aliases[][2] = { - { "optimal", "h48h7i" }, + { "optimal", "h48h7" }, { "eofb", "coord_EO_UF" }, { "eorl", "coord_EO_UR" }, { "eoud", "coord_EO_FD" }, diff --git a/src/solvers/h48/checkdata.h b/src/solvers/h48/checkdata.h @@ -18,155 +18,113 @@ uint64_t expected_cocsep[21] = { struct { uint8_t max; uint64_t table[21]; -} expected_h48[12][9] = { +} expected_h48[12] = { [0] = { - [2] = { - .max = 3, - .table = { - [0] = 5473562, - [1] = 34776317, - [2] = 68566704, - [3] = 8750867, - }, - }, - [4] = { - .max = 12, - .table = { - [0] = 1, - [1] = 1, - [2] = 4, - [3] = 34, - [4] = 331, - [5] = 3612, - [6] = 41605, - [7] = 474128, - [8] = 4953846, - [9] = 34776317, - [10] = 68566704, - [11] = 8749194, - [12] = 1673, - }, + .max = 3, + .table = { + [0] = 5473562, + [1] = 34776317, + [2] = 68566704, + [3] = 8750867, }, }, [1] = { - [2] = { - .max = 3, - .table = { - [0] = 6012079, - [1] = 45822302, - [2] = 142018732, - [3] = 41281787, - }, + .max = 3, + .table = { + [0] = 6012079, + [1] = 45822302, + [2] = 142018732, + [3] = 41281787, }, }, [2] = { - [2] = { - .max = 3, - .table = { - [0] = 6391286, - [1] = 55494785, - [2] = 252389935, - [3] = 155993794, - }, + .max = 3, + .table = { + [0] = 6391286, + [1] = 55494785, + [2] = 252389935, + [3] = 155993794, }, }, [3] = { - [2] = { - .max = 3, - .table = { - [0] = 6686828, - [1] = 63867852, - [2] = 392789689, - [3] = 477195231, - }, + .max = 3, + .table = { + [0] = 6686828, + [1] = 63867852, + [2] = 392789689, + [3] = 477195231, }, }, [4] = { - [2] = { - .max = 3, - .table = { - [0] = 77147213, - [1] = 543379415, - [2] = 1139570251, - [3] = 120982321, - }, + .max = 3, + .table = { + [0] = 77147213, + [1] = 543379415, + [2] = 1139570251, + [3] = 120982321, }, }, [5] = { - [2] = { - .max = 3, - .table = { - [0] = 82471284, - [1] = 687850732, - [2] = 2345840746, - [3] = 645995638, - }, + .max = 3, + .table = { + [0] = 82471284, + [1] = 687850732, + [2] = 2345840746, + [3] = 645995638, }, }, [6] = { - [2] = { - .max = 3, - .table = { - [0] = 85941099, - [1] = 804752968, - [2] = 4077248182, - [3] = 2556374551, - }, + .max = 3, + .table = { + [0] = 85941099, + [1] = 804752968, + [2] = 4077248182, + [3] = 2556374551, }, }, [7] = { - [2] = { - .max = 3, - .table = { - [0] = 88529761, - [1] = 897323475, - [2] = 6126260791, - [3] = 7936519573, - }, + .max = 3, + .table = { + [0] = 88529761, + [1] = 897323475, + [2] = 6126260791, + [3] = 7936519573, }, }, [8] = { - [2] = { - .max = 3, - .table = { - [0] = 1051579940, - [1] = 8136021316, - [2] = 19024479822, - [3] = 1885186122, - }, + .max = 3, + .table = { + [0] = 1051579940, + [1] = 8136021316, + [2] = 19024479822, + [3] = 1885186122, }, }, [9] = { - [2] = { - .max = 3, - .table = { - [0] = 1102038189, - [1] = 9888265242, - [2] = 38299375805, - [3] = 10904855164, - }, + .max = 3, + .table = { + [0] = 1102038189, + [1] = 9888265242, + [2] = 38299375805, + [3] = 10904855164, }, }, [10] = { - [2] = { - .max = 3, - .table = { - [0] = 1133240039, - [1] = 11196285614, - [2] = 64164702961, - [3] = 43894840186, - }, + .max = 3, + .table = { + [0] = 1133240039, + [1] = 11196285614, + [2] = 64164702961, + [3] = 43894840186, }, }, [11] = { - [2] = { - .max = 3, - .table = { - [0] = 1150763161, - [1] = 12045845660, - [2] = 91163433330, - [3] = 136418095449, - }, + .max = 3, + .table = { + [0] = 1150763161, + [1] = 12045845660, + [2] = 91163433330, + [3] = 136418095449, }, }, }; @@ -210,8 +168,8 @@ checkdata_h48( continue; } - ed = expected_h48[info.h48h][info.bits].table; - em = expected_h48[info.h48h][info.bits].max; + ed = expected_h48[info.h48h].table; + em = expected_h48[info.h48h].max; LOG("[checkdata] Checking distribution for '%s' from " "table preamble\n", info.solver); diff --git a/src/solvers/h48/gendata_h48.h b/src/solvers/h48/gendata_h48.h @@ -2,24 +2,22 @@ STATIC long long gendata_h48_dispatch( const char *, unsigned long long, unsigned char *); STATIC uint64_t gendata_h48short(gendata_h48short_arg_t [static 1]); STATIC int64_t gendata_h48(gendata_h48_arg_t [static 1]); -STATIC void gendata_h48k2(gendata_h48_arg_t [static 1]); -STATIC void *gendata_h48k2_runthread(void *); +STATIC void gendata_h48_maintable(gendata_h48_arg_t [static 1]); +STATIC void *gendata_h48_runthread(void *); STATIC_INLINE void gendata_h48_mark(gendata_h48_mark_t [static 1]); -STATIC_INLINE bool gendata_h48k2_dfs_stop( - cube_t, int8_t, h48k2_dfs_arg_t [static 1]); -STATIC void gendata_h48k2_dfs(h48k2_dfs_arg_t [static 1]); -STATIC tableinfo_t makeinfo_h48k2(gendata_h48_arg_t [static 1]); +STATIC_INLINE bool gendata_h48_dfs_stop( + cube_t, int8_t, h48_dfs_arg_t [static 1]); +STATIC void gendata_h48_dfs(h48_dfs_arg_t [static 1]); +STATIC tableinfo_t makeinfo_h48(gendata_h48_arg_t [static 1]); STATIC const uint32_t *get_cocsepdata_constptr(const unsigned char *); STATIC const unsigned char *get_h48data_constptr(const unsigned char *); -STATIC_INLINE uint8_t get_h48_pval(const unsigned char *, uint64_t, uint8_t); -STATIC_INLINE void set_h48_pval(unsigned char *, uint64_t, uint8_t, uint8_t); -STATIC_INLINE uint8_t get_h48_pvalmin( - const unsigned char *, uint64_t, uint8_t); -STATIC_INLINE void set_h48_pvalmin( - unsigned char *, uint64_t, uint8_t, uint8_t); +STATIC_INLINE uint8_t get_h48_pval(const unsigned char *, uint64_t); +STATIC_INLINE void set_h48_pval(unsigned char *, uint64_t, uint8_t); +STATIC_INLINE uint8_t get_h48_pvalmin(const unsigned char *, uint64_t); +STATIC_INLINE void set_h48_pvalmin(unsigned char *, uint64_t, uint8_t); STATIC long long gendata_h48_dispatch( @@ -31,7 +29,7 @@ gendata_h48_dispatch( long long err; gendata_h48_arg_t arg; - err = parse_h48_hk(solver, &arg.h, &arg.k); + err = parse_h48h(solver, &arg.h); if (err != NISSY_OK) return err; @@ -85,7 +83,7 @@ gendata_h48(gendata_h48_arg_t arg[static 1]) tableinfo_t cocsepinfo, h48info; cocsepsize = COCSEP_FULLSIZE; - h48size = INFOSIZE + H48_TABLESIZE(arg->h, arg->k); + h48size = INFOSIZE + H48_TABLESIZE(arg->h); eoesepsize = EOESEP_FULLSIZE; size = cocsepsize + h48size + eoesepsize; @@ -106,11 +104,8 @@ gendata_h48(gendata_h48_arg_t arg[static 1]) arg->cocsepdata = (uint32_t *)cocsepdata_offset; arg->h48buf = (wrapthread_atomic unsigned char*)arg->buf + cocsepsize; -/* TODO this can probably be removed now? */ - arg->base = 99; -/* */ - - gendata_h48k2(arg); + /* Compute the main H48 table with two bits per entry */ + gendata_h48_maintable(arg); r = readtableinfo(arg->buf_size, arg->buf, &cocsepinfo); if (r != NISSY_OK) { @@ -152,10 +147,10 @@ gendata_h48(gendata_h48_arg_t arg[static 1]) } STATIC void -gendata_h48k2(gendata_h48_arg_t arg[static 1]) +gendata_h48_maintable(gendata_h48_arg_t arg[static 1]) { /* - * A good base value for the k=2 tables have few positions with value + * A good base value for the h48 tables have few positions with value * 0, because those are treated as lower bound 0 and require a second * lookup in another table, and at the same time not too many positions * with value 3, because some of those are under-estimates. @@ -208,13 +203,13 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1]) uint64_t i, ii, inext, bufsize, done, nshort, velocity; h48map_t shortcubes; gendata_h48short_arg_t shortarg; - h48k2_dfs_arg_t dfsarg[THREADS]; + h48_dfs_arg_t dfsarg[THREADS]; wrapthread_define_var_thread_t(thread[THREADS]); wrapthread_define_var_mutex_t(shortcubes_mutex); wrapthread_define_var_mutex_t(table_mutex[CHUNKS]); table = (unsigned char *)arg->h48buf + INFOSIZE; - memset(table, 0xFF, H48_TABLESIZE(arg->h, arg->k)); + memset(table, 0xFF, H48_TABLESIZE(arg->h)); LOG("[H48 gendata] Computing depth <=%" PRIu8 "\n", shortdepth) h48map_create(&shortcubes, capacity, randomizer); @@ -229,9 +224,8 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1]) nshort = shortarg.map->n; LOG("[H48 gendata] Computed %" PRIu64 " positions\n", nshort); - if (arg->base >= 20) - arg->base = base[arg->h]; - arg->info = makeinfo_h48k2(arg); + arg->base = base[arg->h]; + arg->info = makeinfo_h48(arg); inext = 0; count = 0; @@ -239,9 +233,8 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1]) for (i = 0; i < CHUNKS; i++) wrapthread_mutex_init(&table_mutex[i], NULL); for (i = 0; i < THREADS; i++) { - dfsarg[i] = (h48k2_dfs_arg_t){ + dfsarg[i] = (h48_dfs_arg_t){ .h = arg->h, - .k = arg->k, .base = arg->base, .shortdepth = shortdepth, .cocsepdata = arg->cocsepdata, @@ -257,12 +250,13 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1]) dfsarg[i].table_mutex[ii] = &table_mutex[ii]; wrapthread_create( - &thread[i], NULL, gendata_h48k2_runthread, &dfsarg[i]); + &thread[i], NULL, gendata_h48_runthread, &dfsarg[i]); } if (NISSY_CANSLEEP) { /* Log the progress periodically */ - LOG("Processing 'short cubes'. This will take a while.\n"); + LOG("[H48 gendata] Processing 'short cubes'. " + "This will take a while.\n"); /* Estimate velocity by checking how much is done after 1s */ msleep(1000); @@ -277,8 +271,8 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1]) wrapthread_mutex_lock(&shortcubes_mutex); done = count; wrapthread_mutex_unlock(&shortcubes_mutex); - LOG("Processed %" PRIu64 " / %" PRIu64 " cubes\n", - (done / 1000) * 1000, nshort); + LOG("[H48 gendata] Processed %" PRIu64 " / %" PRIu64 + " cubes\n", (done / 1000) * 1000, nshort); } } else { LOG("Status updates won't be available because the sleep() " @@ -297,14 +291,14 @@ gendata_h48k2(gendata_h48_arg_t arg[static 1]) } STATIC void * -gendata_h48k2_runthread(void *arg) +gendata_h48_runthread(void *arg) { uint64_t coord, coordext, coordmin; kvpair_t kv; - h48k2_dfs_arg_t *dfsarg; + h48_dfs_arg_t *dfsarg; wrapthread_define_if_threads(uint64_t, mutex); - dfsarg = (h48k2_dfs_arg_t *)arg; + dfsarg = (h48_dfs_arg_t *)arg; while (true) { wrapthread_mutex_lock(dfsarg->shortcubes_mutex); @@ -324,12 +318,12 @@ gendata_h48k2_runthread(void *arg) mutex = H48_LINE(coord) % CHUNKS; wrapthread_mutex_lock(dfsarg->table_mutex[mutex]); - set_h48_pval(dfsarg->table, coordext, dfsarg->k, 0); - set_h48_pvalmin(dfsarg->table, coordmin, dfsarg->k, kv.val); + set_h48_pval(dfsarg->table, coordext, 0); + set_h48_pvalmin(dfsarg->table, coordmin, kv.val); wrapthread_mutex_unlock(dfsarg->table_mutex[mutex]); } else { dfsarg->cube = invcoord_h48(kv.key, dfsarg->crep, 11); - gendata_h48k2_dfs(dfsarg); + gendata_h48_dfs(dfsarg); } } @@ -337,7 +331,7 @@ gendata_h48k2_runthread(void *arg) } STATIC void -gendata_h48k2_dfs(h48k2_dfs_arg_t arg[static 1]) +gendata_h48_dfs(h48_dfs_arg_t arg[static 1]) { int8_t d; uint8_t m[4]; @@ -346,7 +340,6 @@ gendata_h48k2_dfs(h48k2_dfs_arg_t arg[static 1]) markarg = (gendata_h48_mark_t) { .h = arg->h, - .k = arg->k, .base = arg->base, .cocsepdata = arg->cocsepdata, .selfsim = arg->selfsim, @@ -365,7 +358,7 @@ gendata_h48k2_dfs(h48k2_dfs_arg_t arg[static 1]) for (m[0] = 0; m[0] < 18; m[0]++) { markarg.depth = d+1; cube[0] = move(arg->cube, m[0]); - if (gendata_h48k2_dfs_stop(cube[0], d+1, arg)) + if (gendata_h48_dfs_stop(cube[0], d+1, arg)) continue; markarg.cube = cube[0]; gendata_h48_mark(&markarg); @@ -378,7 +371,7 @@ gendata_h48k2_dfs(h48k2_dfs_arg_t arg[static 1]) continue; } cube[1] = move(cube[0], m[1]); - if (gendata_h48k2_dfs_stop(cube[1], d+2, arg)) + if (gendata_h48_dfs_stop(cube[1], d+2, arg)) continue; markarg.cube = cube[1]; gendata_h48_mark(&markarg); @@ -393,7 +386,7 @@ gendata_h48k2_dfs(h48k2_dfs_arg_t arg[static 1]) continue; } cube[2] = move(cube[1], m[2]); - if (gendata_h48k2_dfs_stop(cube[2], d+3, arg)) + if (gendata_h48_dfs_stop(cube[2], d+3, arg)) continue; markarg.cube = cube[2]; gendata_h48_mark(&markarg); @@ -430,18 +423,18 @@ gendata_h48_mark(gendata_h48_mark_t arg[static 1]) mutex = H48_LINE(coord) % CHUNKS; wrapthread_mutex_lock(arg->table_mutex[mutex]); - oldval = get_h48_pval(arg->table, coordext, arg->k); + oldval = get_h48_pval(arg->table, coordext); newval = (uint8_t)MAX(arg->depth, 0); v = MIN(newval, oldval); - set_h48_pval(arg->table, coordext, arg->k, v); + set_h48_pval(arg->table, coordext, v); v = arg->depth + arg->base; - set_h48_pvalmin(arg->table, coordmin, arg->k, v); + set_h48_pvalmin(arg->table, coordmin, v); wrapthread_mutex_unlock(arg->table_mutex[mutex]); ) } STATIC_INLINE bool -gendata_h48k2_dfs_stop(cube_t cube, int8_t d, h48k2_dfs_arg_t arg[static 1]) +gendata_h48_dfs_stop(cube_t cube, int8_t d, h48_dfs_arg_t arg[static 1]) { uint64_t val; uint64_t coord, coordext; @@ -455,11 +448,11 @@ gendata_h48k2_dfs_stop(cube_t cube, int8_t d, h48k2_dfs_arg_t arg[static 1]) coordext = H48_LINE_EXT(coord); mutex = H48_LINE(coord) % CHUNKS; wrapthread_mutex_lock(arg->table_mutex[mutex]); - oldval = get_h48_pval(arg->table, coordext, arg->k); + oldval = get_h48_pval(arg->table, coordext); wrapthread_mutex_unlock(arg->table_mutex[mutex]); return oldval <= d; } else { - /* With 0 < k < 11 we do not have a "real coordinate". + /* With 0 < h < 11 we do not have a "real coordinate". The best we can do is checking if we backtracked to one of the "short cubes". */ coord = coord_h48(cube, arg->cocsepdata, 11); @@ -469,15 +462,15 @@ gendata_h48k2_dfs_stop(cube_t cube, int8_t d, h48k2_dfs_arg_t arg[static 1]) } STATIC tableinfo_t -makeinfo_h48k2(gendata_h48_arg_t arg[static 1]) +makeinfo_h48(gendata_h48_arg_t arg[static 1]) { tableinfo_t info; info = (tableinfo_t) { - .solver = "h48 solver h = , k = 2", + .solver = "h48 solver h = ", .type = TABLETYPE_PRUNING, .infosize = INFOSIZE, - .fullsize = H48_TABLESIZE(arg->h, 2) + INFOSIZE, + .fullsize = H48_TABLESIZE(arg->h) + INFOSIZE, .hash = 0, .entries = H48_COORDMAX(arg->h) + 2 * H48_LINES(arg->h), .classes = 0, @@ -507,31 +500,31 @@ get_h48data_constptr(const unsigned char *data) } STATIC_INLINE uint8_t -get_h48_pval(const unsigned char *table, uint64_t i, uint8_t k) +get_h48_pval(const unsigned char *table, uint64_t i) { - return (table[H48_INDEX(i, k)] & H48_MASK(i, k)) >> H48_SHIFT(i, k); + return (table[H48_INDEX(i)] & H48_MASK(i)) >> H48_SHIFT(i); } STATIC_INLINE uint8_t -get_h48_pvalmin(const unsigned char *table, uint64_t i, uint8_t k) +get_h48_pvalmin(const unsigned char *table, uint64_t i) { - return (get_h48_pval(table, i, k) << UINT8_C(2)) + - get_h48_pval(table, i+UINT64_C(1), k); + return (get_h48_pval(table, i) << UINT8_C(2)) + + get_h48_pval(table, i+UINT64_C(1)); } STATIC_INLINE void -set_h48_pval(unsigned char *table, uint64_t i, uint8_t k, uint8_t val) +set_h48_pval(unsigned char *table, uint64_t i, uint8_t val) { - table[H48_INDEX(i, k)] = (table[H48_INDEX(i, k)] & (~H48_MASK(i, k))) - | (val << H48_SHIFT(i, k)); + table[H48_INDEX(i)] = (table[H48_INDEX(i)] & (~H48_MASK(i))) + | (val << H48_SHIFT(i)); } STATIC_INLINE void -set_h48_pvalmin(unsigned char *table, uint64_t i, uint8_t k, uint8_t val) +set_h48_pvalmin(unsigned char *table, uint64_t i, uint8_t val) { uint8_t v; - v = MIN(val, get_h48_pvalmin(table, i, k)); - set_h48_pval(table, i, k, v >> UINT8_C(2)); - set_h48_pval(table, i+UINT64_C(1), k, v % UINT8_C(4)); + v = MIN(val, get_h48_pvalmin(table, i)); + set_h48_pval(table, i, v >> UINT8_C(2)); + set_h48_pval(table, i+UINT64_C(1), v % UINT8_C(4)); } diff --git a/src/solvers/h48/gendata_types_macros.h b/src/solvers/h48/gendata_types_macros.h @@ -20,13 +20,10 @@ #define H48_COORDMAX_NOEO (COCSEP_CLASSES * ESEP_MAX) #define H48_COORDMAX(h) (H48_COORDMAX_NOEO << (uint64_t)(h)) -#define H48_DIV(k) ((size_t)8 / (size_t)(k)) -#define H48_TABLESIZE_K4(h) DIV_ROUND_UP((size_t)H48_COORDMAX(h), H48_DIV(4)) -#define H48_COEFF(k) (UINT64_C(8) / (uint64_t)(k)) -#define H48_INDEX(i, k) ((i) / H48_COEFF(k)) -#define H48_SHIFT(i, k) ((uint8_t)(k) * (uint8_t)((i) % H48_COEFF(k))) -#define H48_MASK(i, k) ((UINT8_BIT(k) - UINT8_C(1)) << H48_SHIFT(i, k)) +#define H48_INDEX(i) ((i) / UINT64_C(4)) +#define H48_SHIFT(i) (UINT8_C(2) * (uint8_t)((i) % UINT64_C(4))) +#define H48_MASK(i) (UINT8_C(3) << H48_SHIFT(i)) #define H48_LINE_BITS UINT64_C(512) #define H48_LINE_BYTES (H48_LINE_BITS >> UINT64_C(3)) @@ -37,10 +34,7 @@ #define H48_LINE_MIN(i) \ ((H48_LINE(i) + UINT64_C(1)) * H48_LINE_ALLCOORDS - UINT64_C(2)) #define H48_LINES(h) DIV_ROUND_UP(H48_COORDMAX(h), H48_LINE_COORDS) -#define H48_TABLESIZE_K2(h) ((size_t)(H48_LINE_BYTES * H48_LINES(h))) - -#define H48_TABLESIZE(h, k) \ - ((k) == 4 ? H48_TABLESIZE_K4(h) : H48_TABLESIZE_K2(h)) +#define H48_TABLESIZE(h) ((size_t)(H48_LINE_BYTES * H48_LINES(h))) #define CHUNKS 2000 @@ -76,7 +70,6 @@ typedef struct { typedef struct { uint8_t h; - uint8_t k; uint8_t base; uint8_t maxdepth; tableinfo_t info; @@ -99,7 +92,6 @@ typedef struct { typedef struct { cube_t cube; uint8_t h; - uint8_t k; uint8_t base; uint8_t shortdepth; uint32_t *cocsepdata; @@ -111,13 +103,12 @@ typedef struct { wrapthread_define_struct_mutex_t(*table_mutex[CHUNKS]); uint64_t *next; wrapthread_atomic uint64_t *count; -} h48k2_dfs_arg_t; +} h48_dfs_arg_t; typedef struct { cube_t cube; int8_t depth; uint8_t h; - uint8_t k; uint8_t base; uint32_t *cocsepdata; uint64_t *selfsim; diff --git a/src/solvers/h48/solve.h b/src/solvers/h48/solve.h @@ -30,7 +30,6 @@ typedef struct { bool use_lb_normal; bool use_lb_inverse; uint8_t h; - uint8_t k; uint8_t base; const uint32_t *cocsepdata; const unsigned char *h48data; @@ -93,10 +92,10 @@ STATIC long long solve_h48_dispatch( void *poll_status_data ) { - uint8_t h, k; + uint8_t h; long long err; - err = parse_h48_hk(solver, &h, &k); + err = parse_h48h(solver, &h); if (err != NISSY_OK) return err; @@ -142,15 +141,14 @@ solve_h48_stop(dfsarg_solve_h48_t arg[static 1]) coord = coord_h48_edges( arg->inverse, COCLASS(data_inv), TTREP(data_inv), arg->h); coordext = H48_LINE_EXT(coord); - arg->lb_inverse = get_h48_pval(arg->h48data, coordext, arg->k); + arg->lb_inverse = get_h48_pval(arg->h48data, coordext); arg->table_lookups++; - if (arg->k == 2 && arg->lb_inverse == 0) { + if (arg->lb_inverse == 0) { arg->table_fallbacks++; coordmin = H48_LINE_MIN(coord); - pval_min = get_h48_pvalmin( - arg->h48data, coordmin, arg->k); + pval_min = get_h48_pvalmin(arg->h48data, coordmin); pval_eoesep = get_eoesep_pval_cube( arg->h48data_fallback_eoesep, arg->inverse); arg->lb_inverse = MAX(pval_min, pval_eoesep); @@ -172,15 +170,14 @@ solve_h48_stop(dfsarg_solve_h48_t arg[static 1]) coord = coord_h48_edges( arg->cube, COCLASS(data), TTREP(data), arg->h); coordext = H48_LINE_EXT(coord); - arg->lb_normal = get_h48_pval(arg->h48data, coordext, arg->k); + arg->lb_normal = get_h48_pval(arg->h48data, coordext); arg->table_lookups++; - if (arg->k == 2 && arg->lb_normal == 0) { + if (arg->lb_normal == 0) { arg->table_fallbacks++; coordmin = H48_LINE_MIN(coord); - pval_min = get_h48_pval( - arg->h48data, coordmin, arg->k); + pval_min = get_h48_pval(arg->h48data, coordmin); pval_eoesep = get_eoesep_pval_cube( arg->h48data_fallback_eoesep, arg->cube); arg->lb_normal = MAX(pval_min, pval_eoesep); @@ -515,7 +512,6 @@ solve_h48( .start_cube = oc.cube, .cube = oc.cube, .h = info.h48h, - .k = info.bits, .base = info.base, .cocsepdata = cocsepdata, .h48data = h48data, diff --git a/src/solvers/h48/utils.h b/src/solvers/h48/utils.h @@ -4,22 +4,21 @@ #define H48_HMAX UINT8_C(7) #endif -long long parse_h48_hk( - const char *, uint8_t [static 1], uint8_t [static 1]); +long long parse_h48h(const char *, uint8_t [static 1]); STATIC long long dataid_h48(const char *, char [static NISSY_SIZE_DATAID]); long long -parse_h48_hk(const char *buf, uint8_t h[static 1], uint8_t k[static 1]) +parse_h48h(const char *buf, uint8_t h[static 1]) { char format_error_msg[100]; sprintf(format_error_msg, "[H48] Error parsing H48 solver: must be in " - "'h48h*k*' format, but got '%s'\n", buf); + "'h48h*' format, but got '%s'\n", buf); buf += 3; if (*buf != 'h') { LOG(format_error_msg); - goto parse_h48_hk_error; + goto parse_h48h_error; } buf++; @@ -27,48 +26,33 @@ parse_h48_hk(const char *buf, uint8_t h[static 1], uint8_t k[static 1]) if (*h > H48_HMAX) { LOG("[H48] Invalid value %" PRIu8 " for parameter h (must be " "at most %" PRIu8 ")\n", *h, H48_HMAX); - goto parse_h48_hk_error; + goto parse_h48h_error; } for ( ; *buf >= 0 + '0' && *buf <= 9 + '0'; buf++) { if (*buf == 0) { LOG(format_error_msg); - goto parse_h48_hk_error; + goto parse_h48h_error; } } - if (*buf != 'k') { - LOG(format_error_msg); - goto parse_h48_hk_error; - } - buf++; - - *k = atoi(buf); - if (!(*k == 2 || (*k == 4 && *h == 0))) { - LOG("[H48] Invalid combinations of values h=%" PRIu8 " and k=%" - PRIu8 " for parameters h and k\n", *h, *k); - goto parse_h48_hk_error; - } - return NISSY_OK; -parse_h48_hk_error: +parse_h48h_error: *h = 0; - *k = 0; return NISSY_ERROR_INVALID_SOLVER; } STATIC long long -dataid_h48(const char *hk, char buf[static NISSY_SIZE_DATAID]) +dataid_h48(const char *str, char buf[static NISSY_SIZE_DATAID]) { - uint8_t h, k; + uint8_t h; long long err; - err = parse_h48_hk(hk, &h, &k); + err = parse_h48h(str, &h); if (err < 0) return err; - sprintf(buf, "h48h%" PRIu8 "k%" PRIu8 "%s", h, k, - k == 2 ? "i" : ""); + sprintf(buf, "h48h%" PRIu8, h); return NISSY_OK; } diff --git a/web/adapter.cpp b/web/adapter.cpp @@ -16,12 +16,11 @@ std::map<std::string, nissy::solver> loaded_solvers; const std::set<std::string> available_solvers { - "h48h0k4", - "h48h1k2", - "h48h2k2", - "h48h3k2", - "h48h4k2", - "h48h5k2", + "h48h1", + "h48h2", + "h48h3", + "h48h4", + "h48h5", }; bool is_solver_available(const std::string& name) diff --git a/web/http/index.html b/web/http/index.html @@ -12,11 +12,11 @@ <div id="scrambleRaw"> <input id="scrambleText" placeholder="Type the scramble here..."> <select id="solverSelector"> - <option value="h48h3k2" selected="selected"> - h48 h=4 k=2 (300 Mb) - light + <option value="h48h3" selected="selected"> + h48 h=4 (500 Mb) - light </option> - <option value="h48h5k2" selected="selected"> - h48 h=5 k=2 (1 Gb) - fastest + <option value="h48h5" selected="selected"> + h48 h=5 (1 Gb) - fastest </option> </select> <button id="solveButton">Solve</button>