commit 36f37564cd566e91771cd1f92ac3886917c769d1 parent c6ab40b1602443938e431b996b28fb3d888365bb Author: Sebastiano Tronto <sebastiano@tronto.net> Date: Wed, 17 Dec 2025 09:26:53 +0100 Update benchmarks Diffstat:
21 files changed, 456 insertions(+), 202 deletions(-)
diff --git a/.gitignore b/.gitignore @@ -49,3 +49,4 @@ python/*.lib *.sketch __pycache__ */__pycache__ +benchmarks/tables.md diff --git a/benchmarks/benchmarks.md b/benchmarks/benchmarks.md @@ -58,163 +58,164 @@ All benchmark tests were done on the following configuration: * CPU: AMD Ryzen 7 7700 (8 cores, 16 virtual threads) * Memory: 2x Corsair Vengeance 32GB 5600MHz * Motherboard: Gigabyte B650M K -* Operating system: Debian 12 (Linux kernel 6.1.0) -* Compiler: GCC 12.2.0 for H48 and Clang 14.0.6 for vcube +* Operating system: Debian 13 (Linux kernel 6.12.57) +* Compiler: GCC 14.2.0 for H48 and Clang 19.1.7 for vcube -## Single solution +## Results -Average time for finding a single optimal solution. - -### Single thread +<details><summary>Single solution, single thread</summary> Time per cube (in seconds, lower is better). | Solver | Size |17 moves|18 moves|19 moves|20 moves|Superflip| |:---------|:-------|-------:|-------:|-------:|-------:|--------:| -|H48 h11 k2|56.1GiB | 0.23 | 1.15 | 5.08 | 31.30 | 53.67 | -|vcube 404 |31.8GiB | 0.30 | 1.25 | 6.87 | 57.49 | 291.31 | -|H48 h10 k2|28.1GiB | 0.34 | 1.80 | 7.77 | | 81.89 | -|vcube 308 |21.2GiB | 0.20 | 1.11 | 6.92 | | | -|H48 h9 k2 |14.1GiB | 0.42 | 2.84 | 12.86 | | | -|vcube 208 | 7.3GiB | 0.57 | 4.41 | 20.75 | | | -|H48 h8 k2 | 7.1GiB | 0.86 | 6.66 | 27.40 | | | -|H48 h7 k2 | 3.6GiB | 1.47 | 8.90 | 42.46 | | | -|vcube 112 | 2.4GiB | 1.01 | 9.39 | | | | -|H48 h6 k2 | 1.8GiB | 2.28 | 16.89 | | | | - -Time per cube adjusted for tables size (in seconds \* GiB, lower is better). +|vcube 212 |58.2 Gib| 0.11| 0.75| 3.43| 27.28| 19.30| +|H48 h11 |56.5 Gib| 0.15| 0.91| 4.08| 26.81| 26.90| +|vcube 404 |31.8 Gib| 0.23| 1.24| 6.10| 59.33| 268.26| +|H48 h10 |28.3 Gib| 0.27| 1.47| 6.75| 46.65| 60.55| +|vcube 308 |21.2 Gib| 0.17| 1.02| 6.20| 58.70| 604.35| +|H48 h9 |14.1 Gib| 0.38| 2.66| 12.21| | | +|vcube 208 | 7.3 Gib| 0.56| 4.36| 20.58| | | +|H48 h8 | 7.1 Gib| 0.87| 6.61| 26.57| | | +|H48 h7 | 3.5 Gib| 1.02| 8.21| 41.25| | | +|vcube 112 | 2.4 Gib| 0.96| 9.29| 40.52| | | +|H48 h6 | 1.8 Gib| 2.11| 15.95| 82.00| | | + +Time per cube adjusted for table size (in seconds \* GiB, lower is better). | Solver | Size |17 moves|18 moves|19 moves|20 moves|Superflip| |:---------|:-------|-------:|-------:|-------:|-------:|--------:| -|H48 h11 k2|56.1GiB | 12.90 | 64.51 | 284.99 |1755.93 | 3010.89 | -|vcube 404 |31.8GiB | 9.54 | 39.75 | 218.47 |1828.18 | 9263.66 | -|H48 h10 k2|28.1GiB | 9.55 | 50.58 | 218.34 | | 2301.11 | -|vcube 308 |21.2GiB | 4.24 | 23.53 | 146.70 | | | -|H48 h9 k2 |14.1GiB | 5.92 | 40.04 | 181.33 | | | -|vcube 208 | 7.3GiB | 4.16 | 32.19 | 151.48 | | | -|H48 h8 k2 | 7.1GiB | 6.11 | 47.29 | 194.54 | | | -|H48 h7 k2 | 3.6GiB | 5.29 | 32.04 | 152.86 | | | -|vcube 112 | 2.4GiB | 2.42 | 22.53 | | | | -|H48 h6 k2 | 1.8GiB | 4.10 | 30.40 | | | | - -<details><summary>Plots</summary> +|vcube 212 |58.2 Gib| 6.43| 43.80| 199.38| 1587.43| 1122.73| +|H48 h11 |56.5 Gib| 8.73| 51.58| 230.38| 1514.85| 1520.10| +|vcube 404 |31.8 Gib| 7.40| 39.47| 194.01| 1887.94| 8535.87| +|H48 h10 |28.3 Gib| 7.70| 41.43| 190.75| 1317.96| 1710.78| +|vcube 308 |21.2 Gib| 3.51| 21.71| 131.50| 1245.26| 12819.94| +|H48 h9 |14.1 Gib| 5.31| 37.64| 172.53| | | +|vcube 208 | 7.3 Gib| 4.08| 31.74| 149.68| | | +|H48 h8 | 7.1 Gib| 6.17| 46.72| 187.70| | | +|H48 h7 | 3.5 Gib| 3.62| 29.01| 145.74| | | +|vcube 112 | 2.4 Gib| 2.33| 22.53| 98.23| | | +|H48 h6 | 1.8 Gib| 3.72| 28.19| 144.94| | | + <img src="img/17moves1thread.png"> <img src="img/18moves1thread.png"> <img src="img/19moves1thread.png"> <img src="img/20moves1thread.png"> -</details> -### Multithread (4 threads) +</details> +<details><summary>Single solution, 4 threads</summary> Time per cube (in seconds, lower is better). | Solver | Size |17 moves|18 moves|19 moves|20 moves|Superflip| |:---------|:-------|-------:|-------:|-------:|-------:|--------:| -|H48 h11 k2|56.1GiB | 0.06 | 0.31 | 1.31 | 7.96 | 14.01 | -|vcube 404 |31.8GiB | 0.10 | 0.38 | 1.88 | 16.98 | (a) | -|H48 h10 k2|28.1GiB | 0.10 | 0.47 | 2.00 | 13.54 | 21.96 | -|vcube 308 |21.2GiB | 0.06 | 0.42 | 1.95 | 17.73 | (a) | -|H48 h9 k2 |14.1GiB | 0.14 | 0.83 | 3.82 | 25.98 | 31.68 | -|vcube 208 | 7.3GiB | 0.17 | 1.49 | 5.88 | | (a) | -|H48 h8 k2 | 7.1GiB | 0.27 | 2.02 | 7.94 | | | -|H48 h7 k2 | 3.6GiB | 0.35 | 2.59 | 12.41 | | | -|vcube 112 | 2.4GiB | 0.29 | 3.15 | 12.06 | | (a) | -|H48 h6 k2 | 1.8GiB | 0.65 | 4.79 | 23.91 | | | - -Time per cube adjusted for tables size (in seconds \* GiB, lower is better). +|vcube 212 |58.2 Gib| 0.03| 0.27| 1.04| 7.70| (a) | +|H48 h11 |56.5 Gib| 0.05| 0.26| 1.13| 7.60| 8.44| +|vcube 404 |31.8 Gib| 0.07| 0.30| 1.65| 16.17| (a) | +|H48 h10 |28.3 Gib| 0.08| 0.39| 1.89| 13.07| 16.63| +|vcube 308 |21.2 Gib| 0.05| 0.35| 1.78| 16.61| (a) | +|H48 h9 |14.1 Gib| 0.12| 0.77| 3.42| 24.72| 29.31| +|vcube 208 | 7.3 Gib| 0.16| 1.47| 5.86| | (a) | +|H48 h8 | 7.1 Gib| 0.26| 1.87| 7.84| | | +|H48 h7 | 3.5 Gib| 0.30| 2.32| 11.74| | | +|vcube 112 | 2.4 Gib| 0.29| 3.13| 11.95| | (a) | +|H48 h6 | 1.8 Gib| 0.63| 4.67| 24.67| | | + +Time per cube adjusted for table size (in seconds \* GiB, lower is better). | Solver | Size |17 moves|18 moves|19 moves|20 moves|Superflip| |:---------|:-------|-------:|-------:|-------:|-------:|--------:| -|H48 h11 k2|56.1GiB | 3.37 | 17.39 | 73.49 | 446.56 | 785.96 | -|vcube 404 |31.8GiB | 3.80 | 12.08 | 59.78 | 539.96 | (a) | -|H48 h10 k2|28.1GiB | 2.81 | 13.21 | 56.20 | 380.47 | 617.08 | -|vcube 308 |21.2GiB | 1.27 | 8.90 | 41.34 | 375.88 | (a) | -|H48 h9 k2 |14.1GiB | 1.97 | 11.70 | 53.86 | 366.32 | 446.69 | -|vcube 208 | 7.3GiB | 1.24 | 10.88 | 42.92 | | (a) | -|H48 h8 k2 | 7.1GiB | 1.92 | 14.34 | 56.37 | | | -|H48 h7 k2 | 3.6GiB | 1.26 | 9.32 | 44.68 | | | -|vcube 112 | 2.4GiB | 0.70 | 7.56 | 28.94 | | (a) | -|H48 h6 k2 | 1.8GiB | 1.17 | 8.62 | 43.04 | | | - -<details><summary>Plots</summary> +|vcube 212 |58.2 Gib| 2.03| 15.69| 60.25| 447.97| (a) | +|H48 h11 |56.5 Gib| 2.85| 14.76| 63.96| 429.69| 476.80| +|vcube 404 |31.8 Gib| 2.32| 9.50| 52.46| 514.50| (a) | +|H48 h10 |28.3 Gib| 2.22| 11.01| 53.47| 369.40| 469.92| +|vcube 308 |21.2 Gib| 1.02| 7.52| 37.82| 352.36| (a) | +|H48 h9 |14.1 Gib| 1.64| 10.83| 48.37| 349.20| 414.10| +|vcube 208 | 7.3 Gib| 1.18| 10.69| 42.63| | (a) | +|H48 h8 | 7.1 Gib| 1.81| 13.20| 55.35| | | +|H48 h7 | 3.5 Gib| 1.07| 8.19| 41.47| | | +|vcube 112 | 2.4 Gib| 0.69| 7.59| 28.97| | (a) | +|H48 h6 | 1.8 Gib| 1.11| 8.25| 43.60| | | + <img src="img/17moves4threads.png"> <img src="img/18moves4threads.png"> <img src="img/19moves4threads.png"> <img src="img/20moves4threads.png"> -</details> -(a) vcube cannot parallelize on a single scramble, the results for the -Superflip are going to be the same as in the single thread case. -### Multithread (16 threads) +(a) vcube cannot parallelize on a single scramble, the results for the +superflip are going to be the same as in the single thread case. +</details> +<details><summary>Single solution, 16 threads</summary> Time per cube (in seconds, lower is better). | Solver | Size |17 moves|18 moves|19 moves|20 moves|Superflip| |:---------|:-------|-------:|-------:|-------:|-------:|--------:| -|H48 h11 k2|56.1GiB | 0.02 | 0.10 | 0.43 | 2.48 | 5.67 | -|vcube 404 |31.8GiB | 0.03 | 0.16 | 0.67 | 6.36 | (a) | -|H48 h10 k2|28.1GiB | 0.03 | 0.16 | 0.74 | 4.43 | 8.81 | -|vcube 308 |21.2GiB | 0.04 | 0.22 | 0.89 | 9.53 | (a) | -|H48 h9 k2 |14.1GiB | 0.04 | 0.26 | 1.18 | 8.31 | 13.20 | -|vcube 208 | 7.3GiB | 0.08 | 0.80 | 2.38 | | (a) | -|H48 h8 k2 | 7.1GiB | 0.08 | 0.60 | 2.48 | | | -|H48 h7 k2 | 3.6GiB | 0.11 | 0.81 | 3.91 | | | -|vcube 112 | 2.4GiB | 0.15 | 1.66 | 5.18 | | (a) | -|H48 h6 k2 | 1.8GiB | 0.21 | 1.53 | 7.82 | | | - -Time per cube adjusted for tables size (in seconds \* GiB, lower is better). +|vcube 212 |58.2 Gib| 0.02| 0.13| 0.45| 2.84| (a) | +|H48 h11 |56.5 Gib| 0.02| 0.10| 0.37| 2.31| 3.47| +|vcube 404 |31.8 Gib| 0.04| 0.14| 0.65| 6.08| (a) | +|H48 h10 |28.3 Gib| 0.03| 0.14| 0.62| 4.24| 7.07| +|vcube 308 |21.2 Gib| 0.03| 0.19| 0.78| 6.67| (a) | +|H48 h9 |14.1 Gib| 0.04| 0.26| 1.15| 8.15| 12.18| +|vcube 208 | 7.3 Gib| 0.08| 0.79| 2.43| | (a) | +|H48 h8 | 7.1 Gib| 0.09| 0.64| 2.50| | | +|H48 h7 | 3.5 Gib| 0.11| 0.79| 3.88| | | +|vcube 112 | 2.4 Gib| 0.15| 1.63| 5.10| | (a) | +|H48 h6 | 1.8 Gib| 0.21| 1.48| 7.70| | | + +Time per cube adjusted for table size (in seconds \* GiB, lower is better). | Solver | Size |17 moves|18 moves|19 moves|20 moves|Superflip| |:---------|:-------|-------:|-------:|-------:|-------:|--------:| -|H48 h11 k2|56.1GiB | 1.12 | 5.61 | 24.12 | 139.13 | 318.09 | -|vcube 404 |31.8GiB | 1.08 | 5.09 | 21.31 | 202.25 | (a) | -|H48 h10 k2|28.1GiB | 0.84 | 4.50 | 20.79 | 124.48 | 247.56 | -|vcube 308 |21.2GiB | 0.85 | 4.66 | 18.87 | 202.04 | (a) | -|H48 h9 k2 |14.1GiB | 0.56 | 3.67 | 16.64 | 117.17 | 186.12 | -|vcube 208 | 7.3GiB | 0.58 | 5.84 | 17.37 | | (a) | -|H48 h8 k2 | 7.1GiB | 0.57 | 4.26 | 17.60 | | | -|H48 h7 k2 | 3.6GiB | 0.40 | 2.92 | 14.07 | | | -|vcube 112 | 2.4GiB | 0.36 | 3.98 | 12.43 | | (a) | -|H48 h6 k2 | 1.8GiB | 0.38 | 2.75 | 14.08 | | | - -<details><summary>Plots</summary> +|vcube 212 |58.2 Gib| 0.95| 7.83| 26.04| 165.03| (a) | +|H48 h11 |56.5 Gib| 1.20| 5.39| 21.09| 130.46| 195.92| +|vcube 404 |31.8 Gib| 1.21| 4.60| 20.76| 193.43| (a) | +|H48 h10 |28.3 Gib| 0.87| 3.99| 17.52| 119.70| 199.79| +|vcube 308 |21.2 Gib| 0.67| 4.01| 16.48| 141.49| (a) | +|H48 h9 |14.1 Gib| 0.61| 3.71| 16.23| 115.18| 172.04| +|vcube 208 | 7.3 Gib| 0.56| 5.78| 17.68| | (a) | +|H48 h8 | 7.1 Gib| 0.65| 4.50| 17.69| | | +|H48 h7 | 3.5 Gib| 0.39| 2.81| 13.73| | | +|vcube 112 | 2.4 Gib| 0.35| 3.95| 12.37| | (a) | +|H48 h6 | 1.8 Gib| 0.37| 2.62| 13.61| | | + <img src="img/17moves16threads.png"> <img src="img/18moves16threads.png"> <img src="img/19moves16threads.png"> <img src="img/20moves16threads.png"> -</details> - -(a) vcube cannot parallelize on a single scramble, the results for the -Superflip are going to be the same as in the single thread case. -## All optimal solutions -Average time for finding all optimal solutions. +(a) vcube cannot parallelize on a single scramble, the results for the +superflip are going to be the same as in the single thread case. +</details> +<details><summary>All solutions, 16 threads</summary> -### Multithread (16 threads) +*Note: vcube does not have an option for finding multiple solutions.* Time per cube (in seconds, lower is better). | Solver | Size |17 moves|18 moves|19 moves|20 moves|Superflip| |:---------|:-------|-------:|-------:|-------:|-------:|--------:| -|H48 h11 k2|56.1GiB | 0.05 | 0.50 | 4.24 | 19.75 | 52.99 | -|H48 h10 k2|28.1GiB | 0.08 | 0.88 | 6.94 | | | -|H48 h9 k2 |14.1GiB | 0.13 | 1.39 | 13.50 | | | -|H48 h8 k2 | 7.1GiB | 0.25 | 2.85 | | | | -|H48 h7 k2 | 3.6GiB | 0.36 | 4.24 | | | | -|H48 h6 k2 | 1.8GiB | 0.69 | 8.20 | | | | +|H48 h11 |56.5 Gib| 0.05| 0.46| 3.98| 33.27| 31.83| +|H48 h10 |28.3 Gib| 0.08| 0.78| 7.00| 59.77| 62.45| +|H48 h9 |14.1 Gib| 0.13| 1.39| 13.35| | | +|H48 h8 | 7.1 Gib| 0.27| 3.00| 29.13| | | +|H48 h7 | 3.5 Gib| 0.36| 4.19| 46.77| | | +|H48 h6 | 1.8 Gib| 0.69| 8.32| 91.89| | | -Time per cube adjusted for tables size (in seconds \* GiB, lower is better). +Time per cube adjusted for table size (in seconds \* GiB, lower is better). | Solver | Size |17 moves|18 moves|19 moves|20 moves|Superflip| |:---------|:-------|-------:|-------:|-------:|-------:|--------:| -|H48 h11 k2|56.1GiB | 2.81 | 28.05 | 237.86 |1107.98 | 2972.74 | -|H48 h10 k2|28.1GiB | 2.25 | 24.73 | 195.01 | | | -|H48 h9 k2 |14.1GiB | 1.83 | 19.60 | 190.35 | | | -|H48 h8 k2 | 7.1GiB | 1.77 | 20.24 | | | | -|H48 h7 k2 | 3.6GiB | 1.30 | 15.26 | | | | -|H48 h6 k2 | 1.8GiB | 1.24 | 14.76 | | | | +|H48 h11 |56.5 Gib| 2.84| 26.16| 224.81| 1879.97| 1798.27| +|H48 h10 |28.3 Gib| 2.19| 22.11| 197.79| 1688.62| 1764.41| +|H48 h9 |14.1 Gib| 1.80| 19.59| 188.61| | | +|H48 h8 | 7.1 Gib| 1.89| 21.16| 205.79| | | +|H48 h7 | 3.5 Gib| 1.28| 14.79| 165.25| | | +|H48 h6 | 1.8 Gib| 1.23| 14.70| 162.42| | | + +</details> ## Comments on the results @@ -226,20 +227,9 @@ Time per cube adjusted for tables size (in seconds \* GiB, lower is better). ## Other notes -* Missing values in the table mean that the test is very slow and I did - not want to wait for it to finish. I may add these values in the future. +* To repeat the benchmarks, use `./benchmarks/run-h48-benchmarks.sh`. * All the measurements above exclude the time needed to load the pruning - tables into memory, which can be quite significant for large tables. To - repeat these measurements, one can use the tool `301_solve_file`. For - example: - -``` -./build.sh tool solve_file h48h7k2 ./benchmarks/scrambles/scrambles-16.txt -``` - - To find all solutions, add something like `99999 0` at the end of the - command. This will tell the tool to find up to `99999` solutions that - are at most `0` moves longer than optimal. + tables into memory, which can be quite significant for large tables. * The measurements also excluded the one-off computation of the pruning tables which, for reasons related to the cube coordinates used, is significantly slower for H48 compared to vcube. @@ -254,3 +244,6 @@ Time per cube adjusted for tables size (in seconds \* GiB, lower is better). * vcube only supports x86 processors (Intel, AMD), while H48 runs on any architecture, including e.g. ARM (Macbook M series, android phones) and can be compiled to WebAssembly as well. +* For H48, both GCC and Clang have been tried, with the same options; + the resulting executable was about 10% faster with GCC compared to Clang. + vcube only supports compiling with Clang. diff --git a/benchmarks/img/17moves16threads.png b/benchmarks/img/17moves16threads.png Binary files differ. diff --git a/benchmarks/img/17moves1thread.png b/benchmarks/img/17moves1thread.png Binary files differ. diff --git a/benchmarks/img/17moves4threads.png b/benchmarks/img/17moves4threads.png Binary files differ. diff --git a/benchmarks/img/18moves16threads.png b/benchmarks/img/18moves16threads.png Binary files differ. diff --git a/benchmarks/img/18moves1thread.png b/benchmarks/img/18moves1thread.png Binary files differ. diff --git a/benchmarks/img/18moves4threads.png b/benchmarks/img/18moves4threads.png Binary files differ. diff --git a/benchmarks/img/19moves16threads.png b/benchmarks/img/19moves16threads.png Binary files differ. diff --git a/benchmarks/img/19moves1thread.png b/benchmarks/img/19moves1thread.png Binary files differ. diff --git a/benchmarks/img/19moves4threads.png b/benchmarks/img/19moves4threads.png Binary files differ. diff --git a/benchmarks/img/20moves16threads.png b/benchmarks/img/20moves16threads.png Binary files differ. diff --git a/benchmarks/img/20moves1thread.png b/benchmarks/img/20moves1thread.png Binary files differ. diff --git a/benchmarks/img/20moves4threads.png b/benchmarks/img/20moves4threads.png Binary files differ. diff --git a/benchmarks/plot-benchmarks.py b/benchmarks/plot-benchmarks.py @@ -0,0 +1,161 @@ +import matplotlib.pyplot as plt +import results_h48 +import results_vcube + +# Table sizes in bytes + +sizes_h48 = { + 6: 1897951528, + 7: 3793842344, + 8: 7585624040, + 9: 15169187432, + 10: 30336314216, + 11: 60670567784, +} + +sizes_vcube = { + 112: 2603089920, + 208: 7809269760, + 308: 22777036800, + 404: 34165555200, + 212: 62474158080, +} + +# Printing tables in markdown format + +def print_row(solver_name, solver_size, dict, mul_by_size, superflip_star): + if dict is None: + return + solver_gib = solver_size / (2**30) + m = solver_gib if mul_by_size else 1 + s = " (a) " if superflip_star else " " + cols = [f"{solver_name: <10}", f"{solver_gib:>4.1f} Gib"] + [ + f"{dict[key]*m/25:>8.2f}" if key in dict else " " + for key in [17, 18, 19, 20] + ] + [ + f"{dict["superflip"]*m:>9.2f}" if "superflip" in dict else s + ] + sep = "|" + print(sep + sep.join(cols) + sep) + +def print_table(h48, vcube, ms, st): + vcube = vcube or {} + print("| Solver | Size |17 moves|18 moves|19 moves|20 moves|Superflip|") + print("|:---------|:-------|-------:|-------:|-------:|-------:|--------:|") + print_row("vcube 212", sizes_vcube[212], vcube.get(212), ms, not st) + print_row("H48 h11", sizes_h48[11], h48[11], ms, False) + print_row("vcube 404", sizes_vcube[404], vcube.get(404), ms, not st) + print_row("H48 h10", sizes_h48[10], h48[10], ms, False) + print_row("vcube 308", sizes_vcube[308], vcube.get(308), ms, not st) + print_row("H48 h9", sizes_h48[9], h48[9], ms, False) + print_row("vcube 208", sizes_vcube[208], vcube.get(208), ms, not st) + print_row("H48 h8", sizes_h48[8], h48[8], ms, False) + print_row("H48 h7", sizes_h48[7], h48[7], ms, False) + print_row("vcube 112", sizes_vcube[112], vcube.get(112), ms, not st) + print_row("H48 h6", sizes_h48[6], h48[6], ms, False) + +print("<details><summary>Single solution, single thread</summary>") +print() +print("Time per cube (in seconds, lower is better).") +print() +print_table(results_h48.h48_single_thread, results_vcube.vcube_single_thread, False, True) +print() +print("Time per cube adjusted for table size (in seconds \\* GiB, lower is better).") +print() +print_table(results_h48.h48_single_thread, results_vcube.vcube_single_thread, True, True) +print() +print( + '<img src="img/17moves1thread.png">\n' + '<img src="img/18moves1thread.png">\n' + '<img src="img/19moves1thread.png">\n' + '<img src="img/20moves1thread.png">\n' +) +print("</details>") + +print("<details><summary>Single solution, 4 threads</summary>") +print() +print("Time per cube (in seconds, lower is better).") +print() +print_table(results_h48.h48_4_threads, results_vcube.vcube_4_threads, False, False) +print() +print("Time per cube adjusted for table size (in seconds \\* GiB, lower is better).") +print() +print_table(results_h48.h48_4_threads, results_vcube.vcube_4_threads, True, False) +print() +print( + '<img src="img/17moves4threads.png">\n' + '<img src="img/18moves4threads.png">\n' + '<img src="img/19moves4threads.png">\n' + '<img src="img/20moves4threads.png">\n' +) +print() +print("(a) vcube cannot parallelize on a single scramble, the results for the") +print("superflip are going to be the same as in the single thread case.") +print("</details>") + +print("<details><summary>Single solution, 16 threads</summary>") +print() +print("Time per cube (in seconds, lower is better).") +print() +print_table(results_h48.h48_16_threads, results_vcube.vcube_16_threads, False, False) +print() +print("Time per cube adjusted for table size (in seconds \\* GiB, lower is better).") +print() +print_table(results_h48.h48_16_threads, results_vcube.vcube_16_threads, True, False) +print() +print( + '<img src="img/17moves16threads.png">\n' + '<img src="img/18moves16threads.png">\n' + '<img src="img/19moves16threads.png">\n' + '<img src="img/20moves16threads.png">\n' +) +print() +print("(a) vcube cannot parallelize on a single scramble, the results for the") +print("superflip are going to be the same as in the single thread case.") +print("</details>") + +print("<details><summary>All solutions, 16 threads</summary>") +print() +print("*Note: vcube does not have an option for finding multiple solutions.*") +print() +print("Time per cube (in seconds, lower is better).") +print() +print_table(results_h48.h48_all_solutions, None, False, False) +print() +print("Time per cube adjusted for table size (in seconds \\* GiB, lower is better).") +print() +print_table(results_h48.h48_all_solutions, None, True, False) +print() +print("</details>") + +# Plotting + +def plot(title, hd, vd, key): + d = 1 if key == "superflip" else 25 + h48x = [sizes_h48[m]/(2**30) for m in hd.keys() if key in hd[m]] + vcubex = [sizes_vcube[m]/(2**30) for m in vd.keys() if key in vd[m]] + h48y = [d[key] for _, d in hd.items() if key in d] + vcubey = [d[key] for _, d in vd.items() if key in d] + + plt.clf() + plt.title(title) + plt.xlabel("Table size (GiB)") + plt.ylabel("Time to solve (s / cube)") + plt.plot(h48x, h48y, "o--", label = "H48") + plt.plot(vcubex, vcubey, "o--", label = "vcube") + plt.legend(loc = "right") + filename = title.replace(" ", "").replace(",", "") + ".png" + plt.savefig("benchmarks/img/" + filename, dpi=300) + #plt.show() + +rh, rv = results_h48.h48_single_thread, results_vcube.vcube_single_thread +for m in [17, 18, 19, 20]: + plot(f"{m} moves 1 thread", rh, rv, m) + +rh, rv = results_h48.h48_4_threads, results_vcube.vcube_4_threads +for m in [17, 18, 19, 20]: + plot(f"{m} moves 4 threads", rh, rv, m) + +rh, rv = results_h48.h48_16_threads, results_vcube.vcube_16_threads +for m in [17, 18, 19, 20]: + plot(f"{m} moves 16 threads", rh, rv, m) diff --git a/benchmarks/plot.py b/benchmarks/plot.py @@ -1,85 +0,0 @@ -import matplotlib.pyplot as plt - -h48 = [ - ("H48 h11 k2", 56.1, - [0.23, 1.15, 5.08, 31.30], - [0.06, 0.31, 1.31, 7.96], - [0.02, 0.1, 0.43, 2.48] - ), - ("H48 h10 k2", 28.1, - [0.34, 1.80, 7.77], - [0.1, 0.47, 2.00, 13.54], - [0.03, 0.16, 0.74, 4.43] - ), - ("H48 h9 k2", 14.1, - [0.42, 2.84, 12.86], - [0.14, 0.83, 3.82, 25.98], - [0.04, 0.26, 1.18, 8.31] - ), - ("H48 h8 k2", 7.1, - [0.86, 6.66, 27.4], - [0.27, 2.02, 7.94], - [0.08, 0.6, 2.48] - ), - ("H48 h7 k2", 3.6, - [1.47, 8.9, 42.46], - [0.35, 2.59, 12.41], - [0.11, 0.81, 3.91] - ), - ("H48 h6 k2", 1.8, - [2.28, 16.89], - [0.65, 4.79, 23.91], - [0.21, 1.53, 7.82] - ) -] - -vcube = [ - ("vcube 404", 31.8, - [0.3, 1.25, 6.87, 57.49], - [0.1, 0.38, 1.88, 16.98], - [0.03, 0.16, 0.67, 6.36] - ), - ("vcube 308", 21.2, - [0.2, 1.11, 6.92], - [0.06, 0.42, 1.95, 17.73], - [0.04, 0.26, 1.18, 9.53] - ), - ("vcube 208", 7.3, - [0.57, 4.41, 20.75], - [0.17, 1.49, 5.88], - [0.08, 0.8, 2.38] - ), - ("vcube 112", 2.4, - [1.01, 9.39], - [0.29, 3.15, 12.06], - [0.15, 1.66, 5.18] - ) -] - -h48_x = [i[1] for i in h48] -vcube_x = [i[1] for i in vcube] - -def getarr(m, t, a): - return [i[2+t][m-17] for i in a if len(i[2+t]) > m-17] - -def gethv(m, t): - return getarr(m, t, h48), getarr(m, t, vcube) - -def showplt(plt, title, h48y, vcubey): - plt.clf() - plt.title(title) - plt.xlabel("Table size (GiB)") - plt.ylabel("Time to solve (s / cube)") - plt.plot(h48_x[:len(h48y)], h48y, "o--", label = "H48") - plt.plot(vcube_x[:len(vcubey)], vcubey, "o--", label = "vcube") - plt.legend(loc = "right") - filename = title.replace(" ", "").replace(",", "") + ".png" - plt.savefig("benchmarks/img/" + filename, dpi=300) - #plt.show() - -for i in range(17, 21): - for j in range(0, 3): - title = "{} moves, {} thread{}".format( - i, 4**j, "s" if j > 0 else "") - h, v = gethv(i, j) - showplt(plt, title, h, v) diff --git a/benchmarks/results_h48.py b/benchmarks/results_h48.py @@ -0,0 +1,36 @@ +h48_single_thread = { + 6: {17: 52.6781, 18: 398.7213, 19: 2049.9640}, + 7: {17: 25.6133, 18: 205.2683, 19: 1031.1970}, + 8: {17: 21.8331, 18: 165.3185, 19: 664.2050}, + 9: {17: 9.4036, 18: 66.6141, 19: 305.3164}, + 10: {17: 6.8148, 18: 36.6565, 19: 168.7843, 20: 1166.2200, "superflip": 60.5523}, + 11: {17: 3.8640, 18: 22.8206, 19: 101.9301, 20: 670.2421, "superflip": 26.9025}, +} + +h48_4_threads = { + 6: {17: 15.6631, 18: 116.6983, 19: 616.6293}, + 7: {17: 7.5627, 18: 57.9755, 19: 293.4441}, + 8: {17: 6.3879, 18: 46.6986, 19: 195.8814}, + 9: {17: 2.9011, 18: 19.1689, 19: 85.5939, 20: 617.9417, "superflip": 29.3115}, + 10: {17: 1.9637, 18: 9.7415, 19: 47.3158, 20: 326.8692, "superflip": 16.6325}, + 11: {17: 1.2589, 18: 6.5298, 19: 28.2996, 20: 190.1169, "superflip": 8.4383}, +} + +h48_16_threads = { + 6: {17: 5.2001, 18: 37.0525, 19: 192.4323}, + 7: {17: 2.7835, 18: 19.8514, 19: 97.1157}, + 8: {17: 2.3060, 18: 15.9383, 19: 62.6004}, + 9: {17: 1.0799, 18: 6.5733, 19: 28.7205, 20: 203.8189, "superflip": 12.1779}, + 10: {17: 0.7715, 18: 3.5307, 19: 15.4995, 20: 105.9181, "superflip": 7.0716}, + 11: {17: 0.5313, 18: 2.3848, 19: 9.3324, 20: 57.7204, "superflip": 3.4673}, +} + +h48_all_solutions = { + 6: {17: 17.3451, 18: 207.8877, 19: 2297.1491}, + 7: {17: 9.0715, 18: 104.6421, 19: 1169.2355}, + 8: {17: 6.6934, 18: 74.8913, 19: 728.2341}, + 9: {17: 3.1790, 18: 34.6734, 19: 333.7746}, + 10: {17: 1.9342, 18: 19.5612, 19: 175.0206, 20: 1494.2053, "superflip": 62.4507}, + 11: {17: 1.2574, 18: 11.5760, 19: 99.4671, 20: 831.7873, "superflip": 31.8256}, +} + diff --git a/benchmarks/results_h48_clang.py b/benchmarks/results_h48_clang.py @@ -0,0 +1,36 @@ +h48_single_thread = { + 6: {17: 57.7296, 18: 436.0622, 19: 2240.5087}, + 7: {17: 27.8748, 18: 222.3812, 19: 1114.8537}, + 8: {17: 23.6884, 18: 181.7169, 19: 722.1819}, + 9: {17: 10.1115, 18: 72.6935, 19: 328.5458}, + 10: {17: 7.0337, 18: 37.5239, 19: 179.3647, 20: 1267.3059, "superflip": 71.8359}, + 11: {17: 3.9693, 18: 24.2573, 19: 106.8995, 20: 688.5594, "superflip": 32.0008}, +} + +h48_4_threads = { + 6: {17: 16.7353, 18: 129.9696, 19: 663.2930}, + 7: {17: 8.4315, 18: 65.4883, 19: 348.3440}, + 8: {17: 6.9127, 18: 51.8117, 19: 208.8438}, + 9: {17: 3.0985, 18: 21.2920, 19: 95.3467, 20: 711.2927, "superflip": 35.3772}, + 10: {17: 2.0592, 18: 10.6636, 19: 49.9811, 20: 388.0203, "superflip": 18.6665}, + 11: {17: 1.3065, 18: 6.9261, 19: 29.7725, 20: 197.9893, "superflip": 9.1158}, +} + +h48_16_threads = { + 6: {17: 5.6335, 18: 39.2955, 19: 207.5730}, + 7: {17: 2.9328, 18: 21.0462, 19: 104.7917}, + 8: {17: 2.4790, 18: 17.0854, 19: 67.6617}, + 9: {17: 1.1561, 18: 7.0899, 19: 30.5952, 20: 220.9804, "superflip": 13.5599}, + 10: {17: 0.8002, 18: 3.6835, 19: 16.4910, 20: 114.4733, "superflip": 7.6359}, + 11: {17: 0.5475, 18: 2.4681, 19: 9.8224, 20: 60.7893, "superflip": 3.8795}, +} + +h48_all_solutions = { + 6: {17: 18.3998, 18: 223.3861, 19: 2459.7513}, + 7: {17: 9.6264, 18: 114.2752, 19: 1242.9741}, + 8: {17: 7.1793, 18: 81.2145, 19: 783.8113}, + 9: {17: 3.3487, 18: 36.9225, 19: 363.1912}, + 10: {17: 2.0002, 18: 20.4565, 19: 187.5199, 20: 1570.6074, "superflip": 68.2016}, + 11: {17: 1.2833, 18: 12.3616, 19: 105.6184, 20: 872.5879, "superflip": 34.1071}, +} + diff --git a/benchmarks/results_vcube.py b/benchmarks/results_vcube.py @@ -0,0 +1,24 @@ +vcube_single_thread = { + 112: {17: 24.021138887, 18: 232.290148751, 19: 1012.971335021}, + 208: {17: 14.015260764, 18: 109.093653866, 19: 514.498894356}, + 308: {17: 4.140126887, 18: 25.580857443, 19: 154.973883402, 20: 1467.580449672, "superflip": 604.350116961}, + 404: {17: 5.817552240, 18: 31.011977639, 19: 152.427892670, 20: 1483.336504130, "superflip": 268.261983286}, + 212: {17: 2.762842065, 18: 18.817855233, 19: 85.668495927, 20: 682.075876988, "superflip": 19.296406826}, +} + +vcube_4_threads = { + 112: {17: 7.127616290, 18: 78.318152570, 19: 298.717794059}, + 208: {17: 4.064627893, 18: 36.750120494, 19: 146.530835915}, + 308: {17: 1.203668154, 18: 8.861778047, 19: 44.574642874, 20: 415.264434322}, + 404: {17: 1.824527079, 18: 7.464447783, 19: 41.220812956, 20: 404.238440653}, + 212: {17: 0.873352248, 18: 6.742663807, 19: 25.886399367, 20: 192.481134988}, +} + +vcube_16_threads = { + 112: {17: 3.634119823, 18: 40.734799597, 19: 127.553167079}, + 208: {17: 1.914579912, 18: 19.860062709, 19: 60.773634937}, + 308: {17: 0.787410653, 18: 4.727771575, 19: 19.423988316, 20: 166.749710026}, + 404: {17: 0.948257897, 18: 3.616245643, 19: 16.311276268, 20: 151.974609036}, + 212: {17: 0.409647385, 18: 3.365509654, 19: 11.187815758, 20: 70.908266374}, +} + diff --git a/benchmarks/run-h48-benchmarks.sh b/benchmarks/run-h48-benchmarks.sh @@ -0,0 +1,44 @@ +#!/bin/sh + +# This script must be run from the main repository folder. + +scr="./benchmarks/scrambles" +out="./benchmarks/results_h48.py" + +get_solve_time() { + THREADS="$1" ./build.sh tool solve_file "$2" "$3" "$4" 0 | \ + tail -n 1 | sed 's/Total time: //; s/s$//' +} + +do_all() { + low=$1 + cutoff=$2 + high=$3 + t=$4 + n=$5 + name=$6 + + printf '%s = {\n' "$name" >> "$out" + for i in $(seq "$low" "$high"); do + t17="$(get_solve_time $t h48h$i "$scr/scrambles-17.txt" $n)" + t18="$(get_solve_time $t h48h$i "$scr/scrambles-18.txt" $n)" + t19="$(get_solve_time $t h48h$i "$scr/scrambles-19.txt" $n)" + printf '\t%s: {17: %s, 18: %s, 19: %s' $i $t17 $t18 $t19 >> "$out" + if [ "$i" -ge "$cutoff" ]; then + t20="$(get_solve_time $t h48h$i "$scr/scrambles-20.txt" $n)" + tsf="$(get_solve_time $t h48h$i "$scr/superflip.txt" $n)" + printf ', 20: %s, "superflip": %s},\n' $t20 $tsf >> "$out" + else + printf '},\n' >> "$out" + fi + done + printf '}\n\n' >> "$out" +} + +./build.sh clean && ./build.sh +printf '' > "$out" + +do_all 6 10 11 1 1 "h48_single_thread" +do_all 6 9 11 4 1 "h48_4_threads" +do_all 6 9 11 16 1 "h48_16_threads" +do_all 6 10 11 16 999999 "h48_all_solutions" diff --git a/benchmarks/run-vcube-benchmarks.sh b/benchmarks/run-vcube-benchmarks.sh @@ -0,0 +1,44 @@ +#!/bin/sh + +# This script can be used to run benchmarks for vcube, similar to the +# benchmarks for H48 run by run-h48-benchmarks.sh. Before running this script, +# move it to the vcube folder and and adjust the paths below if necessary. + +scr="../nissy-core/benchmarks/scrambles" +out="../nissy-core/benchmarks/results_vcube.py" + +get_solve_time() { + ./vc-optimal -w "$1" -c "$2" <"$3" \ + 2>&1 >/dev/null | sed 's/Total time: //; s/ real.*//' +} + +do_all() { + t=$1 + name=$2 + printf '%s = {\n' "$name" >> "$out" + for i in 112 208 308 404 212 ; do + t17="$(get_solve_time $t $i "$scr/scrambles-17.txt")" + t18="$(get_solve_time $t $i "$scr/scrambles-18.txt")" + t19="$(get_solve_time $t $i "$scr/scrambles-19.txt")" + printf '\t%s: {17: %s, 18: %s, 19: %s' $i $t17 $t18 $t19 >> "$out" + if [ "$i" -ge 212 ]; then + t20="$(get_solve_time $t $i "$scr/scrambles-20.txt")" + printf ', 20: %s' $t20 >> "$out" + if [ "$t" = 1 ]; then + tsf="$(get_solve_time $t $i "$scr/superflip.txt")" + printf ', "superflip": %s},\n' $tsf >> "$out" + else + printf '},\n' >> "$out" + fi + else + printf '},\n' >> "$out" + fi + done + printf '}\n\n' >> "$out" +} + +printf '' > "$out" + +do_all 1 "vcube_single_thread" +do_all 4 "vcube_4_threads" +do_all 16 "vcube_16_threads"