nissy-core

The "engine" of nissy, including the H48 optimal solver.
git clone https://git.tronto.net/nissy-core
Download | Log | Files | Refs | README | LICENSE

commit b729562fc3c1b85f1ea9a19fadd1caaf85b85777
parent 2ac46cb4d4133e5d5de90bacfc9b6e979d1d6046
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Mon,  6 Apr 2026 10:08:31 +0200

Remove use of MMX intrinsics in favor of AVX2

Diffstat:
M src/arch/avx2.h | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/arch/avx2.h b/src/arch/avx2.h
@@ -308,13 +308,13 @@ STATIC_INLINE uint64_t
 permtoindex_Nx8(uint64_t n, int64_t a)
 {
 	uint64_t i, c, ret;
-	__m64 cmp;
+	__m256i cmp;
 
 	for (i = 0, ret = 0; i < n; i++) {
-		cmp = _mm_set1_pi8(a & INT64_C(0xFF));
+		cmp = _mm256_set1_epi8((char)(a & INT64_C(0xFF)));
 		a = (a >> INT64_C(8)) | INT64_C(0x0F00000000000000);
-		cmp = _mm_cmpgt_pi8(cmp, _mm_cvtsi64_m64(a));
-		c = _mm_popcnt_u64(_mm_cvtm64_si64(cmp)) >> UINT64_C(3);
+		cmp = _mm256_cmpgt_epi8(cmp, _mm256_set1_epi64x(a));
+		c = _mm_popcnt_u32(_mm256_movemask_epi8(cmp)) >> 2;
 		ret += c * factorial[n-1-i];
 	}
 
@@ -428,11 +428,12 @@ STATIC_INLINE cube_t
 invcoord_epe(uint64_t i)
 {
 	int64_t a;
-	__m64 a64;
+	__m256i b, r;
 
 	a = indextoperm_4x8(i);
-	a64 = _mm_add_pi8(_mm_cvtsi64_m64(a), _mm_set_pi32(0, 0x08080808));
-	a = _mm_cvtm64_si64(a64);
+	b = _mm256_set1_epi64x(INT64_C(0x08080808));
+	r = _mm256_add_epi8(_mm256_set1_epi64x(a), b);
+	a = _mm256_extract_epi64(r, 0);
 
 	return _mm256_set_epi64x(a, SOLVED_L, 0, SOLVED_L);
 }