commit 7c3701364542355c29b2e4ebc6d719ddd123c0f2
parent 836d626e7caba9578c3440ec0484e7ff1b4cc48e
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date: Thu, 24 Jul 2025 09:00:49 +0200
Use bit trick for portable and arm popcount
Diffstat:
5 files changed, 72 insertions(+), 14 deletions(-)
diff --git a/src/arch/neon.h b/src/arch/neon.h
@@ -30,16 +30,17 @@ STATIC_INLINE uint8x8_t compose_corners_slim(uint8x8_t, uint8x8_t);
#define SOLVED_CUBE STATIC_CUBE( \
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
-/* TODO: optimize this (use intrinsics?) */
STATIC_INLINE int
popcount_u32(uint32_t x)
{
- int ret;
-
- for (ret = 0; x != 0; x >>= 1)
- ret += x & 1;
-
- return ret;
+ /* Same as the portable version */
+ x -= (x >> UINT32_C(1)) & UINT32_C(0x55555555);
+ x = (x & UINT32_C(0x33333333)) +
+ ((x >> UINT32_C(2)) & UINT32_C(0x33333333));
+ x = (x + (x >> UINT32_C(4))) & UINT32_C(0x0F0F0F0F);
+ x = (x * UINT32_C(0x01010101)) >> UINT32_C(24);
+
+ return (int)x;
}
STATIC void
diff --git a/src/arch/portable.h b/src/arch/portable.h
@@ -9,16 +9,29 @@
#define SOLVED_CUBE STATIC_CUBE( \
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
-/* TODO: optimize this (use bit tricks?) */
STATIC_INLINE int
popcount_u32(uint32_t x)
{
- int ret;
-
- for (ret = 0; x != 0; x >>= 1)
- ret += x & 1;
-
- return ret;
+ /*
+ Bit trick: accumulate in pairs of bits, quads of bits and so on,
+ until the final result is the sum of all bits.
+
+ x = (x & 0x55555555) + ((x >> 1) & 0x55555555);
+ x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+ x = (x & 0x0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F);
+ x = (x & 0x00FF00FF) + ((x >> 8) & 0x00FF00FF);
+ x = (x & 0x0000FFFF) + ((x >> 16) & 0x0000FFFF);
+
+ The actual method we use is a small optimization of the one above.
+ */
+
+ x -= (x >> UINT32_C(1)) & UINT32_C(0x55555555);
+ x = (x & UINT32_C(0x33333333)) +
+ ((x >> UINT32_C(2)) & UINT32_C(0x33333333));
+ x = (x + (x >> UINT32_C(4))) & UINT32_C(0x0F0F0F0F);
+ x = (x * UINT32_C(0x01010101)) >> UINT32_C(24);
+
+ return (int)x;
}
STATIC void
diff --git a/test/016_popcount_u32/00_all.in b/test/016_popcount_u32/00_all.in
diff --git a/test/016_popcount_u32/00_all.out b/test/016_popcount_u32/00_all.out
@@ -0,0 +1 @@
+Ok
diff --git a/test/016_popcount_u32/popcount_u32_tests.c b/test/016_popcount_u32/popcount_u32_tests.c
@@ -0,0 +1,43 @@
+#include "../test.h"
+
+int popcount_u32(uint32_t x);
+
+int
+popcount_u32_simple(uint32_t x)
+{
+ int ret;
+
+ for (ret = 0; x != 0; x >>= 1)
+ ret += x & 1;
+
+ return ret;
+}
+
+bool
+correct(uint32_t x)
+{
+ int expected = popcount_u32_simple(x);
+ int actual = popcount_u32(x);
+ if (actual != expected) {
+ printf("Error at %" PRIu32 ": expected %d bits, found %d\n",
+ x, expected, actual);
+ return false;
+ }
+ return true;
+}
+
+void run(void) {
+ uint32_t i;
+
+ /* Test all numbers up to 2^16, and other ranges of 2^16 numbers */
+ for (i = 0; i < 0xFFFF; i++) {
+ if (!correct(i) ||
+ !correct(i + UINT32_C(0xFFFF0000)) ||
+ !correct(i + UINT32_C(1000000)) ||
+ !correct(i + UINT32_C(1)) ||
+ !correct(i + UINT32_C(1234567)))
+ return;
+ }
+
+ printf("Ok\n");
+}