Try to fix compilation with gcc-9

james-d-mitchell · james-d-mitchell · commit 7b37f714e1c8 · 2025-03-19T09:34:52.000Z
diff --git a/include/hpcombi/bmat16_impl.hpp b/include/hpcombi/bmat16_impl.hpp
@@ -25,8 +25,12 @@
 namespace HPCombi {
 static_assert(std::is_trivial<BMat16>(), "BMat16 is not a trivial class!");
 
-static constexpr xpu16 line{0x800, 0x901, 0xa02, 0xb03, 0xc04, 0xd05, 0xe06, 0xf07, 0x800, 0x901, 0xa02, 0xb03, 0xc04, 0xd05, 0xe06, 0xf07};
-static constexpr xpu16 block{0x200, 0x604, 0xa08, 0xe0c, 0x301, 0x705, 0xb09, 0xf0d, 0x200, 0x604, 0xa08, 0xe0c, 0x301, 0x705, 0xb09, 0xf0d};
+static constexpr xpu16 line{0x800, 0x901, 0xa02, 0xb03, 0xc04, 0xd05,
+                            0xe06, 0xf07, 0x800, 0x901, 0xa02, 0xb03,
+                            0xc04, 0xd05, 0xe06, 0xf07};
+static constexpr xpu16 block{0x200, 0x604, 0xa08, 0xe0c, 0x301, 0x705,
+                             0xb09, 0xf0d, 0x200, 0x604, 0xa08, 0xe0c,
+                             0x301, 0x705, 0xb09, 0xf0d};
 
 inline xpu64 to_line(xpu64 vect) {
     return simde_mm256_shuffle_epi8(vect, line);
@@ -36,7 +40,8 @@ inline xpu64 to_block(xpu64 vect) {
     return simde_mm256_shuffle_epi8(vect, block);
 }
 
-inline BMat16::BMat16(uint64_t n0, uint64_t n1, uint64_t n2, uint64_t n3) noexcept {
+inline BMat16::BMat16(uint64_t n0, uint64_t n1, uint64_t n2,
+                      uint64_t n3) noexcept {
     xpu64 tmp{n0, n1, n2, n3};
     _data = to_line(tmp);
 }
@@ -47,27 +52,25 @@ inline BMat16::BMat16(std::vector<std::vector<bool>> const &mat) noexcept {
     std::array<uint64_t, 4> tmp = {0, 0, 0, 0};
     for (int i = mat.size() - 1; i >= 0; --i) {
         HPCOMBI_ASSERT(mat.size() == mat[i].size());
-        tmp[i/4] <<= 16 - mat.size();
+        tmp[i / 4] <<= 16 - mat.size();
         for (int j = mat[i].size() - 1; j >= 0; --j) {
-            tmp[i/4] = (tmp[i/4] << 1) | mat[i][j];
+            tmp[i / 4] = (tmp[i / 4] << 1) | mat[i][j];
         }
     }
     _data = xpu64{tmp[0], tmp[1], tmp[2], tmp[3]};
 }
 
 inline bool BMat16::operator()(size_t i, size_t j) const noexcept {
-    return (_data[i/4] >> (16 * (i%4) + j)) & 1;
+    return (_data[i / 4] >> (16 * (i % 4) + j)) & 1;
 }
 
 inline void BMat16::set(size_t i, size_t j, bool val) noexcept {
     HPCOMBI_ASSERT(i < 16);
     HPCOMBI_ASSERT(j < 16);
     uint64_t a = 1;
-    a <<= 16 * (i%4) + j;
-    xpu64 mask{(i/4 == 0)*a, 
-               (i/4 == 1)*a, 
-               (i/4 == 2)*a, 
-               (i/4 == 3)*a};
+    a <<= 16 * (i % 4) + j;
+    xpu64 mask{(i / 4 == 0) * a, (i / 4 == 1) * a, (i / 4 == 2) * a,
+               (i / 4 == 3) * a};
     _data ^= (-val ^ _data) & mask;
 }
 
@@ -77,28 +80,36 @@ inline bool BMat16::operator==(BMat16 const &that) const noexcept {
 }
 
 inline bool BMat16::operator<(BMat16 const &that) const noexcept {
-    return _data[0] < that._data[0] || 
-            (_data[0] == that._data[0] && (_data[1] < that._data[1] || 
-            (_data[1] == that._data[1] && (_data[2] < that._data[2] || 
-            (_data[2] == that._data[2] && (_data[3] < that._data[3]))))));
+    return _data[0] < that._data[0] ||
+           (_data[0] == that._data[0] &&
+            (_data[1] < that._data[1] ||
+             (_data[1] == that._data[1] &&
+              (_data[2] < that._data[2] ||
+               (_data[2] == that._data[2] && (_data[3] < that._data[3]))))));
 }
 
 inline bool BMat16::operator>(BMat16 const &that) const noexcept {
-    return _data[0] > that._data[0] || 
-            (_data[0] == that._data[0] && (_data[1] > that._data[1] || 
-            (_data[1] == that._data[1] && (_data[2] > that._data[2] || 
-            (_data[2] == that._data[2] && (_data[3] > that._data[3]))))));
+    return _data[0] > that._data[0] ||
+           (_data[0] == that._data[0] &&
+            (_data[1] > that._data[1] ||
+             (_data[1] == that._data[1] &&
+              (_data[2] > that._data[2] ||
+               (_data[2] == that._data[2] && (_data[3] > that._data[3]))))));
 }
 
 inline std::array<std::array<bool, 16>, 16> BMat16::to_array() const noexcept {
     xpu64 tmp = to_block(_data);
     uint64_t a = tmp[0], b = tmp[1], c = tmp[2], d = tmp[3];
     std::array<std::array<bool, 16>, 16> res;
     for (size_t i = 0; i < 64; ++i) {
-        res[i/8][i%8]         = a & 1; a >>= 1;
-        res[i/8][8 + i%8]     = b & 1; b >>= 1;
-        res[8 + i/8][i%8]     = c & 1; c >>= 1;
-        res[8 + i/8][8 + i%8] = d & 1; d >>= 1;
+        res[i / 8][i % 8] = a & 1;
+        a >>= 1;
+        res[i / 8][8 + i % 8] = b & 1;
+        b >>= 1;
+        res[8 + i / 8][i % 8] = c & 1;
+        c >>= 1;
+        res[8 + i / 8][8 + i % 8] = d & 1;
+        d >>= 1;
     }
     return res;
 }
@@ -107,10 +118,10 @@ inline BMat16 BMat16::transpose_naive() const noexcept {
     uint64_t a = 0, b = 0, c = 0, d = 0;
     for (int i = 7; i >= 0; --i) {
         for (int j = 7; j >= 0; --j) {
-            a = (a << 1) | (*this)(j, i); 
-            b = (b << 1) | (*this)(j+8, i); 
-            c = (c << 1) | (*this)(j, i+8); 
-            d = (d << 1) | (*this)(j+8, i+8);
+            a = (a << 1) | (*this)(j, i);
+            b = (b << 1) | (*this)(j + 8, i);
+            c = (c << 1) | (*this)(j, i + 8);
+            d = (d << 1) | (*this)(j + 8, i + 8);
         }
     }
     return BMat16(a, b, c, d);
@@ -119,25 +130,33 @@ inline BMat16 BMat16::transpose_naive() const noexcept {
 inline BMat16 BMat16::transpose() const noexcept {
     xpu64 tmp = to_block(_data);
     xpu64 x = simde_mm256_set_epi64x(tmp[3], tmp[1], tmp[2], tmp[0]);
-    xpu64 y = (x ^ (x >> 7)) & (xpu64{0xAA00AA00AA00AA, 0xAA00AA00AA00AA, 0xAA00AA00AA00AA, 0xAA00AA00AA00AA});
+    xpu64 y = (x ^ (x >> 7)) & (xpu64{0xAA00AA00AA00AA, 0xAA00AA00AA00AA,
+                                      0xAA00AA00AA00AA, 0xAA00AA00AA00AA});
     x = x ^ y ^ (y << 7);
-    y = (x ^ (x >> 14)) & (xpu64{0xCCCC0000CCCC, 0xCCCC0000CCCC, 0xCCCC0000CCCC, 0xCCCC0000CCCC});
+    y = (x ^ (x >> 14)) &
+        (xpu64{0xCCCC0000CCCC, 0xCCCC0000CCCC, 0xCCCC0000CCCC, 0xCCCC0000CCCC});
     x = x ^ y ^ (y << 14);
-    y = (x ^ (x >> 28)) & (xpu64{0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0});
+    y = (x ^ (x >> 28)) &
+        (xpu64{0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0});
     x = x ^ y ^ (y << 28);
     return BMat16(to_line(x));
 }
 
-static constexpr xpu16 rot{0x302, 0x504, 0x706, 0x908, 0xb0a, 0xd0c, 0xf0e, 0x100, 0x302, 0x504, 0x706, 0x908, 0xb0a, 0xd0c, 0xf0e, 0x100};
+static constexpr xpu16 rot{0x302, 0x504, 0x706, 0x908, 0xb0a, 0xd0c,
+                           0xf0e, 0x100, 0x302, 0x504, 0x706, 0x908,
+                           0xb0a, 0xd0c, 0xf0e, 0x100};
 
 inline BMat16 BMat16::mult_transpose(BMat16 const &that) const noexcept {
     xpu16 x = _data;
     xpu16 y1 = that._data;
-    xpu16 y2 = simde_mm256_set_epi64x(that._data[1], that._data[0], that._data[3], that._data[2]);
+    xpu16 y2 = simde_mm256_set_epi64x(that._data[1], that._data[0],
+                                      that._data[3], that._data[2]);
     xpu16 zero = simde_mm256_setzero_si256();
     xpu16 data = simde_mm256_setzero_si256();
-    xpu16 diag1{0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000};
-    xpu16 diag2{0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
+    xpu16 diag1{0x1,   0x2,   0x4,   0x8,   0x10,   0x20,   0x40,   0x80,
+                0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000};
+    xpu16 diag2{0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000,
+                0x1,   0x2,   0x4,   0x8,   0x10,   0x20,   0x40,   0x80};
     for (size_t i = 0; i < 8; ++i) {
         data |= ((x & y1) != zero) & diag1;
         data |= ((x & y2) != zero) & diag2;
@@ -151,41 +170,51 @@ inline BMat16 BMat16::mult_transpose(BMat16 const &that) const noexcept {
 
 inline BMat16 BMat16::mult_4bmat8(BMat16 const &that) const noexcept {
     BMat16 tmp = that.transpose();
-    xpu64 t1 = to_block(_data), 
-          t2 = to_block(tmp._data);
-    BMat8 a1(t1[0]), b1(t1[1]), c1(t1[2]), d1(t1[3]),
-          a2(t2[0]), b2(t2[1]), c2(t2[2]), d2(t2[3]);
-    return BMat16((a1.mult_transpose(a2) | b1.mult_transpose(b2)).to_int(), 
-                  (a1.mult_transpose(c2) | b1.mult_transpose(d2)).to_int(), 
-                  (c1.mult_transpose(a2) | d1.mult_transpose(b2)).to_int(), 
+    xpu64 t1 = to_block(_data), t2 = to_block(tmp._data);
+    BMat8 a1(t1[0]), b1(t1[1]), c1(t1[2]), d1(t1[3]), a2(t2[0]), b2(t2[1]),
+        c2(t2[2]), d2(t2[3]);
+    return BMat16((a1.mult_transpose(a2) | b1.mult_transpose(b2)).to_int(),
+                  (a1.mult_transpose(c2) | b1.mult_transpose(d2)).to_int(),
+                  (c1.mult_transpose(a2) | d1.mult_transpose(b2)).to_int(),
                   (c1.mult_transpose(c2) | d1.mult_transpose(d2)).to_int());
 }
 
 inline BMat16 BMat16::mult_naive(BMat16 const &that) const noexcept {
     uint64_t a = 0, b = 0, c = 0, d = 0;
     for (int i = 7; i >= 0; --i) {
         for (int j = 7; j >= 0; --j) {
-            a <<= 1; b <<= 1; c <<= 1; d <<= 1;
+            a <<= 1;
+            b <<= 1;
+            c <<= 1;
+            d <<= 1;
             for (size_t k = 0; k < 8; ++k) {
-                a |= ((*this)(i, k)     & that(k, j))     | ((*this)(i, k + 8)     & that(k + 8, j));
-                b |= ((*this)(i, k)     & that(k, j + 8)) | ((*this)(i, k + 8)     & that(k + 8, j + 8));
-                c |= ((*this)(i + 8, k) & that(k, j))     | ((*this)(i + 8, k + 8) & that(k + 8, j));
-                d |= ((*this)(i + 8, k) & that(k, j + 8)) | ((*this)(i + 8, k + 8) & that(k + 8, j + 8));
+                a |= ((*this)(i, k) & that(k, j)) |
+                     ((*this)(i, k + 8) & that(k + 8, j));
+                b |= ((*this)(i, k) & that(k, j + 8)) |
+                     ((*this)(i, k + 8) & that(k + 8, j + 8));
+                c |= ((*this)(i + 8, k) & that(k, j)) |
+                     ((*this)(i + 8, k + 8) & that(k + 8, j));
+                d |= ((*this)(i + 8, k) & that(k, j + 8)) |
+                     ((*this)(i + 8, k + 8) & that(k + 8, j + 8));
             }
         }
     }
     return BMat16(a, b, c, d);
 }
 
 inline BMat16 BMat16::mult_naive_array(BMat16 const &that) const noexcept {
-    std::array<std::array<bool, 16>, 16> tab1 = to_array(), tab2 = that.to_array();
+    std::array<std::array<bool, 16>, 16> tab1 = to_array(),
+                                         tab2 = that.to_array();
     uint64_t a = 0, b = 0, c = 0, d = 0;
     for (int i = 7; i >= 0; --i) {
         for (int j = 7; j >= 0; --j) {
-            a <<= 1; b <<= 1; c <<= 1; d <<= 1;
+            a <<= 1;
+            b <<= 1;
+            c <<= 1;
+            d <<= 1;
             for (size_t k = 0; k < 16; ++k) {
-                a |= tab1[i][k]     & tab2[k][j];
-                b |= tab1[i][k]     & tab2[k][j + 8];
+                a |= tab1[i][k] & tab2[k][j];
+                b |= tab1[i][k] & tab2[k][j + 8];
                 c |= tab1[i + 8][k] & tab2[k][j];
                 d |= tab1[i + 8][k] & tab2[k][j + 8];
             }
@@ -194,14 +223,15 @@ inline BMat16 BMat16::mult_naive_array(BMat16 const &that) const noexcept {
     return BMat16(a, b, c, d);
 }
 
-inline size_t BMat16::nr_rows() const noexcept{
+inline size_t BMat16::nr_rows() const noexcept {
     size_t res = 0;
     for (size_t i = 0; i < 16; ++i)
-        if ((_data[i/4] << (16 * (i%4)) >> 48) != 0) 
+        if ((_data[i / 4] << (16 * (i % 4)) >> 48) != 0)
             ++res;
     return res;
 
-    //// Vectorized version which doesn't work due to the absence of popcnt in simde
+    //// Vectorized version which doesn't work due to the absence of popcnt in
+    //// simde
     // xpu16 tmp = _data, zero = simde_mm256_setzero_si256();
     // xpu16 x = (tmp != zero);
     // return simde_mm256_popcnt_epi16(x);
@@ -210,7 +240,7 @@ inline size_t BMat16::nr_rows() const noexcept{
 inline std::vector<uint16_t> BMat16::rows() const {
     std::vector<uint16_t> rows;
     for (size_t i = 0; i < 16; ++i) {
-        uint16_t row_rev = (_data[i/4] << (16 * (3 - i%4)) >> 48);
+        uint16_t row_rev = (_data[i / 4] << (16 * (3 - i % 4)) >> 48);
 
         // The row needs to be reversed
         uint16_t row = 0;
@@ -232,45 +262,80 @@ inline BMat16 BMat16::random() {
 }
 
 static const constexpr std::array<xpu64, 16> ROW_MASK16 = {
-    xpu16{0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 
-    xpu16{0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 
-    xpu16{0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 
-    xpu16{0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 
-    xpu16{0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 
-    xpu16{0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 
-    xpu16{0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 
-    xpu16{0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0}, 
-    xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0}, 
-    xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0}, 
-    xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0}, 
-    xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0}, 
-    xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0}, 
-    xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0}, 
-    xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0},
-    xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff}
-};
+    static_cast<xpu64>(
+        xpu16{0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
+    static_cast<xpu64>(
+        xpu16{0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
+    static_cast<xpu64>(
+        xpu16{0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
+    static_cast<xpu64>(
+        xpu16{0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
+    static_cast<xpu64>(
+        xpu16{0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
+    static_cast<xpu64>(
+        xpu16{0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
+    static_cast<xpu64>(
+        xpu16{0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
+    static_cast<xpu64>(
+        xpu16{0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0}),
+    static_cast<xpu64>(
+        xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0}),
+    static_cast<xpu64>(
+        xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0}),
+    static_cast<xpu64>(
+        xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0}),
+    static_cast<xpu64>(
+        xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0}),
+    static_cast<xpu64>(
+        xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0}),
+    static_cast<xpu64>(
+        xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0}),
+    static_cast<xpu64>(
+        xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0}),
+    static_cast<xpu64>(
+        xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff})};
 
 static const constexpr std::array<xpu64, 16> COL_MASK16 = {
-    xpu16{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 
-    xpu16{2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, 
-    xpu16{4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}, 
-    xpu16{8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}, 
-    xpu16{0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10}, 
-    xpu16{0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20}, 
-    xpu16{0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40}, 
-    xpu16{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, 
-    xpu16{0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100}, 
-    xpu16{0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200}, 
-    xpu16{0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400}, 
-    xpu16{0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800}, 
-    xpu16{0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000}, 
-    xpu16{0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000}, 
-    xpu16{0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000}, 
-    xpu16{0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}
-};
+    static_cast<xpu64>(xpu16{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}),
+    static_cast<xpu64>(xpu16{2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}),
+    static_cast<xpu64>(xpu16{4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}),
+    static_cast<xpu64>(xpu16{8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}),
+    static_cast<xpu64>(xpu16{0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+                             0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10}),
+    static_cast<xpu64>(xpu16{0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+                             0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20}),
+    static_cast<xpu64>(xpu16{0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+                             0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40}),
+    static_cast<xpu64>(xpu16{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+                             0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}),
+    static_cast<xpu64>(xpu16{0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100,
+                             0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100,
+                             0x100, 0x100}),
+    static_cast<xpu64>(xpu16{0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200,
+                             0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200,
+                             0x200, 0x200}),
+    static_cast<xpu64>(xpu16{0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400,
+                             0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400,
+                             0x400, 0x400}),
+    static_cast<xpu64>(xpu16{0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800,
+                             0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800,
+                             0x800, 0x800}),
+    static_cast<xpu64>(xpu16{0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
+                             0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
+                             0x1000, 0x1000, 0x1000, 0x1000}),
+    static_cast<xpu64>(xpu16{0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000,
+                             0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000,
+                             0x2000, 0x2000, 0x2000, 0x2000}),
+    static_cast<xpu64>(xpu16{0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000,
+                             0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000,
+                             0x4000, 0x4000, 0x4000, 0x4000}),
+    static_cast<xpu64>(xpu16{0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+                             0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+                             0x8000, 0x8000, 0x8000, 0x8000})};
 
 inline BMat16 BMat16::random(size_t const dim) {
-    // TO DO : Instead of nulling all the cols/rows one by one, one could do that at once with the proper mask
+    // TO DO : Instead of nulling all the cols/rows one by one, one could do
+    // that at once with the proper mask
     HPCOMBI_ASSERT(0 < dim && dim <= 16);
     BMat16 bm = BMat16::random();
     for (size_t i = dim; i < 16; ++i) {
@@ -290,8 +355,7 @@ inline std::ostream &BMat16::write(std::ostream &os) const {
     return os;
 }
 
-
-} // namespace HPCombi
+}  // namespace HPCombi
 
 namespace std {