Skip to content

Commit 7b37f71

Browse files
Try fix compile with gcc-9
1 parent 0d0f8b9 commit 7b37f71

File tree

1 file changed

+155
-91
lines changed

1 file changed

+155
-91
lines changed

include/hpcombi/bmat16_impl.hpp

Lines changed: 155 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,12 @@
2525
namespace HPCombi {
2626
static_assert(std::is_trivial<BMat16>(), "BMat16 is not a trivial class!");
2727

28-
static constexpr xpu16 line{0x800, 0x901, 0xa02, 0xb03, 0xc04, 0xd05, 0xe06, 0xf07, 0x800, 0x901, 0xa02, 0xb03, 0xc04, 0xd05, 0xe06, 0xf07};
29-
static constexpr xpu16 block{0x200, 0x604, 0xa08, 0xe0c, 0x301, 0x705, 0xb09, 0xf0d, 0x200, 0x604, 0xa08, 0xe0c, 0x301, 0x705, 0xb09, 0xf0d};
28+
static constexpr xpu16 line{0x800, 0x901, 0xa02, 0xb03, 0xc04, 0xd05,
29+
0xe06, 0xf07, 0x800, 0x901, 0xa02, 0xb03,
30+
0xc04, 0xd05, 0xe06, 0xf07};
31+
static constexpr xpu16 block{0x200, 0x604, 0xa08, 0xe0c, 0x301, 0x705,
32+
0xb09, 0xf0d, 0x200, 0x604, 0xa08, 0xe0c,
33+
0x301, 0x705, 0xb09, 0xf0d};
3034

3135
inline xpu64 to_line(xpu64 vect) {
3236
return simde_mm256_shuffle_epi8(vect, line);
@@ -36,7 +40,8 @@ inline xpu64 to_block(xpu64 vect) {
3640
return simde_mm256_shuffle_epi8(vect, block);
3741
}
3842

39-
inline BMat16::BMat16(uint64_t n0, uint64_t n1, uint64_t n2, uint64_t n3) noexcept {
43+
inline BMat16::BMat16(uint64_t n0, uint64_t n1, uint64_t n2,
44+
uint64_t n3) noexcept {
4045
xpu64 tmp{n0, n1, n2, n3};
4146
_data = to_line(tmp);
4247
}
@@ -47,27 +52,25 @@ inline BMat16::BMat16(std::vector<std::vector<bool>> const &mat) noexcept {
4752
std::array<uint64_t, 4> tmp = {0, 0, 0, 0};
4853
for (int i = mat.size() - 1; i >= 0; --i) {
4954
HPCOMBI_ASSERT(mat.size() == mat[i].size());
50-
tmp[i/4] <<= 16 - mat.size();
55+
tmp[i / 4] <<= 16 - mat.size();
5156
for (int j = mat[i].size() - 1; j >= 0; --j) {
52-
tmp[i/4] = (tmp[i/4] << 1) | mat[i][j];
57+
tmp[i / 4] = (tmp[i / 4] << 1) | mat[i][j];
5358
}
5459
}
5560
_data = xpu64{tmp[0], tmp[1], tmp[2], tmp[3]};
5661
}
5762

5863
inline bool BMat16::operator()(size_t i, size_t j) const noexcept {
59-
return (_data[i/4] >> (16 * (i%4) + j)) & 1;
64+
return (_data[i / 4] >> (16 * (i % 4) + j)) & 1;
6065
}
6166

6267
inline void BMat16::set(size_t i, size_t j, bool val) noexcept {
6368
HPCOMBI_ASSERT(i < 16);
6469
HPCOMBI_ASSERT(j < 16);
6570
uint64_t a = 1;
66-
a <<= 16 * (i%4) + j;
67-
xpu64 mask{(i/4 == 0)*a,
68-
(i/4 == 1)*a,
69-
(i/4 == 2)*a,
70-
(i/4 == 3)*a};
71+
a <<= 16 * (i % 4) + j;
72+
xpu64 mask{(i / 4 == 0) * a, (i / 4 == 1) * a, (i / 4 == 2) * a,
73+
(i / 4 == 3) * a};
7174
_data ^= (-val ^ _data) & mask;
7275
}
7376

@@ -77,28 +80,36 @@ inline bool BMat16::operator==(BMat16 const &that) const noexcept {
7780
}
7881

7982
inline bool BMat16::operator<(BMat16 const &that) const noexcept {
80-
return _data[0] < that._data[0] ||
81-
(_data[0] == that._data[0] && (_data[1] < that._data[1] ||
82-
(_data[1] == that._data[1] && (_data[2] < that._data[2] ||
83-
(_data[2] == that._data[2] && (_data[3] < that._data[3]))))));
83+
return _data[0] < that._data[0] ||
84+
(_data[0] == that._data[0] &&
85+
(_data[1] < that._data[1] ||
86+
(_data[1] == that._data[1] &&
87+
(_data[2] < that._data[2] ||
88+
(_data[2] == that._data[2] && (_data[3] < that._data[3]))))));
8489
}
8590

8691
inline bool BMat16::operator>(BMat16 const &that) const noexcept {
87-
return _data[0] > that._data[0] ||
88-
(_data[0] == that._data[0] && (_data[1] > that._data[1] ||
89-
(_data[1] == that._data[1] && (_data[2] > that._data[2] ||
90-
(_data[2] == that._data[2] && (_data[3] > that._data[3]))))));
92+
return _data[0] > that._data[0] ||
93+
(_data[0] == that._data[0] &&
94+
(_data[1] > that._data[1] ||
95+
(_data[1] == that._data[1] &&
96+
(_data[2] > that._data[2] ||
97+
(_data[2] == that._data[2] && (_data[3] > that._data[3]))))));
9198
}
9299

93100
inline std::array<std::array<bool, 16>, 16> BMat16::to_array() const noexcept {
94101
xpu64 tmp = to_block(_data);
95102
uint64_t a = tmp[0], b = tmp[1], c = tmp[2], d = tmp[3];
96103
std::array<std::array<bool, 16>, 16> res;
97104
for (size_t i = 0; i < 64; ++i) {
98-
res[i/8][i%8] = a & 1; a >>= 1;
99-
res[i/8][8 + i%8] = b & 1; b >>= 1;
100-
res[8 + i/8][i%8] = c & 1; c >>= 1;
101-
res[8 + i/8][8 + i%8] = d & 1; d >>= 1;
105+
res[i / 8][i % 8] = a & 1;
106+
a >>= 1;
107+
res[i / 8][8 + i % 8] = b & 1;
108+
b >>= 1;
109+
res[8 + i / 8][i % 8] = c & 1;
110+
c >>= 1;
111+
res[8 + i / 8][8 + i % 8] = d & 1;
112+
d >>= 1;
102113
}
103114
return res;
104115
}
@@ -107,10 +118,10 @@ inline BMat16 BMat16::transpose_naive() const noexcept {
107118
uint64_t a = 0, b = 0, c = 0, d = 0;
108119
for (int i = 7; i >= 0; --i) {
109120
for (int j = 7; j >= 0; --j) {
110-
a = (a << 1) | (*this)(j, i);
111-
b = (b << 1) | (*this)(j+8, i);
112-
c = (c << 1) | (*this)(j, i+8);
113-
d = (d << 1) | (*this)(j+8, i+8);
121+
a = (a << 1) | (*this)(j, i);
122+
b = (b << 1) | (*this)(j + 8, i);
123+
c = (c << 1) | (*this)(j, i + 8);
124+
d = (d << 1) | (*this)(j + 8, i + 8);
114125
}
115126
}
116127
return BMat16(a, b, c, d);
@@ -119,25 +130,33 @@ inline BMat16 BMat16::transpose_naive() const noexcept {
119130
inline BMat16 BMat16::transpose() const noexcept {
120131
xpu64 tmp = to_block(_data);
121132
xpu64 x = simde_mm256_set_epi64x(tmp[3], tmp[1], tmp[2], tmp[0]);
122-
xpu64 y = (x ^ (x >> 7)) & (xpu64{0xAA00AA00AA00AA, 0xAA00AA00AA00AA, 0xAA00AA00AA00AA, 0xAA00AA00AA00AA});
133+
xpu64 y = (x ^ (x >> 7)) & (xpu64{0xAA00AA00AA00AA, 0xAA00AA00AA00AA,
134+
0xAA00AA00AA00AA, 0xAA00AA00AA00AA});
123135
x = x ^ y ^ (y << 7);
124-
y = (x ^ (x >> 14)) & (xpu64{0xCCCC0000CCCC, 0xCCCC0000CCCC, 0xCCCC0000CCCC, 0xCCCC0000CCCC});
136+
y = (x ^ (x >> 14)) &
137+
(xpu64{0xCCCC0000CCCC, 0xCCCC0000CCCC, 0xCCCC0000CCCC, 0xCCCC0000CCCC});
125138
x = x ^ y ^ (y << 14);
126-
y = (x ^ (x >> 28)) & (xpu64{0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0});
139+
y = (x ^ (x >> 28)) &
140+
(xpu64{0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0});
127141
x = x ^ y ^ (y << 28);
128142
return BMat16(to_line(x));
129143
}
130144

131-
static constexpr xpu16 rot{0x302, 0x504, 0x706, 0x908, 0xb0a, 0xd0c, 0xf0e, 0x100, 0x302, 0x504, 0x706, 0x908, 0xb0a, 0xd0c, 0xf0e, 0x100};
145+
static constexpr xpu16 rot{0x302, 0x504, 0x706, 0x908, 0xb0a, 0xd0c,
146+
0xf0e, 0x100, 0x302, 0x504, 0x706, 0x908,
147+
0xb0a, 0xd0c, 0xf0e, 0x100};
132148

133149
inline BMat16 BMat16::mult_transpose(BMat16 const &that) const noexcept {
134150
xpu16 x = _data;
135151
xpu16 y1 = that._data;
136-
xpu16 y2 = simde_mm256_set_epi64x(that._data[1], that._data[0], that._data[3], that._data[2]);
152+
xpu16 y2 = simde_mm256_set_epi64x(that._data[1], that._data[0],
153+
that._data[3], that._data[2]);
137154
xpu16 zero = simde_mm256_setzero_si256();
138155
xpu16 data = simde_mm256_setzero_si256();
139-
xpu16 diag1{0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000};
140-
xpu16 diag2{0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
156+
xpu16 diag1{0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
157+
0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000};
158+
xpu16 diag2{0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000,
159+
0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
141160
for (size_t i = 0; i < 8; ++i) {
142161
data |= ((x & y1) != zero) & diag1;
143162
data |= ((x & y2) != zero) & diag2;
@@ -151,41 +170,51 @@ inline BMat16 BMat16::mult_transpose(BMat16 const &that) const noexcept {
151170

152171
inline BMat16 BMat16::mult_4bmat8(BMat16 const &that) const noexcept {
153172
BMat16 tmp = that.transpose();
154-
xpu64 t1 = to_block(_data),
155-
t2 = to_block(tmp._data);
156-
BMat8 a1(t1[0]), b1(t1[1]), c1(t1[2]), d1(t1[3]),
157-
a2(t2[0]), b2(t2[1]), c2(t2[2]), d2(t2[3]);
158-
return BMat16((a1.mult_transpose(a2) | b1.mult_transpose(b2)).to_int(),
159-
(a1.mult_transpose(c2) | b1.mult_transpose(d2)).to_int(),
160-
(c1.mult_transpose(a2) | d1.mult_transpose(b2)).to_int(),
173+
xpu64 t1 = to_block(_data), t2 = to_block(tmp._data);
174+
BMat8 a1(t1[0]), b1(t1[1]), c1(t1[2]), d1(t1[3]), a2(t2[0]), b2(t2[1]),
175+
c2(t2[2]), d2(t2[3]);
176+
return BMat16((a1.mult_transpose(a2) | b1.mult_transpose(b2)).to_int(),
177+
(a1.mult_transpose(c2) | b1.mult_transpose(d2)).to_int(),
178+
(c1.mult_transpose(a2) | d1.mult_transpose(b2)).to_int(),
161179
(c1.mult_transpose(c2) | d1.mult_transpose(d2)).to_int());
162180
}
163181

164182
inline BMat16 BMat16::mult_naive(BMat16 const &that) const noexcept {
165183
uint64_t a = 0, b = 0, c = 0, d = 0;
166184
for (int i = 7; i >= 0; --i) {
167185
for (int j = 7; j >= 0; --j) {
168-
a <<= 1; b <<= 1; c <<= 1; d <<= 1;
186+
a <<= 1;
187+
b <<= 1;
188+
c <<= 1;
189+
d <<= 1;
169190
for (size_t k = 0; k < 8; ++k) {
170-
a |= ((*this)(i, k) & that(k, j)) | ((*this)(i, k + 8) & that(k + 8, j));
171-
b |= ((*this)(i, k) & that(k, j + 8)) | ((*this)(i, k + 8) & that(k + 8, j + 8));
172-
c |= ((*this)(i + 8, k) & that(k, j)) | ((*this)(i + 8, k + 8) & that(k + 8, j));
173-
d |= ((*this)(i + 8, k) & that(k, j + 8)) | ((*this)(i + 8, k + 8) & that(k + 8, j + 8));
191+
a |= ((*this)(i, k) & that(k, j)) |
192+
((*this)(i, k + 8) & that(k + 8, j));
193+
b |= ((*this)(i, k) & that(k, j + 8)) |
194+
((*this)(i, k + 8) & that(k + 8, j + 8));
195+
c |= ((*this)(i + 8, k) & that(k, j)) |
196+
((*this)(i + 8, k + 8) & that(k + 8, j));
197+
d |= ((*this)(i + 8, k) & that(k, j + 8)) |
198+
((*this)(i + 8, k + 8) & that(k + 8, j + 8));
174199
}
175200
}
176201
}
177202
return BMat16(a, b, c, d);
178203
}
179204

180205
inline BMat16 BMat16::mult_naive_array(BMat16 const &that) const noexcept {
181-
std::array<std::array<bool, 16>, 16> tab1 = to_array(), tab2 = that.to_array();
206+
std::array<std::array<bool, 16>, 16> tab1 = to_array(),
207+
tab2 = that.to_array();
182208
uint64_t a = 0, b = 0, c = 0, d = 0;
183209
for (int i = 7; i >= 0; --i) {
184210
for (int j = 7; j >= 0; --j) {
185-
a <<= 1; b <<= 1; c <<= 1; d <<= 1;
211+
a <<= 1;
212+
b <<= 1;
213+
c <<= 1;
214+
d <<= 1;
186215
for (size_t k = 0; k < 16; ++k) {
187-
a |= tab1[i][k] & tab2[k][j];
188-
b |= tab1[i][k] & tab2[k][j + 8];
216+
a |= tab1[i][k] & tab2[k][j];
217+
b |= tab1[i][k] & tab2[k][j + 8];
189218
c |= tab1[i + 8][k] & tab2[k][j];
190219
d |= tab1[i + 8][k] & tab2[k][j + 8];
191220
}
@@ -194,14 +223,15 @@ inline BMat16 BMat16::mult_naive_array(BMat16 const &that) const noexcept {
194223
return BMat16(a, b, c, d);
195224
}
196225

197-
inline size_t BMat16::nr_rows() const noexcept{
226+
inline size_t BMat16::nr_rows() const noexcept {
198227
size_t res = 0;
199228
for (size_t i = 0; i < 16; ++i)
200-
if ((_data[i/4] << (16 * (i%4)) >> 48) != 0)
229+
if ((_data[i / 4] << (16 * (i % 4)) >> 48) != 0)
201230
++res;
202231
return res;
203232

204-
//// Vectorized version which doesn't work due to the absence of popcnt in simde
233+
//// Vectorized version which doesn't work due to the absence of popcnt in
234+
/// simde
205235
// xpu16 tmp = _data, zero = simde_mm256_setzero_si256();
206236
// xpu16 x = (tmp != zero);
207237
// return simde_mm256_popcnt_epi16(x);
@@ -210,7 +240,7 @@ inline size_t BMat16::nr_rows() const noexcept{
210240
inline std::vector<uint16_t> BMat16::rows() const {
211241
std::vector<uint16_t> rows;
212242
for (size_t i = 0; i < 16; ++i) {
213-
uint16_t row_rev = (_data[i/4] << (16 * (3 - i%4)) >> 48);
243+
uint16_t row_rev = (_data[i / 4] << (16 * (3 - i % 4)) >> 48);
214244

215245
// The row needs to be reversed
216246
uint16_t row = 0;
@@ -232,45 +262,80 @@ inline BMat16 BMat16::random() {
232262
}
233263

234264
static const constexpr std::array<xpu64, 16> ROW_MASK16 = {
235-
xpu16{0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
236-
xpu16{0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
237-
xpu16{0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
238-
xpu16{0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
239-
xpu16{0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
240-
xpu16{0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
241-
xpu16{0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0},
242-
xpu16{0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0},
243-
xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0},
244-
xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0},
245-
xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0},
246-
xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0},
247-
xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0},
248-
xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0},
249-
xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0},
250-
xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff}
251-
};
265+
static_cast<xpu64>(
266+
xpu16{0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
267+
static_cast<xpu64>(
268+
xpu16{0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
269+
static_cast<xpu64>(
270+
xpu16{0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
271+
static_cast<xpu64>(
272+
xpu16{0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
273+
static_cast<xpu64>(
274+
xpu16{0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
275+
static_cast<xpu64>(
276+
xpu16{0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
277+
static_cast<xpu64>(
278+
xpu16{0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
279+
static_cast<xpu64>(
280+
xpu16{0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0}),
281+
static_cast<xpu64>(
282+
xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0}),
283+
static_cast<xpu64>(
284+
xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0}),
285+
static_cast<xpu64>(
286+
xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0}),
287+
static_cast<xpu64>(
288+
xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0}),
289+
static_cast<xpu64>(
290+
xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0}),
291+
static_cast<xpu64>(
292+
xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0}),
293+
static_cast<xpu64>(
294+
xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0}),
295+
static_cast<xpu64>(
296+
xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff})};
252297

253298
static const constexpr std::array<xpu64, 16> COL_MASK16 = {
254-
xpu16{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
255-
xpu16{2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
256-
xpu16{4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4},
257-
xpu16{8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8},
258-
xpu16{0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10},
259-
xpu16{0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20},
260-
xpu16{0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40},
261-
xpu16{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80},
262-
xpu16{0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100},
263-
xpu16{0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200},
264-
xpu16{0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400},
265-
xpu16{0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800},
266-
xpu16{0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000},
267-
xpu16{0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000},
268-
xpu16{0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000},
269-
xpu16{0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}
270-
};
299+
static_cast<xpu64>(xpu16{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}),
300+
static_cast<xpu64>(xpu16{2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}),
301+
static_cast<xpu64>(xpu16{4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}),
302+
static_cast<xpu64>(xpu16{8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}),
303+
static_cast<xpu64>(xpu16{0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
304+
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10}),
305+
static_cast<xpu64>(xpu16{0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
306+
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20}),
307+
static_cast<xpu64>(xpu16{0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
308+
0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40}),
309+
static_cast<xpu64>(xpu16{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
310+
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}),
311+
static_cast<xpu64>(xpu16{0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100,
312+
0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100,
313+
0x100, 0x100}),
314+
static_cast<xpu64>(xpu16{0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200,
315+
0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200,
316+
0x200, 0x200}),
317+
static_cast<xpu64>(xpu16{0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400,
318+
0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400,
319+
0x400, 0x400}),
320+
static_cast<xpu64>(xpu16{0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800,
321+
0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800,
322+
0x800, 0x800}),
323+
static_cast<xpu64>(xpu16{0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
324+
0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
325+
0x1000, 0x1000, 0x1000, 0x1000}),
326+
static_cast<xpu64>(xpu16{0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000,
327+
0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000,
328+
0x2000, 0x2000, 0x2000, 0x2000}),
329+
static_cast<xpu64>(xpu16{0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000,
330+
0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000,
331+
0x4000, 0x4000, 0x4000, 0x4000}),
332+
static_cast<xpu64>(xpu16{0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
333+
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
334+
0x8000, 0x8000, 0x8000, 0x8000})};
271335

272336
inline BMat16 BMat16::random(size_t const dim) {
273-
// TO DO : Instead of nulling all the cols/rows one by one, one could do that at once with the proper mask
337+
// TO DO : Instead of nulling all the cols/rows one by one, one could do
338+
// that at once with the proper mask
274339
HPCOMBI_ASSERT(0 < dim && dim <= 16);
275340
BMat16 bm = BMat16::random();
276341
for (size_t i = dim; i < 16; ++i) {
@@ -290,8 +355,7 @@ inline std::ostream &BMat16::write(std::ostream &os) const {
290355
return os;
291356
}
292357

293-
294-
} // namespace HPCombi
358+
} // namespace HPCombi
295359

296360
namespace std {
297361

0 commit comments

Comments
 (0)