@@ -925,7 +925,8 @@ bvt bv_utilst::dadda_tree(const std::vector<bvt> &pps)
 // with Dadda's reduction yields the most consistent performance improvement
 // while not regressing substantially in the matrix of different benchmarks and
 // CaDiCaL and MiniSat2 as solvers.
-#define RADIX_MULTIPLIER 8
+// #define RADIX_MULTIPLIER 8
+#define USE_KARATSUBA
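+// USE_KARATSUBA routes unsigned multiplications (and the magnitude product
+// inside signed multiplication) through unsigned_karatsuba_multiplier below;
+// operand widths that are not powers of two fall back to unsigned_multiplier.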
 #ifdef RADIX_MULTIPLIER
 # define DADDA_TREE
 #endif
@@ -1821,6 +1822,53 @@ bvt bv_utilst::unsigned_multiplier(const bvt &_op0, const bvt &_op1)
   }
 }
 
+bvt bv_utilst::unsigned_karatsuba_multiplier(const bvt &_op0, const bvt &_op1)
+{
+  if(_op0.size() != _op1.size())
+    return unsigned_multiplier(_op0, _op1);
+
+  const std::size_t op_size = _op0.size();
+  // only use this approach for powers of two; op_size & (op_size - 1) is
+  // zero exactly when op_size is a power of two
+  if(op_size == 0 || (op_size & (op_size - 1)) != 0)
+    return unsigned_multiplier(_op0, _op1);
+
+  const std::size_t half_op_size = op_size >> 1;
+
+  // The need to use a full multiplier for z_0 means that we will not actually
+  // accomplish a reduction in bit width.
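+  // Karatsuba splitting: with h = half_op_size, x = x1*2^h + x0 and
+  // y = y1*2^h + y0, so that
+  //   x*y = z2*2^(2h) + z1*2^h + z0
+  // where z0 = x0*y0, z2 = x1*y1, and z1 = (x0 + x1)*(y0 + y1) - z2 - z0.
+  // The result is truncated to op_size = 2h bits, so the z2*2^(2h) term
+  // vanishes and z1 (and hence z2) is only needed modulo 2^h.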
+  bvt x0{_op0.begin(), _op0.begin() + half_op_size};
+  x0.resize(op_size, const_literal(false));
+  // x1 and y1 deliberately remain at half width
+  bvt x1{_op0.begin() + half_op_size, _op0.end()};
+  bvt y0{_op1.begin(), _op1.begin() + half_op_size};
+  y0.resize(op_size, const_literal(false));
+  bvt y1{_op1.begin() + half_op_size, _op1.end()};
+
+  bvt z0 = unsigned_multiplier(x0, y0);
+  bvt z2 = unsigned_karatsuba_multiplier(x1, y1);
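+  // z0 = x0*y0 is needed exactly, to the full op_size bits, which is why x0
+  // and y0 are zero-extended and multiplied at full width; z2 = x1*y1 is only
+  // used modulo 2^half_op_size, so it can recurse on the half-width operands.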
+
+  bvt z0_half{z0.begin(), z0.begin() + half_op_size};
+  bvt z2_plus_z0 = add(z2, z0_half);
+  z2_plus_z0.resize(half_op_size);
+
+  bvt x0_half{x0.begin(), x0.begin() + half_op_size};
+  bvt xdiff = add(x0_half, x1);
+  bvt y0_half{y0.begin(), y0.begin() + half_op_size};
+  bvt ydiff = add(y1, y0_half);
+
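+  // Despite their names, xdiff and ydiff hold the sums x0 + x1 and y0 + y1
+  // (modulo 2^half_op_size); z1 = (x0 + x1)*(y0 + y1) - z2 - z0 likewise only
+  // needs to be correct modulo 2^half_op_size, so half-width arithmetic
+  // suffices throughout.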
+  bvt z1 = sub(unsigned_karatsuba_multiplier(xdiff, ydiff), z2_plus_z0);
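+  // bvt stores the least-significant bit at index 0, so prepending zeros
+  // shifts z1 left by half_op_size bits, i.e., multiplies it by 2^half_op_size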
+  for(std::size_t i = 0; i < half_op_size; ++i)
+    z1.insert(z1.begin(), const_literal(false));
+
+  z0.resize(op_size);
+  return add(z0, z1);
+}
+
 bvt bv_utilst::unsigned_multiplier_no_overflow(
   const bvt &op0,
   const bvt &op1)
@@ -1871,7 +1919,11 @@ bvt bv_utilst::signed_multiplier(const bvt &op0, const bvt &op1)
   bvt neg0=cond_negate(op0, sign0);
   bvt neg1=cond_negate(op1, sign1);
 
+#ifdef USE_KARATSUBA
+  bvt result = unsigned_karatsuba_multiplier(neg0, neg1);
+#else
   bvt result=unsigned_multiplier(neg0, neg1);
+#endif
 
   literalt result_sign=prop.lxor(sign0, sign1);
 
@@ -1939,7 +1991,12 @@ bvt bv_utilst::multiplier(
   switch(rep)
   {
   case representationt::SIGNED: return signed_multiplier(op0, op1);
+#ifdef USE_KARATSUBA
+  case representationt::UNSIGNED:
+    return unsigned_karatsuba_multiplier(op0, op1);
+#else
   case representationt::UNSIGNED: return unsigned_multiplier(op0, op1);
+#endif
   }
 
   UNREACHABLE;