@@ -8,6 +8,8 @@
 
 #include "bv_utils.h"
 
+#include <util/arith_tools.h>
+
 #include <list>
 #include <utility>
 
@@ -783,7 +785,7 @@ bvt bv_utilst::dadda_tree(const std::vector<bvt> &pps)
 // trees (and also the default addition scheme), but isn't consistently more
 // performant with simple partial-product generation. Only when using
 // higher-radix multipliers does the combination appear to perform better.
-#define DADDA_TREE
+// #define DADDA_TREE
 
 // The following examples demonstrate the performance differences (with a
 // time-out of 7200 seconds):
@@ -926,7 +928,8 @@ bvt bv_utilst::dadda_tree(const std::vector<bvt> &pps)
 // while not regressing substantially across the matrix of different
 // benchmarks with CaDiCaL and MiniSat2 as solvers.
 // #define RADIX_MULTIPLIER 8
-#define USE_KARATSUBA
+// #define USE_KARATSUBA
+#define USE_TOOM_COOK
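+// USE_TOOM_COOK selects the Toom-Cook multiplier implemented further down in
+// this file (unsigned_toom_cook_multiplier); if USE_KARATSUBA is also
+// defined, it takes precedence in the #elif chains below.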
 #ifdef RADIX_MULTIPLIER
 # define DADDA_TREE
 #endif
@@ -1869,6 +1872,155 @@ bvt bv_utilst::unsigned_karatsuba_multiplier(const bvt &_op0, const bvt &_op1)
   return add(z0, z1);
 }
 
+bvt bv_utilst::unsigned_toom_cook_multiplier(const bvt &_op0, const bvt &_op1)
+{
+  PRECONDITION(!_op0.empty());
+  PRECONDITION(!_op1.empty());
+
+  if(_op1.size() == 1)
+    return unsigned_multiplier(_op0, _op1);
+
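+  // Toom-Cook multiplication: treat the operands as polynomials a(x), b(x)
+  // in x = 2^GROUP_SIZE. Fresh variables d[i] stand for the coefficients of
+  // the product polynomial r(x) = a(x) * b(x); constraining r at 2n-1
+  // distinct points (0, 1, -1, 2, -2, 4, -4, ...) determines the d[i] and
+  // leaves the interpolation step to the solver. The final product is then
+  // the weighted sum of the d[i].
+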
+  // break up _op0, _op1 in groups of at most GROUP_SIZE bits
+  PRECONDITION(_op0.size() == _op1.size());
+#define GROUP_SIZE 8
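+  // Width of each unknown coefficient d[i]: a product of two GROUP_SIZE-bit
+  // digits takes 2 * GROUP_SIZE bits; the additional 2 * address_bits(...)
+  // bits provide headroom for accumulating up to one such product per digit
+  // group, including the sign flips at negative evaluation points.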
+  const std::size_t d_bits =
+    2 * GROUP_SIZE +
+    2 * address_bits((_op0.size() + GROUP_SIZE - 1) / GROUP_SIZE);
+  std::vector<bvt> a, b, c_ops, d;
+  for(std::size_t i = 0; i < _op0.size(); i += GROUP_SIZE)
+  {
+    std::size_t u = std::min(i + GROUP_SIZE, _op0.size());
+    a.emplace_back(_op0.begin() + i, _op0.begin() + u);
+    b.emplace_back(_op1.begin() + i, _op1.begin() + u);
+
+    c_ops.push_back(zeros(i));
+    d.push_back(prop.new_variables(d_bits));
+    c_ops.back().insert(c_ops.back().end(), d.back().begin(), d.back().end());
+    c_ops.back() = zero_extension(c_ops.back(), _op0.size());
+  }
+  for(std::size_t i = a.size(); i < 2 * a.size() - 1; ++i)
+  {
+    d.push_back(prop.new_variables(d_bits));
+  }
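+
+  // The product of two n-digit operands has 2n-1 coefficients; only those
+  // placed in c_ops fall within the truncated result width, but all of them
+  // appear in the evaluation constraints below.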
+
+  // r(0)
+  bvt r_0 = d[0];
+  prop.l_set_to_true(equal(
+    r_0,
+    unsigned_multiplier(
+      zero_extension(a[0], r_0.size()), zero_extension(b[0], r_0.size()))));
+
+  for(std::size_t j = 1; j < a.size(); ++j)
+  {
+    // r(2^(j-1))
+    bvt r_j = zero_extension(
+      d[0], std::min(_op0.size(), d[0].size() + (j - 1) * (d.size() - 1)));
+    for(std::size_t i = 1; i < d.size(); ++i)
+    {
+      r_j = add(
+        r_j,
+        shift(
+          zero_extension(d[i], r_j.size()), shiftt::SHIFT_LEFT, (j - 1) * i));
+    }
+
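+    // Evaluate a(x) and b(x) at x = 2^(j-1) by splitting them into even- and
+    // odd-degree coefficients, a(x) = a_even(x) + x * a_odd(x), so that all
+    // scaling by powers of x reduces to left shifts.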
+    bvt a_even = zero_extension(a[0], r_j.size());
+    for(std::size_t i = 2; i < a.size(); i += 2)
+    {
+      a_even = add(
+        a_even,
+        shift(
+          zero_extension(a[i], a_even.size()),
+          shiftt::SHIFT_LEFT,
+          (j - 1) * i));
+    }
+    bvt a_odd = zero_extension(a[1], r_j.size());
+    for(std::size_t i = 3; i < a.size(); i += 2)
+    {
+      a_odd = add(
+        a_odd,
+        shift(
+          zero_extension(a[i], a_odd.size()),
+          shiftt::SHIFT_LEFT,
+          (j - 1) * (i - 1)));
+    }
+    bvt b_even = zero_extension(b[0], r_j.size());
+    for(std::size_t i = 2; i < b.size(); i += 2)
+    {
+      b_even = add(
+        b_even,
+        shift(
+          zero_extension(b[i], b_even.size()),
+          shiftt::SHIFT_LEFT,
+          (j - 1) * i));
+    }
+    bvt b_odd = zero_extension(b[1], r_j.size());
+    for(std::size_t i = 3; i < b.size(); i += 2)
+    {
+      b_odd = add(
+        b_odd,
+        shift(
+          zero_extension(b[i], b_odd.size()),
+          shiftt::SHIFT_LEFT,
+          (j - 1) * (i - 1)));
+    }
+
+    prop.l_set_to_true(equal(
+      r_j,
+      unsigned_multiplier(
+        add(a_even, shift(a_odd, shiftt::SHIFT_LEFT, j - 1)),
+        add(b_even, shift(b_odd, shiftt::SHIFT_LEFT, j - 1)))));
+
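+    // At the negative point -2^(j-1), odd-degree terms change sign: the
+    // odd-indexed d[i] are subtracted, and the operands are evaluated as
+    // a_even - x * a_odd and b_even - x * b_odd.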
+    // r(-2^(j-1))
+    bvt r_minus_j = zero_extension(
+      d[0], std::min(_op0.size(), d[0].size() + (j - 1) * (d.size() - 1)));
+    for(std::size_t i = 1; i < d.size(); ++i)
+    {
+      if(i % 2 == 1)
+      {
+        r_minus_j = sub(
+          r_minus_j,
+          shift(
+            zero_extension(d[i], r_minus_j.size()),
+            shiftt::SHIFT_LEFT,
+            (j - 1) * i));
+      }
+      else
+      {
+        r_minus_j = add(
+          r_minus_j,
+          shift(
+            zero_extension(d[i], r_minus_j.size()),
+            shiftt::SHIFT_LEFT,
+            (j - 1) * i));
+      }
+    }
+
+    prop.l_set_to_true(equal(
+      r_minus_j,
+      unsigned_multiplier(
+        sub(a_even, shift(a_odd, shiftt::SHIFT_LEFT, j - 1)),
+        sub(b_even, shift(b_odd, shiftt::SHIFT_LEFT, j - 1)))));
+  }
+
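+  // Sum up the shifted coefficient digits; as in the other multipliers, the
+  // reduction scheme is chosen via WALLACE_TREE / DADDA_TREE.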
+  if(c_ops.empty())
+    return zeros(_op0.size());
+  else
+  {
+#ifdef WALLACE_TREE
+    return wallace_tree(c_ops);
+#elif defined(DADDA_TREE)
+    return dadda_tree(c_ops);
+#else
+    bvt product = c_ops.front();
+
+    for(auto it = std::next(c_ops.begin()); it != c_ops.end(); ++it)
+      product = add(product, *it);
+
+    return product;
+#endif
+  }
+}
+
 bvt bv_utilst::unsigned_multiplier_no_overflow(
   const bvt &op0,
   const bvt &op1)
@@ -1921,6 +2073,8 @@ bvt bv_utilst::signed_multiplier(const bvt &op0, const bvt &op1)
 
 #ifdef USE_KARATSUBA
   bvt result = unsigned_karatsuba_multiplier(neg0, neg1);
+#elif defined(USE_TOOM_COOK)
+  bvt result = unsigned_toom_cook_multiplier(neg0, neg1);
 #else
   bvt result=unsigned_multiplier(neg0, neg1);
 #endif
@@ -1994,6 +2148,9 @@ bvt bv_utilst::multiplier(
 #ifdef USE_KARATSUBA
   case representationt::UNSIGNED:
     return unsigned_karatsuba_multiplier(op0, op1);
+#elif defined(USE_TOOM_COOK)
+  case representationt::UNSIGNED:
+    return unsigned_toom_cook_multiplier(op0, op1);
 #else
   case representationt::UNSIGNED: return unsigned_multiplier(op0, op1);
 #endif