Skip to content

ext/bcmath: Performance improvement bcsqrt() #18771

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions ext/bcmath/config.m4
Original file line number Diff line number Diff line change
@@ -15,8 +15,6 @@ if test "$PHP_BCMATH" != "no"; then
libbcmath/src/floor_or_ceil.c
libbcmath/src/long2num.c
libbcmath/src/init.c
libbcmath/src/int2num.c
libbcmath/src/nearzero.c
libbcmath/src/neg.c
libbcmath/src/num2long.c
libbcmath/src/num2str.c
4 changes: 2 additions & 2 deletions ext/bcmath/config.w32
Original file line number Diff line number Diff line change
@@ -5,9 +5,9 @@ ARG_ENABLE("bcmath", "bc style precision math functions", "yes");
if (PHP_BCMATH == "yes") {
EXTENSION("bcmath", "bcmath.c", null, "/DZEND_ENABLE_STATIC_TSRMLS_CACHE=1");
ADD_SOURCES("ext/bcmath/libbcmath/src", "add.c div.c init.c neg.c \
raisemod.c sub.c compare.c divmod.c int2num.c long2num.c \
raisemod.c sub.c compare.c divmod.c long2num.c \
num2long.c recmul.c sqrt.c zero.c doaddsub.c \
floor_or_ceil.c nearzero.c num2str.c raise.c rmzero.c str2num.c \
floor_or_ceil.c num2str.c raise.c rmzero.c str2num.c \
round.c convert.c", "bcmath");

AC_DEFINE('HAVE_BCMATH', 1, "Define to 1 if the PHP extension 'bcmath' is available.");
10 changes: 0 additions & 10 deletions ext/bcmath/libbcmath/src/bcmath.h
Original file line number Diff line number Diff line change
@@ -117,20 +117,12 @@ bool bc_is_zero(bc_num num);

bool bc_is_zero_for_scale(bc_num num, size_t scale);

bool bc_is_near_zero(bc_num num, size_t scale);

bool bc_is_neg(bc_num num);

void bc_rm_trailing_zeros(bc_num num);

bc_num bc_add(bc_num n1, bc_num n2, size_t scale_min);

#define bc_add_ex(n1, n2, result, scale_min) do { \
bc_num add_ex = bc_add(n1, n2, scale_min); \
bc_free_num (result); \
*(result) = add_ex; \
} while (0)

bc_num bc_sub(bc_num n1, bc_num n2, size_t scale_min);

#define bc_sub_ex(n1, n2, result, scale_min) do { \
@@ -178,8 +170,6 @@ raise_mod_status bc_raisemod(bc_num base, bc_num exponent, bc_num mod, bc_num *r

bc_raise_status bc_raise(bc_num base, long exponent, bc_num *result, size_t scale);

void bc_raise_bc_exponent(bc_num base, bc_num exponent, bc_num *resul, size_t scale);

bool bc_sqrt(bc_num *num, size_t scale);

/* Prototypes needed for external utility routines. */
16 changes: 16 additions & 0 deletions ext/bcmath/libbcmath/src/div.c
Original file line number Diff line number Diff line change
@@ -429,3 +429,19 @@ bool bc_divide(bc_num numerator, bc_num divisor, bc_num *quot, size_t scale)
*quot = bc_copy_num(BCG(_zero_));
return true;
}

void bc_divide_vector(
BC_VECTOR *numerator_vectors, size_t numerator_arr_size,
const BC_VECTOR *divisor_vectors, size_t divisor_arr_size, size_t divisor_size,
BC_VECTOR *quot_vectors, size_t quot_arr_size
) {
ZEND_ASSERT(divisor_vectors[divisor_arr_size - 1] != 0);
ZEND_ASSERT(quot_arr_size == numerator_arr_size - divisor_arr_size + 1);

/* Do the division */
if (divisor_arr_size == 1) {
bc_fast_div(numerator_vectors, numerator_arr_size, divisor_vectors[0], quot_vectors, quot_arr_size);
} else {
bc_standard_div(numerator_vectors, numerator_arr_size, divisor_vectors, divisor_arr_size, divisor_size, quot_vectors, quot_arr_size);
}
}
74 changes: 0 additions & 74 deletions ext/bcmath/libbcmath/src/int2num.c

This file was deleted.

57 changes: 0 additions & 57 deletions ext/bcmath/libbcmath/src/nearzero.c

This file was deleted.

4 changes: 4 additions & 0 deletions ext/bcmath/libbcmath/src/private.h
Original file line number Diff line number Diff line change
@@ -87,6 +87,10 @@ bc_num _bc_do_sub (bc_num n1, bc_num n2);
void bc_multiply_vector(
const BC_VECTOR *n1_vector, size_t n1_arr_size, const BC_VECTOR *n2_vector, size_t n2_arr_size,
BC_VECTOR *prod_vector, size_t prod_arr_size);
void bc_divide_vector(
BC_VECTOR *numerator_vectors, size_t numerator_arr_size,
const BC_VECTOR *divisor_vectors, size_t divisor_arr_size, size_t divisor_size,
BC_VECTOR *quot_vectors, size_t quot_arr_size);
void _bc_rm_leading_zeros (bc_num num);

#endif
16 changes: 0 additions & 16 deletions ext/bcmath/libbcmath/src/raise.c
Original file line number Diff line number Diff line change
@@ -252,19 +252,3 @@ bc_raise_status bc_raise(bc_num base, long exponent, bc_num *result, size_t scal
}
return BC_RAISE_STATUS_OK;
}

/* This is used internally by BCMath */
void bc_raise_bc_exponent(bc_num base, bc_num expo, bc_num *result, size_t scale) {
/* Exponent must not have fractional part */
assert(expo->n_scale == 0);

long exponent = bc_num2long(expo);
/* Exponent must be properly convertable to long */
if (exponent == 0 && (expo->n_len > 1 || expo->n_value[0] != 0)) {
assert(false && "Exponent is not well formed in internal call");
//assert(exponent != 0 || (expo->n_len == 0 && expo->n_value[0] == 0));
}
//assert(exponent != 0 || (expo->n_len == 0 && expo->n_value[0] == 0));
bc_raise(base, exponent, result, scale);
}

320 changes: 268 additions & 52 deletions ext/bcmath/libbcmath/src/sqrt.c
Original file line number Diff line number Diff line change
@@ -30,28 +30,278 @@
*************************************************************************/

#include "bcmath.h"
#include "convert.h"
#include <stdbool.h>
#include <stddef.h>
#include "private.h"

/* Take the square root NUM and return it in NUM with SCALE digits
after the decimal place. */

static inline BC_VECTOR bc_sqrt_get_pow_10(size_t exponent)
{
BC_VECTOR value = 1;
while (exponent >= 8) {
value *= BC_POW_10_LUT[8];
exponent -= 8;
}
value *= BC_POW_10_LUT[exponent];
return value;
}

static BC_VECTOR bc_fast_sqrt_vector(BC_VECTOR n_vector)
{
/* Use sqrt() from <math.h> to determine the initial value. */
BC_VECTOR guess_vector = (BC_VECTOR) round(sqrt((double) n_vector));

/* Newton's algorithm. Iterative expression is `x_{n+1} = (x_n + a / x_n) / 2` */
BC_VECTOR guess1_vector;
size_t diff;
do {
guess1_vector = guess_vector;
guess_vector = (guess1_vector + n_vector / guess1_vector) / 2; /* Iterative expression */
diff = guess1_vector > guess_vector ? guess1_vector - guess_vector : guess_vector - guess1_vector;
} while (diff > 1);
return guess_vector;
}

static inline void bc_fast_sqrt(bc_num *num, size_t scale, size_t num_calc_full_len, size_t num_use_full_len, size_t leading_zeros)
{
BC_VECTOR n_vector = 0;
const char *nptr = (*num)->n_value + leading_zeros;
for (size_t i = 0; i < num_use_full_len; i++) {
n_vector = n_vector * BASE + *nptr++;
}
/* When calculating the square root of a number using only integer operations,
* need to adjust the digit scale accordingly.
* Considering that the original number is the square of the result,
* if the desired scale of the result is 5, the input number should be scaled
* by twice that, i.e., scale 10. */
n_vector *= bc_sqrt_get_pow_10(num_calc_full_len - num_use_full_len);

/* Get sqrt */
BC_VECTOR guess_vector = bc_fast_sqrt_vector(n_vector);

size_t ret_len;
if (leading_zeros > 0) {
ret_len = 1;
} else {
ret_len = ((*num)->n_len + 1) / 2;
}

bc_num ret = bc_new_num_nonzeroed(ret_len, scale);
char *rptr = ret->n_value;
char *rend = rptr + ret_len + scale - 1;

guess_vector /= BASE; /* Since the scale of guess_vector is scale + 1, reduce the scale by 1. */
while (rend >= rptr) {
*rend-- = guess_vector % BASE;
guess_vector /= BASE;
}
bc_free_num(num);
*num = ret;
}

static inline void bc_standard_sqrt(bc_num *num, size_t scale, size_t num_calc_full_len, size_t num_use_full_len, size_t leading_zeros)
{
/* allocate memory */
size_t n_arr_size = BC_ARR_SIZE_FROM_LEN(num_calc_full_len);

size_t guess_full_len = (num_calc_full_len + 1) / 2;
/* Since add the old guess and the new guess together during the calculation,
* there is a chance of overflow, so allocate an extra size. */
size_t guess_arr_size = BC_ARR_SIZE_FROM_LEN(guess_full_len) + 1;

/* n_arr_size * 2 + guess_arr_size * 3 */
BC_VECTOR *buf = safe_emalloc(n_arr_size + guess_arr_size, sizeof(BC_VECTOR) * 2, guess_arr_size * sizeof(BC_VECTOR));

BC_VECTOR *n_vector = buf;
/* In division by successive approximation, the numerator is modified during the computation,
* so it must be copied each time. */
BC_VECTOR *n_vector_copy = n_vector + n_arr_size;
BC_VECTOR *guess_vector = n_vector_copy + n_arr_size;
BC_VECTOR *guess1_vector = guess_vector + guess_arr_size;
BC_VECTOR *tmp_div_ret_vector = guess1_vector + guess_arr_size;

/* convert num to n_vector */
const char *nend = (*num)->n_value + leading_zeros + num_use_full_len - 1;
bc_convert_to_vector_with_zero_pad(n_vector, nend, num_use_full_len, num_calc_full_len - num_use_full_len);

/* Prepare guess_vector. Use `bc_fast_sqrt_vector()` to quickly obtain a highly accurate initial value. */

/* 18 on 64-bit, 10 on 32-bit */
size_t n_top_len_for_initial_guess = (MAX_LENGTH_OF_LONG - 1) & ~1;

/* Set the number of digits of num to be used as the initial value for Newton's method.
* Just as the square roots of 1000 and 100 differ significantly, the number of digits
* to "ignore" here must be even. */
if (num_calc_full_len & 1) {
n_top_len_for_initial_guess--;
}

BC_VECTOR n_top = n_vector[n_arr_size - 1];
size_t n_top_index = n_arr_size - 2;
size_t n_top_vector_len = num_calc_full_len % BC_VECTOR_SIZE == 0 ? BC_VECTOR_SIZE : num_calc_full_len % BC_VECTOR_SIZE;
size_t count = n_top_len_for_initial_guess - n_top_vector_len;
while (count >= BC_VECTOR_SIZE) {
n_top *= BC_VECTOR_BOUNDARY_NUM;
n_top += n_vector[n_top_index--];
count -= BC_VECTOR_SIZE;
}
if (count > 0) {
n_top *= BC_POW_10_LUT[count];
n_top += n_vector[n_top_index] / BC_POW_10_LUT[BC_VECTOR_SIZE - count];
}

/* Calculate the initial guess. */
BC_VECTOR initial_guess = bc_fast_sqrt_vector(n_top);

/* Set the obtained initial guess to guess_vector. */
size_t initial_guess_len = (n_top_len_for_initial_guess + 1) / 2;
size_t guess_top_vector_len = guess_full_len % BC_VECTOR_SIZE == 0 ? BC_VECTOR_SIZE : guess_full_len % BC_VECTOR_SIZE;
size_t guess_len_diff = initial_guess_len - guess_top_vector_len;
guess_vector[guess_arr_size - 2] = initial_guess / BC_POW_10_LUT[guess_len_diff];
initial_guess %= BC_POW_10_LUT[guess_len_diff];
guess_vector[guess_arr_size - 3] = initial_guess * BC_POW_10_LUT[BC_VECTOR_SIZE - guess_len_diff];
guess_vector[guess_arr_size - 3] += BC_POW_10_LUT[BC_VECTOR_SIZE - guess_len_diff] - 1;

/* Initialize the uninitialized vector with `BC_VECTOR_BOUNDARY_NUM - 1`. */
for (size_t i = 0; i < guess_arr_size - 3; i++) {
guess_vector[i] = BC_VECTOR_BOUNDARY_NUM - 1;
guess1_vector[i] = BC_VECTOR_BOUNDARY_NUM - 1;
}
guess_vector[guess_arr_size - 1] = 0;

BC_VECTOR two[1] = { 2 };

/* The precision (number of vectors) used for the calculation.
* Since the initial value uses two vectors, the initial precision is set to 2. */
size_t guess_precision = 2;
size_t guess_offset = guess_arr_size - 1 - guess_precision;
size_t n_offset = guess_offset * 2;
size_t n_precision = n_arr_size - n_offset;
size_t quot_size = n_precision - (guess_precision) + 1;
size_t guess_use_len = guess_top_vector_len + BC_VECTOR_SIZE;
bool updated_precision = false;

/**
* Newton's algorithm. Iterative expression is `x_{n+1} = (x_n + a / x_n) / 2`
* If break down the calculation into detailed steps, it looks like this:
* 1. quot = a / x_n
* 2. add = x_n + quot1
* 3. x_{n+1} = add / 2
* 4. repeat until the difference between the `x_n` and `x_{n+1}` is less than or equal to 1.
*/
bool done = false;
do {
if (updated_precision) {
guess_offset = guess_arr_size - 1 - guess_precision;
n_offset = guess_offset * 2;
n_precision = n_arr_size - n_offset;
quot_size = n_precision - (guess_precision) + 1;
guess_use_len = guess_top_vector_len + (guess_precision - 1) * BC_VECTOR_SIZE;
updated_precision = false;
}

/* Since the value changes during division by successive approximation, use a copied version of it. */
memcpy(n_vector_copy + n_offset, n_vector + n_offset, n_precision * sizeof(BC_VECTOR));

/* 1. quot = a / x_n */
bc_divide_vector(
n_vector_copy + n_offset, n_precision,
guess_vector + guess_offset, guess_precision, guess_use_len,
tmp_div_ret_vector + guess_offset, quot_size
);

BC_VECTOR *tmp_vptr = guess1_vector;
guess1_vector = guess_vector;
guess_vector = tmp_vptr;

/* 2. add = x_n + quot1 */
int carry = 0;
for (size_t i = guess_offset; i < guess_arr_size - 1; i++) {
guess_vector[i] = guess1_vector[i] + tmp_div_ret_vector[i] + carry;
if (guess_vector[i] >= BC_VECTOR_BOUNDARY_NUM) {
guess_vector[i] -= BC_VECTOR_BOUNDARY_NUM;
carry = 1;
} else {
carry = 0;
}
}
guess_vector[guess_arr_size - 1] = tmp_div_ret_vector[guess_arr_size - 1] + carry;

/* 3. x_{n+1} = add / 2 */
bc_divide_vector(
guess_vector + guess_offset, guess_precision + 1,
two, 1, 1,
tmp_div_ret_vector + guess_offset, guess_precision + 1
);

memcpy(guess_vector + guess_offset, tmp_div_ret_vector + guess_offset, guess_precision * sizeof(BC_VECTOR));

/* 4. repeat until the difference between the `x_n` and `x_{n+1}` is less than or equal to 1. */
if (guess_precision < guess_arr_size - 1) {
/* If the precision has not yet reached the maximum number of digits, it will be increased. */
guess_precision = MIN(guess_precision * 2, guess_arr_size - 1);
updated_precision = true;
} else {
size_t diff = guess_vector[0] > guess1_vector[0] ? guess_vector[0] - guess1_vector[0] : guess1_vector[0] - guess_vector[0];
if (diff <= 1) {
bool is_same = true;
for (size_t i = 1; i < guess_arr_size - 1; i++) {
if (guess_vector[i] != guess1_vector[i]) {
is_same = false;
break;
}
}
done = is_same;
}
}
} while (!done);

size_t guess_len;
size_t guess_leading_zeros = 0;
if (leading_zeros > 0) {
guess_len = 1; /* for int zero */
guess_leading_zeros = (leading_zeros + 1) / 2;
} else {
guess_len = ((*num)->n_len + 1) / 2;
}
bc_num ret = bc_new_num_nonzeroed(guess_len, scale + 1);
char *rptr = ret->n_value;
char *rend = rptr + guess_len + scale + 1 - 1;

for (size_t i = 0; i < guess_leading_zeros; i++) {
*rptr++ = 0;
}
bc_convert_vector_to_char(guess_vector, rptr, rend, guess_arr_size - 1);
ret->n_scale = scale;

bc_free_num(num);
*num = ret;

efree(buf);
}

bool bc_sqrt(bc_num *num, size_t scale)
{
const bc_num local_num = *num;
/* Initial checks. */
if (bc_is_neg(local_num)) {
if (bc_is_neg(*num)) {
/* Cannot take the square root of a negative number */
return false;
}

size_t num_calc_scale = (scale + 1) * 2;
size_t num_use_scale = MIN((*num)->n_scale, num_calc_scale);

/* Square root of 0 is 0 */
if (bc_is_zero(local_num)) {
if (bc_is_zero_for_scale(*num, num_use_scale)) {
bc_free_num (num);
*num = bc_copy_num(BCG(_zero_));
return true;
}

bcmath_compare_result num_cmp_one = bc_compare(local_num, BCG(_one_), local_num->n_scale);
bcmath_compare_result num_cmp_one = bc_compare(*num, BCG(_one_), num_use_scale);
/* Square root of 1 is 1 */
if (num_cmp_one == BCMATH_EQUAL) {
bc_free_num (num);
@@ -60,58 +310,24 @@ bool bc_sqrt(bc_num *num, size_t scale)
}

/* Initialize the variables. */
size_t cscale;
bc_num guess, guess1, point5, diff;
size_t rscale = MAX(scale, local_num->n_scale);

bc_init_num(&guess1);
bc_init_num(&diff);
point5 = bc_new_num (1, 1);
point5->n_value[1] = 5;


/* Calculate the initial guess. */
size_t leading_zeros = 0;
size_t num_calc_full_len = (*num)->n_len + num_calc_scale;
size_t num_use_full_len = (*num)->n_len + num_use_scale;
if (num_cmp_one == BCMATH_RIGHT_GREATER) {
/* The number is between 0 and 1. Guess should start at 1. */
guess = bc_copy_num(BCG(_one_));
cscale = local_num->n_scale;
} else {
/* The number is greater than 1. Guess should start at 10^(exp/2). */
bc_init_num(&guess);
bc_int2num(&guess, 10);

bc_int2num(&guess1, local_num->n_len);
bc_multiply_ex(guess1, point5, &guess1, 0);
guess1->n_scale = 0;
bc_raise_bc_exponent(guess, guess1, &guess, 0);
bc_free_num (&guess1);
cscale = 3;
const char *nptr = (*num)->n_value;
while (*nptr == 0) {
leading_zeros++;
nptr++;
}
num_calc_full_len -= leading_zeros;
num_use_full_len -= leading_zeros;
}

/* Find the square root using Newton's algorithm. */
bool done = false;
while (!done) {
bc_free_num (&guess1);
guess1 = bc_copy_num(guess);
bc_divide(*num, guess, &guess, cscale);
bc_add_ex(guess, guess1, &guess, 0);
bc_multiply_ex(guess, point5, &guess, cscale);
bc_sub_ex(guess, guess1, &diff, cscale + 1);
if (bc_is_near_zero(diff, cscale)) {
if (cscale < rscale + 1) {
cscale = MIN (cscale * 3, rscale + 1);
} else {
done = true;
}
}
if (num_calc_full_len < MAX_LENGTH_OF_LONG) {
bc_fast_sqrt(num, scale, num_calc_full_len, num_use_full_len, leading_zeros);
} else {
bc_standard_sqrt(num, scale, num_calc_full_len, num_use_full_len, leading_zeros);
}

/* Assign the number and clean up. */
bc_free_num (num);
bc_divide(guess, BCG(_one_), num, rscale);
bc_free_num (&guess);
bc_free_num (&guess1);
bc_free_num (&point5);
bc_free_num (&diff);
return true;
}
21 changes: 21 additions & 0 deletions ext/bcmath/tests/bcsqrt.phpt
Original file line number Diff line number Diff line change
@@ -28,6 +28,20 @@ foreach ($scales as $scale) {
}
}

echo "\n";

$radicants = [
"0.005432987654321",
"0.0005432987654321",
"0.0000000000005432987654321",
"0.00000000000005432987654321",
"0.00000000000098765432109876543210987654321098765432109876543210987654321",
"0.000000000000098765432109876543210987654321098765432109876543210987654321",
];
foreach ($radicants as $radicant) {
echo bcsqrt($radicant, 20), "\n";
}

?>
--EXPECT--
0
@@ -52,3 +66,10 @@ foreach ($scales as $scale) {
0.3872983346
3.8729833462
1.0000000000

0.07370880309922960619
0.02330877013984435859
0.00000073708803099229
0.00000023308770139844
0.00000099380799005580
0.00000031426968054503