Hello, there seems to be a bug in the PowerTransformer step of the preprocessing, which I run into on 15 out of 50 datasets when evaluating CARTE on datasets from OpenML and Kaggle. I understand that this is a known issue.
Below is a minimal reproducible example for one of the problematic datasets. The code is simplified as much as possible (e.g., no train/test split).
import openml
from huggingface_hub import hf_hub_download
from carte_ai import Table2GraphTransformer

# Download the fastText embeddings used by the CARTE preprocessor
model_path = hf_hub_download(repo_id="hi-paris/fastText", filename="cc.en.300.bin")
preprocessor = Table2GraphTransformer(fasttext_model_path=model_path)

# One of the problematic OpenML datasets
dataset = openml.datasets.get_dataset(46667)
x, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

# Fails inside the preprocessor's internal PowerTransformer fit
x = preprocessor.fit_transform(x, y=y)
In [4]: x = preprocessor.fit_transform(x, y=y)
/data/home/alan.arazi/miniconda3/envs/tabular/lib/python3.11/site-packages/sklearn/preprocessing/_data.py:3438: RuntimeWarning: overflow encountered in power
out[pos] = (np.power(x[pos] + 1, lmbda) - 1) / lmbda
---------------------------------------------------------------------------
BracketError Traceback (most recent call last)
Cell In[4], line 1
----> 1 x = preprocessor.fit_transform(x, y=y)
File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/sklearn/utils/_set_output.py:316, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
314 @wraps(f)
315 def wrapped(self, X, *args, **kwargs):
--> 316 data_to_wrap = f(self, X, *args, **kwargs)
317 if isinstance(data_to_wrap, tuple):
318 # only wrap the first output for cross decomposition
319 return_tuple = (
320 _wrap_data_with_container(method, data_to_wrap[0], X, self),
321 *data_to_wrap[1:],
322 )
File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/sklearn/base.py:1101, in TransformerMixin.fit_transform(self, X, y, **fit_params)
1098 return self.fit(X, **fit_params).transform(X)
1099 else:
1100 # fit method of arity 2 (supervised transformation)
-> 1101 return self.fit(X, y, **fit_params).transform(X)
File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/carte_ai/src/carte_table_to_graph.py:148, in Table2GraphTransformer.fit(self, X, y)
146 num_cols_exist = [col for col in self.num_col_names if col in X.columns]
147 if num_cols_exist:
--> 148 self.num_transformer_.fit(X[num_cols_exist])
149 #print(f"Numerical columns fitted for normalization: {num_cols_exist}")
151 self.is_fitted_ = True
File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/sklearn/base.py:1473, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs)
1466 estimator._validate_params()
1468 with config_context(
1469 skip_parameter_validation=(
1470 prefer_skip_nested_validation or global_skip_validation
1471 )
1472 ):
-> 1473 return fit_method(estimator, *args, **kwargs)
File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/sklearn/preprocessing/_data.py:3251, in PowerTransformer.fit(self, X, y)
3231 @_fit_context(prefer_skip_nested_validation=True)
3232 def fit(self, X, y=None):
3233 """Estimate the optimal parameter lambda for each feature.
3234
3235 The optimal lambda parameter for minimizing skewness is estimated on
(...)
3249 Fitted transformer.
3250 """
-> 3251 self._fit(X, y=y, force_transform=False)
3252 return self
File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/sklearn/preprocessing/_data.py:3304, in PowerTransformer._fit(self, X, y, force_transform)
3301 self.lambdas_[i] = 1.0
3302 continue
-> 3304 self.lambdas_[i] = optim_function(col)
3306 if self.standardize or force_transform:
3307 X[:, i] = transform_function(X[:, i], self.lambdas_[i])
File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/sklearn/preprocessing/_data.py:3493, in PowerTransformer._yeo_johnson_optimize(self, x)
3491 x = x[~np.isnan(x)]
3492 # choosing bracket -2, 2 like for boxcox
-> 3493 return optimize.brent(_neg_log_likelihood, brack=(-2, 2))
File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/scipy/optimize/_optimize.py:2655, in brent(func, args, brack, tol, full_output, maxiter)
2583 """
2584 Given a function of one variable and a possible bracket, return
2585 a local minimizer of the function isolated to a fractional precision
(...)
2651
2652 """
2653 options = {'xtol': tol,
2654 'maxiter': maxiter}
-> 2655 res = _minimize_scalar_brent(func, brack, args, **options)
2656 if full_output:
2657 return res['x'], res['fun'], res['nit'], res['nfev']
File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/scipy/optimize/_optimize.py:2697, in _minimize_scalar_brent(func, brack, args, xtol, maxiter, disp, **unknown_options)
2694 brent = Brent(func=func, args=args, tol=tol,
2695 full_output=True, maxiter=maxiter, disp=disp)
2696 brent.set_bracket(brack)
-> 2697 brent.optimize()
2698 x, fval, nit, nfev = brent.get_result(full_output=True)
2700 success = nit < maxiter and not (np.isnan(x) or np.isnan(fval))
File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/scipy/optimize/_optimize.py:2462, in Brent.optimize(self)
2459 def optimize(self):
2460 # set up for optimization
2461 func = self.func
-> 2462 xa, xb, xc, fa, fb, fc, funcalls = self.get_bracket_info()
2463 _mintol = self._mintol
2464 _cg = self._cg
File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/scipy/optimize/_optimize.py:2431, in Brent.get_bracket_info(self)
2429 xa, xb, xc, fa, fb, fc, funcalls = bracket(func, args=args)
2430 elif len(brack) == 2:
-> 2431 xa, xb, xc, fa, fb, fc, funcalls = bracket(func, xa=brack[0],
2432 xb=brack[1], args=args)
2433 elif len(brack) == 3:
2434 xa, xb, xc = brack
File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/scipy/optimize/_optimize.py:3070, in bracket(func, xa, xb, args, grow_limit, maxiter)
3068 e = BracketError(msg)
3069 e.data = (xa, xb, xc, fa, fb, fc, funcalls)
-> 3070 raise e
3072 return xa, xb, xc, fa, fb, fc, funcalls
BracketError: The algorithm terminated without finding a valid bracket. Consider trying different initial points.
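For context, the overflow warning above suggests that the Yeo-Johnson lambda search inside PowerTransformer produces values large enough to overflow, which then makes scipy's brent bracketing fail. A possible workaround (untested on all of the failing datasets, and purely a sketch on my side, not what Table2GraphTransformer does internally) is to compress the numeric columns before calling the preprocessor so the lambda search never sees extreme magnitudes:

import numpy as np

# Continues from the reproducer above: x, y and preprocessor are already defined.
# Hypothetical pre-scaling step: a signed log1p compresses very large magnitudes
# so the internal Yeo-Johnson optimization is less likely to overflow.
num_cols = x.select_dtypes(include="number").columns
x[num_cols] = np.sign(x[num_cols]) * np.log1p(np.abs(x[num_cols]))

x = preprocessor.fit_transform(x, y=y)

Alternatively, replacing the internal PowerTransformer with QuantileTransformer(output_distribution="normal") would sidestep the lambda optimization entirely, but that would be a change on the CARTE side.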