
PowerTransform bug #23


Open
alanarazi7 opened this issue May 10, 2025 · 0 comments

Hello, there seems to be a bug in the PowerTransform step, which I ran into on 15 out of the 50 datasets used when evaluating CARTE on data from OpenML and Kaggle. I understand that this is a known issue.

I include below a simple reproducible example for one of the problematic datasets. The code is simplified as much as possible (e.g. no train-test split); the resulting warning and traceback follow the snippet.

import openml
from huggingface_hub import hf_hub_download
from carte_ai import Table2GraphTransformer

# Download the fastText embeddings used by CARTE and build the preprocessor
model_path = hf_hub_download(repo_id="hi-paris/fastText", filename="cc.en.300.bin")
preprocessor = Table2GraphTransformer(fasttext_model_path=model_path)

# Fetch one of the problematic datasets from OpenML and transform it
dataset = openml.datasets.get_dataset(46667)
x, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)
x = preprocessor.fit_transform(x, y=y)


In [4]: x = preprocessor.fit_transform(x, y=y)
/data/home/alan.arazi/miniconda3/envs/tabular/lib/python3.11/site-packages/sklearn/preprocessing/_data.py:3438: RuntimeWarning: overflow encountered in power
  out[pos] = (np.power(x[pos] + 1, lmbda) - 1) / lmbda
---------------------------------------------------------------------------
BracketError                              Traceback (most recent call last)
Cell In[4], line 1
----> 1 x = preprocessor.fit_transform(x, y=y)

File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/sklearn/utils/_set_output.py:316, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
    314 @wraps(f)
    315 def wrapped(self, X, *args, **kwargs):
--> 316     data_to_wrap = f(self, X, *args, **kwargs)
    317     if isinstance(data_to_wrap, tuple):
    318         # only wrap the first output for cross decomposition
    319         return_tuple = (
    320             _wrap_data_with_container(method, data_to_wrap[0], X, self),
    321             *data_to_wrap[1:],
    322         )

File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/sklearn/base.py:1101, in TransformerMixin.fit_transform(self, X, y, **fit_params)
   1098     return self.fit(X, **fit_params).transform(X)
   1099 else:
   1100     # fit method of arity 2 (supervised transformation)
-> 1101     return self.fit(X, y, **fit_params).transform(X)

File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/carte_ai/src/carte_table_to_graph.py:148, in Table2GraphTransformer.fit(self, X, y)
    146 num_cols_exist = [col for col in self.num_col_names if col in X.columns]
    147 if num_cols_exist:
--> 148     self.num_transformer_.fit(X[num_cols_exist])
    149     #print(f"Numerical columns fitted for normalization: {num_cols_exist}")
    151 self.is_fitted_ = True

File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/sklearn/base.py:1473, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs)
   1466     estimator._validate_params()
   1468 with config_context(
   1469     skip_parameter_validation=(
   1470         prefer_skip_nested_validation or global_skip_validation
   1471     )
   1472 ):
-> 1473     return fit_method(estimator, *args, **kwargs)

File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/sklearn/preprocessing/_data.py:3251, in PowerTransformer.fit(self, X, y)
   3231 @_fit_context(prefer_skip_nested_validation=True)
   3232 def fit(self, X, y=None):
   3233     """Estimate the optimal parameter lambda for each feature.
   3234
   3235     The optimal lambda parameter for minimizing skewness is estimated on
   (...)
   3249         Fitted transformer.
   3250     """
-> 3251     self._fit(X, y=y, force_transform=False)
   3252     return self

File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/sklearn/preprocessing/_data.py:3304, in PowerTransformer._fit(self, X, y, force_transform)
   3301     self.lambdas_[i] = 1.0
   3302     continue
-> 3304 self.lambdas_[i] = optim_function(col)
   3306 if self.standardize or force_transform:
   3307     X[:, i] = transform_function(X[:, i], self.lambdas_[i])

File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/sklearn/preprocessing/_data.py:3493, in PowerTransformer._yeo_johnson_optimize(self, x)
   3491 x = x[~np.isnan(x)]
   3492 # choosing bracket -2, 2 like for boxcox
-> 3493 return optimize.brent(_neg_log_likelihood, brack=(-2, 2))

File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/scipy/optimize/_optimize.py:2655, in brent(func, args, brack, tol, full_output, maxiter)
   2583 """
   2584 Given a function of one variable and a possible bracket, return
   2585 a local minimizer of the function isolated to a fractional precision
   (...)
   2651
   2652 """
   2653 options = {'xtol': tol,
   2654            'maxiter': maxiter}
-> 2655 res = _minimize_scalar_brent(func, brack, args, **options)
   2656 if full_output:
   2657     return res['x'], res['fun'], res['nit'], res['nfev']

File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/scipy/optimize/_optimize.py:2697, in _minimize_scalar_brent(func, brack, args, xtol, maxiter, disp, **unknown_options)
   2694 brent = Brent(func=func, args=args, tol=tol,
   2695               full_output=True, maxiter=maxiter, disp=disp)
   2696 brent.set_bracket(brack)
-> 2697 brent.optimize()
   2698 x, fval, nit, nfev = brent.get_result(full_output=True)
   2700 success = nit < maxiter and not (np.isnan(x) or np.isnan(fval))

File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/scipy/optimize/_optimize.py:2462, in Brent.optimize(self)
   2459 def optimize(self):
   2460     # set up for optimization
   2461     func = self.func
-> 2462     xa, xb, xc, fa, fb, fc, funcalls = self.get_bracket_info()
   2463     _mintol = self._mintol
   2464     _cg = self._cg

File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/scipy/optimize/_optimize.py:2431, in Brent.get_bracket_info(self)
   2429     xa, xb, xc, fa, fb, fc, funcalls = bracket(func, args=args)
   2430 elif len(brack) == 2:
-> 2431     xa, xb, xc, fa, fb, fc, funcalls = bracket(func, xa=brack[0],
   2432                                                xb=brack[1], args=args)
   2433 elif len(brack) == 3:
   2434     xa, xb, xc = brack

File ~/miniconda3/envs/tabular/lib/python3.11/site-packages/scipy/optimize/_optimize.py:3070, in bracket(func, xa, xb, args, grow_limit, maxiter)
   3068     e = BracketError(msg)
   3069     e.data = (xa, xb, xc, fa, fb, fc, funcalls)
-> 3070     raise e
   3072 return xa, xb, xc, fa, fb, fc, funcalls

BracketError: The algorithm terminated without finding a valid bracket. Consider trying different initial points.
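As a stopgap on my side, I can avoid the crash on some datasets by shrinking the dynamic range of the numeric columns before handing the frame to Table2GraphTransformer, so that the internal PowerTransformer's Yeo-Johnson lambda search is less likely to overflow. This is only a caller-side sketch, assuming the overflow comes from very large raw values; the scaler choice is illustrative and not part of CARTE, and it does not fix the underlying issue in the preprocessor.

import numpy as np
from sklearn.preprocessing import RobustScaler

# Possible mitigation (not a fix): rescale numeric columns before fit_transform
# so the values seen by the internal PowerTransformer are better conditioned.
num_cols = x.select_dtypes(include=np.number).columns
if len(num_cols) > 0:
    x[num_cols] = RobustScaler().fit_transform(x[num_cols])

x_graphs = preprocessor.fit_transform(x, y=y)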