Skip to content

[FIX] Update CI, tests and deprecations #373

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 19 additions & 4 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@ jobs:
uv pip install ".[dev]"
nbdev_test --do_print --timing
test-model-performance:
parameters:
hierarchy:
type: string
default: "strict"
forecast_type:
type: string
default: "point"
engine:
type: string
default: "pandas"
resource_class: large
docker:
- image: python:3.10-slim
Expand All @@ -35,8 +45,8 @@ jobs:
uv pip install ".[dev]"
cd ./action_files/test_models/
uv pip install -r requirements.txt
python -m src.models
python -m src.evaluation
python -m src.models << parameters.hierarchy >> << parameters.forecast_type >> << parameters.engine >>
python -m src.evaluation << parameters.forecast_type >>
cd ../../
- store_artifacts:
path: ./action_files/test_models/data/evaluation.csv
Expand All @@ -59,7 +69,7 @@ jobs:
uv pip install ".[dev]"
cd ./action_files/test_models/
uv pip install -r requirements.txt
python -m src.models_temporal
python -m src.models_temporal
python -m src.evaluation_temporal
cd ../../
- store_artifacts:
Expand All @@ -69,5 +79,10 @@ workflows:
sample:
jobs:
- nbdev-tests
- test-model-performance
- test-model-performance:
matrix:
parameters:
hierarchy: ["strict", "non-strict"]
forecast_type: ["point", "probabilistic"]
engine: ["pandas"]
- test-model-performance-temporal
80 changes: 4 additions & 76 deletions action_files/test_models/src/data.py
Original file line number Diff line number Diff line change
@@ -1,84 +1,12 @@
import os
import fire
import pickle
import pandas as pd

from statsforecast.models import AutoETS
from statsforecast.core import StatsForecast

from hierarchicalforecast.utils import aggregate


def get_data():
# If data exists read it
if os.path.isfile('data/Y_test.csv'):
Y_test_df = pd.read_csv('data/Y_test.csv')
Y_train_df = pd.read_csv('data/Y_train.csv')
Y_hat_df = pd.read_csv('data/Y_hat.csv')
Y_fitted_df = pd.read_csv('data/Y_fitted.csv')
S_df = pd.read_csv('data/S.csv')

with open('data/tags.pickle', 'rb') as handle:
tags = pickle.load(handle)

return Y_train_df, Y_test_df, Y_hat_df, Y_fitted_df, S_df, tags

# Read and Parse Data
def get_tourism():
# Read data
Y_df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/tourism.csv')
Y_df = Y_df.rename({'Trips': 'y', 'Quarter': 'ds'}, axis=1)
Y_df.insert(0, 'Country', 'Australia')
Y_df = Y_df[['Country', 'Region', 'State', 'Purpose', 'ds', 'y']]
Y_df['ds'] = Y_df['ds'].str.replace(r'(\d+) (Q\d)', r'\1-\2', regex=True)
Y_df['ds'] = pd.to_datetime(Y_df['ds'])

# Hierarchical Aggregation
spec = [
['Country'],
['Country', 'State'],
['Country', 'State', 'Region'],
['Country', 'State', 'Region', 'Purpose']
]

Y_df, S_df, tags = aggregate(Y_df, spec)

# Train/Test Splits
Y_test_df = Y_df.groupby('unique_id').tail(8)
Y_train_df = Y_df.drop(Y_test_df.index)

sf = StatsForecast(models=[AutoETS(season_length=4, model='ZZA')],
freq='QS', n_jobs=-1)
Y_hat_df = sf.forecast(df=Y_train_df, h=8, fitted=True)
Y_fitted_df = sf.forecast_fitted_values()

# Save Data
if not os.path.exists('./data'):
os.makedirs('./data')

Y_test_df.to_csv('./data/Y_test.csv', index=False)
Y_train_df.to_csv('./data/Y_train.csv', index=False)

Y_hat_df.to_csv('./data/Y_hat.csv', index=False)
Y_fitted_df.to_csv('./data/Y_fitted.csv', index=False)
S_df.to_csv('./data/S.csv', index=False)

with open('./data/tags.pickle', 'wb') as handle:
pickle.dump(tags, handle, protocol=pickle.HIGHEST_PROTOCOL)

return Y_train_df, Y_test_df, Y_hat_df, Y_fitted_df, S_df, tags

def save_data():
Y_train_df, Y_test_df, Y_hat_df, Y_fitted_df, S_df, tags = get_data()

Y_test_df.to_csv('./data/Y_test.csv', index=False)
Y_train_df.to_csv('./data/Y_train.csv', index=False)

Y_hat_df.to_csv('./data/Y_hat.csv', index=False)
Y_fitted_df.to_csv('./data/Y_fitted.csv', index=False)
S_df.to_csv('./data/S.csv', index=False)

with open('./data/tags.pickle', 'wb') as handle:
pickle.dump(tags, handle, protocol=pickle.HIGHEST_PROTOCOL)

Y_df['ds'] = pd.PeriodIndex(Y_df["ds"], freq='Q').to_timestamp()

if __name__=="__main__":
fire.Fire(save_data)
return Y_df
59 changes: 27 additions & 32 deletions action_files/test_models/src/evaluation.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
import fire
import pickle
import numpy as np
import pandas as pd

from hierarchicalforecast.evaluation import HierarchicalEvaluation

def rmse(y, y_hat):
    """Root mean squared error per series (rows), averaged across all series.

    Both `y` and `y_hat` are 2-D arrays shaped (n_series, horizon).
    """
    per_series_rmse = np.sqrt(((y - y_hat) ** 2).mean(axis=1))
    return per_series_rmse.mean()

def mase(y, y_hat, y_insample, seasonality=4):
    """Mean absolute scaled error, averaged across series (rows).

    The per-series absolute forecast error is scaled by the in-sample
    mean absolute seasonal-naive error (lag = `seasonality`).
    All arrays are 2-D, shaped (n_series, horizon) / (n_series, n_insample).
    """
    abs_errors = np.abs(y - y_hat).mean(axis=1)
    seasonal_naive = np.abs(y_insample[:, seasonality:] - y_insample[:, :-seasonality])
    scale = seasonal_naive.mean(axis=1)
    return (abs_errors / scale).mean()


def evaluate():
import hierarchicalforecast.evaluation as hfe
from utilsforecast.losses import rmse, mase,scaled_crps
from functools import partial

def eval(type: str = "point") -> pd.DataFrame:
mase_p = partial(mase, seasonality=4)
if type == "probabilistic":
level = [80, 90]
metrics = [rmse, mase_p, scaled_crps]
elif type == "point":
level = None
metrics = [rmse, mase_p]
else:
raise ValueError("Type must be either 'point' or 'probabilistic'.")
execution_times = pd.read_csv('data/execution_times.csv')
models = [f"{x[0]} ({x[1]:.2f} secs)" for x in execution_times.values]

Expand All @@ -24,26 +27,18 @@ def evaluate():
with open('data/tags.pickle', 'rb') as handle:
tags = pickle.load(handle)

eval_tags = {}
eval_tags['Total'] = tags['Country']
eval_tags['State'] = tags['Country/State']
eval_tags['Regions'] = tags['Country/State/Region']
eval_tags['Bottom'] = tags['Country/State/Region/Purpose']
eval_tags['All'] = np.concatenate(list(tags.values()))

evaluator = HierarchicalEvaluation(evaluators=[mase])
evaluation = evaluator.evaluate(
Y_hat_df=Y_rec_df, Y_test_df=Y_test_df,
tags=eval_tags, Y_df=Y_train_df
evaluation = hfe.evaluate(
df=Y_rec_df.merge(Y_test_df, on=['unique_id', 'ds'], how="left"),
metrics = metrics,
level=level,
tags=tags,
train_df=Y_train_df
)
evaluation = evaluation.query("level != 'Overall'").set_index(['level', 'metric'])

evaluation.columns = ['Base'] + models
evaluation = evaluation.map('{:.2f}'.format)
return evaluation

numeric_cols = evaluation.select_dtypes(include="number").columns
evaluation[numeric_cols] = evaluation[numeric_cols].map('{:.3}'.format).astype(np.float64)
evaluation.columns = ['level', 'metric', 'Base'] + models
print(evaluation.T)
evaluation.to_csv('./data/evaluation.csv')

if __name__ == '__main__':
evaluation = evaluate()
evaluation.to_csv('./data/evaluation.csv')
print(evaluation.T)
fire.Fire(eval)
119 changes: 100 additions & 19 deletions action_files/test_models/src/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
import fire
import pandas as pd
import pickle
import polars as pl

from hierarchicalforecast.core import HierarchicalReconciliation
from hierarchicalforecast.methods import (
Expand All @@ -10,46 +12,125 @@
OptimalCombination,
ERM,
)
from hierarchicalforecast.utils import aggregate
from src.data import get_tourism
from statsforecast.models import AutoETS
from statsforecast.core import StatsForecast

from src.data import get_data
# Aggregation specifications passed to hierarchicalforecast.utils.aggregate.
# "strict": every level nests inside the previous one, forming a pure tree
# (Country -> State -> Region -> Purpose).
# "non-strict": adds cross-cutting groupings (Country/Purpose and
# Country/State/Purpose), so the structure is a grouped hierarchy rather
# than a strict tree — some reconcilers only support the strict case.
SPECS = {
    "strict": [
        ['Country'],
        ['Country', 'State'],
        ['Country', 'State', 'Region'],
        ['Country', 'State', 'Region', 'Purpose']
    ],
    "non-strict": [
        ['Country'],
        ['Country', 'State'],
        ['Country', 'Purpose'],
        ['Country', 'State', 'Region'],
        ['Country', 'State', 'Purpose'],
        ['Country', 'State', 'Region', 'Purpose']
    ],
}


def main():
Y_train_df, Y_test_df, Y_hat_df, Y_fitted_df, S_df, tags = get_data()
def main(hierarchy: str = "non-strict", type: str = "point", engine: str = 'pandas') -> None:
if type == "probabilistic":
level = [80, 90]
elif type == "point":
level = None
# Get data
Y_df = get_tourism()
freq = "QS"
if engine == 'polars':
Y_df = pl.from_pandas(Y_df)
freq = "1q"

reconcilers = [BottomUp(),
BottomUpSparse(),
TopDown(method="forecast_proportions"),
TopDownSparse(method="forecast_proportions"),
TopDown(method="average_proportions"),
TopDownSparse(method="average_proportions"),
TopDown(method="proportion_averages"),
TopDownSparse(method="proportion_averages"),
MiddleOut(middle_level="Country/State", top_down_method="average_proportions"),
MiddleOutSparse(middle_level="Country/State", top_down_method="average_proportions"),
# Hierarchical Aggregation
spec = SPECS[hierarchy]
Y_df, S_df, tags = aggregate(Y_df, spec)

# Train/Test Splits
if engine == 'pandas':
Y_test_df = Y_df.groupby('unique_id').tail(8)
Y_train_df = Y_df.drop(Y_test_df.index)
elif engine == 'polars':
Y_test_df = Y_df.group_by('unique_id').tail(8)
Y_train_df = Y_df.filter(pl.col('ds') < Y_test_df['ds'].min())

sf = StatsForecast(models=[AutoETS(season_length=4, model='ZZA')],
freq=freq, n_jobs=-1)
Y_hat_df = sf.forecast(df=Y_train_df, h=8, fitted=True, level=level)
Y_fitted_df = sf.forecast_fitted_values()

# Base reconcilers
reconcilers = [
BottomUp(),
MinTrace(method='ols'),
MinTrace(method='wls_struct'),
MinTrace(method='wls_var'),
MinTrace(method='mint_cov'),
MinTrace(method='mint_shrink'),
MinTraceSparse(method='ols'),
MinTraceSparse(method='wls_struct'),
MinTraceSparse(method='wls_var'),
OptimalCombination(method='ols'),
OptimalCombination(method='wls_struct'),
ERM(method='closed'),
]

# Add reconcilers that handle strict hierarchies only
if hierarchy == "strict":
reconcilers += [
TopDown(method="average_proportions"),
TopDown(method="proportion_averages"),
MinTrace(method='mint_cov'),
]
if level is None:
reconcilers += [
TopDown(method="forecast_proportions"),
MiddleOut(middle_level="Country/State", top_down_method="average_proportions"),
]
if engine == 'pandas':
reconcilers += [
TopDownSparse(method="forecast_proportions"),
MiddleOutSparse(middle_level="Country/State", top_down_method="average_proportions"),
]

# Add sparse reconcilers only if using pandas engine
if engine == 'pandas':
if hierarchy == "strict":
reconcilers += [
BottomUpSparse(),
TopDownSparse(method="average_proportions"),
TopDownSparse(method="proportion_averages"),
MinTraceSparse(method='ols'),
MinTraceSparse(method='wls_struct'),
MinTraceSparse(method='wls_var'),
]
else:
reconcilers += [
BottomUpSparse(),
MinTraceSparse(method='ols'),
MinTraceSparse(method='wls_struct'),
MinTraceSparse(method='wls_var'),
]

hrec = HierarchicalReconciliation(reconcilers=reconcilers)
Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df,
Y_df=Y_fitted_df, S=S_df, tags=tags)
Y_df=Y_fitted_df, S=S_df, tags=tags, level=level)

execution_times = pd.Series(hrec.execution_times).reset_index()

if not os.path.exists('./data'):
os.makedirs('./data')
if engine == 'polars':
Y_test_df = Y_test_df.to_pandas()
Y_train_df = Y_train_df.to_pandas()
Y_rec_df = Y_rec_df.to_pandas()
Y_rec_df.to_csv('./data/Y_rec.csv', index=False)
Y_test_df.to_csv('./data/Y_test.csv', index=False)
Y_train_df.to_csv('./data/Y_train.csv', index=False)
execution_times.to_csv('./data/execution_times.csv', index=False)

with open('./data/tags.pickle', 'wb') as handle:
pickle.dump(tags, handle, protocol=pickle.HIGHEST_PROTOCOL)

if __name__ == '__main__':
fire.Fire(main)
6 changes: 5 additions & 1 deletion hierarchicalforecast/probabilistic_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import Optional

import numpy as np
import scipy.sparse as sp
from scipy.stats import norm
from sklearn.preprocessing import OneHotEncoder

Expand Down Expand Up @@ -57,7 +58,10 @@ def __init__(
self.S = S
self.P = P
self.y_hat = y_hat
self.SP = self.S @ self.P
if isinstance(P, sp.linalg.LinearOperator) and sp.issparse(S):
self.SP = sp.linalg.aslinearoperator(self.S) @ self.P
else:
self.SP = self.S @ self.P
self.W = W
self.sigmah = sigmah
self.seed = seed
Expand Down
Loading