Skip to content

Commit 9d44f42

Browse files
committed
Pushing the docs to dev/ for branch: main, commit 68d8c2cfcc527ca194ad8ca377acca7a38502f98
1 parent 58b371e commit 9d44f42

File tree

1,552 files changed

+6112
-6104
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,552 files changed

+6112
-6104
lines changed
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

dev/_downloads/86c888008757148890daaf43d664fa71/plot_tweedie_regression_insurance_claims.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def load_mtpl2(n_samples=None):
8080
df["ClaimAmount"] = df["ClaimAmount"].fillna(0)
8181

8282
# unquote string fields
83-
for column_name in df.columns[df.dtypes.values == object]:
83+
for column_name in df.columns[[t is object for t in df.dtypes.values]]:
8484
df[column_name] = df[column_name].str.strip("'")
8585
return df.iloc[:n_samples]
8686

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

dev/_downloads/a97bf662e52d471b04e1ab480c0ad7f2/plot_tweedie_regression_insurance_claims.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
},
1616
"outputs": [],
1717
"source": [
18-
"from functools import partial\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\n\nfrom sklearn.datasets import fetch_openml\nfrom sklearn.metrics import (\n mean_absolute_error,\n mean_squared_error,\n mean_tweedie_deviance,\n)\n\n\ndef load_mtpl2(n_samples=None):\n \"\"\"Fetch the French Motor Third-Party Liability Claims dataset.\n\n Parameters\n ----------\n n_samples: int, default=None\n number of samples to select (for faster run time). Full dataset has\n 678013 samples.\n \"\"\"\n # freMTPL2freq dataset from https://www.openml.org/d/41214\n df_freq = fetch_openml(data_id=41214, as_frame=True).data\n df_freq[\"IDpol\"] = df_freq[\"IDpol\"].astype(int)\n df_freq.set_index(\"IDpol\", inplace=True)\n\n # freMTPL2sev dataset from https://www.openml.org/d/41215\n df_sev = fetch_openml(data_id=41215, as_frame=True).data\n\n # sum ClaimAmount over identical IDs\n df_sev = df_sev.groupby(\"IDpol\").sum()\n\n df = df_freq.join(df_sev, how=\"left\")\n df[\"ClaimAmount\"] = df[\"ClaimAmount\"].fillna(0)\n\n # unquote string fields\n for column_name in df.columns[df.dtypes.values == object]:\n df[column_name] = df[column_name].str.strip(\"'\")\n return df.iloc[:n_samples]\n\n\ndef plot_obs_pred(\n df,\n feature,\n weight,\n observed,\n predicted,\n y_label=None,\n title=None,\n ax=None,\n fill_legend=False,\n):\n \"\"\"Plot observed and predicted - aggregated per feature level.\n\n Parameters\n ----------\n df : DataFrame\n input data\n feature: str\n a column name of df for the feature to be plotted\n weight : str\n column name of df with the values of weights or exposure\n observed : str\n a column name of df with the observed target\n predicted : DataFrame\n a dataframe, with the same index as df, with the predicted target\n fill_legend : bool, default=False\n whether to show fill_between legend\n \"\"\"\n # aggregate observed and predicted variables by feature level\n df_ = df.loc[:, [feature, weight]].copy()\n df_[\"observed\"] = df[observed] * df[weight]\n df_[\"predicted\"] = predicted * df[weight]\n df_ = (\n df_.groupby([feature])[[weight, \"observed\", \"predicted\"]]\n .sum()\n .assign(observed=lambda x: x[\"observed\"] / x[weight])\n .assign(predicted=lambda x: x[\"predicted\"] / x[weight])\n )\n\n ax = df_.loc[:, [\"observed\", \"predicted\"]].plot(style=\".\", ax=ax)\n y_max = df_.loc[:, [\"observed\", \"predicted\"]].values.max() * 0.8\n p2 = ax.fill_between(\n df_.index,\n 0,\n y_max * df_[weight] / df_[weight].values.max(),\n color=\"g\",\n alpha=0.1,\n )\n if fill_legend:\n ax.legend([p2], [\"{} distribution\".format(feature)])\n ax.set(\n ylabel=y_label if y_label is not None else None,\n title=title if title is not None else \"Train: Observed vs Predicted\",\n )\n\n\ndef score_estimator(\n estimator,\n X_train,\n X_test,\n df_train,\n df_test,\n target,\n weights,\n tweedie_powers=None,\n):\n \"\"\"Evaluate an estimator on train and test sets with different metrics\"\"\"\n\n metrics = [\n (\"D\u00b2 explained\", None), # Use default scorer if it exists\n (\"mean abs. error\", mean_absolute_error),\n (\"mean squared error\", mean_squared_error),\n ]\n if tweedie_powers:\n metrics += [\n (\n \"mean Tweedie dev p={:.4f}\".format(power),\n partial(mean_tweedie_deviance, power=power),\n )\n for power in tweedie_powers\n ]\n\n res = []\n for subset_label, X, df in [\n (\"train\", X_train, df_train),\n (\"test\", X_test, df_test),\n ]:\n y, _weights = df[target], df[weights]\n for score_label, metric in metrics:\n if isinstance(estimator, tuple) and len(estimator) == 2:\n # Score the model consisting of the product of frequency and\n # severity models.\n est_freq, est_sev = estimator\n y_pred = est_freq.predict(X) * est_sev.predict(X)\n else:\n y_pred = estimator.predict(X)\n\n if metric is None:\n if not hasattr(estimator, \"score\"):\n continue\n score = estimator.score(X, y, sample_weight=_weights)\n else:\n score = metric(y, y_pred, sample_weight=_weights)\n\n res.append({\"subset\": subset_label, \"metric\": score_label, \"score\": score})\n\n res = (\n pd.DataFrame(res)\n .set_index([\"metric\", \"subset\"])\n .score.unstack(-1)\n .round(4)\n .loc[:, [\"train\", \"test\"]]\n )\n return res"
18+
"from functools import partial\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\n\nfrom sklearn.datasets import fetch_openml\nfrom sklearn.metrics import (\n mean_absolute_error,\n mean_squared_error,\n mean_tweedie_deviance,\n)\n\n\ndef load_mtpl2(n_samples=None):\n \"\"\"Fetch the French Motor Third-Party Liability Claims dataset.\n\n Parameters\n ----------\n n_samples: int, default=None\n number of samples to select (for faster run time). Full dataset has\n 678013 samples.\n \"\"\"\n # freMTPL2freq dataset from https://www.openml.org/d/41214\n df_freq = fetch_openml(data_id=41214, as_frame=True).data\n df_freq[\"IDpol\"] = df_freq[\"IDpol\"].astype(int)\n df_freq.set_index(\"IDpol\", inplace=True)\n\n # freMTPL2sev dataset from https://www.openml.org/d/41215\n df_sev = fetch_openml(data_id=41215, as_frame=True).data\n\n # sum ClaimAmount over identical IDs\n df_sev = df_sev.groupby(\"IDpol\").sum()\n\n df = df_freq.join(df_sev, how=\"left\")\n df[\"ClaimAmount\"] = df[\"ClaimAmount\"].fillna(0)\n\n # unquote string fields\n for column_name in df.columns[[t is object for t in df.dtypes.values]]:\n df[column_name] = df[column_name].str.strip(\"'\")\n return df.iloc[:n_samples]\n\n\ndef plot_obs_pred(\n df,\n feature,\n weight,\n observed,\n predicted,\n y_label=None,\n title=None,\n ax=None,\n fill_legend=False,\n):\n \"\"\"Plot observed and predicted - aggregated per feature level.\n\n Parameters\n ----------\n df : DataFrame\n input data\n feature: str\n a column name of df for the feature to be plotted\n weight : str\n column name of df with the values of weights or exposure\n observed : str\n a column name of df with the observed target\n predicted : DataFrame\n a dataframe, with the same index as df, with the predicted target\n fill_legend : bool, default=False\n whether to show fill_between legend\n \"\"\"\n # aggregate observed and predicted variables by feature level\n df_ = df.loc[:, [feature, weight]].copy()\n df_[\"observed\"] = df[observed] * df[weight]\n df_[\"predicted\"] = predicted * df[weight]\n df_ = (\n df_.groupby([feature])[[weight, \"observed\", \"predicted\"]]\n .sum()\n .assign(observed=lambda x: x[\"observed\"] / x[weight])\n .assign(predicted=lambda x: x[\"predicted\"] / x[weight])\n )\n\n ax = df_.loc[:, [\"observed\", \"predicted\"]].plot(style=\".\", ax=ax)\n y_max = df_.loc[:, [\"observed\", \"predicted\"]].values.max() * 0.8\n p2 = ax.fill_between(\n df_.index,\n 0,\n y_max * df_[weight] / df_[weight].values.max(),\n color=\"g\",\n alpha=0.1,\n )\n if fill_legend:\n ax.legend([p2], [\"{} distribution\".format(feature)])\n ax.set(\n ylabel=y_label if y_label is not None else None,\n title=title if title is not None else \"Train: Observed vs Predicted\",\n )\n\n\ndef score_estimator(\n estimator,\n X_train,\n X_test,\n df_train,\n df_test,\n target,\n weights,\n tweedie_powers=None,\n):\n \"\"\"Evaluate an estimator on train and test sets with different metrics\"\"\"\n\n metrics = [\n (\"D\u00b2 explained\", None), # Use default scorer if it exists\n (\"mean abs. error\", mean_absolute_error),\n (\"mean squared error\", mean_squared_error),\n ]\n if tweedie_powers:\n metrics += [\n (\n \"mean Tweedie dev p={:.4f}\".format(power),\n partial(mean_tweedie_deviance, power=power),\n )\n for power in tweedie_powers\n ]\n\n res = []\n for subset_label, X, df in [\n (\"train\", X_train, df_train),\n (\"test\", X_test, df_test),\n ]:\n y, _weights = df[target], df[weights]\n for score_label, metric in metrics:\n if isinstance(estimator, tuple) and len(estimator) == 2:\n # Score the model consisting of the product of frequency and\n # severity models.\n est_freq, est_sev = estimator\n y_pred = est_freq.predict(X) * est_sev.predict(X)\n else:\n y_pred = estimator.predict(X)\n\n if metric is None:\n if not hasattr(estimator, \"score\"):\n continue\n score = estimator.score(X, y, sample_weight=_weights)\n else:\n score = metric(y, y_pred, sample_weight=_weights)\n\n res.append({\"subset\": subset_label, \"metric\": score_label, \"score\": score})\n\n res = (\n pd.DataFrame(res)\n .set_index([\"metric\", \"subset\"])\n .score.unstack(-1)\n .round(4)\n .loc[:, [\"train\", \"test\"]]\n )\n return res"
1919
]
2020
},
2121
{

0 commit comments

Comments
 (0)