diff --git a/autoemulate/experimental/exploratory/hm_refactor.ipynb b/autoemulate/experimental/exploratory/hm_refactor.ipynb
new file mode 100644
index 000000000..2f4dc22a6
--- /dev/null
+++ b/autoemulate/experimental/exploratory/hm_refactor.ipynb
@@ -0,0 +1,454 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Refactored HistoryMatching workflow example\n",
+ "\n",
+ "## 1. Set up"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "\n",
+ "# imports from main\n",
+ "from autoemulate.history_matching_dashboard import HistoryMatchingDashboard\n",
+ "\n",
+ "# imports from experimental\n",
+ "from autoemulate.experimental.emulators.gaussian_process.exact import (\n",
+ " GaussianProcessExact,\n",
+ ")\n",
+ "from autoemulate.experimental.simulations.epidemic import Epidemic\n",
+ "from autoemulate.experimental.history_matching import HistoryMatching, HistoryMatchingWorkflow"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Simulate data & train a GP\n",
+ "\n",
+ "Set up a `Simulator` and generate data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Running simulations: 100%|██████████| 10/10 [00:00<00:00, 553.21it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Successfully completed 10/10 simulations (100.0%)\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "simulator = Epidemic()\n",
+ "x = simulator.sample_inputs(10)\n",
+ "y = simulator.forward_batch(x)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "In a full workflow, the next step would be done with `AutoEmulate.compare()`; here we fit a GP directly."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gp = GaussianProcessExact(x, y)\n",
+ "gp.fit(x, y)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2. HistoryMatching\n",
+ "\n",
+    "First, one can instantiate `HistoryMatching` without a simulator or an emulator; it can then be used to calculate implausibility for a given set of predictions."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+   "outputs": [],
+ "source": [
+ "# generate predictions for new x inputs\n",
+ "x_new = simulator.sample_inputs(10)\n",
+    "output = gp.predict(x_new.to(torch.float32))\n",
+ "pred_means, pred_vars = (\n",
+ " output.mean.float().detach(),\n",
+ " output.variance.float().detach(),\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define observed data with means and variances\n",
+ "observations = {\"infection_rate\": (0.3, 0.05)}\n",
+ "\n",
+ "# Create history matcher\n",
+ "hm = HistoryMatching(\n",
+ " observations=observations,\n",
+ " threshold=3.0\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "implausibility = hm.calculate_implausibility(pred_means, pred_vars)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "Once implausibility has been calculated, it can be used to identify the indices of NROY parameters:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "hm.get_nroy(implausibility)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Or to filter parameters at those NROY indices:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([[0.1709, 0.0508],\n",
+ " [0.4401, 0.0828],\n",
+ " [0.3762, 0.0232],\n",
+ " [0.1998, 0.1910],\n",
+ " [0.3041, 0.1451],\n",
+ " [0.2680, 0.0296],\n",
+ " [0.4938, 0.1741],\n",
+ " [0.2367, 0.0964],\n",
+ " [0.1025, 0.1294],\n",
+ " [0.3978, 0.1181]])"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "hm.get_nroy(implausibility, x_new)"
+ ]
+ },
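+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The complementary `get_ro` method returns the ruled out (RO) indices or, if parameters are passed, the parameter values at those indices. An illustrative (not executed) sketch:\n",
+    "\n",
+    "```python\n",
+    "# indices of samples whose implausibility exceeds the threshold\n",
+    "ro_idx = hm.get_ro(implausibility)\n",
+    "\n",
+    "# parameter values at the ruled out indices\n",
+    "ro_params = hm.get_ro(implausibility, x_new)\n",
+    "```"
+   ]
+  },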
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Optionally, `HistoryMatching` can be instantiated with an emulator to make extracting prediction means and variances easier."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([[0.0526],\n",
+ " [0.0409],\n",
+ " [0.0536],\n",
+ " [0.0313],\n",
+ " [0.0283],\n",
+ " [0.0547],\n",
+ " [0.0281],\n",
+ " [0.0363],\n",
+ " [0.0300],\n",
+ " [0.0315]])"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "hm_with_emul = HistoryMatching(\n",
+ " observations=observations,\n",
+ " threshold=3.0,\n",
+ " emulator=gp\n",
+ ")\n",
+ "\n",
+ "pred_means, pred_vars = hm_with_emul.emulator_predict(x_new)\n",
+ "hm_with_emul.calculate_implausibility(pred_means, pred_vars)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 3. Iterative HistoryMatchingWorkflow\n",
+ "\n",
+ "We also have a separate class that implements an iterative sample-predict-refit workflow:\n",
+    "- sample `n_test_samples` parameters to test from the NROY space\n",
+    "- use the emulator to filter out implausible samples and update the NROY space\n",
+    "- run the simulator for (at most) `n_simulations` parameters sampled from the NROY space\n",
+ "- refit the emulator using the simulated data\n",
+ "\n",
+    "The object maintains and updates its internal state each time `run()` is called, so this can be repeated as many times as the user wants."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Running simulations: 100%|██████████| 20/20 [00:00<00:00, 1140.05it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Successfully completed 20/20 simulations (100.0%)\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "hmw = HistoryMatchingWorkflow(\n",
+ " simulator=simulator,\n",
+ " emulator=gp,\n",
+ " observations=observations,\n",
+ " threshold=3.0,\n",
+ " train_x=x,\n",
+ " train_y=y\n",
+ ")\n",
+ "\n",
+ "test_parameters, impl_scores = hmw.run(n_simulations=20, n_test_samples=100)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(torch.Size([100, 2]), torch.Size([100, 1]))"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test_parameters.shape, impl_scores.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "We can call `run()` as many times as we want; the class stores state from previous runs."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Running simulations: 100%|██████████| 20/20 [00:00<00:00, 1008.67it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Successfully completed 20/20 simulations (100.0%)\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "test_parameters, impl_scores = hmw.run(n_simulations=20, n_test_samples=100)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 4. Integration with dashboard"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dashboard = HistoryMatchingDashboard(\n",
+ " samples=test_parameters,\n",
+ " impl_scores=impl_scores,\n",
+    "    param_names=simulator.param_names,\n",
+    "    output_names=simulator.output_names,\n",
+    ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "c0a1f4ff869a4acf86ed20bf20689055",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+        "HTML(value='History Matching Dashboard')"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "f83c37cf211246d6a8753961a7c02639",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "VBox(children=(HBox(children=(Dropdown(description='Plot Type:', options=('Parameter vs Implausibility', 'Pair…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "dashboard.display()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/autoemulate/experimental/history_matching.py b/autoemulate/experimental/history_matching.py
new file mode 100644
index 000000000..5c067f3bc
--- /dev/null
+++ b/autoemulate/experimental/history_matching.py
@@ -0,0 +1,487 @@
+import warnings
+
+import torch
+
+from autoemulate.experimental.device import TorchDeviceMixin
+from autoemulate.experimental.emulators import GaussianProcessExact
+from autoemulate.experimental.simulations.base import Simulator
+from autoemulate.experimental.types import DeviceLike, GaussianLike, TensorLike
+
+
+class HistoryMatching(TorchDeviceMixin):
+    r"""
+ History matching is a model calibration method, which uses observed data to
+ rule out ``implausible`` parameter values. The implausibility metric is:
+
+ .. math::
+        I_i(\bar{x_0}) = \frac{|z_i - \mathbb{E}(f_i(\bar{x_0}))|}
+        {\sqrt{\text{Var}[z_i - \mathbb{E}(f_i(\bar{x_0}))]}}
+
+ Queried parameters above a given implausibility threshold are ruled out (RO)
+ whereas all other parameters are marked as not ruled out yet (NROY).
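+
+    Examples
+    --------
+    A minimal illustrative sketch (the observation name, values and predictions
+    below are arbitrary; predictions have shape [n_samples, n_outputs])::
+
+        import torch
+
+        hm = HistoryMatching(observations={"y": (0.3, 0.05)}, threshold=3.0)
+        pred_means = torch.tensor([[0.31], [0.90]])
+        pred_vars = torch.tensor([[0.01], [0.01]])
+        impl = hm.calculate_implausibility(pred_means, pred_vars)
+        nroy_idx = hm.get_nroy(impl)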
+ """
+
+ def __init__( # noqa: PLR0913 allow too many arguments since all currently required
+ self,
+ observations: dict[str, tuple[float, float]] | dict[str, float],
+ threshold: float = 3.0,
+ model_discrepancy: float = 0.0,
+ rank: int = 1,
+ emulator: GaussianProcessExact | None = None,
+ device: DeviceLike | None = None,
+ ):
+ """
+ Initialize the history matching object.
+
+ Parameters
+ ----------
+        observations: dict[str, tuple[float, float]] | dict[str, float]
+            For each output variable, specifies the observed (mean, variance). If
+            there is no uncertainty in the observations, provide just the observed
+            value.
+ threshold: float
+ Implausibility threshold (query points with implausibility scores that
+ exceed this value are ruled out). Defaults to 3, which is considered
+ a good value for simulations with a single output.
+ model_discrepancy: float
+ Additional variance to include in the implausibility calculation.
+ rank: int
+ Scoring method for multi-output problems. Must be 1 <= rank <= n_outputs.
+ When the implausibility scores are ordered across outputs, it indicates
+ which rank to use when determining whether the query point is NROY. The
+ default of ``1`` indicates that the largest implausibility will be used.
+ emulator: GaussianProcessExact
+ TODO: make this EmulatorWithUncertainty once implemented (see #542)
+ An optional trained Gaussian Process emulator.
+ device: DeviceLike | None
+            The device to use. If None, the default torch device is used.
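+
+        Examples
+        --------
+        Both observation formats are accepted (the output name and values are
+        illustrative)::
+
+            observations = {"infection_rate": (0.3, 0.05)}  # (mean, variance)
+            observations = {"infection_rate": 0.3}  # mean only, no variance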
+ """
+ TorchDeviceMixin.__init__(self, device=device)
+
+ self.threshold = threshold
+ self.discrepancy = model_discrepancy
+ self.out_dim = len(observations)
+ self.emulator = emulator
+
+ # TODO: make this EmulatorWithUncertainty once implemented (see #542)
+ if isinstance(self.emulator, GaussianProcessExact):
+ self.emulator.device = self.device
+
+ if rank > self.out_dim or rank < 1:
+ raise ValueError(
+ f"Rank ({rank}) is outside valid range between 1 and output dimension "
+ f"of simulator ({self.out_dim})",
+ )
+ self.rank = rank
+
+ # Save mean and variance of observations, shape: [1, n_outputs]
+ self.obs_means, self.obs_vars = self._process_observations(observations)
+
+ def _process_observations(
+ self,
+ observations: dict[str, tuple[float, float]] | dict[str, float],
+ ) -> tuple[TensorLike, TensorLike]:
+ """
+        Turn observations into tensors of shape [1, n_outputs].
+
+ Parameters
+ ----------
+        observations: dict[str, tuple[float, float]] | dict[str, float]
+            For each output variable, specifies the observed (mean, variance). If
+            there is no uncertainty in the observations, provide just the observed
+            value.
+
+ Returns
+ -------
+ tuple[TensorLike, TensorLike]
+            Tensors of the observed means and the associated variances (which can
+            be 0).
+ """
+ values = torch.tensor(list(observations.values()), device=self.device)
+
+ # No variance
+ if values.ndim == 1:
+ means = values
+ variances = torch.zeros_like(means, device=self.device)
+ # Values are (mean, variance)
+ elif values.ndim == 2:
+ means = values[:, 0]
+ variances = values[:, 1]
+ else:
+ msg = "Observations must be either float or tuple of two floats."
+ raise ValueError(msg)
+
+ # Reshape observation tensors for broadcasting
+ return means.view(1, -1), variances.view(1, -1)
+
+ def _create_nroy_mask(self, implausibility: TensorLike) -> TensorLike:
+ """
+ Create mask for NROY points based on rank.
+
+ Parameters
+ ----------
+ implausibility: TensorLike
+ Tensor of implausibility scores for tested parameters.
+
+ Returns
+ -------
+ TensorLike
+            Boolean tensor indicating whether each sample is NROY given the
+            self.rank and self.threshold values.
+ """
+ # Sort implausibilities for each sample (descending)
+ I_sorted, _ = torch.sort(implausibility, dim=1, descending=True)
+ # The rank-th highest output implausibility must be <= threshold
+ return I_sorted[:, self.rank - 1] <= self.threshold
+
+ def emulator_predict(self, x: TensorLike) -> tuple[TensorLike, TensorLike]:
+ """
+ Return emulator predicted mean and variance for input parameters `x`.
+
+ Parameters
+ ----------
+ x: TensorLike
+ Tensor of input parameters to make predictions for.
+
+ Returns
+ -------
+ tuple[TensorLike, TensorLike]
+ The emulator predicted mean and variance for `x`.
+ """
+ if self.emulator is not None:
+ output = self.emulator.predict(x)
+ assert isinstance(output, GaussianLike)
+ assert output.variance.ndim == 2
+ return (
+ output.mean.float().detach(),
+ output.variance.float().detach(),
+ )
+ msg = "Need an emulator to make predictions."
+ raise ValueError(msg)
+
+ def get_nroy(
+ self, implausibility: TensorLike, x: TensorLike | None = None
+ ) -> TensorLike:
+ """
+ Get indices of NROY points from implausibility scores. If `x`
+ is provided, returns parameter values at NROY indices.
+
+ Parameters
+ ----------
+ implausibility: TensorLike
+ Tensor of implausibility scores for tested input parameters.
+        x: TensorLike | None
+ Optional tensor of scored input parameters.
+
+ Returns
+ -------
+ TensorLike
+ Indices of NROY points or `x` parameters at NROY indices.
+ """
+ nroy_mask = self._create_nroy_mask(implausibility)
+ idx = torch.where(nroy_mask)[0]
+ if x is None:
+ return idx
+ return x[idx]
+
+ def get_ro(
+ self, implausibility: TensorLike, x: TensorLike | None = None
+ ) -> TensorLike:
+ """
+ Get indices of RO points from implausibility scores. If `x`
+ is provided, returns parameter values at RO indices.
+
+ Parameters
+ ----------
+ implausibility: TensorLike
+ Tensor of implausibility scores for tested input parameters.
+        x: TensorLike | None
+            Optional tensor of scored input parameters.
+
+ Returns
+ -------
+ TensorLike
+ Indices of RO points or `x` parameters at RO indices.
+ """
+ nroy_mask = self._create_nroy_mask(implausibility)
+ idx = torch.where(~nroy_mask)[0]
+ if x is None:
+ return idx
+ return x[idx]
+
+ def calculate_implausibility(
+ self,
+ pred_means: TensorLike, # [n_samples, n_outputs]
+ pred_vars: TensorLike, # [n_samples, n_outputs]
+ ) -> TensorLike:
+ """
+ Calculate implausibility scores.
+
+ Parameters
+ ----------
+ pred_means: TensorLike
+ Tensor of prediction means [n_samples, n_outputs]
+ pred_vars: TensorLike
+ Tensor of prediction variances [n_samples, n_outputs].
+
+ Returns
+ -------
+ TensorLike
+ Tensor of implausibility scores.
+ """
+ # Additional variance due to model discrepancy (defaults to 0)
+ discrepancy = torch.full_like(
+ self.obs_vars, self.discrepancy, device=self.device
+ )
+
+ # Calculate total variance
+ Vs = pred_vars + discrepancy + self.obs_vars
+
+ # Calculate implausibility
+ return torch.abs(self.obs_means - pred_means) / torch.sqrt(Vs)
+
+
+class HistoryMatchingWorkflow(HistoryMatching):
+ """
+ Run history matching workflow:
+ - sample parameter values to test from the current NROY parameter space
+    - use the emulator to rule out implausible parameters and update the NROY space
+    - make predictions for a subset of the NROY parameters using the simulator
+ - refit the emulator using the simulated data
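+
+    Examples
+    --------
+    An illustrative sketch, assuming a trained ``GaussianProcessExact`` called
+    ``gp`` and its training data ``x``, ``y`` from the ``Epidemic`` simulator::
+
+        from autoemulate.experimental.simulations.epidemic import Epidemic
+
+        hmw = HistoryMatchingWorkflow(
+            simulator=Epidemic(),
+            emulator=gp,
+            observations={"infection_rate": (0.3, 0.05)},
+            threshold=3.0,
+            train_x=x,
+            train_y=y,
+        )
+        test_x, impl_scores = hmw.run(n_simulations=20, n_test_samples=100)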
+ """
+
+ def __init__( # noqa: PLR0913 allow too many arguments since all currently required
+ self,
+ simulator: Simulator,
+ # TODO: make this EmulatorWithUncertainty once implemented (see #542)
+ emulator: GaussianProcessExact,
+ observations: dict[str, tuple[float, float]] | dict[str, float],
+ threshold: float = 3.0,
+ model_discrepancy: float = 0.0,
+ rank: int = 1,
+ train_x: TensorLike | None = None,
+ train_y: TensorLike | None = None,
+ device: DeviceLike | None = None,
+ ):
+ """
+ Initialize the history matching workflow object.
+
+ TODO:
+ - add random seed for reproducibility (once #465 is complete)
+
+ Parameters
+ ----------
+ simulator: Simulator
+ A simulator.
+ emulator: GaussianProcessExact
+ TODO: make this EmulatorWithUncertainty once implemented (see #542)
+ A Gaussian Process emulator pre-trained on `simulator` data.
+        observations: dict[str, tuple[float, float]] | dict[str, float]
+            For each output variable, specifies the observed (mean, variance). If
+            there is no uncertainty in the observations, provide just the observed
+            value.
+ threshold: float
+ Implausibility threshold (query points with implausibility scores that
+ exceed this value are ruled out). Defaults to 3, which is considered
+ a good value for simulations with a single output.
+ model_discrepancy: float
+ Additional variance to include in the implausibility calculation.
+ rank: int
+ Scoring method for multi-output problems. Must be 1 <= rank <= n_outputs.
+ When the implausibility scores are ordered across outputs, it indicates
+ which rank to use when determining whether the query point is NROY. The
+            default of ``1`` indicates that the largest implausibility will be used.
+ train_x: TensorLike | None
+ Optional tensor of input data the emulator was trained on.
+ train_y: TensorLike | None
+ Optional tensor of output data the emulator was trained on.
+ device: DeviceLike | None
+            The device to use. If None, the default torch device is used.
+ """
+ super().__init__(
+ observations, threshold, model_discrepancy, rank, emulator, device
+ )
+ self.simulator = simulator
+
+ # These get updated when run() is called and used to refit the emulator
+ if train_x is not None and train_y is not None:
+ self.train_x = train_x.to(self.device)
+ self.train_y = train_y.to(self.device)
+ else:
+ self.train_x = torch.empty((0, self.simulator.in_dim), device=self.device)
+ self.train_y = torch.empty((0, self.simulator.out_dim), device=self.device)
+
+ def generate_samples(self, n: int) -> tuple[TensorLike, TensorLike]:
+ """
+ Draw `n` samples from the simulator min/max parameter bounds and
+        evaluate implausibility given emulator predictions.
+
+ Parameters
+ ----------
+ n: int
+ The number of parameter samples to generate.
+
+ Returns
+ -------
+ tuple[TensorLike, TensorLike]
+            A tensor of tested input parameters and their implausibility scores.
+ """
+ # Generate `n` parameter samples from within NROY bounds
+ test_x = self.simulator.sample_inputs(n).to(self.device)
+
+ # Rule out implausible parameters from samples using an emulator
+ pred_means, pred_vars = self.emulator_predict(test_x)
+ impl_scores = self.calculate_implausibility(pred_means, pred_vars)
+
+ return test_x, impl_scores
+
+ def update_simulator_bounds(self, nroy_x: TensorLike):
+ """
+ Update simulator parameter bounds to min/max of NROY parameter samples.
+
+ Parameters
+ ----------
+ nroy_x: TensorLike
+ A tensor of NROY parameter samples [n_samples, n_inputs]
+ """
+        # Need more than one sample to define min/max bounds
+ if nroy_x.shape[0] > 1:
+ min_nroy_values = torch.min(nroy_x, dim=0).values
+ max_nroy_values = torch.max(nroy_x, dim=0).values
+ self.simulator._param_bounds = list(
+ zip(
+ min_nroy_values.cpu().tolist(),
+ max_nroy_values.cpu().tolist(),
+ strict=False,
+ )
+ )
+ else:
+ warnings.warn(
+ (
+                    f"Could not update simulator parameter bounds: only "
+ f"{nroy_x.shape[0]} samples were provided."
+ ),
+ stacklevel=2,
+ )
+
+ def sample_tensor(self, n: int, x: TensorLike) -> TensorLike:
+ """
+ Randomly sample `n` rows from `x`.
+
+ Parameters
+ ----------
+ n: int
+ The number of samples to draw.
+ x: TensorLike
+ The tensor to sample from.
+
+ Returns
+ -------
+ TensorLike
+ A tensor of samples with `n` rows.
+ """
+ if x.shape[0] < n:
+ warnings.warn(
+                f"Tensor has {x.shape[0]} rows, fewer than the {n} requested samples.",
+ stacklevel=2,
+ )
+ idx = torch.randperm(x.shape[0], device=self.device)[:n]
+ return x[idx]
+
+ def simulate(self, x: TensorLike) -> tuple[TensorLike, TensorLike]:
+ """
+ Simulate `x` parameter inputs and filter out failed simulations.
+
+ Parameters
+ ----------
+ x: TensorLike
+ A tensor of parameters to simulate [n_samples, n_inputs].
+
+ Returns
+ -------
+ tuple[TensorLike, TensorLike]
+            Tensors of successfully simulated input parameters and predictions.
+ """
+ y = self.simulator.forward_batch(x).to(self.device)
+
+ # Filter out runs that simulator failed to return predictions for
+ # TODO: this assumes that simulator returns None if it fails (see #438)
+ valid_indices = [i for i, res in enumerate(y) if res is not None]
+ valid_x = x[valid_indices]
+ valid_y = y[valid_indices]
+
+ self.train_y = torch.cat([self.train_y, valid_y], dim=0)
+ self.train_x = torch.cat([self.train_x, valid_x], dim=0)
+
+ return valid_x, valid_y
+
+ def run(
+ self,
+ n_simulations: int = 100,
+ n_test_samples: int = 10000,
+ max_retries: int = 3,
+ ) -> tuple[TensorLike, TensorLike]:
+ """
+ Run a wave of the history matching workflow.
+
+ Parameters
+ ----------
+ n_simulations: int
+ The number of simulations to run.
+ n_test_samples: int
+ Number of input parameters to test for implausibility with the emulator.
+ Parameters to simulate are sampled from this NROY subset.
+ max_retries: int
+ Maximum number of times to try to generate `n_simulations` NROY parameters.
+            That is, the maximum number of times to repeat the following steps:
+ - draw `n_test_samples` parameters
+ - use emulator to make predictions for those parameters
+            - score implausibility of parameters given predictions
+ - identify NROY parameters within this set
+
+ Returns
+ -------
+ tuple[TensorLike, TensorLike]
+ A tensor of tested input parameters and their implausibility scores from
+ which simulation samples were then selected.
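+
+        Examples
+        --------
+        Illustrative usage, assuming ``hmw`` is an already constructed
+        ``HistoryMatchingWorkflow``::
+
+            test_x, impl_scores = hmw.run(n_simulations=20, n_test_samples=1000)
+            # a second wave samples from the NROY bounds found in the first
+            test_x, impl_scores = hmw.run(n_simulations=20, n_test_samples=1000)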
+ """
+
+ test_parameters_list, impl_scores_list, nroy_parameters_list = (
+ [],
+ [],
+ [torch.empty((0, self.simulator.in_dim), device=self.device)],
+ )
+
+ retries = 0
+ while torch.cat(nroy_parameters_list, 0).shape[0] < n_simulations:
+ if retries == max_retries:
+ msg = (
+ f"Could not generate n_simulations ({n_simulations}) samples "
+ f"that are NROY after {max_retries} retries."
+ )
+ raise RuntimeError(msg)
+
+            # Generate `n_test_samples` with implausibility scores, identify NROY
+ test_parameters, impl_scores = self.generate_samples(n_test_samples)
+ nroy_parameters = self.get_nroy(impl_scores, test_parameters)
+
+ # Store results
+ nroy_parameters_list.append(nroy_parameters)
+ test_parameters_list.append(test_parameters)
+ impl_scores_list.append(impl_scores)
+
+ retries += 1
+
+ # Update simulator parameter bounds to NROY region
+        # The next time run() is called, sampling happens from within this region
+ nroy_parameters_all = torch.cat(nroy_parameters_list, 0)
+ self.update_simulator_bounds(nroy_parameters_all)
+
+ # Randomly pick at most `n_simulations` parameters from NROY to simulate
+ nroy_simulation_samples = self.sample_tensor(n_simulations, nroy_parameters_all)
+
+        # Run the simulator (this updates self.train_x and self.train_y)
+ _, _ = self.simulate(nroy_simulation_samples)
+
+ # Refit emulator using all available data
+ assert self.emulator is not None
+ self.emulator.fit(self.train_x, self.train_y)
+
+ # Return test parameters and impl scores for this run/wave
+ return torch.cat(test_parameters_list, 0), torch.cat(impl_scores_list, 0)
diff --git a/autoemulate/experimental/simulations/epidemic.py b/autoemulate/experimental/simulations/epidemic.py
index 8235f6e94..7c15b4fc0 100644
--- a/autoemulate/experimental/simulations/epidemic.py
+++ b/autoemulate/experimental/simulations/epidemic.py
@@ -35,5 +35,6 @@ def _forward(self, x: TensorLike) -> TensorLike:
TensorLike
Peak infection rate.
"""
- y = simulate_epidemic(x.numpy()[0])
- return torch.tensor([y]).view(-1, 1)
+ y = simulate_epidemic(x.cpu().numpy()[0])
+ # TODO (#537): update with default dtype
+ return torch.tensor([y], dtype=torch.float32).view(-1, 1)
diff --git a/autoemulate/history_matching_dashboard.py b/autoemulate/history_matching_dashboard.py
index d6dcdd7aa..d8c41bd44 100644
--- a/autoemulate/history_matching_dashboard.py
+++ b/autoemulate/history_matching_dashboard.py
@@ -8,6 +8,8 @@
from IPython.display import display
from sklearn.decomposition import PCA
+from autoemulate.experimental.types import TensorLike
+
class HistoryMatchingDashboard:
"""
@@ -21,9 +23,11 @@ def __init__(self, samples, impl_scores, param_names, output_names, threshold=3.
"""
Initialize the dashboard
+ TODO: shouldn't this include rank as input?
+
Args:
- samples: DataFrame or numpy array with parameter samples
- impl_scores: Array of implausibility scores
+ samples: DataFrame or numpy array or tensor with parameter samples
+ impl_scores: Array or tensor of implausibility scores
param_names: List of parameter names
output_names: List of output names
threshold: Implausibility threshold
@@ -31,6 +35,8 @@ def __init__(self, samples, impl_scores, param_names, output_names, threshold=3.
# Convert samples to DataFrame if it's an array
if isinstance(samples, np.ndarray):
self.samples_df = pd.DataFrame(samples, columns=param_names)
+ elif isinstance(samples, TensorLike):
+            self.samples_df = pd.DataFrame(samples.cpu().numpy(), columns=param_names)
else:
self.samples_df = samples.copy()
@@ -45,18 +51,21 @@ def __init__(self, samples, impl_scores, param_names, output_names, threshold=3.
self.max_waves = min(10, n_samples // 100) if n_samples > 100 else 5
# Store other data
- self.impl_scores = impl_scores
+ if isinstance(impl_scores, TensorLike):
+            self.impl_scores = impl_scores.cpu().numpy()
+ else:
+ self.impl_scores = impl_scores
self.param_names = param_names
self.output_names = output_names
self.threshold = threshold
# Calculate minimum implausibility for each sample
- if len(impl_scores.shape) > 1:
- self.min_impl = np.min(impl_scores, axis=1)
- self.max_impl = np.max(impl_scores, axis=1)
+ if len(self.impl_scores.shape) > 1:
+ self.min_impl = np.min(self.impl_scores, axis=1)
+ self.max_impl = np.max(self.impl_scores, axis=1)
else:
- self.min_impl = impl_scores
- self.max_impl = impl_scores
+ self.min_impl = self.impl_scores
+ self.max_impl = self.impl_scores
# Add implausibility to DataFrame
self.samples_df["min_implausibility"] = self.min_impl
diff --git a/tests/experimental/__init__.py b/tests/experimental/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/experimental/test_experimental_history_matching.py b/tests/experimental/test_experimental_history_matching.py
new file mode 100644
index 000000000..7dc5d3a00
--- /dev/null
+++ b/tests/experimental/test_experimental_history_matching.py
@@ -0,0 +1,166 @@
+import pytest
+import torch
+from autoemulate.experimental.device import (
+ SUPPORTED_DEVICES,
+ check_torch_device_is_available,
+)
+from autoemulate.experimental.emulators.gaussian_process.exact import (
+ GaussianProcessExact,
+)
+from autoemulate.experimental.history_matching import (
+ HistoryMatching,
+ HistoryMatchingWorkflow,
+)
+from autoemulate.experimental.simulations.epidemic import Epidemic
+from autoemulate.experimental.types import TensorLike
+
+from .test_experimental_base_simulator import MockSimulator
+
+
+@pytest.fixture
+def mock_simulator():
+    """Fixture for the mock simulator from test_experimental_base_simulator."""
+ param_ranges = {"param1": (0.0, 1.0), "param2": (-10.0, 10.0)}
+ return MockSimulator(param_ranges, output_names=["output1", "output2"])
+
+
+@pytest.fixture
+def observations():
+ """Fixture for observations data matching mock simulator outputs"""
+ return {"output1": (0.5, 0.1), "output2": (0.6, 0.2)} # (mean, variance)
+
+
+@pytest.fixture
+def history_matcher(observations):
+ """Fixture for a basic HistoryMatching instance."""
+ return HistoryMatching(
+ observations=observations,
+ threshold=3.0,
+ model_discrepancy=0.1,
+ rank=1,
+ )
+
+
+def test_history_matcher_init(history_matcher):
+ """Test initialization of HistoryMatching."""
+ assert history_matcher.threshold == 3.0
+ assert history_matcher.discrepancy == 0.1
+ assert history_matcher.rank == 1
+ assert history_matcher.obs_means.shape == (1, 2)
+ assert history_matcher.obs_vars.shape == (1, 2)
+
+
+def test_calculate_implausibility(history_matcher, observations):
+ """Test implausibility calculation."""
+
+ # have 1 sample of 2 outputs arranged as [n_samples, n_outputs]
+ pred_means = torch.Tensor([[0.4, 0.7]])
+ pred_vars = torch.Tensor([[0.05, 0.1]])
+
+ impl_scores = history_matcher.calculate_implausibility(pred_means, pred_vars)
+ assert impl_scores.shape == (1, 2)
+
+ assert impl_scores[0][0] == (
+ abs(pred_means[0][0] - observations["output1"][0])
+ # have an extra term in the denominator for model discrepancy
+ / (pred_vars[0][0] + observations["output1"][1] + 0.1) ** 0.5
+ )
+
+ assert impl_scores[0][1] == (
+ abs(pred_means[0][1] - observations["output2"][0])
+ # have an extra term in the denominator for model discrepancy
+ / (pred_vars[0][1] + observations["output2"][1] + 0.1) ** 0.5
+ )
+
+ # the output shape is the same irrespective of rank
+ history_matcher.rank = 2
+ impl_scores = history_matcher.calculate_implausibility(pred_means, pred_vars)
+ assert impl_scores.shape == (1, 2)
+
+
+def test_get_indices(history_matcher):
+ """Test NROY and RO indices vary with rank."""
+ impl_scores = torch.tensor([[1, 5], [1, 2], [4, 2]])
+
+ # rank = 1
+ nroy = history_matcher.get_nroy(impl_scores)
+ assert len(nroy) == 1
+ assert nroy[0] == 1
+
+ ro = history_matcher.get_ro(impl_scores)
+ assert len(ro) == 2
+ assert ro[0] == 0
+ assert ro[1] == 2
+
+ # rank = n
+ history_matcher.rank = 2
+ assert len(history_matcher.get_nroy(impl_scores)) == 3
+ assert len(history_matcher.get_ro(impl_scores)) == 0
+
+
+@pytest.mark.parametrize("device", SUPPORTED_DEVICES)
+def test_run(device):
+ """Test the full history matching workflow with Epidemic simulator."""
+ if not check_torch_device_is_available(device):
+ pytest.skip(f"Device ({device}) is not available.")
+
+ simulator = Epidemic()
+ x = simulator.sample_inputs(10)
+ y = simulator.forward_batch(x)
+
+ # Run history matching
+ gp = GaussianProcessExact(x, y, device=device)
+ gp.fit(x, y)
+
+ observations = {"infection_rate": (0.3, 0.05)}
+
+ hm = HistoryMatchingWorkflow(
+ simulator=simulator,
+ emulator=gp,
+ observations=observations,
+ threshold=3.0,
+ model_discrepancy=0.1,
+ rank=1,
+ device=device,
+ )
+
+ # call run first time
+ hm.run(n_simulations=5)
+
+ # Check basic structure of results
+ assert isinstance(hm.train_x, TensorLike)
+ assert isinstance(hm.emulator, GaussianProcessExact)
+
+ assert len(hm.train_x) == 5
+
+ # can run again
+ hm.run(n_simulations=5)
+
+    # Training data should now include samples from both runs
+ assert len(hm.train_x) == 5 * 2
+
+
+def test_run_max_tries():
+ """Run history matching with observations that return no NROY params."""
+ simulator = Epidemic()
+ x = simulator.sample_inputs(10)
+ y = simulator.forward_batch(x)
+
+ # Run history matching
+ gp = GaussianProcessExact(x, y)
+ gp.fit(x, y)
+
+ # Extreme values outside the range of what the simulator returns
+ observations = {"infection_rate": (100.0, 1.0)}
+
+ hm = HistoryMatchingWorkflow(
+ simulator=simulator,
+ emulator=gp,
+ observations=observations,
+ threshold=3.0,
+ model_discrepancy=0.1,
+ rank=1,
+ )
+
+ with pytest.raises(RuntimeError):
+ hm.run(n_simulations=5)