cmu-delphi
diff --git a/‎NAMESPACE
Lines changed: 1 addition & 1 deletion b/‎NAMESPACE
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/cdc_baseline_forecaster.R
Lines changed: 1 addition & 1 deletion b/‎R/cdc_baseline_forecaster.R
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/epi_workflow.R
Lines changed: 11 additions & 7 deletions b/‎R/epi_workflow.R
Lines changed: 11 additions & 7 deletions
diff --git a/‎R/get_predict_data.R
Lines changed: 60 additions & 0 deletions b/‎R/get_predict_data.R
Lines changed: 60 additions & 0 deletions
diff --git a/‎R/get_test_data.R
Lines changed: 0 additions & 113 deletions b/‎R/get_test_data.R
Lines changed: 0 additions & 113 deletions
diff --git a/‎R/tidy.R
Lines changed: 1 addition & 1 deletion b/‎R/tidy.R
Lines changed: 1 addition & 1 deletion
diff --git a/‎_pkgdown.yml
Lines changed: 1 addition & 1 deletion b/‎_pkgdown.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎man/get_test_data.Rd renamed to ‎man/get_predict_data.Rd
Lines changed: 6 additions & 6 deletions b/‎man/get_test_data.Rd renamed to ‎man/get_predict_data.Rd
Lines changed: 6 additions & 6 deletions
diff --git a/‎man/tidy.frosting.Rd
Lines changed: 1 addition & 1 deletion b/‎man/tidy.frosting.Rd
Lines changed: 1 addition & 1 deletion
diff --git a/‎tests/testthat/_snaps/check_enough_data.md
Lines changed: 1 addition & 1 deletion b/‎tests/testthat/_snaps/check_enough_data.md
Lines changed: 1 addition & 1 deletion
@@ -167,7 +167,7 @@ export(flatline_forecaster)
 export(flusight_hub_formatter)
 export(forecast)
 export(frosting)
-export(get_test_data)
+export(get_predict_data)
 export(is_epi_recipe)
 export(is_epi_workflow)
 export(is_layer)
 
@@ -78,7 +78,7 @@ cdc_baseline_forecaster <- function(
   # target_date <- args_list$target_date %||% (forecast_date + args_list$ahead)
 
 
-  latest <- get_test_data(epi_recipe(epi_data), epi_data)
+  latest <- get_predict_data(epi_recipe(epi_data), epi_data)
 
   f <- frosting() %>%
     layer_predict() %>%
 
@@ -132,6 +132,9 @@ fit.epi_workflow <- function(object, data, ..., control = workflows::control_wor
 #' @param new_data A data frame containing the new predictors to preprocess
 #'   and predict on
 #'
+#' @param reference_dates A vector matching the type of `time_value` in
+#'   `new_data` giving the dates of the predictions to keep. Defaults to the `reference_date` of the `object`'s recipe.
+#'
 #' @inheritParams parsnip::predict.model_fit
 #'
 #' @return
@@ -155,14 +158,13 @@ fit.epi_workflow <- function(object, data, ..., control = workflows::control_wor
 #'
 #' preds <- predict(wf, latest)
 #' preds
-predict.epi_workflow <- function(object, new_data, type = NULL, opts = list(), ...) {
+predict.epi_workflow <- function(object, new_data, type = NULL, opts = list(), reference_dates = NULL, ...) {
   if (!workflows::is_trained_workflow(object)) {
     cli_abort(c(
       "Can't predict on an untrained epi_workflow.",
       i = "Do you need to call `fit()`?"
     ))
   }
-  browser()
   components <- list()
   components$mold <- workflows::extract_mold(object)
   components$forged <- hardhat::forge(new_data,
@@ -171,7 +173,8 @@ predict.epi_workflow <- function(object, new_data, type = NULL, opts = list(), .
 
   components$keys <- grab_forged_keys(components$forged, object, new_data)
   components <- apply_frosting(object, components, new_data, type = type, opts = opts, ...)
-  components$predictions
+  reference_dates <- reference_dates %||% extract_recipe(object)$reference_date
+  components$predictions %>% filter(time_value %in% reference_dates)
 }
 
 
@@ -267,11 +270,12 @@ forecast.epi_workflow <- function(object, ..., n_recent = NULL, forecast_date =
     }
   }
 
-  test_data <- get_test_data(
+  predict_data <- get_predict_data(
     hardhat::extract_preprocessor(object),
-    object$original_data
+    object$original_data,
+    reference_date = forecast_date
   )
+  predict_data$time_value %>% max
 
-  predictions <- predict(object, new_data = test_data)
-
+  predict(object, new_data = predict_data, forecast_date)
 }
@@ -0,0 +1,60 @@
+#' Get the recent data needed to make predictions
+#'
+#' `get_predict_data()` trims an [epi_df][epiprocess::as_epi_df], dropping rows
+#' whose `time_value` is `test_interval` or more before `reference_date`. All
+#' columns are kept; the result is used to create the features necessary to
+#' produce forecasts. An error is raised if `x` lacks columns of the recipe's
+#' template, if no reference date is available, or if trimming leaves no rows.
+#'
+#' @param recipe A recipe object.
+#' @param x An epi_df. The typical usage is to
+#'   pass the same data as that used for fitting the recipe.
+#' @param test_interval A time interval or integer. The length of time before
+#'   the `reference_date` to consider for the forecast. The default is 365
+#'   days, which you will likely only need to make longer if you are doing
+#'   long forecast horizons, or shorter if you are forecasting using an
+#'   expensive model.
+#' @param reference_date The date that `test_interval` is measured back from.
+#'   Defaults to the `reference_date` stored in `recipe`.
+#'
+#' @return An object of the same type as `x` with columns `geo_value`,
+#'   `time_value`, any additional keys, as well as other variables in the
+#'   original dataset.
+#' @examples
+#' # create recipe
+#' rec <- epi_recipe(covid_case_death_rates) %>%
+#'   step_epi_ahead(death_rate, ahead = 7) %>%
+#'   step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
+#'   step_epi_lag(case_rate, lag = c(0, 7, 14))
+#' get_predict_data(recipe = rec, x = covid_case_death_rates)
+#' @importFrom rlang %@%
+#' @importFrom stats na.omit
+#' @export
+get_predict_data <- function(recipe,
+                             x,
+                             test_interval = NULL,
+                             reference_date = NULL) {
+  if (!is_epi_df(x)) cli_abort("`x` must be an `epi_df`.")
+  check <- hardhat::check_column_names(x, colnames(recipe$template))
+  if (!check$ok) {
+    cli_abort(c(
+      "Some variables used for training are not available in {.arg x}.",
+      i = "The following required columns are missing: {check$missing_names}"
+    ))
+  }
+  reference_date <- reference_date %||% recipe$reference_date
+  # A NULL `reference_date` would make the comparison below zero-length.
+  if (is.null(reference_date)) cli_abort("`reference_date` is required.")
+  test_interval <- test_interval %||% as.difftime(365, units = "days")
+  trimmed_x <- x %>%
+    filter((reference_date - time_value) < test_interval)
+
+  if (nrow(trimmed_x) == 0) {
+    cli_abort(
+      "predict data is filtered to no rows; check your `test_interval = {test_interval}` and `reference_date= {reference_date}`",
+      class = "epipredict__get_predict_data__no_predict_data"
+    )
+  }
+
+  trimmed_x
+}
@@ -35,7 +35,7 @@
 #'   step_epi_naomit()
 #'
 #' wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu)
-#' latest <- get_test_data(recipe = r, x = jhu)
+#' latest <- get_predict_data(recipe = r, x = jhu)
 #'
 #' f <- frosting() %>%
 #'   layer_predict() %>%
 
@@ -84,7 +84,7 @@ reference:
     contents:
       - frosting
       - ends_with("_frosting")
-      - get_test_data
+      - get_predict_data
       - tidy.frosting
 
   - title: Frosting layers
 
@@ -37,7 +37,7 @@
 # check_enough_data only checks train data when skip = FALSE
 
     Code
-      forecaster %>% predict(new_data = toy_test_data %>% filter(time_value >
+      forecaster %>% predict(new_data = toy_predict_data %>% filter(time_value >
         "2020-01-08"))
     Condition
       Error in `check_enough_data_core()`: