cmu-delphi · May 5, 2025
diff --git a/‎R/arx_forecaster.R
Lines changed: 31 additions & 29 deletions b/‎R/arx_forecaster.R
Lines changed: 31 additions & 29 deletions
diff --git a/‎R/epi_workflow.R
Lines changed: 18 additions & 8 deletions b/‎R/epi_workflow.R
Lines changed: 18 additions & 8 deletions
diff --git a/‎R/get_predict_data.R
Lines changed: 11 additions & 7 deletions b/‎R/get_predict_data.R
Lines changed: 11 additions & 7 deletions
diff --git a/‎man/arx_args_list.Rd
Lines changed: 9 additions & 2 deletions b/‎man/arx_args_list.Rd
Lines changed: 9 additions & 2 deletions
diff --git a/‎man/arx_class_args_list.Rd
Lines changed: 0 additions & 9 deletions b/‎man/arx_class_args_list.Rd
Lines changed: 0 additions & 9 deletions
diff --git a/‎man/cdc_baseline_args_list.Rd
Lines changed: 0 additions & 9 deletions b/‎man/cdc_baseline_args_list.Rd
Lines changed: 0 additions & 9 deletions
diff --git a/‎man/climate_args_list.Rd
Lines changed: 0 additions & 9 deletions b/‎man/climate_args_list.Rd
Lines changed: 0 additions & 9 deletions
diff --git a/‎man/flatline_args_list.Rd
Lines changed: 0 additions & 9 deletions b/‎man/flatline_args_list.Rd
Lines changed: 0 additions & 9 deletions
diff --git a/‎man/forecast.epi_workflow.Rd
Lines changed: 14 additions & 2 deletions b/‎man/forecast.epi_workflow.Rd
Lines changed: 14 additions & 2 deletions
diff --git a/‎man/get_predict_data.Rd
Lines changed: 11 additions & 1 deletion b/‎man/get_predict_data.Rd
Lines changed: 11 additions & 1 deletion
diff --git a/‎man/grf_quantiles.Rd
Lines changed: 2 additions & 2 deletions b/‎man/grf_quantiles.Rd
Lines changed: 2 additions & 2 deletions
diff --git a/‎man/predict-epi_workflow.Rd
Lines changed: 11 additions & 1 deletion b/‎man/predict-epi_workflow.Rd
Lines changed: 11 additions & 1 deletion
diff --git a/‎man/step_adjust_latency.Rd
Lines changed: 19 additions & 27 deletions b/‎man/step_adjust_latency.Rd
Lines changed: 19 additions & 27 deletions
diff --git a/‎tests/testthat/_snaps/get_predict_data.md
Lines changed: 1 addition & 1 deletion b/‎tests/testthat/_snaps/get_predict_data.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎tests/testthat/test-arx_forecaster.R
Lines changed: 7 additions & 8 deletions b/‎tests/testthat/test-arx_forecaster.R
Lines changed: 7 additions & 8 deletions
diff --git a/‎tests/testthat/test-get_predict_data.R
Lines changed: 3 additions & 3 deletions b/‎tests/testthat/test-get_predict_data.R
Lines changed: 3 additions & 3 deletions
@@ -47,20 +47,19 @@ arx_forecaster <- function(
   if (!is_regression(trainer)) {
     cli_abort("`trainer` must be a {.pkg parsnip} model of mode 'regression'.")
   }
-
   wf <- arx_fcast_epi_workflow(epi_data, outcome, predictors, trainer, args_list)
   wf <- fit(wf, epi_data)
 
   # get the forecast date for the forecast function
   if (args_list$adjust_latency == "none") {
-    forecast_date_default <- max(epi_data$time_value)
+    reference_date_default <- max(epi_data$time_value)
   } else {
-    forecast_date_default <- attributes(epi_data)$metadata$as_of
+    reference_date_default <- attributes(epi_data)$metadata$as_of
   }
-  forecast_date <- args_list$forecast_date %||% forecast_date_default
-
+  reference_date <- args_list$reference_date %||% reference_date_default
+  predict_interval <- args_list$predict_interval
 
-  preds <- forecast(wf, forecast_date = forecast_date) %>%
+  preds <- forecast(wf, reference_dates = reference_date, predict_interval = predict_interval) %>%
     as_tibble() %>%
     select(-time_value)
 
@@ -126,21 +125,21 @@ arx_fcast_epi_workflow <- function(
   # if they don't and they're not adjusting latency, it defaults to the max time_value
   # if they're adjusting, it defaults to the as_of
   if (args_list$adjust_latency == "none") {
-    forecast_date_default <- max(epi_data$time_value)
-    if (!is.null(args_list$forecast_date) && args_list$forecast_date != forecast_date_default) {
+    reference_date_default <- max(epi_data$time_value)
+    if (!is.null(args_list$reference_date) && args_list$reference_date != reference_date_default) {
       cli_warn(
-        "The specified forecast date {args_list$forecast_date} doesn't match the date from which the forecast is actually occurring {forecast_date_default}.",
+        "The specified forecast date {args_list$reference_date} doesn't match the date from which the forecast is actually occurring {reference_date_default}.",
         class = "epipredict__arx_forecaster__forecast_date_defaulting"
       )
     }
   } else {
-    forecast_date_default <- attributes(epi_data)$metadata$as_of
+    reference_date_default <- attributes(epi_data)$metadata$as_of
   }
-  forecast_date <- args_list$forecast_date %||% forecast_date_default
-  target_date <- args_list$target_date %||% (forecast_date + args_list$ahead)
-  if (forecast_date + args_list$ahead != target_date) {
-    cli_abort("`forecast_date` {.val {forecast_date}} + `ahead` {.val {ahead}} must equal `target_date` {.val {target_date}}.",
-      class = "epipredict__arx_forecaster__inconsistent_target_ahead_forecaste_date"
+  reference_date <- args_list$reference_date %||% reference_date_default
+  target_date <- args_list$target_date %||% (reference_date + args_list$ahead)
+  if (reference_date + args_list$ahead != target_date) {
+    cli_abort("`reference_date` {.val {reference_date}} + `ahead` {.val {ahead}} must equal `target_date` {.val {target_date}}.",
+      class = "epipredict__arx_forecaster__inconsistent_target_ahead_forecast_date"
     )
   }
 
@@ -153,12 +152,12 @@ arx_fcast_epi_workflow <- function(
   if (!is.null(method_adjust_latency)) {
     if (method_adjust_latency == "extend_ahead") {
       r <- r %>% step_adjust_latency(all_outcomes(),
-        fixed_forecast_date = forecast_date,
+        fixed_reference_date = reference_date,
         method = method_adjust_latency
       )
     } else if (method_adjust_latency == "extend_lags") {
       r <- r %>% step_adjust_latency(all_predictors(),
-        fixed_forecast_date = forecast_date,
+        fixed_reference_date = reference_date,
         method = method_adjust_latency
       )
     }
@@ -218,7 +217,7 @@ arx_fcast_epi_workflow <- function(
       by_key = args_list$quantile_by_key
     )
   }
-  f <- layer_add_forecast_date(f, forecast_date = forecast_date) %>%
+  f <- layer_add_forecast_date(f, forecast_date = reference_date) %>%
     layer_add_target_date(target_date = target_date)
   if (args_list$nonneg) f <- layer_threshold(f, dplyr::starts_with(".pred"))
 
@@ -238,19 +237,19 @@ arx_fcast_epi_workflow <- function(
 #' @param n_training Integer. An upper limit for the number of rows per
 #'   key that are used for training
 #'   (in the time unit of the `epi_df`).
-#' @param forecast_date Date. The date from which the forecast is occurring.
+#' @param reference_date Date. The date from which the forecast is occurring.
 #'   The default `NULL` will determine this automatically from either
 #'   1. the maximum time value for which there's data if there is no latency
 #'   adjustment (the default case), or
 #'   2. the `as_of` date of `epi_data` if `adjust_latency` is
-#'   non-`NULL`.
+#'   not `"none"`.
 #' @param target_date Date. The date that is being forecast. The default `NULL`
-#'   will determine this automatically as `forecast_date + ahead`.
+#'   will determine this automatically as `reference_date + ahead`.
 #' @param adjust_latency Character. One of the `method`s of
 #'   [step_adjust_latency()], or `"none"` (in which case there is no adjustment).
-#'   If the `forecast_date` is after the last day of data, this determines how
+#'   If the `reference_date` is after the last day of data, this determines how
 #'   to shift the model to account for this difference. The options are:
-#'   - `"none"` the default, assumes the `forecast_date` is the last day of data
+#'   - `"none"` the default, assumes the `reference_date` is the last day of data
 #'   - `"extend_ahead"`: increase the `ahead` by the latency so it's relative to
 #'   the last day of data. For example, if the last day of data was 3 days ago,
 #'   the ahead becomes `ahead+3`.
@@ -280,6 +279,7 @@ arx_fcast_epi_workflow <- function(
 #'   column names on which to group the data and check threshold within each
 #'   group. Useful if training per group (for example, per geo_value).
 #' @param ... Space to handle future expansions (unused).
+#' @inheritParams get_predict_data
 #'
 #'
 #' @return A list containing updated parameter choices with class `arx_flist`.
@@ -294,7 +294,7 @@ arx_args_list <- function(
     lags = c(0L, 7L, 14L),
     ahead = 7L,
     n_training = Inf,
-    forecast_date = NULL,
+    reference_date = NULL,
     target_date = NULL,
     adjust_latency = c("none", "extend_ahead", "extend_lags", "locf"),
     warn_latency = TRUE,
@@ -304,6 +304,7 @@ arx_args_list <- function(
     quantile_by_key = character(0L),
     check_enough_data_n = NULL,
     check_enough_data_epi_keys = NULL,
+    predict_interval = NULL,
     ...) {
   # error checking if lags is a list
   rlang::check_dots_empty()
@@ -313,8 +314,8 @@ arx_args_list <- function(
   adjust_latency <- rlang::arg_match(adjust_latency)
   arg_is_scalar(ahead, n_training, symmetrize, nonneg, adjust_latency, warn_latency)
   arg_is_chr(quantile_by_key, allow_empty = TRUE)
-  arg_is_scalar(forecast_date, target_date, allow_null = TRUE)
-  arg_is_date(forecast_date, target_date, allow_null = TRUE)
+  arg_is_scalar(reference_date, target_date, allow_null = TRUE)
+  arg_is_date(reference_date, target_date, allow_null = TRUE)
   arg_is_nonneg_int(ahead, lags)
   arg_is_lgl(symmetrize, nonneg)
   arg_is_probabilities(quantile_levels, allow_null = TRUE)
@@ -323,9 +324,9 @@ arx_args_list <- function(
   arg_is_pos(check_enough_data_n, allow_null = TRUE)
   arg_is_chr(check_enough_data_epi_keys, allow_null = TRUE)
 
-  if (!is.null(forecast_date) && !is.null(target_date)) {
-    if (forecast_date + ahead != target_date) {
-      cli_abort("`forecast_date` {.val {forecast_date}} + `ahead` {.val {ahead}} must equal `target_date` {.val {target_date}}.",
+  if (!is.null(reference_date) && !is.null(target_date)) {
+    if (reference_date + ahead != target_date) {
+      cli_abort("`reference_date` {.val {reference_date}} + `ahead` {.val {ahead}} must equal `target_date` {.val {target_date}}.",
         class = "epipredict__arx_args__inconsistent_target_ahead_forecaste_date"
       )
     }
@@ -338,8 +339,9 @@ arx_args_list <- function(
       ahead,
       n_training,
       quantile_levels,
-      forecast_date,
+      reference_date,
       target_date,
+      predict_interval,
       adjust_latency,
       warn_latency,
       symmetrize,
 
@@ -174,7 +174,18 @@ predict.epi_workflow <- function(object, new_data, type = NULL, opts = list(), r
   components$keys <- grab_forged_keys(components$forged, object, new_data)
   components <- apply_frosting(object, components, new_data, type = type, opts = opts, ...)
   reference_dates <- reference_dates %||% extract_recipe(object)$reference_date
-  components$predictions %>% filter(time_value %in% reference_dates)
+  #browser()
+  predictions <- components$predictions %>% filter(time_value %in% reference_dates)
+  predictions
+  if (nrow(predictions) == 0) {
+    last_pred_date <- components$predictions %>% pull(time_value) %>% max()
+    last_data_date <- new_data %>% pull(time_value) %>% max()
+    cli_warn(
+      "no predictions on the reference date(s) {reference_dates}. The last prediction was on {last_pred_date}. The most recent prediction data is on {last_data_date}",
+      class = "epipredict__predict_epi_workflow__no_predictions"
+    )
+  }
+  predictions
 }
 
 
@@ -242,14 +253,12 @@ print.epi_workflow <- function(x, ...) {
 #' example, suppose n_recent = 3, then if the 3 most recent observations in any
 #' geo_value are all NA’s, we won’t be able to fill anything, and an error
 #' message will be thrown. (See details.)
-#' @param forecast_date By default, this is set to the maximum time_value in x.
-#' But if there is data latency such that recent NA's should be filled, this may
-#' be after the last available time_value.
+#' @inheritParams get_predict_data
 #'
 #' @return A forecast tibble.
 #'
 #' @export
-forecast.epi_workflow <- function(object, ..., n_recent = NULL, forecast_date = NULL) {
+forecast.epi_workflow <- function(object, ..., n_recent = NULL, reference_dates = NULL, predict_interval = NULL) {
   rlang::check_dots_empty()
 
   if (!object$trained) {
@@ -259,6 +268,7 @@ forecast.epi_workflow <- function(object, ..., n_recent = NULL, forecast_date =
     ))
   }
 
+  #browser()
   frosting_fd <- NULL
   if (has_postprocessor(object) && detect_layer(object, "layer_add_forecast_date")) {
     frosting_fd <- extract_argument(object, "layer_add_forecast_date", "forecast_date")
@@ -273,9 +283,9 @@ forecast.epi_workflow <- function(object, ..., n_recent = NULL, forecast_date =
   predict_data <- get_predict_data(
     hardhat::extract_preprocessor(object),
     object$original_data,
-    reference_date = forecast_date
+    reference_date = reference_dates,
+    predict_interval = predict_interval
   )
-  predict_data$time_value %>% max
 
-  predict(object, new_data = predict_data, forecast_date)
+  predict(object, new_data = predict_data, reference_dates = reference_dates)
 }
@@ -14,14 +14,18 @@
 #' @param recipe A recipe object.
 #' @param x An epi_df. The typical usage is to
 #'   pass the same data as that used for fitting the recipe.
-#' @param test_interval A time interval or integer. The length of time before
+#' @param predict_interval A time interval or integer. The length of time before
-#'   the `forecast_date` to consider for the forecast. The default is 1 year,
+#'   the `reference_date` to consider for the forecast. The default is 1 year,
 #'   which you will likely only need to make longer if you are doing long
 #'   forecast horizons, or shorter if you are forecasting using an expensive
 #'   model.
+#' @param reference_date The date that `predict_interval` is measured back
+#'   from. The default `NULL` uses the `reference_date` stored in `recipe`. If
+#'   there is data latency, this may be after the last available `time_value`.
 #'
-#' @return An object of the same type as `x` with columns `geo_value`, `time_value`, any additional
-#'   keys, as well other variables in the original dataset.
+#' @return An object of the same type as `x` with columns `geo_value`,
+#'   `time_value`, any additional keys, as well as other variables in the
+#'   original dataset.
 #' @examples
 #' # create recipe
 #' rec <- epi_recipe(covid_case_death_rates) %>%
@@ -34,7 +38,7 @@
 #' @export
 get_predict_data <- function(recipe,
                           x,
-                          test_interval = NULL,
+                          predict_interval = NULL,
                           reference_date = NULL) {
   if (!is_epi_df(x)) cli_abort("`x` must be an `epi_df`.")
   check <- hardhat::check_column_names(x, colnames(recipe$template))
@@ -45,13 +49,13 @@ get_predict_data <- function(recipe,
     ))
   }
   reference_date <- reference_date %||% recipe$reference_date
-  test_interval <- test_interval %||% as.difftime(365, units = "days")
+  predict_interval <- predict_interval %||% as.difftime(365, units = "days")
   trimmed_x <- x %>%
-    filter((reference_date - time_value) < test_interval)
+    filter((reference_date - time_value) < predict_interval)
 
   if (nrow(trimmed_x) == 0) {
     cli_abort(
-      "predict data is filtered to no rows; check your `test_interval = {test_interval}` and `reference_date= {reference_date}`",
+      "predict data is filtered to no rows; check your `predict_interval = {predict_interval}`, `reference_date= {reference_date}` and latest data {max(x$time_value)}",
       class = "epipredict__get_predict_data__no_predict_data"
     )
   }
 
@@ -4,7 +4,7 @@
       get_predict_data(recipe = r, x = covid_case_death_rates)
     Condition
       Error in `get_predict_data()`:
-      ! predict data is filtered to no rows; check your `test_interval = 365` and `reference_date= 2023-03-10`
+      ! predict data is filtered to no rows; check your `predict_interval = 365`, `reference_date= 2023-03-10` and latest data 2021-12-31
 
 # expect error that geo_value or time_value does not exist
 
 
@@ -1,27 +1,27 @@
 train_data <- epidatasets::cases_deaths_subset
 test_that("arx_forecaster warns if forecast date beyond the implicit one", {
   bad_date <- max(train_data$time_value) + 300
-  expect_error(
+  expect_warning(
     expect_warning(
       arx1 <- arx_forecaster(
         train_data,
         "death_rate_7d_av",
         c("death_rate_7d_av", "case_rate_7d_av"),
-        args_list = (arx_args_list(forecast_date = bad_date))
+        args_list = (arx_args_list(reference_date = bad_date))
       ),
       class = "epipredict__arx_forecaster__forecast_date_defaulting"
     ),
-    class = "epipredict__get_predict_data__no_predict_data")
+    class = "epipredict__predict_epi_workflow__no_predictions")
 })
 
-test_that("arx_forecaster errors if forecast date, target date, and ahead are inconsistent", {
+test_that("arx_forecaster errors if reference date, target date, and ahead are inconsistent", {
   max_date <- max(train_data$time_value)
   expect_error(
     arx1 <- arx_forecaster(
       train_data,
       "death_rate_7d_av",
       c("death_rate_7d_av", "case_rate_7d_av"),
-      args_list = (arx_args_list(ahead = 5, target_date = max_date, forecast_date = max_date))
+      args_list = (arx_args_list(ahead = 5, target_date = max_date, reference_date = max_date))
     ),
     class = "epipredict__arx_args__inconsistent_target_ahead_forecaste_date"
   )
@@ -38,10 +38,9 @@ test_that("warns if there's not enough data to predict", {
     # and actually, pretend we're around mid-October 2022:
     filter(time_value <= as.Date("2022-10-12")) %>%
     as_epi_df(as_of = as.Date("2022-10-12"))
-  edf %>% filter(time_value > "2022-08-01")
 
   expect_error(
-    edf %>% arx_forecaster("value"),
-    class = "epipredict__not_enough_data"
+    edf %>% arx_forecaster("value", args_list = arx_args_list(predict_interval = as.difftime(0, units = "days"))),
+    class = "epipredict__get_predict_data__no_predict_data"
   )
 })
@@ -1,6 +1,6 @@
 suppressPackageStartupMessages(library(dplyr))
 forecast_date <- max(covid_case_death_rates$time_value)
-test_that("return expected number of rows for various `test_intervals`", {
+test_that("return expected number of rows for various `predict_intervals`", {
   r <- epi_recipe(covid_case_death_rates, reference_date = forecast_date) %>%
     step_epi_ahead(death_rate, ahead = 7) %>%
     step_epi_lag(death_rate, lag = c(0, 7, 14, 21, 28)) %>%
@@ -15,14 +15,14 @@ test_that("return expected number of rows for various `test_intervals`", {
     dplyr::n_distinct(covid_case_death_rates$geo_value) * 365
   )
 
-  predict_data <- get_predict_data(recipe = r, test_interval = 5, x = covid_case_death_rates)
+  predict_data <- get_predict_data(recipe = r, predict_interval = 5, x = covid_case_death_rates)
 
   expect_equal(
     nrow(predict_data),
     dplyr::n_distinct(covid_case_death_rates$geo_value) * 5
   )
 
-  predict_data <- get_predict_data(recipe = r, test_interval = as.difftime(35, units = "days"), x = covid_case_death_rates)
+  predict_data <- get_predict_data(recipe = r, predict_interval = as.difftime(35, units = "days"), x = covid_case_death_rates)
 
   expect_equal(
     nrow(predict_data),