cmu-delphi
diff --git a/‎NAMESPACE
Lines changed: 5 additions & 5 deletions b/‎NAMESPACE
Lines changed: 5 additions & 5 deletions
diff --git a/‎NEWS.md
Lines changed: 3 additions & 0 deletions b/‎NEWS.md
Lines changed: 3 additions & 0 deletions
diff --git a/‎R/arx_classifier.R
Lines changed: 1 addition & 1 deletion b/‎R/arx_classifier.R
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/arx_forecaster.R
Lines changed: 3 additions & 3 deletions b/‎R/arx_forecaster.R
Lines changed: 3 additions & 3 deletions
diff --git a/‎R/check_enough_train_data.R renamed to ‎R/check_enough_data.R
Lines changed: 29 additions & 24 deletions b/‎R/check_enough_train_data.R renamed to ‎R/check_enough_data.R
Lines changed: 29 additions & 24 deletions
diff --git a/‎_pkgdown.yml
Lines changed: 1 addition & 1 deletion b/‎_pkgdown.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎man/check_enough_train_data.Rd renamed to ‎man/check_enough_data.Rd
Lines changed: 21 additions & 16 deletions b/‎man/check_enough_train_data.Rd renamed to ‎man/check_enough_data.Rd
Lines changed: 21 additions & 16 deletions
diff --git a/‎man/step_adjust_latency.Rd
Lines changed: 5 additions & 5 deletions b/‎man/step_adjust_latency.Rd
Lines changed: 5 additions & 5 deletions
diff --git a/‎tests/testthat/_snaps/check_enough_train_data.md renamed to ‎tests/testthat/_snaps/check_enough_data.md
Lines changed: 12 additions & 13 deletions b/‎tests/testthat/_snaps/check_enough_train_data.md renamed to ‎tests/testthat/_snaps/check_enough_data.md
Lines changed: 12 additions & 13 deletions
@@ -15,7 +15,7 @@ S3method(apply_frosting,epi_workflow)
 S3method(augment,epi_workflow)
 S3method(autoplot,canned_epipred)
 S3method(autoplot,epi_workflow)
-S3method(bake,check_enough_train_data)
+S3method(bake,check_enough_data)
 S3method(bake,epi_recipe)
 S3method(bake,step_adjust_latency)
 S3method(bake,step_climate)
@@ -49,7 +49,7 @@ S3method(key_colnames,recipe)
 S3method(mean,quantile_pred)
 S3method(predict,epi_workflow)
 S3method(predict,flatline)
-S3method(prep,check_enough_train_data)
+S3method(prep,check_enough_data)
 S3method(prep,epi_recipe)
 S3method(prep,step_adjust_latency)
 S3method(prep,step_climate)
@@ -65,7 +65,7 @@ S3method(print,arx_class)
 S3method(print,arx_fcast)
 S3method(print,canned_epipred)
 S3method(print,cdc_baseline_fcast)
-S3method(print,check_enough_train_data)
+S3method(print,check_enough_data)
 S3method(print,climate_fcast)
 S3method(print,epi_recipe)
 S3method(print,epi_workflow)
@@ -109,7 +109,7 @@ S3method(slather,layer_threshold)
 S3method(slather,layer_unnest)
 S3method(snap,default)
 S3method(snap,quantile_pred)
-S3method(tidy,check_enough_train_data)
+S3method(tidy,check_enough_data)
 S3method(tidy,frosting)
 S3method(tidy,layer)
 S3method(update,layer)
@@ -142,7 +142,7 @@ export(autoplot)
 export(bake)
 export(cdc_baseline_args_list)
 export(cdc_baseline_forecaster)
-export(check_enough_train_data)
+export(check_enough_data)
 export(clean_f_name)
 export(climate_args_list)
 export(climatological_forecaster)
 
@@ -20,6 +20,9 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.0.x will indicat
 - Removes dependence on the `distributional` package, replacing the quantiles 
   with `hardhat::quantile_pred()`. Some associated functions are deprecated with
   `lifecycle` messages.
+- Rename `check_enough_train_data()` to `check_enough_data()`, and generalize it
+  so that it can be used as a check on either training or testing data.
+- Add a check for enough data to predict in `arx_forecaster()`.
 
 ## Improvements
 
 
@@ -222,7 +222,7 @@ arx_class_epi_workflow <- function(
     step_training_window(n_recent = args_list$n_training)
 
   if (!is.null(args_list$check_enough_data_n)) {
-    r <- check_enough_train_data(
+    r <- check_enough_data(
       r,
       recipes::all_predictors(),
       recipes::all_outcomes(),
 
@@ -172,12 +172,12 @@ arx_fcast_epi_workflow <- function(
   r <- r %>%
     step_epi_naomit() %>%
     step_training_window(n_recent = args_list$n_training) %>%
-    check_enough_train_data(all_predictors(), n = args_list$check_enough_data_n, skip = FALSE)
+    check_enough_data(all_predictors(), n = 1, skip = FALSE)
 
   if (!is.null(args_list$check_enough_data_n)) {
-    r <- r %>% check_enough_train_data(
+    r <- r %>% check_enough_data(
       all_predictors(),
-      !!outcome,
+      all_outcomes(),
       n = args_list$check_enough_data_n,
       epi_keys = args_list$check_enough_data_epi_keys,
       drop_na = FALSE
 
@@ -1,13 +1,13 @@
 #' Check the dataset contains enough data points.
 #'
-#' `check_enough_train_data` creates a *specification* of a recipe
+#' `check_enough_data` creates a *specification* of a recipe
 #'  operation that will check if variables contain enough data.
 #'
 #' @param recipe A recipe object. The check will be added to the
 #'  sequence of operations for this recipe.
 #' @param ... One or more selector functions to choose variables for this check.
 #'  See [selections()] for more details. You will usually want to use
-#'  [recipes::all_predictors()] here.
+#'  [recipes::all_predictors()] and/or [recipes::all_outcomes()] here.
 #' @param n The minimum number of data points required for training. If this is
 #'   NULL, the total number of predictors will be used.
 #' @param epi_keys A character vector of column names on which to group the data
@@ -21,24 +21,29 @@
 #' @param columns An internal argument that tracks which columns are evaluated
 #'   for this check. Should not be used by the user.
 #' @param id A character string that is unique to this check to identify it.
-#' @param skip A logical. Should the check be skipped when the
-#'  recipe is baked by [bake()]? While all operations are baked
-#'  when [prep()] is run, some operations may not be able to be
-#'  conducted on new data (e.g. processing the outcome variable(s)).
-#'  Care should be taken when using `skip = TRUE` as it may affect
-#'  the computations for subsequent operations.
+#' @param skip A logical. If `TRUE`, only training data is checked, while if
+#'   `FALSE`, both training and predicting data are checked. Technically, this
+#'   answers the question "should the check be skipped when the recipe is baked
+#'   by [bake()]?" While all operations are baked when [prep()] is run, some
+#'   operations may not be able to be conducted on new data (e.g. processing the
+#'   outcome variable(s)).  Care should be taken when using `skip = TRUE` as it
+#'   may affect the computations for subsequent operations.
 #' @family checks
 #' @export
-#' @details This check will break the `bake` function if any of the checked
-#'  columns have not enough non-NA values. If the check passes, nothing is
-#'  changed to the data.
+#' @details This check will break the `prep` and/or `bake` function if any of
+#'   the checked columns do not have enough non-NA values. If the check passes,
+#'   nothing is changed in the data. It is best used after every other step.
+#'
+#'   For checking training data, it is best to set `...` to be
+#'   `all_predictors(), all_outcomes()`, while for checking prediction data, it
+#'   is best to set `...` to be `all_predictors()` only, with `n = 1`.
 #'
 #'  # tidy() results
 #'
 #'  When you [`tidy()`][tidy.recipe()] this check, a tibble with column
 #'  `terms` (the selectors or variables selected) is returned.
 #'
-check_enough_train_data <-
+check_enough_data <-
   function(recipe,
            ...,
            n = NULL,
@@ -47,11 +52,11 @@ check_enough_train_data <-
            role = NA,
            trained = FALSE,
            columns = NULL,
-           skip = FALSE,
-           id = rand_id("enough_train_data")) {
+           skip = TRUE,
+           id = rand_id("enough_data")) {
     recipes::add_check(
       recipe,
-      check_enough_train_data_new(
+      check_enough_data_new(
         n = n,
         epi_keys = epi_keys,
         drop_na = drop_na,
@@ -65,10 +70,10 @@ check_enough_train_data <-
     )
   }
 
-check_enough_train_data_new <-
+check_enough_data_new <-
   function(n, epi_keys, drop_na, terms, role, trained, columns, skip, id) {
     recipes::check(
-      subclass = "enough_train_data",
+      subclass = "enough_data",
       prefix = "check_",
       n = n,
       epi_keys = epi_keys,
@@ -83,7 +88,7 @@ check_enough_train_data_new <-
   }
 
 #' @export
-prep.check_enough_train_data <- function(x, training, info = NULL, ...) {
+prep.check_enough_data <- function(x, training, info = NULL, ...) {
   col_names <- recipes::recipes_eval_select(x$terms, training, info)
   if (is.null(x$n)) {
     x$n <- length(col_names)
@@ -102,11 +107,11 @@ prep.check_enough_train_data <- function(x, training, info = NULL, ...) {
   if (length(cols_not_enough_data) > 0) {
     cli_abort(
       "The following columns don't have enough data to predict: {cols_not_enough_data}.",
-      class = "epipredict__not_enough_train_data"
+      class = "epipredict__not_enough_data"
     )
   }
 
-  check_enough_train_data_new(
+  check_enough_data_new(
     n = x$n,
     epi_keys = x$epi_keys,
     drop_na = x$drop_na,
@@ -120,7 +125,7 @@ prep.check_enough_train_data <- function(x, training, info = NULL, ...) {
 }
 
 #' @export
-bake.check_enough_train_data <- function(object, new_data, ...) {
+bake.check_enough_data <- function(object, new_data, ...) {
   col_names <- object$columns
   if (object$drop_na) {
     non_na_data <- tidyr::drop_na(new_data, any_of(unname(col_names)))
@@ -137,21 +142,21 @@ bake.check_enough_train_data <- function(object, new_data, ...) {
   if (length(cols_not_enough_data) > 0) {
     cli_abort(
       "The following columns don't have enough data to predict: {cols_not_enough_data}.",
-      class = "epipredict__not_enough_train_data"
+      class = "epipredict__not_enough_data"
     )
   }
   new_data
 }
 
 #' @export
-print.check_enough_train_data <- function(x, width = max(20, options()$width - 30), ...) {
+print.check_enough_data <- function(x, width = max(20, options()$width - 30), ...) {
   title <- paste0("Check enough data (n = ", x$n, ") for ")
   recipes::print_step(x$columns, x$terms, x$trained, title, width)
   invisible(x)
 }
 
 #' @export
-tidy.check_enough_train_data <- function(x, ...) {
+tidy.check_enough_data <- function(x, ...) {
   if (recipes::is_trained(x)) {
     res <- tibble(terms = unname(x$columns))
   } else {
 
@@ -77,7 +77,7 @@ reference:
 
   - title: Epi recipe verification checks
     contents:
-      - check_enough_train_data
+      - check_enough_data
 
   - title: Forecast postprocessing
     desc: Create a series of postprocessing operations
 
@@ -1,25 +1,25 @@
-# check_enough_train_data works on pooled data
+# check_enough_data works on pooled data
 
     Code
-      epi_recipe(toy_epi_df) %>% check_enough_train_data(x, y, n = 2 * n + 1,
-      drop_na = FALSE) %>% prep(toy_epi_df) %>% bake(new_data = NULL)
+      epi_recipe(toy_epi_df) %>% check_enough_data(x, y, n = 2 * n + 1, drop_na = FALSE) %>%
+        prep(toy_epi_df) %>% bake(new_data = NULL)
     Condition
       Error in `prep()`:
       ! The following columns don't have enough data to predict: x and y.
 
 ---
 
     Code
-      epi_recipe(toy_epi_df) %>% check_enough_train_data(x, y, n = 2 * n - 1,
-      drop_na = TRUE) %>% prep(toy_epi_df) %>% bake(new_data = NULL)
+      epi_recipe(toy_epi_df) %>% check_enough_data(x, y, n = 2 * n - 1, drop_na = TRUE) %>%
+        prep(toy_epi_df) %>% bake(new_data = NULL)
     Condition
       Error in `prep()`:
       ! The following columns don't have enough data to predict: x and y.
 
-# check_enough_train_data works on unpooled data
+# check_enough_data works on unpooled data
 
     Code
-      epi_recipe(toy_epi_df) %>% check_enough_train_data(x, y, n = n + 1, epi_keys = "geo_value",
+      epi_recipe(toy_epi_df) %>% check_enough_data(x, y, n = n + 1, epi_keys = "geo_value",
       drop_na = FALSE) %>% prep(toy_epi_df) %>% bake(new_data = NULL)
     Condition
       Error in `prep()`:
@@ -28,18 +28,17 @@
 ---
 
     Code
-      epi_recipe(toy_epi_df) %>% check_enough_train_data(x, y, n = 2 * n - 3,
-      epi_keys = "geo_value", drop_na = TRUE) %>% prep(toy_epi_df) %>% bake(new_data = NULL)
+      epi_recipe(toy_epi_df) %>% check_enough_data(x, y, n = 2 * n - 3, epi_keys = "geo_value",
+      drop_na = TRUE) %>% prep(toy_epi_df) %>% bake(new_data = NULL)
     Condition
       Error in `prep()`:
       ! The following columns don't have enough data to predict: x and y.
 
-# check_enough_train_data works with all_predictors() downstream of constructed terms
+# check_enough_data works with all_predictors() downstream of constructed terms
 
     Code
-      epi_recipe(toy_epi_df) %>% step_epi_lag(x, lag = c(1, 2)) %>%
-        check_enough_train_data(all_predictors(), y, n = 2 * n - 4) %>% prep(
-        toy_epi_df) %>% bake(new_data = NULL)
+      epi_recipe(toy_epi_df) %>% step_epi_lag(x, lag = c(1, 2)) %>% check_enough_data(
+        all_predictors(), y, n = 2 * n - 4) %>% prep(toy_epi_df) %>% bake(new_data = NULL)
     Condition
       Error in `prep()`:
       ! The following columns don't have enough data to predict: lag_1_x, lag_2_x, and y.