Skip to content

Commit 5ef9823

Browse files
committed
rename, old default skip, news, all_outcomes() for test, docs
1 parent 2940f95 commit 5ef9823

11 files changed

+94
-82
lines changed

NAMESPACE

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ S3method(apply_frosting,epi_workflow)
1515
S3method(augment,epi_workflow)
1616
S3method(autoplot,canned_epipred)
1717
S3method(autoplot,epi_workflow)
18-
S3method(bake,check_enough_train_data)
18+
S3method(bake,check_enough_data)
1919
S3method(bake,epi_recipe)
2020
S3method(bake,step_adjust_latency)
2121
S3method(bake,step_climate)
@@ -49,7 +49,7 @@ S3method(key_colnames,recipe)
4949
S3method(mean,quantile_pred)
5050
S3method(predict,epi_workflow)
5151
S3method(predict,flatline)
52-
S3method(prep,check_enough_train_data)
52+
S3method(prep,check_enough_data)
5353
S3method(prep,epi_recipe)
5454
S3method(prep,step_adjust_latency)
5555
S3method(prep,step_climate)
@@ -65,7 +65,7 @@ S3method(print,arx_class)
6565
S3method(print,arx_fcast)
6666
S3method(print,canned_epipred)
6767
S3method(print,cdc_baseline_fcast)
68-
S3method(print,check_enough_train_data)
68+
S3method(print,check_enough_data)
6969
S3method(print,climate_fcast)
7070
S3method(print,epi_recipe)
7171
S3method(print,epi_workflow)
@@ -109,7 +109,7 @@ S3method(slather,layer_threshold)
109109
S3method(slather,layer_unnest)
110110
S3method(snap,default)
111111
S3method(snap,quantile_pred)
112-
S3method(tidy,check_enough_train_data)
112+
S3method(tidy,check_enough_data)
113113
S3method(tidy,frosting)
114114
S3method(tidy,layer)
115115
S3method(update,layer)
@@ -142,7 +142,7 @@ export(autoplot)
142142
export(bake)
143143
export(cdc_baseline_args_list)
144144
export(cdc_baseline_forecaster)
145-
export(check_enough_train_data)
145+
export(check_enough_data)
146146
export(clean_f_name)
147147
export(climate_args_list)
148148
export(climatological_forecaster)

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.0.x will indicat
2020
- Removes dependence on the `distributional` package, replacing the quantiles
2121
with `hardhat::quantile_pred()`. Some associated functions are deprecated with
2222
`lifecycle` messages.
23+
- Rename `check_enough_train_data()` to `check_enough_data()`, and generalize it
24+
enough to use as a check on either training or testing data.
25+
- Add a check for enough data to predict in `arx_forecaster()`.
2326

2427
## Improvements
2528

R/arx_classifier.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ arx_class_epi_workflow <- function(
222222
step_training_window(n_recent = args_list$n_training)
223223

224224
if (!is.null(args_list$check_enough_data_n)) {
225-
r <- check_enough_train_data(
225+
r <- check_enough_data(
226226
r,
227227
recipes::all_predictors(),
228228
recipes::all_outcomes(),

R/arx_forecaster.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -172,12 +172,12 @@ arx_fcast_epi_workflow <- function(
172172
r <- r %>%
173173
step_epi_naomit() %>%
174174
step_training_window(n_recent = args_list$n_training) %>%
175-
check_enough_train_data(all_predictors(), n = args_list$check_enough_data_n, skip = FALSE)
175+
check_enough_data(all_predictors(), n = 1, skip = FALSE)
176176

177177
if (!is.null(args_list$check_enough_data_n)) {
178-
r <- r %>% check_enough_train_data(
178+
r <- r %>% check_enough_data(
179179
all_predictors(),
180-
!!outcome,
180+
all_outcomes(),
181181
n = args_list$check_enough_data_n,
182182
epi_keys = args_list$check_enough_data_epi_keys,
183183
drop_na = FALSE

R/check_enough_train_data.R renamed to R/check_enough_data.R

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
#' Check the dataset contains enough data points.
22
#'
3-
#' `check_enough_train_data` creates a *specification* of a recipe
3+
#' `check_enough_data` creates a *specification* of a recipe
44
#' operation that will check if variables contain enough data.
55
#'
66
#' @param recipe A recipe object. The check will be added to the
77
#' sequence of operations for this recipe.
88
#' @param ... One or more selector functions to choose variables for this check.
99
#' See [selections()] for more details. You will usually want to use
10-
#' [recipes::all_predictors()] here.
10+
#' [recipes::all_predictors()] and/or [recipes::all_outcomes()] here.
1111
#' @param n The minimum number of data points required. If this is
1212
#' `NULL`, the number of selected columns will be used.
1313
#' @param epi_keys A character vector of column names on which to group the data
@@ -21,24 +21,29 @@
2121
#' @param columns An internal argument that tracks which columns are evaluated
2222
#' for this check. Should not be used by the user.
2323
#' @param id A character string that is unique to this check to identify it.
24-
#' @param skip A logical. Should the check be skipped when the
25-
#' recipe is baked by [bake()]? While all operations are baked
26-
#' when [prep()] is run, some operations may not be able to be
27-
#' conducted on new data (e.g. processing the outcome variable(s)).
28-
#' Care should be taken when using `skip = TRUE` as it may affect
29-
#' the computations for subsequent operations.
24+
#' @param skip A logical. If `TRUE`, only training data is checked, while if
25+
#' `FALSE`, both training and prediction data are checked. Technically, this
26+
#' answers the question "should the check be skipped when the recipe is baked
27+
#' by [bake()]?" While all operations are baked when [prep()] is run, some
28+
#' operations may not be able to be conducted on new data (e.g. processing the
29+
#' outcome variable(s)). Care should be taken when using `skip = TRUE` as it
30+
#' may affect the computations for subsequent operations.
3031
#' @family checks
3132
#' @export
32-
#' @details This check will break the `bake` function if any of the checked
33-
#' columns have not enough non-NA values. If the check passes, nothing is
34-
#' changed to the data.
33+
#' @details This check will make `prep()` and/or `bake()` fail if any of the
34+
#' checked columns have too few non-NA values. If the check passes, nothing
35+
#' is changed in the data. It is best used after every other step.
36+
#'
37+
#' For checking training data, it is best to set `...` to be
38+
#' `all_predictors(), all_outcomes()`, while for checking prediction data, it
39+
#' is best to set `...` to be `all_predictors()` only, with `n = 1`.
3540
#'
3641
#' # tidy() results
3742
#'
3843
#' When you [`tidy()`][tidy.recipe()] this check, a tibble with column
3944
#' `terms` (the selectors or variables selected) is returned.
4045
#'
41-
check_enough_train_data <-
46+
check_enough_data <-
4247
function(recipe,
4348
...,
4449
n = NULL,
@@ -47,11 +52,11 @@ check_enough_train_data <-
4752
role = NA,
4853
trained = FALSE,
4954
columns = NULL,
50-
skip = FALSE,
51-
id = rand_id("enough_train_data")) {
55+
skip = TRUE,
56+
id = rand_id("enough_data")) {
5257
recipes::add_check(
5358
recipe,
54-
check_enough_train_data_new(
59+
check_enough_data_new(
5560
n = n,
5661
epi_keys = epi_keys,
5762
drop_na = drop_na,
@@ -65,10 +70,10 @@ check_enough_train_data <-
6570
)
6671
}
6772

68-
check_enough_train_data_new <-
73+
check_enough_data_new <-
6974
function(n, epi_keys, drop_na, terms, role, trained, columns, skip, id) {
7075
recipes::check(
71-
subclass = "enough_train_data",
76+
subclass = "enough_data",
7277
prefix = "check_",
7378
n = n,
7479
epi_keys = epi_keys,
@@ -83,7 +88,7 @@ check_enough_train_data_new <-
8388
}
8489

8590
#' @export
86-
prep.check_enough_train_data <- function(x, training, info = NULL, ...) {
91+
prep.check_enough_data <- function(x, training, info = NULL, ...) {
8792
col_names <- recipes::recipes_eval_select(x$terms, training, info)
8893
if (is.null(x$n)) {
8994
x$n <- length(col_names)
@@ -102,11 +107,11 @@ prep.check_enough_train_data <- function(x, training, info = NULL, ...) {
102107
if (length(cols_not_enough_data) > 0) {
103108
cli_abort(
104109
"The following columns don't have enough data to predict: {cols_not_enough_data}.",
105-
class = "epipredict__not_enough_train_data"
110+
class = "epipredict__not_enough_data"
106111
)
107112
}
108113

109-
check_enough_train_data_new(
114+
check_enough_data_new(
110115
n = x$n,
111116
epi_keys = x$epi_keys,
112117
drop_na = x$drop_na,
@@ -120,7 +125,7 @@ prep.check_enough_train_data <- function(x, training, info = NULL, ...) {
120125
}
121126

122127
#' @export
123-
bake.check_enough_train_data <- function(object, new_data, ...) {
128+
bake.check_enough_data <- function(object, new_data, ...) {
124129
col_names <- object$columns
125130
if (object$drop_na) {
126131
non_na_data <- tidyr::drop_na(new_data, any_of(unname(col_names)))
@@ -137,21 +142,21 @@ bake.check_enough_train_data <- function(object, new_data, ...) {
137142
if (length(cols_not_enough_data) > 0) {
138143
cli_abort(
139144
"The following columns don't have enough data to predict: {cols_not_enough_data}.",
140-
class = "epipredict__not_enough_train_data"
145+
class = "epipredict__not_enough_data"
141146
)
142147
}
143148
new_data
144149
}
145150

146151
#' @export
147-
print.check_enough_train_data <- function(x, width = max(20, options()$width - 30), ...) {
152+
print.check_enough_data <- function(x, width = max(20, options()$width - 30), ...) {
148153
title <- paste0("Check enough data (n = ", x$n, ") for ")
149154
recipes::print_step(x$columns, x$terms, x$trained, title, width)
150155
invisible(x)
151156
}
152157

153158
#' @export
154-
tidy.check_enough_train_data <- function(x, ...) {
159+
tidy.check_enough_data <- function(x, ...) {
155160
if (recipes::is_trained(x)) {
156161
res <- tibble(terms = unname(x$columns))
157162
} else {

_pkgdown.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ reference:
7777

7878
- title: Epi recipe verification checks
7979
contents:
80-
- check_enough_train_data
80+
- check_enough_data
8181

8282
- title: Forecast postprocessing
8383
desc: Create a series of postprocessing operations

man/check_enough_train_data.Rd renamed to man/check_enough_data.Rd

Lines changed: 21 additions & 16 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/step_adjust_latency.Rd

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/_snaps/check_enough_train_data.md renamed to tests/testthat/_snaps/check_enough_data.md

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,25 @@
1-
# check_enough_train_data works on pooled data
1+
# check_enough_data works on pooled data
22

33
Code
4-
epi_recipe(toy_epi_df) %>% check_enough_train_data(x, y, n = 2 * n + 1,
5-
drop_na = FALSE) %>% prep(toy_epi_df) %>% bake(new_data = NULL)
4+
epi_recipe(toy_epi_df) %>% check_enough_data(x, y, n = 2 * n + 1, drop_na = FALSE) %>%
5+
prep(toy_epi_df) %>% bake(new_data = NULL)
66
Condition
77
Error in `prep()`:
88
! The following columns don't have enough data to predict: x and y.
99

1010
---
1111

1212
Code
13-
epi_recipe(toy_epi_df) %>% check_enough_train_data(x, y, n = 2 * n - 1,
14-
drop_na = TRUE) %>% prep(toy_epi_df) %>% bake(new_data = NULL)
13+
epi_recipe(toy_epi_df) %>% check_enough_data(x, y, n = 2 * n - 1, drop_na = TRUE) %>%
14+
prep(toy_epi_df) %>% bake(new_data = NULL)
1515
Condition
1616
Error in `prep()`:
1717
! The following columns don't have enough data to predict: x and y.
1818

19-
# check_enough_train_data works on unpooled data
19+
# check_enough_data works on unpooled data
2020

2121
Code
22-
epi_recipe(toy_epi_df) %>% check_enough_train_data(x, y, n = n + 1, epi_keys = "geo_value",
22+
epi_recipe(toy_epi_df) %>% check_enough_data(x, y, n = n + 1, epi_keys = "geo_value",
2323
drop_na = FALSE) %>% prep(toy_epi_df) %>% bake(new_data = NULL)
2424
Condition
2525
Error in `prep()`:
@@ -28,18 +28,17 @@
2828
---
2929

3030
Code
31-
epi_recipe(toy_epi_df) %>% check_enough_train_data(x, y, n = 2 * n - 3,
32-
epi_keys = "geo_value", drop_na = TRUE) %>% prep(toy_epi_df) %>% bake(new_data = NULL)
31+
epi_recipe(toy_epi_df) %>% check_enough_data(x, y, n = 2 * n - 3, epi_keys = "geo_value",
32+
drop_na = TRUE) %>% prep(toy_epi_df) %>% bake(new_data = NULL)
3333
Condition
3434
Error in `prep()`:
3535
! The following columns don't have enough data to predict: x and y.
3636

37-
# check_enough_train_data works with all_predictors() downstream of constructed terms
37+
# check_enough_data works with all_predictors() downstream of constructed terms
3838

3939
Code
40-
epi_recipe(toy_epi_df) %>% step_epi_lag(x, lag = c(1, 2)) %>%
41-
check_enough_train_data(all_predictors(), y, n = 2 * n - 4) %>% prep(
42-
toy_epi_df) %>% bake(new_data = NULL)
40+
epi_recipe(toy_epi_df) %>% step_epi_lag(x, lag = c(1, 2)) %>% check_enough_data(
41+
all_predictors(), y, n = 2 * n - 4) %>% prep(toy_epi_df) %>% bake(new_data = NULL)
4342
Condition
4443
Error in `prep()`:
4544
! The following columns don't have enough data to predict: lag_1_x, lag_2_x, and y.

0 commit comments

Comments
 (0)