Skip to content

Commit 48db4fd

Browse files
committed
attempt to address many of review comments
1 parent 94101be commit 48db4fd

File tree

1 file changed

+44
-105
lines changed

1 file changed

+44
-105
lines changed

R/get_map_data.R

Lines changed: 44 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -45,120 +45,63 @@
4545
#' hub directory.
4646
#' @param hub_reports_path character, path to forecast hub
4747
#' reports directory.
48+
#' @param disease character, disease name ("covid" or "rsv").
49+
#' Used to derive hub name and file prefix.
4850
#' @param horizons_to_include integer vector, horizons to
4951
#' include in the output. Default: c(0, 1, 2).
50-
#' @param hub_name character, name of the forecast hub
51-
#' ensemble (e.g., "CovidHub", "RSVHub"). Default:
52-
#' "CovidHub".
53-
#' @param file_prefix character, prefix used in output
54-
#' filename (e.g., "covid", "rsv"). Default: "covid".
52+
#' @param population_data data frame with columns
53+
#' "location_name" and "population".
54+
#' @param excluded_locations character vector of location
55+
#' codes to exclude from the output. Default: character(0).
56+
#' @param output_format character, output file format.
57+
#' One of "csv", "tsv", or "parquet". Default: "csv".
5558
#'
5659
#' @export
5760
get_map_data <- function(
5861
reference_date,
5962
base_hub_path,
6063
hub_reports_path,
64+
disease,
6165
horizons_to_include = c(0, 1, 2),
62-
hub_name = "CovidHub",
63-
file_prefix = "covid"
66+
population_data,
67+
excluded_locations = character(0),
68+
output_format = "csv"
6469
) {
65-
# check for invalid horizon entries
66-
valid_horizons <- c(-1, 0, 1, 2, 3)
67-
invalid_horizons <- horizons_to_include[
68-
!sapply(
69-
horizons_to_include,
70-
function(x) x %in% valid_horizons
71-
)
72-
]
73-
if (length(invalid_horizons) > 0) {
74-
stop(
75-
"Invalid elements: ",
76-
glue::glue_collapse(invalid_horizons, sep = ", ")
77-
)
78-
}
70+
checkmate::assert_scalar(disease)
71+
checkmate::assert_names(disease, subset.of = c("covid", "rsv"))
72+
checkmate::assert_subset(horizons_to_include, choices = c(-1, 0, 1, 2, 3))
73+
checkmate::assert_data_frame(population_data)
74+
checkmate::assert_names(
75+
colnames(population_data),
76+
must.include = c("location_name", "population")
77+
)
78+
checkmate::assert_character(excluded_locations)
79+
checkmate::assert_choice(output_format, choices = c("csv", "tsv", "parquet"))
80+
81+
reference_date <- lubridate::as_date(reference_date)
82+
83+
hub_name <- get_hub_name(disease)
84+
file_prefix <- disease
7985

8086
# load the latest ensemble data from the model-output folder
8187
ensemble_model_name <- glue::glue("{hub_name}-ensemble")
82-
ensemble_folder <- file.path(
83-
base_hub_path,
84-
"model-output",
85-
ensemble_model_name
86-
)
87-
ensemble_file_current <- file.path(
88-
ensemble_folder,
89-
glue::glue("{reference_date}-{ensemble_model_name}.csv")
90-
)
91-
if (file.exists(ensemble_file_current)) {
92-
ensemble_file <- ensemble_file_current
93-
} else {
94-
stop(
95-
glue::glue(
96-
"Ensemble file for reference date {reference_date} ",
97-
"not found in the directory: {ensemble_folder}"
98-
)
99-
)
100-
}
101-
ensemble_data <- readr::read_csv(ensemble_file)
102-
required_columns <- c(
103-
"reference_date",
104-
"target_end_date",
105-
"value",
106-
"location"
107-
)
108-
missing_columns <- setdiff(
109-
required_columns,
110-
colnames(ensemble_data)
111-
)
112-
if (length(missing_columns) > 0) {
113-
stop(
114-
glue::glue(
115-
"Missing columns in ensemble data: ",
116-
"{glue::glue_collapse(missing_columns, sep = ', ')}"
117-
)
118-
)
119-
}
12088

121-
# population data, add later to forecasttools
122-
pop_data_path <- file.path(
123-
base_hub_path,
124-
"auxiliary-data",
125-
"locations_with_2023_census_pop.csv"
126-
)
127-
pop_data <- readr::read_csv(pop_data_path)
128-
pop_required_columns <- c("abbreviation", "population")
129-
missing_pop_columns <- setdiff(
130-
pop_required_columns,
131-
colnames(pop_data)
132-
)
133-
if (length(missing_pop_columns) > 0) {
134-
stop(
89+
ensemble_data <- hubData::connect_hub(base_hub_path) |>
90+
dplyr::filter(
91+
.data$reference_date == !!reference_date,
92+
.data$model_id == !!ensemble_model_name
93+
) |>
94+
hubData::collect_hub()
95+
96+
if (nrow(ensemble_data) == 0) {
97+
cli::cli_abort(
13598
glue::glue(
136-
"Missing columns in population data: ",
137-
"{glue::glue_collapse(missing_pop_columns, sep = ', ')}"
99+
"No ensemble data found for reference date {reference_date} ",
100+
"and model {ensemble_model_name}"
138101
)
139102
)
140103
}
141104

142-
# check if the reference date has any
143-
# exclusions and exclude specified locations if any
144-
exclude_data_path_toml <- fs::path(
145-
base_hub_path,
146-
"auxiliary-data",
147-
"excluded_locations.toml"
148-
)
149-
if (fs::file_exists(exclude_data_path_toml)) {
150-
exclude_data_toml <- RcppTOML::parseTOML(exclude_data_path_toml)
151-
if (reference_date %in% names(exclude_data_toml)) {
152-
excluded_locations <- exclude_data_toml[[reference_date]]
153-
message("Excluding locations for reference date: ", reference_date)
154-
} else {
155-
excluded_locations <- character(0)
156-
message("No exclusion for reference date: ", reference_date)
157-
}
158-
} else {
159-
stop("TOML file not found: ", exclude_data_path_toml)
160-
}
161-
162105
# process ensemble data into the required format for Map file
163106
map_data <- forecasttools::pivot_hubverse_quantiles_wider(
164107
hubverse_table = ensemble_data,
@@ -170,9 +113,7 @@ get_map_data <- function(
170113
"quantile_0.975" = 0.975
171114
)
172115
) |>
173-
# usually filter out horizon 3, -1
174116
dplyr::filter(.data$horizon %in% !!horizons_to_include) |>
175-
# filter out excluded locations if the ref date is the first week in season
176117
dplyr::filter(!(.data$location %in% !!excluded_locations)) |>
177118
dplyr::mutate(
178119
reference_date = as.Date(.data$reference_date),
@@ -199,7 +140,7 @@ get_map_data <- function(
199140
) |>
200141
dplyr::arrange(.data$location_sort_order, .data$location) |>
201142
dplyr::left_join(
202-
pop_data,
143+
population_data,
203144
by = c("location" = "location_name")
204145
) |>
205146
dplyr::mutate(
@@ -258,18 +199,16 @@ get_map_data <- function(
258199
output_filepath <- fs::path(
259200
output_folder_path,
260201
output_filename,
261-
ext = "csv"
202+
ext = output_format
262203
)
263204

264-
# determine if the output folder exists, create it if not
265205
fs::dir_create(output_folder_path)
266-
message("Directory is ready: ", output_folder_path)
206+
cli::cli_inform("Directory is ready: {output_folder_path}")
267207

268-
# check if the file exists, and if not, save to csv, else throw an error
269208
if (!fs::file_exists(output_filepath)) {
270-
readr::write_csv(map_data, output_filepath)
271-
message("File saved as: ", output_filepath)
209+
forecasttools::write_tabular(map_data, output_filepath)
210+
cli::cli_inform("File saved as: {output_filepath}")
272211
} else {
273-
stop("File already exists: ", output_filepath)
212+
cli::cli_abort("File already exists: {output_filepath}")
274213
}
275214
}

0 commit comments

Comments
 (0)