4545# ' hub directory.
4646# ' @param hub_reports_path character, path to forecast hub
4747# ' reports directory.
48+ # ' @param disease character, disease name ("covid" or "rsv").
49+ # ' Used to derive hub name and file prefix.
4850# ' @param horizons_to_include integer vector, horizons to
4951# ' include in the output. Default: c(0, 1, 2).
50- # ' @param hub_name character, name of the forecast hub
51- # ' ensemble (e.g., "CovidHub", "RSVHub"). Default:
52- # ' "CovidHub".
53- # ' @param file_prefix character, prefix used in output
54- # ' filename (e.g., "covid", "rsv"). Default: "covid".
52+ # ' @param population_data data frame with columns
53+ # ' "location_name" and "population".
54+ # ' @param excluded_locations character vector of location
55+ # ' codes to exclude from the output. Default: character(0).
56+ # ' @param output_format character, output file format.
57+ # ' One of "csv", "tsv", or "parquet". Default: "csv".
5558# '
5659# ' @export
5760get_map_data <- function (
5861 reference_date ,
5962 base_hub_path ,
6063 hub_reports_path ,
64+ disease ,
6165 horizons_to_include = c(0 , 1 , 2 ),
62- hub_name = " CovidHub" ,
63- file_prefix = " covid"
66+ population_data ,
67+ excluded_locations = character (0 ),
68+ output_format = " csv"
6469) {
65- # check for invalid horizon entries
66- valid_horizons <- c(- 1 , 0 , 1 , 2 , 3 )
67- invalid_horizons <- horizons_to_include [
68- ! sapply(
69- horizons_to_include ,
70- function (x ) x %in% valid_horizons
71- )
72- ]
73- if (length(invalid_horizons ) > 0 ) {
74- stop(
75- " Invalid elements: " ,
76- glue :: glue_collapse(invalid_horizons , sep = " , " )
77- )
78- }
70+ checkmate :: assert_scalar(disease )
71+ checkmate :: assert_names(disease , subset.of = c(" covid" , " rsv" ))
72+ checkmate :: assert_subset(horizons_to_include , choices = c(- 1 , 0 , 1 , 2 , 3 ))
73+ checkmate :: assert_data_frame(population_data )
74+ checkmate :: assert_names(
75+ colnames(population_data ),
76+ must.include = c(" location_name" , " population" )
77+ )
78+ checkmate :: assert_character(excluded_locations )
79+ checkmate :: assert_choice(output_format , choices = c(" csv" , " tsv" , " parquet" ))
80+
81+ reference_date <- lubridate :: as_date(reference_date )
82+
83+ hub_name <- get_hub_name(disease )
84+ file_prefix <- disease
7985
8086 # load the latest ensemble data from the model-output folder
8187 ensemble_model_name <- glue :: glue(" {hub_name}-ensemble" )
82- ensemble_folder <- file.path(
83- base_hub_path ,
84- " model-output" ,
85- ensemble_model_name
86- )
87- ensemble_file_current <- file.path(
88- ensemble_folder ,
89- glue :: glue(" {reference_date}-{ensemble_model_name}.csv" )
90- )
91- if (file.exists(ensemble_file_current )) {
92- ensemble_file <- ensemble_file_current
93- } else {
94- stop(
95- glue :: glue(
96- " Ensemble file for reference date {reference_date} " ,
97- " not found in the directory: {ensemble_folder}"
98- )
99- )
100- }
101- ensemble_data <- readr :: read_csv(ensemble_file )
102- required_columns <- c(
103- " reference_date" ,
104- " target_end_date" ,
105- " value" ,
106- " location"
107- )
108- missing_columns <- setdiff(
109- required_columns ,
110- colnames(ensemble_data )
111- )
112- if (length(missing_columns ) > 0 ) {
113- stop(
114- glue :: glue(
115- " Missing columns in ensemble data: " ,
116- " {glue::glue_collapse(missing_columns, sep = ', ')}"
117- )
118- )
119- }
12088
121- # population data, add later to forecasttools
122- pop_data_path <- file.path(
123- base_hub_path ,
124- " auxiliary-data" ,
125- " locations_with_2023_census_pop.csv"
126- )
127- pop_data <- readr :: read_csv(pop_data_path )
128- pop_required_columns <- c(" abbreviation" , " population" )
129- missing_pop_columns <- setdiff(
130- pop_required_columns ,
131- colnames(pop_data )
132- )
133- if (length(missing_pop_columns ) > 0 ) {
134- stop(
89+ ensemble_data <- hubData :: connect_hub(base_hub_path ) | >
90+ dplyr :: filter(
91+ .data $ reference_date == !! reference_date ,
92+ .data $ model_id == !! ensemble_model_name
93+ ) | >
94+ hubData :: collect_hub()
95+
96+ if (nrow(ensemble_data ) == 0 ) {
97+ cli :: cli_abort(
13598 glue :: glue(
136- " Missing columns in population data: " ,
137- " {glue::glue_collapse(missing_pop_columns, sep = ', ') }"
99+ " No ensemble data found for reference date {reference_date} " ,
100+ " and model {ensemble_model_name }"
138101 )
139102 )
140103 }
141104
142- # check if the reference date has any
143- # exclusions and exclude specified locations if any
144- exclude_data_path_toml <- fs :: path(
145- base_hub_path ,
146- " auxiliary-data" ,
147- " excluded_locations.toml"
148- )
149- if (fs :: file_exists(exclude_data_path_toml )) {
150- exclude_data_toml <- RcppTOML :: parseTOML(exclude_data_path_toml )
151- if (reference_date %in% names(exclude_data_toml )) {
152- excluded_locations <- exclude_data_toml [[reference_date ]]
153- message(" Excluding locations for reference date: " , reference_date )
154- } else {
155- excluded_locations <- character (0 )
156- message(" No exclusion for reference date: " , reference_date )
157- }
158- } else {
159- stop(" TOML file not found: " , exclude_data_path_toml )
160- }
161-
162105 # process ensemble data into the required format for Map file
163106 map_data <- forecasttools :: pivot_hubverse_quantiles_wider(
164107 hubverse_table = ensemble_data ,
@@ -170,9 +113,7 @@ get_map_data <- function(
170113 " quantile_0.975" = 0.975
171114 )
172115 ) | >
173- # usually filter out horizon 3, -1
174116 dplyr :: filter(.data $ horizon %in% !! horizons_to_include ) | >
175- # filter out excluded locations if the ref date is the first week in season
176117 dplyr :: filter(! (.data $ location %in% !! excluded_locations )) | >
177118 dplyr :: mutate(
178119 reference_date = as.Date(.data $ reference_date ),
@@ -199,7 +140,7 @@ get_map_data <- function(
199140 ) | >
200141 dplyr :: arrange(.data $ location_sort_order , .data $ location ) | >
201142 dplyr :: left_join(
202- pop_data ,
143+ population_data ,
203144 by = c(" location" = " location_name" )
204145 ) | >
205146 dplyr :: mutate(
@@ -258,18 +199,16 @@ get_map_data <- function(
258199 output_filepath <- fs :: path(
259200 output_folder_path ,
260201 output_filename ,
261- ext = " csv "
202+ ext = output_format
262203 )
263204
264- # determine if the output folder exists, create it if not
265205 fs :: dir_create(output_folder_path )
266- message (" Directory is ready: " , output_folder_path )
206+ cli :: cli_inform (" Directory is ready: { output_folder_path} " )
267207
268- # check if the file exists, and if not, save to csv, else throw an error
269208 if (! fs :: file_exists(output_filepath )) {
270- readr :: write_csv (map_data , output_filepath )
271- message (" File saved as: " , output_filepath )
209+ forecasttools :: write_tabular (map_data , output_filepath )
210+ cli :: cli_inform (" File saved as: { output_filepath} " )
272211 } else {
273- stop (" File already exists: " , output_filepath )
212+ cli :: cli_abort (" File already exists: { output_filepath} " )
274213 }
275214}
0 commit comments