Skip to content

Commit b637363

Browse files
authored
Add function to generate weekly summary map data (#40)
1 parent a89c2cf commit b637363

File tree

3 files changed

+297
-0
lines changed

3 files changed

+297
-0
lines changed

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ export(generate_hub_baseline)
77
export(generate_hub_ensemble)
88
export(generate_oracle_output)
99
export(get_hub_name)
10+
export(get_map_data)
1011
export(included_locations)
1112
export(update_authorized_users)
1213
export(update_hub_target_data)

R/get_map_data.R

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
#' Generate map data file containing ensemble forecast
2+
#' data.
3+
#'
4+
#' This function loads the latest ensemble forecast data
5+
#' from the forecast hub and processes it into the required
6+
#' format. The resulting file contains forecast values for
7+
#' all regions (including US, DC, and Puerto Rico), for
8+
#' various forecast horizons, and quantiles (0.025, 0.5,
9+
#' and 0.975).
10+
#'
11+
#' The ensemble data is expected to contain the following
12+
#' columns:
13+
#' - `reference_date`: the date of the forecast
14+
#' - `location`: state abbreviation
15+
#' - `horizon`: forecast horizon
16+
#' - `target`: forecast target (e.g., "wk inc covid hosp")
17+
#' - `target_end_date`: the forecast target date
18+
#' - `output_type`: type of output (e.g., "quantile")
19+
#' - `output_type_id`: quantile value (e.g., 0.025, 0.5,
20+
#' 0.975)
21+
#' - `value`: forecast value
22+
#'
23+
#' The resulting map file will have the following columns:
24+
#' - `location_name`: full state name (including "US" for
25+
#' the US state)
26+
#' - `quantile_*`: the quantile forecast values (rounded
27+
#' to two decimal places)
28+
#' - `horizon`: forecast horizon
29+
#' - `target`: forecast target (e.g., "7 day ahead inc
30+
#' hosp")
31+
#' - `target_end_date`: target date for the forecast (Ex:
32+
#' 2024-11-30)
33+
#' - `reference_date`: date that the forecast was generated
34+
#' (Ex: 2024-11-23)
35+
#' - `target_end_date_formatted`: target date for the
36+
#' forecast, prettily re-formatted as a string (Ex:
37+
#' "November 30, 2024")
38+
#' - `reference_date_formatted`: date that the forecast
39+
#' was generated, prettily re-formatted as a string
40+
#' (Ex: "November 23, 2024")
41+
#'
42+
#' @param reference_date character, the reference date for
43+
#' the forecast in YYYY-MM-DD format (ISO-8601).
44+
#' @param base_hub_path character, path to the forecast
45+
#' hub directory.
46+
#' @param hub_reports_path character, path to forecast hub
47+
#' reports directory.
48+
#' @param disease character, disease name ("covid" or
49+
#' "rsv"). Used to derive hub name and file prefix.
50+
#' @param horizons_to_include integer vector, horizons to
51+
#' include in the output. Default: c(0, 1, 2).
52+
#' @param population_data data frame with columns
53+
#' "location" and "population".
54+
#' @param excluded_locations character vector of location
55+
#' codes to exclude from the output. Default: character(0).
56+
#' @param output_format character, output file format. One
57+
#' of "csv", "tsv", or "parquet". Default: "csv".
58+
#'
59+
#' @export
60+
get_map_data <- function(
61+
reference_date,
62+
base_hub_path,
63+
hub_reports_path,
64+
disease,
65+
horizons_to_include = c(0, 1, 2),
66+
population_data,
67+
excluded_locations = character(0),
68+
output_format = "csv"
69+
) {
70+
checkmate::assert_choice(disease, choices = c("covid", "rsv"))
71+
checkmate::assert_subset(horizons_to_include, choices = c(-1, 0, 1, 2, 3))
72+
checkmate::assert_data_frame(population_data)
73+
checkmate::assert_names(
74+
colnames(population_data),
75+
must.include = c("location", "population")
76+
)
77+
checkmate::assert_character(excluded_locations)
78+
checkmate::assert_choice(output_format, choices = c("csv", "tsv", "parquet"))
79+
80+
reference_date <- lubridate::as_date(reference_date)
81+
82+
hub_name <- get_hub_name(disease)
83+
ensemble_model_name <- glue::glue("{hub_name}-ensemble")
84+
85+
ensemble_data <- hubData::connect_hub(base_hub_path) |>
86+
dplyr::filter(
87+
.data$reference_date == !!reference_date,
88+
.data$model_id == !!ensemble_model_name
89+
) |>
90+
hubData::collect_hub()
91+
92+
if (nrow(ensemble_data) == 0) {
93+
cli::cli_abort(
94+
glue::glue(
95+
"No ensemble data found for reference date {reference_date} ",
96+
"and model {ensemble_model_name}"
97+
)
98+
)
99+
}
100+
101+
# process ensemble data into the required format for Map file
102+
map_data <- forecasttools::pivot_hubverse_quantiles_wider(
103+
hubverse_table = ensemble_data,
104+
pivot_quantiles = c(
105+
"quantile_0.025" = 0.025,
106+
"quantile_0.25" = 0.25,
107+
"quantile_0.5" = 0.5,
108+
"quantile_0.75" = 0.75,
109+
"quantile_0.975" = 0.975
110+
)
111+
) |>
112+
dplyr::filter(.data$horizon %in% !!horizons_to_include) |>
113+
dplyr::filter(!(.data$location %in% !!excluded_locations)) |>
114+
dplyr::mutate(
115+
reference_date = as.Date(.data$reference_date),
116+
target_end_date = as.Date(.data$target_end_date),
117+
model = !!ensemble_model_name
118+
) |>
119+
# convert location column codes to full location names
120+
dplyr::mutate(
121+
location = forecasttools::us_location_recode(
122+
.data$location,
123+
"hub",
124+
"name"
125+
)
126+
) |>
127+
# long name "United States" to "US"
128+
dplyr::mutate(
129+
location = dplyr::case_match(
130+
.data$location,
131+
"United States" ~ "US",
132+
.default = .data$location
133+
),
134+
# sort locations alphabetically, except for US
135+
location_sort_order = ifelse(.data$location == "US", 0, 1)
136+
) |>
137+
dplyr::arrange(.data$location_sort_order, .data$location) |>
138+
dplyr::left_join(
139+
population_data,
140+
by = "location"
141+
) |>
142+
dplyr::mutate(
143+
population = as.numeric(.data$population),
144+
quantile_0.025_per100k = .data$quantile_0.025 / .data$population * 100000,
145+
quantile_0.5_per100k = .data$quantile_0.5 / .data$population * 100000,
146+
quantile_0.975_per100k = .data$quantile_0.975 / .data$population * 100000,
147+
quantile_0.025_count = .data$quantile_0.025,
148+
quantile_0.5_count = .data$quantile_0.5,
149+
quantile_0.975_count = .data$quantile_0.975,
150+
quantile_0.025_per100k_rounded = round(.data$quantile_0.025_per100k, 2),
151+
quantile_0.5_per100k_rounded = round(.data$quantile_0.5_per100k, 2),
152+
quantile_0.975_per100k_rounded = round(.data$quantile_0.975_per100k, 2),
153+
quantile_0.025_count_rounded = round(.data$quantile_0.025_count),
154+
quantile_0.5_count_rounded = round(.data$quantile_0.5_count),
155+
quantile_0.975_count_rounded = round(.data$quantile_0.975_count),
156+
target_end_date_formatted = format(.data$target_end_date, "%B %d, %Y"),
157+
reference_date_formatted = format(.data$reference_date, "%B %d, %Y"),
158+
forecast_due_date = as.Date(!!reference_date) - 3,
159+
forecast_due_date_formatted = format(
160+
.data$forecast_due_date,
161+
"%B %d, %Y"
162+
),
163+
) |>
164+
dplyr::select(
165+
location_name = "location",
166+
"horizon",
167+
"quantile_0.025_per100k",
168+
"quantile_0.5_per100k",
169+
"quantile_0.975_per100k",
170+
"quantile_0.025_count",
171+
"quantile_0.5_count",
172+
"quantile_0.975_count",
173+
"quantile_0.025_per100k_rounded",
174+
"quantile_0.5_per100k_rounded",
175+
"quantile_0.975_per100k_rounded",
176+
"quantile_0.025_count_rounded",
177+
"quantile_0.5_count_rounded",
178+
"quantile_0.975_count_rounded",
179+
"target",
180+
"target_end_date",
181+
"reference_date",
182+
"forecast_due_date",
183+
"target_end_date_formatted",
184+
"forecast_due_date_formatted",
185+
"reference_date_formatted",
186+
"model",
187+
)
188+
189+
output_folder_path <- fs::path(
190+
hub_reports_path,
191+
"weekly-summaries",
192+
reference_date
193+
)
194+
output_filename <- glue::glue("{reference_date}_{disease}_map_data")
195+
output_filepath <- fs::path(
196+
output_folder_path,
197+
output_filename,
198+
ext = output_format
199+
)
200+
201+
fs::dir_create(output_folder_path)
202+
203+
if (!fs::file_exists(output_filepath)) {
204+
forecasttools::write_tabular(map_data, output_filepath)
205+
cli::cli_inform("File saved as: {output_filepath}")
206+
} else {
207+
cli::cli_abort("File already exists: {output_filepath}")
208+
}
209+
}

man/get_map_data.Rd

Lines changed: 87 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)