Skip to content

Commit c92f5da

Browse files
committed
add updated webtext script
1 parent 0708209 commit c92f5da

File tree

2 files changed

+268
-4
lines changed

2 files changed

+268
-4
lines changed

.pre-commit-config.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ repos:
22
#####
33
# Basic file cleanliness
44
- repo: https://github.com/pre-commit/pre-commit-hooks
5-
rev: v5.0.0
5+
rev: v6.0.0
66
hooks:
77
- id: check-added-large-files
88
- id: check-yaml
@@ -13,7 +13,7 @@ repos:
1313
#####
1414
# Python
1515
- repo: https://github.com/astral-sh/ruff-pre-commit
16-
rev: v0.11.10
16+
rev: v0.13.1
1717
hooks:
1818
# Sort imports
1919
- id: ruff
@@ -27,13 +27,13 @@ repos:
2727
#####
2828
# R
2929
- repo: https://github.com/lorenzwalthert/precommit
30-
rev: v0.4.3.9009
30+
rev: v0.4.3.9014
3131
hooks:
3232
- id: lintr
3333
#####
3434
# Java
3535
- repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
36-
rev: v2.14.0
36+
rev: v2.15.0
3737
hooks:
3838
- id: pretty-format-java
3939
args: [--aosp,--autofix]

R/get_webtext.R

Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
#' Generate text content for COVID-19 forecast
2+
#' visualization webpage.
3+
#'
4+
#' This script generates formatted text summaries for the
5+
#' COVID-19 forecast visualization webpage. It processes
6+
#' ensemble forecast data, target hospital admissions data,
7+
#' and contributing team information to create a text
8+
#' description of the current week's COVID-19
9+
#' hospitalization forecasts.
10+
#'
11+
#' Usage:
12+
#' Rscript src/get_webtext.R --reference-date "2025-09-19"
13+
#' --hub-reports-path "../covidhub-reports"
14+
15+
parser <- argparser::arg_parser(
16+
"Generate text for the webpage."
17+
)
18+
parser <- argparser::add_argument(
19+
parser,
20+
"--reference-date",
21+
type = "character",
22+
help = "The reference date for the forecast in YYYY-MM-DD format (ISO-8601)"
23+
)
24+
parser <- argparser::add_argument(
25+
parser,
26+
"--base-hub-path",
27+
type = "character",
28+
default = ".",
29+
help = "Path to the Covid19 forecast hub directory."
30+
)
31+
parser <- argparser::add_argument(
32+
parser,
33+
"--hub-reports-path",
34+
type = "character",
35+
default = "../covidhub-reports",
36+
help = "path to COVIDhub reports directory"
37+
)
38+
39+
args <- argparser::parse_args(parser)
40+
reference_date <- args$reference_date
41+
base_hub_path <- args$base_hub_path
42+
hub_reports_path <- args$hub_reports_path
43+
44+
weekly_data_path <- file.path(
45+
hub_reports_path,
46+
"weekly-summaries",
47+
reference_date
48+
)
49+
50+
ensemble_us_1wk_ahead <- readr::read_csv(
51+
file.path(weekly_data_path, paste0(reference_date, "_covid_map_data.csv")),
52+
show_col_types = FALSE
53+
) |>
54+
dplyr::filter(horizon == 1, location_name == "US")
55+
56+
target_data <- readr::read_csv(
57+
file.path(
58+
weekly_data_path,
59+
paste0(
60+
reference_date,
61+
"_covid_target_hospital_admissions_data.csv"
62+
)
63+
),
64+
show_col_types = FALSE
65+
)
66+
67+
contributing_teams <- readr::read_csv(
68+
file.path(
69+
weekly_data_path,
70+
paste0(reference_date, "_covid_forecasts_data.csv")
71+
),
72+
show_col_types = FALSE
73+
) |>
74+
dplyr::filter(model != "CovidHub-ensemble") |>
75+
dplyr::pull(model) |>
76+
unique()
77+
78+
wkly_submissions <- hubData::load_model_metadata(
79+
base_hub_path,
80+
model_ids = contributing_teams
81+
) |>
82+
dplyr::distinct(.data$model_id, .data$designated_model, .keep_all = TRUE) |>
83+
dplyr::mutate(
84+
team_model_url = glue::glue(
85+
"[{team_name} (Model: {model_abbr})]({website_url})"
86+
)
87+
) |>
88+
dplyr::select(
89+
model_id,
90+
team_abbr,
91+
model_abbr,
92+
team_model_url,
93+
designated_model
94+
)
95+
96+
# Generate flag for less than 80 percent of hospitals reporting
97+
desired_weekendingdate <- as.Date(reference_date) - lubridate::dweeks(1)
98+
99+
exclude_territories_path <- fs::path(
100+
base_hub_path,
101+
"auxiliary-data",
102+
"excluded_territories",
103+
ext = "toml"
104+
)
105+
stopifnot(fs::file_exists(exclude_territories_path))
106+
exclude_territories_toml <- RcppTOML::parseTOML(exclude_territories_path)
107+
excluded_locations <- exclude_territories_toml$locations
108+
109+
percent_hosp_reporting_below80 <- forecasttools::pull_data_cdc_gov_dataset(
110+
dataset = "mpgq-jmmr",
111+
columns = c("totalconfc19newadmperchosprepabove80pct"),
112+
start_date = "2024-11-09"
113+
) |>
114+
dplyr::mutate(
115+
weekendingdate = as.Date(.data$weekendingdate),
116+
report_above_80_lgl = as.logical(
117+
as.numeric(.data$totalconfc19newadmperchosprepabove80pct)
118+
),
119+
jurisdiction = dplyr::case_match(
120+
.data$jurisdiction,
121+
"USA" ~ "US",
122+
.default = .data$jurisdiction
123+
),
124+
location = forecasttools::us_location_recode(
125+
.data$jurisdiction,
126+
"abbr",
127+
"code"
128+
),
129+
location_name = forecasttools::us_location_recode(
130+
.data$jurisdiction,
131+
"abbr",
132+
"name"
133+
)
134+
) |>
135+
dplyr::filter(!(.data$location %in% !!excluded_locations)) |>
136+
dplyr::group_by(.data$jurisdiction) |>
137+
dplyr::mutate(max_weekendingdate = max(.data$weekendingdate)) |>
138+
dplyr::ungroup()
139+
140+
jurisdiction_w_latency <- percent_hosp_reporting_below80 |>
141+
dplyr::filter(.data$max_weekendingdate < !!desired_weekendingdate)
142+
143+
if (nrow(jurisdiction_w_latency) > 0) {
144+
cli::cli_warn(
145+
"
146+
Some locations have missing reported data for the most recent week.
147+
The reference date is {reference_date}, we expect data at least
148+
through {desired_weekendingdate}. However, {nrow(jurisdiction_w_latency)}
149+
location{?s} did not have reporting through that date:
150+
{jurisdiction_w_latency$location_name}.
151+
"
152+
)
153+
}
154+
155+
latest_reporting_below80 <- percent_hosp_reporting_below80 |>
156+
dplyr::filter(
157+
.data$weekendingdate == max(.data$weekendingdate),
158+
!.data$report_above_80_lgl
159+
)
160+
161+
reporting_rate_flag <- if (length(latest_reporting_below80$location_name) > 0) {
162+
location_list <- if (length(latest_reporting_below80$location_name) < 3) {
163+
glue::glue_collapse(latest_reporting_below80$location_name, sep = " and ")
164+
} else {
165+
glue::glue_collapse(
166+
latest_reporting_below80$location_name,
167+
sep = ", ",
168+
last = ", and "
169+
)
170+
}
171+
172+
glue::glue(
173+
"The following jurisdictions had <80% of hospitals reporting for ",
174+
"the most recent week: {location_list}. ",
175+
"Lower reporting rates could impact forecast validity. Percent ",
176+
"of hospitals reporting is calculated based on the number of active ",
177+
"hospitals reporting complete data to NHSN for a given reporting week.\n\n"
178+
)
179+
} else {
180+
""
181+
}
182+
183+
round_to_place <- function(value) {
184+
if (value >= 1000) {
185+
rounded_val <- round(value, -2)
186+
} else if (value >= 10) {
187+
rounded_val <- round(value, -1)
188+
} else {
189+
rounded_val <- round(value, 0)
190+
}
191+
return(rounded_val)
192+
}
193+
194+
# generate variables used in the web text
195+
196+
median_forecast_1wk_ahead <- round_to_place(
197+
ensemble_us_1wk_ahead$quantile_0.5_count
198+
)
199+
lower_95ci_forecast_1wk_ahead <- round_to_place(
200+
ensemble_us_1wk_ahead$quantile_0.025_count
201+
)
202+
upper_95ci_forecast_1wk_ahead <- round_to_place(
203+
ensemble_us_1wk_ahead$quantile_0.975_count
204+
)
205+
206+
designated <- wkly_submissions[wkly_submissions$designated_model, ]
207+
not_designated <- wkly_submissions[!wkly_submissions$designated_model, ]
208+
weekly_num_teams <- length(unique(designated$team_abbr))
209+
weekly_num_models <- length(unique(designated$model_abbr))
210+
model_incl_in_hub_ensemble <- designated$team_model_url
211+
model_not_incl_in_hub_ensemble <- not_designated$team_model_url
212+
213+
first_target_data_date <- format(
214+
as.Date(min(target_data$week_ending_date)),
215+
"%B %d, %Y"
216+
)
217+
last_target_data_date <- format(
218+
as.Date(max(target_data$week_ending_date)),
219+
"%B %d, %Y"
220+
)
221+
forecast_due_date <- ensemble_us_1wk_ahead$forecast_due_date_formatted
222+
target_end_date_1wk_ahead <- ensemble_us_1wk_ahead$target_end_date_formatted
223+
target_end_date_2wk_ahead <- format(
224+
ensemble_us_1wk_ahead$target_end_date + lubridate::weeks(1),
225+
"%B %d, %Y"
226+
)
227+
228+
last_reported_target_data <- target_data |>
229+
dplyr::filter(week_ending_date == max(week_ending_date), location == "US") |>
230+
dplyr::mutate(week_end_date_formatted = format(week_ending_date, "%B %d, %Y"))
231+
232+
last_reported_admissions <- round(last_reported_target_data$value, -2)
233+
234+
web_text <- glue::glue(
235+
"The CovidHub ensemble's one-week-ahead forecast predicts that the number ",
236+
"of new weekly laboratory-confirmed COVID-19 hospital admissions will be ",
237+
"approximately {median_forecast_1wk_ahead} nationally, with ",
238+
"{lower_95ci_forecast_1wk_ahead} to {upper_95ci_forecast_1wk_ahead} ",
239+
"laboratory confirmed COVID-19 hospital admissions likely reported in the ",
240+
"week ending {target_end_date_1wk_ahead}. This is compared to the ",
241+
"{last_reported_admissions} admissions reported for the week ",
242+
"ending {last_reported_target_data$week_end_date_formatted}, the most ",
243+
"recent week of reporting from U.S. hospitals.\n\n",
244+
"Reported and forecasted new COVID-19 hospital admissions as of ",
245+
"{forecast_due_date}. This week, {weekly_num_teams} modeling groups ",
246+
"contributed {weekly_num_models} forecasts that were eligible for inclusion ",
247+
"in the ensemble forecasts for at least one jurisdiction.\n\n",
248+
"The figure shows the number of new laboratory-confirmed COVID-19 hospital ",
249+
"admissions reported in the United States each week from ",
250+
"{first_target_data_date} through {last_target_data_date} and forecasted ",
251+
"new COVID-19 hospital admissions per week for this week and the next ",
252+
"2 weeks through {target_end_date_2wk_ahead}.\n\n",
253+
"{reporting_rate_flag}\n",
254+
"Contributing teams and models:\n\n",
255+
"Models included in the CovidHub ensemble:\n",
256+
"{paste(model_incl_in_hub_ensemble, collapse = '\n')}\n\n",
257+
"Models not included in the CovidHub ensemble:\n",
258+
"{paste(model_not_incl_in_hub_ensemble, collapse = '\n')}"
259+
)
260+
261+
writeLines(
262+
web_text,
263+
file.path(weekly_data_path, paste0(reference_date, "_webtext.md"))
264+
)

0 commit comments

Comments
 (0)