|
| 1 | +#' Generate text content for COVID-19 forecast |
| 2 | +#' visualization webpage. |
| 3 | +#' |
| 4 | +#' This script generates formatted text summaries for the |
| 5 | +#' COVID-19 forecast visualization webpage. It processes |
| 6 | +#' ensemble forecast data, target hospital admissions data, |
| 7 | +#' and contributing team information to create a text |
| 8 | +#' description of the current week's COVID-19 |
| 9 | +#' hospitalization forecasts. |
| 10 | +#' |
| 11 | +#' Usage: |
| 12 | +#' Rscript src/get_webtext.R --reference-date "2025-09-19" |
| 13 | +#' --hub-reports-path "../covidhub-reports" |
| 14 | + |
| 15 | +parser <- argparser::arg_parser( |
| 16 | + "Generate text for the webpage." |
| 17 | +) |
| 18 | +parser <- argparser::add_argument( |
| 19 | + parser, |
| 20 | + "--reference-date", |
| 21 | + type = "character", |
| 22 | + help = "The reference date for the forecast in YYYY-MM-DD format (ISO-8601)" |
| 23 | +) |
| 24 | +parser <- argparser::add_argument( |
| 25 | + parser, |
| 26 | + "--base-hub-path", |
| 27 | + type = "character", |
| 28 | + default = ".", |
| 29 | + help = "Path to the Covid19 forecast hub directory." |
| 30 | +) |
| 31 | +parser <- argparser::add_argument( |
| 32 | + parser, |
| 33 | + "--hub-reports-path", |
| 34 | + type = "character", |
| 35 | + default = "../covidhub-reports", |
| 36 | + help = "path to COVIDhub reports directory" |
| 37 | +) |
| 38 | + |
| 39 | +args <- argparser::parse_args(parser) |
| 40 | +reference_date <- args$reference_date |
| 41 | +base_hub_path <- args$base_hub_path |
| 42 | +hub_reports_path <- args$hub_reports_path |
| 43 | + |
| 44 | +weekly_data_path <- file.path( |
| 45 | + hub_reports_path, |
| 46 | + "weekly-summaries", |
| 47 | + reference_date |
| 48 | +) |
| 49 | + |
| 50 | +ensemble_us_1wk_ahead <- readr::read_csv( |
| 51 | + file.path(weekly_data_path, paste0(reference_date, "_covid_map_data.csv")), |
| 52 | + show_col_types = FALSE |
| 53 | +) |> |
| 54 | + dplyr::filter(horizon == 1, location_name == "US") |
| 55 | + |
| 56 | +target_data <- readr::read_csv( |
| 57 | + file.path( |
| 58 | + weekly_data_path, |
| 59 | + paste0( |
| 60 | + reference_date, |
| 61 | + "_covid_target_hospital_admissions_data.csv" |
| 62 | + ) |
| 63 | + ), |
| 64 | + show_col_types = FALSE |
| 65 | +) |
| 66 | + |
| 67 | +contributing_teams <- readr::read_csv( |
| 68 | + file.path( |
| 69 | + weekly_data_path, |
| 70 | + paste0(reference_date, "_covid_forecasts_data.csv") |
| 71 | + ), |
| 72 | + show_col_types = FALSE |
| 73 | +) |> |
| 74 | + dplyr::filter(model != "CovidHub-ensemble") |> |
| 75 | + dplyr::pull(model) |> |
| 76 | + unique() |
| 77 | + |
| 78 | +wkly_submissions <- hubData::load_model_metadata( |
| 79 | + base_hub_path, |
| 80 | + model_ids = contributing_teams |
| 81 | +) |> |
| 82 | + dplyr::distinct(.data$model_id, .data$designated_model, .keep_all = TRUE) |> |
| 83 | + dplyr::mutate( |
| 84 | + team_model_url = glue::glue( |
| 85 | + "[{team_name} (Model: {model_abbr})]({website_url})" |
| 86 | + ) |
| 87 | + ) |> |
| 88 | + dplyr::select( |
| 89 | + model_id, |
| 90 | + team_abbr, |
| 91 | + model_abbr, |
| 92 | + team_model_url, |
| 93 | + designated_model |
| 94 | + ) |
| 95 | + |
| 96 | +# Generate flag for less than 80 percent of hospitals reporting |
| 97 | +desired_weekendingdate <- as.Date(reference_date) - lubridate::dweeks(1) |
| 98 | + |
| 99 | +exclude_territories_path <- fs::path( |
| 100 | + base_hub_path, |
| 101 | + "auxiliary-data", |
| 102 | + "excluded_territories", |
| 103 | + ext = "toml" |
| 104 | +) |
| 105 | +stopifnot(fs::file_exists(exclude_territories_path)) |
| 106 | +exclude_territories_toml <- RcppTOML::parseTOML(exclude_territories_path) |
| 107 | +excluded_locations <- exclude_territories_toml$locations |
| 108 | + |
| 109 | +percent_hosp_reporting_below80 <- forecasttools::pull_data_cdc_gov_dataset( |
| 110 | + dataset = "mpgq-jmmr", |
| 111 | + columns = c("totalconfc19newadmperchosprepabove80pct"), |
| 112 | + start_date = "2024-11-09" |
| 113 | +) |> |
| 114 | + dplyr::mutate( |
| 115 | + weekendingdate = as.Date(.data$weekendingdate), |
| 116 | + report_above_80_lgl = as.logical( |
| 117 | + as.numeric(.data$totalconfc19newadmperchosprepabove80pct) |
| 118 | + ), |
| 119 | + jurisdiction = dplyr::case_match( |
| 120 | + .data$jurisdiction, |
| 121 | + "USA" ~ "US", |
| 122 | + .default = .data$jurisdiction |
| 123 | + ), |
| 124 | + location = forecasttools::us_location_recode( |
| 125 | + .data$jurisdiction, |
| 126 | + "abbr", |
| 127 | + "code" |
| 128 | + ), |
| 129 | + location_name = forecasttools::us_location_recode( |
| 130 | + .data$jurisdiction, |
| 131 | + "abbr", |
| 132 | + "name" |
| 133 | + ) |
| 134 | + ) |> |
| 135 | + dplyr::filter(!(.data$location %in% !!excluded_locations)) |> |
| 136 | + dplyr::group_by(.data$jurisdiction) |> |
| 137 | + dplyr::mutate(max_weekendingdate = max(.data$weekendingdate)) |> |
| 138 | + dplyr::ungroup() |
| 139 | + |
| 140 | +jurisdiction_w_latency <- percent_hosp_reporting_below80 |> |
| 141 | + dplyr::filter(.data$max_weekendingdate < !!desired_weekendingdate) |
| 142 | + |
| 143 | +if (nrow(jurisdiction_w_latency) > 0) { |
| 144 | + cli::cli_warn( |
| 145 | + " |
| 146 | + Some locations have missing reported data for the most recent week. |
| 147 | + The reference date is {reference_date}, we expect data at least |
| 148 | + through {desired_weekendingdate}. However, {nrow(jurisdiction_w_latency)} |
| 149 | + location{?s} did not have reporting through that date: |
| 150 | + {jurisdiction_w_latency$location_name}. |
| 151 | + " |
| 152 | + ) |
| 153 | +} |
| 154 | + |
| 155 | +latest_reporting_below80 <- percent_hosp_reporting_below80 |> |
| 156 | + dplyr::filter( |
| 157 | + .data$weekendingdate == max(.data$weekendingdate), |
| 158 | + !.data$report_above_80_lgl |
| 159 | + ) |
| 160 | + |
| 161 | +reporting_rate_flag <- if (length(latest_reporting_below80$location_name) > 0) { |
| 162 | + location_list <- if (length(latest_reporting_below80$location_name) < 3) { |
| 163 | + glue::glue_collapse(latest_reporting_below80$location_name, sep = " and ") |
| 164 | + } else { |
| 165 | + glue::glue_collapse( |
| 166 | + latest_reporting_below80$location_name, |
| 167 | + sep = ", ", |
| 168 | + last = ", and " |
| 169 | + ) |
| 170 | + } |
| 171 | + |
| 172 | + glue::glue( |
| 173 | + "The following jurisdictions had <80% of hospitals reporting for ", |
| 174 | + "the most recent week: {location_list}. ", |
| 175 | + "Lower reporting rates could impact forecast validity. Percent ", |
| 176 | + "of hospitals reporting is calculated based on the number of active ", |
| 177 | + "hospitals reporting complete data to NHSN for a given reporting week.\n\n" |
| 178 | + ) |
| 179 | +} else { |
| 180 | + "" |
| 181 | +} |
| 182 | + |
| 183 | +round_to_place <- function(value) { |
| 184 | + if (value >= 1000) { |
| 185 | + rounded_val <- round(value, -2) |
| 186 | + } else if (value >= 10) { |
| 187 | + rounded_val <- round(value, -1) |
| 188 | + } else { |
| 189 | + rounded_val <- round(value, 0) |
| 190 | + } |
| 191 | + return(rounded_val) |
| 192 | +} |
| 193 | + |
| 194 | +# generate variables used in the web text |
| 195 | + |
| 196 | +median_forecast_1wk_ahead <- round_to_place( |
| 197 | + ensemble_us_1wk_ahead$quantile_0.5_count |
| 198 | +) |
| 199 | +lower_95ci_forecast_1wk_ahead <- round_to_place( |
| 200 | + ensemble_us_1wk_ahead$quantile_0.025_count |
| 201 | +) |
| 202 | +upper_95ci_forecast_1wk_ahead <- round_to_place( |
| 203 | + ensemble_us_1wk_ahead$quantile_0.975_count |
| 204 | +) |
| 205 | + |
| 206 | +designated <- wkly_submissions[wkly_submissions$designated_model, ] |
| 207 | +not_designated <- wkly_submissions[!wkly_submissions$designated_model, ] |
| 208 | +weekly_num_teams <- length(unique(designated$team_abbr)) |
| 209 | +weekly_num_models <- length(unique(designated$model_abbr)) |
| 210 | +model_incl_in_hub_ensemble <- designated$team_model_url |
| 211 | +model_not_incl_in_hub_ensemble <- not_designated$team_model_url |
| 212 | + |
| 213 | +first_target_data_date <- format( |
| 214 | + as.Date(min(target_data$week_ending_date)), |
| 215 | + "%B %d, %Y" |
| 216 | +) |
| 217 | +last_target_data_date <- format( |
| 218 | + as.Date(max(target_data$week_ending_date)), |
| 219 | + "%B %d, %Y" |
| 220 | +) |
| 221 | +forecast_due_date <- ensemble_us_1wk_ahead$forecast_due_date_formatted |
| 222 | +target_end_date_1wk_ahead <- ensemble_us_1wk_ahead$target_end_date_formatted |
| 223 | +target_end_date_2wk_ahead <- format( |
| 224 | + ensemble_us_1wk_ahead$target_end_date + lubridate::weeks(1), |
| 225 | + "%B %d, %Y" |
| 226 | +) |
| 227 | + |
| 228 | +last_reported_target_data <- target_data |> |
| 229 | + dplyr::filter(week_ending_date == max(week_ending_date), location == "US") |> |
| 230 | + dplyr::mutate(week_end_date_formatted = format(week_ending_date, "%B %d, %Y")) |
| 231 | + |
| 232 | +last_reported_admissions <- round(last_reported_target_data$value, -2) |
| 233 | + |
| 234 | +web_text <- glue::glue( |
| 235 | + "The CovidHub ensemble's one-week-ahead forecast predicts that the number ", |
| 236 | + "of new weekly laboratory-confirmed COVID-19 hospital admissions will be ", |
| 237 | + "approximately {median_forecast_1wk_ahead} nationally, with ", |
| 238 | + "{lower_95ci_forecast_1wk_ahead} to {upper_95ci_forecast_1wk_ahead} ", |
| 239 | + "laboratory confirmed COVID-19 hospital admissions likely reported in the ", |
| 240 | + "week ending {target_end_date_1wk_ahead}. This is compared to the ", |
| 241 | + "{last_reported_admissions} admissions reported for the week ", |
| 242 | + "ending {last_reported_target_data$week_end_date_formatted}, the most ", |
| 243 | + "recent week of reporting from U.S. hospitals.\n\n", |
| 244 | + "Reported and forecasted new COVID-19 hospital admissions as of ", |
| 245 | + "{forecast_due_date}. This week, {weekly_num_teams} modeling groups ", |
| 246 | + "contributed {weekly_num_models} forecasts that were eligible for inclusion ", |
| 247 | + "in the ensemble forecasts for at least one jurisdiction.\n\n", |
| 248 | + "The figure shows the number of new laboratory-confirmed COVID-19 hospital ", |
| 249 | + "admissions reported in the United States each week from ", |
| 250 | + "{first_target_data_date} through {last_target_data_date} and forecasted ", |
| 251 | + "new COVID-19 hospital admissions per week for this week and the next ", |
| 252 | + "2 weeks through {target_end_date_2wk_ahead}.\n\n", |
| 253 | + "{reporting_rate_flag}\n", |
| 254 | + "Contributing teams and models:\n\n", |
| 255 | + "Models included in the CovidHub ensemble:\n", |
| 256 | + "{paste(model_incl_in_hub_ensemble, collapse = '\n')}\n\n", |
| 257 | + "Models not included in the CovidHub ensemble:\n", |
| 258 | + "{paste(model_not_incl_in_hub_ensemble, collapse = '\n')}" |
| 259 | +) |
| 260 | + |
| 261 | +writeLines( |
| 262 | + web_text, |
| 263 | + file.path(weekly_data_path, paste0(reference_date, "_webtext.md")) |
| 264 | +) |
0 commit comments