CDCgov · O957 · Dec 9, 2025 · Dec 5, 2025 · Dec 5, 2025 · Dec 5, 2025
@@ -9,7 +9,7 @@ Description: This package provides functions to help with the maintenance of CDC
 License: Apache License (>= 2)
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
 Imports:
     checkmate,
     cli,

@@ -10,7 +10,11 @@ export(get_forecast_data)
 export(get_hub_name)
 export(get_map_data)
 export(included_locations)
+export(summarize_ref_date_forecasts)
 export(update_authorized_users)
 export(update_hub_target_data)
+export(write_ref_date_summary)
+export(write_ref_date_summary_all)
+export(write_ref_date_summary_ensemble)
 importFrom(rlang,":=")
 importFrom(rlang,.data)
@@ -1,25 +1,10 @@
-#' Generate forecast data file containing all forecast hub
-#' model submissions.
+#' Generate forecast data file containing all forecast hub model submissions
 #'
 #' This function fetches all forecast submissions from a
 #' forecast hub based on the reference date. The forecast
 #' data is then pivoted to create a wide format with
 #' quantile levels as columns.
 #'
-#' The resulting file contains the following columns:
-#' - `location_name`: full state name (including "US" for
-#'    the US state)
-#' - `abbreviation`: state abbreviation
-#' - `horizon`: forecast horizon
-#' - `forecast_date`: date the forecast was generated
-#' - `target_end_date`: target date for the forecast
-#' - `model`: model name
-#' - `quantile_*`: forecast values for various quantiles
-#'    (e.g., 0.025, 0.5, 0.975)
-#' - `forecast_teams`: name of the team that generated the
-#'    model
-#' - `forecast_fullnames`: full model name
-#'
 #' @param reference_date character, the reference date for
 #' the forecast in YYYY-MM-DD format (ISO-8601).
 #' @param base_hub_path character, path to the forecast
@@ -51,119 +36,14 @@ get_forecast_data <- function(
   output_format = "csv",
   targets = NULL
 ) {
-  checkmate::assert_choice(disease, choices = c("covid", "rsv"))
-  checkmate::assert_subset(horizons_to_include, choices = c(-1, 0, 1, 2, 3))
-  checkmate::assert_character(excluded_locations)
-  checkmate::assert_choice(output_format, choices = c("csv", "tsv", "parquet"))
-  checkmate::assert_character(targets, null.ok = TRUE)
-
-  reference_date <- lubridate::as_date(reference_date)
-
-  model_metadata <- hubData::load_model_metadata(
-    base_hub_path,
-    model_ids = NULL
-  )
-
-  hub_content <- hubData::connect_hub(base_hub_path)
-
-  current_forecasts <- hub_content |>
-    dplyr::filter(
-      .data$reference_date == !!reference_date,
-      !(.data$location %in% !!excluded_locations),
-      .data$horizon %in% !!horizons_to_include
-    ) |>
-    hubData::collect_hub() |>
-    dplyr::filter(forecasttools::nullable_comparison(
-      .data$target,
-      "%in%",
-      !!targets
-    ))
-
-  all_forecasts_data <- forecasttools::pivot_hubverse_quantiles_wider(
-    hubverse_table = current_forecasts,
-    pivot_quantiles = c(
-      "quantile_0.025" = 0.025,
-      "quantile_0.25" = 0.25,
-      "quantile_0.5" = 0.5,
-      "quantile_0.75" = 0.75,
-      "quantile_0.975" = 0.975
-    )
-  ) |>
-    dplyr::mutate(
-      location_name = forecasttools::us_location_recode(
-        .data$location,
-        "hub",
-        "name"
-      ),
-      abbreviation = forecasttools::us_location_recode(
-        .data$location,
-        "hub",
-        "abbr"
-      ),
-      dplyr::across(
-        tidyselect::starts_with("quantile_"),
-        round,
-        .names = "{.col}_rounded"
-      ),
-      forecast_due_date = as.Date(!!reference_date) - 3,
-      location_sort_order = ifelse(.data$location_name == "United States", 0, 1)
-    ) |>
-    dplyr::mutate(
-      location_name = dplyr::case_match(
-        .data$location_name,
-        "United States" ~ "US",
-        .default = .data$location_name
-      )
-    ) |>
-    dplyr::arrange(.data$location_sort_order, .data$location_name) |>
-    dplyr::left_join(
-      dplyr::distinct(
-        model_metadata,
-        .data$model_id,
-        .keep_all = TRUE
-      ),
-      by = "model_id"
-    ) |>
-    dplyr::select(
-      "location_name",
-      "abbreviation",
-      "horizon",
-      forecast_date = "reference_date",
-      "target_end_date",
-      model = "model_id",
-      "quantile_0.025",
-      "quantile_0.25",
-      "quantile_0.5",
-      "quantile_0.75",
-      "quantile_0.975",
-      "quantile_0.025_rounded",
-      "quantile_0.25_rounded",
-      "quantile_0.5_rounded",
-      "quantile_0.75_rounded",
-      "quantile_0.975_rounded",
-      forecast_team = "team_name",
-      "forecast_due_date",
-      model_full_name = "model_name"
-    )
-
-  output_folder_path <- fs::path(
-    hub_reports_path,
-    "weekly-summaries",
-    reference_date
-  )
-  output_filename <- glue::glue("{reference_date}_{disease}_forecasts_data")
-  output_filepath <- fs::path(
-    output_folder_path,
-    output_filename,
-    ext = output_format
+  write_ref_date_summary_all(
+    reference_date = reference_date,
+    base_hub_path = base_hub_path,
+    hub_reports_path = hub_reports_path,
+    disease = disease,
+    horizons_to_include = horizons_to_include,
+    excluded_locations = excluded_locations,
+    output_format = output_format,
+    targets = targets
   )
-
-  fs::dir_create(output_folder_path)
-
-  if (!fs::file_exists(output_filepath)) {
-    forecasttools::write_tabular(all_forecasts_data, output_filepath)
-    cli::cli_inform("File saved as: {output_filepath}")
-  } else {
-    cli::cli_abort("File already exists: {output_filepath}")
-  }
 }
@@ -1,5 +1,4 @@
-#' Generate map data file containing ensemble forecast
-#' data.
+#' Generate map data file containing ensemble forecast data
 #'
 #' This function loads the latest ensemble forecast data
 #' from the forecast hub and processes it into the required
@@ -8,37 +7,6 @@
 #' various forecast horizons, and quantiles (0.025, 0.5,
 #' and 0.975).
 #'
-#' The ensemble data is expected to contain the following
-#' columns:
-#' - `reference_date`: the date of the forecast
-#' - `location`: state abbreviation
-#' - `horizon`: forecast horizon
-#' - `target`: forecast target (e.g., "wk inc covid hosp")
-#' - `target_end_date`: the forecast target date
-#' - `output_type`: type of output (e.g., "quantile")
-#' - `output_type_id`: quantile value (e.g., 0.025, 0.5,
-#'    0.975)
-#' - `value`: forecast value
-#'
-#' The resulting map file will have the following columns:
-#' - `location_name`: full state name (including "US" for
-#'    the US state)
-#' - `quantile_*`: the quantile forecast values (rounded
-#'    to two decimal places)
-#' - `horizon`: forecast horizon
-#' - `target`: forecast target (e.g., "7 day ahead inc
-#'    hosp")
-#' - `target_end_date`: target date for the forecast (Ex:
-#'    2024-11-30)
-#' - `reference_date`: date that the forecast was generated
-#'    (Ex: 2024-11-23)
-#' - `target_end_date_formatted`: target date for the
-#'    forecast, prettily re-formatted as a string (Ex:
-#'    "November 30, 2024")
-#' - `reference_date_formatted`: date that the forecast
-#'    was generated, prettily re-formatted as a string
-#'    (Ex: "November 23, 2024")
-#'
 #' @param reference_date character, the reference date for
 #' the forecast in YYYY-MM-DD format (ISO-8601).
 #' @param base_hub_path character, path to the forecast
@@ -67,143 +35,14 @@ get_map_data <- function(
   excluded_locations = character(0),
   output_format = "csv"
 ) {
-  checkmate::assert_choice(disease, choices = c("covid", "rsv"))
-  checkmate::assert_subset(horizons_to_include, choices = c(-1, 0, 1, 2, 3))
-  checkmate::assert_data_frame(population_data)
-  checkmate::assert_names(
-    colnames(population_data),
-    must.include = c("location", "population")
-  )
-  checkmate::assert_character(excluded_locations)
-  checkmate::assert_choice(output_format, choices = c("csv", "tsv", "parquet"))
-
-  reference_date <- lubridate::as_date(reference_date)
-
-  hub_name <- get_hub_name(disease)
-  ensemble_model_name <- glue::glue("{hub_name}-ensemble")
-
-  ensemble_data <- hubData::connect_hub(base_hub_path) |>
-    dplyr::filter(
-      .data$reference_date == !!reference_date,
-      .data$model_id == !!ensemble_model_name
-    ) |>
-    hubData::collect_hub()
-
-  if (nrow(ensemble_data) == 0) {
-    cli::cli_abort(
-      glue::glue(
-        "No ensemble data found for reference date {reference_date} ",
-        "and model {ensemble_model_name}"
-      )
-    )
-  }
-
-  # process ensemble data into the required format for Map file
-  map_data <- forecasttools::pivot_hubverse_quantiles_wider(
-    hubverse_table = ensemble_data,
-    pivot_quantiles = c(
-      "quantile_0.025" = 0.025,
-      "quantile_0.25" = 0.25,
-      "quantile_0.5" = 0.5,
-      "quantile_0.75" = 0.75,
-      "quantile_0.975" = 0.975
-    )
-  ) |>
-    dplyr::filter(.data$horizon %in% !!horizons_to_include) |>
-    dplyr::filter(!(.data$location %in% !!excluded_locations)) |>
-    dplyr::mutate(
-      reference_date = as.Date(.data$reference_date),
-      target_end_date = as.Date(.data$target_end_date),
-      model = !!ensemble_model_name
-    ) |>
-    # convert location column codes to full location names
-    dplyr::mutate(
-      location = forecasttools::us_location_recode(
-        .data$location,
-        "hub",
-        "name"
-      )
-    ) |>
-    # long name "United States" to "US"
-    dplyr::mutate(
-      location = dplyr::case_match(
-        .data$location,
-        "United States" ~ "US",
-        .default = .data$location
-      ),
-      # sort locations alphabetically, except for US
-      location_sort_order = ifelse(.data$location == "US", 0, 1)
-    ) |>
-    dplyr::arrange(.data$location_sort_order, .data$location) |>
-    dplyr::left_join(
-      population_data,
-      by = "location"
-    ) |>
-    dplyr::mutate(
-      population = as.numeric(.data$population),
-      quantile_0.025_per100k = .data$quantile_0.025 / .data$population * 100000,
-      quantile_0.5_per100k = .data$quantile_0.5 / .data$population * 100000,
-      quantile_0.975_per100k = .data$quantile_0.975 / .data$population * 100000,
-      quantile_0.025_count = .data$quantile_0.025,
-      quantile_0.5_count = .data$quantile_0.5,
-      quantile_0.975_count = .data$quantile_0.975,
-      quantile_0.025_per100k_rounded = round(.data$quantile_0.025_per100k, 2),
-      quantile_0.5_per100k_rounded = round(.data$quantile_0.5_per100k, 2),
-      quantile_0.975_per100k_rounded = round(.data$quantile_0.975_per100k, 2),
-      quantile_0.025_count_rounded = round(.data$quantile_0.025_count),
-      quantile_0.5_count_rounded = round(.data$quantile_0.5_count),
-      quantile_0.975_count_rounded = round(.data$quantile_0.975_count),
-      target_end_date_formatted = format(.data$target_end_date, "%B %d, %Y"),
-      reference_date_formatted = format(.data$reference_date, "%B %d, %Y"),
-      forecast_due_date = as.Date(!!reference_date) - 3,
-      forecast_due_date_formatted = format(
-        .data$forecast_due_date,
-        "%B %d, %Y"
-      ),
-    ) |>
-    dplyr::select(
-      location_name = "location",
-      "horizon",
-      "quantile_0.025_per100k",
-      "quantile_0.5_per100k",
-      "quantile_0.975_per100k",
-      "quantile_0.025_count",
-      "quantile_0.5_count",
-      "quantile_0.975_count",
-      "quantile_0.025_per100k_rounded",
-      "quantile_0.5_per100k_rounded",
-      "quantile_0.975_per100k_rounded",
-      "quantile_0.025_count_rounded",
-      "quantile_0.5_count_rounded",
-      "quantile_0.975_count_rounded",
-      "target",
-      "target_end_date",
-      "reference_date",
-      "forecast_due_date",
-      "target_end_date_formatted",
-      "forecast_due_date_formatted",
-      "reference_date_formatted",
-      "model",
-    )
-
-  output_folder_path <- fs::path(
-    hub_reports_path,
-    "weekly-summaries",
-    reference_date
-  )
-  output_filename <- glue::glue("{reference_date}_{disease}_map_data")
-  output_filepath <- fs::path(
-    output_folder_path,
-    output_filename,
-    ext = output_format
+  write_ref_date_summary_ensemble(
+    reference_date = reference_date,
+    base_hub_path = base_hub_path,
+    hub_reports_path = hub_reports_path,
+    disease = disease,
+    horizons_to_include = horizons_to_include,
+    population_data = population_data,
+    excluded_locations = excluded_locations,
+    output_format = output_format
   )
-
-  fs::dir_create(output_folder_path)
-
-  if (!fs::file_exists(output_filepath)) {
-    forecasttools::write_tabular(map_data, output_filepath)
-    cli::cli_inform("File saved as: {output_filepath}")
-  } else {
-    cli::cli_abort("File already exists: {output_filepath}")
-  }
 }