Merge remote-tracking branch 'origin/main' into 37-add-get_webtextr

O957 · O957 · commit a6c39bfc540d · 2025-12-09T10:01:12.000-05:00
diff --git a/.github/workflows/jarl-check.yaml b/.github/workflows/jarl-check.yaml
@@ -0,0 +1,16 @@
+name: "Lint R Code With JARL"
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+      - uses: etiennebacher/setup-jarl@v0.1.0
+        with:
+          args: check . --fix --output-format github
diff --git a/.lintr b/.lintr
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -13,7 +13,7 @@ repos:
 #####
 # Python
 - repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.14.6
+  rev: v0.14.8
   hooks:
     # Sort imports
     - id: ruff
@@ -25,12 +25,6 @@ repos:
     - id: ruff-format
       args: ['--line-length', '79']
 #####
-# R
--   repo: https://github.com/lorenzwalthert/precommit
-    rev: v0.4.3.9017
-    hooks:
-    -   id: lintr
-#####
 # Java
 - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
   rev: v2.15.0
diff --git a/NAMESPACE b/NAMESPACE
@@ -6,6 +6,7 @@ export(excluded_locations)
 export(generate_hub_baseline)
 export(generate_hub_ensemble)
 export(generate_oracle_output)
+export(get_forecast_data)
 export(get_hub_name)
 export(get_map_data)
 export(get_webtext)
diff --git a/R/generate_hub_baselines.R b/R/generate_hub_baselines.R
@@ -181,12 +181,16 @@ make_baseline_forecast <- function(
 #' @param base_hub_path Path to the base hub directory.
 #' @param reference_date Reference date (should be a Saturday).
 #' @param disease Disease name ("covid" or "rsv").
+#' @param as_of As of date to filter to, as an object
+#' coercible by as.Date(), or "latest" to filter to the
+#' most recent available vintage. Default "latest".
 #' @return NULL. Writes baseline forecast file to hub's model-output directory.
 #' @export
 generate_hub_baseline <- function(
   base_hub_path,
   reference_date,
-  disease
+  disease,
+  as_of = "latest"
 ) {
   checkmate::assert_scalar(disease)
   checkmate::assert_names(disease, subset.of = c("covid", "rsv"))
@@ -213,7 +217,7 @@ generate_hub_baseline <- function(
 
   hub_target_data <- hubData::connect_target_timeseries(base_hub_path) |>
     dplyr::collect() |>
-    forecasttools::hub_target_data_as_of()
+    forecasttools::hub_target_data_as_of(as_of)
 
   preds_hosp <- make_baseline_forecast(
     target_data = hub_target_data,
diff --git a/R/get_forecast_data.R b/R/get_forecast_data.R
@@ -0,0 +1,169 @@
+#' Generate forecast data file containing all forecast hub
+#' model submissions.
+#'
+#' This function fetches all forecast submissions from a
+#' forecast hub based on the reference date. The forecast
+#' data is then pivoted to create a wide format with
+#' quantile levels as columns.
+#'
+#' The resulting file contains the following columns:
+#' - `location_name`: full location name ("US" for the
+#'    United States)
+#' - `abbreviation`: state abbreviation
+#' - `horizon`: forecast horizon
+#' - `forecast_date`: reference date of the forecast
+#' - `target_end_date`: target date for the forecast
+#' - `model`: model name
+#' - `quantile_*`: forecast values at quantile levels 0.025,
+#'    0.25, 0.5, 0.75, 0.975, plus `quantile_*_rounded` copies
+#' - `forecast_team`: name of the team that generated the model
+#' - `forecast_due_date`: reference date minus three days
+#' - `model_full_name`: full model name
+#'
+#' @param reference_date character, the reference date for
+#' the forecast in YYYY-MM-DD format (ISO-8601).
+#' @param base_hub_path character, path to the forecast
+#' hub directory.
+#' @param hub_reports_path character, path to forecast hub
+#' reports directory.
+#' @param disease character, disease name ("covid" or
+#' "rsv"). Used to build the output file name.
+#' @param horizons_to_include integer vector, horizons to
+#' include in the output. Default: c(0, 1, 2).
+#' @param excluded_locations character vector of location
+#' codes to exclude from the output. Default: character(0).
+#' @param output_format character, output file format. One
+#' of "csv", "tsv", or "parquet". Default: "csv".
+#' @param targets character vector, target name(s) to filter
+#' forecasts. If NULL (default), does not filter by target.
+#' Can be a single target like "wk inc covid hosp" or
+#' multiple targets like c("wk inc covid hosp", "wk inc
+#' covid prop ed visits").
+#' @return Called for its side effect: writes the forecast data file.
+#' @export
+get_forecast_data <- function(
+  reference_date,
+  base_hub_path,
+  hub_reports_path,
+  disease,
+  horizons_to_include = c(0, 1, 2),
+  excluded_locations = character(0),
+  output_format = "csv",
+  targets = NULL
+) {
+  checkmate::assert_choice(disease, choices = c("covid", "rsv"))
+  checkmate::assert_subset(horizons_to_include, choices = c(-1, 0, 1, 2, 3))
+  checkmate::assert_character(excluded_locations)
+  checkmate::assert_choice(output_format, choices = c("csv", "tsv", "parquet"))
+  checkmate::assert_character(targets, null.ok = TRUE)
+
+  reference_date <- lubridate::as_date(reference_date)
+
+  model_metadata <- hubData::load_model_metadata(
+    base_hub_path,
+    model_ids = NULL
+  )
+
+  hub_content <- hubData::connect_hub(base_hub_path)
+
+  current_forecasts <- hub_content |>
+    dplyr::filter(
+      .data$reference_date == !!reference_date,
+      !(.data$location %in% !!excluded_locations),
+      .data$horizon %in% !!horizons_to_include
+    ) |>
+    hubData::collect_hub() |>
+    dplyr::filter(forecasttools::nullable_comparison(
+      .data$target,
+      "%in%",
+      !!targets
+    ))
+
+  all_forecasts_data <- forecasttools::pivot_hubverse_quantiles_wider(
+    hubverse_table = current_forecasts,
+    pivot_quantiles = c(
+      "quantile_0.025" = 0.025,
+      "quantile_0.25" = 0.25,
+      "quantile_0.5" = 0.5,
+      "quantile_0.75" = 0.75,
+      "quantile_0.975" = 0.975
+    )
+  ) |>
+    dplyr::mutate(
+      location_name = forecasttools::us_location_recode(
+        .data$location,
+        "hub",
+        "name"
+      ),
+      abbreviation = forecasttools::us_location_recode(
+        .data$location,
+        "hub",
+        "abbr"
+      ),
+      dplyr::across(
+        tidyselect::starts_with("quantile_"),
+        round,
+        .names = "{.col}_rounded"
+      ),
+      forecast_due_date = as.Date(!!reference_date) - 3,
+      location_sort_order = ifelse(.data$location_name == "United States", 0, 1)
+    ) |>
+    dplyr::mutate(
+      location_name = dplyr::case_match(
+        .data$location_name,
+        "United States" ~ "US",
+        .default = .data$location_name
+      )
+    ) |>
+    dplyr::arrange(.data$location_sort_order, .data$location_name) |>
+    dplyr::left_join(
+      dplyr::distinct(
+        model_metadata,
+        .data$model_id,
+        .keep_all = TRUE
+      ),
+      by = "model_id"
+    ) |>
+    dplyr::select(
+      "location_name",
+      "abbreviation",
+      "horizon",
+      forecast_date = "reference_date",
+      "target_end_date",
+      model = "model_id",
+      "quantile_0.025",
+      "quantile_0.25",
+      "quantile_0.5",
+      "quantile_0.75",
+      "quantile_0.975",
+      "quantile_0.025_rounded",
+      "quantile_0.25_rounded",
+      "quantile_0.5_rounded",
+      "quantile_0.75_rounded",
+      "quantile_0.975_rounded",
+      forecast_team = "team_name",
+      "forecast_due_date",
+      model_full_name = "model_name"
+    )
+
+  output_folder_path <- fs::path(
+    hub_reports_path,
+    "weekly-summaries",
+    reference_date
+  )
+  output_filename <- glue::glue("{reference_date}_{disease}_forecasts_data")
+  output_filepath <- fs::path(
+    output_folder_path,
+    output_filename,
+    ext = output_format
+  )
+
+  fs::dir_create(output_folder_path)
+
+  if (!fs::file_exists(output_filepath)) {
+    forecasttools::write_tabular(all_forecasts_data, output_filepath)
+    cli::cli_inform("File saved as: {output_filepath}")
+  } else {
+    cli::cli_abort("File already exists: {output_filepath}")
+  }
+}
diff --git a/jarl.toml b/jarl.toml
@@ -0,0 +1,3 @@
+[lint]
+default-exclude = true
+assignment = "<-"
diff --git a/man/generate_hub_baseline.Rd b/man/generate_hub_baseline.Rd
diff --git a/man/get_forecast_data.Rd b/man/get_forecast_data.Rd

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+[lint]`
	`2`	`+default-exclude = true`
	`3`	`+assignment = "<-"`