diff --git a/model-metadata/CU-ensemble.yml b/model-metadata/CU-ensemble.yml index 34100142f..60428c42a 100644 --- a/model-metadata/CU-ensemble.yml +++ b/model-metadata/CU-ensemble.yml @@ -1,35 +1,40 @@ -team_name: "Columbia University" -team_abbr: "CU" -model_name: "Ensemble" -model_abbr: "ensemble" +team_name: Columbia University +team_abbr: CU +model_name: Columbia University Ensemble +model_abbr: CU-ensemble model_contributors: [ { "name": "Rami Yaari", "affiliation": "Columbia University", - "email": "ry2460@cumc.columbia.edu" + "email": "ry2460@cumc.columbia.edu", + "orcid": "0000-0002-8808-8937" }, { - "name": "Teresa Yamana", + "name": "Sen Pei", "affiliation": "Columbia University", - "email": "tky2104@cumc.columbia.edu" - }, + "email": "sp3449@cumc.columbia.edu", + "orcid": "0000-0002-7072-2995" + }, { - "name": "Sen Pei", + "name": "Teresa Yamana", "affiliation": "Columbia University", - "email": "sp3449@cumc.columbia.edu" - }, + "email": "tky2104@cumc.columbia.edu", + "orcid": "0000-0001-8349-3151" + }, { "name": "Jeffrey Shaman", "affiliation": "Columbia University", - "email": "jls106@cumc.columbia.edu" + "email": "jls106@cumc.columbia.edu", + "orcid": "0000-0002-7216-7809" } ] -website_url: "https://blogs.cuit.columbia.edu/jls106/" -license: "CC-BY-4.0" -team_funding: "US NIH grant AI163023 and CDC 75D30122C14289" -designated_model: true -data_inputs: "State and national-level daily confirmed influenza hospital admissions, queried using covidcast R package. State and national-level ILINet surveillance data, queried using cdcfluview R package." -methods: "An inverse-WIS weighted ensemble of several component models - an SEIRS compartmental model with EAKF, an ARIMA model, a random walk with drift, and the N-HiTS and N-BEATS deep-learning models." -methods_long: "The dynamical model simulates influenza transmission in each state and the US using a humidity-driven SEIRS dynamics. 
Model variables and parameters are sequentially updated each week using the ensemble adjustment Kalman filter and new observations. Forecasts are generated by integrating the optimized model into the future. Autoregressive Integrated Moving Average model and baseline models use implementations available in the fable R package (ARIMA and RW, respectively). We employ multivariate versions of N-HiTS and N-BEATS models as implemented in the darts python package, trained on modified state-level ILI data and hospitalization data. To build ensemble, the quantile distributions of the component models are weighted by the sum of inverse-WIS scores, over last 4 weeks. The 4-week window is target and location-specific and are recomputed at each forecast week." +website_url: https://blogs.cuit.columbia.edu/jls106/ +repo_url: https://github.com/ramiyaari/Flusight-CU-Ensemble +license: cc-by-4.0 +team_model_designation: primary ensemble_of_models: true ensemble_of_hub_models: false +data_inputs: 1) State specific weekly total number of hospitalized patients taken from the US Department of Health and Human Services, COVID-19 Reported Patient Impact and Hospital Capacity by State Timeseries (target data), 2) State specific weekly weighted influenza-like illness values downloaded from the CDC website, 3) State specific weekly percentage of positive influenza lab tests downloaded from the CDC FluView website, and 4) State specific weekly average absolute humidity values +methods: An inverse-WIS weighted ensemble of 4 component models - an SEIRS compartmental model with EAKF, an Exponential-Smoothing model, a Gradient-Boosting model and a Temporal-Fusion-Transformer model. +methods_long: The dynamical model simulates influenza transmission in each state and the US using humidity-driven SEIRS dynamics. Model variables and parameters are sequentially updated each week using the ensemble adjustment Kalman filter and new observations. 
Forecasts are generated by integrating the optimized model into the future. Besides the dynamical model, we use three statistical models implemented within the python library darts - 1) Holt-Winters Exponential Smoothing (ES), a classical statistical model that decomposes a time series to a baseline, trend and seasonal components, 2) Light Gradient Boosting Machine (LightGBM), a ML ensemble decision tree method designed for classification and regression tasks that has been effectively adapted for time series forecasting, and 3) Temporal Fusion Transformer (TFT) - a transformer-based neural-network architecture tailored for time series forecasting. Past years' ILI data is transformed to resemble hospitalization data and is used to train the models. Laboratory data is used as a covariate in model fitting and predictions. To build the ensemble, the quantile distributions of the component models are weighted by the sum of inverse-WIS scores, over the last 4 weeks. The 4-week window is target-specific and only includes weeks for which WIS scores could be evaluated (i.e. weights for 4-wk target are calculated with a window further back in time than for 1-wk target). Weights are location-specific and recomputed at each forecast week. Peak week distribution and incidence is currently being forecasted using historical stats gathered from the combination of transformed ILI data and hospitalization data. Peak week distribution is smoothed using non-parametric kernel density estimation (KDE). In the near future, we intend to enhance these forecasts using the SEIRS model forecasts and forecasts of statistical models trained on these peak week targets. +team_funding: US NIH grant GM110748