Exercise 22: Forcast Modeling with Time Series Data

ESS 330 - Quantitative Reasoning

# Load necessary libraries
library(tidyverse)
Warning: package 'lubridate' was built under R version 4.4.3
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dataRetrieval)
Warning: package 'dataRetrieval' was built under R version 4.4.3
library(lubridate)
library(timetk)
Warning: package 'timetk' was built under R version 4.4.3
library(modeltime)
Warning: package 'modeltime' was built under R version 4.4.3
library(tidymodels)
── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ──
✔ broom        1.0.7     ✔ rsample      1.2.1
✔ dials        1.3.0     ✔ tune         1.2.1
✔ infer        1.0.7     ✔ workflows    1.1.4
✔ modeldata    1.4.0     ✔ workflowsets 1.1.0
✔ parsnip      1.2.1     ✔ yardstick    1.3.2
✔ recipes      1.1.0     
Warning: package 'scales' was built under R version 4.4.3
── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
✖ scales::discard() masks purrr::discard()
✖ dplyr::filter()   masks stats::filter()
✖ recipes::fixed()  masks stringr::fixed()
✖ dplyr::lag()      masks stats::lag()
✖ yardstick::spec() masks readr::spec()
✖ recipes::step()   masks stats::step()
• Search for functions across packages at https://www.tidymodels.org/find/
library(prophet)
Warning: package 'prophet' was built under R version 4.4.3
Loading required package: Rcpp

Attaching package: 'Rcpp'

The following object is masked from 'package:rsample':

    populate

Loading required package: rlang

Attaching package: 'rlang'

The following objects are masked from 'package:purrr':

    %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
    flatten_raw, invoke, splice
# Data import for: Cache la Poudre River at Mouth (USGS site 06752260)

# Bringing in historical (training) data (2013-2023)
poudre_hist_flow <- readNWISdv(siteNumber = "06752260",    # Download data from USGS for site 06752260
                          parameterCd = "00060",      # Parameter code 00060 = discharge in cfs)
                          startDate = "2013-01-01",   # Set the start date
                          endDate = "2023-12-31") %>% # Set the end date
  renameNWISColumns() %>%                             # Rename columns to standard names (e.g., "Flow", "Date")
  mutate(date = floor_date(Date, "month")) %>%        # Convert to first day of each month
  group_by(date) %>%                                  # Group the data by the new monthly Date
  summarize(Flow = mean(Flow, na.rm = TRUE)) %>%      # Calculate the monthly average flow
  ungroup()
GET:https://waterservices.usgs.gov/nwis/dv/?site=06752260&format=waterml%2C1.1&ParameterCd=00060&StatCd=00003&startDT=2013-01-01&endDT=2023-12-31
# Bring in recent (testing) data (2024)
poudre_2024_obs_flow <- readNWISdv(siteNumber = "06752260",
                          parameterCd = "00060",      
                          startDate = "2024-01-01",  
                          endDate = "2024-12-31") %>% 
  renameNWISColumns() %>%                              
  mutate(date = floor_date(Date, "month")) %>%        
  group_by(date) %>%                                 
  summarize(Flow = mean(Flow, na.rm = TRUE)) %>%     
  ungroup()
GET:https://waterservices.usgs.gov/nwis/dv/?site=06752260&format=waterml%2C1.1&ParameterCd=00060&StatCd=00003&startDT=2024-01-01&endDT=2024-12-31
# Log normalization for training data
poudre_hist_flow <- poudre_hist_flow %>% 
  mutate(log_flow = log1p(Flow))

# Define forcasting models
# Prophet model
prophet_fit <- prophet_reg() %>% 
  set_engine("prophet") %>% 
  fit(log_flow ~ date, data = poudre_hist_flow)
Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
# ARIMA model
arima_fit <- arima_reg() %>% 
  set_engine("auto_arima") %>% 
  fit(log_flow ~ date, data = poudre_hist_flow)
frequency = 12 observations per 1 year
# Combine models into modeltime table
models_tbl <- modeltime_table(prophet_fit, arima_fit)

# Fortcast the next 12 months w/ test data
future_dates <- poudre_hist_flow %>% 
  future_frame(.date_var = date, .length_out = 12)

forcast_tbl <- models_tbl %>% 
  modeltime_forecast(
    new_data = future_dates,
    actual_data = poudre_hist_flow,
    keep_data = TRUE
  ) %>% 
  mutate(.value = expm1(.value)) # Back-transform predictions to original scale
  
# Join predicted vs observed for 2024
comparison_tbl <- forcast_tbl %>% 
  filter(.key == "prediction") %>% 
  select(.model_desc, .index, .value) %>% 
  rename(
    predicted = .value,
    date = .index,
    model_desc = .model_desc
  ) %>% 
  inner_join(poudre_2024_obs_flow, by = "date") %>% 
  rename(observed = Flow) %>% 
    mutate(model_desc = case_when(
    str_detect(model_desc, "ARIMA") ~ "ARIMA",
    str_detect(model_desc, "PROPHET") ~ "Prophet",
    TRUE ~ model_desc))

# Compute R-sq values
comparison_tbl %>%  
  group_by(model_desc) %>% 
  summarize(r2 = summary(lm(observed ~ predicted))$r.squared)
# A tibble: 2 × 2
  model_desc    r2
  <chr>      <dbl>
1 ARIMA      0.922
2 Prophet    0.880
# Plot Predicted vs Observed Forcast Values
ggplot(comparison_tbl, aes(x = predicted, y = observed, color = model_desc)) +
  geom_point(size = 2) +
  geom_smooth(method = "lm", se = FALSE, linetype = "solid") +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "black") +
  labs(
    title = "Forcast (Predicted) vs Observed Monthly Flow for 2024",
    subtitle = "Model comparison using ARIMA and Prophet",
    x = "Predicted Flow (cfs)",
    y = "Observed Flow (cfs)",
    color = "Model"
  ) +
  theme_minimal() +
  theme(legend.position =  "top",
        plot.title = element_text(hjust = 0.5),  # Center title
    plot.subtitle = element_text(hjust = 0.5)  # Center
    ) +
  coord_fixed(ratio = 1) +   # Ensures 1:1 aspect ratio
  coord_cartesian(xlim= c(0, 600), ylim = c(0,600)) # Fix axis scales
Coordinate system already present. Adding new coordinate system, which will
replace the existing one.
`geom_smooth()` using formula = 'y ~ x'