# Load necessary libraries
library (tidyverse)
Warning: package 'lubridate' was built under R version 4.4.3
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Warning: package 'dataRetrieval' was built under R version 4.4.3
library (lubridate)
library (timetk)
Warning: package 'timetk' was built under R version 4.4.3
Warning: package 'modeltime' was built under R version 4.4.3
── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ──
✔ broom 1.0.7 ✔ rsample 1.2.1
✔ dials 1.3.0 ✔ tune 1.2.1
✔ infer 1.0.7 ✔ workflows 1.1.4
✔ modeldata 1.4.0 ✔ workflowsets 1.1.0
✔ parsnip 1.2.1 ✔ yardstick 1.3.2
✔ recipes 1.1.0
Warning: package 'scales' was built under R version 4.4.3
── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
✖ scales::discard() masks purrr::discard()
✖ dplyr::filter() masks stats::filter()
✖ recipes::fixed() masks stringr::fixed()
✖ dplyr::lag() masks stats::lag()
✖ yardstick::spec() masks readr::spec()
✖ recipes::step() masks stats::step()
• Search for functions across packages at https://www.tidymodels.org/find/
Warning: package 'prophet' was built under R version 4.4.3
Loading required package: Rcpp
Attaching package: 'Rcpp'
The following object is masked from 'package:rsample':
populate
Loading required package: rlang
Attaching package: 'rlang'
The following objects are masked from 'package:purrr':
%@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
flatten_raw, invoke, splice
# Data import for: Cache la Poudre River at Mouth (USGS site 06752260)
# Bringing in historical (training) data (2013-2023)
poudre_hist_flow <- readNWISdv (siteNumber = "06752260" , # Download data from USGS for site 06752260
parameterCd = "00060" , # Parameter code 00060 = discharge in cfs)
startDate = "2013-01-01" , # Set the start date
endDate = "2023-12-31" ) %>% # Set the end date
renameNWISColumns () %>% # Rename columns to standard names (e.g., "Flow", "Date")
mutate (date = floor_date (Date, "month" )) %>% # Convert to first day of each month
group_by (date) %>% # Group the data by the new monthly Date
summarize (Flow = mean (Flow, na.rm = TRUE )) %>% # Calculate the monthly average flow
ungroup ()
GET:https://waterservices.usgs.gov/nwis/dv/?site=06752260&format=waterml%2C1.1&ParameterCd=00060&StatCd=00003&startDT=2013-01-01&endDT=2023-12-31
# Bring in recent (testing) data (2024)
poudre_2024_obs_flow <- readNWISdv (siteNumber = "06752260" ,
parameterCd = "00060" ,
startDate = "2024-01-01" ,
endDate = "2024-12-31" ) %>%
renameNWISColumns () %>%
mutate (date = floor_date (Date, "month" )) %>%
group_by (date) %>%
summarize (Flow = mean (Flow, na.rm = TRUE )) %>%
ungroup ()
GET:https://waterservices.usgs.gov/nwis/dv/?site=06752260&format=waterml%2C1.1&ParameterCd=00060&StatCd=00003&startDT=2024-01-01&endDT=2024-12-31
# Log normalization for training data
poudre_hist_flow <- poudre_hist_flow %>%
mutate (log_flow = log1p (Flow))
# Define forcasting models
# Prophet model
prophet_fit <- prophet_reg () %>%
set_engine ("prophet" ) %>%
fit (log_flow ~ date, data = poudre_hist_flow)
Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
# ARIMA model
arima_fit <- arima_reg () %>%
set_engine ("auto_arima" ) %>%
fit (log_flow ~ date, data = poudre_hist_flow)
frequency = 12 observations per 1 year
# Combine models into modeltime table
models_tbl <- modeltime_table (prophet_fit, arima_fit)
# Fortcast the next 12 months w/ test data
future_dates <- poudre_hist_flow %>%
future_frame (.date_var = date, .length_out = 12 )
forcast_tbl <- models_tbl %>%
modeltime_forecast (
new_data = future_dates,
actual_data = poudre_hist_flow,
keep_data = TRUE
) %>%
mutate (.value = expm1 (.value)) # Back-transform predictions to original scale
# Join predicted vs observed for 2024
comparison_tbl <- forcast_tbl %>%
filter (.key == "prediction" ) %>%
select (.model_desc, .index, .value) %>%
rename (
predicted = .value,
date = .index,
model_desc = .model_desc
) %>%
inner_join (poudre_2024_obs_flow, by = "date" ) %>%
rename (observed = Flow) %>%
mutate (model_desc = case_when (
str_detect (model_desc, "ARIMA" ) ~ "ARIMA" ,
str_detect (model_desc, "PROPHET" ) ~ "Prophet" ,
TRUE ~ model_desc))
# Compute R-sq values
comparison_tbl %>%
group_by (model_desc) %>%
summarize (r2 = summary (lm (observed ~ predicted))$ r.squared)
# A tibble: 2 × 2
model_desc r2
<chr> <dbl>
1 ARIMA 0.922
2 Prophet 0.880
# Plot Predicted vs Observed Forcast Values
ggplot (comparison_tbl, aes (x = predicted, y = observed, color = model_desc)) +
geom_point (size = 2 ) +
geom_smooth (method = "lm" , se = FALSE , linetype = "solid" ) +
geom_abline (slope = 1 , intercept = 0 , linetype = "dashed" , color = "black" ) +
labs (
title = "Forcast (Predicted) vs Observed Monthly Flow for 2024" ,
subtitle = "Model comparison using ARIMA and Prophet" ,
x = "Predicted Flow (cfs)" ,
y = "Observed Flow (cfs)" ,
color = "Model"
) +
theme_minimal () +
theme (legend.position = "top" ,
plot.title = element_text (hjust = 0.5 ), # Center title
plot.subtitle = element_text (hjust = 0.5 ) # Center
) +
coord_fixed (ratio = 1 ) + # Ensures 1:1 aspect ratio
coord_cartesian (xlim= c (0 , 600 ), ylim = c (0 ,600 )) # Fix axis scales
Coordinate system already present. Adding new coordinate system, which will
replace the existing one.
`geom_smooth()` using formula = 'y ~ x'