# 20 February 2025
# ESS 330 - Daily Assignment 07
# This file is a copy of the R script for assignment 07, in which the COVID-19
# data from assignment 06 is visualized.

## COVID-19 Data
### Data
# We are going to practice some data wrangling skills using a real-world
# data set about COVID cases curated and maintained by the New York Times. The
# data are archived on a GitHub repo [here](https://github.com/nytimes/covid-19-data).

# Prepare libraries
library(tidyverse)
Warning: package 'lubridate' was built under R version 4.4.3
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)

# Download the state-level NY Times COVID-19 table from GitHub and keep it in
# `covid`; the wrangling and plotting steps in this script all start from it.
url <- "https://raw.githubusercontent.com/nytimes/covid-19-data/refs/heads/master/us-states.csv"
covid <- read_csv(url)
Rows: 61942 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): state, fips
dbl (2): cases, deaths
date (1): date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
### Question 1
# Most recent date in the data. `na.rm = TRUE` keeps a single missing date from
# turning `max_date` into NA, which would make the filter below match no rows.
max_date <- max(covid$date, na.rm = TRUE)
most_recent_data <- filter(covid, date == max_date)

# Names of the 6 states with the highest case counts on that date.
# `with_ties = FALSE` caps the result at 6 rows, matching the former
# arrange(desc(cases)) + slice(1:6).
top_6_states <- most_recent_data %>%
  slice_max(cases, n = 6, with_ties = FALSE) %>%
  pull(state)

# Time series for those states: one row per state and date.
# `.groups = "drop"` returns an ungrouped tibble, so later verbs are not
# silently applied per state and summarise() no longer emits its
# "grouped output" message.
top_6_states_data <- covid %>%
  filter(state %in% top_6_states) %>%
  group_by(state, date) %>%
  summarize(state_cases = sum(cases, na.rm = TRUE), .groups = "drop")
`summarise()` has grouped output by 'state'. You can override using the
`.groups` argument.
# Faceted line chart: one panel per state, case count on y against date on x
top_6_states_plot <- ggplot(
  data = top_6_states_data,
  mapping = aes(x = date, y = state_cases, group = state, color = state)
) +
  geom_line() +
  facet_wrap(~state) +
  # One x-axis tick every 8 months, labelled as abbreviated month + 2-digit year
  scale_x_date(date_breaks = "8 month", date_labels = "%b %y") +
  labs(
    title = "Cumulative Case Counts: COVID-19 Pandemic",
    caption = "Based on NY-Times COVID-19 Data.",
    x = "Date",
    y = "Number of Cases",
    color = "State"
  ) +
  theme_bw() +
  theme(
    # Each facet strip already names its state, so the color legend is hidden
    legend.position = "none",
    # Slant the date labels so neighbouring ticks do not overlap
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Print the plot to the active graphics device
top_6_states_plot
# Save the top_6_states_plot as an image
#ggsave(top_6_states_plot,
#       file = "images\\top_6_state_cases_plot.jpg",
#       width = 10,
#       height = 6,
#       units = c("in"))

### Question 2
# Sum the case counts of all states for each date, giving one US-wide total per
# date. The plot title below presents these totals as cumulative counts.
us_daily_cases_data <- covid %>%
  group_by(date) %>%
  summarize(us_cases = sum(cases, na.rm = TRUE))

# Column chart of the US-wide totals over time
us_daily_cases_plot <- ggplot(
  us_daily_cases_data,
  aes(x = date, y = us_cases)
) +
  # `color` sets the bar outline (not the fill) to dark red
  geom_col(color = "darkred") +
  labs(
    title = "Cumulative Case Counts: US COVID-19 Pandemic",
    # `author` is not a label that ggplot2 draws, so the credit previously
    # passed as labs(author = ...) never appeared; it now lives in the caption.
    caption = "Based on NY-Times COVID-19 Data.\nAuthor: Zachary Cramton",
    x = "Date",
    y = "Number of Cases"
  ) +
  theme_bw() +
  # Slant the date labels so neighbouring ticks do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # One x-axis tick every 8 months, labelled as abbreviated month + 2-digit year
  scale_x_date(date_breaks = "8 month", date_labels = "%b %y")

# Print the plot to the active graphics device
us_daily_cases_plot
# Save the us_daily_cases_plot as an image
#ggsave(us_daily_cases_plot,
#       file = "images\\us_daily_cases_plot.jpg",
#       width = 10,
#       height = 10,
#       units = c("in"))