# 20 February 2025
# This is the R script for assignment 08, where the COVID-19 data from
# assignment 06 will be visualized, expanding on the visualization from
# assignment 07.

# Prepare packages
# Every package whose functions are called unqualified further down must be
# attached here; readr is only used via `readr::`, so it is not attached.
library(dplyr)   # inner_join(), group_by(), summarize(), %>%
library(tidyr)   # pivot_longer()
library(ggplot2) # ggplot(), geom_line(), facet_grid(), labs(), theme_*()
library(scales)  # label_number()
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
Warning: package 'scales' was built under R version 4.4.3
# Read in and store the NY-Times county-level COVID-19 data.
# The file is downloaded from GitHub on every run, so this step needs
# network access. read_csv() guesses the column types and reports them.
covid <- readr::read_csv(
  "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"
)
Rows: 2502832 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): county, state, fips
dbl (2): cases, deaths
date (1): date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Create state-region data frame.
# state.region, state.abb and state.name are parallel built-in vectors, so
# row i of df describes a single state.
df <- data.frame(
  region = state.region,
  abbr = state.abb,
  state = state.name
)

# Check data frame
head(df)
region abbr state
1 South AL Alabama
2 West AK Alaska
3 West AZ Arizona
4 South AR Arkansas
5 West CA California
6 West CO Colorado
# Join df to covid data.
# inner_join() keeps only rows whose `state` value occurs in both tables, so
# covid rows for any state name not present in df are dropped here.
covid_joined <- inner_join(df, covid, by = "state")

# Aggregate (split-apply) COVID-19 data based on region.
# Counts are divided by 1000 before summing, so Cases and Deaths are
# expressed in thousands. The result remains grouped by region because
# summarize() only drops the last grouping level (date).
covid_summary <- covid_joined %>%
  group_by(region, date) %>% # Split by region and date
  summarize(
    Cases = sum(cases / 1000, na.rm = TRUE), # Apply sum function
    Deaths = sum(deaths / 1000, na.rm = TRUE)
  )
`summarise()` has grouped output by 'region'. You can override using the
`.groups` argument.
# Pivot data from wide to long format: the Cases and Deaths columns become
# one `metric` column (the former column name) and one `count` column (the
# value), giving one row per region, date and metric.
covid_long <- covid_summary %>%
  pivot_longer(
    cols = c(Cases, Deaths),
    names_to = "metric",
    values_to = "count"
  )

# Plot the long data: one panel per metric (rows) and region (columns).
# The y scale is free per row because death counts are far smaller than
# case counts. Values are raw counts divided by 1000, i.e. thousands, not a
# per-capita rate, so the labels say "thousands".
us_regional_covid_trends <- ggplot(
  covid_long,
  aes(x = date, y = count, color = region)
) +
  geom_line() +
  facet_grid(metric ~ region, scales = "free_y") +
  labs(
    title = "COVID-19 Cases and Deaths by Region",
    subtitle = "Since 2020 (in thousands)",
    caption = "Based on NY-Times COVID-19 Data.",
    x = "Date",
    y = "Cumulative Count (thousands)",
    color = "Regions"
  ) +
  theme_bw() +
  theme(
    # The facet column strips already name the region, so no legend is shown.
    legend.position = "none",
    axis.text.y = element_text(angle = 45, hjust = 1)
  ) +
  scale_x_date(date_breaks = "8 month", date_labels = "%b %y") +
  scale_y_continuous(labels = label_number())

# Save plot as an image
# ggsave(
#   filename = "img/us_regional_covid_trends.png",
#   plot = us_regional_covid_trends,
#   width = 10,
#   height = 6
# )