Lab 10: Distances and Projections - The Border Zone

ESS 330 - Quantitative Reasoning

# spatial data science
library(tidyverse)
Warning: package 'lubridate' was built under R version 4.4.3
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sf)
Warning: package 'sf' was built under R version 4.4.3
Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE
library(units)
Warning: package 'units' was built under R version 4.4.3
udunits database from C:/Users/Zacha/AppData/Local/R/win-library/4.4/units/share/udunits/udunits2.xml
library(flextable)
Warning: package 'flextable' was built under R version 4.4.3

Attaching package: 'flextable'

The following object is masked from 'package:purrr':

    compose
# Data
library(AOI)
library(USAboundaries)
library(USAboundariesData)

Attaching package: 'USAboundariesData'

The following object is masked from 'package:AOI':

    zipcodes
library(rnaturalearthdata)
library(readr)
library(here)
Warning: package 'here' was built under R version 4.4.3
here() starts at C:/Users/Zacha/github/CSU/ESS_330/csu_ess_330
# Visualization
library(gghighlight)
Warning: package 'gghighlight' was built under R version 4.4.3
library(ggrepel)
Warning: package 'ggrepel' was built under R version 4.4.3
library(ggplot2)
library(knitr)
Warning: package 'knitr' was built under R version 4.4.3
# 1.1 Define eqdc projection
eqdc <- '+proj=eqdc +lat_0=40 +lon_0=-96 +lat_1=20 +lat_2=60 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs'

# 1.2 - Get USA state boundaries
if (!requireNamespace("AOI", quietly = TRUE)) {
  remotes::install_github("mikejohnson51/AOI")
}
states <- aoi_get(state = 'conus') %>%
  st_transform(eqdc)

# 1.3 - Get country boundaries for Mexico, the United States of America, and Canada
mx_us_can <- aoi_get(country = c("MX", "CA", "USA")) %>%
  st_transform(eqdc)

# 1.4 - Get city locations from the CSV file
uscities <- read_csv(here("data", "lab_data", "simplemaps_uscities_basic", "uscities.csv"))
Rows: 31254 Columns: 17
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (9): city, city_ascii, state_id, state_name, county_fips, county_name, s...
dbl (6): lat, lng, population, density, ranking, id
lgl (2): military, incorporated

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Convert cities to sf object
cities_sf <- uscities %>% 
  st_as_sf(coords = c("lng", "lat"), crs = 4326, remove = FALSE) %>% 
  st_transform(5070)

# Get US state boundaries and cast to MULTILINESTRING
states_all <- us_states()
state_boundaries <- st_cast(states_all, "MULTILINESTRING") %>% 
  st_transform(5070)

# Filter cities (remove AK, HI, PR)
cities_sf <- cities_sf %>%
  filter(!state_id %in% c("AK", "HI", "PR"))

# GCS - create sf object in EPSG:4269 from uscities
df_sf_gcs <- uscities %>%
  st_as_sf(coords = c("lng", "lat"), crs = 4269, remove = FALSE)
# Question 2
# 2.1 - Distance to USA Border (coastline or national) (km)
# Convert USA state boundaries to a single MULTILINESTRING with resolved borders
proj_crs <- 2163

usa_border <- states %>% 
  st_union() %>% 
  st_cast("MULTILINESTRING")

cities_sf <- st_transform(cities_sf, 5070)
usa_border <- st_transform(usa_border, 5070)

state_boundaries_lines <- st_cast(state_boundaries, "MULTILINESTRING")
distance_matrix <- st_distance(cities_sf, state_boundaries_lines)
min_distances <- apply(distance_matrix, 1, min)

cities_distance <- cities_sf %>% 
  mutate(
    dist_to_border_m = as.numeric(st_distance(geometry, usa_border, by_element = FALSE)),
    dist_to_border_km = round(dist_to_border_m / 1000, 1)
  )

farthest_cities_border <- cities_distance %>% 
  arrange(desc(dist_to_border_km)) %>% 
  slice_head(n = 5) %>% 
  select(city, state_name = state_id, distance_km = dist_to_border_km)

farthest_cities_border %>% 
  flextable() %>% 
  set_header_labels(
    city = "City",
    state_name = "State",
    distance_km = "Distance to USA Border (km)"
  ) %>% 
  autofit()

City

State

Distance to USA Border (km)

geometry

Barnard

KS

1,092.2

[[XY]]

Ada

KS

1,092.1

[[XY]]

Minneapolis

KS

1,091.8

[[XY]]

Bennington

KS

1,091.0

[[XY]]

Talmage

KS

1,090.5

[[XY]]

# 2.2 - Distance to States (km)
states_proj <- st_transform(states, 5070)
states_combined <- st_combine(states_proj)
cities <- st_cast(states_combined, "MULTILINESTRING")

state_boundaries_preserved <- states_proj %>% 
  st_combine() %>% 
  st_cast("MULTILINESTRING")

cities_sf <- st_transform(cities_sf, 5070)
state_boundaries_lines <- st_transform(state_boundaries_lines, 5070)

distance_matrix <- st_distance(cities_sf, state_boundaries_lines)

min_distances <- apply(distance_matrix, 1, min)

cities_distance <- cities_sf %>%
  mutate(
    dist_to_state_border_m = as.numeric(min_distances),
    dist_to_state_border_km = round(dist_to_state_border_m / 1000, 1)  
  )

top5_farthest <- cities_distance %>% 
  arrange(desc(dist_to_state_border_km)) %>% 
  slice_head(n = 5) %>% 
  select(city, state_id, dist_to_state_border_km)

flextable(top5_farthest) %>% 
  set_header_labels(
    city = "City",
    state_id = "State",
    dist_to_state_border_km = "Distance to State Border(km)"
  ) %>% 
  autofit()

City

State

Distance to State Border(km)

geometry

Briggs

TX

313.0

[[XY]]

Lampasas

TX

311.7

[[XY]]

Bertram

TX

307.1

[[XY]]

Kempner

TX

304.9

[[XY]]

Florence

TX

301.9

[[XY]]

# 2.3 - Distance to Mexico (km)
# Isolate Mexico border and convert to MULTILINESTRING
mexico <- mx_us_can %>% 
  filter(admin == "Mexico") %>% 
  st_transform(5070)

mexico_border <- mexico %>% 
  st_cast("MULTILINESTRING")

dist_to_mexico <- st_distance(cities_sf, mexico_border)
min_dist_mexico <- apply(dist_to_mexico, 1, min)

cities_distance <- cities_sf %>% 
  mutate(dist_to_mexico_m = as.numeric(min_dist_mexico),
         dist_to_mexico_km = round(dist_to_mexico_m / 1000, 1))

farthest_mexico <- cities_distance %>% 
  arrange(desc(dist_to_mexico_km)) %>% 
  slice_head(n = 5) %>% 
  select(city, state_name = state_id, distance_km = dist_to_mexico_km)

flextable(farthest_mexico) %>% 
  set_header_labels(
    city = "City",
    state_name = "State",
    distance_km = "Distance to Mexican Border (km)"
  ) %>% 
  autofit()

City

State

Distance to Mexican Border (km)

geometry

Grand Isle

ME

3,324.0

[[XY]]

Caribou

ME

3,292.8

[[XY]]

Presque Isle

ME

3,277.3

[[XY]]

Oakfield

ME

3,218.4

[[XY]]

Vanceboro

ME

3,207.9

[[XY]]

# 2.4 - Distance to Canada (km)
canada <- mx_us_can %>% 
  filter(admin == "Canada") %>% 
  st_transform(5070)

canada_border <- st_cast(canada, "MULTILINESTRING")

dist_matrix_canada <- st_distance(cities_sf, canada_border)
min_distances_canada <- apply(dist_matrix_canada, 1, min)
cities_distance <- cities_sf %>% 
  mutate(
    dist_to_canada_m = as.numeric(min_distances_canada),
    dist_to_canada_km = round(dist_to_canada_m / 1000, 1)
  )

farthest_canada <- cities_distance %>% 
  arrange(desc(dist_to_canada_km)) %>% 
  slice_head(n = 5) %>% 
  select(city, state_name = state_id, distance_km = dist_to_canada_km)

flextable(farthest_canada) %>% 
  set_header_labels(
    city = "City",
    state_name = "State",
    distance_km = "DIstance to Canadian Border (km)"
  ) %>% 
  autofit()

City

State

DIstance to Canadian Border (km)

geometry

Guadalupe Guerra

TX

2,255.6

[[XY]]

Sandoval

TX

2,254.8

[[XY]]

Fronton

TX

2,253.9

[[XY]]

Fronton Ranchettes

TX

2,251.1

[[XY]]

Evergreen

TX

2,251.0

[[XY]]

# Question 3
# 3.1 Data
world <- rnaturalearth::ne_countries(scale = "medium", returnclass = "sf") 
continents <- world %>% 
  filter(continent %in% c("North America", "South America", "Europe")) 

states <- USAboundaries::us_states()
conus_states <- states %>% 
  filter(!state_abbr %in% c("AK", "HI", "PR"))

conus_outline <- st_union(conus_states)

top10_cities <- cities_sf %>% 
  arrange(desc(population)) %>% 
  slice_head(n = 10)

ggplot() +
  geom_sf(data = continents, fill = "lightgrey", color = "black", lty = "solid", size = 0.2) +
  geom_sf(data = conus_states, fill = "lightpink", color = "black", lty = "solid", size = 0.3) +
  geom_label_repel(
    data = top10_cities,
    aes(label = city, geometry = geometry),
    stat = "sf_coordinates",
    min.segment.length = 0,
    size = 3,
    box.padding = 0.3,
    point.padding = 0.2
  ) +
  coord_sf(xlim = c(-130, -60), ylim = c(20, 60), expand = FALSE) +
  theme_void() +
  labs(
    title = "Top 10 Largest Cities (by population) in the Continental US",
    caption = "Data Sources: Natural Earth, USAboundaries, uscities.csv"
  )
Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
give correct results for longitude/latitude data

# 3.2 City Distance from the Border
usa <- aoi_get(country = "USA")
usa_border <- st_cast(usa, "MULTILINESTRING")
usa_border <- st_transform(usa_border, 5070) 

distance_matrix <- st_distance(cities_sf, usa_border)
min_distances_km <- apply(distance_matrix, 1, min) / 1000 
cities_with_dist <- cities_sf %>%
  mutate(dist_to_border_km = as.numeric(min_distances_km))

ggplot() +
  geom_sf(data = continents, fill = "lightgrey", color = "black", lty = "solid", size = 0.2) +
  geom_sf(data = conus_outline, fill = NA, color = "black", lty = "dashed", size = 0.3) +
  geom_sf(data = usa_border, color = "black", size = 0.6, lty = "solid") +
  # cities dist
  geom_sf(data = conus_states, fill = "white", color = "black", lty = "solid", size = 0.3) +
  geom_sf(data = cities_with_dist, aes(color = cities_distance$dist_to_border_km), size = 1, alpha = 0.6) +
  scale_color_viridis_c("cividis", name = "Distance to Border (km)", option = "plasma") +
  geom_sf(data = farthest_cities_border, color = "black", fill = "darkblue", size = 2, shape = 21, stroke = 0.5) +
  geom_label_repel(
    data = farthest_cities_border, 
    aes(label = city, geometry = geometry), 
    stat = "sf_coordinates",
    min.segment.length = 0,
    fill = "pink",
    size = 3,
    box.padding = 0.3,
    point.padding = 0.2
  ) +
  geom_sf(data = usa_border, fill = NA, lty = "solid", size = 0.5) +
  coord_sf(xlim = c(-130, -60), ylim = c(20, 60), expand = FALSE) +
  theme_void() +
  labs(
    title = "USA Cities by Their Distance From the National Border",
    subtitle = "Top 5 Farthest Cities Highlighted and Labeled",
    caption = "Data: Natural Earth, USAboundaries, uscities.csv"
  )
Warning: Unknown or uninitialised column: `dist_to_border_km`.
st_point_on_surface may not give correct results for longitude/latitude data

# 3.3 City Distance from Nearest State
states <- us_states()
state_borders <- states %>%
  st_cast("MULTILINESTRING") %>%
  st_transform(5070)

distance_matrix <- st_distance(cities_sf, state_borders)
min_distances_km <- apply(distance_matrix, 1, min) / 1000 

cities_with_dist <- cities_sf %>%
  mutate(dist_to_state_border_km = as.numeric(min_distances_km))

ggplot() +
  geom_sf(data = continents, fill = "lightgrey", color = "black", lty = "solid", size = 0.2) +
  geom_sf(data = conus_outline, fill = NA, color = "black", lty = "dashed", size = 0.3) +
  geom_sf(data = usa_border, color = "black", size = 0.6, lty = "solid") +
  # cities dist
  geom_sf(data = conus_states, fill = "white", color = "black", lty = "solid", size = 0.3) +
  geom_sf(data = cities_with_dist, aes(color = cities_distance$dist_to_border_km), size = 1, alpha = 0.6) +
  scale_color_viridis_c("cividis", name = "Distance to Border (km)", option = "plasma") +
  geom_sf(data = farthest_cities_border, color = "black", fill = "darkblue", size = 2, shape = 21, stroke = 0.5) +
  geom_label_repel(
    data = farthest_cities_border, 
    aes(label = city, geometry = geometry), 
    stat = "sf_coordinates",
    min.segment.length = 0,
    fill = "pink",
    size = 3,
    box.padding = 0.3,
    point.padding = 0.2
  ) +
  geom_sf(data = usa_border, fill = NA, lty = "solid", size = 0.5) +
  coord_sf(xlim = c(-130, -60), ylim = c(20, 60), expand = FALSE) +
  theme_void() +
  labs(
    title = "USA Cities by Their Distance From the National Border",
    subtitle = "Top 5 Farthest Cities Highlighted and Labeled",
    caption = "Data: Natural Earth, USAboundaries, uscities.csv"
  )
Warning: Unknown or uninitialised column: `dist_to_border_km`.
st_point_on_surface may not give correct results for longitude/latitude data

# 3.4 Equidistance boundary from Mexico and Canada
# Reset dist to mex and can values to make sure correct values are used
conus_cities_sf <- cities_sf %>%
  filter(!state_id %in% c("AK", "HI"))

usa_border <- st_transform(usa_border, st_crs(conus_cities_sf))
mexico_border <- st_transform(mexico_border, st_crs(conus_cities_sf))
canada_border <- st_transform(canada_border, st_crs(conus_cities_sf))

dist_to_canada <- as.numeric(st_distance(conus_cities_sf, canada_border))
dist_to_mexico <- as.numeric(st_distance(conus_cities_sf, mexico_border))

equidistant_cities <- conus_cities_sf %>%
  mutate(dist_to_canada = dist_to_canada,
         dist_to_mexico = dist_to_mexico,
         dist_diff = abs(dist_to_mexico - dist_to_canada)) %>%
  filter(dist_diff <= 100)

top5_equidistant_pop <- equidistant_cities %>%
  arrange(desc(population)) %>%
  slice_head(n = 5)

ggplot() +
  geom_sf(data = states %>% st_transform(st_crs(conus_cities_sf)), fill = "lightgrey", color = "black", size = 0.3) +
  geom_sf(data = equidistant_cities, aes(color = dist_diff), size = 1) +
  gghighlight(dist_diff <= 100, use_direct_label = FALSE) +
  geom_label_repel(
    data = top5_equidistant_pop,
    aes(label = city, geometry = geometry),
    stat = "sf_coordinates",
    size = 3,
    box.padding = 0.3,
    point.padding = 0.2
  ) +
  scale_color_viridis_c(name = "Distance Difference\n(Mexico vs Canada)") +
  theme_minimal() +
  coord_sf(xlim = st_bbox(states %>% st_transform(st_crs(conus_cities_sf)))[c(1, 3)],
           ylim = st_bbox(states %>% st_transform(st_crs(conus_cities_sf)))[c(2, 4)],
           expand = FALSE) +
  labs(
    title = "Cities Approximately Equidistant (±100 km) from Mexico and Canada",
    subtitle = "Top 5 most populous cities in this zone labeled",
    caption = "Data source: uscities.csv, USAboundaries, Natural Earth"
  )
Warning: Could not calculate the predicate for layer 1; ignored

conus_cities <- cities_sf %>%
  filter(!state_id %in% c("AK", "HI"))

usa_border <- st_transform(usa_border, st_crs(conus_cities))
mexico_border <- st_transform(mexico_border, st_crs(conus_cities))
canada_border <- st_transform(canada_border, st_crs(conus_cities))

cities_proj <- st_transform(cities_sf, proj_crs)
Warning in CPL_crs_from_input(x): GDAL Message 1: CRS EPSG:2163 is deprecated.
Its non-deprecated replacement EPSG:9311 will be used instead. To use the
original CRS, set the OSR_USE_NON_DEPRECATED configuration option to NO.
mexico_proj <- st_transform(mexico_border, proj_crs)
canada_proj <- st_transform(canada_border, proj_crs)

usa_border_proj <- cities_sf %>%
  filter(!state_id %in% c("AK", "HI"))

dist_mexico_km <- apply(st_distance(cities_proj, mexico_proj), 1, min) / 1000
dist_canada_km <- apply(st_distance(cities_proj, canada_proj), 1, min) / 1000

cities_with_dist <- cities_proj %>%
  mutate(
    dist_to_mexico = dist_mexico_km,
    dist_to_canada = dist_canada_km,
    dist_diff = abs(dist_to_mexico - dist_to_canada)
  )

middle_band <- cities_with_dist %>%
  filter(dist_diff <= 100)

middle_top5 <- middle_band %>%
  arrange(desc(population)) %>%
  slice_head(n = 5)

ggplot(data = cities_with_dist) +  # Changed from cities_distance to cities_with_dist
  geom_sf(data = continents, fill = "lightgrey", color = "black", lty = "solid", size = 0.2) +
  geom_sf(data = conus_outline, fill = NA, color = "black", lty = "dashed", size = 0.5) +
  geom_sf(data = cities_with_dist, aes(color = dist_diff), fill = NA) + 
  geom_sf(aes(color = dist_diff), size = 1) +
  gghighlight(dist_diff <= 100, use_direct_label = FALSE) +
  geom_sf(data = middle_top5, size = 2, shape = 21, fill = "red") +
  geom_label_repel(
    data = middle_top5,
    aes(label = city, geometry = geometry),
    stat = "sf_coordinates",
    size = 3,
    box.padding = 0.3,
    point.padding = 0.2
  ) +
  coord_sf(xlim = c(-130, -60), ylim = c(20, 60), expand = FALSE) +
  theme_void() +
  scale_color_viridis_c(name = "Equidistant Distance\n(Mexico vs Canada)") +
  labs(
    title = "Cities Approximately Equidistant (±100 km) from Mexico and Canada",
    subtitle = "Top 5 most populous cities in this zone labeled",
    caption = "Data source: Natural Earth, USAboundaries, uscities.csv"
  )
Warning: Could not calculate the predicate for layer 1, layer 2; ignored
st_point_on_surface may not give correct results for longitude/latitude data

# Question 4
# 4.1 Quantifying Border Zone
dist_matrix_usa <- st_distance(cities_sf, usa_border)
min_distances <- apply(dist_matrix_usa, 1, min)
threshold_km <- 160

cities_distance <- cities_sf %>%
  mutate(dist_to_national_border_m = as.numeric(min_distances),
         dist_to_national_border_km = round(dist_to_national_border_m / 1000, 1))

border_zone <- cities_distance %>% 
  filter(dist_to_national_border_km <= threshold_km)

num_cities_in_zone <- nrow(border_zone)
pop_in_zone <- sum(border_zone$population, na.rm = TRUE)
total_pop <- sum(cities_distance$population, na.rm = TRUE)
percentage_in_zone <- round((pop_in_zone / total_pop) * 100, 2)

summary_df <- tibble::tibble(
  Metric = c(
    "Number of cities within 160 km of national border",
    "Population living within 160 km of border",
    "Percentage of U.S. population in this zone",
    "ACLU Estimate (approx. 2/3 of U.S. population)"
  ),
  Value = c(
    format(num_cities_in_zone, big.mark = ","),
    format(pop_in_zone, big.mark = ","),
    paste0(percentage_in_zone, "%"),
    "≈ 66%"
  )
)

flextable(summary_df) %>%
  autofit()

Metric

Value

Number of cities within 160 km of national border

10,632

Population living within 160 km of border

228,864,968

Percentage of U.S. population in this zone

57.76%

ACLU Estimate (approx. 2/3 of U.S. population)

≈ 66%

# 4.2 Mapping Border Zone
conus_cities <- cities_sf %>%
  filter(!state_id %in% c("AK", "HI"))

distance_matrix <- st_distance(conus_cities, usa_border)
min_distances <- apply(distance_matrix, 1, min)

conus_cities <- conus_cities %>%
  mutate(
    dist_to_national_border_m = as.numeric(min_distances),
    dist_to_national_border_km = round(dist_to_national_border_m / 1000, 1)
  )

danger_zone <- conus_cities %>%
  filter(dist_to_national_border_km <= threshold_km)

top10_danger <- danger_zone %>%
  arrange(desc(population)) %>%
  slice_head(n = 10)

ggplot(data = conus_cities) +
  geom_sf(aes(color = dist_to_national_border_km), size = 1) +
  scale_color_gradient(low = "orange", high = "darkred", name = "Distance to National Border (km)") +
  gghighlight(dist_to_national_border_km <= threshold_km, use_direct_label = FALSE) +
  geom_label_repel(
    data = top10_danger,
    aes(label = city, geometry = geometry),
    stat = "sf_coordinates",
    size = 3,
    box.padding = 0.3,
    point.padding = 0.2
  ) +
  theme_minimal() +
  labs(
    title = "Cities Within 100-Mile Danger Zone of U.S. National Border",
    subtitle = "Top 10 most populous cities within the Danger Zone",
    caption = "Data source: uscities.csv, USAboundaries, Natural Earth"
  )