# SPDS
library(tidyverse)
## Warning: replacing previous import 'vctrs::data_frame' by 'tibble::data_frame'
## when loading 'dplyr'
## ── Attaching packages ─────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.1
## ✓ tidyr   1.1.1     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.1.1, PROJ 6.3.1
library(units)
## udunits system database from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/units/share/udunits
# Data
library(USAboundaries)
library(rnaturalearth)

# Visualization
library(gghighlight)
library(ggrepel)
library(knitr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(readxl)
library(leaflet)

Question #1 : Tesselated Surfaces and Plots

#Tesselation plot function

plot_tess = function(data, title){
  ggplot() + 
    geom_sf(data = data, fill = "white", col = "navy", size = .2) +   
    theme_void() +
    labs(title = title, caption = paste("This tesselation has:", nrow(data), "tiles" )) +
    theme(plot.title = element_text(hjust = .5, color =  "navy", face = "bold"))
}

1.1 Gathering Conus County Data

conus = USAboundaries::us_counties(resolution = "low") %>%
  filter(!state_name %in% c("Alaska", "Puerto Rico", "Hawaii")) %>%
  st_transform(5070)
plot_tess(conus, "CONUS Counties")

1.2 County Centroids

countycentroid = st_centroid(conus) %>%
  st_union() %>%
  st_cast("MULTIPOINT")
## Warning in st_centroid.sf(conus): st_centroid assumes attributes are constant
## over geometries of x

1.3 Tesselations and Coverages

Voroni Tesselation over County Centroids

vgrid = st_voronoi(countycentroid) %>% 
  st_cast() %>%
  st_as_sf() %>%
  mutate(id = 1:n())

vgrid = st_intersection(vgrid, st_union(conus))
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
plot_tess(vgrid, "Voronoi Coverage") + 
  geom_sf(data = countycentroid, col = "darkred", size = .2)

Delaney Triangulation

tgrid = st_triangulate(countycentroid) %>% 
  st_cast() %>% 
  st_as_sf() %>% 
  mutate(id = 1:n())

tgrid = st_intersection(tgrid, st_union(conus))
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
plot_tess(tgrid, "Triangulation") +
  geom_sf(data = countycentroid, col = "darkred", size = .2)

Square Grid

sq_grid = st_make_grid(conus, n = 70) %>% 
  st_as_sf() %>% 
  mutate(id = 1:n())
plot_tess(sq_grid, "Square Coverage Grid")

Hex Grid

hex_grid = st_make_grid(conus, n = 70, square = FALSE) %>% 
  st_as_sf() %>% 
  mutate(id = 1:n())
plot_tess(hex_grid, "Hexagonal Coverage Grid")

Question #2

sum_tess = function(data, string){
  variable = data %>%
    mutate(area = st_area(data)) %>%
    drop_units() %>%
    mutate(area = area/100000) 
  dataframe = data.frame(type = string, 
                         numfeat = nrow(data), 
                         meanarea = mean(variable$area),
                         stdev = sd(variable$area),
                         totalarea = sum(variable$area))
  }
conus_test = sum_tess(conus, "original")
vgrid_sum = sum_tess(vgrid, "voronoi")
tgrid_sum = sum_tess(tgrid, "Triangulation")
hexgrid_sum = sum_tess(hex_grid, "Hex Grid")
sqgrid_sum = sum_tess(sq_grid, "Square Grid")
tess_summary = bind_rows(conus_test, vgrid_sum, tgrid_sum, hexgrid_sum, sqgrid_sum)

tess_sum_table1 = knitr::kable(tess_summary,
                               caption = "Tesselation Summary Traits",
                               col.names = c("Tesselation", "Number of Features", "Mean Feature Area", "Standard Deviation of Feature Area", "Total Area of Coverage"))

tess_sum_table1
Tesselation Summary Traits
Tesselation Number of Features Mean Feature Area Standard Deviation of Feature Area Total Area of Coverage
original 3108 25217.45 34043.25 78375826
voronoi 3108 25217.45 28871.40 78375826
Triangulation 6196 12518.08 15758.00 77561997
Hex Grid 2271 37630.52 0.00 85458910
Square Grid 3108 27281.26 0.00 84790143

##2.5 Voronoi: The voronoi tessellation keeps the number of features the same as the original counties data, covers the same total area, but has a slightly smaller standard deviation of feature size. Triangulated: The triangulated tessellation has almost twice the number of features as the original counties plot and the voronoi plot, with less mean area per feature. Large features are maintained in the western united states while the eastern united states is broken up into tiny fragments. Square grid: The square grid, a tessellation which creates squares of equal area, has the same number of features as the original counties plot and the voronoi plot, but each square is exactly the same size which can help better show regional trends which could be obscured by the difference in feature size in the non-equal area tessellations. Hexgrid: The hexagonal tessellation, like the square grid, is an equal area tessellation which has fewer, but larger features than that of the square grid.

Question #3

dams = readxl::read_xlsx("../data/NID2019_U.xlsx") %>%
  filter(!is.na(LATITUDE), !is.na(LONGITUDE)) %>%
  st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs = 4326) %>%
  st_transform(5070) %>%
  st_filter(conus)
point_in_polygonds = function(points, polygon, ID){
      st_join(polygon, points) %>%
        st_drop_geometry() %>%
        count(get(ID)) %>%
        setNames(c(ID, "n")) %>%
        left_join(polygon, by = ID) %>%
        st_as_sf()
    }
conus_pip = point_in_polygonds(dams, conus, "geoid")
plot(conus_pip)
## Warning: plotting the first 9 out of 13 attributes; use max.plot = 13 to plot
## all

vgrid_pip = point_in_polygonds(dams, vgrid, "id")
plot(vgrid_pip)

tgrid_pip = point_in_polygonds(dams, tgrid, "id")
plot(tgrid_pip)

sqgrid_pip = point_in_polygonds(dams, sq_grid, "id")
plot(sqgrid_pip)

hexgrid_pip = point_in_polygonds(dams, hex_grid, "id")
plot(hexgrid_pip)

ggplot function

plot_counts = function(arg1, title){
   ggplot() + 
    geom_sf(data = arg1, aes(fill = n), col = NA, size = .2) + 
    viridis::scale_fill_viridis(option = "B") + 
    theme_void() + 
    theme(plot.title = element_text(face = "bold", color = "darkblue", hjust = .5, size = 24)) +
    labs(title = paste0(title),
         caption = paste0("Total number of dams present: ", sum(arg1$n)))
}
conus_counts = plot_counts(conus_pip, "Conus Tesselation Dam Spread")
plot(conus_counts)

vgrid_counts = plot_counts(vgrid_pip, "Voronoi Tesselation Dam Spread")
plot(vgrid_counts)

tgrid_counts = plot_counts(tgrid_pip, "Triangulated Tesselation Dam Spread")
plot(tgrid_counts)

sqgrid_counts = plot_counts(sqgrid_pip, "Square Tesselation Dam Spread")
plot(sqgrid_counts)

hexgrid_counts = plot_counts(hexgrid_pip, "Hexagonal Tesselation Dam Spread")
plot(hexgrid_counts)

3.6

MAUP, or the Modifiable Area Unit Problem is a source of statistical error in spatial analysis. The tessellations which preserve county area such as the voronoi tessellation and the triangulated tessellation show that there are many dams located in the central northern part of the united state. These large counties hold more dams as they have larger areas, so this visualization of the data may misconstrue where dams lie regionally, as opposed to county by county. By using an equal area tessellation, such as a square or hexagonal grid, it is shown that most dams actually lie right in the center of the United States. I will use the hexagonal grid going forward.
## 4

dams_of_interestC = dams %>% 
  filter(grepl("C", PURPOSES)) %>%
  select(DAM_NAME, PURPOSES, NID_STORAGE, YEAR_COMPLETED, STATE) %>% 
  st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs=4326)

dams_of_interestI = dams %>% 
  filter(grepl("I", PURPOSES)) %>%
  select(DAM_NAME, PURPOSES, NID_STORAGE, YEAR_COMPLETED, STATE) %>% 
  st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs=4326)

dams_of_interestH = dams %>% 
  filter(grepl("H", PURPOSES)) %>%
  select(DAM_NAME, PURPOSES, NID_STORAGE, YEAR_COMPLETED, STATE) %>% 
  st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs=4326)

dams_of_interestP = dams %>% 
  filter(grepl("P", PURPOSES)) %>%
  select(DAM_NAME, PURPOSES, NID_STORAGE, YEAR_COMPLETED, STATE) %>% 
  st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs=4326)
Cdams_pip = point_in_polygonds(dams_of_interestC, hex_grid, "id")
plot(Cdams_pip)

Idams_pip = point_in_polygonds(dams_of_interestI, hex_grid, "id")
plot(Idams_pip)

Hdams_pip = point_in_polygonds(dams_of_interestH, hex_grid, "id")
plot(Hdams_pip)

Pdams_pip = point_in_polygonds(dams_of_interestP, hex_grid, "id")
plot(Pdams_pip)

plot_counts1 = function(arg1, title){
   ggplot() + 
    geom_sf(data = arg1, aes(fill = n), col = NA, size = .2) + 
    viridis::scale_fill_viridis(option = "B") + 
    theme_void() + 
    theme(plot.title = element_text(face = "bold", color = "darkblue", hjust = .5, size = 24)) +
    gghighlight::gghighlight(n > (mean(n) + sd(n))) +
    labs(title = paste0(title),
         caption = paste0("Total number of dams present: ", sum(arg1$n)))
}
Cdams_counts = plot_counts1(Cdams_pip, "Flood Control Dams Spread")
plot(Cdams_counts)

Idams_counts = plot_counts1(Idams_pip, "Irrigation Dams Spread")
plot(Idams_counts)

Hdams_counts = plot_counts1(Hdams_pip, "Hydroelectric Dams Spread")
plot(Hdams_counts)

Pdams_counts = plot_counts1(Pdams_pip, "Fire Protection Dams Spread")
plot(Pdams_counts)

I think the use of the equal area Hexagonal tessellation to observe the distribution of dams used for different purposes provided some interesting insight. For example, almost all of the dams used for flood control are distributed along the Mississippi and accompanying river systems, areas used to seeng flooded plains. Alternatively, the majority of dams used to produce hydroelectric power were concentrated in the upper northeastern united states, as well as along the mountain ranges on the western coast of the united states. These hydroelectric dams use elevation change in flowing waters to produce electricity, so it makes sense that dams used for this purpose would be located in mountainous regions with ample streams and rivers which run downhill.

Extra Credit

majorrivers = read_sf("../data/MajorRivers.dbf") %>% 
  filter(SYSTEM == "Mississippi") %>%
  mutate(STATE = c("AR", "MI", "MO", "OH")) %>%
  st_transform(4326)

dams_of_interest1 = dams %>% 
  filter(grepl("C", PURPOSES), HAZARD == "H") %>%
  select(DAM_NAME, PURPOSES, NID_STORAGE, YEAR_COMPLETED, STATE) %>% 
  st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs=4326) %>% 
  st_transform(st_crs(majorrivers)) %>% 
  group_by(STATE) %>% 
  slice_max(NID_STORAGE, n=1) %>% 
  ungroup()

leaflet() %>% 
  addProviderTiles(providers$CartoDB) %>% 
  addPolylines(data = majorrivers) %>% 
  addCircleMarkers(data = dams_of_interest1,
             fillColor  = ~colorQuantile("YlOrRd", NID_STORAGE)(NID_STORAGE),
             color = NA,
             fillOpacity = .5,
             radius = ~sqrt(NID_STORAGE) / 175 ,
             label = ~DAM_NAME,
             popup = leafpop::popupTable(st_drop_geometry(dams_of_interest1), feature.id = FALSE, row.numbers = FALSE))