Part A
Step 2:
Generate a map of tornado paths where the paths from each year are displayed as a different color, similar to the map of tornado points in Figure 5.4. Create a composite figure containing the map of tornado paths and the map of tornado points using plot_grid().
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(scales)
library(RColorBrewer)
library(units)
## udunits database from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/units/share/udunits/udunits2.xml
library(cowplot)
library(sp)
library(readr)
##
## Attaching package: 'readr'
## The following object is masked from 'package:scales':
##
## col_factor
Read the data in
okcounty <- st_read("ok_counties.shp", quiet = TRUE)
tpoint <- st_read("ok_tornado_point.shp", quiet = TRUE)
tpath <- st_read("ok_tornado_path.shp", quiet = TRUE)
class(okcounty)
## [1] "sf" "data.frame"
okcounty_sp <- as(okcounty, 'Spatial')
class(okcounty_sp)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
okcounty_sf <- st_as_sf(okcounty_sp)
class(okcounty_sf)
## [1] "sf" "data.frame"
Plot the initial geography of Oklahoma:
ggplot(data = okcounty) +
geom_sf(fill = NA)
#Part 2A
#Filter the data for point and path:
tpoint_16_21 <- tpoint %>%
filter(yr >= 2016 & yr <= 2021) %>%
select(om, yr, date)
tpath_16_21 <- tpath %>%
filter(yr >= 2016 & yr <= 2021) %>%
select(om, yr, date)
#Recreating figure 5.4 for paths instead of points.
torPoint <- ggplot() +
geom_sf(data = tpoint_16_21,
aes(color = as.factor(yr))) +
geom_sf(data = okcounty, fill = NA) +
scale_color_discrete(name = "Year") +
coord_sf(datum = NA) +
theme_void()
torPath <- ggplot() +
geom_sf(data = tpath_16_21,
aes(color = as.factor(yr))) +
geom_sf(data = okcounty, fill = NA) +
scale_color_brewer(palette = "Set2", name = "Year") + # Change palette to whichever colorbrewer palette you prefer
coord_sf(datum = NA) +
theme_void()
plot_grid(torPoint, torPath,
labels = c("A) Tornado Point Map",
"B) Tornado Path Map"),
label_size = 12,
ncol = 1,
hjust = 0,
label_x = 0,
vjust = c(1, -1), # Adjust these values as needed
align = "hv")
#Part 2B
Summarize the density of tornado points by both county and year and generate a faceted plot that displays maps of county-level tornado density from 2016-2021. At first I created a proportional-circle density map, but I didn't like how it looked, so I decided to make faceted choropleth maps instead (a sketch of that abandoned approach follows the faceted maps below).
#overlay analysis of County and sum number of tornados in each county
# Perform spatial join and drop geometry
countypnt <- st_join(tpoint_16_21, okcounty) %>%
st_drop_geometry()
# Summarize tornado count by GEOID and year
countysum <- countypnt %>%
group_by(GEOID, yr) %>%
summarize(tcnt = n())
## `summarise()` has grouped output by 'GEOID'. You can override using the
## `.groups` argument.
# Create county map with tornado count and density
countymap <- okcounty %>%
left_join(countysum, by = "GEOID") %>%
replace(is.na(.), 0) %>%
mutate(area = st_area(geometry)) %>%
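# st_area() returns square meters; 10^6 converts the rate to per km^2 and 10^3 rescales it to tornadoes per 1,000 km^2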
mutate(tdens = 10^6 * 10^3 * tcnt / area) %>%
filter(yr != 0) %>%
drop_units()
# Plotting
ggplot(data = countymap) +
geom_sf(data = okcounty) +
geom_sf(aes(fill = tdens)) +
facet_wrap(~ yr, ncol = 2) +
scale_fill_distiller(palette = "BuPu", trans = "reverse") +
labs(fill = expression("Tornadoes/1000 km"^2)) + # Legend label with units
theme_void()
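For comparison, here is a minimal sketch of the proportional-circle version I abandoned. It reuses the countymap object built above; st_centroid() warns on longitude/latitude data but is fine for display purposes.
# Sketch of the abandoned proportional-circle density map (assumes countymap from above)
ggplot() +
  geom_sf(data = okcounty, fill = NA) +
  geom_sf(data = st_centroid(countymap), # One point per county-year record
          aes(size = tcnt), color = "darkred", alpha = 0.5) +
  scale_size_area(name = "Tornadoes", max_size = 6) + # Circle area proportional to count
  facet_wrap(~ yr, ncol = 2) +
  theme_void()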
#2C Generate Four Choropleth Maps
Generate four choropleth maps of tornado density based on quantile breaks with numbers of classes ranging from 3 to 6. Create a composite figure containing the four maps using plot_grid() and examine how the number of classes changes the interpretation of the map.
Here, I used a for loop to generate all four maps at once and then combined them with plot_grid().
#set a title
title <- ggdraw() +
draw_label(
"Effects of changing breaks",
fontface = 'bold',
x = 0.5,
hjust = 0.5
)
classrange <- c(3, 4, 5, 6)
plot_list <- list()
for (value in classrange) {
qbrks <- quantile(countymap$tdens, probs = seq(0, 1, length.out = value+1))
countymap <- countymap %>%
mutate(tdens_c1 = cut(tdens,
breaks = qbrks,
include.lowest = TRUE))
plot_name <- paste("Plot", value) # Name for the plot
plot_list[[plot_name]] <- ggplot(data = countymap) +
geom_sf(aes(fill = tdens_c1)) +
scale_fill_brewer(name = expression("Tornadoes/1000 km"^2),
palette = "YlOrRd") +
theme_void() +
theme(legend.position = "right")
}
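One caveat (an edge case, not something that occurs with these data): when the distribution is heavily skewed, quantile() can return duplicated values, and cut() errors on non-unique breaks. A defensive variant deduplicates the breaks first:
# Drop duplicated quantile values so cut() always receives unique breaks
qbrks <- unique(quantile(countymap$tdens,
                         probs = seq(0, 1, length.out = value + 1)))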
# Now combine the four maps, stacking the title created above on top
maps <- plot_grid(plot_list[[1]],
                  plot_list[[2]],
                  plot_list[[3]],
                  plot_list[[4]],
                  nrow = 2,
                  ncol = 2,
                  align = "hv",
                  labels = c("3 Classes",
                             "4 Classes",
                             "5 Classes",
                             "6 Classes"))
plot_grid(title, maps, ncol = 1, rel_heights = c(0.1, 1))
To determine an appropriate number of classes for these data, it helps to visualize the distribution. Let's create a histogram to view it.
# Calculate the standard deviation of tdens
tdens_sd <- sd(countymap$tdens)
# Create a histogram
histogram <- ggplot(countymap, aes(x = tdens)) +
geom_histogram(fill = "blue", color = "black", bins = 30) + # Adjust the number of bins as needed
labs(title = "Histogram of tdens",
x = "tdens",
y = "Frequency")
# Add vertical lines for the mean and one standard deviation on each side
histogram_with_lines <- histogram +
geom_vline(xintercept = mean(countymap$tdens), color = "red", linetype = "dashed", linewidth = 1) + # Mean line
geom_vline(xintercept = mean(countymap$tdens) - tdens_sd, color = "green", linetype = "dotted", linewidth = 1) + # One standard deviation below the mean
geom_vline(xintercept = mean(countymap$tdens) + tdens_sd, color = "green", linetype = "dotted", linewidth = 1) # One standard deviation above the mean
# Show the plot
print(histogram_with_lines)
From the histogram and the standard deviation lines, 3 or 4 classes is probably the right choice. Most county densities are less than 2, so a three-class scheme splits off the long right tail, where the outlying high-density counties fall.
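As a quick check on where those class boundaries actually fall, we can print the quantile breaks for the three-class scheme:
# Quantile breaks for 3 classes: each class holds roughly a third of the county-year records
quantile(countymap$tdens, probs = seq(0, 1, length.out = 4))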
#Map any point data using Leaflet.
Choose any dataset of your interest (point data), and create an interactive map using the leaflet package. Customize your map, add a legend, popup information, and labels, and use an adequate color palette and scheme according to the variable mapped. These are the minimum requirements for this lab. But if you want to explore other functionalities included in the library and other datasets, please do so! Include your resulting map and code in the part B section of your RMarkdown report. Then, guide your readers through the map creation process and introduce the results.
For this assignment, I'm going to use LODES origin-destination (work/home) data. This was fun. Had I had more time, I would have figured out how to add a nicer legend, one that doesn't just say the word "legend" (a sketch of one possible on-map approach follows the map below). If you click on a circle, it tells you how many workers commute to Norwalk from each of the top 10 home towns.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ lubridate::stamp() masks cowplot::stamp()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
library(leaflet)
options(tigris_class = "sf")
# Load the jobs data
jobs_data <- read_csv("lodeswithlatlon.csv")
## Rows: 21765 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): work_town, home_town
## dbl (5): TotalJobs, Latw, Lonw, Lat_h, lon_h
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preprocess work_town
jobs_data <- jobs_data %>%
mutate(work_town_cleaned = str_extract(work_town, "^[^\\(]+")) %>%
mutate(work_town_cleaned = str_trim(work_town_cleaned)) %>%
mutate(work_town_cleaned = tolower(work_town_cleaned))
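To see what the cleaning does, here is a quick check on a made-up raw value (the exact suffix format of work_town in the file is an assumption):
# Hypothetical raw value; the real file's parenthetical suffix may differ
str_extract("Norwalk town (Fairfield, CT)", "^[^\\(]+") %>%
  str_trim() %>%
  tolower()
## [1] "norwalk town"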
# Specific towns of interest, already in lowercase for matching
specific_towns <- c("hartford town", "stamford town", "norwalk town", "danbury town", "bridgeport town", "waterbury town", "new haven town")
# Load and filter Connecticut towns geometry for specific towns
connecticut_towns <- county_subdivisions(state = "09", cb = TRUE) %>%
mutate(NAMELSAD = tolower(NAMELSAD)) %>%
filter(NAMELSAD %in% specific_towns)
## Retrieving data for the year 2022
# Filter jobs_data for towns of interest
jobs_data_filtered <- jobs_data %>%
filter(work_town_cleaned %in% specific_towns)
# Ensure TotalJobs is numeric
jobs_data_filtered$TotalJobs <- as.numeric(jobs_data_filtered$TotalJobs)
# Perform the join
merged_data <- left_join(connecticut_towns, jobs_data_filtered, by = c("NAMELSAD" = "work_town_cleaned"))
# merged_data inherits the sf class from connecticut_towns, so no st_as_sf() conversion is needed
# Prepare the data: ensure numeric values and handle NAs
merged_data <- merged_data %>%
mutate(TotalJobs = as.numeric(TotalJobs),
TotalJobs = ifelse(is.na(TotalJobs), 0, TotalJobs))
# Define a color palette for TotalJobs
pal <- colorNumeric(palette = "viridis", domain = merged_data$TotalJobs)
# An earlier choropleth version of the map, kept for reference but not run:
#leaflet(merged_data) %>%
# addProviderTiles(providers$CartoDB.Positron) %>%
# addPolygons(fillColor = ~pal(TotalJobs),
# fillOpacity = 0.7,
# color = "#BDBDC3", weight = 1,
# popup = ~paste(NAMELSAD, "<br> Total Jobs: ", TotalJobs)) %>%
# addLegend(pal = pal, values = ~TotalJobs,
# title = "Total Jobs",
# position = "bottomright")
norwalk_centroid_lat <- 41.117744 # Approximate latitude of Norwalk's centroid
norwalk_centroid_lon <- -73.4081575 # Approximate longitude of Norwalk's centroid
# Assuming 'jobs_data' contains your dataset
jobs_data <- jobs_data %>%
mutate(
home_town_cleaned = str_extract(home_town, "^[^\\(]+"), # Extract everything before "("
home_town_cleaned = str_trim(home_town_cleaned), # Trim white spaces
home_town_cleaned = tolower(home_town_cleaned) # Convert to lowercase
)
# Continuing from the previous data preparation step
norwalk_commute_data <- jobs_data %>%
filter(work_town_cleaned == "norwalk town" & work_town_cleaned != home_town_cleaned) %>%
group_by(home_town_cleaned) %>%
summarize(TotalJobs = sum(TotalJobs, na.rm = TRUE),
Lat_h = mean(Lat_h, na.rm = TRUE), # Assuming average location for each town
lon_h = mean(lon_h, na.rm = TRUE)) %>%
ungroup() %>%
slice_max(order_by = TotalJobs, n = 10) # Keep the top 10 home towns; slice_max() already sorts by TotalJobs
# Normalize TotalJobs to a suitable range for circle marker sizes
max_jobs <- max(norwalk_commute_data$TotalJobs, na.rm = TRUE)
min_jobs <- min(norwalk_commute_data$TotalJobs, na.rm = TRUE)
# Simple linear transformation to scale job counts to a reasonable range for marker sizes
# Adjust the scaling factor as needed to get visually distinct sizes
norwalk_commute_data$MarkerSize <- 5 + 15 * (norwalk_commute_data$TotalJobs - min_jobs) / (max_jobs - min_jobs)
# Assuming your data ('norwalk_commute_data') and variables ('norwalk_centroid_lon', 'norwalk_centroid_lat') are already defined
# Step 1: Create the Leaflet map
map <- leaflet(norwalk_commute_data) %>%
addTiles() %>% # Adds default OpenStreetMap tiles
addCircleMarkers(
lng = ~lon_h, lat = ~Lat_h,
radius = ~MarkerSize, # Use the scaled job count for marker size
color = "#0078AA", fillColor = "#0078AA",
fillOpacity = 0.5,
popup = ~paste(home_town_cleaned, "to Norwalk: ", TotalJobs, "jobs")
) %>%
addMarkers(lng = norwalk_centroid_lon, lat = norwalk_centroid_lat, popup = "Norwalk") %>%
setView(lng = norwalk_centroid_lon, lat = norwalk_centroid_lat, zoom = 8)
# Display the map
map
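As one possible fix for the legend complaint above, here is a hedged sketch of an on-map size legend built with leaflet's addControl(), which accepts arbitrary HTML; the SVG radii and labels are assumptions chosen to roughly match the MarkerSize range used above.
# Sketch: draw the scaled-circle legend directly on the map with addControl().
# The radii (5/10/20 px) are assumptions; tune them to match MarkerSize.
legend_html <- paste0(
  "<div style='background: white; padding: 6px; border-radius: 4px;'>",
  "<strong>Commuters to Norwalk</strong><br/>",
  "<svg width='70' height='50'>",
  "<circle cx='35' cy='25' r='20' fill='none' stroke='#0078AA'/>",
  "<circle cx='35' cy='35' r='10' fill='none' stroke='#0078AA'/>",
  "<circle cx='35' cy='40' r='5' fill='#0078AA' fill-opacity='0.5'/>",
  "</svg></div>"
)
map %>%
  addControl(html = legend_html, position = "bottomright")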
# Step 2: Create a `ggplot2` plot for the scaled circle legend
# Sample data for the legend
legend_data <- data.frame(
Size = factor(c("Small", "Medium", "Large"), levels = c("Small", "Medium", "Large")),
Radius = c(5, 10, 15) # Adjust these values based on the marker sizes used in your Leaflet map
)
# Creating the legend as a plot
legend_plot <- ggplot(legend_data, aes(x = Size, y = 1, size = Radius)) +
geom_point(shape = 21, fill = "#0078AA") +
scale_size_continuous(range = c(5, 15)) + # Match this range to the Leaflet marker sizes
theme_void() + # Removes background, grids, and axes
theme(legend.position = "none", # We don't need a legend for the legend
axis.text.x = element_text()) + # Restore the Small/Medium/Large labels that theme_void() hides
labs(x = NULL, y = NULL, title = "Circle Size Legend") +
scale_y_continuous(breaks = NULL) # No y-axis ticks
# Display the legend plot
legend_plot
# Assuming 'legend_data' is your data frame with appropriate 'Size' and 'Radius' columns
legend_plot <- ggplot(legend_data, aes(x = Size, y = 1, size = Radius)) +
geom_point(shape = 21, fill = "#0078AA") +
scale_size_continuous(name = "Circle Size",
                      range = c(5, 15),
                      breaks = c(min(legend_data$Radius), max(legend_data$Radius)),
                      labels = c("Small", "Large")) +
theme_void() +
labs(x = NULL, y = NULL, title = "Circle Size Legend") +
scale_y_continuous(breaks = NULL)
# Display the plot
print(legend_plot)