A5 - Interactive Geospatial

Author

Affiliation

Parsa Keyvani

Georgetown University

Data Munging

faf_selected_subset <- read_csv("data/faf_selected_subset.csv")

The non-numeric data columns are of the form -. First, remove the - part of the data

faf_selected_subset$dms_orig <- gsub("^\\d+-", "", faf_selected_subset$dms_orig)
faf_selected_subset$dms_dest <- gsub("^\\d+-", "", faf_selected_subset$dms_dest)
faf_selected_subset$sctg2 <- gsub("^\\d+-", "", faf_selected_subset$sctg2)
faf_selected_subset$dms_mode <- gsub("^\\d+-", "", faf_selected_subset$dms_mode)

data_preview <- as.data.frame(head(faf_selected_subset))
datatable(data_preview)

The state information is embedded in the dms-orig and dms-dest columns. This needs to be extracted.

For most, the 2-letter abbreviations are available for extraction
For some rows, there are multiple states involved in a region. For example, 532-Portland OR-WA (WA Part). In these instances, you need to extract the state from the part in parenthesis. In this instance, you’d extract WA
Some rows have the full state names. These need to be converted to 2-letter state abbreviations

extract_state <- function(location) {
  # Extract state abbreviation from parentheses
  paren_state <- regmatches(location, regexpr("\\((\\w+) Part\\)", location))
  if (length(paren_state) > 0 && nchar(paren_state) > 0) {
    return(sub("\\((\\w+) Part\\)", "\\1", paren_state))
  }
  
  # Extract standard 2-letter state abbreviations
  state_abbr <- regmatches(location, regexpr("\\b[A-Z]{2}\\b", location))
  if (length(state_abbr) > 0 && nchar(state_abbr) > 0) {
    return(state_abbr)
  }
  
}


# Function to convert full state names to abbreviations
convert_state_name_to_abbr <- function(full_name) {
  abbr <- state_abbreviations[full_name]
  if (!is.null(abbr)) {
    return(abbr)
  } else {
    return(NA)
  }
}

faf_selected_subset$dms_orig_state <- sapply(faf_selected_subset$dms_orig, extract_state)
faf_selected_subset$dms_dest_state <- sapply(faf_selected_subset$dms_dest, extract_state)

full_state_names <- faf_selected_subset$full_state_column  
state_abbrs <- sapply(full_state_names, convert_state_name_to_abbr)

data_preview <- as.data.frame(head(faf_selected_subset))
datatable(data_preview)

Map 1: Commodity Choropleths

# Selecting the 3 commodities and totalling them
commodity_data <- faf_selected_subset %>%
  filter(sctg2 %in% c("Motorized vehicles", "Alcoholic beverages", "Electronics")) %>%
  mutate(dms_dest_state = as.character(dms_dest_state),
         commodity = as.character(sctg2),
         value = `million dollars in 2017`) %>%
  select(dms_dest_state, commodity, value) %>%
  group_by(dms_dest_state, commodity) %>%
  summarise(total_value = sum(value))

commodity_data_wide <- commodity_data %>%
  pivot_wider(names_from = commodity, values_from = total_value)

#write_csv(commodity_data_wide, "merged_data.csv")

Map 2: Largest Import by State

# Getting the largest tonnage for each state and its corresponding commodity
largest_import <- faf_selected_subset %>%
  mutate(dms_dest_state = as.character(dms_dest_state),
         commodity = as.character(sctg2),
         tonnage = `thousand tons in 2017`) %>%
  select(dms_dest_state, commodity, tonnage) %>%
  group_by(dms_dest_state, commodity) %>%
  summarise(total_tonnage = sum(tonnage)) %>%
  group_by(dms_dest_state) %>%
  slice_max(order_by = total_tonnage, n = 1) 

# Getting the full names of the states
states_sf <- st_read("data/tl_2020_us_state/tl_2020_us_state.shp")

Reading layer `tl_2020_us_state' from data source 
  `/Users/parsakeyvani/Desktop/Adv Data viz/Assignments /spring2024-a5-interactive-geospatial-keyvanip/data/tl_2020_us_state/tl_2020_us_state.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 56 features and 14 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
Geodetic CRS:  GCS_North_American_1983

states_sf <- as.data.frame(states_sf)

states <-states_sf %>% select(NAME, STUSPS)

largest_import <- merge(x= largest_import, y= states, by.x = "dms_dest_state", 
      by.y = "STUSPS")

#write_csv(largest_import, "largest_import.csv")

Map 3: Commodity Connection Map

# Getting the largest tonnage for each state and its corresponding commodity
connection_map <- faf_selected_subset %>%
  mutate(dms_dest_state = as.character(dms_dest_state),
         dms_orig_state = as.character(dms_orig_state),
         commodity = as.character(sctg2),
         value = `million dollars in 2017`) %>%
  filter(commodity == "Alcoholic beverages",
         dms_dest_state == "VA", dms_mode == "Truck") %>%
  group_by(dms_orig_state, dms_dest_state) %>%
  summarise(total_value = sum(value)) 

connection_map_subset <- connection_map %>%
  arrange(desc(total_value)) %>%
  head(6) %>%
  filter(dms_orig_state != "VA")

connection_map <- merge(x= connection_map, y= states, by.x = "dms_orig_state", 
      by.y = "STUSPS")

write_csv(connection_map, "connection_map.csv")

A5 - Interactive Geospatial

Data Munging

Map 1: Commodity Choropleths

Map 2: Largest Import by State

Map 3: Commodity Connection Map

Version 1: Top 4 States Exported to Virgina

Version 2: All States Exported to Virgina