In the following R code chunk, load_packages is the code
chunk name. include=FALSE means that the code chunk will
run, but neither the code itself nor its output will be included in the
rendered HTML. echo=TRUE in the following code chunk
means that the source code will be displayed in the rendered HTML
alongside the results of running it.
# Load all needed libraries.
library(tidyverse)
library(sf)
library(mapview)
library(dplyr)
library(lubridate)
library(ggplot2)
library(stringr)
# Point the session at the project folder that holds the input data.
setwd("~/Documents/R-spatial")
# Build the US concert table in one pipeline, starting from the raw .csv.
concerts <- read.csv("nirvana_concerts.csv") %>%
  # Keep only rows whose Location mentions the United States.
  filter(grepl("United States", Location)) %>%
  # Only the date, venue, and location columns are needed downstream.
  select(Start.Date, Venue, Location) %>%
  # Split "City, State, Country" into three separate columns.
  separate(
    col = Location,
    into = c("City", "State", "Country"),
    sep = ", "
  ) %>%
  mutate(
    # Rebuild the full location string (useful if geocoding is needed later).
    Location = paste(City, State, Country, sep = ", "),
    # Year is the two-digit suffix of Start.Date prefixed with "19";
    # it is kept as a character string.
    Year = paste0("19", str_sub(Start.Date, -2, -1))
  )
# Show the first rows of the cleaned table.
head(concerts)
## Start.Date Venue City State
## 1 12/31/91 Cow Palace Arena & Event Center Daly City California
## 2 12/29/91 ASU Activity Center Tempe Arizona
## 3 12/28/91 Del Mar Fairgrounds Del Mar California
## 4 12/28/91 Paramount Theatre Seattle Washington
## 5 12/27/91 Los Angeles Memorial Sports Arena Los Angeles California
## 6 12/27/91 LA Sports Arena Los Angeles California
## Country Location Year
## 1 United States Daly City, California, United States 1991
## 2 United States Tempe, Arizona, United States 1991
## 3 United States Del Mar, California, United States 1991
## 4 United States Seattle, Washington, United States 1991
## 5 United States Los Angeles, California, United States 1991
## 6 United States Los Angeles, California, United States 1991
# Export the cleaned concert table to disk without row names.
write.csv(
  x = concerts,
  file = "nirvana_concerts_spreadsheet.csv",
  row.names = FALSE
)
# Tally concerts per state: one row per State, with the count in n_concerts.
concerts_state <- summarise(group_by(concerts, State), n_concerts = n())
# Inspect the column names and types of the per-state summary.
str(concerts_state)
## tibble [31 × 2] (S3: tbl_df/tbl/data.frame)
## $ State : chr [1:31] "Arizona" "California" "Colorado" "Connecticut" ...
## $ n_concerts: int [1:31] 4 32 4 1 3 2 4 2 7 1 ...
# Load the US state boundaries as an sf object. The shapefile comes from the
# 2025 Cartographic Boundary Files published by the US Census.
state_sf <- st_read(
  dsn = "/Users/samikarim/Documents/R-spatial/cb_2025_us_state_500k/cb_2025_us_state_500k.shp"
)
## Reading layer `cb_2025_us_state_500k' from data source
## `/Users/samikarim/Documents/R-spatial/cb_2025_us_state_500k/cb_2025_us_state_500k.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 56 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -179.1467 ymin: -14.5487 xmax: 179.7785 ymax: 71.38782
## Geodetic CRS: NAD83
# Inspect the structure of the state sf object: its column names, the type of
# each column, and the geometry column.
str(state_sf)
## Classes 'sf' and 'data.frame': 56 obs. of 10 variables:
## $ STATEFP : chr "21" "32" "08" "04" ...
## $ STATENS : chr "01779786" "01779793" "01779779" "01779777" ...
## $ GEOIDFQ : chr "0400000US21" "0400000US32" "0400000US08" "0400000US04" ...
## $ GEOID : chr "21" "32" "08" "04" ...
## $ STUSPS : chr "KY" "NV" "CO" "AZ" ...
## $ NAME : chr "Kentucky" "Nevada" "Colorado" "Arizona" ...
## $ LSAD : chr "00" "00" "00" "00" ...
## $ ALAND : num 1.02e+11 2.85e+11 2.68e+11 2.94e+11 2.51e+11 ...
## $ AWATER : num 2.38e+09 1.84e+09 1.19e+09 8.54e+08 1.87e+09 ...
## $ geometry:sfc_MULTIPOLYGON of length 56; first list element: List of 2
## ..$ :List of 1
## .. ..$ : num [1:3089, 1:2] -89.4 -89.4 -89.4 -89.4 -89.4 ...
## ..$ :List of 1
## .. ..$ : num [1:42, 1:2] -89.6 -89.6 -89.6 -89.6 -89.6 ...
## ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA
## ..- attr(*, "names")= chr [1:9] "STATEFP" "STATENS" "GEOIDFQ" "GEOID" ...
# Attach the per-state concert counts to the state polygons, matching the
# shapefile's NAME column to the State column of the summary. A left join keeps
# every row of state_sf; rows without a match get NA in n_concerts.
concerts_state_sf <- state_sf %>%
  dplyr::left_join(concerts_state, by = c("NAME" = "State"))
# List the columns of the joined sf object, which now include n_concerts.
names(concerts_state_sf)
## [1] "STATEFP" "STATENS" "GEOIDFQ" "GEOID" "STUSPS"
## [6] "NAME" "LSAD" "ALAND" "AWATER" "n_concerts"
## [11] "geometry"
# Preview the first rows of the joined sf object. Rows with no matching entry
# in concerts_state show NA in the n_concerts column at this point.
head(concerts_state_sf)
## Simple feature collection with 6 features and 10 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -120.0065 ymin: 31.33218 xmax: -75.46062 ymax: 45.00582
## Geodetic CRS: NAD83
## STATEFP STATENS GEOIDFQ GEOID STUSPS NAME LSAD ALAND
## 1 21 01779786 0400000US21 21 KY Kentucky 00 102267794436
## 2 32 01779793 0400000US32 32 NV Nevada 00 284537074263
## 3 08 01779779 0400000US08 08 CO Colorado 00 268419398775
## 4 04 01779777 0400000US04 04 AZ Arizona 00 294366238828
## 5 56 01779807 0400000US56 56 WY Wyoming 00 251458188895
## 6 37 01027616 0400000US37 37 NC North Carolina 00 125935547374
## AWATER n_concerts geometry
## 1 2383097595 NA MULTIPOLYGON (((-89.40565 3...
## 2 1839852286 1 MULTIPOLYGON (((-120.0065 3...
## 3 1185110804 4 MULTIPOLYGON (((-109.0603 3...
## 4 853868947 4 MULTIPOLYGON (((-114.8163 3...
## 5 1868027102 NA MULTIPOLYGON (((-111.0546 4...
## 6 13453873497 3 MULTIPOLYGON (((-75.72681 3...
# States with no recorded concerts came out of the join as NA; record them as
# 0, since Nirvana didn't perform there in this time period.
concerts_state_sf <- mutate(
  concerts_state_sf,
  n_concerts = replace_na(n_concerts, 0)
)
# Draw an interactive preview map with states color-coded by concert count.
mapview(
  concerts_state_sf,
  zcol = "n_concerts",
  cex = "n_concerts",
  layer.name = "Number of Concerts"
)