In the following R code chunk, load_packages is the code chunk name. include=FALSE means that the code chunk will run, but neither the code itself nor its outputs will be included in the rendered HTML. echo=TRUE in the following code chunk means that the code itself will be shown in the rendered HTML, alongside the results from running the code (which are included by default).

Step 1: Load all needed libraries and set the working directory.

# Load all needed libraries.

library(tidyverse)
library(sf)
library(mapview)
library(dplyr)
library(lubridate)
library(ggplot2)
library(stringr)

# Set the working directory to the project folder; relative file paths
# (e.g. 'nirvana_concerts.csv') are resolved against the working directory.
# NOTE(review): when this document is knitted, knitr restores the working
# directory after each chunk, so this call may not affect later chunks —
# confirm, or set `root.dir` via knitr::opts_knit$set() instead.

setwd("~/Documents/R-spatial")

Step 2: Clean and process the data from the CSV file to produce all the columns needed for geocoding or for joining to an sf object.

# Load the Nirvana concert list and reshape it in a single pipeline:
#   1. keep only rows whose Location mentions the United States,
#   2. keep the date, venue, and location columns,
#   3. split Location ("City, State, Country") into its three parts,
#   4. rebuild the full Location string in case it is needed for geocoding,
#   5. derive a four-digit Year from the two-digit year that ends Start.Date
#      (the "19" century prefix is hard-coded).
concerts <- read.csv("nirvana_concerts.csv") %>%
  filter(grepl("United States", Location)) %>%
  select(Start.Date, Venue, Location) %>%
  separate(
    col = Location,
    into = c("City", "State", "Country"),
    sep = ", "
  ) %>%
  mutate(
    Location = paste(City, State, Country, sep = ", "),
    Year = paste0("19", str_sub(Start.Date, -2, -1))
  )

# Preview the new concerts data.
head(concerts)
##   Start.Date                             Venue        City      State
## 1   12/31/91   Cow Palace Arena & Event Center   Daly City California
## 2   12/29/91               ASU Activity Center       Tempe    Arizona
## 3   12/28/91               Del Mar Fairgrounds     Del Mar California
## 4   12/28/91                 Paramount Theatre     Seattle Washington
## 5   12/27/91 Los Angeles Memorial Sports Arena Los Angeles California
## 6   12/27/91                   LA Sports Arena Los Angeles California
##         Country                               Location Year
## 1 United States   Daly City, California, United States 1991
## 2 United States          Tempe, Arizona, United States 1991
## 3 United States     Del Mar, California, United States 1991
## 4 United States     Seattle, Washington, United States 1991
## 5 United States Los Angeles, California, United States 1991
## 6 United States Los Angeles, California, United States 1991
# Write the cleaned concert table to a .csv file, without a row-name column.
write.csv(
  concerts,
  file = "nirvana_concerts_spreadsheet.csv",
  row.names = FALSE
)

Step 3: Aggregate the number of concerts by state and join the counts to the state shapefile.

# Count how many concerts took place in each state (one row per state,
# with the count stored in n_concerts).
concerts_state <- concerts %>%
  group_by(State) %>%
  tally(name = "n_concerts")

# Inspect the columns and types of the per-state counts.
str(concerts_state)
## tibble [31 × 2] (S3: tbl_df/tbl/data.frame)
##  $ State     : chr [1:31] "Arizona" "California" "Colorado" "Connecticut" ...
##  $ n_concerts: int [1:31] 4 32 4 1 3 2 4 2 7 1 ...
# Load the state boundaries as an sf object. The shapefile comes from the
# 2025 Cartographic Boundary Files published by the US Census.
# NOTE(review): this is an absolute path inside one user's home directory;
# it will need adjusting on any other machine.
state_sf <- st_read(
  dsn = "/Users/samikarim/Documents/R-spatial/cb_2025_us_state_500k/cb_2025_us_state_500k.shp"
)
## Reading layer `cb_2025_us_state_500k' from data source 
##   `/Users/samikarim/Documents/R-spatial/cb_2025_us_state_500k/cb_2025_us_state_500k.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 56 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.1467 ymin: -14.5487 xmax: 179.7785 ymax: 71.38782
## Geodetic CRS:  NAD83
# Examine the column names and structure of this sf object (the attribute
# columns plus the geometry column).
str(state_sf)
## Classes 'sf' and 'data.frame':   56 obs. of  10 variables:
##  $ STATEFP : chr  "21" "32" "08" "04" ...
##  $ STATENS : chr  "01779786" "01779793" "01779779" "01779777" ...
##  $ GEOIDFQ : chr  "0400000US21" "0400000US32" "0400000US08" "0400000US04" ...
##  $ GEOID   : chr  "21" "32" "08" "04" ...
##  $ STUSPS  : chr  "KY" "NV" "CO" "AZ" ...
##  $ NAME    : chr  "Kentucky" "Nevada" "Colorado" "Arizona" ...
##  $ LSAD    : chr  "00" "00" "00" "00" ...
##  $ ALAND   : num  1.02e+11 2.85e+11 2.68e+11 2.94e+11 2.51e+11 ...
##  $ AWATER  : num  2.38e+09 1.84e+09 1.19e+09 8.54e+08 1.87e+09 ...
##  $ geometry:sfc_MULTIPOLYGON of length 56; first list element: List of 2
##   ..$ :List of 1
##   .. ..$ : num [1:3089, 1:2] -89.4 -89.4 -89.4 -89.4 -89.4 ...
##   ..$ :List of 1
##   .. ..$ : num [1:42, 1:2] -89.6 -89.6 -89.6 -89.6 -89.6 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA
##   ..- attr(*, "names")= chr [1:9] "STATEFP" "STATENS" "GEOIDFQ" "GEOID" ...
# Attach the per-state concert counts to the state polygons, matching the
# shapefile's NAME column to the State column of the counts. A left join
# keeps every row of state_sf; rows without a match get NA in n_concerts.
concerts_state_sf <- state_sf %>%
  dplyr::left_join(concerts_state, by = c("NAME" = "State"))

# List the column names of the joined sf object.
names(concerts_state_sf)
##  [1] "STATEFP"    "STATENS"    "GEOIDFQ"    "GEOID"      "STUSPS"    
##  [6] "NAME"       "LSAD"       "ALAND"      "AWATER"     "n_concerts"
## [11] "geometry"
# Preview the first rows of the joined sf object, including the new
# n_concerts column.
head(concerts_state_sf)
## Simple feature collection with 6 features and 10 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -120.0065 ymin: 31.33218 xmax: -75.46062 ymax: 45.00582
## Geodetic CRS:  NAD83
##   STATEFP  STATENS     GEOIDFQ GEOID STUSPS           NAME LSAD        ALAND
## 1      21 01779786 0400000US21    21     KY       Kentucky   00 102267794436
## 2      32 01779793 0400000US32    32     NV         Nevada   00 284537074263
## 3      08 01779779 0400000US08    08     CO       Colorado   00 268419398775
## 4      04 01779777 0400000US04    04     AZ        Arizona   00 294366238828
## 5      56 01779807 0400000US56    56     WY        Wyoming   00 251458188895
## 6      37 01027616 0400000US37    37     NC North Carolina   00 125935547374
##        AWATER n_concerts                       geometry
## 1  2383097595         NA MULTIPOLYGON (((-89.40565 3...
## 2  1839852286          1 MULTIPOLYGON (((-120.0065 3...
## 3  1185110804          4 MULTIPOLYGON (((-109.0603 3...
## 4   853868947          4 MULTIPOLYGON (((-114.8163 3...
## 5  1868027102         NA MULTIPOLYGON (((-111.0546 4...
## 6 13453873497          3 MULTIPOLYGON (((-75.72681 3...
# States with no matching concerts came out of the join with NA in
# n_concerts; record them as 0, since Nirvana didn't perform there in this
# time period.
concerts_state_sf <- mutate(
  concerts_state_sf,
  n_concerts = coalesce(n_concerts, 0L)
)

Step 4: Visualize the count of concerts per state using mapview.

# Draw an interactive preview map with states color-coded by concert count.
# NOTE(review): `cex` controls point-marker size in mapview; confirm that it
# has any effect on this polygon layer.
mapview(
  concerts_state_sf,
  zcol = "n_concerts",
  cex = "n_concerts",
  layer.name = "Number of Concerts"
)