Setting up packages

Functions

#1. Create concert sf points
# Parse a phish.com tour page into an sf point layer with one row per show.
#
# Args:
#   phish_html: parsed HTML document (e.g. the result of read_html()) that
#     contains "div.purchase-details" and "div.purchase-show-location" nodes.
#
# Returns:
#   An sf object (CRS EPSG:4326) with columns venue, city, state and country,
#   plus point geometry built from the lat/lon found in each show's map link.
#   Rows whose location text has an unexpected layout keep NA in the text
#   columns (and raise a warning) so they stay aligned with the coordinates.
parse_phish_html <- function(phish_html) {
  location <- phish_html %>%
    html_nodes("div.purchase-details") %>%
    html_text()

  # The page text runs two fields together without a separator (e.g.
  # "...HouseNJ"), so split where a lowercase letter meets an uppercase one.
  split_joined <- function(x) {
    str_split(x, "(?<=[a-z])(?=[A-Z])", simplify = TRUE)
  }

  # Preallocate one slot per show instead of growing vectors inside the loop.
  n_shows <- length(location)
  venues <- rep(NA_character_, n_shows)
  cities <- rep(NA_character_, n_shows)
  states <- rep(NA_character_, n_shows)
  countries <- rep(NA_character_, n_shows)

  for (i in seq_along(location)) {
    clean_text <- location[[i]] %>%
      str_trim() %>%
      str_replace_all("\\t|\\n", "") %>%
      str_replace(" map$", "")
    parts <- str_split(clean_text, ", ", simplify = TRUE)
    n_parts <- length(parts)

    if (n_parts == 2) {
      # e.g. "Phish Phamily Phrolic at Anastasios' HouseNJ", "United States":
      # no city is given, and the piece fused to the venue is stored as state.
      vs <- split_joined(parts[1])
      venues[i] <- vs[1]
      cities[i] <- "Not Available"
      states[i] <- vs[2]
      countries[i] <- parts[2]
    } else if (n_parts == 3) {
      # "<venue><city>", "<state>", "<country>"
      vc <- split_joined(parts[1])
      venues[i] <- vc[1]
      cities[i] <- vc[2]
      states[i] <- parts[2]
      countries[i] <- parts[3]
    } else if (n_parts == 4) {
      # "<event>", "<venue><city>", "<state>", "<country>"
      ec <- split_joined(parts[2])
      # paste0() avoids the extra spaces paste() would add around " at ".
      venues[i] <- paste0(parts[1], " at ", ec[1])
      cities[i] <- ec[2]
      states[i] <- parts[3]
      countries[i] <- parts[4]
    } else if (n_parts == 5) {
      # "<event part 1>", "<event part 2>", "<venue><city>", "<state>",
      # "<country>"
      ec <- split_joined(parts[3])
      venues[i] <- paste0(parts[1], ", ", parts[2], " at ", ec[1])
      cities[i] <- ec[2]
      states[i] <- parts[4]
      countries[i] <- parts[5]
    } else {
      # Keep the row (as NA) so the text columns still line up with the
      # coordinates extracted below.
      warning(
        "Unexpected location format (", n_parts, " parts): ", clean_text,
        call. = FALSE
      )
    }
  }

  # Each show's map link carries its coordinates after "to:" as "lat+lon".
  coords_url <- phish_html %>%
    html_nodes("div.purchase-show-location") %>%
    html_nodes("a") %>%
    html_attr("href")

  clean_coords <- sub(".*to:", "", coords_url)
  latlon <- str_split(clean_coords, "\\+", simplify = TRUE)

  concert_df <- data.frame(
    venue = venues,
    city = cities,
    state = states,
    country = countries,
    lat = as.numeric(latlon[, 1]),
    lon = as.numeric(latlon[, 2]),
    stringsAsFactors = FALSE
  )

  st_as_sf(concert_df, coords = c("lon", "lat"), crs = 4326)
}

#2. State merge
# Count concerts per state and attach the counts to the state polygons.
#
# Args:
#   concert_sf: sf point layer of concerts.
#   states_sf: sf polygon layer of states.
#   state_column: name of the column in states_sf that identifies a state.
#
# Returns:
#   states_sf with an added show_count column; states that received no
#   concert in the join have NA there.
count_shows_by_state <- function(concert_sf, states_sf, state_column = "STATE_ABBR") {
  # Spatial join assigns each concert point the attributes of the polygon it
  # falls in. Geometry is dropped *before* summarising so the points are not
  # unioned per group only to be thrown away afterwards.
  show_counts_df <- st_join(concert_sf, states_sf) %>%
    st_drop_geometry() %>%
    group_by(across(all_of(state_column))) %>%
    summarize(show_count = n(), .groups = "drop")

  # Join the counts back onto the full set of state geometries.
  left_join(states_sf, show_counts_df, by = state_column)
}

# Point plot the concerts
# Draw concert points on top of a state basemap.
#
# Args:
#   sf_data: sf point layer of concerts to plot in red.
#   year: value shown in parentheses in the plot title.
#   states: sf polygon layer used as the basemap. Defaults to the
#     script-level states_sf_filt, which is looked up only when the function
#     is called, so existing two-argument calls keep working.
#
# Returns:
#   A ggplot object.
make_map <- function(sf_data, year, states = states_sf_filt) {
  ggplot() +
    geom_sf(data = states, fill = "lightblue", color = "black") +
    geom_sf(data = sf_data, color = "red") +
    theme_minimal() +
    labs(
      title = paste0("Phish US Concerts (", year, ")"),
      x = "Longitude", y = "Latitude"
    ) +
    theme(legend.position = "none")
}

Read the state boundaries, then download and parse the Phish tour pages for the years 1990, 1991 and 1992.

# Load the state boundary polygons from the local GeoJSON file.
states_sf <- st_read("data/US_State_Boundaries.geojson")
## Reading layer `US_State_Boundaries' from data source 
##   `/Users/williamcornejo/Desktop/Desktop - william’s MacBook Air/school/gtech705/gtech78520/final_proj/data/US_State_Boundaries.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 53 features and 16 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.1474 ymin: 17.6744 xmax: 179.7784 ymax: 71.38921
## Geodetic CRS:  WGS 84
# Drop Alaska and Hawaii from the state layer used for joins and maps.
states_sf_filt <- filter(states_sf, !STATE_ABBR %in% c("AK", "HI"))
# Download the tour pages for 1990, 1991 and 1992.
phish_90 <- GET("https://phish.com/tours/1990")
phish_91 <- GET("https://phish.com/tours/1991")
phish_92 <- GET("https://phish.com/tours/1992")

# Fail fast on an HTTP error instead of parsing an error page as tour data.
stop_for_status(phish_90)
stop_for_status(phish_91)
stop_for_status(phish_92)

# Extract each response body as UTF-8 text.
phish_text90 <- content(phish_90, "text", encoding = "UTF-8")
phish_text91 <- content(phish_91, "text", encoding = "UTF-8")
phish_text92 <- content(phish_92, "text", encoding = "UTF-8")

# Parse the text into HTML documents.
phish_html90 <- read_html(phish_text90)
phish_html91 <- read_html(phish_text91)
phish_html92 <- read_html(phish_text92)

# Turn each page into an sf point layer of concerts.
phish90_sf <- parse_phish_html(phish_html90)
phish91_sf <- parse_phish_html(phish_html91)
##      [,1]                                           [,2]           
## [1,] "Phish Phamily Phrolic at Anastasios' HouseNJ" "United States"
##      [,1]                                         [,2]
## [1,] "Phish Phamily Phrolic at Anastasios' House" "NJ"
phish92_sf <- parse_phish_html(phish_html92)
##      [,1]                              [,2]     
## [1,] "Stadtpark/FreilichtbuhneHamburg" "Germany"
##      [,1]                       [,2]     
## [1,] "Stadtpark/Freilichtbuhne" "Hamburg"
##      [,1]                [,2]     
## [1,] "WaldbuhneNordheim" "Germany"
##      [,1]        [,2]      
## [1,] "Waldbuhne" "Nordheim"
##      [,1]                      [,2]     
## [1,] "PhillipshalleDusseldorf" "Germany"
##      [,1]            [,2]        
## [1,] "Phillipshalle" "Dusseldorf"
##      [,1]            [,2]     
## [1,] "ResiNuremberg" "Germany"
##      [,1]   [,2]       
## [1,] "Resi" "Nuremberg"
##      [,1]                        [,2]     
## [1,] "Roskilde FestivalRoskilde" "Denmark"
##      [,1]                [,2]      
## [1,] "Roskilde Festival" "Roskilde"
##      [,1]                    [,2]    
## [1,] "Elysee MontmarteParis" "France"
##      [,1]               [,2]   
## [1,] "Elysee Montmarte" "Paris"
# Some 1992 shows took place outside the United States; keep only the US ones.
phish92_sf_filt <- filter(phish92_sf, country == "United States")

# Stack the three yearly layers into a single point layer.
all_phish_sf <- rbind(
  phish90_sf,
  phish91_sf,
  phish92_sf_filt
)

Making spreadsheet

# Tag each yearly layer with its tour year, then stack the three layers.
phish90_sfa <- mutate(phish90_sf, year = 1990)
phish91_sfa <- mutate(phish91_sf, year = 1991)
phish92_sfa <- mutate(phish92_sf_filt, year = 1992)
all_phish_sfa <- rbind(
  phish90_sfa,
  phish91_sfa,
  phish92_sfa
)

# Flatten to a plain table (no geometry), pick the spreadsheet columns and
# sort by year, then state, then city.
all_phish_df <- st_drop_geometry(all_phish_sfa) %>%
  select(year, city, state, country, venue) %>%
  arrange(year, state, city)

# Export the table as an Excel workbook.
write_xlsx(all_phish_df, "phish_concerts_1990_1992.xlsx")

Merge concert data with state geojson

# Per-state show counts for each year, joined onto the filtered state layer.
phish90_states <- count_shows_by_state(
  concert_sf = phish90_sf,
  states_sf = states_sf_filt
)
phish91_states <- count_shows_by_state(
  concert_sf = phish91_sf,
  states_sf = states_sf_filt
)
phish92_states <- count_shows_by_state(
  concert_sf = phish92_sf_filt,
  states_sf = states_sf_filt
)

# Same counts over all three years combined.
all_phish_states <- count_shows_by_state(
  concert_sf = all_phish_sf,
  states_sf = states_sf_filt
)

Plots

Below are maps of the number of Phish concerts per state. The same plot is produced for each year from 1990 to 1992, followed by a fourth map aggregated over all three years.

1990

# Choropleth of 1990 show counts per state; states with no count are drawn
# with the na.value fill.
ggplot(data = phish90_states) +
  geom_sf(mapping = aes(fill = show_count), color = "white") +
  scale_fill_viridis_c(
    name = "Number of Shows",
    option = "plasma",
    na.value = "grey90"
  ) +
  labs(
    title = "Concerts by U.S. State (1990)",
    x = "Longitude",
    y = "Latitude"
  ) +
  theme_minimal() +
  # Remove grid lines and axis decorations; keep the legend on the right.
  theme(
    legend.position = "right",
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    panel.grid = element_blank()
  )

1991

# Choropleth of 1991 show counts per state; states with no count are drawn
# with the na.value fill.
ggplot(data = phish91_states) +
  geom_sf(mapping = aes(fill = show_count), color = "white") +
  scale_fill_viridis_c(
    name = "Number of Shows",
    option = "plasma",
    na.value = "grey90"
  ) +
  labs(
    title = "Concerts by U.S. State (1991)",
    x = "Longitude",
    y = "Latitude"
  ) +
  theme_minimal() +
  # Remove grid lines and axis decorations; keep the legend on the right.
  theme(
    legend.position = "right",
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    panel.grid = element_blank()
  )

1992

# Choropleth of 1992 show counts per state; states with no count are drawn
# with the na.value fill.
# NOTE(review): this plot sets base_size = 12 while the 1990 and 1991 plots
# use the theme default -- confirm whether the difference is intended.
ggplot(data = phish92_states) +
  geom_sf(mapping = aes(fill = show_count), color = "white") +
  scale_fill_viridis_c(
    name = "Number of Shows",
    option = "plasma",
    na.value = "grey90"
  ) +
  labs(
    title = "Concerts by U.S. State (1992)",
    x = "Longitude",
    y = "Latitude"
  ) +
  theme_minimal(base_size = 12) +
  # Remove grid lines and axis decorations; keep the legend on the right.
  theme(
    legend.position = "right",
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    panel.grid = element_blank()
  )

Aggregated over three years

# Interactive map of the three-year totals, colored by show_count.
mapview(
  all_phish_states,
  zcol = "show_count",
  layer.name = "Number of Shows",
  na.color = "gray90",
  legend = TRUE
)

Point Pattern Analysis

# Project to EPSG:5070 (a planar CRS in metres) before the point pattern
# analysis: Kest() and density() measure Euclidean distances, which are
# distorted when computed on longitude/latitude degrees.
states_projected <- st_transform(states_sf_filt, crs = 5070)
coords <- st_coordinates(st_transform(all_phish_sf, crs = 5070))

# Observation window: bounding rectangle of the projected concert points.
win <- owin(xrange = range(coords[, 1]), yrange = range(coords[, 2]))
phish_ppp <- ppp(x = coords[, 1], y = coords[, 2], window = win)
## Warning: data contain duplicated points

# Ripley's K-function of the concert locations.
K <- Kest(phish_ppp)
plot(K, main = "Ripley's K-Function for Phish Concerts")

# Kernel density estimate with the bandwidth chosen by bw.diggle.
den1 <- density(phish_ppp, sigma = bw.diggle)
plot(den1)