#1. Create concert sf points
# Parse one tour page into an sf point layer with one row per show.
#
# Arguments:
#   phish_html  Parsed HTML document (e.g. the result of read_html()).
#   verbose     If TRUE, report via message() how each two-part location
#               string was split. Defaults to FALSE.
#
# Returns an sf object (CRS 4326) with character columns venue, city, state
# and country. The point geometry is built from the text after "to:" in the
# href of each link inside "div.purchase-show-location", split on "+" into
# latitude and longitude.
#
# Stops with an error when the number of location entries and the number of
# coordinate links differ; warns (and fills NA) for a location string whose
# comma-separated layout is not one of the four recognised shapes.
parse_phish_html <- function(phish_html, verbose = FALSE) {
  # The page text runs the venue straight into the next field with no
  # separator (e.g. "WaldbuhneNordheim"), so the only available cue is a
  # lowercase letter immediately followed by an uppercase one.
  boundary <- "(?<=[a-z])(?=[A-Z])"

  # Split `text` into c(head, tail) at the LAST case boundary. Returns
  # c(text, NA) when there is no boundary.
  # NOTE(review): choosing the last boundary keeps names such as
  # "McCarthy HallBurlington" intact on the venue side, but would mis-split
  # a trailing field that itself contains a boundary (e.g. "McLean").
  # Previously everything after the second piece was dropped.
  split_tail <- function(text) {
    pieces <- str_split(text, boundary)[[1]]
    n_pieces <- length(pieces)
    if (n_pieces < 2) {
      return(c(text, NA_character_))
    }
    c(paste0(pieces[-n_pieces], collapse = ""), pieces[n_pieces])
  }

  location <- phish_html %>%
    html_nodes("div.purchase-details") %>%
    html_text()

  # Preallocate so every location entry keeps its own row, which keeps the
  # text columns aligned with the coordinate rows below.
  n_shows <- length(location)
  venues <- rep(NA_character_, n_shows)
  cities <- rep(NA_character_, n_shows)
  states <- rep(NA_character_, n_shows)
  countries <- rep(NA_character_, n_shows)

  for (i in seq_along(location)) {
    clean_text <- location[[i]] %>%
      str_trim() %>%
      str_replace_all("\\t|\\n", "") %>%
      str_replace(" map$", "")
    parts <- str_split(clean_text, ", ")[[1]]
    n_parts <- length(parts)

    if (n_parts == 2) {
      # e.g. "Phish Phamily Phrolic at Anastasios' HouseNJ", "United States"
      vs <- split_tail(parts[1])
      if (verbose) {
        message("Split '", parts[1], "' into '", vs[1], "' / '", vs[2], "'")
      }
      venues[i] <- vs[1]
      cities[i] <- "Not Available"
      states[i] <- vs[2]
      countries[i] <- parts[2]
    } else if (n_parts == 3) {
      # "<venue><city>", "<state>", "<country>"
      vc <- split_tail(parts[1])
      venues[i] <- vc[1]
      cities[i] <- vc[2]
      states[i] <- parts[2]
      countries[i] <- parts[3]
    } else if (n_parts == 4) {
      # "<event>", "<venue><city>", "<state>", "<country>"
      ec <- split_tail(parts[2])
      # paste0: the separators already carry their own spacing.
      venues[i] <- paste0(parts[1], " at ", ec[1])
      cities[i] <- ec[2]
      states[i] <- parts[3]
      countries[i] <- parts[4]
    } else if (n_parts == 5) {
      # "<event part 1>", "<event part 2>", "<venue><city>", "<state>",
      # "<country>"
      ec <- split_tail(parts[3])
      venues[i] <- paste0(parts[1], ", ", parts[2], " at ", ec[1])
      cities[i] <- ec[2]
      states[i] <- parts[4]
      countries[i] <- parts[5]
    } else {
      # Keep the row (as NA) rather than dropping it, so later rows are not
      # shifted onto the wrong coordinates.
      warning(
        "Unrecognised location format (", n_parts,
        " comma-separated parts): ", clean_text,
        call. = FALSE
      )
    }
  }

  coords_url <- phish_html %>%
    html_nodes("div.purchase-show-location") %>%
    html_nodes("a") %>%
    html_attr("href")
  clean_coords <- sub(".*to:", "", coords_url)
  latlon <- str_split(clean_coords, "\\+", simplify = TRUE)

  # data.frame() would otherwise either fail with a generic message or
  # silently recycle the shorter input.
  if (nrow(latlon) != n_shows || ncol(latlon) < 2) {
    stop(
      "Expected one 'lat+lon' map link per show: found ", n_shows,
      " location entries and ", nrow(latlon), " coordinate links.",
      call. = FALSE
    )
  }

  concert_df <- data.frame(
    venue = venues,
    city = cities,
    state = states,
    country = countries,
    lat = as.numeric(latlon[, 1]),
    lon = as.numeric(latlon[, 2]),
    stringsAsFactors = FALSE
  )
  st_as_sf(concert_df, coords = c("lon", "lat"), crs = 4326)
}
#2. State merge
# Count how many concert points fall inside each state polygon and attach
# the count to the state layer.
#
# Arguments:
#   concert_sf    sf point layer of concerts.
#   states_sf     sf polygon layer of states.
#   state_column  Name of the column in states_sf that identifies a state.
#
# Returns states_sf with an added show_count column. States with no
# matching concert get NA.
count_shows_by_state <- function(concert_sf, states_sf, state_column = "STATE_ABBR") {
  if (!state_column %in% names(states_sf)) {
    stop("Column '", state_column, "' not found in states_sf.", call. = FALSE)
  }

  # Only the point geometry is needed for counting. Dropping the concert
  # attributes also avoids st_join() renaming a clashing column to
  # "<name>.x" / "<name>.y", which would hide state_column.
  concert_points <- st_sf(geometry = st_geometry(concert_sf))

  # st_join() requires both layers to share a CRS; reproject the points
  # when both CRSs are known and differ.
  points_crs <- st_crs(concert_points)
  states_crs <- st_crs(states_sf)
  if (!is.na(points_crs) && !is.na(states_crs) && points_crs != states_crs) {
    concert_points <- st_transform(concert_points, states_crs)
  }

  # Spatial join: assign each concert point to a state polygon
  concert_with_state <- st_join(concert_points, states_sf[state_column])

  # Count the number of shows per state. Geometry is dropped first so that
  # summarize() does not union the points of every group, and points that
  # fell outside every polygon (NA state) are excluded.
  show_counts_df <- concert_with_state %>%
    st_drop_geometry() %>%
    filter(!is.na(.data[[state_column]])) %>%
    group_by(across(all_of(state_column))) %>%
    summarize(show_count = n(), .groups = "drop")

  # Join show counts back to the full state shapefile
  left_join(states_sf, show_counts_df, by = state_column)
}
# Point plot the concerts
# Draw concert points in red on top of a light-blue state basemap.
#
# Arguments:
#   sf_data  sf layer of concert points to plot.
#   year     Value shown in parentheses in the plot title.
#   states   sf polygon layer used as the basemap. Defaults to the global
#            states_sf_filt, which is looked up when the function is called,
#            so existing two-argument calls behave as before.
#
# Returns a ggplot object.
make_map <- function(sf_data, year, states = states_sf_filt) {
  ggplot() +
    geom_sf(data = states, fill = "lightblue", color = "black") +
    geom_sf(data = sf_data, color = "red") +
    theme_minimal() +
    labs(
      title = paste0("Phish US Concerts (", year, ")"),
      x = "Longitude", y = "Latitude"
    ) +
    theme(legend.position = "none")
}
# Load the state boundary polygons
states_sf <- st_read("data/US_State_Boundaries.geojson")
## Reading layer `US_State_Boundaries' from data source
## `/Users/williamcornejo/Desktop/Desktop - william’s MacBook Air/school/gtech705/gtech78520/final_proj/data/US_State_Boundaries.geojson'
## using driver `GeoJSON'
## Simple feature collection with 53 features and 16 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -179.1474 ymin: 17.6744 xmax: 179.7784 ymax: 71.38921
## Geodetic CRS: WGS 84
# Basemap without Alaska and Hawaii
states_sf_filt <- filter(states_sf, !STATE_ABBR %in% c("AK", "HI"))
# Tour years used: 1990, 1991 and 1992
phish_90 <- GET("https://phish.com/tours/1990")
phish_91 <- GET("https://phish.com/tours/1991")
phish_92 <- GET("https://phish.com/tours/1992")
# Fail fast on an HTTP error instead of parsing an error page as if it were
# a tour listing.
stop_for_status(phish_90)
stop_for_status(phish_91)
stop_for_status(phish_92)
phish_text90 <- content(phish_90, "text", encoding = "UTF-8")
phish_text91 <- content(phish_91, "text", encoding = "UTF-8")
phish_text92 <- content(phish_92, "text", encoding = "UTF-8")
phish_html90 <- read_html(phish_text90)
phish_html91 <- read_html(phish_text91)
phish_html92 <- read_html(phish_text92)
# One sf point layer per tour year
phish90_sf <- parse_phish_html(phish_html90)
phish91_sf <- parse_phish_html(phish_html91)
## [,1] [,2]
## [1,] "Phish Phamily Phrolic at Anastasios' HouseNJ" "United States"
## [,1] [,2]
## [1,] "Phish Phamily Phrolic at Anastasios' House" "NJ"
phish92_sf <- parse_phish_html(phish_html92)
## [,1] [,2]
## [1,] "Stadtpark/FreilichtbuhneHamburg" "Germany"
## [,1] [,2]
## [1,] "Stadtpark/Freilichtbuhne" "Hamburg"
## [,1] [,2]
## [1,] "WaldbuhneNordheim" "Germany"
## [,1] [,2]
## [1,] "Waldbuhne" "Nordheim"
## [,1] [,2]
## [1,] "PhillipshalleDusseldorf" "Germany"
## [,1] [,2]
## [1,] "Phillipshalle" "Dusseldorf"
## [,1] [,2]
## [1,] "ResiNuremberg" "Germany"
## [,1] [,2]
## [1,] "Resi" "Nuremberg"
## [,1] [,2]
## [1,] "Roskilde FestivalRoskilde" "Denmark"
## [,1] [,2]
## [1,] "Roskilde Festival" "Roskilde"
## [,1] [,2]
## [1,] "Elysee MontmarteParis" "France"
## [,1] [,2]
## [1,] "Elysee Montmarte" "Paris"
# Some 1992 shows took place outside the US; keep only the US ones
phish92_sf_filt <- filter(phish92_sf, country == "United States")

# Combined point layer for the aggregated state counts
all_phish_sf <- rbind(phish90_sf, phish91_sf, phish92_sf_filt)

# Same layers tagged with their tour year, then stacked
phish90_sfa <- mutate(phish90_sf, year = 1990)
phish91_sfa <- mutate(phish91_sf, year = 1991)
phish92_sfa <- mutate(phish92_sf_filt, year = 1992)
all_phish_sfa <- rbind(phish90_sfa, phish91_sfa, phish92_sfa)

# Flat attribute table (no geometry) for the spreadsheet export
all_phish_attrs <- st_drop_geometry(all_phish_sfa)
all_phish_df <- arrange(
  select(all_phish_attrs, year, city, state, country, venue),
  year, state, city
)
rm(all_phish_attrs)
write_xlsx(all_phish_df, "phish_concerts_1990_1992.xlsx")

# Show counts per state: one layer per year plus the three-year total
phish90_states <- count_shows_by_state(phish90_sf, states_sf_filt)
phish91_states <- count_shows_by_state(phish91_sf, states_sf_filt)
phish92_states <- count_shows_by_state(phish92_sf_filt, states_sf_filt)
all_phish_states <- count_shows_by_state(all_phish_sf, states_sf_filt)
Below is the map of Phish concerts by state for 1990. The same plot is then repeated for 1991 and 1992, followed by a fourth, aggregated map covering all three years.
# Choropleth of show counts per state for a single year.
#
# Arguments:
#   state_counts  sf polygon layer with a show_count column.
#   year          Value shown in parentheses in the plot title.
#
# Returns a ggplot object. One helper replaces three copy-pasted plots so
# that every year is drawn with identical settings (the 1992 copy alone
# used base_size = 12). Axis labels are not set because the theme blanks
# the axis titles anyway.
plot_shows_by_state <- function(state_counts, year) {
  ggplot(state_counts) +
    geom_sf(aes(fill = show_count), color = "white") +
    scale_fill_viridis_c(
      option = "plasma",
      na.value = "grey90",
      name = "Number of Shows"
    ) +
    theme_minimal() +
    labs(title = paste0("Concerts by U.S. State (", year, ")")) +
    theme(
      panel.grid = element_blank(),
      axis.text = element_blank(),
      axis.ticks = element_blank(),
      axis.title = element_blank(),
      legend.position = "right"
    )
}

plot_shows_by_state(phish90_states, 1990)
plot_shows_by_state(phish91_states, 1991)
plot_shows_by_state(phish92_states, 1992)
# Interactive map of the aggregated show counts per state
mapview(
  all_phish_states,
  zcol = "show_count",
  legend = TRUE,
  layer.name = "Number of Shows",
  na.color = "gray90"
)