# Load required packages

library(dplyr)
library(ggplot2)
library(scales)
library(sf)
library(stringr)
library(tidyr)
library(usmap)

# Phish concert data ----
# Read the raw concert sheet; the path is relative to the working directory.
concert_data <- read.csv("PhishConcert - Sheet1.csv", stringsAsFactors = FALSE)

# Display the structure of the loaded data
str(concert_data)
cat("Loaded", nrow(concert_data), "concert records\n")

# Display the first few rows of the data
head(concert_data)

# Parse the date column and extract the year ----
# NOTE(review): the date is read from a column named `concerts`, and
# as.Date() is called without a `format`, so it presumably expects ISO-style
# strings such as "1993-02-03" -- confirm against the CSV.
phish_data <- concert_data %>%
  mutate(
    date = as.Date(concerts),
    year = as.numeric(format(date, "%Y"))
  ) %>%
  # Keep only the years of interest
  filter(year >= 1992 & year <= 1994)

# Clean city and state fields ----
phish_data <- phish_data %>%
  mutate(
    # Strip stray double quotes and commas from the free-text fields
    city = str_replace_all(city, '"|,', ''),
    venue = str_replace_all(venue, '"|,', ''),
    # Upper-case the codes so comparisons and joins are case-insensitive
    state = toupper(state),
    country = toupper(country)
  )

# Filter for US concerts only
us_concerts <- phish_data %>%
  filter(country == "USA")

# Display the cleaned data
print(phish_data)

# Check for any issues with state codes
unique(phish_data$state)

# Count concerts by state and year ----
# One row per (state, year) that has at least one US concert.
state_counts <- us_concerts %>%
  group_by(state, year) %>%
  summarise(concert_count = n(), .groups = "drop") %>%
  arrange(year, desc(concert_count))

# Display the counts
print(state_counts)

# Get US state boundaries as an sf object (joined on its `abbr` column below)
us_states <- st_as_sf(us_map("states"))

#' Build a choropleth of concert counts per state for a single year
#'
#' @param year_value Year to plot; rows of `counts` are matched on `year`.
#' @param counts Data frame with `state`, `year` and `concert_count` columns.
#'   Defaults to the script-level `state_counts`.
#' @param states sf object of state polygons with an `abbr` column.
#'   Defaults to the script-level `us_states`.
#' @return A ggplot object; states without a matching row are shown as 0.
create_map_for_year <- function(year_value,
                                counts = state_counts,
                                states = us_states) {
  # Filter data for the specific year
  year_data <- counts %>%
    filter(year == year_value)

  # Join the counts onto the state polygons (keeps every polygon)
  year_map <- states %>%
    left_join(year_data, by = c("abbr" = "state"))

  # States with no concerts that year come back as NA; show them as 0
  year_map$concert_count[is.na(year_map$concert_count)] <- 0

  # NOTE(review): the caption says "Mock data" although the script reads a
  # CSV file -- confirm the intended data-source wording.
  ggplot(year_map) +
    geom_sf(aes(fill = concert_count), color = "white", size = 0.2) +
    scale_fill_viridis_c(
      option = "plasma",
      name = "Number of Concerts",
      direction = -1,
      guide = guide_colorbar(barwidth = 10, barheight = 0.5),
      breaks = pretty_breaks()
    ) +
    labs(
      title = paste("Phish Concerts in", year_value),
      subtitle = "Number of concerts per U.S. state",
      caption = "Data source: Mock data based on ConcertArchives.org"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 16, face = "bold"),
      plot.subtitle = element_text(size = 12),
      legend.position = "bottom",
      axis.text = element_blank(),
      axis.title = element_blank(),
      panel.grid = element_blank()
    )
}

# Create maps for each year
map_1992 <- create_map_for_year(1992)
map_1993 <- create_map_for_year(1993)
map_1994 <- create_map_for_year(1994)

# Display the maps
print(map_1992)
print(map_1993)
print(map_1994)

# Calculate the total concerts by state across all years
total_by_state <- state_counts %>%
  group_by(state) %>%
  summarise(total_concerts = sum(concert_count), .groups = "drop") %>%
  arrange(desc(total_concerts))

# Display the top 10 states
top_10_states <- total_by_state %>%
  head(10)
print(top_10_states)

# Calculate the percentage of concerts in each region ----
# State-to-region lookup. Base R's `state.region` is index-aligned with
# `state.abb`, so it is used instead of a hand-typed vector, which is easy to
# misalign with the order of `state.abb`.
regions <- data.frame(
  state = state.abb,
  region = as.character(state.region),
  stringsAsFactors = FALSE
)
# `state.region` calls the Midwest "North Central"; use the label this
# script reports.
regions$region[regions$region == "North Central"] <- "Midwest"

# Join region data with state totals and compute each region's share
region_counts <- total_by_state %>%
  left_join(regions, by = "state") %>%
  # Drop state codes that have no entry in `regions`
  filter(!is.na(region)) %>%
  group_by(region) %>%
  summarise(total_concerts = sum(total_concerts), .groups = "drop") %>%
  mutate(percentage = total_concerts / sum(total_concerts) * 100) %>%
  arrange(desc(percentage))

# Display regional distribution
print(region_counts)

# Create a bar chart of regional distribution
# Bars are ordered from the largest to the smallest share.
region_bar_chart <- ggplot(
  region_counts,
  aes(x = reorder(region, -percentage), y = percentage)
) +
  geom_col(fill = "steelblue") +
  geom_text(aes(label = sprintf("%.1f%%", percentage)), vjust = -0.5) +
  labs(
    title = "Regional Distribution of Phish Concerts (1992-1994)",
    x = "Region",
    y = "Percentage of Concerts"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5))

# Print explicitly so the chart is also drawn when the script is source()d
print(region_bar_chart)

# Calculate the year-over-year change in concert counts by state ----
# `state_counts` only has rows for state/year pairs with at least one
# concert. Fill the missing pairs with 0 first, so that lag() always refers
# to the immediately preceding year and drops to zero are counted.
yoy_change <- state_counts %>%
  complete(
    state,
    year = full_seq(year, 1),
    fill = list(concert_count = 0L)
  ) %>%
  arrange(state, year) %>%
  group_by(state) %>%
  mutate(
    prev_year_count = lag(concert_count),
    yoy_change = concert_count - prev_year_count,
    # A percentage change is undefined when the previous year had 0 concerts
    yoy_percent = ifelse(
      prev_year_count > 0,
      round((concert_count - prev_year_count) / prev_year_count * 100, 1),
      NA
    )
  ) %>%
  # The first year of each state has no previous year to compare against
  filter(!is.na(yoy_change)) %>%
  ungroup()

# Display states with the largest increase and decrease
top_increases <- yoy_change %>%
  arrange(desc(yoy_change)) %>%
  head(5)

top_decreases <- yoy_change %>%
  arrange(yoy_change) %>%
  head(5)

print("States with largest increase in concerts:")
print(top_increases)

print("States with largest decrease in concerts:")
print(top_decreases)

# Create a faceted map showing all three years ----
# Give every mapped state a row for every year (0 where no concerts were
# played). Joining the sparse counts directly would leave each facet with
# only the states that had concerts and would add an extra "NA" facet.
counts_all_years <- state_counts %>%
  complete(
    state = unique(us_states$abbr),
    year,
    fill = list(concert_count = 0L)
  )

all_years_data <- us_states %>%
  left_join(counts_all_years, by = c("abbr" = "state"))

# Create the faceted map
faceted_map <- ggplot(all_years_data) +
  geom_sf(aes(fill = concert_count), color = "white", size = 0.2) +
  scale_fill_viridis_c(
    option = "plasma",
    name = "Number of Concerts",
    direction = -1,
    guide = guide_colorbar(barwidth = 10, barheight = 0.5),
    breaks = pretty_breaks()
  ) +
  facet_wrap(~ year, ncol = 3) +
  labs(
    title = "Phish Concerts (1992-1994)",
    subtitle = "Geographic distribution across U.S. states",
    caption = "Data source: Phish concert data"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 12),
    legend.position = "bottom",
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    strip.text = element_text(size = 12, face = "bold")
  )

# Display the faceted map
print(faceted_map)

# Export cleaned data ----
# Both files are written to the current working directory.
write.csv(phish_data, "phish_cleaned_data_1992_1994.csv", row.names = FALSE)
write.csv(state_counts, "phish_state_counts_1992_1994.csv", row.names = FALSE)

# Save the maps ----
# TODO: no map-saving code exists under this heading; add ggsave() calls
# here if the maps should be written to disk.

# Create a summary table of concerts by state and year ----
# One row per state, one column per year (0 where the state had no concerts),
# plus a `Total` column; rows are sorted by `Total`, largest first.
summary_table <- state_counts %>%
  pivot_wider(
    names_from = year,
    values_from = concert_count,
    values_fill = 0
  ) %>%
  # After pivoting, the only numeric columns are the per-year counts
  mutate(Total = rowSums(across(where(is.numeric)))) %>%
  arrange(desc(Total))