library(rvest)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(purrr)
library(tidyr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(ggplot2)
library(tmap)

# Work from the class folder so the relative file names used in this script
# resolve.
# NOTE(review): hard-coded home-directory path; the script has to be run on a
# machine where this folder exists.
setwd("~/Desktop/Data Analysis and Visualization with R")

# Load the raw concert listing.
concerts_table <- read.csv(file = "yolatengo_concerts.csv")

# Keep only the three fields used downstream, renaming them to short
# lower-case names in the same step.
concerts <- select(
  concerts_table,
  date = Start.Date,
  venue = Venue,
  location = Location
)

glimpse(concerts)
## Rows: 173
## Columns: 3
## $ date     <chr> "11/19/1995", "11/18/1995", "11/14/1995", "11/12/1995", "11/1…
## $ venue    <chr> "Tramps", "Tramps", "Barrymore's Music Hall", "The Opera Hous…
## $ location <chr> "New York, New York, United States", "New York, New York, Uni…
# Parse the month/day/year strings into Date values, then drop every row that
# has no usable date or whose location is missing or blank.
concerts <- concerts %>%
  mutate(date = mdy(date)) %>%
  filter(!(is.na(date) | is.na(location) | location == ""))

library(tidyr)
# Split the comma-separated location string into city / region / country.
# Locations with fewer than three pieces are padded with NA on the right.
concerts <- separate(
  concerts,
  col = location,
  into = c("city", "region", "country"),
  sep = ", ",
  fill = "right"
)

head(concerts)
##         date                  venue      city   region       country
## 1 1995-11-19                 Tramps  New York New York United States
## 2 1995-11-18                 Tramps  New York New York United States
## 3 1995-11-14 Barrymore's Music Hall    Ottawa  Ontario        Canada
## 4 1995-11-12        The Opera House   Toronto  Ontario        Canada
## 5 1995-11-11              Blind Pig Ann Arbor Michigan United States
## 6 1995-11-10                  Metro   Chicago Illinois United States
# Geocoding data to add coordinates based on city locations.
#
# The query string is assembled with unite(na.rm = TRUE) rather than paste():
# separate(..., fill = "right") leaves NA in region/country for locations with
# fewer than three parts, and paste() would turn those into the literal text
# "NA" inside the address sent to the geocoder (e.g. "City, Country, NA").
# remove = FALSE keeps city/region/country; relocate() puts the new column
# last, where mutate() used to add it.
concerts <- concerts %>%
  unite(
    "full_location", city, region, country,
    sep = ", ", remove = FALSE, na.rm = TRUE
  ) %>%
  relocate(full_location, .after = last_col())
library(tidygeocoder)
# Look up each address with the OSM (Nominatim) service; results are written
# to new `lat` / `long` columns.
geo_data <- concerts %>%
  geocode(full_location, method = "osm", lat = lat, long = long)
## Passing 105 addresses to the Nominatim single address geocoder
## Query completed in: 106.2 seconds
# Turn the geocoded table into an sf point layer (EPSG:4326), keeping only the
# rows for which both coordinates are present.
library(sf)
located <- filter(geo_data, !is.na(lat) & !is.na(long))
geo_sf <- st_as_sf(located, coords = c("long", "lat"), crs = 4326)

# Count the concerts at each distinct place (city / region / country at a
# given point), then preview the counts with mapview, using the count for both
# colour and symbol size.
library(mapview)
by_place <- group_by(geo_sf, city, region, country, geometry)
concert_counts <- summarise(by_place, n_concerts = n(), .groups = "drop")
mapview(concert_counts, zcol = "n_concerts", cex = "n_concerts")
# Interactive leaflet map of concert counts per location
library(leaflet)

# Continuous viridis colour scale spanning the observed counts
pal <- colorNumeric(palette = "viridis", domain = concert_counts$n_concerts)

# Start from the grey CartoDB basemap
count_map <- leaflet(concert_counts)
count_map <- addProviderTiles(count_map, "CartoDB.Positron")

# One circle per location: radius grows with the square root of the count,
# colour follows the palette, and the popup lists place and count
count_map <- addCircleMarkers(
  count_map,
  radius = ~sqrt(n_concerts) * 2,
  color = ~pal(n_concerts),
  fillColor = ~pal(n_concerts),
  stroke = TRUE,
  weight = 1,
  fillOpacity = 0.8,
  popup = ~paste0(
    "<b>", city, "</b><br>",
    region, ", ", country, "<br>",
    "<b>Concerts:</b> ", n_concerts
  )
)

# Colour legend in the lower-right corner
count_map <- addLegend(
  count_map,
  position = "bottomright",
  pal = pal,
  values = ~n_concerts,
  title = "Yo La Tengo Concerts, 1993–1995",
  opacity = 0.8
)

# Display the finished map
count_map
# Mapping the tour routes: put the stops in chronological order and attach to
# each stop the coordinates and date of the stop that follows it.
geo_sf <- arrange(geo_sf, date)

# Coordinate matrix of the ordered points (first column x, second column y)
coords <- st_coordinates(geo_sf)

geo_sf <- geo_sf %>%
  mutate(
    lon = coords[, 1],
    lat = coords[, 2],
    # lead() shifts by one row, so the last stop gets NA for all three
    next_lon = lead(lon),
    next_lat = lead(lat),
    next_date = lead(date)
  )

# Build one two-point LINESTRING per tour leg (a stop -> the stop after it).
#
# The geometries are created in one pass and swapped in as the geometry
# column, instead of going through rowwise() + mutate(): rowwise() was never
# undone, so the result stayed row-wise grouped for every later dplyr verb,
# and a separate one-element sfc had to be constructed for each row.

# The final stop has no successor (next_lon is NA), so it starts no leg
route_legs <- geo_sf %>%
  filter(!is.na(next_lon))

# Each leg is a 2 x 2 matrix: row 1 = start (lon, lat), row 2 = end
leg_lines <- lapply(seq_len(nrow(route_legs)), function(i) {
  st_linestring(matrix(
    c(
      route_legs$lon[i], route_legs$lat[i],
      route_legs$next_lon[i], route_legs$next_lat[i]
    ),
    ncol = 2,
    byrow = TRUE
  ))
})

# Replace the point geometry with the leg lines, keeping all attribute columns
tour_routes <- st_set_geometry(route_legs, st_sfc(leg_lines, crs = 4326))

library(leaflet)
library(viridis)
## Loading required package: viridisLite
# Tag each leg with the calendar year of the concert it starts from
tour_routes <- mutate(tour_routes, year = year(date))

# Categorical colour scale: a three-colour viridis palette keyed by year
pal <- colorFactor(palette = viridis(3), domain = tour_routes$year)

# Draw the legs on the grey CartoDB basemap, coloured by year
route_map <- leaflet(tour_routes)
route_map <- addProviderTiles(route_map, "CartoDB.Positron")
route_map <- addPolylines(
  route_map,
  color = ~pal(year),
  weight = 3,
  opacity = 0.8
)
route_map <- addLegend(
  route_map,
  "bottomright",
  pal = pal,
  values = ~year,
  title = "Tour Year"
)

# Display the finished map
route_map
# Country GDP per capita, for comparison with the tour map. Per the author's
# note, the file was pulled from the World Bank and limited to 1993-1995.
gdp_data <- read.csv(file = "GDP-1993-1995.csv")
head(gdp_data)
##          Country.Name               Indicator.Name      X1993      X1994
## 1             Albania GDP per capita (current US$)   367.2792   586.4161
## 2             Algeria GDP per capita (current US$)  1831.0480  1525.5406
## 3             Andorra GDP per capita (current US$) 15916.8395 15748.5307
## 4              Angola GDP per capita (current US$)   449.7279   334.9736
## 5 Antigua and Barbuda GDP per capita (current US$)  8624.6202  9351.4875
## 6           Argentina GDP per capita (current US$)  6931.8560  7437.5624
##        X1995
## 1   911.3205
## 2  1466.9481
## 3 18443.2545
## 4   404.2948
## 5  9034.4755
## 6  7357.6163
# Add each country's mean GDP per capita across 1993, 1994 and 1995 as a new
# column; years with a missing value are left out of that country's mean.
year_cols <- c("X1993", "X1994", "X1995")
gdp_data <- mutate(
  gdp_data,
  avg_93_95 = rowMeans(gdp_data[year_cols], na.rm = TRUE)
)
head(gdp_data)
##          Country.Name               Indicator.Name      X1993      X1994
## 1             Albania GDP per capita (current US$)   367.2792   586.4161
## 2             Algeria GDP per capita (current US$)  1831.0480  1525.5406
## 3             Andorra GDP per capita (current US$) 15916.8395 15748.5307
## 4              Angola GDP per capita (current US$)   449.7279   334.9736
## 5 Antigua and Barbuda GDP per capita (current US$)  8624.6202  9351.4875
## 6           Argentina GDP per capita (current US$)  6931.8560  7437.5624
##        X1995  avg_93_95
## 1   911.3205   621.6719
## 2  1466.9481  1607.8456
## 3 18443.2545 16702.8749
## 4   404.2948   396.3321
## 5  9034.4755  9003.5277
## 6  7357.6163  7242.3449
# Restrict the table to countries that appear in the concert counts, then keep
# just the country name and the three-year average under shorter names.
gdp_data <- gdp_data %>%
  filter(Country.Name %in% concert_counts$country) %>%
  select(country = Country.Name, GDP = avg_93_95)
gdp_data
##           country      GDP
## 1         Belgium 24968.77
## 2          Canada 20288.02
## 3         Denmark 30956.88
## 4          France 24130.18
## 5         Germany 28188.62
## 6     Netherlands 25713.41
## 7           Spain 14065.67
## 8     Switzerland 44147.58
## 9  United Kingdom 20422.40
## 10  United States 27591.01
#Using world dataset to join the geometries to the countries in my data
library(rnaturalearth)
library(rnaturalearthdata)
## 
## Attaching package: 'rnaturalearthdata'
## The following object is masked from 'package:rnaturalearth':
## 
##     countries110
# Country polygons (medium scale) as an sf layer
world <- ne_countries(scale = "medium", returnclass = "sf")
# Attach GDP to the polygons by country name; every polygon is kept
map_data <- left_join(world, gdp_data, by = c("name" = "country"))
mapview(map_data, zcol = "GDP")
# The United States is missing from this map: its name in the GDP table does
# not match the name used in the world table.

# Align the GDP table's US label with the `name` used by the world layer.
# %in% keeps the mask free of NA even if a country name is missing.
us_rows <- gdp_data$country %in% "United States"
gdp_data$country[us_rows] <- "United States of America"

# The join matches on exact names, so any other label mismatch would drop that
# country's GDP from the map without an error. Report leftovers explicitly.
unmatched <- setdiff(gdp_data$country, world$name)
if (length(unmatched) > 0) {
  warning(
    "GDP rows with no matching world polygon: ",
    paste(unmatched, collapse = ", "),
    call. = FALSE
  )
}

map_data <- world %>%
  left_join(gdp_data, by = c("name" = "country"))
# Checking updated map
mapview(map_data, zcol = "GDP")
# Creating an interactive map of the GDP data

# Continuous viridis scale over GDP; polygons without a GDP value are drawn
# light grey
pal <- colorNumeric(
  palette = "viridis",
  domain = map_data$GDP,
  na.color = "lightgrey"
)

leaflet(map_data) %>%
  # Default tile basemap
  addTiles() %>%
  # Country polygons filled by average GDP per capita
  addPolygons(
    fillColor = ~pal(GDP),
    weight = 1,
    color = "white",
    fillOpacity = 0.8,
    # map_data holds every world polygon, but only tour countries have a GDP
    # value; show "no data" for the rest rather than the text "$NA"
    popup = ~paste0(
      "<strong>", name, "</strong><br>",
      "Average GDP Per Capita (1993–1995): ",
      ifelse(is.na(GDP), "no data", paste0("$", round(GDP, 2)))
    )
  ) %>%
  # Colour legend in the lower-right corner
  addLegend(
    pal = pal,
    values = ~GDP,
    title = "GDP Per Capita",
    position = "bottomright"
  ) %>%
  # Map title in the upper-right corner
  addControl(
    html = "<h3>GDP Per Capita of Tour Countries</h3>",
    position = "topright"
  )