library(rvest)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(purrr)
library(tidyr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(ggplot2)
library(tmap)

# Work from the class folder so the relative CSV path below resolves.
# NOTE(review): this hard-coded path is presumably specific to the author's
# machine; an RStudio project or a path passed in would be more portable.
setwd("~/Desktop/Data Analysis and Visualization with R")

# Load the raw concert listing.
concerts_table <- read.csv(file = "yolatengo_concerts.csv")

# Keep only the three columns used below, then give them short lowercase
# names.
concerts <- concerts_table %>%
  select(Start.Date, Venue, Location) %>%
  rename(date = Start.Date, venue = Venue, location = Location)

# Structural check of the trimmed table (output from a recorded run):
glimpse(concerts)
## Rows: 173
## Columns: 3
## $ date     <chr> "11/19/1995", "11/18/1995", "11/14/1995", "11/12/1995", "11/1…
## $ venue    <chr> "Tramps", "Tramps", "Barrymore's Music Hall", "The Opera Hous…
## $ location <chr> "New York, New York, United States", "New York, New York, Uni…
# Parse the month/day/year strings into Date values, then keep only rows
# that have both a parsed date and a non-empty location.
concerts <- concerts %>%
  mutate(date = mdy(date)) %>%
  filter(!(is.na(date) | is.na(location) | location == ""))

# Split each "city, region, country" location string into one column per
# part. Locations with fewer than three parts are padded with NA on the
# right. (tidyr is already attached at the top of the script.)
concerts <- separate(
  concerts,
  col = location,
  into = c("city", "region", "country"),
  sep = ", ",
  fill = "right"
)

# First rows after the split (output from a recorded run):
head(concerts)
##         date                  venue      city   region       country
## 1 1995-11-19                 Tramps  New York New York United States
## 2 1995-11-18                 Tramps  New York New York United States
## 3 1995-11-14 Barrymore's Music Hall    Ottawa  Ontario        Canada
## 4 1995-11-12        The Opera House   Toronto  Ontario        Canada
## 5 1995-11-11              Blind Pig Ann Arbor Michigan United States
## 6 1995-11-10                  Metro   Chicago Illinois United States
# Geocode each concert's location so it can be placed on a map.
library(tidygeocoder)

# Build the query string from whichever location parts are present.
# separate(..., fill = "right") leaves NA in region/country for locations
# with fewer than three parts, and paste() would turn those into the literal
# text "NA" (e.g. "City, Country, NA"). unite(na.rm = TRUE) skips them.
concerts <- concerts %>%
  unite(
    "full_location", city, region, country,
    sep = ", ", remove = FALSE, na.rm = TRUE
  ) %>%
  # unite() inserts the new column next to the inputs; keep it last, where
  # the previous mutate() put it.
  relocate(full_location, .after = last_col())

# Look up coordinates with the Nominatim (OpenStreetMap) geocoder; results
# land in the `lat` and `long` columns. This needs network access and is
# slow: a recorded run passed 105 addresses and took 105.6 seconds.
geo_data <- concerts %>%
  geocode(full_location, method = "osm", lat = lat, long = long)
# Drop rows with a missing latitude or longitude, then turn the remaining
# rows into point features built from the long/lat columns (CRS EPSG:4326).
# (sf is already attached at the top of the script.)
geo_sf <- st_as_sf(
  filter(geo_data, !(is.na(lat) | is.na(long))),
  coords = c("long", "lat"),
  crs = 4326
)

# Count concerts per distinct place. The geometry column is part of the
# grouping key, so each output row is one city/region/country/point
# combination with its number of concerts.
concert_counts <- summarise(
  group_by(geo_sf, city, region, country, geometry),
  n_concerts = n(),
  .groups = "drop"
)

# Interactive map with `n_concerts` passed as both the colour column (zcol)
# and the point size (cex).
library(mapview)
mapview(concert_counts, zcol = "n_concerts", cex = "n_concerts")