library(rvest)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(purrr)
library(tidyr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(ggplot2)
library(tmap)
# Folder that holds the class data. The input path is built from it with
# file.path() instead of calling setwd(), so the script no longer changes the
# working directory of the R session it runs in.
data_dir <- "~/Desktop/Data Analysis and Visualization with R"
# Read in the CSV file with the concert data
concerts_table <- read.csv(file.path(data_dir, "yolatengo_concerts.csv"))
# Keep only the three fields this analysis uses, giving each one a short
# lower-case name at the same time.
concerts <- select(
  concerts_table,
  date = Start.Date,
  venue = Venue,
  location = Location
)
# Quick look at the structure of the trimmed table
concerts %>% glimpse()
## Rows: 173
## Columns: 3
## $ date <chr> "11/19/1995", "11/18/1995", "11/14/1995", "11/12/1995", "11/1…
## $ venue <chr> "Tramps", "Tramps", "Barrymore's Music Hall", "The Opera Hous…
## $ location <chr> "New York, New York, United States", "New York, New York, Uni…
# Parse the month/day/year strings into dates, then keep only the rows that
# have both a parsed date and a non-empty location.
concerts <- concerts %>%
  mutate(date = mdy(date)) %>%
  filter(!is.na(date) & !is.na(location) & location != "")
library(tidyr)
# Split "City, Region, Country" into separate columns.
# separate(fill = "right") pads short locations on the right, so a two-piece
# location ends up with its second piece in `region` and NA in `country`.
# NOTE(review): this assumes two-piece locations are "City, Country" -- confirm
# against the data. Under that assumption the second piece is moved across so
# that `country` always holds the last piece and `region` is NA when absent.
concerts <- concerts %>%
  separate(
    location,
    into = c("city", "region", "country"),
    sep = ", ",
    fill = "right"
  ) %>%
  mutate(
    # Flag first: mutate() evaluates sequentially, so `country` must not be
    # overwritten before `region` is decided.
    is_two_part = is.na(country) & !is.na(region),
    country = if_else(is_two_part, region, country),
    region = if_else(is_two_part, NA_character_, region)
  ) %>%
  select(-is_two_part)
head(concerts)
## date venue city region country
## 1 1995-11-19 Tramps New York New York United States
## 2 1995-11-18 Tramps New York New York United States
## 3 1995-11-14 Barrymore's Music Hall Ottawa Ontario Canada
## 4 1995-11-12 The Opera House Toronto Ontario Canada
## 5 1995-11-11 Blind Pig Ann Arbor Michigan United States
## 6 1995-11-10 Metro Chicago Illinois United States
# Geocoding: build one address string per concert and look up its coordinates.
# The address is assembled with unite(na.rm = TRUE) so that missing pieces are
# skipped; paste() would convert them to the literal text "NA" and the
# geocoder would be queried with strings such as "City, Country, NA".
concerts <- concerts %>%
  unite(
    "full_location", city, region, country,
    sep = ", ", remove = FALSE, na.rm = TRUE
  ) %>%
  # Keep the new column last, which is where mutate() used to put it
  relocate(full_location, .after = last_col())
library(tidygeocoder)
# Nominatim ("osm") lookup; results are written to the `lat` and `long` columns
geo_data <- concerts %>%
  geocode(full_location, method = "osm", lat = lat, long = long)
## Passing 105 addresses to the Nominatim single address geocoder
## Query completed in: 105.6 seconds
# Build the spatial layer: discard rows that have no coordinates, then turn
# the long/lat columns into point geometry using CRS 4326.
library(sf)
geo_sf <- st_as_sf(
  filter(geo_data, !is.na(lat) & !is.na(long)),
  coords = c("long", "lat"),
  crs = 4326
)
# Count concerts per place: one output row for every combination of city,
# region, country and geometry, with the grouping dropped afterwards.
concert_counts <- summarise(
  group_by(geo_sf, city, region, country, geometry),
  n_concerts = n(),
  .groups = "drop"
)
# Interactive map of the per-location counts; the n_concerts column is passed
# as both the `zcol` and the `cex` argument.
library(mapview)
concert_counts %>%
  mapview(zcol = "n_concerts", cex = "n_concerts")