library(rvest)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(purrr)
library(tidyr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(ggplot2)
library(tmap)
# Point the session at the class folder; the relative file names read by this
# script are resolved against it.
# NOTE(review): setwd() ties the script to one machine's folder layout --
# consider a project-relative path instead.
setwd("~/Desktop/Data Analysis and Visualization with R")

# Load the raw concert listing.
concerts_table <- read.csv(file = "yolatengo_concerts.csv")

# Keep only the three fields used below, giving each a short lowercase name.
concerts <- select(
  concerts_table,
  date = Start.Date,
  venue = Venue,
  location = Location
)

# Quick structural check of the trimmed table.
glimpse(concerts)
## Rows: 173
## Columns: 3
## $ date <chr> "11/19/1995", "11/18/1995", "11/14/1995", "11/12/1995", "11/1…
## $ venue <chr> "Tramps", "Tramps", "Barrymore's Music Hall", "The Opera Hous…
## $ location <chr> "New York, New York, United States", "New York, New York, Uni…
# Parse the month/day/year strings with mdy(), then drop every row that has a
# missing date, a missing location, or an empty location string.
concerts <- concerts %>%
  mutate(date = mdy(date)) %>%
  filter(!(is.na(date) | is.na(location) | location == ""))
library(tidyr)
# Split the ", "-delimited location string into city / region / country.
# fill = "right" pads the trailing columns with NA when a location has fewer
# than three pieces.
# NOTE(review): for a two-piece location the second piece lands in `region`,
# not `country` -- confirm every location really has three pieces.
concerts <- separate(
  concerts,
  col = location,
  into = c("city", "region", "country"),
  sep = ", ",
  fill = "right"
)
head(concerts)
## date venue city region country
## 1 1995-11-19 Tramps New York New York United States
## 2 1995-11-18 Tramps New York New York United States
## 3 1995-11-14 Barrymore's Music Hall Ottawa Ontario Canada
## 4 1995-11-12 The Opera House Toronto Ontario Canada
## 5 1995-11-11 Blind Pig Ann Arbor Michigan United States
## 6 1995-11-10 Metro Chicago Illinois United States
# Geocoding step 1: build the single address string that is passed to the
# geocoder.
# unite(na.rm = TRUE) skips missing pieces. paste() would instead embed the
# literal text "NA" in the address (e.g. "Paris, France, NA") whenever
# separate() left a trailing column empty.
concerts <- concerts %>%
  unite(
    "full_location", city, region, country,
    sep = ", ", remove = FALSE, na.rm = TRUE
  ) %>%
  # unite() inserts the new column in front of `city`; move it back to the
  # end so the column order is the same as before.
  relocate(full_location, .after = last_col())
library(tidygeocoder)
# Look up coordinates for each `full_location` string with the "osm" method
# and store them in new `lat` / `long` columns.
geo_data <- geocode(
  concerts,
  address = full_location,
  method = "osm",
  lat = lat,
  long = long
)
## Passing 105 addresses to the Nominatim single address geocoder
## Query completed in: 106.2 seconds
# Turn the geocoded table into an sf point layer (EPSG:4326), leaving out
# rows whose latitude or longitude is missing.
library(sf)
geo_sf <- st_as_sf(
  filter(geo_data, !(is.na(lat) | is.na(long))),
  coords = c("long", "lat"),
  crs = 4326
)
# Aggregating data to identify counts per location.
# Rows are grouped by city, region, country and the point geometry, and the
# number of rows in each group is stored as `n_concerts`.
library(mapview)
concert_counts <- geo_sf %>%
group_by(city, region, country, geometry) %>%
# .groups = "drop" returns an ungrouped result.
summarise(n_concerts = n(), .groups = "drop")
# Quick interactive preview: both color (zcol) and marker size (cex) are
# driven by the concert count.
mapview(concert_counts, zcol = "n_concerts", cex = "n_concerts")
# Interactive map of concert counts: one circle per location, colored and
# sized by how many concerts were played there.
library(leaflet)

# Continuous viridis scale over the observed concert counts.
pal <- colorNumeric("viridis", concert_counts$n_concerts)

leaflet(data = concert_counts) %>%
  # Light gray basemap.
  addProviderTiles(provider = "CartoDB.Positron") %>%
  # One circle marker per location; the radius grows with the square root of
  # the count.
  addCircleMarkers(
    radius = ~ 2 * sqrt(n_concerts),
    stroke = TRUE,
    weight = 1,
    color = ~ pal(n_concerts),
    fillColor = ~ pal(n_concerts),
    fillOpacity = 0.8,
    # Popup: bold city name, then "region, country", then the count.
    popup = ~ paste0(
      "<b>", city, "</b><br>",
      paste(region, country, sep = ", "), "<br>",
      "<b>Concerts:</b> ", n_concerts
    )
  ) %>%
  addLegend(
    pal = pal,
    values = ~ n_concerts,
    title = "Yo La Tengo Concerts, 1993–1995",
    position = "bottomright",
    opacity = 0.8
  )
# Mapping the tour routes: one line segment per pair of consecutive concerts.

# Put the stops in chronological order.
geo_sf <- geo_sf %>%
  arrange(date)

# Pull the point coordinates out as plain columns and pair every stop with
# the coordinates and date of the stop that follows it.
coords <- st_coordinates(geo_sf)
geo_sf <- geo_sf %>%
  mutate(
    lon = coords[, 1],
    lat = coords[, 2],
    next_lon = lead(lon),
    next_lat = lead(lat),
    next_date = lead(date)
  )

# Replace each stop's point with a two-vertex line to the next stop. The final
# stop has no successor (next_lon is NA) and is dropped.
# NOTE(review): segments are built across the whole date range, so the last
# show of one year is joined to the first show of the next -- confirm that
# this is intended.
tour_routes <- geo_sf %>%
  filter(!is.na(next_lon)) %>%
  rowwise() %>%
  mutate(
    geometry = st_sfc(
      st_linestring(matrix(
        c(lon, lat, next_lon, next_lat),
        ncol = 2,
        byrow = TRUE
      )),
      crs = 4326
    )
  ) %>%
  # rowwise() is only needed while the lines are built. Without ungroup() the
  # result stays a row-wise frame and every later verb runs one row at a time.
  ungroup() %>%
  st_as_sf()
library(leaflet)
library(viridis)
## Loading required package: viridisLite
# Tag every route segment with the calendar year of its starting concert.
tour_routes <- tour_routes %>%
  mutate(year = year(date))

# One viridis color per distinct year. The palette is sized from the data
# rather than fixed at 3, so each year gets its own viridis color even if the
# number of years in the data changes.
pal <- colorFactor(
  palette = viridis(n_distinct(tour_routes$year)),
  domain = tour_routes$year
)

# Draw the segments on a light gray basemap, colored by year.
leaflet(tour_routes) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolylines(
    color = ~pal(year),
    weight = 3,
    opacity = 0.8
  ) %>%
  addLegend(
    "bottomright",
    pal = pal,
    values = ~year,
    title = "Tour Year"
  )
# Country GDP per capita for comparison with the tour map. The data was
# pulled from the World Bank and limited to the years 1993-1995.
gdp_data <- read.csv(file = "GDP-1993-1995.csv")
head(gdp_data)
## Country.Name Indicator.Name X1993 X1994
## 1 Albania GDP per capita (current US$) 367.2792 586.4161
## 2 Algeria GDP per capita (current US$) 1831.0480 1525.5406
## 3 Andorra GDP per capita (current US$) 15916.8395 15748.5307
## 4 Angola GDP per capita (current US$) 449.7279 334.9736
## 5 Antigua and Barbuda GDP per capita (current US$) 8624.6202 9351.4875
## 6 Argentina GDP per capita (current US$) 6931.8560 7437.5624
## X1995
## 1 911.3205
## 2 1466.9481
## 3 18443.2545
## 4 404.2948
## 5 9034.4755
## 6 7357.6163
# Average the 1993, 1994 and 1995 figures for each country (ignoring missing
# years) and store the result in a new `avg_93_95` column.
gdp_data <- mutate(
  gdp_data,
  avg_93_95 = rowMeans(
    select(gdp_data, X1993, X1994, X1995),
    na.rm = TRUE
  )
)
head(gdp_data)
## Country.Name Indicator.Name X1993 X1994
## 1 Albania GDP per capita (current US$) 367.2792 586.4161
## 2 Algeria GDP per capita (current US$) 1831.0480 1525.5406
## 3 Andorra GDP per capita (current US$) 15916.8395 15748.5307
## 4 Angola GDP per capita (current US$) 449.7279 334.9736
## 5 Antigua and Barbuda GDP per capita (current US$) 8624.6202 9351.4875
## 6 Argentina GDP per capita (current US$) 6931.8560 7437.5624
## X1995 avg_93_95
## 1 911.3205 621.6719
## 2 1466.9481 1607.8456
## 3 18443.2545 16702.8749
## 4 404.2948 396.3321
## 5 9034.4755 9003.5277
## 6 7357.6163 7242.3449
# Reduce the table to the country name and its three-year average, then keep
# only the countries where YLT went on tour (exact match on the country names
# in `concert_counts`).
gdp_data <- gdp_data %>%
  select(country = Country.Name, GDP = avg_93_95) %>%
  filter(country %in% concert_counts$country)
gdp_data
## country GDP
## 1 Belgium 24968.77
## 2 Canada 20288.02
## 3 Denmark 30956.88
## 4 France 24130.18
## 5 Germany 28188.62
## 6 Netherlands 25713.41
## 7 Spain 14065.67
## 8 Switzerland 44147.58
## 9 United Kingdom 20422.40
## 10 United States 27591.01
#Using world dataset to join the geometries to the countries in my data
library(rnaturalearth)
library(rnaturalearthdata)
##
## Attaching package: 'rnaturalearthdata'
## The following object is masked from 'package:rnaturalearth':
##
## countries110
# Medium-scale country polygons, returned as an sf object.
world <- ne_countries(scale = "medium", returnclass = "sf")

# First attempt at attaching GDP to the polygons by country name.
map_data <- world %>%
  left_join(gdp_data, by = c("name" = "country"))
mapview(map_data, zcol = "GDP")

# Map is missing United States because the name was different in my table and
# the world table, so rename it to the spelling used by `world$name`.
gdp_data$country[gdp_data$country == "United States"] <-
  "United States of America"

# left_join() silently leaves GDP as NA for any country whose spelling differs
# between the two tables. Report such leftovers explicitly instead of relying
# on spotting an uncolored country on the map.
unmatched_countries <- setdiff(gdp_data$country, world$name)
if (length(unmatched_countries) > 0) {
  warning(
    "GDP rows with no matching country polygon: ",
    paste(unmatched_countries, collapse = ", "),
    call. = FALSE
  )
}

# Redo the join with the corrected name and check the updated map.
map_data <- world %>%
  left_join(gdp_data, by = c("name" = "country"))
mapview(map_data, zcol = "GDP")
# Creating an interactive map of the GDP data.

# Continuous viridis scale over GDP; polygons with no GDP value are drawn in
# light grey.
pal <- colorNumeric(
  palette = "viridis",
  domain = map_data$GDP,
  na.color = "lightgrey"
)

leaflet(map_data) %>%
  addTiles() %>%
  addPolygons(
    fillColor = ~pal(GDP),
    weight = 1,
    color = "white",
    fillOpacity = 0.8,
    # Popup: bold country name plus its average GDP per capita. Polygons with
    # no GDP value get an explicit "No GDP data" line instead of "$NA", and
    # amounts are printed with two fixed decimals and thousands separators.
    popup = ~paste0(
      "<strong>", name, "</strong><br>",
      ifelse(
        is.na(GDP),
        "No GDP data",
        paste0(
          "Average GDP Per Capita (1993–1995): $",
          formatC(GDP, format = "f", digits = 2, big.mark = ",")
        )
      )
    )
  ) %>%
  addLegend(
    pal = pal,
    values = ~GDP,
    title = "GDP Per Capita",
    position = "bottomright"
  ) %>%
  # Map title rendered as an HTML control in the top-right corner.
  addControl(
    html = "<h3>GDP Per Capita of Tour Countries</h3>",
    position = "topright"
  )