Image One: Janet World Tour - Australia Poster (Janet World Tour, 2024)
This project scrapes a website for concert data covering a three-year period, then maps each year of data and builds a composite map of all three years. For the purposes of this project, I have selected Janet Jackson and her Janet World Tour, in which she performed 125 shows across the world from 1993 to 1995.
As someone who is a fan of music and enjoys live shows, I found that the concept of the project immediately piqued my interest. There is also a reason why people become frequent concertgoers, as in the experience of Phish fans or even Deadheads, which I’m sure inspired the concert project. For one, live music creates communities and opportunities for connection among people, which are understood as positive social outcomes (Rickard et al., 2025). It will also be interesting to see where in the world artists travel and whether particular weight is given to certain cities or world regions as a result. Additionally, the location and setting of live shows matter, as in single stadium shows as opposed to festivals or more impromptu settings (Hoeven & Hitters, 2019).
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidygeocoder)
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(mapview)
library(rvest)
##
## Attaching package: 'rvest'
##
## The following object is masked from 'package:readr':
##
## guess_encoding
library(ggplot2)
library(dplyr)
library(tmap)
library(leaflet)
library(maps)
##
## Attaching package: 'maps'
##
## The following object is masked from 'package:purrr':
##
## map
library(ggmap)
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
## Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service>
## OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
##
## Attaching package: 'ggmap'
##
##
## The following object is masked from 'package:tidygeocoder':
##
## geocode
# Scrape the Janet World Tour dates from reference.org.
# The first <table> on the page is the tour-date table; html_table() converts
# it to a tibble with Date, City, Country and Venue columns.
url <- "https://reference.org/facts/Janet_World_Tour/5Gs5dLpv#tour-dates"
html_code <- read_html(url)
# html_elements() is the current rvest name for the superseded html_nodes().
page_tables <- html_elements(html_code, "table")
if (length(page_tables) == 0) {
  # Fail with a readable message instead of "subscript out of bounds".
  stop("No <table> found at ", url, "; the page layout may have changed.",
       call. = FALSE)
}
janet_tour_html <- page_tables[[1]]
janet_tour_df <- html_table(janet_tour_html)
head(janet_tour_df)
## # A tibble: 6 × 4
## Date City Country Venue
## <chr> <chr> <chr> <chr>
## 1 North America36 North America36 North America36 North America36
## 2 November 24, 1993 Cincinnati United States Riverfront Coliseum
## 3 November 26, 1993 Toronto Canada SkyDome
## 4 November 28, 1993 Landover United States USAir Arena
## 5 November 29, 1993 Landover United States USAir Arena
## 6 December 1, 1993 Rosemont United States Rosemont Horizon
#checking data and removing first row
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
# Look at the first scraped rows again: row 1 is a region banner
# ("North America36" repeated in every column), not a concert.
janet_tour_df %>% head()
## # A tibble: 6 × 4
## Date City Country Venue
## <chr> <chr> <chr> <chr>
## 1 North America36 North America36 North America36 North America36
## 2 November 24, 1993 Cincinnati United States Riverfront Coliseum
## 3 November 26, 1993 Toronto Canada SkyDome
## 4 November 28, 1993 Landover United States USAir Arena
## 5 November 29, 1993 Landover United States USAir Arena
## 6 December 1, 1993 Rosemont United States Rosemont Horizon
# Drop region banner rows such as "North America36". A banner repeats the same
# text in all four columns, so a row whose cells are all identical is a header
# rather than a concert. This removes row 1 and any later banner of the same
# form, if the table contains one.
is_banner <- janet_tour_df$Date == janet_tour_df$City &
  janet_tour_df$City == janet_tour_df$Country &
  janet_tour_df$Country == janet_tour_df$Venue
# `%in% TRUE` treats an NA comparison as "not a banner" so no NA rows appear.
janet_tour_df <- janet_tour_df[!(is_banner %in% TRUE), ]

# Read in the cleaned CSV file.
# NOTE: this replaces the scraped table above with a separately cleaned copy;
# the code below relies on the State and Year columns of that copy.
tour_csv_path <- "~/Downloads/Janet World Tour (93-95).csv"
if (!file.exists(tour_csv_path)) {
  stop("Cleaned tour file not found: ", tour_csv_path,
       ". Place the CSV there or update `tour_csv_path`.", call. = FALSE)
}
janet_tour_df <- read.csv(tour_csv_path)
# Geocode the tour stops with the `world.cities` table from the maps package.
# Both tables get a "City,Country" key in a Location column.
world.cities$Location <- paste(world.cities$name,
                               world.cities$country.etc,
                               sep = ",")
##### Creating a Location column.
# Add the matching Location key (city and country) to the tour data.
janet_tour_df$Location <- paste(janet_tour_df$City,
                                janet_tour_df$Country,
                                sep = ",")
view(janet_tour_df)
tour_cities <- janet_tour_df

# world.cities has no state column, so one key can match several cities
# (e.g. three different "Albany,USA" rows). A plain merge would then turn one
# concert into several points in different states and inflate the counts.
# Keep exactly one match per concert instead:
#   1. if maps::us.cities has the "City ST" entry for the concert's State,
#      keep the world.cities candidate closest to that reference point;
#   2. otherwise keep the most populous candidate (a best guess only).
tour_cities_df <- local({
  candidates <- janet_tour_df
  candidates$concert_id <- seq_len(nrow(candidates))
  candidates <- merge(candidates, world.cities, by = "Location", all.x = TRUE)

  # us.cities names look like "Albany NY"; state.name/state.abb are base R.
  state_code <- state.abb[match(candidates$State, state.name)]
  us_ref <- match(paste(candidates$City, state_code), maps::us.cities$name)

  # Squared degree offset is enough to tell candidates in different states
  # apart; Inf marks "no state reference available".
  offset <- (candidates$lat - maps::us.cities$lat[us_ref])^2 +
    (candidates$long - maps::us.cities$long[us_ref])^2
  offset[is.na(offset)] <- Inf

  candidates <- candidates[order(candidates$concert_id, offset,
                                 -candidates$pop), ]
  resolved <- candidates[!duplicated(candidates$concert_id), ]
  resolved$concert_id <- NULL
  # merge() returns rows sorted by the key; keep that ordering.
  resolved[order(resolved$Location), ]
})
view(tour_cities_df)
# Turn the data frame into an sf point layer (longitude/latitude, EPSG:4326).
# Only rows without coordinates need to go, because st_as_sf() cannot build a
# point from NA. na.omit() would also discard a concert that has an NA in any
# other column.
has_coords <- !is.na(tour_cities_df$long) & !is.na(tour_cities_df$lat)
if (any(!has_coords)) {
  # Report the loss so the maps are not mistaken for the complete tour.
  message(sum(!has_coords), " of ", nrow(tour_cities_df),
          " rows have no coordinates and are left off the maps: ",
          paste(unique(tour_cities_df$Location[!has_coords]),
                collapse = "; "))
}
tour_cities_data <- tour_cities_df[has_coords, ]
tour_cities_sf <- st_as_sf(tour_cities_data,
                           coords = c("long", "lat"),
                           crs = 4326)
tour_cities_sf
## Simple feature collection with 85 features and 11 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -123.09 ymin: -37.81 xmax: 153.02 ymax: 59.91
## Geodetic CRS: WGS 84
## First 10 features:
## Location Date City State Country
## 8 Adelaide,Australia 2/21/95 Adelaide Australia
## 9 Albany,USA 1/24/94 Albany New York USA
## 10 Albany,USA 1/24/94 Albany New York USA
## 11 Albany,USA 1/24/94 Albany New York USA
## 12 Albuquerque,USA 4/22/94 Albuquerque New Mexico USA
## 13 Atlanta,USA 1/6/94 Atlanta Georgia USA
## 14 Atlanta,USA 1/5/94 Atlanta Georgia USA
## 15 Atlanta,USA 7/31/94 Atlanta Georgia USA
## 16 Bangkok,Thailand 3/5/95 Bangkok Thailand
## 17 Bangkok,Thailand 3/6/95 Bangkok Thailand
## Venue Year name country.etc pop capital
## 8 Adelaide Entertainment Centre 1995 Adelaide Australia 1076969 0
## 9 Knickerbocker Arena 1994 Albany USA 93576 0
## 10 Knickerbocker Arena 1994 Albany USA 75510 0
## 11 Knickerbocker Arena 1994 Albany USA 45535 0
## 12 Tingley Coliseum 1994 Albuquerque USA 494962 0
## 13 Omni Coliseum 1994 Atlanta USA 424096 0
## 14 Omni Coliseum 1994 Atlanta USA 424096 0
## 15 Coca-Cola Lakewood Amphitheatre 1994 Atlanta USA 424096 0
## 16 Indoor Stadium Huamark 1995 Bangkok Thailand 4935988 1
## 17 Indoor Stadium Huamark 1995 Bangkok Thailand 4935988 1
## geometry
## 8 POINT (138.6 -34.93)
## 9 POINT (-73.8 42.67)
## 10 POINT (-84.18 31.58)
## 11 POINT (-123.09 44.62)
## 12 POINT (-106.62 35.12)
## 13 POINT (-84.42 33.76)
## 14 POINT (-84.42 33.76)
## 15 POINT (-84.42 33.76)
## 16 POINT (100.5 13.73)
## 17 POINT (100.5 13.73)
# Interactive map of every geocoded show.
mapview(tour_cities_sf)

# Number of shows at each venue/location point across the whole tour.
concerts_num <- summarise(
  group_by(tour_cities_sf, Venue, Location, geometry),
  n_concerts = n(),
  .groups = "drop"
)
concerts_num
## Simple feature collection with 69 features and 3 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -123.09 ymin: -37.81 xmax: 153.02 ymax: 59.91
## Geodetic CRS: WGS 84
## # A tibble: 69 × 4
## Venue Location geometry n_concerts
## <chr> <chr> <POINT [°]> <int>
## 1 ARCO Arena Sacrament… (-121.47 38.57) 1
## 2 Adelaide Entertainment Centre Adelaide,… (138.6 -34.93) 1
## 3 Alsterdorfer Sporthalle Hamburg,G… (10 53.55) 1
## 4 America West Arena Phoenix,U… (-112.07 33.54) 1
## 5 Arena Berlin Berlin,Ge… (13.38 52.52) 1
## 6 BJCC Coliseum Birmingha… (-86.8 33.53) 1
## 7 Bradley Center Milwaukee… (-87.97 43.06) 1
## 8 Brisbane Entertainment Centre Brisbane,… (153.02 -27.46) 2
## 9 Carver Arena Peoria,USA (-112.24 33.69) 1
## 10 Carver Arena Peoria,USA (-89.61 40.74) 1
## # ℹ 59 more rows
# One sf layer per tour year.
concerts_1993 <- filter(tour_cities_sf, Year == "1993")
concerts_1994 <- filter(tour_cities_sf, Year == "1994")
concerts_1995 <- filter(tour_cities_sf, Year == "1995")

# 1993: shows per venue, mapped with colour and point size set by show count.
concert_count_1993 <- summarise(
  group_by(concerts_1993, Venue, City, State, Country, geometry),
  n_concerts = n(),
  .groups = "drop"
)
mapview(concert_count_1993,
        zcol = "n_concerts",
        cex = "n_concerts",
        label = "Venue")
1994 Map
# 1994: shows per venue, mapped with colour and point size set by show count.
concert_count_1994 <- summarise(
  group_by(concerts_1994, Venue, City, State, Country, geometry),
  n_concerts = n(),
  .groups = "drop"
)
mapview(concert_count_1994,
        zcol = "n_concerts",
        cex = "n_concerts",
        label = "Venue")

# 1995: same summary and map for the final year of the tour.
concert_count_1995 <- summarise(
  group_by(concerts_1995, Venue, City, State, Country, geometry),
  n_concerts = n(),
  .groups = "drop"
)
mapview(concert_count_1995,
        zcol = "n_concerts",
        cex = "n_concerts",
        label = "Venue")
# All three years (1993 - 1995): composite map of shows per venue.
mapview(concerts_num,
        zcol = "n_concerts",
        cex = "n_concerts",
        label = "Venue")

# Rows per country in the tour table.
# NOTE(review): the first, unlabelled count in the output looks like rows with
# an empty Country value - confirm against the CSV.
table(janet_tour_df[["Country"]])
##
## Australia Belgium Canada Denmark England
## 7 11 1 2 1 6
## France Germany Japan Netherlands Norway Philippines
## 3 6 4 2 1 1
## Singapore Spain Sweden Switzerland Thailand USA
## 2 1 1 1 2 80
# Concert counts per country, transcribed from the table() output above
# (the unlabelled blank-country count is not included).
y <- data.frame(
  country = c("Australia", "Belgium", "Canada", "Denmark", "England",
              "France", "Germany", "Japan", "Netherlands", "Norway",
              "Philippines", "Singapore", "Spain", "Sweden", "Switzerland",
              "Thailand", "USA"),
  count = c(11, 1, 2, 1, 6, 3, 6, 4, 2, 1, 1, 2, 1, 1, 1, 2, 80)
)

# Horizontal bar chart with each bar labelled by its country. names.arg puts
# the labels on the axis, las = 1 keeps them horizontal, and the wider left
# margin leaves room for the longest names. The previous par() settings are
# restored afterwards.
old_par <- par(mar = c(5, 8, 4, 2) + 0.1)
barplot(y$count,
        names.arg = y$country,
        main = "Concert Count by Country",
        horiz = TRUE,
        las = 1,
        xlab = "Total Concerts",
        col = "green")
# Draw the axis title outside the country labels so the two do not overlap.
title(ylab = "Country", line = 6.5)
par(old_par)
Hoeven, A., & Hitters, E. (2019). The social and cultural values of live music: Sustaining urban live music ecologies. Cities, 90(90), 263–271. https://doi.org/10.1016/j.cities.2019.02.015
Janet World Tour. (2024). Reference.org Encyclopedia and Reference Library. https://reference.org/facts/Janet_World_Tour/5Gs5dLpv
Rickard, N. S., Lewis, K., Ballantyne, J., & Dingle, G. (2025). The unifying power of live music events: A systematic review of social outcomes for audience members. Musicae Scientiae. https://doi.org/10.1177/10298649251349703