Image One: Janet World Tour - Australia Poster (Janet World Tour, 2024)

Project Overview

This project scrapes a website for concert data covering a three-year period, maps each year of data separately, and then builds a composite map of all three years. For the purposes of this project, I have selected Janet Jackson and her Janet World Tour, in which she performed 125 shows across the world from 1993 to 1995.

As someone who is a fan of music and enjoys live shows, the concept of the project immediately piqued my interest. There is also a reason why people become frequent concertgoers, as in the experience of Phish fans or even Deadheads, which I’m sure inspired the concert project. For one, live music creates communities and opportunities for connection among people, understood as positive social outcomes (Rickard et al., 2025). It will also be interesting to see where in the world artists travel and whether particular weight is given to certain cities or world regions as a result. Additionally, the location and setting of live shows are worth considering, as in single-stadium shows as opposed to festivals or more impromptu settings (Hoeven & Hitters, 2019).

Load packages.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidygeocoder)
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(mapview)
library(rvest)
## 
## Attaching package: 'rvest'
## 
## The following object is masked from 'package:readr':
## 
##     guess_encoding
library(ggplot2)
library(dplyr)
library(tmap)
library(leaflet)
library(maps)
## 
## Attaching package: 'maps'
## 
## The following object is masked from 'package:purrr':
## 
##     map
library(ggmap)
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
##   Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service>
##   OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
## 
## Attaching package: 'ggmap'
## 
## 
## The following object is masked from 'package:tidygeocoder':
## 
##     geocode
Scraping the janet. World Tour data.
# Scrape the tour-dates table from the Janet World Tour reference page.
url <- "https://reference.org/facts/Janet_World_Tour/5Gs5dLpv#tour-dates"
html_code <- read_html(url)

# html_element() returns the first matching node directly, replacing the
# superseded html_nodes() + positional `[[1]]` lookup. Fail with a clear
# message if the page layout changed and no table is present.
janet_tour_html <- html_element(html_code, "table")
if (inherits(janet_tour_html, "xml_missing")) {
  stop("No <table> element found at ", url, call. = FALSE)
}

# Convert the HTML table to a tibble and preview the first rows.
janet_tour_df <- html_table(janet_tour_html)
head(janet_tour_df)
## # A tibble: 6 × 4
##   Date              City            Country         Venue              
##   <chr>             <chr>           <chr>           <chr>              
## 1 North America36   North America36 North America36 North America36    
## 2 November 24, 1993 Cincinnati      United States   Riverfront Coliseum
## 3 November 26, 1993 Toronto         Canada          SkyDome            
## 4 November 28, 1993 Landover        United States   USAir Arena        
## 5 November 29, 1993 Landover        United States   USAir Arena        
## 6 December 1, 1993  Rosemont        United States   Rosemont Horizon
Data cleaning post-scrape.
#checking data and removing first row
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
head(janet_tour_df)
## # A tibble: 6 × 4
##   Date              City            Country         Venue              
##   <chr>             <chr>           <chr>           <chr>              
## 1 North America36   North America36 North America36 North America36    
## 2 November 24, 1993 Cincinnati      United States   Riverfront Coliseum
## 3 November 26, 1993 Toronto         Canada          SkyDome            
## 4 November 28, 1993 Landover        United States   USAir Arena        
## 5 November 29, 1993 Landover        United States   USAir Arena        
## 6 December 1, 1993  Rosemont        United States   Rosemont Horizon
# Drop section-header rows by content rather than by position. The scraped
# table's first row repeats one label ("North America36") in every column;
# any other row of that shape is treated the same way (presumably the other
# tour legs have such rows too -- confirm against the page). Unlike `[-1, ]`,
# re-running this line never removes a real concert.
is_header_row <- janet_tour_df$Date == janet_tour_df$City &
  janet_tour_df$City == janet_tour_df$Country &
  janet_tour_df$Country == janet_tour_df$Venue
# `%in% TRUE` treats an NA comparison as "not a header", so such rows are kept.
janet_tour_df <- janet_tour_df[!(is_header_row %in% TRUE), ]
Geocoding Data
# Read the hand-cleaned tour table (one row per concert).
janet_tour_df <- read.csv("~/Downloads/Janet World Tour (93-95).csv")

# Build the same "City,Country" key on the maps::world.cities lookup table
# and on the tour table.
world.cities$Location <- paste(world.cities$name,
                               world.cities$country.etc,
                               sep = ",")
janet_tour_df$Location <- paste(janet_tour_df$City,
                                janet_tour_df$Country,
                                sep = ",")
view(janet_tour_df)

tour_cities <- janet_tour_df

# world.cities has no state column, so a key such as "Albany,USA" or
# "Peoria,USA" matches several different cities. A plain merge() then
# repeats the concert once per match and plots it in the wrong states.
# Keep exactly one candidate per key (the most populous) so every concert
# stays a single row.
geo_cols <- c("name", "country.etc", "pop", "lat", "long", "capital")
city_lookup <- world.cities[order(world.cities$Location, -world.cities$pop), ]
city_lookup <- city_lookup[!duplicated(city_lookup$Location), ]
geo <- city_lookup[match(janet_tour_df$Location, city_lookup$Location),
                   geo_cols]
rownames(geo) <- NULL

# For US shows the State column can pick the right city: maps::us.cities
# names cities as "<city> <state abbreviation>" (e.g. "Albany NY"). Where a
# match exists it overrides the population-based guess above; otherwise the
# world.cities match (or NA) is kept.
state_abbr <- state.abb[match(janet_tour_df$State, state.name)]
us_idx <- match(paste(janet_tour_df$City, state_abbr), us.cities$name)
us_hit <- janet_tour_df$Country == "USA" & !is.na(us_idx)
geo$name[us_hit] <- janet_tour_df$City[us_hit]
geo$country.etc[us_hit] <- "USA"
us_cols <- c("pop", "lat", "long", "capital")
geo[us_hit, us_cols] <- us.cities[us_idx[us_hit], us_cols]

# Same layout merge() produced: key column first, tour columns, then the
# lookup columns, sorted by key. Concerts with no match keep NA coordinates.
tour_cols <- c("Location", setdiff(names(janet_tour_df), "Location"))
tour_cities_df <- cbind(janet_tour_df[tour_cols], geo)
tour_cities_df <- tour_cities_df[order(tour_cities_df$Location), ]
rownames(tour_cities_df) <- NULL
view(tour_cities_df)
Data frame to Shape file
# Turn the geocoded data frame into an sf point layer.

# Only missing coordinates make a row unmappable, so filter on those alone
# (na.omit() would also drop rows with an NA in any unrelated column) and
# report which locations are being left off instead of losing them silently.
has_coords <- !is.na(tour_cities_df$long) & !is.na(tour_cities_df$lat)
if (any(!has_coords)) {
  message(
    sum(!has_coords),
    " row(s) have no coordinates and are left off the map: ",
    paste(unique(tour_cities_df$Location[!has_coords]), collapse = "; ")
  )
}
tour_cities_data <- tour_cities_df[has_coords, ]

# Coordinates are longitude/latitude in WGS 84 (EPSG:4326).
tour_cities_sf <- st_as_sf(tour_cities_data,
                           coords = c("long", "lat"),
                           crs = 4326)

tour_cities_sf
## Simple feature collection with 85 features and 11 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -123.09 ymin: -37.81 xmax: 153.02 ymax: 59.91
## Geodetic CRS:  WGS 84
## First 10 features:
##              Location    Date        City      State   Country
## 8  Adelaide,Australia 2/21/95    Adelaide            Australia
## 9          Albany,USA 1/24/94      Albany   New York       USA
## 10         Albany,USA 1/24/94      Albany   New York       USA
## 11         Albany,USA 1/24/94      Albany   New York       USA
## 12    Albuquerque,USA 4/22/94 Albuquerque New Mexico       USA
## 13        Atlanta,USA  1/6/94     Atlanta    Georgia       USA
## 14        Atlanta,USA  1/5/94     Atlanta    Georgia       USA
## 15        Atlanta,USA 7/31/94     Atlanta    Georgia       USA
## 16   Bangkok,Thailand  3/5/95     Bangkok             Thailand
## 17   Bangkok,Thailand  3/6/95     Bangkok             Thailand
##                              Venue Year        name country.etc     pop capital
## 8    Adelaide Entertainment Centre 1995    Adelaide   Australia 1076969       0
## 9              Knickerbocker Arena 1994      Albany         USA   93576       0
## 10             Knickerbocker Arena 1994      Albany         USA   75510       0
## 11             Knickerbocker Arena 1994      Albany         USA   45535       0
## 12                Tingley Coliseum 1994 Albuquerque         USA  494962       0
## 13                   Omni Coliseum 1994     Atlanta         USA  424096       0
## 14                   Omni Coliseum 1994     Atlanta         USA  424096       0
## 15 Coca-Cola Lakewood Amphitheatre 1994     Atlanta         USA  424096       0
## 16          Indoor Stadium Huamark 1995     Bangkok    Thailand 4935988       1
## 17          Indoor Stadium Huamark 1995     Bangkok    Thailand 4935988       1
##                 geometry
## 8   POINT (138.6 -34.93)
## 9    POINT (-73.8 42.67)
## 10  POINT (-84.18 31.58)
## 11 POINT (-123.09 44.62)
## 12 POINT (-106.62 35.12)
## 13  POINT (-84.42 33.76)
## 14  POINT (-84.42 33.76)
## 15  POINT (-84.42 33.76)
## 16   POINT (100.5 13.73)
## 17   POINT (100.5 13.73)
# Interactive map of every geocoded row in tour_cities_sf (one point per row).
mapview(tour_cities_sf)

Concert Maps

# Number of concerts at each venue/location/point across all three years.
venue_groups <- group_by(tour_cities_sf, Venue, Location, geometry)
concerts_num <- summarise(venue_groups, n_concerts = n(), .groups = "drop")

concerts_num
## Simple feature collection with 69 features and 3 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -123.09 ymin: -37.81 xmax: 153.02 ymax: 59.91
## Geodetic CRS:  WGS 84
## # A tibble: 69 × 4
##    Venue                         Location          geometry n_concerts
##    <chr>                         <chr>          <POINT [°]>      <int>
##  1 ARCO Arena                    Sacrament… (-121.47 38.57)          1
##  2 Adelaide Entertainment Centre Adelaide,…  (138.6 -34.93)          1
##  3 Alsterdorfer Sporthalle       Hamburg,G…      (10 53.55)          1
##  4 America West Arena            Phoenix,U… (-112.07 33.54)          1
##  5 Arena Berlin                  Berlin,Ge…   (13.38 52.52)          1
##  6 BJCC Coliseum                 Birmingha…   (-86.8 33.53)          1
##  7 Bradley Center                Milwaukee…  (-87.97 43.06)          1
##  8 Brisbane Entertainment Centre Brisbane,… (153.02 -27.46)          2
##  9 Carver Arena                  Peoria,USA (-112.24 33.69)          1
## 10 Carver Arena                  Peoria,USA  (-89.61 40.74)          1
## # ℹ 59 more rows
Data Frames and 1993 Map
# Split the point layer into one subset per tour year.
concerts_1993 <- filter(tour_cities_sf, Year == "1993")
concerts_1994 <- filter(tour_cities_sf, Year == "1994")
concerts_1995 <- filter(tour_cities_sf, Year == "1995")

# 1993: concerts per venue, then a map with colour and point size scaled by
# that count and the venue name shown as the hover label.
concert_count_1993 <- summarise(
  group_by(concerts_1993, Venue, City, State, Country, geometry),
  n_concerts = n(),
  .groups = "drop"
)

mapview(
  concert_count_1993,
  zcol = "n_concerts",
  cex = "n_concerts",
  label = "Venue"
)

1994 Map

# 1994: concerts per venue, mapped with colour and point size scaled by the
# count and the venue name as the hover label.
concert_count_1994 <- summarise(
  group_by(concerts_1994, Venue, City, State, Country, geometry),
  n_concerts = n(),
  .groups = "drop"
)
mapview(
  concert_count_1994,
  zcol = "n_concerts",
  cex = "n_concerts",
  label = "Venue"
)
1995 Map
# 1995: concerts per venue, mapped with colour and point size scaled by the
# count and the venue name as the hover label.
concert_count_1995 <- summarise(
  group_by(concerts_1995, Venue, City, State, Country, geometry),
  n_concerts = n(),
  .groups = "drop"
)
mapview(
  concert_count_1995,
  zcol = "n_concerts",
  cex = "n_concerts",
  label = "Venue"
)
Map: 1993 to 1995
#All Three Years (1993 - 1995)
# Composite map built from concerts_num (per-venue counts over the whole
# tour): colour and point size both follow n_concerts, and the venue name is
# the hover label.

mapview(concerts_num, zcol ="n_concerts", cex="n_concerts", label="Venue")
# Tally the rows of janet_tour_df by their Country value.
# NOTE(review): the unnamed first category in the printed result (count 7)
# appears to be rows whose Country is empty -- confirm these are blank rows
# in the CSV rather than concerts missing a country.
table(janet_tour_df$Country)
## 
##               Australia     Belgium      Canada     Denmark     England 
##           7          11           1           2           1           6 
##      France     Germany       Japan Netherlands      Norway Philippines 
##           3           6           4           2           1           1 
##   Singapore       Spain      Sweden Switzerland    Thailand         USA 
##           2           1           1           1           2          80
# Concert counts per country, transcribed from the table() output for
# janet_tour_df$Country (rows with an empty Country are left out). Spellings
# match the values in the data ("Netherlands", "Philippines").
y <- data.frame(
  country = c("Australia", "Belgium", "Canada", "Denmark", "England",
              "France", "Germany", "Japan", "Netherlands", "Norway",
              "Philippines", "Singapore", "Spain", "Sweden", "Switzerland",
              "Thailand", "USA"),
  count = c(11, 1, 2, 1, 6, 3, 6, 4, 2, 1, 1, 2, 1, 1, 1, 2, 80)
)

# Horizontal bar chart with each bar labelled by its country. The left margin
# is widened so the horizontal labels (las = 1) fit, and the previous
# graphics settings are restored afterwards.
old_par <- par(mar = c(5, 8, 4, 2) + 0.1)
barplot(
  y$count,
  names.arg = y$country,
  main = "Concert Count by Country",
  horiz = TRUE,
  las = 1,
  cex.names = 0.8,
  xlab = "Total Concerts",
  col = "green"
)
# Draw the axis title outside the country labels so the two do not overlap.
title(ylab = "Country", line = 6.5)
par(old_par)

References

Hoeven, A., & Hitters, E. (2019). The social and cultural values of live music: Sustaining urban live music ecologies. Cities, 90(90), 263–271. https://doi.org/10.1016/j.cities.2019.02.015

Janet World Tour. (2024). Reference.org Encyclopedia and Reference Library. https://reference.org/facts/Janet_World_Tour/5Gs5dLpv

Rickard, N. S., Lewis, K., Ballantyne, J., & Dingle, G. (2025). The unifying power of live music events: A systematic review of social outcomes for audience members. Musicae Scientiae. https://doi.org/10.1177/10298649251349703