library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(rvest)
## 
## Attaching package: 'rvest'
## 
## The following object is masked from 'package:readr':
## 
##     guess_encoding
library(ggplot2)
library(dplyr)
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(tmap)
library(leaflet)
# Source page for the tour-date listing. The "#tour-dates" fragment is only a
# browser anchor; the whole page is downloaded.
url <- "https://reference.org/facts/Janet_World_Tour/5Gs5dLpv#tour-dates"

# Download and parse the page (requires network access).
html_code <- read_html(url)

# Collect every <table> on the page and fail with a clear message if there is
# none, instead of the opaque "subscript out of bounds" error raised by
# indexing an empty node set.
page_tables <- html_elements(html_code, "table")
if (length(page_tables) == 0) {
  stop("No <table> elements found at ", url, call. = FALSE)
}

# The tour dates live in the first table on the page.
table_html <- page_tables[[1]]

# Convert the HTML table node into a tibble.
table_df <- html_table(table_html)

# Preview the first rows of the scraped table. Row 1 repeats "North America36"
# in all four columns -- presumably a region heading that spans the table
# width, not an actual tour date.
head(table_df)
## # A tibble: 6 × 4
##   Date              City            Country         Venue              
##   <chr>             <chr>           <chr>           <chr>              
## 1 North America36   North America36 North America36 North America36    
## 2 November 24, 1993 Cincinnati      United States   Riverfront Coliseum
## 3 November 26, 1993 Toronto         Canada          SkyDome            
## 4 November 28, 1993 Landover        United States   USAir Arena        
## 5 November 29, 1993 Landover        United States   USAir Arena        
## 6 December 1, 1993  Rosemont        United States   Rosemont Horizon
# Same preview again (identical call and output to the one above).
head(table_df)
## # A tibble: 6 × 4
##   Date              City            Country         Venue              
##   <chr>             <chr>           <chr>           <chr>              
## 1 North America36   North America36 North America36 North America36    
## 2 November 24, 1993 Cincinnati      United States   Riverfront Coliseum
## 3 November 26, 1993 Toronto         Canada          SkyDome            
## 4 November 28, 1993 Landover        United States   USAir Arena        
## 5 November 29, 1993 Landover        United States   USAir Arena        
## 6 December 1, 1993  Rosemont        United States   Rosemont Horizon
# Structure check: 131 rows and 4 columns, all stored as character (Date is
# still unparsed text at this point).
str(table_df)
## tibble [131 × 4] (S3: tbl_df/tbl/data.frame)
##  $ Date   : chr [1:131] "North America36" "November 24, 1993" "November 26, 1993" "November 28, 1993" ...
##  $ City   : chr [1:131] "North America36" "Cincinnati" "Toronto" "Landover" ...
##  $ Country: chr [1:131] "North America36" "United States" "Canada" "United States" ...
##  $ Venue  : chr [1:131] "North America36" "Riverfront Coliseum" "SkyDome" "USAir Arena" ...
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
# Column-wise overview of the scraped table: every column, including Date, is
# character, and the first value of each is the "North America36" heading text.
glimpse(table_df)
## Rows: 131
## Columns: 4
## $ Date    <chr> "North America36", "November 24, 1993", "November 26, 1993", "…
## $ City    <chr> "North America36", "Cincinnati", "Toronto", "Landover", "Lando…
## $ Country <chr> "North America36", "United States", "Canada", "United States",…
## $ Venue   <chr> "North America36", "Riverfront Coliseum", "SkyDome", "USAir Ar…
# Set the column names explicitly so later code does not depend on the header
# text scraped from the page.
names(table_df) <- c("Date", "City", "Country", "Venue")

# Drop section-heading rows. The scraped table contains rows such as
# "North America36" where a single heading is repeated in every column; they
# are not tour dates and would otherwise produce locations like
# "North America36, North America36".
is_section_header <- table_df$Date == table_df$City &
  table_df$City == table_df$Country &
  table_df$Country == table_df$Venue
# `%in% TRUE` treats NA comparisons (missing cells) as "not a heading", so rows
# with missing values are kept rather than silently dropped.
table_df <- table_df[!(is_section_header %in% TRUE), ]

# Build a "City, Country" label for each remaining tour stop.
table_df$LOCATION <- paste(table_df$City, table_df$Country, sep = ", ")

# Open the labels in the data viewer for a manual check.
view(table_df$LOCATION)
# Map
# Start a leaflet widget backed by table_df and add the default tile layer;
# without a tile layer the widget renders as a blank grey panel.
# NOTE(review): table_df has no latitude/longitude columns in the visible code,
# so LOCATION would need to be geocoded before tour stops can be drawn as
# markers.
leaflet(table_df) %>%
  addTiles()