library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(rvest)
##
## Attaching package: 'rvest'
##
## The following object is masked from 'package:readr':
##
## guess_encoding
library(ggplot2)
library(dplyr)
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(tmap)
library(leaflet)
# Download the tour page and pull its first <table> into a tibble.
url <- "https://reference.org/facts/Janet_World_Tour/5Gs5dLpv#tour-dates"
html_code <- read_html(url)

# html_nodes() returns every <table> on the page; keep only the first one.
table_html <- html_nodes(html_code, "table")[[1]]
table_df <- html_table(table_html)

# Preview the first rows of the scraped table.
head(table_df)
## # A tibble: 6 × 4
## Date City Country Venue
## <chr> <chr> <chr> <chr>
## 1 North America36 North America36 North America36 North America36
## 2 November 24, 1993 Cincinnati United States Riverfront Coliseum
## 3 November 26, 1993 Toronto Canada SkyDome
## 4 November 28, 1993 Landover United States USAir Arena
## 5 November 29, 1993 Landover United States USAir Arena
## 6 December 1, 1993 Rosemont United States Rosemont Horizon
# Inspect the dimensions and column types of the scraped table. (The preview
# of the first rows is already printed right after the table is scraped.)
str(table_df)
## tibble [131 × 4] (S3: tbl_df/tbl/data.frame)
## $ Date : chr [1:131] "North America36" "November 24, 1993" "November 26, 1993" "November 28, 1993" ...
## $ City : chr [1:131] "North America36" "Cincinnati" "Toronto" "Landover" ...
## $ Country: chr [1:131] "North America36" "United States" "Canada" "United States" ...
## $ Venue : chr [1:131] "North America36" "Riverfront Coliseum" "SkyDome" "USAir Arena" ...
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
# Compact column-wise overview: one line per column with its type and values.
table_df %>%
  glimpse()
## Rows: 131
## Columns: 4
## $ Date <chr> "North America36", "November 24, 1993", "November 26, 1993", "…
## $ City <chr> "North America36", "Cincinnati", "Toronto", "Landover", "Lando…
## $ Country <chr> "North America36", "United States", "Canada", "United States",…
## $ Venue <chr> "North America36", "Riverfront Coliseum", "SkyDome", "USAir Ar…
# Standardise the column names. The scraped header already uses these names,
# so this only guards against the source page relabelling a column.
names(table_df) <- c("Date", "City", "Country", "Venue")

# Drop banner rows such as "North America36": they are not tour stops and are
# recognisable because the same text is repeated in every column. coalesce()
# keeps rows where the comparison is NA because a cell is missing.
table_df <- table_df %>%
  filter(
    !coalesce(Date == City & City == Country & Country == Venue, FALSE)
  ) %>%
  # Human-readable "City, Country" label for each remaining row.
  mutate(LOCATION = paste(City, Country, sep = ", "))
view(table_df$LOCATION)

# Map. A bare leaflet() widget has no layers, so nothing is drawn; addTiles()
# adds the default base map. table_df has no latitude/longitude columns, so
# LOCATION must be geocoded before the stops can be added as markers.
leaflet(table_df) %>%
  addTiles()