library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.1 v dplyr 1.0.5
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(rvest)
##
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
##
## guess_encoding
# Scrape the CSS selector reference table from w3schools into a tibble.
url <- "https://www.w3schools.com/cssref/css_selectors.asp"
html_data <- read_html(url)

# html_node() selects a single node; html_nodes() selects every match.
# When inspecting the page elements, prefix classes with "." and IDs with "#".
# The selected node is parsed as a table and then coerced to a tibble;
# .name_repair = "unique" works around column-name issues.
df_raw <- html_data %>%
  html_node(".ws-table-all.notranslate") %>%
  html_table() %>%
  as_tibble(.name_repair = "unique")

# df_raw is already a tibble, so print it directly instead of re-wrapping it
# in tibble().
df_raw
## # A tibble: 60 x 3
## Selector Example `Example description`
## <chr> <chr> <chr>
## 1 .class .intro "Selects all elements with class=\"intro\""
## 2 .class1.clas~ .name1.na~ "Selects all elements with both name1 and name2 set~
## 3 .class1 .cla~ .name1 .n~ "Selects all elements with name2 that is a descenda~
## 4 #id #firstname "Selects the element with id=\"firstname\""
## 5 * * "Selects all elements"
## 6 element p "Selects all <p> elements"
## 7 element.class p.intro "Selects all <p> elements with class=\"intro\""
## 8 element,elem~ div, p "Selects all <div> elements and all <p> elements"
## 9 element elem~ div p "Selects all <p> elements inside <div> elements"
## 10 element>elem~ div > p "Selects all <p> elements where the parent is a <di~
## # ... with 50 more rows