library(rvest)
library(tidyverse)
# Scrape the first 9 pages of the Alberta child-care listing and stack the
# per-page tables into a single data frame, tagging each row with its page.

# Preallocate one slot per page instead of growing the list inside the loop
resultados <- vector("list", 9L)
for (i in seq_along(resultados)) {
  # Read the page and extract the listing table.
  # data.frame() is kept on purpose: it converts the header names to
  # syntactic ones (e.g. "Inspection.Date"), which later code relies on.
  reddit_ex <- paste0("https://opengovca.com/alberta-child-care?page=", i) %>%
    read_html() %>%
    html_nodes(xpath = 'body/div/div/div/div/div/table') %>%
    html_table() %>%
    data.frame()
  # Fail early with a readable message when the page yields no table rows
  # (e.g. the site layout changed and the XPath no longer matches); without
  # this the column assignment below aborts with an opaque
  # "replacement has 1 row, data has 0" error.
  if (nrow(reddit_ex) == 0) {
    stop("No table rows found on page ", i, call. = FALSE)
  }
  # Add a column with the page label ("pag1", "pag2", ...)
  reddit_ex$pagina <- paste0("pag", i)
  # Store this page's table in its slot of the results list
  resultados[[i]] <- reddit_ex
}
# Combine all per-page results into a single data frame
todos_los_resultados <- do.call(rbind, resultados)
# Preview the first six rows of the combined table
head(todos_los_resultados, n = 6L)
## Business.Name
## 1 AGAPELAND DAYCARE CENTRE LTD.
## 2 MONTESSORI PLAY AND LEARN (THE)
## 3 MONTESSORI SCHOOL HOUSE DAY CARE
## 4 NORTH ROCKY VIEW COMMUNITY LINKS FAMILY CHILD CARE
## 5 LAUDERDALE AFTERSCHOOL CARE
## 6 AGAPELAND PROGRAM FOR SCHOOL AGE CHILDREN
## Office.Address Inspection.Date pagina
## 1 Bay 13/15/16/17 Corinthia Plaza, Leduc, AB T9E6J9 2018-06-29 pag1
## 2 7730 106 Street, Edmonton, AB T6E4W3 2018-06-29 pag1
## 3 4004 114 Street, Edmonton, AB T6J1M6 2018-06-29 pag1
## 4 125 Main St N, Airdrie, AB T4B0P7 2018-06-29 pag1
## 5 10816 129 Avenue, Edmonton, AB T5E5W9 2018-06-29 pag1
## 6 Bay 13/15/16/17 Corinthia Plaza, Leduc, AB T9E6J9 2018-06-29 pag1
# Preview the last six rows of the combined table
tail(todos_los_resultados, n = 6L)
## Business.Name
## 895 CANORA SCHOOL AGE CHILDCARE
## 896 LITTLE BLESSINGS CHRISTIAN PRESCHOOL
## 897 MENORAH ACADEMY DAYCARE
## 898 A FAIRYTALE BEGINNING
## 899 SUMMIT KIDS - NORTH HAVEN
## 900 WYE CHILD CARE CENTRE
## Office.Address Inspection.Date pagina
## 895 15120 104 Ave, Edmonton, AB T5P0R5 2018-05-15 pag9
## 896 5104 Ellerslie Rd Sw, Edmonton, AB T6X1A4 2018-05-15 pag9
## 897 10735 Mcqueen Rd (144 Street), Edmonton, AB T5N3L1 2018-05-15 pag9
## 898 515 10470 98 Ave, Fort Saskatchewan, AB T8L0V6 2018-05-15 pag9
## 899 4922 North Haven Drive Nw, Calgary, AB T2K2K2 2018-05-15 pag9
## 900 163a 22560 Wye Road, Sherwood Park, AB T8A4T6 2018-05-15 pag9
# Bar chart: number of scraped rows contributed by each page
ggplot(data = todos_los_resultados, mapping = aes(x = pagina)) +
  geom_bar(fill = "#CBEE86", colour = "black") +
  theme_classic() +
  labs(
    title = "Filas por cada página",
    y = "cantidad"
  )

# Bar chart: number of scraped rows for each distinct inspection date.
# The x aesthetic is the full Inspection.Date value, so there is one bar per
# date (not per year); the title is worded to match what is actually drawn.
ggplot(data = todos_los_resultados, mapping = aes(x = Inspection.Date)) +
  geom_bar(fill = "#D7AE55", colour = "black") +
  theme_classic() +
  labs(
    title = "Filas por cada fecha de inspección",
    y = "cantidad"
  ) +
  # Must come after theme_classic(), which would otherwise reset this tweak;
  # rotates the date labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
