library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rvest)
# Listing page for the 2022-23 newsroom fellowship class.
link <- "https://www.nytco.com/careers/newsroom/newsroom-fellowship/2022-23-fellows/"
page <- read_html(link)

# Text of each `.m-card__title` node nested inside a `.m-card--small` card.
name <- html_text(html_nodes(page, ".m-card--small .m-card__title"))

# `href` of every `.m-card__inner` node, with the final three entries dropped.
# NOTE(review): presumably the last three cards are not fellow bios -- confirm
# against the live page; the count is hard-coded.
bio_link <- head(
  html_attr(html_nodes(page, ".m-card__inner"), "href"),
  n = -3
)

# Text of each `.m-card__subtitle` node.
title <- html_text(html_nodes(page, ".m-card__subtitle"))
#' Fetch the text content of a single bio page.
#'
#' @param bio_link URL of one bio page; passed straight to `read_html()`.
#' @return A length-one character vector: the text of every
#'   `.m-module-text__content` node on the page joined with newlines, or
#'   `NA_character_` when the page contains no such node.
get_info <- function(bio_link) {
  bio_page <- read_html(bio_link)
  infor <- bio_page %>%
    html_nodes(".m-module-text__content") %>%
    html_text()

  # Always return exactly one string. A page with zero or several matching
  # nodes would otherwise yield a vector of a different length, which makes
  # per-link results ragged and unusable as a single data-frame column.
  if (length(infor) == 0L) {
    return(NA_character_)
  }
  paste(infor, collapse = "\n")
}
# Check the scraped vectors line up before doing any network fetches:
# data.frame() silently recycles vectors whose lengths divide evenly, which
# would pair entries with the wrong title or bio instead of failing.
stopifnot(
  length(name) == length(title),
  length(name) == length(bio_link)
)

# One bio string per link. Unlike sapply(), vapply() always returns a
# character vector (also for zero links) and stops with an error if any page
# does not produce exactly one string. Element names (the links) are kept,
# as they were with sapply().
infor1 <- vapply(bio_link, FUN = get_info, FUN.VALUE = character(1))

# Combine the three parallel vectors into one table, one row per entry.
fellows <- data.frame(name, title, infor1, stringsAsFactors = FALSE)