library (httr)
library (rvest)
#' Fetch Naver news search results for a single date.
#'
#' Builds a Naver news search URL for the exact phrase "특수교육", passing
#' `search_date` as both the `ds` and `de` query parameters, downloads the
#' result page and extracts headline text, headline link and press name.
#'
#' @param search_date A single date string in "YYYY.MM.DD" form
#'   (e.g. "2024.05.16").
#' @return A data.frame with character columns `date`, `title`, `url` and
#'   `press`. When no headline is found, one row is returned with `title`,
#'   `url` and `press` set to `NA`.
get_news_for_date <- function(search_date) {
  search_date <- as.character(search_date)
  stopifnot(length(search_date) == 1L, !is.na(search_date))

  # Search term: quoted so it is sent as an exact phrase. Convert to UTF-8
  # before percent-encoding so the bytes do not depend on the native locale.
  query <- "\"특수교육\""
  encoded_query <- URLencode(enc2utf8(query), reserved = TRUE)

  # The `nso` parameter takes the date without dots, as from<date>to<date>.
  compact_date <- gsub(".", "", search_date, fixed = TRUE)

  base_url <- sprintf(
    paste0(
      "https://search.naver.com/search.naver?where=news&query=%s",
      "&sm=tab_opt&sort=1&photo=0&field=0&pd=3&ds=%s&de=%s",
      "&docid=&related=0&mynews=0&office_type=0&office_section_code=0",
      "&news_office_checked=&nso=so:dd,p:from%sto%s&is_sug_officeid=0"
    ),
    encoded_query, search_date, search_date, compact_date, compact_date
  )

  # Fail loudly on HTTP errors instead of parsing an error page.
  response <- httr::GET(base_url)
  httr::stop_for_status(response)
  page_text <- httr::content(response, as = "text", encoding = "UTF-8")
  html_obj <- rvest::read_html(page_text)

  # Titles and links come from the same anchor nodes, so select them once.
  title_nodes <- rvest::html_nodes(html_obj, "a.news_tit")
  news_titles <- rvest::html_text(title_nodes)
  news_urls <- rvest::html_attr(title_nodes, "href")
  press_names <- rvest::html_text(rvest::html_nodes(html_obj, "a.info.press"))

  # No results: return a single placeholder row for this date.
  if (length(news_titles) == 0) {
    return(data.frame(
      date = search_date,
      title = NA_character_,
      url = NA_character_,
      press = NA_character_,
      stringsAsFactors = FALSE
    ))
  }

  # Press names are selected independently of the titles; if the counts
  # differ they cannot be paired reliably and data.frame() would error.
  if (length(press_names) != length(news_titles)) {
    warning(
      "Found ", length(news_titles), " titles but ", length(press_names),
      " press names for ", search_date, "; press set to NA.",
      call. = FALSE
    )
    press_names <- rep(NA_character_, length(news_titles))
  }

  data.frame(
    date = search_date,
    title = news_titles,
    url = news_urls,
    press = press_names,
    stringsAsFactors = FALSE
  )
}
# Set the date to search for here, in "YYYY.MM.DD" form.
search_date <- "2024.05.16" # example date
all_news <- get_news_for_date(search_date)

# Strip the dots from the date (fixed-string match) for the file name.
file_name <- sprintf(
  "./data/특수교육_news_%s.csv",
  gsub(".", "", search_date, fixed = TRUE)
)

# write.csv() does not create missing directories, so make sure it exists.
dir.create(dirname(file_name), showWarnings = FALSE, recursive = TRUE)
write.csv(all_news, file = file_name, row.names = FALSE)