こんな感じで力ずくでhit数を取り出していたのが、前回。
今回は、もう少しスマートにやります。(あと、reveal.js 使ってみました。)
library(rvest) # これが本丸
library(tidyverse)
library(patchwork) # #73の応用セッションで教えてもらった
関数を二つ作りました。
#' Build the request URL for a CiNii OpenSearch query.
#'
#' @param URL Base URL of the search endpoint.
#' @param query_w Search phrase, inserted as-is into the `q` parameter.
#' @param appID Application ID sent in the `appID` parameter.
#' @param start_y,end_y Values for the `year_from` / `year_to` parameters.
#' @return The request URL as a character string; it always asks for
#'   `format=rss` and `count=200`.
make_token <- function(URL, query_w, appID, start_y, end_y) {
  paste0(
    URL, "?", "q=", query_w, "&appID=", appID,
    "&format=rss", "&count=200",
    "&year_from=", start_y, "&year_to=", end_y
  )
}

#' Plot cumulative and per-year hit counts for a search phrase.
#'
#' Prints two stacked panels: the cumulative count as a line with points on
#' top, and the per-year count as blue points with grey drop lines below.
#'
#' @param query_w Search phrase; only used in the panel titles.
#' @param hit_year Data frame with the columns `year` and `hits`.
#' @return The result of `print()` on the combined plot.
plot_hit2 <- function(query_w, hit_year) {
  # as_tibble() replaces the deprecated tbl_df().
  .gd0 <- as_tibble(hit_year) %>%
    rename(Year = year, Count = hits) %>%
    mutate(Cumsum = cumsum(Count))

  # Per-year counts: one point per year with a segment down to zero.
  p1 <- ggplot(.gd0) +
    geom_point(aes(x = Year, y = Count), colour = "blue") +
    geom_segment(
      aes(x = Year, y = Count, xend = Year, yend = 0),
      colour = "grey"
    ) +
    ggtitle(paste0("CiNiiで検索(hit数):", query_w)) +
    theme_grey(base_family = "sans")

  # Running total of the yearly counts.
  p2 <- ggplot(.gd0) +
    geom_line(aes(x = Year, y = Cumsum)) +
    geom_point(aes(x = Year, y = Cumsum)) +
    ggtitle(paste0("CiNiiで検索(累積):", query_w)) +
    theme_grey(base_family = "sans")

  # patchwork layout: a single column, cumulative panel three times as tall.
  print(p2 + p1 + plot_layout(ncol = 1, heights = c(3, 1)))
}

# Placeholder application ID; replace with your own CiNii appID before running.
appID <- "XXXXXXXXXXXXXXXXXXX"
# ---- Search settings ----
URL <- "http://ci.nii.ac.jp/opensearch/search"
query_w <- "対応分析" # URLencode is not needed
start <- 1980
end <- 2018

# ---- Fetch the number of hits for each year ----
# One request per year (year_from == year_to). The hit count is read from the
# totalResults element of the first child (the channel node) of the response.
# Results go into a preallocated vector instead of rbind()-ing a data frame
# on every iteration.
years <- start:end
hits <- numeric(length(years))
for (k in seq_along(years)) {
  # xml2 is called with an explicit namespace because recent rvest versions
  # no longer attach it. res.xml stays a top-level variable so the last
  # response can be inspected further down.
  res.xml <- xml2::read_xml(make_token(URL, query_w, appID, years[k], years[k]))
  res.header <- xml2::as_list(xml2::xml_children(res.xml)[[1]])
  hits[k] <- as.numeric(res.header$totalResults[[1]])
}
# year is kept as double, matching what the original rbind() construction produced.
year_nn <- data.frame(year = as.numeric(years), hits = hits)

# Show the last rows together with the running total.
year_nn %>%
  mutate(hitcum = cumsum(hits)) %>%
  tail()
## year hits hitcum
## 34 2013 26 261
## 35 2014 28 289
## 36 2015 27 316
## 37 2016 19 335
## 38 2017 19 354
## 39 2018 16 370
# List the child nodes of the last response fetched in the loop; the output
# below shows one channel node followed by the item nodes.
xml_children(res.xml)
## {xml_nodeset (20)}
## [1] <channel rdf:about="https://ci.nii.ac.jp/opensearch/search?q=%e5%af ...
## [2] <item rdf:about="https://ci.nii.ac.jp/naid/40021429732">\n <title> ...
## [3] <item rdf:about="https://ci.nii.ac.jp/naid/40021185987">\n <title> ...
## [4] <item rdf:about="https://ci.nii.ac.jp/naid/120006243116">\n <title ...
## [5] <item rdf:about="https://ci.nii.ac.jp/naid/120006000156">\n <title ...
## [6] <item rdf:about="https://ci.nii.ac.jp/naid/40021559870">\n <title> ...
## [7] <item rdf:about="https://ci.nii.ac.jp/naid/40021330481">\n <title> ...
## [8] <item rdf:about="https://ci.nii.ac.jp/naid/40021195144">\n <title> ...
## [9] <item rdf:about="https://ci.nii.ac.jp/naid/130007474484">\n <title ...
## [10] <item rdf:about="https://ci.nii.ac.jp/naid/130007422541">\n <title ...
## [11] <item rdf:about="https://ci.nii.ac.jp/naid/130006776985">\n <title ...
## [12] <item rdf:about="https://ci.nii.ac.jp/naid/130006302454">\n <title ...
## [13] <item rdf:about="https://ci.nii.ac.jp/naid/130006192952">\n <title ...
## [14] <item rdf:about="https://ci.nii.ac.jp/naid/130006179573">\n <title ...
## [15] <item rdf:about="https://ci.nii.ac.jp/naid/130006086145">\n <title ...
## [16] <item rdf:about="https://ci.nii.ac.jp/naid/130006077627">\n <title ...
## [17] <item rdf:about="https://ci.nii.ac.jp/naid/130005864949">\n <title ...
## [18] <item rdf:about="https://ci.nii.ac.jp/naid/130005466783">\n <title ...
## [19] <item rdf:about="https://ci.nii.ac.jp/naid/120006502028">\n <title ...
## [20] <item rdf:about="https://ci.nii.ac.jp/naid/120006342410">\n <title ...
# Look at the first child only (the channel node), which carries the
# opensearch:totalResults element.
xml_children(res.xml)[[1]]
## {xml_node}
## <channel about="https://ci.nii.ac.jp/opensearch/search?q=%e5%af%be%e5%bf%9c%e5%88%86%e6%9e%90&appID=j38sofo6PQBkh3Zl2Gt5&format=rss&count=200&year_from=2017&year_to=2017&REMOTE_ADDR2=126.235.212.127">
## [1] <title>CiNii OpenSearch - 対応分析 2017 2017</title>
## [2] <description>CiNii OpenSearch - 対応分析 2017 2017</description>
## [3] <link>https://ci.nii.ac.jp/opensearch/search?q=%e5%af%be%e5%bf%9c%e5 ...
## [4] <dc:date>2018-11-09T22:05:43+09:00</dc:date>
## [5] <opensearch:totalResults>19</opensearch:totalResults>
## [6] <opensearch:startIndex>1</opensearch:startIndex>
## [7] <opensearch:itemsPerPage>19</opensearch:itemsPerPage>
## [8] <items>\n <rdf:Seq>\n <rdf:li rdf:resource="https://ci.nii.ac.jp ...
# Convert the channel node to a nested R list so that its elements
# (e.g. $totalResults) can be accessed by name.
xml_children(res.xml)[[1]] %>% as_list()
## $title
## $title[[1]]
## [1] "CiNii OpenSearch - 対応分析 2017 2017"
##
##
## $description
## $description[[1]]
## [1] "CiNii OpenSearch - 対応分析 2017 2017"
##
##
## $link
## $link[[1]]
## [1] "https://ci.nii.ac.jp/opensearch/search?q=%e5%af%be%e5%bf%9c%e5%88%86%e6%9e%90&appID=j38sofo6PQBkh3Zl2Gt5&format=rss&count=200&year_from=2017&year_to=2017&REMOTE_ADDR2=126.235.212.127"
##
##
## $date
## $date[[1]]
## [1] "2018-11-09T22:05:43+09:00"
##
##
## $totalResults
## $totalResults[[1]]
## [1] "19"
##
##
## $startIndex
## $startIndex[[1]]
## [1] "1"
##
##
## $itemsPerPage
## $itemsPerPage[[1]]
## [1] "19"
##
##
## $items
## $items$Seq
## $items$Seq$li
## list()
## attr(,"resource")
## [1] "https://ci.nii.ac.jp/naid/40021429732"
##
## $items$Seq$li
## list()
## attr(,"resource")
## [1] "https://ci.nii.ac.jp/naid/40021185987"
##
## $items$Seq$li
## list()
## attr(,"resource")
## [1] "https://ci.nii.ac.jp/naid/120006243116"
##
## $items$Seq$li
## list()
## attr(,"resource")
## [1] "https://ci.nii.ac.jp/naid/120006000156"
##
## $items$Seq$li
## list()
## attr(,"resource")
## [1] "https://ci.nii.ac.jp/naid/40021559870"
##
## $items$Seq$li
## list()
## attr(,"resource")
## [1] "https://ci.nii.ac.jp/naid/40021330481"
##
## $items$Seq$li
## list()
## attr(,"resource")
## [1] "https://ci.nii.ac.jp/naid/40021195144"
##
## $items$Seq$li
## list()
## attr(,"resource")
## [1] "https://ci.nii.ac.jp/naid/130007474484"
##
## $items$Seq$li
## list()
## attr(,"resource")
## [1] "https://ci.nii.ac.jp/naid/130007422541"
##
## $items$Seq$li
## list()
## attr(,"resource")
## [1] "https://ci.nii.ac.jp/naid/130006776985"
##
## $items$Seq$li
## list()
## attr(,"resource")
## [1] "https://ci.nii.ac.jp/naid/130006302454"
##
## $items$Seq$li
## list()
## attr(,"resource")
## [1] "https://ci.nii.ac.jp/naid/130006192952"
##
## $items$Seq$li
## list()
## attr(,"resource")
## [1] "https://ci.nii.ac.jp/naid/130006179573"
##
## $items$Seq$li
## list()
## attr(,"resource")
## [1] "https://ci.nii.ac.jp/naid/130006086145"
##
## $items$Seq$li
## list()
## attr(,"resource")
## [1] "https://ci.nii.ac.jp/naid/130006077627"
##
## $items$Seq$li
## list()
## attr(,"resource")
## [1] "https://ci.nii.ac.jp/naid/130005864949"
##
## $items$Seq$li
## list()
## attr(,"resource")
## [1] "https://ci.nii.ac.jp/naid/130005466783"
##
## $items$Seq$li
## list()
## attr(,"resource")
## [1] "https://ci.nii.ac.jp/naid/120006502028"
##
## $items$Seq$li
## list()
## attr(,"resource")
## [1] "https://ci.nii.ac.jp/naid/120006342410"
##
##
##
## attr(,"about")
## [1] "https://ci.nii.ac.jp/opensearch/search?q=%e5%af%be%e5%bf%9c%e5%88%86%e6%9e%90&appID=j38sofo6PQBkh3Zl2Gt5&format=rss&count=200&year_from=2017&year_to=2017&REMOTE_ADDR2=126.235.212.127"
totalResults に結果(ヒット数)が入っていることがわかるので、これを取り出せばよい。
# Draw the cumulative and per-year panels for the collected counts.
plot_hit2(query_w = "対応分析", hit_year = year_nn)