This is the good thing about open databases. Although Google Scholar (GS) is not a completely open database, the people at Google let us do the following things using R and the scholar package.
This post was my first attempt at using the scholar package to harvest GS data.
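Your GS ID appears in the URL of your profile page: it is the value of the `user=` parameter, i.e. the text between `user=` and the next `&`, for instance `https://scholar.google.com/citations?user=<YOUR_ID>&hl=en`.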
In my trial, I used the following packages:
- scholar
- tidyverse
- gridExtra
# Remove the leading '#' from the install.packages() lines below if you haven't installed the packages yet
#install.packages("scholar")
library(scholar)
#install.packages("tidyverse")
library(tidyverse)
#install.packages("gridExtra")
library(gridExtra)
# Google Scholar IDs of the four scholars compared in this script.
# An ID is the value of the `user=` parameter in a profile URL. The "&hl" that
# was pasted along with each ID is the start of the next URL parameter
# (`&hl=en`), not part of the ID, so it is left out here.
E <- "Myvc78MAAAAJ" # hydrogeology
ES <- "6U_YtvMAAAAJ" # math
Kh <- "4_asJ0MAAAAJ" # physics
JT <- "P7FvGMEAAAAJ" # paleontology
# Request the Google Scholar profile of each scholar and keep the results
# under one variable per scholar.
E.profile <- get_profile(id = E)
ES.profile <- get_profile(id = ES)
Kh.profile <- get_profile(id = Kh)
JT.profile <- get_profile(id = JT)

# Show the four profiles, in the same order they were requested.
print(E.profile)
print(ES.profile)
print(Kh.profile)
print(JT.profile)
# Number of articles reported for each scholar.
E.num <- get_num_articles(id = E)
ES.num <- get_num_articles(id = ES)
Kh.num <- get_num_articles(id = Kh)
JT.num <- get_num_articles(id = JT)

# Bar chart of the four counts; bars are labelled with the scholars' short
# names, in the same order as the values in `num`.
num <- c(E.num, ES.num, Kh.num, JT.num)
barplot(height = num, names.arg = c("E", "ES", "Kh", "JT"))

# Compare the citation careers of three of the scholars in a single call.
# NOTE(review): E is not part of this comparison; presumably because the
# get_citation_history(E) call later in this script is reported to fail. Confirm.
IDs <- c(ES, Kh, JT)
compare_scholar_careers(ids = IDs, career = TRUE)
# Per-year citation counts for each scholar. The tables are plotted below
# through their `year` and `cites` columns.
ES.cite.year <- get_citation_history(ES) # success
Kh.cite.year <- get_citation_history(Kh) # success
JT.cite.year <- get_citation_history(JT) # success
# E.cite.year <- get_citation_history(E) # hmm, error, strange

# Build a bar chart of citations per year for one scholar.
#
# cite_history: data frame with `year` and `cites` columns.
# fill_index:   position in grDevices::colors() of the bar fill colour.
# years:        x-axis tick positions (defaults to the 2009-2017 range that
#               all three plots share).
# Returns the ggplot object; nothing is drawn until it is printed.
plot_cite_history <- function(cite_history, fill_index, years = 2009:2017) {
  ggplot(cite_history, aes(year, cites)) +
    geom_bar(stat = "identity", fill = colors()[fill_index]) +
    scale_x_continuous(breaks = years)
}

# One plot per scholar; only the data and the bar colour differ.
ES.hist <- plot_cite_history(ES.cite.year, fill_index = 128)
Kh.hist <- plot_cite_history(Kh.cite.year, fill_index = 120)
JT.hist <- plot_cite_history(JT.cite.year, fill_index = 118)

# Render the three charts.
ES.hist
Kh.hist
JT.hist
## My workaround: get_citation_history(E) was reported to error, so plot the
## per-year citations of a single article from E's publication list instead.
E.pub.list <- get_publications(E)

# Turn factor columns into plain character vectors. stringsAsFactors = FALSE
# stops as.data.frame() from converting them straight back into factors on
# R < 4.0, where TRUE was the default.
E.pub.list <- as.data.frame(
  lapply(E.pub.list, function(x) if (is.factor(x)) as.character(x) else x),
  stringsAsFactors = FALSE
)

# Row of the article whose title contains "Ultrafast".
w <- grep("Ultrafast", E.pub.list$title)
# The history lookup below is given a single article ID, so stop with a clear
# message if the title search is empty or ambiguous.
if (length(w) != 1L) {
  stop(
    "Expected exactly one title matching 'Ultrafast', found ", length(w),
    call. = FALSE
  )
}
E.pub.list$title[w]
E.pub.list$pubid[w]

# Citation history of that one article (not of the whole profile).
E.cite.year <- get_article_cite_history(E, E.pub.list$pubid[w])

E.hist <- ggplot(E.cite.year, aes(year, cites)) +
  geom_bar(stat = "identity", fill = colors()[110]) +
  scale_x_continuous(breaks = 2000:2017) +
  # Eighteen yearly tick labels overlap when horizontal, so tilt them.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Render the chart, as is done for the other three scholars.
E.hist
# Number of distinct journals each scholar has published in.
get_num_distinct_journals(id = E)
get_num_distinct_journals(id = ES)
get_num_distinct_journals(id = Kh)
get_num_distinct_journals(id = JT)

# Number of each scholar's papers in "top" journals; the journal list is left
# at the package default because no `journals` argument is passed.
get_num_top_journals(id = E)
get_num_top_journals(id = ES)
get_num_top_journals(id = Kh)
get_num_top_journals(id = JT)

# Oldest article recorded for each scholar.
get_oldest_article(id = E)
get_oldest_article(id = ES)
get_oldest_article(id = Kh)
get_oldest_article(id = JT)
# Fetch E's publication list and save it as "citations.csv" in the current
# working directory.
E.pubs <- get_publications(E)
# Write the table fetched above; no object named `pubs` exists in this script.
write.csv(E.pubs, file = "citations.csv")
# Predicted h-index for each scholar. The results are only stored, not
# printed; inspect E.x, ES.x, Kh.x or JT.x to see them.
E.x <- predict_h_index(id = E)
ES.x <- predict_h_index(id = ES)
Kh.x <- predict_h_index(id = Kh)
JT.x <- predict_h_index(id = JT)