This is the good thing about open databases. Although Google Scholar (GS) is not a completely open database, the people at Google let us do the following things using R and the scholar package. This post documents my first trial of the scholar package to harvest GS data.
Your GS ID is written in the URL of your profile as the value of the `user` parameter, for instance: `https://scholar.google.com/citations?user=<your-GS-ID>&hl=en`
In my trial, I used the following packages:
scholar, tidyverse
# Remove the '#' sign in front of install.packages() if you haven't installed the packages yet
#install.packages("scholar")
library(scholar)
#install.packages("tidyverse")
library(tidyverse)
#install.packages("gridExtra")
library(gridExtra)
# Google Scholar IDs of the four researchers compared in this script.
# The ID is only the value of the `user=` parameter in the profile URL. The
# "&hl" that follows it in the address bar is the start of the next query
# parameter (`&hl=en`), not part of the ID, so it is left out here.
E <- "Myvc78MAAAAJ" # hydrogeology
ES <- "6U_YtvMAAAAJ" # math
Kh <- "4_asJ0MAAAAJ" # physics
JT <- "P7FvGMEAAAAJ" # paleontology
# Download the Google Scholar profile of every researcher in one pass
# (same order as before: E, ES, Kh, JT), then keep each result under its
# own name.
profiles <- lapply(list(E = E, ES = ES, Kh = Kh, JT = JT), get_profile)
E.profile <- profiles[["E"]]
ES.profile <- profiles[["ES"]]
Kh.profile <- profiles[["Kh"]]
JT.profile <- profiles[["JT"]]

# Bare top-level expressions: each profile is auto-printed for inspection.
E.profile
ES.profile
Kh.profile
JT.profile
# Ask scholar for the number of articles of each researcher.
article_counts <- lapply(
  list(E = E, ES = ES, Kh = Kh, JT = JT),
  get_num_articles
)
E.num <- article_counts[["E"]]
ES.num <- article_counts[["ES"]]
Kh.num <- article_counts[["Kh"]]
JT.num <- article_counts[["JT"]]

# One bar per researcher, labelled with the short names used in this script.
num <- c(E.num, ES.num, Kh.num, JT.num)
scholar_labels <- c("E", "ES", "Kh", "JT")
barplot(height = num, names.arg = scholar_labels)
# Career comparison for ES, Kh and JT (E is not included in this vector).
IDs <- c(ES, Kh, JT)
career_comparison <- compare_scholar_careers(IDs, career = TRUE)
career_comparison

# Citation history of the same three researchers, fetched in the order
# ES, Kh, JT and then stored under individual names.
cite_histories <- lapply(
  list(ES = ES, Kh = Kh, JT = JT),
  get_citation_history
)
ES.cite.year <- cite_histories[["ES"]] # success
Kh.cite.year <- cite_histories[["Kh"]] # success
JT.cite.year <- cite_histories[["JT"]] # success
# E.cite.year <- get_citation_history(E) # hmm, error, strange
# Build a bar chart of citations per year from a citation-history data frame.
#
# cite_df: data frame with `year` and `cites` columns (the columns mapped by
#          aes(year, cites) below).
# fill:    fill colour of the bars.
# breaks:  x-axis tick positions. When NULL (the default) one tick is drawn
#          for every year covered by `cite_df`, so the axis stays fully
#          labelled as new years of data arrive. The ticks used to be
#          hard-coded to 2009-2017, which left later years unlabelled.
#
# Returns the ggplot object; nothing is drawn until it is printed.
plot_cite_history <- function(cite_df, fill, breaks = NULL) {
  if (is.null(breaks)) {
    years <- cite_df$year[is.finite(cite_df$year)]
    breaks <- if (length(years) > 0) {
      seq(min(years), max(years), by = 1)
    } else {
      # No usable years: fall back to ggplot2's automatic breaks.
      waiver()
    }
  }
  ggplot(cite_df, aes(year, cites)) +
    geom_bar(stat = "identity", fill = fill) +
    scale_x_continuous(breaks = breaks)
}

# One histogram per researcher, each with its own bar colour.
ES.hist <- plot_cite_history(ES.cite.year, fill = colors()[128])
Kh.hist <- plot_cite_history(Kh.cite.year, fill = colors()[120])
JT.hist <- plot_cite_history(JT.cite.year, fill = colors()[118])

# Display the three plots.
ES.hist
Kh.hist
JT.hist
## My work around
# The commented-out get_citation_history(E) call errors, so for E the
# per-year citations of a single article are plotted instead.
E.articles <- get_publications(E)

# Turn factor columns into plain character vectors. stringsAsFactors = FALSE
# is required: on R < 4.0 as.data.frame() would otherwise convert the strings
# straight back into factors, undoing the conversion.
E.articles <- as.data.frame(
  lapply(E.articles, function(x) if (is.factor(x)) as.character(x) else x),
  stringsAsFactors = FALSE
)

# Locate the article whose title contains "Ultrafast" and show what matched.
w <- grep("Ultrafast", E.articles$title, fixed = TRUE)
E.articles$title[w]
E.articles$pubid[w]

# The history is requested for one publication id, so fail with a clear
# message when the title search matched nothing or more than one article.
if (length(w) != 1L) {
  stop(
    "Expected exactly one title containing \"Ultrafast\", found ",
    length(w),
    call. = FALSE
  )
}
E.cite.year <- get_article_cite_history(E, E.articles$pubid[w])

# One x-axis tick per year present in the data (previously hard-coded to
# 2000-2017); fall back to ggplot2's automatic breaks if no year is usable.
E.years <- E.cite.year$year[is.finite(E.cite.year$year)]
E.breaks <- if (length(E.years) > 0) {
  seq(min(E.years), max(E.years), by = 1)
} else {
  waiver()
}

# Tick labels are rotated 45 degrees so the many year labels do not overlap.
E.hist <- ggplot(E.cite.year, aes(year, cites)) +
  geom_bar(stat = "identity", fill = colors()[110]) +
  scale_x_continuous(breaks = E.breaks) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Display the plot, as is done for the ES, Kh and JT histograms.
E.hist
# Journal and publication-age statistics for the four researchers. Every call
# below is a bare top-level expression, so its value is auto-printed.

# Number of distinct journals per researcher (as the scholar function name
# states).
get_num_distinct_journals(E)
get_num_distinct_journals(ES)
get_num_distinct_journals(Kh)
get_num_distinct_journals(JT)
# Number of "top" journals per researcher. No journal list is passed, so the
# scholar package's default applies -- TODO confirm which list that is.
get_num_top_journals(E)
get_num_top_journals(ES)
get_num_top_journals(Kh)
get_num_top_journals(JT)
# Oldest article of each researcher (presumably the earliest publication
# year on the profile -- verify against the scholar documentation).
get_oldest_article(E)
get_oldest_article(ES)
get_oldest_article(Kh)
get_oldest_article(JT)
# Export E's publication list to "citations.csv" in the working directory.
E.pubs <- get_publications(E)
# Write the data frame fetched on the previous line; the call used to
# reference `pubs`, which is not defined anywhere in this script.
write.csv(E.pubs, file = "citations.csv")
# Run scholar's predict_h_index() for every researcher (order: E, ES, Kh, JT)
# and keep each result under its own name.
h_predictions <- lapply(
  list(E = E, ES = ES, Kh = Kh, JT = JT),
  predict_h_index
)
E.x <- h_predictions[["E"]]
ES.x <- h_predictions[["ES"]]
Kh.x <- h_predictions[["Kh"]]
JT.x <- h_predictions[["JT"]]