This is the R Markdown Notebook created for the JCI analysis. The notebook and the data are available on GitHub.
source('functions.R')
library(reldist)
library(gglorenz)
library(ggplot2)
library(GGally)
library(dplyr)
library(ggnewscale)
set.seed(13)
The first line can only be executed if you have downloaded the data from JCR as multiple files; otherwise, use the second line, which reads the anonymized JCR data.
# Option 1 (disabled): rebuild the dataset from the individual JCR export
# files with read_jcr(), which is presumably defined in functions.R.
#df <- read_jcr('data/files/')
# Option 2: read the anonymized JCR table, keeping strings as characters and
# leaving the original column names (which contain spaces) untouched.
df <- read.delim(
  file = 'data/jcr.tsv',
  check.names = FALSE,
  stringsAsFactors = FALSE
)
Some descriptive statistics of the dataset.
# Five-number summary, mean and NA count of the 2020 JCI column.
summary(df[['2020 JCI']])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.3400 0.6400 0.8058 1.0300 77.6400 66
The Gini index is calculated for each JCR indicator.
# Gini index of each JCR indicator, computed on its non-missing values only.
gini(df[['2020 JIF']][!is.na(df[['2020 JIF']])])
## [1] 0.4444105
gini(df[['2020 JCI']][!is.na(df[['2020 JCI']])])
## [1] 0.4365864
gini(df[['5 Year JIF']][!is.na(df[['5 Year JIF']])])
## [1] 0.4403715
gini(df[['Immediacy Index']][!is.na(df[['Immediacy Index']])])
## [1] 0.6220866
gini(df[['Eigenfactor']][!is.na(df[['Eigenfactor']])])
## [1] 0.8019052
gini(df[['Total Citations']][!is.na(df[['Total Citations']])])
## [1] 0.7918109
gini(df[['Article Influence Score']][!is.na(df[['Article Influence Score']])])
## [1] 0.5499661
A new long-format data.frame (one row per journal record and indicator) is created so that the Lorenz curves can be drawn easily.
# Build a long-format table (columns Cat, DB, Value, Indicator) so that a
# single ggplot call can draw one Lorenz curve per indicator.
indicators <- c('2020 JCI', '2020 JIF', '5 Year JIF', 'Immediacy Index',
                'Eigenfactor', 'Total Citations', 'Article Influence Score')
# One slice per indicator, stacked with a single rbind. Growing df_den with
# rbind inside a loop would re-copy all accumulated rows on every iteration.
df_den <- do.call(rbind.data.frame, c(
  lapply(indicators, function(x) {
    aux <- df[, c('Cat', 'DB', x)]
    names(aux)[3] <- 'Value'
    aux$Indicator <- x
    aux
  }),
  list(stringsAsFactors = FALSE)
))
# Replace the raw JCR column names by the labels shown in the figure legend;
# indicators without an entry in the lookup keep their original name.
legend_labels <- c('2020 JCI' = 'Journal Citation Indicator',
                   '2020 JIF' = 'Journal Impact Factor',
                   '5 Year JIF' = '5-Year Journal Impact Factor')
relabel <- df_den$Indicator %in% names(legend_labels)
df_den$Indicator[relabel] <- unname(legend_labels[df_den$Indicator[relabel]])
The Lorenz curve is obtained.
# Lorenz curves of the seven indicators, one coloured curve per indicator.
ggplot(df_den, aes(x=Value, color=Indicator)) +
  stat_lorenz(desc=FALSE, alpha=0.7, size=1) +
  # Diagonal reference line (default intercept 0, slope 1).
  geom_abline(size=0.8) +
  # Seven manual colours, one per value of Indicator.
  scale_color_manual(values=c('#fb0007','#129176', '#e5c421', '#f46f08', '#4cacd0', '#291b37', '#de8bb7')) +
  theme_light() +
  theme(panel.grid=element_blank(),
        legend.title=element_blank(),
        text=element_text(family='Arial', size=12.5, color='black'),
        axis.text=element_text(color='black', size=12),
        axis.ticks=element_line(color='black'),
        legend.position='bottom',
        panel.border=element_rect(colour='black'),
        strip.background=element_rect(colour='black', fill='black'),
        strip.text=element_text(size=14),
        legend.text=element_text(size=10)) +
  # The former guides(fill=guide_colorbar(...)) call was dropped: this plot maps
  # no fill aesthetic, so it had no effect.
  # Axis labels: spelling fixed ("Cummulative" -> "Cumulative").
  labs(y='Cumulative percentage of indicator', x='Cumulative percentage of journals')
A new data.frame with the seven indicators is created to build a matrix of plots.
# Input table for the pairs plot: the seven indicator columns plus DB.
m_plot <- df[, c('2020 JCI', '2020 JIF', '5 Year JIF', 'Immediacy Index',
                 'Eigenfactor', 'Total Citations', 'Article Influence Score',
                 'DB')]
# Shorten three headers (JCI, JIF, AIS); the other names stay as they are.
names(m_plot) <- c('JCI', 'JIF', '5 Year JIF', 'Immediacy Index',
                   'Eigenfactor', 'Total Citations', 'AIS', 'DB')
The matrix of plots with the correlations and distribution of data is generated.
# Pairs plot of the seven indicators (columns 1 to 7 of m_plot): scatter plots
# in the lower triangle, correlation coefficients in the upper triangle.
ggpairs(
  m_plot,
  columns = 1:7,
  lower = list(continuous = wrap('points', alpha = 0.6)),
  upper = list(continuous = wrap('cor', size = 5))
) +
  theme(
    legend.position = 'none',
    # White labels on black facet strips.
    strip.background = element_rect(color = 'black', fill = 'black'),
    strip.text = element_text(size = 12, color = 'white'),
    axis.ticks = element_line(color = 'black'),
    axis.text = element_text(color = 'black', size = 11),
    text = element_text(family = 'Arial', size = 12.5, color = 'black')
  )
Pearson correlations between the JCI and each of the other six indicators are calculated.
# Correlate the 2020 JCI with each of the other six indicators.
# biblio_cor() is defined outside this chunk (presumably in functions.R); it is
# assumed to return a data frame with a Correlation column - TODO confirm.
cor_indicators <- c('2020 JIF', '5 Year JIF', 'Immediacy Index',
                    'Eigenfactor', 'Total Citations', 'Article Influence Score')
# One result per indicator, stacked with a single rbind. Growing df_cor with
# rbind inside a loop would re-copy all accumulated rows on every iteration.
df_cor <- do.call(rbind.data.frame, c(
  lapply(cor_indicators, function(x) {
    aux <- biblio_cor(df, x, '2020 JCI')
    aux$Indicator <- x
    aux
  }),
  list(stringsAsFactors = FALSE)
))
# Discard the rows for which no correlation is available.
df_cor <- df_cor[!is.na(df_cor$Correlation), ]
An ESI field is assigned to each Web of Science category.
# Load the mapping table that holds the WC and ESI columns.
esi <- read.csv2(file = 'data/final_map.csv', stringsAsFactors = FALSE)
# Normalise the category names before joining: drop the apostrophe of
# "Women's Studies" and upper-case every name (presumably to match the Cat
# values in df_cor - verify against the data).
esi$WC[esi$WC %in% "Women's Studies"] <- 'Womens Studies'
esi[['WC']] <- toupper(esi[['WC']])
# Attach the ESI field; rows whose Cat has no match in WC are dropped.
df_cor <- inner_join(df_cor, esi[, c('WC', 'ESI')], by = c('Cat' = 'WC'))
First, the correlations of the JCI with the JIF and the 5-Year JIF are selected for the SCIE and SSCI editions.
# Keep only the JIF and 5-Year JIF correlations of the SSCI and SCIE rows.
jci_jif <- df_cor[df_cor$DB %in% c('SSCI', 'SCIE') &
                    df_cor$Indicator %in% c('2020 JIF', '5 Year JIF'), ]
# Human-readable labels for the plot axis and the facet strips.
jci_jif$DB[jci_jif$DB %in% 'SCIE'] <- 'Science'
jci_jif$DB[jci_jif$DB %in% 'SSCI'] <- 'Social Sciences'
jci_jif$Indicator[jci_jif$Indicator %in% '2020 JIF'] <- 'Journal Impact Factor'
jci_jif$Indicator[jci_jif$Indicator %in% '5 Year JIF'] <- '5-Year Journal Impact Factor'
# Ordered factor so that the JIF level comes before the 5-Year JIF level.
jci_jif$Indicator <- ordered(
  jci_jif$Indicator,
  levels = c('Journal Impact Factor', '5-Year Journal Impact Factor')
)
A boxplot restricted to correlations of at least 0.5 is generated.
# Boxplot of the correlations in jci_jif: one box per DB value, one facet per
# Indicator, with the y axis limited to the range 0.5-1.
ggplot(data=jci_jif, aes(y=Correlation, x=DB, group=DB))+
# outlier.shape=NA: the boxes draw no outlier markers; the individual
# observations are added by the jitter layers below.
geom_boxplot(outlier.shape=NA, width=0.8) +
# Points of the 'Journal Impact Factor' rows only: fill mapped to Correlation
# (white-to-red gradient), size mapped to Docs.
geom_jitter(data=jci_jif[which(jci_jif$Indicator=='Journal Impact Factor'),], aes(fill=Correlation, size=Docs), shape=21, color='black', stroke=0.5, alpha=0.6)+
scale_fill_gradient(low='white', high='#eb2701', na.value=NA, breaks=c(0.5, 0.75, 1), limits=c(0.5,1))+
# ggnewscale: start a second, independent fill scale so that the next layer
# can use its own (white-to-blue) gradient.
new_scale_fill()+
# Points of the '5-Year Journal Impact Factor' rows only.
geom_jitter(data=jci_jif[which(jci_jif$Indicator=='5-Year Journal Impact Factor'),], aes(fill=Correlation, size=Docs), shape=21, color='black', stroke=0.5, alpha=0.6)+
scale_fill_gradient(low='white', high='blue', na.value=NA, breaks=c(0.5, 0.75, 1), limits=c(0.5,1))+
# NOTE(review): ylim() sets scale limits, so ggplot2 should drop observations
# outside 0.5-1 before the boxplot statistics are computed. That appears to be
# the intent of this restricted figure - confirm.
ylim(c(0.5,1))+
scale_size_continuous(range=c(1,5), breaks=c(0, 25000,50000, 100000, 125000, 150000, 200000))+
theme_light()+
theme(panel.grid=element_blank(),
text=element_text(family='Arial', size=12.5, color='black'),
axis.text=element_text(color='black', size=11),
axis.ticks=element_line(color='black'),
# All legends are hidden, so legend.box and the guides() call below have no
# visible effect in this figure.
legend.position='none',
legend.box='vertical',
panel.border=element_rect(colour='black'),
strip.background=element_rect(colour='black', fill='black'),
strip.text = element_text(size=14))+
guides(fill=guide_colorbar(label.position='bottom',
title.position='left', title.vjust=0.85)) +
# Empty x-axis title.
labs(x='')+
facet_wrap(~Indicator)
The complete boxplot does not vary much; only 7 more categories are included.
# Complete boxplot of the correlations in jci_jif (no y-axis restriction):
# one box per DB value, one facet per Indicator.
ggplot(data=jci_jif, aes(y=Correlation, x=DB, group=DB)) +
  # outlier.shape=NA: the boxes draw no outlier markers; the individual
  # observations are added by the jitter layers below.
  geom_boxplot(outlier.shape=NA, width=0.8) +
  # Points of the 'Journal Impact Factor' rows (white-to-red gradient).
  geom_jitter(data=jci_jif[which(jci_jif$Indicator=='Journal Impact Factor'),], aes(fill=Correlation, size=Docs), shape=21, color='black', stroke=0.5, alpha=0.6) +
  # Fill limits widened from c(0.5, 1) to c(0, 1): this plot is not cut at 0.5,
  # so the old limits left points below 0.5 without fill (na.value=NA) and made
  # this scale disagree with the 5-Year JIF scale below.
  scale_fill_gradient(low='white', high='#eb2701', na.value=NA, breaks=c(0, 0.5, 1), limits=c(0,1)) +
  # ggnewscale: start a second, independent fill scale for the next layer.
  new_scale_fill() +
  # Points of the '5-Year Journal Impact Factor' rows (white-to-blue gradient).
  geom_jitter(data=jci_jif[which(jci_jif$Indicator=='5-Year Journal Impact Factor'),], aes(fill=Correlation, size=Docs), shape=21, color='black', stroke=0.5, alpha=0.6) +
  scale_fill_gradient(low='white', high='blue', na.value=NA, breaks=c(0, 0.5, 1), limits=c(0,1)) +
  scale_size_continuous(range=c(1,5), breaks=c(0, 25000,50000, 100000, 125000, 150000, 200000)) +
  theme_light() +
  theme(panel.grid=element_blank(),
        text=element_text(family='Arial', size=12.5, color='black'),
        axis.text=element_text(color='black', size=11),
        axis.ticks=element_line(color='black'),
        # All legends are hidden, so legend.box and guides() are not visible.
        legend.position='none',
        legend.box='vertical',
        panel.border=element_rect(colour='black'),
        strip.background=element_rect(colour='black', fill='black'),
        strip.text = element_text(size=14)) +
  guides(fill=guide_colorbar(label.position='bottom',
                             title.position='left', title.vjust=0.85)) +
  # Empty x-axis title.
  labs(x='') +
  facet_wrap(~Indicator)
The correlations between the JCI and the rest of the indicators are calculated, but taking into account only those categories with more than 25 journals. This is because a few small categories greatly alter the results.
# Correlations with the remaining indicators (everything except the two JIF
# variants), restricted to rows with more than 25 journals. which() is kept so
# that rows with a missing Journals value are dropped rather than turned into
# NA rows.
ot_indicators <- df_cor[which(
  df_cor$Journals > 25 &
    !(df_cor$Indicator %in% c('2020 JIF', '5 Year JIF'))
), ]
# Display labels for the x axis; '\n' breaks the longer names over two lines.
ot_indicators$DB[ot_indicators$DB %in% 'SCIE'] <- 'Science'
ot_indicators$DB[ot_indicators$DB %in% 'SSCI'] <- 'Social\nSciences'
ot_indicators$DB[ot_indicators$DB %in% 'AHCI'] <- 'Arts &\nHumanities'
ot_indicators$DB[ot_indicators$DB %in% 'ESCI'] <- 'Emerging\nSources'
Boxplots are generated.
# Boxplots of the correlations in ot_indicators: one box per DB value and one
# facet per Indicator, with the observations overlaid as jittered points.
ggplot(data = ot_indicators, aes(y = Correlation, x = DB, group = DB)) +
  # Boxes without outlier markers; every observation is drawn by geom_jitter.
  geom_boxplot(outlier.shape = NA, width = 0.8) +
  geom_jitter(
    aes(fill = Correlation, size = Docs),
    shape = 21, color = 'black', stroke = 0.5, alpha = 0.6
  ) +
  # Fill: white-to-red gradient over 0-1. Size: scaled from Docs.
  scale_fill_gradient(
    low = 'white', high = '#eb2701', na.value = NA,
    breaks = c(0, 0.5, 1), limits = c(0, 1)
  ) +
  scale_size_continuous(
    range = c(0.1, 3.5),
    breaks = c(0, 25000, 50000, 100000, 125000, 150000, 200000)
  ) +
  theme_light() +
  theme(
    legend.position = 'bottom',
    panel.grid = element_blank(),
    panel.border = element_rect(color = 'black'),
    strip.background = element_rect(color = 'black', fill = 'black'),
    strip.text = element_text(size = 14),
    axis.ticks = element_line(color = 'black'),
    axis.text = element_text(color = 'black', size = 10.5),
    text = element_text(family = 'Arial', size = 12.5, color = 'black')
  ) +
  guides(fill = guide_colorbar(
    label.position = 'bottom',
    title.position = 'left',
    title.vjust = 0.85
  )) +
  # Empty x-axis title.
  labs(x = '') +
  facet_wrap(~Indicator)