Orientador: Eduardo Faerstein (IMS/UERJ)

BIBLIOTECAS

library(knitr)
library(epiDisplay)
library(rworldmap)
library(igraph)
library(networkD3)
library(timevis)

```

BANCO DE DADOS

# Load the two semicolon-separated input files used by the whole script.
# dataset = read.table(file.choose(), header = TRUE, sep = ";")
# NOTE(review): the working directory is a machine-specific absolute path;
# the script only runs unchanged where this folder exists.
setwd("C:/Users/Ronaldo Alves/Desktop/TESE_Ronaldo_2018/capitulo_2_artigo_revisao_IJE 2018/apendices_cap_2/script_analise")

# Both files have a header row and use ";" as the field separator.
read_semicolon_file <- function(path) {
  read.table(path, header = TRUE, sep = ";")
}

# `dataset`: the review database, read by every later section.
dataset <- read_semicolon_file("database_review_all_data_12jan18.csv")
# `map`: frequency table joined to the world map in the geographic section.
map <- read_semicolon_file("freq_all_affiliation.csv")

Análise de confiabilidade: amostra de 15% da bibliografia

{
  # Reproducible random sample of 15% of the rows of `dataset`; the value of
  # the braced expression (the sorted `UT` values of the sampled rows, wrapped
  # in a list) is what gets printed.
  set.seed(2018)
  n_rows <- nrow(dataset)
  sampled_rows <- sample(seq_len(n_rows), size = 0.15 * n_rows)
  data <- dataset[sampled_rows, ]
  list(sort(data$UT))
}
## [[1]]
##  [1]   2  14  18  20  27  38  42  53  54  55  60  67  77  82  95 104 109
## [18] 125 129 141 142 147 149 162 176 189 193 194 196 211 218 221 222 224
## [35] 225 226 233 241 250 255 270 274 278 285 291 292 295 300 325 330 333
## [52] 339 350 358 361 375 376 389 394 399 413 416

2.4. Resultados

Volume e tendência de uso dos índices de desigualdade.

# Yearly frequencies of publications, index usage and citations ----
#
# Builds `freq_all_PY`, one row per publication year (`ano`), holding:
#   freq_rii / freq_sii / freq_both  articles per year with MT == 1 / 2 / 3
#   freq_mLCS, freq_LCR              yearly sums of `mLCS` and `CR1`
#   freq_wos, freq_scopus            yearly sums of `TC_wos` and `TC_scopus`
#   freq_article                     articles per year
#   pred_*                           smoothing-spline fits of the counts
# Years without any article end up as zero.

years <- 1985:2016

# Articles per publication year for one value of the `MT` code.
count_index_by_year <- function(mt_code) {
  data.frame(table(dataset$PY[dataset$MT == mt_code]))
}

# Fetch a column of `dataset` by name. The first lookup follows the same rule
# as `dataset$name`; if that finds nothing, a unique case-insensitive match is
# accepted (elsewhere the script spells the citation columns `tc_wos` /
# `tc_scopus`). A column that cannot be found is an error instead of being
# silently summed as zero.
get_dataset_column <- function(name) {
  values <- dataset[[name, exact = FALSE]]
  if (is.null(values)) {
    hit <- names(dataset)[tolower(names(dataset)) == tolower(name)]
    if (length(hit) != 1L) {
      stop("Column '", name, "' not found in `dataset`", call. = FALSE)
    }
    values <- dataset[[hit]]
  }
  values
}

# Sum one column of `dataset` within each year of `years`; a year with no
# article sums to zero.
sum_by_year <- function(name, na.rm = FALSE) {
  values <- get_dataset_column(name)
  vapply(
    years,
    function(yr) sum(values[dataset$PY == yr], na.rm = na.rm),
    numeric(1)
  )
}

# Outer-merge the three per-index counts onto the full list of years so that
# years without publications are kept (NA for now, zero-filled further down).
all_Y <- data.frame(ano = years)
freq_indices <- all_Y
for (mt_code in 1:3) {
  freq_indices <- merge(freq_indices, count_index_by_year(mt_code),
                        by.x = "ano", by.y = "Var1", all = TRUE)
}
colnames(freq_indices) <- c("ano", "freq_rii", "freq_sii", "freq_both")

# Citation totals are computed for every year in `years`, not for a
# hand-typed list, so no publication year can be left out by accident.
freq_citation <- data.frame(
  ano = years,
  freq_mLCS = sum_by_year("mLCS"),
  freq_LCR = sum_by_year("CR1"),
  freq_wos = sum_by_year("TC_wos", na.rm = TRUE),
  freq_scopus = sum_by_year("TC_scopus", na.rm = TRUE)
)

freq_article <- data.frame(table(dataset$PY))
freqs_year <- merge(freq_citation, freq_article,
                    by.x = "ano", by.y = "Var1", all = TRUE)
colnames(freqs_year) <- c("ano", "freq_mLCS", "freq_LCR", "freq_wos",
                          "freq_scopus", "freq_article")

freq_all_PY <- merge(freq_indices, freqs_year, by = "ano", all = TRUE)
# Any value still missing after the merges (including an NA yearly sum)
# is set to zero.
freq_all_PY[is.na(freq_all_PY)] <- 0

# Smoothing splines of each yearly count against the year.
sm_article <- smooth.spline(freq_all_PY$ano, freq_all_PY$freq_article, cv = FALSE)
sm_rii <- smooth.spline(freq_all_PY$ano, freq_all_PY$freq_rii, cv = FALSE)
sm_sii <- smooth.spline(freq_all_PY$ano, freq_all_PY$freq_sii, cv = FALSE)
sm_both <- smooth.spline(freq_all_PY$ano, freq_all_PY$freq_both, cv = FALSE)

# Each predict() contributes an (x, y) pair; data.frame() names the repeated
# columns y, y.1, y.2, y.3 in the order given here.
predict_values <- data.frame(predict(sm_article), predict(sm_rii),
                             predict(sm_sii), predict(sm_both))

freq_all_PY$pred_article <- predict_values$y
freq_all_PY$pred_rii <- predict_values$y.1
freq_all_PY$pred_sii <- predict_values$y.2
freq_all_PY$pred_both <- predict_values$y.3

Figura 2. Volume e tendência de uso dos índices de desigualdade, 1985-2016 (N=417).

# par(mar=c(3.6, 4.1, 1, 0), cex.axis=1.3, cex.lab=1.3, family = "serif")
# Figure 2: bars with the yearly number of articles (count printed above each
# bar), overlaid with the fitted `pred_*` curves from `freq_all_PY`.
n_articles <- freq_all_PY$freq_article
graph_PY <- barplot(
  n_articles,
  beside = TRUE,
  ylab = "# of publications",
  col = "black",
  las = 2,
  names.arg = seq(1985, 2016, by = 1),
  density = 0,
  ylim = c(0, 50)
)
text(graph_PY, n_articles, labels = n_articles, pos = 3)

# Overall trend: thicker plain line, no point symbols.
lines(graph_PY, freq_all_PY$pred_article, lwd = 2, type = "l", col = "black")

# Per-index trends: thin lines with a distinct point symbol and colour each.
index_trends <- list(
  list(column = "pred_rii", symbol = 16, colour = "darkblue"),
  list(column = "pred_sii", symbol = 15, colour = "darkgreen"),
  list(column = "pred_both", symbol = 17, colour = "darkred")
)
for (trend in index_trends) {
  lines(graph_PY, freq_all_PY[[trend$column]], lwd = 1, lty = 1, type = "o",
        pch = trend$symbol, col = trend$colour)
}

legend(
  "topleft",
  inset = 0,
  cex = 1.1,
  lty = c(1, 1, 1, 1),
  pch = c(NA, 16, 15, 17),
  col = c("black", "darkblue", "darkgreen", "darkred"),
  legend = c("All", "RII", "SII", "Both"),
  bty = "n",
  y.intersp = 0.5,
  x.intersp = 0.5
)

Figura 2.1. Volume e tendência de uso dos índices de desigualdade, 1985-2016 (N=417).

# Figure 2.1: bars with the yearly number of articles, overlaid with the raw
# (unsmoothed) yearly counts for all articles and for each index type.
#
# The point symbol of every series is defined once and used for both the
# plotted line and the legend, so the legend shows the symbols that are
# actually drawn. "All" keeps the open circle (pch 1, the default for
# type = "b"); RII/SII/Both use 16/15/17.
series <- data.frame(
  column = c("freq_article", "freq_rii", "freq_sii", "freq_both"),
  label = c("All", "RII", "SII", "Both"),
  colour = c("black", "darkblue", "darkgreen", "darkred"),
  symbol = c(1, 16, 15, 17),
  stringsAsFactors = FALSE
)

graph_PY <- barplot(
  freq_all_PY$freq_article,
  beside = TRUE,
  ylab = "# of publications",
  col = "black",
  las = 2,
  names.arg = seq(1985, 2016, by = 1),
  density = 0,
  ylim = c(0, 50)
)

for (i in seq_len(nrow(series))) {
  lines(graph_PY, freq_all_PY[[series$column[i]]], type = "b",
        col = series$colour[i], pch = series$symbol[i], lwd = 2)
}

legend(
  "topleft",
  inset = 0,
  cex = 1.1,
  lty = c(1, 1, 1, 1),
  pch = series$symbol,
  col = series$colour,
  legend = series$label,
  bty = "n",
  y.intersp = 0.5,
  x.intersp = 0.5
)

PESQUISADORES

# Author statistics ----
# `AU3` is presumably the number of authors of each article (its sum is
# reported below as "author appearances") - confirm against the codebook.
sum(dataset$AU3) # author appearances = 2401
## [1] 2401
# Mean and standard deviation of `AU3`, rounded to one decimal place.
round(mean(dataset$AU3), 1)
round(sd(dataset$AU3), 1)
## [1] 5.8
## [1] 6.1
table(dataset$AU3)
## 
##  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 21 27 28 29 31 36 41 42 
## 13 53 81 79 59 42 24 13 14  8  2  3  5  2  3  2  2  1  1  1  1  1  1  1  1 
## 43 44 50 
##  1  2  1
# Join the "; "-separated `AU` strings of all articles into one string and
# split it again, giving a one-element list with every author appearance.
# The object must be called `AU_2`: table() takes the column label from it.
AU_2 <- strsplit(paste(as.character(dataset$AU), collapse = "; "), "; ")
# Number of distinct author names.
dim(table(AU_2))
## [1] 1267
# Appearances per author name.
freq_authors <- data.frame(table(AU_2))

Tabela 1. Autores mais produtivos da revisão sistemática de escopo, 1985-2016 (N=1267).

# Table 1: the 25 author names with the most appearances, most frequent first.
authors_by_freq <- freq_authors[order(freq_authors$Freq, decreasing = TRUE), ]
knitr::kable(head(authors_by_freq, 25), row.names = FALSE, results = "asis")
AU_2 Freq
KUNST AE 41
MACKENBACH JP 29
DAVEY-SMITH G 22
MARMOT M 21
MENVIELLE G 21
MARTIKAINEN P 18
BORRELL C 17
REGIDOR E 15
STRAND BH 13
BOPP M 12
LEINSALU M 12
SHIPLEY MJ 12
BRUNNER EJ 11
DEBOOSERE P 11
KHANG YH 11
BLAKELY T 10
COSTA G 10
LEYLAND AH 10
SINGH-MANOUX A 10
FERRIE JE 9
HARPER S 9
LAHELMA E 9
BARROS AJD 8
CHASTANG JF 8
ESNAOLA S 8

Afiliação dos artigos (todos os autores)

# Country frequencies from the address field `C1` (all authors) ----
# Replace line breaks with spaces and strip : ; , from every address string.
addresses <- gsub("\\n", " ", as.character(dataset$C1))
addresses <- gsub("[:;,:]", "", addresses)

# Pool all articles into one string and split it at ". " into single entries.
entries <- strsplit(paste(addresses, collapse = " "), "[.] ")[[1]]

# Within each entry drop everything up to the last "] " and a trailing ".".
# (Original author note: this could be split into two columns.)
country <- gsub(".*] ", "", entries)
country <- gsub("[.]$", "", country)

# Report ENGLAND and SCOTLAND under the single label UK.
country <- gsub("ENGLAND", "UK", country)
country <- gsub("SCOTLAND", "UK", country)

# Occurrences per distinct remaining string.
freq_all_affiliation <- data.frame(table(country))
colnames(freq_all_affiliation) <- c("country", "Freq")

Afiliação de todos os autores.

# The 25 most frequent countries among all author affiliations.
affiliation_by_freq <- freq_all_affiliation[order(freq_all_affiliation$Freq, decreasing = TRUE), ]
knitr::kable(head(affiliation_by_freq, 25), row.names = FALSE, results = "asis")
country Freq
UK 177
USA 75
NETHERLANDS 65
FRANCE 51
SWEDEN 47
SPAIN 46
NORWAY 43
FINLAND 39
ITALY 27
SOUTH KOREA 26
DENMARK 24
CANADA 23
AUSTRALIA 22
BRAZIL 21
SWITZERLAND 21
GERMANY 20
BELGIUM 19
NEW ZEALAND 16
LITHUANIA 12
JAPAN 10
CZECH REPUBLIC 9
ESTONIA 8
GREECE 8
COLOMBIA 7
AUSTRIA 6

Afiliação dos artigos (1º autor)

# First-author statistics ----
# Number of distinct values of `AU1`.
first_author_counts <- table(dataset$AU1)
dim(first_author_counts)
## [1] 303
# Articles per `AU1` value, then how many values have 1, 2, 3, ... articles.
freq_1o_author <- data.frame(first_author_counts)
table(freq_1o_author$Freq)
## 
##   1   2   3   4   5   6   7   9 
## 240  39  13   4   2   3   1   1
# Articles per value of `RP`, with each value's share of the total in percent
# (one decimal place).
freq_affiliation <- data.frame(table(dataset$RP))
freq_affiliation$Percent <- with(freq_affiliation, round(Freq / sum(Freq) * 100, 1))

Afiliação dos autores principais.

# The 20 most frequent `RP` values, with counts and percentages.
rp_by_freq <- freq_affiliation[order(freq_affiliation$Freq, decreasing = TRUE), ]
knitr::kable(head(rp_by_freq, 20), row.names = FALSE, results = "asis")
Var1 Freq Percent
ENGLAND 61 14.6
USA 37 8.9
NETHERLANDS 33 7.9
SCOTLAND 32 7.7
FRANCE 28 6.7
SOUTH KOREA 25 6.0
NORWAY 24 5.8
SPAIN 21 5.0
BRAZIL 19 4.6
SWEDEN 14 3.4
NEW ZEALAND 13 3.1
AUSTRALIA 12 2.9
CANADA 12 2.9
BELGIUM 11 2.6
FINLAND 11 2.6
SWITZERLAND 8 1.9
ITALY 7 1.7
DENMARK 6 1.4
JAPAN 6 1.4
AUSTRIA 5 1.2

PERIÓDICOS CIENTÍFICOS

# Articles per value of `SO` (journal title), with each journal's share of
# the total in percent (one decimal place).
freq_journals <- data.frame(table(dataset$SO))
freq_journals$Percent <- with(freq_journals, round(Freq / sum(Freq) * 100, 1))

Tabela 2. Periódicos mais produtivos da revisão sistemática de escopo, 1985-2016 (N=136).

# Table 2: the 20 journals with the most articles, most frequent first.
journals_by_freq <- freq_journals[order(freq_journals$Freq, decreasing = TRUE), ]
knitr::kable(head(journals_by_freq, 20), row.names = FALSE, results = "asis")
Var1 Freq Percent
JOURNAL OF EPIDEMIOLOGY AND COMMUNITY HEALTH 71 17.0
BMC PUBLIC HEALTH 19 4.6
EUROPEAN JOURNAL OF PUBLIC HEALTH 17 4.1
PLOS ONE 17 4.1
SOCIAL SCIENCE AND MEDICINE 17 4.1
INTERNATIONAL JOURNAL FOR EQUITY IN HEALTH 15 3.6
INTERNATIONAL JOURNAL OF EPIDEMIOLOGY 14 3.4
BMJ 12 2.9
AMERICAN JOURNAL OF PUBLIC HEALTH 11 2.6
BMJ OPEN 8 1.9
AMERICAN JOURNAL OF EPIDEMIOLOGY 7 1.7
EUROPEAN JOURNAL OF EPIDEMIOLOGY 7 1.7
PREVENTIVE MEDICINE 7 1.7
COMMUNITY DENTISTRY AND ORAL EPIDEMIOLOGY 6 1.4
INTERNATIONAL JOURNAL OF PUBLIC HEALTH 6 1.4
SCANDINAVIAN JOURNAL OF PUBLIC HEALTH 6 1.4
AUSTRALIAN AND NEW ZEALAND JOURNAL OF PUBLIC HEALTH 5 1.2
PUBLIC HEALTH 5 1.2
REVISTA PANAMERICANA DE SALUD PUBLICA 5 1.2
JOURNAL OF DENTAL RESEARCH 4 1.0

DISTRIBUIÇÃO GEOGRÁFICA

# Map preparation ----
# Margins, text scaling and font for the map figure (changes the global
# graphics state).
par(mar = c(1.5, 1, 1, 1), cex.axis = 1.3, cex.lab = 1.3, family = "serif")

# Attach the frequency table `map` to the world map, matching its `ISO3V10`
# column as ISO3 country codes; verbose = TRUE prints the matching report.
map2 <- joinCountryData2Map(
  map,
  joinCode = "ISO3",
  nameJoinColumn = "ISO3V10",
  verbose = TRUE
)
## 60 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
##      failedCodes failedCountries
## 183 codes from the map weren't represented in your data

# Five Jenks classes of `Freq`; their break points become the map categories
# and one colour of the "RdPu" palette is taken per class.
n_classes <- 5
classInt <- classInt::classIntervals(map2[["Freq"]], n = n_classes, style = "jenks")
## Warning in classInt::classIntervals(map2[["Freq"]], n = 5, style =
## "jenks"): var has missing values, omitted in finding classes
catMethod <- classInt[["brks"]]
colourPalette <- RColorBrewer::brewer.pal(n_classes, "RdPu")

Figura 3. Distribuição geográfica da produção acadêmica relacionada ao uso dos índices de desigualdade, segundo a afiliação dos autores principais (N=39).

# Figure 3: world map coloured by `Freq`, drawn without the default legend
# and without a title.
mapParams <- mapCountryData(
  map2,
  nameColumnToPlot = "Freq",
  addLegend = FALSE,
  catMethod = catMethod,
  colourPalette = colourPalette,
  mapTitle = ""
)

# Add the legend separately: the values returned by mapCountryData() are
# passed on to addMapLegend() together with the extra legend settings.
legend_args <- c(mapParams, legendLabels = "all", legendIntervals = "data", legendWidth = 0.5)
do.call(addMapLegend, legend_args)

ARTIGOS CITADOS METODOLOGICAMENTE

# Cited references ----
# Join the "; "-separated `CR` strings of all articles into one string and
# split it again, giving a one-element list with every cited reference.
# The object must be called `CR2`: table() takes the column label from it.
CR2 <- strsplit(paste(as.character(dataset$CR), collapse = "; "), "; ")
# Total number of entries; per the original note, 43 of them are missing
# references (they appear as a blank row in the table below).
sum(table(CR2)) # 43 NA
## [1] 845
# Occurrences per distinct reference string.
freq_CR <- data.frame(table(CR2))

Tabela 3. Artigos mais citados metodologicamente da revisão sistemática de escopo (N=97).

# Table 3: the 25 most frequent reference strings, most frequent first.
references_by_freq <- freq_CR[order(freq_CR$Freq, decreasing = TRUE), ]
knitr::kable(head(references_by_freq, 25), row.names = FALSE, results = "asis")
CR2 Freq
MACKENBACH J, 1997, SOCIAL SCIENCE AND MEDICINE 217
WAGSTAFF A, 1991, SOCIAL SCIENCE AND MEDICINE 85
PAMUK E, 1985, POPULATION STUDIES 57
43
REGIDOR E, 2004, JOURNAL OF EPIDEMIOLOGY AND COMMUNITY HEALTH 28
HAYES L, 2002, JOURNAL OF EPIDEMIOLOGY AND COMMUNITY HEALTH 26
MACKENBACH J, 2008, NEW ENGLAND JOURNAL OF MEDICINE 25
SERGEANT J, 2006, BIOSTATISTIC 23
KUNST A, 1994, AMERICAN JOURNAL OF PUBLIC HEALTH 21
DAVEY-SMITH G, 1998, JOURNAL OF EPIDEMIOLOGY AND COMMUNITY HEALTH 19
KHANG Y, 2008, BMC PUBLIC HEALTH 17
KAKWANI N, 1997, JOURNAL OF ECONOMETRICS 16
ERNSTSEN L, 2012, BMC PUBLIC HEALTH 13
KUNST A, 1994, INTERNATIONAL JOURNAL OF EPIDEMIOLOGY 13
PAMUK E, 1988, EUROPEAN JOURNAL OF POPULATION 13
LOW A, 2004, JOURNAL OF PUBLIC HEALTH 11
KEPPEL K, 2005, VITAL AND HEALTH STATISTICS 10
BARROS A, 2013, PLOS ONE 9
DAVEY-SMITH G, 2002, JOURNAL OF EPIDEMIOLOGY AND COMMUNITY HEALTH 9
KUNST A, 1995, JOURNAL OF EPIDEMIOLOGY AND COMMUNITY HEALTH 8
CHENG N, 2008, AMERICAN JOURNAL OF EPIDEMIOLOGY 7
HARPER S, 2010, MILBANK QUARTERLY 7
MACKENBACH J, 1997, LANCET 7
EZENDAM N, 2008, EUROPEAN JOURNAL OF CANCER 6
KHANG Y, 2004, JOURNAL OF EPIDEMIOLOGY AND COMMUNITY HEALTH 6

TIMELINE

# Timeline of 17 hand-picked papers: names are the labels shown on the
# timeline, values are the start years (kept as character strings).
milestones <- c(
  "Pamuk 1985" = "1985",
  "Pamuk 1988" = "1988",
  "Wagstaff 1991" = "1991",
  "Kunst IJE 1994" = "1994",
  "Kunst AJPH 1994" = "1994",
  "Mackenbach 1997" = "1997",
  "Kakwani 1997" = "1997",
  "Davey-Smith 1998" = "1998",
  "Hayes 2002" = "2002",
  "Regidor 2004" = "2004",
  "Low 2004" = "2004",
  "Keppel 2005" = "2005",
  "Sergeant 2006" = "2006",
  "Mackenbach 2008" = "2008",
  "Khang 2008" = "2008",
  "Ernstsen 2012" = "2012",
  "Moreno 2015" = "2015"
)

# One row per paper with the id / content / start columns passed to timevis().
data <- data.frame(
  id = seq_along(milestones),
  content = names(milestones),
  start = unname(milestones)
)
timevis(data)

ROYs network

# Directed network between first authors: one edge per (src, target) pair,
# the two vectors being matched element by element.
src <- c("Pamuk", "Wagstaff", "Kakwani", "Pamuk", "Kunst", "Davey-Smith",
         "Khang", "Pamuk", "Kunst", "Hayes", "Regidor", "Kunst", "Kunst",
         "Pamuk", "Kunst")
target <- c("Wagstaff", "Kakwani", "Low", "Kunst", "Davey-Smith", "Khang",
            "Ernstsen", "Hayes", "Hayes", "Regidor", "Keppel", "Sergeant",
            "Mackenbach", "Moreno", "Moreno")
networkData <- data.frame(src, target)

# Interactive version (networkD3).
simpleNetwork(networkData, fontSize = 20, linkDistance = 100, zoom = TRUE)

# Static version (igraph). igraph only recognises plotting arguments that
# carry a `vertex.` or `edge.` prefix, so the earlier `size`, `width` and
# `arrow.width` arguments (and `edge.curved = NULL`) had no effect on the
# figure and are dropped; the drawing is unchanged.
# NOTE(review): if bigger vertices or thicker edges were intended, pass
# `vertex.size`, `edge.width` and `edge.arrow.width` instead.
citation_graph <- graph_from_data_frame(networkData, directed = TRUE)
plot(citation_graph, edge.arrow.size = 0.2, vertex.color = "orange",
     vertex.label.dist = 0)

Outras informações: Language, Document Type, Uses of the RII/SII, Citations

# Descriptive tables of the remaining coded variables and citation totals.
# Lines starting with "##" are the console output recorded when the script
# was knitted.
table(dataset$LA) # languages: 1/english 2/spanish 3/portuguese 4/french 5/italian
## 
##   1   2   3   4   5 
## 406   7   2   1   1
table(dataset$DT) # document type: 1/review article 2/research article
## 
##   1   2 
##  29 388
table(dataset$MT) # inequality index: 1/RII 2/SII 3/RII and SII
## 
##   1   2   3 
## 211  53 153
table(dataset$IC) # international collaboration
## 
##   N   Y 
## 229 188
# Cross-tabulation: document type (rows) by inequality index (columns).
table(dataset$DT, dataset$MT)
##    
##       1   2   3
##   1   9   4  16
##   2 202  49 137
sum(dataset$mLCS) # mLCS - Local Citation Score (within methods section) = 802
## [1] 802
sum(dataset$CR1) # mLCR - Local Cited References (within methods section) = 802
## [1] 802
# Distribution of the per-article mLCS values.
table(dataset$mLCS)
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  13  16  17  19  21  23 
## 320  37  17   8   4   4   5   3   1   2   1   1   3   1   1   1   1   1 
##  25  26  28  57  85 217 
##   1   1   1   1   1   1
# NOTE(review): the citation columns are spelled `tc_wos` / `tc_scopus` here
# but `TC_wos` / `TC_scopus` in the yearly-frequency section; R column names
# are case-sensitive - confirm which spelling the CSV header uses.
sum(dataset$tc_wos, na.rm = T) # number of citations in WoS
## [1] 15240
sum(dataset$tc_scopus, na.rm = T) # number of citations in Scopus
## [1] 16950
summary(dataset$tc_wos)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##       0       3      13      38      34    1057      16
summary(dataset$tc_scopus)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    4.00   13.00   40.94   35.00 1088.00       3
# Removes every object from the workspace, including any that existed before
# the script was run.
rm(list=ls())