Library loading
pacman::p_load(
rio,
here,
tidyverse,
ggplot2,
data.table,
dplyr,
bibliometrix,
readxl,
tidyr
)
Data loading and converting
# Combine the individual per-paper .bib exports into one BibTeX file and
# convert it into a bibliometrix data frame (M).
file <- "path/Bib.bib"
d <- list.files("path", pattern = "\\.bib$", full.names = TRUE)
# The merged file is written into the folder that is being scanned and also
# matches "\\.bib$". Drop it from the inputs, otherwise every re-run reads the
# previous merge back in and each record ends up duplicated.
d <- d[basename(d) != basename(file)]
if (length(d) == 0) {
  stop("No input .bib files found in 'path'.", call. = FALSE)
}
read_files <- lapply(d, readLines)
Unlist_files <- unlist(read_files)
# One BibTeX line per output line.
writeLines(Unlist_files, con = file)
M <- convert2df(file = file, dbsource = "scopus", format = "bibtex")
##
## Converting your scopus collection into a bibliographic dataframe
##
##
## Warning:
## In your file, some mandatory metadata are missing. Bibliometrix functions may not work properly!
##
## Please, take a look at the vignettes:
## - 'Data Importing and Converting' (https://www.bibliometrix.org/vignettes/Data-Importing-and-Converting.html)
## - 'A brief introduction to bibliometrix' (https://www.bibliometrix.org/vignettes/Introduction_to_bibliometrix.html)
##
##
## Missing fields: CR
## Done!
##
##
## Generating affiliation field tag AU_UN from C1: Done!
# Save the converted bibliographic data frame as an Excel workbook,
# keeping the row names.
openxlsx::write.xlsx(x = M, file = "M.xlsx", rowNames = TRUE)
Bibliometric Analysis
Descriptive analysis of the bibliographic data frame
# Run the descriptive bibliometric analysis on M; ";" is passed as the
# separator for multi-valued fields.
results <- biblioAnalysis(
  M,
  sep = ";"
)
# Summarise the analysis with k = 10 (the echoed tables list ten rows each).
S <- summary(results, k = 10, pause = FALSE)
##
##
## MAIN INFORMATION ABOUT DATA
##
## Timespan 2016 : 2023
## Sources (Journals, Books, etc) 29
## Documents 74
## Annual Growth Rate % 13.99
## Document Average Age 3.03
## Average citations per doc 11.08
## Average citations per year per doc 2.182
## References 1
##
## DOCUMENT TYPES
## article 64
## book chapter 2
## conference paper 6
## data paper 2
##
## DOCUMENT CONTENTS
## Keywords Plus (ID) 261
## Author's Keywords (DE) 129
##
## AUTHORS
## Authors 68
## Author Appearances 256
## Authors of single-authored docs 2
##
## AUTHORS COLLABORATION
## Single-authored docs 4
## Documents per Author 1.09
## Co-Authors per Doc 3.46
## International co-authorships % 29.73
##
##
## Annual Scientific Production
##
## Year Articles
## 2016 4
## 2017 8
## 2018 6
## 2019 14
## 2020 10
## 2021 10
## 2022 12
## 2023 10
##
## Annual Percentage Growth Rate 13.99
##
##
## Most Productive Authors
##
## Authors Articles Authors Articles Fractionalized
## 1 HONG J 24 HONG J 8.29
## 2 LIVINGSTON M 12 SUN Y 4.85
## 3 MCARTHUR DP 12 MCARTHUR DP 4.50
## 4 SUN Y 12 SILA-NOWICKA K 3.35
## 5 SILA-NOWICKA K 10 LIVINGSTON M 3.13
## 6 THAKURIAH PV 10 THAKURIAH PV 2.47
## 7 BAILEY N 8 THAKURIAH P 2.40
## 8 LIDO C 8 WANG Y 2.33
## 9 WANG Y 8 ARCHER T 2.00
## 10 MASON P 6 LIDO C 1.85
##
##
## Top manuscripts per citations
##
## Paper DOI TC TCperYear NTC
## 1 SUN Y, 2017, INT J ENVIRON RES PUBLIC HEALTH-a 10.3390/ijerph14030274 68 9.71 1.73
## 2 SUN Y, 2017, INT J ENVIRON RES PUBLIC HEALTH-a-b-c 10.3390/ijerph14030274 68 9.71 1.73
## 3 SUN Y, 2017, INT J ENVIRON RES PUBLIC HEALTH 10.3390/ijerph14060644 58 8.29 1.48
## 4 SUN Y, 2017, INT J ENVIRON RES PUBLIC HEALTH-a-b 10.3390/ijerph14060644 58 8.29 1.48
## 5 MCARTHUR DP, 2019, J TRANSP GEOGR 10.1016/j.jtrangeo.2018.11.018 47 9.40 3.32
## 6 MCARTHUR DP, 2019, J TRANSP GEOGR-a 10.1016/j.jtrangeo.2018.11.018 47 9.40 3.32
## 7 ANEJIONU OCD, 2019, FUTURE GENER COMPUT SYST 10.1016/j.future.2019.03.052 27 5.40 1.91
## 8 ANEJIONU OCD, 2019, FUTURE GENER COMPUT SYST-a 10.1016/j.future.2019.03.052 27 5.40 1.91
## 9 HONG J, 2020, TRANSP RES PART A POLICY PRACT 10.1016/j.tra.2020.01.008 26 6.50 1.88
## 10 HONG J, 2020, TRANSP RES PART A POLICY PRACT-a 10.1016/j.tra.2020.01.008 26 6.50 1.88
##
##
## Corresponding Author's Countries
##
## Country Articles Freq SCP MCP MCP_Ratio
## 1 UNITED KINGDOM 68 0.919 52 16 0.235
## 2 ESTONIA 2 0.027 0 2 1.000
## 3 NIGERIA 2 0.027 0 2 1.000
## 4 SAUDI ARABIA 2 0.027 0 2 1.000
##
##
## SCP: Single Country Publications
##
## MCP: Multiple Country Publications
##
##
## Total Citations per Country
##
## Country Total Citations Average Article Citations
## 1 UNITED KINGDOM 818 12
## 2 ESTONIA 2 1
## 3 NIGERIA 0 0
## 4 SAUDI ARABIA 0 0
##
##
## Most Relevant Sources
##
## Sources
## 1 JOURNAL OF TRANSPORT GEOGRAPHY
## 2 INTERNATIONAL ARCHIVES OF THE PHOTOGRAMMETRY REMOTE SENSING AND SPATIAL INFORMATION SCIENCES - ISPRS ARCHIVES
## 3 HOUSING STUDIES
## 4 INTERNATIONAL JOURNAL OF ENVIRONMENTAL RESEARCH AND PUBLIC HEALTH
## 5 JOURNAL OF TRANSPORT AND HEALTH
## 6 APPLIED GEOGRAPHY
## 7 COMPUTERS ENVIRONMENT AND URBAN SYSTEMS
## 8 DATA IN BRIEF
## 9 ENERGY AND BUILDINGS
## 10 ENVIRONMENT AND PLANNING B: URBAN ANALYTICS AND CITY SCIENCE
## Articles
## 1 8
## 2 6
## 3 4
## 4 4
## 5 4
## 6 2
## 7 2
## 8 2
## 9 2
## 10 2
##
##
## Most Relevant Keywords
##
## Author Keywords (DE) Articles Keywords-Plus (ID) Articles
## 1 CYCLING 14 SCOTLAND 28
## 2 STRAVA 12 UNITED KINGDOM 26
## 3 CROWDSOURCED DATA 10 CROWDSOURCING 16
## 4 BIG DATA 6 GLASGOW [SCOTLAND] 16
## 5 CROWDSOURCED GEOGRAPHIC INFORMATION 6 GLASGOW [GLASGOW (ADS)] 14
## 6 STRAVA METRO 6 CYCLE TRANSPORT 12
## 7 ACTIVE TRAVEL 4 TRAVEL BEHAVIOR 12
## 8 AIR POLLUTION EXPOSURE 4 CYCLING 10
## 9 HOUSING 4 INTERNET 10
## 10 ICT 4 HOUSING 8
# Save the summary object as an Excel workbook, keeping the row names.
openxlsx::write.xlsx(x = S, file = "S.xlsx", rowNames = TRUE)
Author, Affiliations, Country
Bibliometrix field tags: AU = Author; C1 = Authors’ Affiliations;
SO = Journal names; PY = Publication year; DT = Document type.
DE : Author’s keywords
# One row per author keyword: split the DE field on ";" and unnest the pieces
# into the new column DE1.
KW <- unnest(
  mutate(M, DE1 = strsplit(as.character(DE), ";")),
  cols = DE1
)
# Replace every non-alphanumeric character with a space, then trim both ends.
KW$DE1 <- trimws(
  str_replace_all(KW$DE1, "[^a-zA-Z0-9]", " "),
  which = "both"
)
# export to excel file
openxlsx::write.xlsx(x = KW, file = "Keywords.xlsx", rowNames = TRUE)
AU : Author
# One row per author: split the AU field on ";" and unnest into `Author`.
# Inside mutate(), `AU` is the column of M, not the object being assigned.
AU <- M %>%
  mutate(Author = strsplit(as.character(AU), ";")) %>%
  unnest(cols = Author) %>%
  # Replace every non-alphanumeric character with a space, then trim both ends.
  mutate(
    Author = trimws(
      str_replace_all(Author, "[^a-zA-Z0-9]", " "),
      which = "both"
    )
  )
# export to excel file
openxlsx::write.xlsx(x = AU, file = "AU author.xlsx", rowNames = TRUE)
C1 : Authors’ Affiliations
## Authors' affiliations (field C1): one row per ";"-separated affiliation.
## Inside mutate(), `C1` is the column of M, not the object being assigned.
C1 <- M %>%
  mutate(Author_Affiliation = strsplit(as.character(C1), ";")) %>%
  unnest(Author_Affiliation)

## split to columns
## Kept under its own name (C1_wide) so the row-wise table below does not
## overwrite it. The non-alphanumeric clean-up is deliberately not applied
## here because it would remove the "," that cSplit() splits on.
C1_wide <- C1 %>%
  mutate(Author_Affiliation = trimws(Author_Affiliation, "both"))
C1_wide <- splitstackshape::cSplit(C1_wide, "Author_Affiliation", ",")
## Select the split columns by prefix rather than a fixed _1:_7 range, so the
## code works for any number of comma-separated pieces.
C1_wide <- C1_wide %>%
  mutate(across(
    starts_with("Author_Affiliation_"),
    ~ str_replace(as.character(.x), "THE UNIVERSITY OF GLASGOW", "UNIVERSITY OF GLASGOW")
  ))

## split to rows: one row per ","-separated piece of each affiliation
Affiliation <- C1 %>%
  mutate(Author_Affiliation = strsplit(as.character(Author_Affiliation), ",")) %>%
  unnest(Author_Affiliation)
## Replace every non-alphanumeric character with a space, then trim both ends.
Affiliation$Author_Affiliation <- str_replace_all(Affiliation$Author_Affiliation, "[^a-zA-Z0-9]", " ")
Affiliation$Author_Affiliation <- trimws(Affiliation$Author_Affiliation, "both")
## Harmonise the University of Glasgow spelling; all other values are kept.
Affiliation <- Affiliation %>%
  mutate(
    Author_Affiliation = case_when(
      str_detect(Author_Affiliation, "THE UNIVERSITY OF GLASGOW") ~ "UNIVERSITY OF GLASGOW",
      .default = as.character(Author_Affiliation)
    )
  )
# export to excel file
openxlsx::write.xlsx(Affiliation, "Authors Affiliations.xlsx", rowNames = TRUE)
Network analysis
Country collaboration
# Create a country collaboration network
# metaTagExtraction() is asked for the "AU_CO" field, which the country
# network below is built from.
M <- metaTagExtraction(M, Field = "AU_CO", sep = ";")
NetMatrix <- biblioNetwork(
  M,
  analysis = "collaboration",
  network = "countries",
  sep = ";"
)
# Plot the network with every node (n = number of matrix rows), sphere layout
net <- networkPlot(
  NetMatrix,
  n = nrow(NetMatrix),
  Title = "Country Collaboration",
  type = "sphere",
  size = TRUE,
  remove.multiple = FALSE,
  labelsize = 0.8
)

# Same network drawn with the circle layout
net <- networkPlot(
  NetMatrix,
  n = nrow(NetMatrix),
  Title = "Country Collaboration",
  type = "circle",
  size = TRUE,
  remove.multiple = FALSE,
  labelsize = 0.8
)

Affiliation collaboration
#--- Data preparation (column AU_UN)
## string split: one row per ";"-separated entry of AU_UN
AU_UN <- M %>%
  mutate(university = strsplit(as.character(AU_UN), ";")) %>%
  unnest(university)
## Replace every non-alphanumeric character with a space, then trim both ends.
AU_UN$university <- str_replace_all(AU_UN$university, "[^a-zA-Z0-9]", " ")
AU_UN$university <- trimws(AU_UN$university, "both")
## word cleaning: map name variants to one canonical university name
lookup <- AU_UN %>%
  mutate(
    university1 = case_when(
      str_detect(university, "THE UNIVERSITY OF GLASGOW") ~ "UNIVERSITY OF GLASGOW",
      # NOTE(review): this maps ANY name containing "GLASGOW" (including other
      # Glasgow-based institutions) to the University of Glasgow - confirm this
      # is intended for this dataset.
      str_detect(university, "GLASGOW") ~ "UNIVERSITY OF GLASGOW",
      str_detect(university, "SOCIAL POLICY AND CRIMINOLOGY UNIVERSITY OF STIRLING") ~ "UNIVERSITY OF STIRLING",
      .default = as.character(university)
    )
  )
## create lookup table: one row per distinct (raw name, canonical name) pair
lookup <- distinct(lookup[, c("university", "university1")])
## Only rows that actually rename something are used as replacement rules.
## Identity rows are no-ops, and NA rows would turn into the pattern "\\bNA\\b"
## with an NA replacement.
renames <- lookup[!is.na(lookup$university) & lookup$university != lookup$university1, ]
## clean data in original file
M$AU_UN <- str_replace_all(M$AU_UN, "[^a-zA-Z0-9\\;]", " ") # replace everything except letters, digits and ";" with a space
## Rules are applied one after another (vectorize_all = FALSE), so each rule
## must appear only once: a repeated rule would be re-applied to text that an
## earlier pass already rewrote.
## NOTE(review): a raw name that is itself a whole-word substring of another
## entry can still be rewritten inside that entry - check the result.
if (nrow(renames) > 0) {
  M$AU_UN <- stringi::stri_replace_all_regex(
    str = M$AU_UN,
    pattern = paste0("\\b", renames$university, "\\b"), # add word boundaries
    replacement = renames$university1,
    vectorize_all = FALSE,
    opts_regex = stringi::stri_opts_regex(case_insensitive = FALSE) # matching stays case-sensitive (HEY != hey)
  )
}
# Create a university collaboration network
NetMatrix <- biblioNetwork(M, analysis = "collaboration", network = "universities", sep = ";") # using column AU_UN
# Plot the network
net <- networkPlot(NetMatrix, n = nrow(NetMatrix), Title = "University Collaboration", type = "circle", size = TRUE, remove.multiple = FALSE, labelsize = 0.8) # type = "sphere", type = "circle", type = "fruchterman"

Author collaboration
# Create an author collaboration network
NetMatrix <- biblioNetwork(
  M,
  analysis = "collaboration",
  network = "authors",
  sep = ";"
)
# Plot the network with every node (n = number of matrix rows)
net <- networkPlot(
  NetMatrix,
  n = nrow(NetMatrix),
  Title = "Author Collaboration",
  type = "sphere", # type = "circle"
  size = TRUE,
  remove.multiple = FALSE,
  labelsize = 0.8
)

Keywords co-occurrences
# Create the author-keyword co-occurrence network
NetMatrix <- biblioNetwork(
  M,
  analysis = "co-occurrences",
  network = "author_keywords",
  sep = ";"
)
# Plot the network with n = 40, "association" normalisation and weighted edges
net <- networkPlot(
  NetMatrix,
  normalize = "association",
  weighted = TRUE,
  n = 40,
  Title = "Author Keyword Co-occurrences",
  type = "sphere",
  size = TRUE,
  edgesize = 6,
  labelsize = 0.7,
  remove.multiple = TRUE
)

# Keep the cluster results stored in the plot object as a data frame
net_groups_kw <- as.data.frame.table(net$cluster_res)