Library loading

pacman::p_load(
  rio,          
  here,         
  tidyverse,
  ggplot2, 
  data.table, 
  dplyr,
  bibliometrix,
  readxl, 
  tidyr
  )

Data loading and converting

# Combine the individual BibTeX exports found in "path" into a single file,
# then convert that file into a bibliometrix data frame (M).
bib_dir <- "path"
file <- file.path(bib_dir, "Bib.bib")

# The merged file is written into the very folder that is scanned for inputs.
# It must therefore be left out of the input list; otherwise every re-run
# reads the previous merge back in and each record ends up in M more than once.
d <- list.files(bib_dir, pattern = "\\.bib$", full.names = TRUE)
d <- d[basename(d) != basename(file)]
if (length(d) == 0) {
  stop("No .bib files to combine in '", bib_dir, "'", call. = FALSE)
}

read_files <- lapply(d, readLines)
Unlist_files <- unlist(read_files)
writeLines(Unlist_files, con = file)

M <- convert2df(file = file, dbsource = "scopus", format = "bibtex")

## 
## Converting your scopus collection into a bibliographic dataframe
## 
## 
## Warning:
## In your file, some mandatory metadata are missing. Bibliometrix functions may not work properly!
## 
## Please, take a look at the vignettes:
## - 'Data Importing and Converting' (https://www.bibliometrix.org/vignettes/Data-Importing-and-Converting.html)
## - 'A brief introduction to bibliometrix' (https://www.bibliometrix.org/vignettes/Introduction_to_bibliometrix.html)
## 
## 
## Missing fields:  CR 
## Done!
## 
## 
## Generating affiliation field tag AU_UN from C1:  Done!

# Save the converted bibliographic data frame, keeping its row names.
openxlsx::write.xlsx(x = M, file = "M.xlsx", rowNames = TRUE)

Bibliometric Analysis

Descriptive analysis of the bibliographic data frame

# Run the descriptive analysis on M (multi-valued fields are ";"-separated),
# then print the summary tables with the top 10 entries per table and without
# pausing between them.
results <- biblioAnalysis(M, sep = ";")
S <- summary(results, k = 10, pause = FALSE)

## 
## 
## MAIN INFORMATION ABOUT DATA
## 
##  Timespan                              2016 : 2023 
##  Sources (Journals, Books, etc)        29 
##  Documents                             74 
##  Annual Growth Rate %                  13.99 
##  Document Average Age                  3.03 
##  Average citations per doc             11.08 
##  Average citations per year per doc    2.182 
##  References                            1 
##  
## DOCUMENT TYPES                     
##  article               64 
##  book chapter          2 
##  conference paper      6 
##  data paper            2 
##  
## DOCUMENT CONTENTS
##  Keywords Plus (ID)                    261 
##  Author's Keywords (DE)                129 
##  
## AUTHORS
##  Authors                               68 
##  Author Appearances                    256 
##  Authors of single-authored docs       2 
##  
## AUTHORS COLLABORATION
##  Single-authored docs                  4 
##  Documents per Author                  1.09 
##  Co-Authors per Doc                    3.46 
##  International co-authorships %        29.73 
##  
## 
## Annual Scientific Production
## 
##  Year    Articles
##     2016        4
##     2017        8
##     2018        6
##     2019       14
##     2020       10
##     2021       10
##     2022       12
##     2023       10
## 
## Annual Percentage Growth Rate 13.99 
## 
## 
## Most Productive Authors
## 
##    Authors        Articles Authors        Articles Fractionalized
## 1  HONG J               24 HONG J                            8.29
## 2  LIVINGSTON M         12 SUN Y                             4.85
## 3  MCARTHUR DP          12 MCARTHUR DP                       4.50
## 4  SUN Y                12 SILA-NOWICKA K                    3.35
## 5  SILA-NOWICKA K       10 LIVINGSTON M                      3.13
## 6  THAKURIAH PV         10 THAKURIAH PV                      2.47
## 7  BAILEY N              8 THAKURIAH P                       2.40
## 8  LIDO C                8 WANG Y                            2.33
## 9  WANG Y                8 ARCHER T                          2.00
## 10 MASON P               6 LIDO C                            1.85
## 
## 
## Top manuscripts per citations
## 
##                                        Paper                                     DOI TC TCperYear  NTC
## 1  SUN Y, 2017, INT J ENVIRON RES PUBLIC HEALTH-a     10.3390/ijerph14030274         68      9.71 1.73
## 2  SUN Y, 2017, INT J ENVIRON RES PUBLIC HEALTH-a-b-c 10.3390/ijerph14030274         68      9.71 1.73
## 3  SUN Y, 2017, INT J ENVIRON RES PUBLIC HEALTH       10.3390/ijerph14060644         58      8.29 1.48
## 4  SUN Y, 2017, INT J ENVIRON RES PUBLIC HEALTH-a-b   10.3390/ijerph14060644         58      8.29 1.48
## 5  MCARTHUR DP, 2019, J TRANSP GEOGR                  10.1016/j.jtrangeo.2018.11.018 47      9.40 3.32
## 6  MCARTHUR DP, 2019, J TRANSP GEOGR-a                10.1016/j.jtrangeo.2018.11.018 47      9.40 3.32
## 7  ANEJIONU OCD, 2019, FUTURE GENER COMPUT SYST       10.1016/j.future.2019.03.052   27      5.40 1.91
## 8  ANEJIONU OCD, 2019, FUTURE GENER COMPUT SYST-a     10.1016/j.future.2019.03.052   27      5.40 1.91
## 9  HONG J, 2020, TRANSP RES PART A POLICY PRACT       10.1016/j.tra.2020.01.008      26      6.50 1.88
## 10 HONG J, 2020, TRANSP RES PART A POLICY PRACT-a     10.1016/j.tra.2020.01.008      26      6.50 1.88
## 
## 
## Corresponding Author's Countries
## 
##          Country Articles  Freq SCP MCP MCP_Ratio
## 1 UNITED KINGDOM       68 0.919  52  16     0.235
## 2 ESTONIA               2 0.027   0   2     1.000
## 3 NIGERIA               2 0.027   0   2     1.000
## 4 SAUDI ARABIA          2 0.027   0   2     1.000
## 
## 
## SCP: Single Country Publications
## 
## MCP: Multiple Country Publications
## 
## 
## Total Citations per Country
## 
##     Country      Total Citations Average Article Citations
## 1 UNITED KINGDOM             818                        12
## 2 ESTONIA                      2                         1
## 3 NIGERIA                      0                         0
## 4 SAUDI ARABIA                 0                         0
## 
## 
## Most Relevant Sources
## 
##                                                                                                   Sources       
## 1  JOURNAL OF TRANSPORT GEOGRAPHY                                                                               
## 2  INTERNATIONAL ARCHIVES OF THE PHOTOGRAMMETRY REMOTE SENSING AND SPATIAL INFORMATION SCIENCES - ISPRS ARCHIVES
## 3  HOUSING STUDIES                                                                                              
## 4  INTERNATIONAL JOURNAL OF ENVIRONMENTAL RESEARCH AND PUBLIC HEALTH                                            
## 5  JOURNAL OF TRANSPORT AND HEALTH                                                                              
## 6  APPLIED GEOGRAPHY                                                                                            
## 7  COMPUTERS ENVIRONMENT AND URBAN SYSTEMS                                                                      
## 8  DATA IN BRIEF                                                                                                
## 9  ENERGY AND BUILDINGS                                                                                         
## 10 ENVIRONMENT AND PLANNING B: URBAN ANALYTICS AND CITY SCIENCE                                                 
##    Articles
## 1         8
## 2         6
## 3         4
## 4         4
## 5         4
## 6         2
## 7         2
## 8         2
## 9         2
## 10        2
## 
## 
## Most Relevant Keywords
## 
##              Author Keywords (DE)      Articles  Keywords-Plus (ID)     Articles
## 1  CYCLING                                   14 SCOTLAND                      28
## 2  STRAVA                                    12 UNITED KINGDOM                26
## 3  CROWDSOURCED DATA                         10 CROWDSOURCING                 16
## 4  BIG DATA                                   6 GLASGOW [SCOTLAND]            16
## 5  CROWDSOURCED GEOGRAPHIC INFORMATION        6 GLASGOW [GLASGOW (ADS)]       14
## 6  STRAVA METRO                               6 CYCLE TRANSPORT               12
## 7  ACTIVE TRAVEL                              4 TRAVEL BEHAVIOR               12
## 8  AIR POLLUTION EXPOSURE                     4 CYCLING                       10
## 9  HOUSING                                    4 INTERNET                      10
## 10 ICT                                        4 HOUSING                        8

# Save the summary object returned above to an Excel workbook.
openxlsx::write.xlsx(x = S, file = "S.xlsx", rowNames = TRUE)

Author, Affiliations, Country

Bibliometrix field tags: AU: Author; C1: Authors’ Affiliations; SO: Journal names; PY: Publication year; DT: Document type

DE : Author’s keywords

# One row per author keyword: split DE on ";", then replace every
# non-alphanumeric character with a space and trim the ends.
KW <- M %>%
  mutate(DE1 = strsplit(as.character(DE), ";")) %>%
  unnest(DE1) %>%
  mutate(
    DE1 = str_replace_all(DE1, "[^a-zA-Z0-9]", " "),
    DE1 = trimws(DE1, which = "both")
  )

# export to excel file
openxlsx::write.xlsx(x = KW, file = "Keywords.xlsx", rowNames = TRUE)

AU : Author

# One row per author: split AU on ";", then replace every non-alphanumeric
# character with a space and trim the ends.
AU <- M %>%
  mutate(Author = strsplit(as.character(AU), ";")) %>%
  unnest(Author) %>%
  mutate(
    Author = str_replace_all(Author, "[^a-zA-Z0-9]", " "),
    Author = trimws(Author, which = "both")
  )

# export to excel file
openxlsx::write.xlsx(x = AU, file = "AU author.xlsx", rowNames = TRUE)

C1 : Authors’ Affiliations

## split to columns: one row per affiliation, one column per comma-separated part
C1 <- M %>%
  mutate(Author_Affiliation = strsplit(as.character(C1), ";")) %>%
  unnest(Author_Affiliation)

# Non-alphanumeric characters are deliberately NOT stripped here: the commas
# are still needed as the column delimiter for cSplit() below.
C1$Author_Affiliation <- trimws(C1$Author_Affiliation, "both")
C1 <- splitstackshape::cSplit(C1, "Author_Affiliation", ",")

# The number of Author_Affiliation_<n> columns depends on the data, so they
# are selected by prefix instead of a fixed 1:7 range (which fails when fewer
# than seven exist and skips any beyond the seventh).
# NOTE(review): C1 is reassigned by the row-wise split that follows, so this
# wide table is only available up to that point.
C1 <- C1 %>%
  mutate(
    across(
      starts_with("Author_Affiliation_"),
      ~ str_replace(as.character(.x), "THE UNIVERSITY OF GLASGOW", "UNIVERSITY OF GLASGOW")
    )
  )

## split to row
# First level: one row per ";"-separated affiliation string in C1.
C1 <- M %>%
  mutate(Author_Affiliation = strsplit(as.character(C1), ";")) %>%
  unnest(Author_Affiliation)

# Second level: one row per ","-separated part of each affiliation. Each part
# then has its non-alphanumeric characters replaced by spaces, is trimmed, and
# any part containing "THE UNIVERSITY OF GLASGOW" is relabelled as a whole.
Affiliation <- C1 %>%
  mutate(Author_Affiliation = strsplit(as.character(Author_Affiliation), ",")) %>%
  unnest(Author_Affiliation) %>%
  mutate(
    Author_Affiliation = str_replace_all(Author_Affiliation, "[^a-zA-Z0-9]", " "),
    Author_Affiliation = trimws(Author_Affiliation, which = "both"),
    Author_Affiliation = case_when(
      str_detect(Author_Affiliation, "THE UNIVERSITY OF GLASGOW") ~ "UNIVERSITY OF GLASGOW",
      .default = as.character(Author_Affiliation)
    )
  )

# export to excel file
openxlsx::write.xlsx(x = Affiliation, file = "Authors Affiliations.xlsx", rowNames = TRUE)

Network analysis

Country collaboration

# Create a country collaboration network.
# The AU_CO tag is extracted into M first, then the collaboration matrix is
# built at country level.
M <- metaTagExtraction(M, Field = "AU_CO", sep = ";")
NetMatrix <- biblioNetwork(
  M,
  analysis = "collaboration",
  network = "countries",
  sep = ";"
)

# Plot the network with every row of the matrix included, once per layout.
# `net` holds the result of the last call (the circle layout).
net <- networkPlot(
  NetMatrix,
  n = nrow(NetMatrix),
  Title = "Country Collaboration",
  type = "sphere",
  size = TRUE,
  remove.multiple = FALSE,
  labelsize = 0.8
)

net <- networkPlot(
  NetMatrix,
  n = nrow(NetMatrix),
  Title = "Country Collaboration",
  type = "circle",
  size = TRUE,
  remove.multiple = FALSE,
  labelsize = 0.8
)

Affiliation collaboration

#--- Data preparation  (column AU_UN)

## string split: one row per ";"-separated institution in AU_UN
AU_UN <- M %>%
  mutate(university = strsplit(as.character(AU_UN), ";")) %>%
  unnest(university)

AU_UN$university <- str_replace_all(AU_UN$university, "[^a-zA-Z0-9]", " ")
AU_UN$university <- trimws(AU_UN$university, "both")

## word cleaning: map name variants onto one canonical institution name
lookup <- AU_UN %>%
  mutate(
    university1 = case_when(
      str_detect(university, "THE UNIVERSITY OF GLASGOW") ~ "UNIVERSITY OF GLASGOW",
      # NOTE(review): this catch-all relabels every institution whose name
      # contains "GLASGOW" -- confirm that is intended.
      str_detect(university, "GLASGOW") ~ "UNIVERSITY OF GLASGOW",
      str_detect(university, "SOCIAL POLICY AND CRIMINOLOGY UNIVERSITY OF STIRLING") ~ "UNIVERSITY OF STIRLING",
      .default = as.character(university)
    )
  )

## create lookup table
# Keep one row per variant that actually changes:
#   - NA keys are dropped, because paste0() below would turn them into the
#     literal pattern "\\bNA\\b" paired with an NA replacement;
#   - identity rows are dropped, because replacing a name with itself is a no-op;
#   - longer names go first, so a shorter variant cannot rewrite part of a
#     longer one before the longer pattern has been applied.
lookup <- lookup[, c("university", "university1")] %>%
  distinct() %>%
  filter(!is.na(university), university != university1) %>%
  arrange(desc(nchar(university)))

## clean data in original file
# Replace everything except letters, digits and the ";" separator with a
# space, mirroring the cleaning applied to the lookup keys above.
M$AU_UN <- str_replace_all(M$AU_UN, "[^a-zA-Z0-9\\;]", " ")

# Apply the replacements one after another (vectorize_all = FALSE), matching
# whole words only and case-sensitively. Skipped when nothing needs renaming,
# since paste0() on an empty key vector would still yield a pattern.
if (nrow(lookup) > 0) {
  M$AU_UN <- stringi::stri_replace_all_regex(
    str = M$AU_UN,
    pattern = paste0("\\b", lookup$university, "\\b"), # add word boundaries
    replacement = lookup$university1,
    vectorize_all = FALSE,
    opts_regex = stringi::stri_opts_regex(case_insensitive = FALSE)
  )
}

# Create a university collaboration network (built from column AU_UN)
NetMatrix <- biblioNetwork(
  M,
  analysis = "collaboration",
  network = "universities",
  sep = ";"
)

# Plot the network with every row of the matrix included.
# Other layouts tried: type = "sphere", type = "fruchterman"
net <- networkPlot(
  NetMatrix,
  n = nrow(NetMatrix),
  Title = "University Collaboration",
  type = "circle",
  size = TRUE,
  remove.multiple = FALSE,
  labelsize = 0.8
)

Author collaboration

# Create an author collaboration network
NetMatrix <- biblioNetwork(
  M,
  analysis = "collaboration",
  network = "authors",
  sep = ";"
)

# Plot the network with every row of the matrix included.
# Alternative layout: type = "circle"
net <- networkPlot(
  NetMatrix,
  n = nrow(NetMatrix),
  Title = "Author Collaboration",
  type = "sphere",
  size = TRUE,
  remove.multiple = FALSE,
  labelsize = 0.8
)

Keywords co-occurrences

# Create keywords co-occurrences network from the author keywords
NetMatrix <- biblioNetwork(
  M,
  analysis = "co-occurrences",
  network = "author_keywords",
  sep = ";"
)

# Plot the network (n = 40, association normalisation, weighted edges).
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
net <- networkPlot(
  NetMatrix,
  normalize = "association",
  weighted = TRUE,
  n = 40,
  Title = "Author Keyword Co-occurrences",
  type = "sphere",
  size = TRUE,
  edgesize = 6,
  labelsize = 0.7,
  remove.multiple = TRUE
)

# Keep the cluster assignment reported by networkPlot() for later inspection.
# NOTE(review): as.data.frame.table() is meant for table/array input; if
# net$cluster_res is already a data frame, as.data.frame() may be what is
# wanted here -- confirm against the resulting object.
net_groups_kw <- as.data.frame.table(net$cluster_res)

UofG ubdc bibliographic search and analysis

2023-08-03

Library loading

Data loading and converting

Bibliometric Analysis

Descriptive analysis of the bibliographic data frame

Author, Affiliations, Country

DE : Author’s keywords

AU : Author

C1 : Authors’ Affiliations

Network analysis

Country collaboration

Affiliation collaboration

Author collaboration

Keywords co-occurrences