litsearchR

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(ggraph)
library(igraph)
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library(readr)
library(litsearchr)

# Locate the project folder that holds BikeShare.ris and stop.txt. The original
# author's path stays the default; set the LITSEARCHR_PROJECT_DIR environment
# variable to run the analysis from a different machine or checkout.
project_dir <- Sys.getenv(
  "LITSEARCHR_PROJECT_DIR",
  unset = "C:/Users/subas/OneDrive - Texas A&M Transportation Institute/0_PAPERS/ALL_PAPERS/Journal/Submitted/2020_2021/SYSTEMATIC_LIT/litsearchR"
)
if (!dir.exists(project_dir)) {
  stop("Project directory not found: ", project_dir, call. = FALSE)
}
# Input files are read by relative name (here and further down the script), so
# the working directory still has to point at the project folder.
setwd(project_dir)

# Import the naive search results.
naive_results <- import_results(file = "BikeShare.ris")
## Reading file BikeShare.ris ... done
# Number of imported records.
nrow(naive_results)
## [1] 103
#naive_results
# Fields available for each imported record.
colnames(naive_results)
##  [1] "document_type" "source_type"   "author"        "year"         
##  [5] "title"         "journal"       "volume"        "abstract"     
##  [9] "keywords"      "doi"           "accession_zr"  "url"          
## [13] "publisher"     "start_page"    "issue"         "eppi_id"      
## [17] "ET"            "issn"
#naive_results[1, "title"]
#naive_results[1, "keywords"]
# Preview the tagged keywords; min_n is not supplied here, so the function's
# default applies (the printed result contains multi-word phrases only).
extract_terms(
  method = "tagged",
  keywords = naive_results[, "keywords"]
)
## Loading required namespace: stopwords
##   [1] "activity centers"                      
##   [2] "after studies"                         
##   [3] "american community survey"             
##   [4] "annual average daily traffic"          
##   [5] "arlington county virginia"             
##   [6] "baltimore maryland"                    
##   [7] "bayes' theorem"                        
##   [8] "bicycle commuting"                     
##   [9] "bicycle facilities"                    
##  [10] "bicycle helmets"                       
##  [11] "bicycle lanes"                         
##  [12] "bicycle sharing"                       
##  [13] "bicycle sharing stations"              
##  [14] "bicycle travel"                        
##  [15] "binomial distributions"                
##  [16] "birmingham alabama"                    
##  [17] "boston massachusetts"                  
##  [18] "built environment"                     
##  [19] "capital bikeshare"                     
##  [20] "case studies"                          
##  [21] "chicago illinois"                      
##  [22] "choice models"                         
##  [23] "citi bike"                             
##  [24] "cluster analysis"                      
##  [25] "consumer preferences"                  
##  [26] "crash analysis"                        
##  [27] "data analysis"                         
##  [28] "data mining"                           
##  [29] "district department of transportation" 
##  [30] "economic benefits"                     
##  [31] "electric vehicles"                     
##  [32] "equity justice"                        
##  [33] "feasibility analysis"                  
##  [34] "feeder services"                       
##  [35] "geographic information systems"        
##  [36] "global positioning system"             
##  [37] "helmet use"                            
##  [38] "highway safety"                        
##  [39] "honolulu hawaii"                       
##  [40] "intermodal transfer"                   
##  [41] "international comparison"              
##  [42] "land use"                              
##  [43] "linear regression analysis"            
##  [44] "literature reviews"                    
##  [45] "low income groups"                     
##  [46] "mathematical models"                   
##  [47] "membership organizations"              
##  [48] "metrorail washington metropolitan area"
##  [49] "minneapolis minnesota"                 
##  [50] "mobile applications"                   
##  [51] "modal shift"                           
##  [52] "modal split"                           
##  [53] "mode choice"                           
##  [54] "multimodal transportation"             
##  [55] "nanjing china"                         
##  [56] "new york new york"                     
##  [57] "nice ride"                             
##  [58] "nonmotorized transportation"           
##  [59] "north america"                         
##  [60] "online survey"                         
##  [61] "periods of the day"                    
##  [62] "philadelphia pennsylvania"             
##  [63] "population density"                    
##  [64] "public transit"                        
##  [65] "rail transit"                          
##  [66] "rail transit stations"                 
##  [67] "rapid transit"                         
##  [68] "regression analysis"                   
##  [69] "revealed preferences"                  
##  [70] "route choice"                          
##  [71] "san francisco bay area"                
##  [72] "san francisco california"              
##  [73] "service disruption"                    
##  [74] "shared mobility"                       
##  [75] "smart cards"                           
##  [76] "social factors"                        
##  [77] "socioeconomic factors"                 
##  [78] "spatial analysis"                      
##  [79] "spatiotemporal analysis"               
##  [80] "statistical analysis"                  
##  [81] "strategic planning"                    
##  [82] "sustainable transportation"            
##  [83] "time duration"                         
##  [84] "traffic flow"                          
##  [85] "transit riders"                        
##  [86] "transportation planning"               
##  [87] "travel behavior"                       
##  [88] "travel demand"                         
##  [89] "travel patterns"                       
##  [90] "travel surveys"                        
##  [91] "trip chaining"                         
##  [92] "trip generation"                       
##  [93] "trip length"                           
##  [94] "trip purpose"                          
##  [95] "united states"                         
##  [96] "urban areas"                           
##  [97] "vehicle sharing"                       
##  [98] "washington district of columbia"       
##  [99] "washington metropolitan area"          
## [100] "weather conditions"
# Repeat the tagged-keyword extraction with min_n = 1 so that single-word
# keywords are kept alongside the phrases, then print the result.
keywords <- extract_terms(
  method = "tagged",
  min_n = 1,
  keywords = naive_results[, "keywords"]
)
keywords
##   [1] "access"                                
##   [2] "accessibility"                         
##   [3] "activity centers"                      
##   [4] "after studies"                         
##   [5] "age"                                   
##   [6] "american community survey"             
##   [7] "annual average daily traffic"          
##   [8] "arlington county virginia"             
##   [9] "attitudes"                             
##  [10] "baltimore maryland"                    
##  [11] "bayes' theorem"                        
##  [12] "before"                                
##  [13] "behavior"                              
##  [14] "bicycle commuting"                     
##  [15] "bicycle facilities"                    
##  [16] "bicycle helmets"                       
##  [17] "bicycle lanes"                         
##  [18] "bicycle sharing"                       
##  [19] "bicycle sharing stations"              
##  [20] "bicycle travel"                        
##  [21] "bicycles"                              
##  [22] "bicycling"                             
##  [23] "binomial distributions"                
##  [24] "birmingham alabama"                    
##  [25] "boston massachusetts"                  
##  [26] "built environment"                     
##  [27] "capital bikeshare"                     
##  [28] "case studies"                          
##  [29] "chicago illinois"                      
##  [30] "china"                                 
##  [31] "choice models"                         
##  [32] "citi bike"                             
##  [33] "citibike"                              
##  [34] "cities"                                
##  [35] "cluster analysis"                      
##  [36] "communities"                           
##  [37] "commuters"                             
##  [38] "consumer preferences"                  
##  [39] "covid-19"                              
##  [40] "crash analysis"                        
##  [41] "cyclists"                              
##  [42] "data analysis"                         
##  [43] "data mining"                           
##  [44] "demand"                                
##  [45] "demographics"                          
##  [46] "destination"                           
##  [47] "district department of transportation" 
##  [48] "economic benefits"                     
##  [49] "electric vehicles"                     
##  [50] "equity justice"                        
##  [51] "feasibility analysis"                  
##  [52] "feeder services"                       
##  [53] "gender"                                
##  [54] "geographic information systems"        
##  [55] "global positioning system"             
##  [56] "helmet use"                            
##  [57] "highway safety"                        
##  [58] "honolulu hawaii"                       
##  [59] "impacts"                               
##  [60] "implementation"                        
##  [61] "infrastructure"                        
##  [62] "intermodal transfer"                   
##  [63] "international comparison"              
##  [64] "jobs"                                  
##  [65] "land use"                              
##  [66] "linear regression analysis"            
##  [67] "literature reviews"                    
##  [68] "location"                              
##  [69] "logits"                                
##  [70] "low income groups"                     
##  [71] "marketing"                             
##  [72] "mathematical models"                   
##  [73] "membership organizations"              
##  [74] "methodology"                           
##  [75] "metrorail washington metropolitan area"
##  [76] "minneapolis minnesota"                 
##  [77] "mobile applications"                   
##  [78] "mobility"                              
##  [79] "modal shift"                           
##  [80] "modal split"                           
##  [81] "mode choice"                           
##  [82] "multimodal transportation"             
##  [83] "nanjing china"                         
##  [84] "new york new york"                     
##  [85] "nice ride"                             
##  [86] "nonmotorized transportation"           
##  [87] "north america"                         
##  [88] "online survey"                         
##  [89] "operations"                            
##  [90] "origin"                                
##  [91] "periods of the day"                    
##  [92] "philadelphia pennsylvania"             
##  [93] "policy"                                
##  [94] "population density"                    
##  [95] "pricing"                               
##  [96] "public transit"                        
##  [97] "race"                                  
##  [98] "rail transit"                          
##  [99] "rail transit stations"                 
## [100] "rapid transit"                         
## [101] "regression analysis"                   
## [102] "revealed preferences"                  
## [103] "revenues"                              
## [104] "ridership"                             
## [105] "route choice"                          
## [106] "safety"                                
## [107] "san francisco bay area"                
## [108] "san francisco california"              
## [109] "scooters"                              
## [110] "service disruption"                    
## [111] "shared mobility"                       
## [112] "smart cards"                           
## [113] "social factors"                        
## [114] "socioeconomic factors"                 
## [115] "spatial analysis"                      
## [116] "spatiotemporal analysis"               
## [117] "statistical analysis"                  
## [118] "strategic planning"                    
## [119] "suburbs"                               
## [120] "surveys"                               
## [121] "sustainable transportation"            
## [122] "time duration"                         
## [123] "tourism"                               
## [124] "tourists"                              
## [125] "traffic flow"                          
## [126] "transfers"                             
## [127] "transit riders"                        
## [128] "transportation planning"               
## [129] "travel behavior"                       
## [130] "travel demand"                         
## [131] "travel patterns"                       
## [132] "travel surveys"                        
## [133] "trip chaining"                         
## [134] "trip generation"                       
## [135] "trip length"                           
## [136] "trip purpose"                          
## [137] "united states"                         
## [138] "urban areas"                           
## [139] "validation"                            
## [140] "vehicle sharing"                       
## [141] "washington district of columbia"       
## [142] "washington metropolitan area"          
## [143] "weather conditions"
# Preview candidate phrases taken from the titles with the "fakerake" method
# (min_freq = 3, min_n = 2); no custom stopword list is supplied in this call.
extract_terms(
  method = "fakerake",
  text = naive_results[, "title"],
  min_n = 2,
  min_freq = 3
)
##  [1] "bikeshare access"          "bikeshare demand"         
##  [3] "bikeshare programs"        "bikeshare ridership"      
##  [5] "bikeshare station"         "bikeshare system"         
##  [7] "bikeshare systems"         "bikeshare trips"          
##  [9] "bikeshare users"           "capital bikeshare"        
## [11] "capital bikeshare trips"   "casual users"             
## [13] "disadvantaged communities" "dockless bikeshare"       
## [15] "route choice"
# Extra stopwords kept in stop.txt (read_lines yields one element per line).
clinpsy_stopwords <- read_lines(file = "stop.txt")
#clinpsy_stopwords

# Full stopword list: the English list from get_stopwords() followed by the
# extra entries read from stop.txt.
all_stopwords <- append(get_stopwords("English"), clinpsy_stopwords)

# Title phrases extracted with "fakerake", this time passing the combined
# stopword list.
title_terms <- extract_terms(
  method = "fakerake",
  text = naive_results[, "title"],
  stopwords = all_stopwords,
  min_n = 2,
  min_freq = 3
)

title_terms
##  [1] "bikeshare access"          "bikeshare demand"         
##  [3] "bikeshare programs"        "bikeshare ridership"      
##  [5] "bikeshare station"         "bikeshare system"         
##  [7] "bikeshare systems"         "bikeshare trips"          
##  [9] "bikeshare users"           "capital bikeshare"        
## [11] "capital bikeshare trips"   "casual users"             
## [13] "disadvantaged communities" "dockless bikeshare"       
## [15] "route choice"
# Candidate search terms: tagged keywords plus title phrases, de-duplicated.
terms <- unique(c(keywords, title_terms))

# One text per record: title and abstract joined by a single space.
docs <- paste(
  naive_results[, "title"],
  naive_results[, "abstract"],
  sep = " "
)

# Document-feature matrix of the candidate terms over those texts, then the
# term network built from it with min_studies = 3.
dfm <- create_dfm(features = terms, elements = docs)
g <- dfm %>% create_network(min_studies = 3)

# Draw the term network: edges shaded by weight, nodes as white-filled
# circles, and term labels pushed outward from the centre of the plot.
ggraph(g, layout = "stress") +
  coord_fixed() +
  # Widen the x axis so outward-justified labels are not clipped.
  expand_limits(x = c(-3, 3)) +
  geom_edge_link(aes(alpha = weight)) +
  geom_node_point(shape = "circle filled", fill = "white") +
  geom_node_text(aes(label = name), hjust = "outward", check_overlap = TRUE) +
  # Hide the edge-alpha legend; "none" replaces the deprecated
  # `guides(<scale> = FALSE)` form, which triggers a deprecation warning.
  guides(edge_alpha = "none") +
  theme_bw(base_size = 18)

# Strength of every node (term) in the network.
strengths <- strength(g)

# One row per term, ranked by strength (ties share the lowest rank) and sorted
# from weakest to strongest.
term_strengths <- data.frame(
  term = names(strengths),
  strength = strengths,
  row.names = NULL
) %>%
  mutate(rank = rank(strength, ties.method = "min")) %>%
  arrange(strength)

term_strengths
##                          term strength rank
## 1            bikeshare access       20    1
## 2                   transfers       21    2
## 3                     tourism       22    3
## 4                    scooters       23    4
## 5                   citi bike       24    5
## 6  sustainable transportation       24    5
## 7             trip generation       24    5
## 8          weather conditions       24    5
## 9            cluster analysis       27    9
## 10               route choice       27    9
## 11  global positioning system       28   11
## 12                      china       29   12
## 13             transit riders       29   12
## 14                  commuters       30   14
## 15           spatial analysis       30   14
## 16    capital bikeshare trips       31   16
## 17            bicycle sharing       32   17
## 18                  marketing       33   18
## 19                 helmet use       35   19
## 20               trip purpose       35   19
## 21             public transit       38   21
## 22               rail transit       38   21
## 23                       jobs       39   23
## 24  disadvantaged communities       39   23
## 25             implementation       43   25
## 26                     before       46   26
## 27            travel patterns       49   27
## 28                    surveys       51   28
## 29                methodology       52   29
## 30                mode choice       52   29
## 31        bikeshare ridership       57   31
## 32            travel behavior       60   32
## 33              north america       66   33
## 34                    pricing       67   34
## 35                   cyclists       69   35
## 36         dockless bikeshare       71   36
## 37               demographics       72   37
## 38                     safety       75   38
## 39                urban areas       77   39
## 40           bikeshare demand       78   40
## 41               casual users       78   40
## 42                       race       80   42
## 43                     gender       82   43
## 44                 operations       85   44
## 45                   land use       90   45
## 46              united states      113   46
## 47          built environment      114   47
## 48                    impacts      117   48
## 49                   bicycles      119   49
## 50                destination      121   50
## 51              accessibility      122   51
## 52            bikeshare trips      125   52
## 53                     origin      126   53
## 54                  bicycling      135   54
## 55                communities      148   55
## 56         bikeshare programs      151   56
## 57                   behavior      160   57
## 58                   mobility      160   57
## 59                     policy      167   59
## 60            bikeshare users      175   60
## 61                     demand      179   61
## 62          capital bikeshare      182   62
## 63          bikeshare station      183   63
## 64             infrastructure      198   64
## 65                   location      204   65
## 66                  ridership      215   66
## 67                     access      246   67
## 68                     cities      304   68
## 69          bikeshare systems      341   69
## 70           bikeshare system      444   70
## 71                        age      543   71
# Strength-versus-rank curve used to choose a cutoff. Only terms with rank
# above 5 are labelled, and overlapping labels are dropped.
cutoff_fig <- ggplot(
  term_strengths,
  aes(x = rank, y = strength, label = term)
) +
  geom_line() +
  geom_point() +
  geom_text(
    data = term_strengths %>% filter(rank > 5),
    hjust = "right",
    nudge_y = 20,
    check_overlap = TRUE
  ) +
  theme_bw(base_size = 18)

cutoff_fig

# Strength cutoff from the "cumulative" method with percent = 0.8.
cutoff_cum <- find_cutoff(
  g,
  method = "cumulative",
  percent = 0.8
)

cutoff_cum
## [1] 72
# Overlay the cutoff on the strength-versus-rank figure.
cutoff_fig +
  geom_hline(linetype = "dashed", yintercept = cutoff_cum)

# Terms that remain after reducing the network at this cutoff.
g %>%
  reduce_graph(cutoff_cum) %>%
  get_keywords()
##  [1] "access"             "accessibility"      "age"               
##  [4] "behavior"           "bicycles"           "bicycling"         
##  [7] "built environment"  "capital bikeshare"  "cities"            
## [10] "communities"        "demand"             "demographics"      
## [13] "destination"        "gender"             "impacts"           
## [16] "infrastructure"     "land use"           "location"          
## [19] "mobility"           "operations"         "origin"            
## [22] "policy"             "race"               "ridership"         
## [25] "safety"             "united states"      "urban areas"       
## [28] "bikeshare demand"   "bikeshare programs" "bikeshare station" 
## [31] "bikeshare system"   "bikeshare systems"  "bikeshare trips"   
## [34] "bikeshare users"    "casual users"
# Candidate cutoffs from the "changepoint" method with knot_num = 3.
cutoff_change <- find_cutoff(
  g,
  method = "changepoint",
  knot_num = 3
)

cutoff_change
## [1]  90 215 304 543
# Overlay every candidate cutoff on the strength-versus-rank figure.
cutoff_fig +
  geom_hline(linetype = "dashed", yintercept = cutoff_change)

# Reduce the network at the first candidate cutoff and keep the surviving
# terms as the final selection.
g_redux <- g %>% reduce_graph(cutoff_change[1])
selected_terms <- g_redux %>% get_keywords()

selected_terms
##  [1] "access"             "accessibility"      "age"               
##  [4] "behavior"           "bicycles"           "bicycling"         
##  [7] "built environment"  "capital bikeshare"  "cities"            
## [10] "communities"        "demand"             "destination"       
## [13] "impacts"            "infrastructure"     "land use"          
## [16] "location"           "mobility"           "origin"            
## [19] "policy"             "ridership"          "united states"     
## [22] "bikeshare programs" "bikeshare station"  "bikeshare system"  
## [25] "bikeshare systems"  "bikeshare trips"    "bikeshare users"