Prerequisites

Software dependencies

This document does not install the required R packages by default. You can run the script install.R to install all required dependencies on a new R installation, or use install.packages(...) to install individual missing R packages.

# Optional one-off setup: runs install.R from the current working directory.
source("install.R")

The text analysis is based on the R package tidytext from the tidyverse suite of packages and uses the dplyr grammar. Read the tidytext tutorial to learn about the functions and concepts used.

The plots and tables of survey data and evaluation use the package ggplot2.

Required libraries and runtime environment description.

library("pdftools")
library("stringr")
library("knitr")
library("tibble")
library("tidytext")
library("purrr")
library("dplyr")
library("wordcloud")
library("RColorBrewer")
library("readr")
library("ggplot2")
library("rvest")
library("ggthemes")
library("grid")
library("gridBase")
library("gridExtra")
library("devtools")
library("rlang")
library("huxtable")
library("here")
library("httr")
library("googledrive")
library("SnowballC")

Seed

The seed is set to make the word cloud generation reproducible.

# Fix the state of R's random number generator so repeated runs give the
# same random draws.
set.seed(1)

Data

# Directory holding the manuscript PDFs; the code below resolves it with
# here::here() and expects one subdirectory per year ("2018", "2019").
data_path <- "all-manuscripts"

The data for the analysis is required in the form of directories with PDF files of all conference papers and poster abstracts. Due to the copyright of full papers, the full paper PDFs must be added manually to the respective directory. Short papers and poster abstracts are downloaded automatically.

Add the PDFs to a directory called all-manuscripts next to this file, with one subdirectory per year:

# List the per-year subdirectories. Uses data_path and here::here() like the
# rest of the analysis, so the listing does not depend on the working
# directory or on a second hard-coded copy of the directory name.
list.files(here::here(data_path))
## [1] "2018"      "2019"      "2019-test"

The following downloads of AGILE short papers are not executed by default.

# Download the 2018 short papers and poster abstracts, which the conference
# page links to on Google Drive, into <data_path>/2018.
dir_2018 <- here::here(data_path, "2018")
# recursive = TRUE also creates data_path itself when it does not exist yet;
# showWarnings = FALSE keeps re-runs quiet when the directory is already there.
dir.create(dir_2018, recursive = TRUE, showWarnings = FALSE)

page <- read_html("https://agile-online.org/programme-2018/accepted-papers-and-posters-2018")

all_links <- page %>%
    html_nodes(css = "a") %>%
    html_attr("href") %>%
    as.list()

# Anchors without an href yield NA. Build an NA-free mask on the plain
# character vector instead of indexing with NA and removing NULLs afterwards.
link_urls <- unlist(all_links)
is_drive_link <- !is.na(link_urls) & str_detect(link_urls, "drive\\.google")
drive_links <- all_links[is_drive_link]

drive_ids <- lapply(drive_links, as_id)
# Without `path`, drive_download() writes into the working directory. Look up
# each file's name first so the PDF is stored in the 2018 directory that the
# analysis reads from.
lapply(drive_ids, function(id) {
  drive_file <- drive_get(id = id)
  drive_download(drive_file,
                 path = file.path(dir_2018, drive_file$name),
                 overwrite = TRUE)
})
# Download the 2019 short papers and poster abstracts, which are hosted on
# the conference website itself, into <data_path>/2019.
dir_2019 <- here::here(data_path, "2019")
# recursive = TRUE also creates data_path itself when it does not exist yet;
# showWarnings = FALSE keeps re-runs quiet when the directory is already there.
dir.create(dir_2019, recursive = TRUE, showWarnings = FALSE)

page <- read_html("https://agile-online.org/conference-2019/programme-2019/accepted-papers-and-posters-2019")

all_links <- page %>%
    html_nodes(css = "a") %>%
    html_attr("href") %>%
    as.list()

# Anchors without an href yield NA; exclude them before matching.
link_urls <- unlist(all_links)
is_pdf_link <- !is.na(link_urls) & str_detect(link_urls, "Upload_your_PDF")
# str_c() returns an empty vector when nothing matched, whereas paste0() would
# return the bare base URL and the loop below would try to download it.
pdf_links <- str_c("https://agile-online.org", link_urls[is_pdf_link])

for (link in pdf_links) {
  # mode = "wb": PDFs are binary; the default text mode corrupts them on
  # Windows.
  download.file(url = link,
                destfile = file.path(dir_2019,
                                     stringr::str_extract(link, "([^/]+$)")),
                mode = "wb")
}

2018

Loading and cleaning

# Full paths of all 2018 manuscripts. The dot is escaped so that only names
# ending in the literal extension ".pdf" match (an unescaped "." matches any
# character, e.g. a file called "notes_pdf").
files_2018 <- dir(path = here::here(data_path, "2018"), pattern = "\\.pdf$", full.names = TRUE)

This analysis was created with the following 125 documents:

##   [1] "/10.1007_2F978-3-319-78208-9_1.pdf"                                                                                                                           
##   [2] "/10.1007_2F978-3-319-78208-9_10.pdf"                                                                                                                          
##   [3] "/10.1007_2F978-3-319-78208-9_11.pdf"                                                                                                                          
##   [4] "/10.1007_2F978-3-319-78208-9_12.pdf"                                                                                                                          
##   [5] "/10.1007_2F978-3-319-78208-9_13.pdf"                                                                                                                          
##   [6] "/10.1007_2F978-3-319-78208-9_14.pdf"                                                                                                                          
##   [7] "/10.1007_2F978-3-319-78208-9_15.pdf"                                                                                                                          
##   [8] "/10.1007_2F978-3-319-78208-9_16.pdf"                                                                                                                          
##   [9] "/10.1007_2F978-3-319-78208-9_17.pdf"                                                                                                                          
##  [10] "/10.1007_2F978-3-319-78208-9_18.pdf"                                                                                                                          
##  [11] "/10.1007_2F978-3-319-78208-9_19.pdf"                                                                                                                          
##  [12] "/10.1007_2F978-3-319-78208-9_2.pdf"                                                                                                                           
##  [13] "/10.1007_2F978-3-319-78208-9_3.pdf"                                                                                                                           
##  [14] "/10.1007_2F978-3-319-78208-9_4.pdf"                                                                                                                           
##  [15] "/10.1007_2F978-3-319-78208-9_5.pdf"                                                                                                                           
##  [16] "/10.1007_2F978-3-319-78208-9_6.pdf"                                                                                                                           
##  [17] "/10.1007_2F978-3-319-78208-9_7.pdf"                                                                                                                           
##  [18] "/10.1007_2F978-3-319-78208-9_8.pdf"                                                                                                                           
##  [19] "/10.1007_2F978-3-319-78208-9_9.pdf"                                                                                                                           
##  [20] "/101 AGILE_2018_opendata_municipality_final.pdf"                                                                                                              
##  [21] "/102 AGILE_2018_poster_revised.pdf"                                                                                                                           
##  [22] "/103_paper.pdf"                                                                                                                                               
##  [23] "/104_paper.pdf"                                                                                                                                               
##  [24] "/105 105_short_paper.pdf"                                                                                                                                     
##  [25] "/106 A Pattern-based Framework for Designing Location-based Games.pdf"                                                                                        
##  [26] "/107 ShortPaper ID 107.pdf"                                                                                                                                   
##  [27] "/108 AGILE_CameraReady2.pdf"                                                                                                                                  
##  [28] "/109 revised_final.pdf"                                                                                                                                       
##  [29] "/110 poster_110_agile_2018_distributed_generation_of_imagepyramids_revised.pdf"                                                                               
##  [30] "/111 AGILE_2018_paper_111.pdf"                                                                                                                                
##  [31] "/112 AGILE 2018_Huang et al..pdf"                                                                                                                             
##  [32] "/114HarrieKarstensHuang.pdf"                                                                                                                                  
##  [33] "/115 Agile_poster_115_Promoting_walking_cycling.pdf"                                                                                                          
##  [34] "/118M+ñs-ShortPaper.pdf"                                                                                                                                      
##  [35] "/120.pdf"                                                                                                                                                     
##  [36] "/121 AGILE_2018_Short_Paper_ID_121_Pajarito.pdf"                                                                                                              
##  [37] "/122nDPointCloud_AGILE2018.pdf"                                                                                                                               
##  [38] "/123A Platform for Coordinating Voluntary Helpers in Disaster Response.pdf"                                                                                   
##  [39] "/124 Topological_Reconstruction_AGILE_2018 (Final).pdf"                                                                                                       
##  [40] "/125Kotavaara_at_al_AGILE_2018.pdf"                                                                                                                           
##  [41] "/126 AGILE2018_SP_126.pdf"                                                                                                                                    
##  [42] "/127 AGILE_2018_poster 127.pdf"                                                                                                                               
##  [43] "/128AGILE_2018_v1c_ShortPaper_ENACT_final.pdf"                                                                                                                
##  [44] "/129 AGILE_2018_Poster_City SImulation Lab_revised.pdf"                                                                                                       
##  [45] "/130Ledermann (2018) Towards Automatic Extraction of Cartographic Metadata from the Code of Online Maps.pdf"                                                  
##  [46] "/131 Agile2018_Paper_TCH-JIN-FJO_Final_v2.pdf"                                                                                                                
##  [47] "/132 AGILE_2018_Metral_final_5.pdf"                                                                                                                           
##  [48] "/134Hoda Allahbakhshi_AGILE_2018.pdf"                                                                                                                         
##  [49] "/135 Short Paper ID- 135_20180425.pdf"                                                                                                                        
##  [50] "/136 Poster 136-9_04_w_authors.pdf"                                                                                                                           
##  [51] "/137 _AGILE_2018_v2.pdf"                                                                                                                                      
##  [52] "/139 Agile_HannahHaacke.pdf"                                                                                                                                  
##  [53] "/140AGILE2018_Yi-Min Chang Chien (revised).pdf"                                                                                                               
##  [54] "/142 Agile 2018 Alegams short_authors-final.pdf"                                                                                                              
##  [55] "/143AGILE_2018_Living_Textbook - 14 FEB 2018_after_review.pdf"                                                                                                
##  [56] "/144 poster-144.pdf"                                                                                                                                          
##  [57] "/145 Short Paper ID 145-An improved European LULC map derived by data integration_correction.pdf"                                                             
##  [58] "/146 Short paper ID 146.pdf"                                                                                                                                  
##  [59] "/147 AGILE_2018_SCBA_of_HV_OGD_final.pdf"                                                                                                                     
##  [60] "/148 ShortPaper_148txt.pdf"                                                                                                                                   
##  [61] "/149 ShortPaper_ID_149_txt.pdf"                                                                                                                               
##  [62] "/150 paper_150_AGILE_2018_V1.1.pdf"                                                                                                                           
##  [63] "/151 AGILE_2018_Mai.pdf"                                                                                                                                      
##  [64] "/152 sstein_cedeus-research-sdi_v0.5-final.pdf"                                                                                                               
##  [65] "/153 AGILE_2018_NKayhko_revised_April10th.pdf"                                                                                                                
##  [66] "/155 Fina_AGILE_2018_Msilanga.pdf"                                                                                                                            
##  [67] "/157 AGILE_2018_Short PaperTheodomirMugiranezaFinal.pdf"                                                                                                      
##  [68] "/160 JuhaszHochmair_AGILE2018_revised_final.pdf"                                                                                                              
##  [69] "/161 Citizense poster manuscript 2018 (final).pdf"                                                                                                            
##  [70] "/162 AGILE_2018_PaperAndPoster_Poster162_revised_20180413.pdf"                                                                                                
##  [71] "/163 AGILE2018-Davidovic_Mooney_Stoimenov_camera_ready.pdf"                                                                                                   
##  [72] "/164.pdf"                                                                                                                                                     
##  [73] "/165 Paper 165_Spatial Patterns for Crime Spots (revised).pdf"                                                                                                
##  [74] "/166 AGILE_2018_final_djerriri.pdf"                                                                                                                           
##  [75] "/167_v2.pdf"                                                                                                                                                  
##  [76] "/168 AGILE_2018.pdf"                                                                                                                                          
##  [77] "/170 AGILE_18.pdf"                                                                                                                                            
##  [78] "/171 171-revised-final.pdf"                                                                                                                                   
##  [79] "/172 AGILE18 short paper submission final ZG PM.pdf"                                                                                                          
##  [80] "/173 short-paper-173.pdf"                                                                                                                                     
##  [81] "/41_AGILE_2018_Agung_Indrajit-rev.pdf"                                                                                                                        
##  [82] "/48 paper 48.pdf"                                                                                                                                             
##  [83] "/49 AGILE_poster_Final.pdf"                                                                                                                                   
##  [84] "/51 Short_Paper_ID_51.pdf"                                                                                                                                    
##  [85] "/52 Spatial Vision Analysis of Non Spatial Data.pdf"                                                                                                          
##  [86] "/53 AGILE_2018_ShortPaperID53_Dane.pdf"                                                                                                                       
##  [87] "/54 PosterSubmission_DingMa3.pdf"                                                                                                                             
##  [88] "/55 Collaboration between Science, Practice, Citizens final.pdf"                                                                                              
##  [89] "/57_Heinzlef-Coping with urban floods_a special decision-support system to improve resilience.pdf"                                                            
##  [90] "/58 agileSEnviroPoster58.pdf"                                                                                                                                 
##  [91] "/59 ShortPaper59_revised.pdf"                                                                                                                                 
##  [92] "/62 Revised_AGILE_Poster_20180408.pdf"                                                                                                                        
##  [93] "/63 Revised_Document_Westerholt.pdf"                                                                                                                          
##  [94] "/64 short_paper_64.pdf"                                                                                                                                       
##  [95] "/65 Paper_ID_65_AGILE2018_Landuse_Characterisation.pdf"                                                                                                       
##  [96] "/66 Kernel Density Estimation (KDE) vs. Hot-Spot Analysis - Detecting Criminal Hot Spots in the City of San Francisco_UPDATE.pdf"                             
##  [97] "/67 AGILE_2018_Villette_Purves.pdf"                                                                                                                           
##  [98] "/68 68 Brox et al. AGILE_short paper brain drain after review final 3.4.18.pdf"                                                                               
##  [99] "/69 AGILE_2018_Geocoding_Social_Media_Messages.pdf"                                                                                                           
## [100] "/70 AGILE_Paper_Final_Lund_2018_Ongaya_Kizito.pdf"                                                                                                            
## [101] "/71_finalized.pdf"                                                                                                                                            
## [102] "/72 short_paper_72.pdf"                                                                                                                                       
## [103] "/73 AGILE_2018_shortpaper_revised_for_Libreoffice.pdf"                                                                                                        
## [104] "/74 Training SegNet for Cropland Classification of High Resolution Remote Sensing Images.pdf"                                                                 
## [105] "/75 Simulating multiple land use changes by incorporating deep belief network into cellular automata, a case study in BEIJING-TIANJIN-HEBEI region, China.pdf"
## [106] "/76 AGILE_2018_short_review.pdf"                                                                                                                              
## [107] "/77 Poster 77 - SharingBuidingInformation version - 2018-03-27.pdf"                                                                                           
## [108] "/80 RSanya_AGILE2018_shortPaperID80.pdf"                                                                                                                      
## [109] "/81 AGILE_2018_81 revision.pdf"                                                                                                                               
## [110] "/82 seusn_poster_agile2018-poster82.pdf"                                                                                                                      
## [111] "/83 AGILE_20180405_PerOla.pdf"                                                                                                                                
## [112] "/84_Agile_Yuanxuan_Submission_V3.pdf"                                                                                                                         
## [113] "/86 richter etal agile2018_final.pdf"                                                                                                                         
## [114] "/87 Short Paper ID.pdf"                                                                                                                                       
## [115] "/89 AGILE_2018_Rivised Paper.pdf"                                                                                                                             
## [116] "/90 AGILE_2018_poster90.pdf"                                                                                                                                  
## [117] "/91 POSTER 91_Spatial estimation _xiaoqian LIU.pdf"                                                                                                           
## [118] "/93 Agile_paper_sub.pdf"                                                                                                                                      
## [119] "/94 Short_Paper_ID_94_AGILE_2018_UAS_Mission_Support_Final_2018_04_10.pdf"                                                                                    
## [120] "/96 Poster 96.pdf"                                                                                                                                            
## [121] "/98 agile_2018_short_paper_final.pdf"                                                                                                                         
## [122] "/99 TopicWave_poster99_revision_final.pdf"                                                                                                                    
## [123] "/Petter_AGILE_2018_Education_Final.pdf"                                                                                                                       
## [124] "/Petter2_AGILE_2018_Final_Paper_Uganda.pdf"                                                                                                                   
## [125] "/Yanzi_Poster paper_AGILE_0605_1.pdf"

Count the types of submissions:

# Full papers are stored under their DOI-based file name ("10.1007_..."), all
# other PDFs are short papers or posters. Match on the file name only, with
# the dot escaped, so that neither a directory name nor an arbitrary character
# in place of the "." can produce a false positive.
is_full_paper_2018 <- str_detect(basename(files_2018), "10\\.100")
full_papers_2018 <- sum(is_full_paper_2018)
other_papers_2018 <- sum(!is_full_paper_2018)

There are 19 full papers and 106 short papers/posters.

Read the data from PDFs and preprocess to create a tidy data structure without stop words:

# Domain-specific stop words (citation and layout fragments, URL schemes)
# that are removed in addition to the tidytext `stop_words` lexicons.
my_stop_words <- tibble(
  word = c(
    "et",
    "al",
    "fig",
    "e.g",
    "i.e",
    "http",
    "ing",
    "pp",
    "figure",
    "table",
    "based",
    "lund", # location of conference 2018
    "https"
  ),
  lexicon = "agile"
)
all_stop_words <- stop_words %>%
  bind_rows(my_stop_words)

# pdf_text() returns one string per page; join the pages of each document
# into a single string. `collapse` has to be a separator string - the former
# `collapse = TRUE` passed a logical, which is not a valid separator.
texts <- lapply(files_2018, pdf_text)
texts <- unlist(lapply(texts, str_c, collapse = "\n"))
# PDF metadata; the page count is read from it when the tidy table is built.
infos <- lapply(files_2018, pdf_info)

# Derive a document id from a file path: the trailing run of characters that
# contains no "/" (i.e. the file name). Vectorised over `files`.
make_id <- function(files) {
  last_path_component <- "([^/]+$)"
  str_extract(string = files, pattern = last_path_component)
}

# One row per document: id (file name), full path, full text and page count.
# `pages` stays a character column as before, but the conversion is now
# explicit: relying on map_chr() to coerce the integer page count is
# deprecated in recent purrr versions.
tidy_texts_2018 <- tibble(
  id = make_id(files_2018),
  file = files_2018,
  text = texts,
  pages = map_chr(infos, function(info) as.character(info$pages))
)

# One row per token (word) per document.
papers_words <- unnest_tokens(select(tidy_texts_2018, file, text), word, text)

# Keep only tokens that cannot be parsed as a number. as.numeric() warns for
# every non-numeric token, so exactly that call is silenced.
no_numbers <- filter(
  papers_words,
  is.na(suppressWarnings(as.numeric(word)))
)

# Remove stop words and attach the document id.
no_stop_words_2018 <- mutate(
  anti_join(no_numbers, all_stop_words, by = "word"),
  id = make_id(file)
)

# Stemming, see https://github.com/juliasilge/tidytext/issues/17
no_stop_stems_2018 <- mutate(no_stop_words_2018, word_stem = wordStem(word))

About 49 % of the words are considered stop words. There are 17931 unique word stems of 23292 words.

How many non-stop words does each document have?

# Number of non-stop words per document, largest documents first.
count(no_stop_words_2018, id, name = "words", sort = TRUE)
id words
10.1007_2F978-3-319-78208-9_2.pdf 4813
10.1007_2F978-3-319-78208-9_10.pdf 4530
10.1007_2F978-3-319-78208-9_7.pdf 4343
10.1007_2F978-3-319-78208-9_9.pdf 4246
10.1007_2F978-3-319-78208-9_8.pdf 3632
10.1007_2F978-3-319-78208-9_15.pdf 3586
10.1007_2F978-3-319-78208-9_13.pdf 3490
10.1007_2F978-3-319-78208-9_16.pdf 3430
10.1007_2F978-3-319-78208-9_12.pdf 3372
10.1007_2F978-3-319-78208-9_14.pdf 3337
10.1007_2F978-3-319-78208-9_1.pdf 3107
10.1007_2F978-3-319-78208-9_17.pdf 3015
10.1007_2F978-3-319-78208-9_19.pdf 3008
10.1007_2F978-3-319-78208-9_11.pdf 2960
10.1007_2F978-3-319-78208-9_3.pdf 2778
10.1007_2F978-3-319-78208-9_4.pdf 2736
10.1007_2F978-3-319-78208-9_18.pdf 2641
10.1007_2F978-3-319-78208-9_6.pdf 2598
10.1007_2F978-3-319-78208-9_5.pdf 2571
121 AGILE_2018_Short_Paper_ID_121_Pajarito.pdf 2413
152 sstein_cedeus-research-sdi_v0.5-final.pdf 2382
41_AGILE_2018_Agung_Indrajit-rev.pdf 2286
122nDPointCloud_AGILE2018.pdf 2274
128AGILE_2018_v1c_ShortPaper_ENACT_final.pdf 2218
171 171-revised-final.pdf 2180
93 Agile_paper_sub.pdf 2049
139 Agile_HannahHaacke.pdf 1951
172 AGILE18 short paper submission final ZG PM.pdf 1934
168 AGILE_2018.pdf 1888
70 AGILE_Paper_Final_Lund_2018_Ongaya_Kizito.pdf 1876
126 AGILE2018_SP_126.pdf 1874
104_paper.pdf 1873
125Kotavaara_at_al_AGILE_2018.pdf 1867
145 Short Paper ID 145-An improved European LULC map derived by data integration_correction.pdf 1834
134Hoda Allahbakhshi_AGILE_2018.pdf 1817
106 A Pattern-based Framework for Designing Location-based Games.pdf 1815
147 AGILE_2018_SCBA_of_HV_OGD_final.pdf 1814
151 AGILE_2018_Mai.pdf 1814
89 AGILE_2018_Rivised Paper.pdf 1796
112 AGILE 2018_Huang et al..pdf 1790
173 short-paper-173.pdf 1757
170 AGILE_18.pdf 1755
53 AGILE_2018_ShortPaperID53_Dane.pdf 1740
65 Paper_ID_65_AGILE2018_Landuse_Characterisation.pdf 1737
107 ShortPaper ID 107.pdf 1706
75 Simulating multiple land use changes by incorporating deep belief network into cellular automata, a case study in BEIJING-TIANJIN-HEBEI region, China.pdf 1706
69 AGILE_2018_Geocoding_Social_Media_Messages.pdf 1697
109 revised_final.pdf 1696
131 Agile2018_Paper_TCH-JIN-FJO_Final_v2.pdf 1687
149 ShortPaper_ID_149_txt.pdf 1670
160 JuhaszHochmair_AGILE2018_revised_final.pdf 1670
76 AGILE_2018_short_review.pdf 1668
130Ledermann (2018) Towards Automatic Extraction of Cartographic Metadata from the Code of Online Maps.pdf 1663
63 Revised_Document_Westerholt.pdf 1658
123A Platform for Coordinating Voluntary Helpers in Disaster Response.pdf 1650
74 Training SegNet for Cropland Classification of High Resolution Remote Sensing Images.pdf 1638
108 AGILE_CameraReady2.pdf 1636
132 AGILE_2018_Metral_final_5.pdf 1627
71_finalized.pdf 1617
124 Topological_Reconstruction_AGILE_2018 (Final).pdf 1605
59 ShortPaper59_revised.pdf 1600
66 Kernel Density Estimation (KDE) vs. Hot-Spot Analysis - Detecting Criminal Hot Spots in the City of San Francisco_UPDATE.pdf 1596
135 Short Paper ID- 135_20180425.pdf 1582
165 Paper 165_Spatial Patterns for Crime Spots (revised).pdf 1570
80 RSanya_AGILE2018_shortPaperID80.pdf 1567
142 Agile 2018 Alegams short_authors-final.pdf 1560
150 paper_150_AGILE_2018_V1.1.pdf 1560
51 Short_Paper_ID_51.pdf 1539
146 Short paper ID 146.pdf 1531
72 short_paper_72.pdf 1531
118M+ñs-ShortPaper.pdf 1519
48 paper 48.pdf 1516
163 AGILE2018-Davidovic_Mooney_Stoimenov_camera_ready.pdf 1512
94 Short_Paper_ID_94_AGILE_2018_UAS_Mission_Support_Final_2018_04_10.pdf 1505
64 short_paper_64.pdf 1502
55 Collaboration between Science, Practice, Citizens final.pdf 1462
86 richter etal agile2018_final.pdf 1455
101 AGILE_2018_opendata_municipality_final.pdf 1449
68 68 Brox et al. AGILE_short paper brain drain after review final 3.4.18.pdf 1448
111 AGILE_2018_paper_111.pdf 1436
103_paper.pdf 1422
143AGILE_2018_Living_Textbook - 14 FEB 2018_after_review.pdf 1408
57_Heinzlef-Coping with urban floods_a special decision-support system to improve resilience.pdf 1404
137 _AGILE_2018_v2.pdf 1391
87 Short Paper ID.pdf 1368
98 agile_2018_short_paper_final.pdf 1364
Petter_AGILE_2018_Education_Final.pdf 1362
67 AGILE_2018_Villette_Purves.pdf 1327
114HarrieKarstensHuang.pdf 1311
52 Spatial Vision Analysis of Non Spatial Data.pdf 1292
140AGILE2018_Yi-Min Chang Chien (revised).pdf 1280
Petter2_AGILE_2018_Final_Paper_Uganda.pdf 1280
84_Agile_Yuanxuan_Submission_V3.pdf 1277
73 AGILE_2018_shortpaper_revised_for_Libreoffice.pdf 1202
157 AGILE_2018_Short PaperTheodomirMugiranezaFinal.pdf 1177
153 AGILE_2018_NKayhko_revised_April10th.pdf 1159
82 seusn_poster_agile2018-poster82.pdf 1113
166 AGILE_2018_final_djerriri.pdf 1023
162 AGILE_2018_PaperAndPoster_Poster162_revised_20180413.pdf 975
81 AGILE_2018_81 revision.pdf 941
58 agileSEnviroPoster58.pdf 931
148 ShortPaper_148txt.pdf 921
155 Fina_AGILE_2018_Msilanga.pdf 913
99 TopicWave_poster99_revision_final.pdf 906
62 Revised_AGILE_Poster_20180408.pdf 872
105 105_short_paper.pdf 809
120.pdf 792
136 Poster 136-9_04_w_authors.pdf 750
Yanzi_Poster paper_AGILE_0605_1.pdf 750
115 Agile_poster_115_Promoting_walking_cycling.pdf 716
161 Citizense poster manuscript 2018 (final).pdf 710
54 PosterSubmission_DingMa3.pdf 709
164.pdf 704
127 AGILE_2018_poster 127.pdf 702
102 AGILE_2018_poster_revised.pdf 701
90 AGILE_2018_poster90.pdf 672
144 poster-144.pdf 666
91 POSTER 91_Spatial estimation _xiaoqian LIU.pdf 621
77 Poster 77 - SharingBuidingInformation version - 2018-03-27.pdf 611
83 AGILE_20180405_PerOla.pdf 597
167_v2.pdf 567
129 AGILE_2018_Poster_City SImulation Lab_revised.pdf 561
110 poster_110_agile_2018_distributed_generation_of_imagepyramids_revised.pdf 537
96 Poster 96.pdf 519
49 AGILE_poster_Final.pdf 474

Text analysis

How often do the following terms on reproducible research appear in each paper?

The detection matches full words using the regex word-boundary anchor \b.

  • reproduc (reproduc.*, reproducibility, reproducible, reproduce, reproduction)
  • replic (replicat.*, i.e. replication, replicate)
  • repeatab (repeatab.*, i.e. repeatability, repeatable)
  • software
  • (pseudo) code/script(s) [column name code]
  • algorithm (algorithm.*, i.e. algorithms, algorithmic)
  • process (process.*, i.e. processing, processes, preprocessing)
  • data (data.*, i.e. dataset(s), database(s))
  • result(s)
  • repository(ies)
# Count reproducibility-related terms per paper on the lower-cased full text.
#
# Each prefix pattern ends in `\\w*`, so a match stops at the end of the word
# and every occurrence is counted. A greedy `.*` would run to the end of the
# line (`.` does not match line breaks) and count at most one occurrence per
# line of text.
# NOTE(review): tables rendered with the former greedy `.*` patterns hold
# per-line counts; re-knit the document to refresh them.
tidy_texts_2018_lower <- str_to_lower(tidy_texts_2018$text)
word_counts <- tibble(
  id = tidy_texts_2018$id,
  `reproduc..` = str_count(tidy_texts_2018_lower, "\\breproduc\\w*"),
  `replic..` = str_count(tidy_texts_2018_lower, "\\breplicat\\w*"),
  `repeatab..` = str_count(tidy_texts_2018_lower, "\\brepeatab\\w*"),
  # "pseudo code" is listed explicitly so it is counted once as a phrase
  `code` = str_count(tidy_texts_2018_lower, "\\b(code|script\\w*|pseudo code)\\b"),
  `software` = str_count(tidy_texts_2018_lower, "\\bsoftware\\b"),
  `algorithm(s)` = str_count(tidy_texts_2018_lower, "\\balgorithm\\w*"),
  # covers process.., preprocess.. and pre-process..
  `(pre)process..` = str_count(tidy_texts_2018_lower, "\\b(pre-?)?process\\w*"),
  `data.*` = str_count(tidy_texts_2018_lower, "\\bdata\\w*"),
  `result(s)` = str_count(tidy_texts_2018_lower, "\\bresults?\\b"),
  `repository/ies` = str_count(tidy_texts_2018_lower, "\\brepositor(y|ies)\\b")
) %>%
  # sum over all term columns, i.e. everything except the leading `id` column
  mutate(all = rowSums(.[-1]))

# Column totals over all papers, labelled "Total" in the `id` column.
# A bare function is passed instead of the soft-deprecated `funs()`.
word_counts_sums_total_2018 <- word_counts %>%
  summarise_if(is.numeric, sum) %>%
  add_column(id = "Total", .before = 1)
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## please use list() instead
## 
##   # Before:
##   funs(name = f(.))
## 
##   # After: 
##   list(name = ~ f(.))
## This warning is displayed once per session.
# Show the per-paper counts with the totals row appended at the bottom.
word_counts %>%
  rbind(word_counts_sums_total_2018)
id reproduc.. replic.. repeatab.. code software algorithm(s) (pre)process.. data.* result(s) repository/ies all
10.1007_2F978-3-319-78208-9_1.pdf 0 0 0 0 0 8 7 66 41 0 122       
10.1007_2F978-3-319-78208-9_10.pdf 0 0 0 0 0 0 11 6 13 0 30       
10.1007_2F978-3-319-78208-9_11.pdf 0 0 0 0 1 0 8 32 11 0 52       
10.1007_2F978-3-319-78208-9_12.pdf 0 0 0 0 0 42 11 48 5 0 106       
10.1007_2F978-3-319-78208-9_13.pdf 0 0 0 1 1 51 27 40 30 1 151       
10.1007_2F978-3-319-78208-9_14.pdf 0 0 0 0 0 17 4 34 5 0 60       
10.1007_2F978-3-319-78208-9_15.pdf 0 0 0 0 0 8 41 66 16 0 131       
10.1007_2F978-3-319-78208-9_16.pdf 0 0 0 0 0 2 8 27 3 0 40       
10.1007_2F978-3-319-78208-9_17.pdf 0 0 0 0 1 6 5 95 8 0 115       
10.1007_2F978-3-319-78208-9_18.pdf 0 0 0 0 0 1 2 45 8 0 56       
10.1007_2F978-3-319-78208-9_19.pdf 0 0 0 1 1 0 3 27 24 0 56       
10.1007_2F978-3-319-78208-9_2.pdf 0 0 0 7 12 3 3 53 26 0 104       
10.1007_2F978-3-319-78208-9_3.pdf 0 0 0 0 2 0 2 28 10 0 42       
10.1007_2F978-3-319-78208-9_4.pdf 0 0 0 0 0 11 11 35 20 0 77       
10.1007_2F978-3-319-78208-9_5.pdf 0 0 0 0 3 0 5 13 6 0 27       
10.1007_2F978-3-319-78208-9_6.pdf 0 0 0 0 3 7 2 28 6 2 48       
10.1007_2F978-3-319-78208-9_7.pdf 0 0 0 1 46 1 3 77 12 4 144       
10.1007_2F978-3-319-78208-9_8.pdf 0 1 0 0 0 0 4 62 2 0 69       
10.1007_2F978-3-319-78208-9_9.pdf 0 0 0 7 12 0 13 122 4 0 158       
101 AGILE_2018_opendata_municipality_final.pdf 0 0 0 0 2 0 4 59 0 0 65       
102 AGILE_2018_poster_revised.pdf 0 0 0 0 0 1 0 1 0 0 2       
103_paper.pdf 0 0 0 0 0 0 0 60 2 0 62       
104_paper.pdf 0 0 0 1 0 0 35 1 6 0 43       
105 105_short_paper.pdf 1 0 0 1 3 0 0 7 5 0 17       
106 A Pattern-based Framework for Designing Location-based Games.pdf 0 0 0 3 1 0 4 7 0 0 15       
107 ShortPaper ID 107.pdf 0 0 0 0 0 0 5 2 3 0 10       
108 AGILE_CameraReady2.pdf 0 0 0 0 1 38 1 5 9 0 54       
109 revised_final.pdf 0 0 0 0 0 1 14 6 5 0 26       
110 poster_110_agile_2018_distributed_generation_of_imagepyramids_revised.pdf 0 0 0 0 5 2 11 10 1 0 29       
111 AGILE_2018_paper_111.pdf 0 0 0 1 0 0 0 5 6 0 12       
112 AGILE 2018_Huang et al..pdf 0 0 0 0 0 0 1 93 1 0 95       
114HarrieKarstensHuang.pdf 0 0 0 3 0 0 4 18 5 0 30       
115 Agile_poster_115_Promoting_walking_cycling.pdf 0 0 0 0 0 0 0 23 0 0 23       
118M+ñs-ShortPaper.pdf 0 0 0 6 1 0 7 62 1 0 77       
120.pdf 1 0 0 0 0 3 1 30 7 0 42       
121 AGILE_2018_Short_Paper_ID_121_Pajarito.pdf 0 2 0 1 0 0 3 46 3 0 55       
122nDPointCloud_AGILE2018.pdf 0 0 0 1 4 0 22 79 4 0 110       
123A Platform for Coordinating Voluntary Helpers in Disaster Response.pdf 0 0 0 0 5 3 18 4 5 0 35       
124 Topological_Reconstruction_AGILE_2018 (Final).pdf 0 0 0 0 3 10 13 28 2 0 56       
125Kotavaara_at_al_AGILE_2018.pdf 0 0 0 1 0 1 0 31 7 0 40       
126 AGILE2018_SP_126.pdf 0 0 0 0 1 0 2 29 24 0 56       
127 AGILE_2018_poster 127.pdf 0 0 0 0 0 2 1 4 4 0 11       
128AGILE_2018_v1c_ShortPaper_ENACT_final.pdf 0 0 0 1 0 0 0 61 6 0 68       
129 AGILE_2018_Poster_City SImulation Lab_revised.pdf 0 0 0 0 0 0 4 1 1 0 6       
130Ledermann (2018) Towards Automatic Extraction of Cartographic Metadata from the Code of Online Maps.pdf 0 1 0 39 7 3 8 29 1 0 88       
131 Agile2018_Paper_TCH-JIN-FJO_Final_v2.pdf 0 0 0 0 0 0 25 7 3 0 35       
132 AGILE_2018_Metral_final_5.pdf 0 0 0 1 0 2 18 73 0 0 94       
134Hoda Allahbakhshi_AGILE_2018.pdf 1 0 0 0 0 5 0 34 2 0 42       
135 Short Paper ID- 135_20180425.pdf 0 0 0 0 0 1 0 54 1 0 56       
136 Poster 136-9_04_w_authors.pdf 0 0 0 0 0 3 0 7 3 0 13       
137 _AGILE_2018_v2.pdf 0 0 0 0 3 0 3 7 7 0 20       
139 Agile_HannahHaacke.pdf 2 0 0 0 0 2 2 44 22 0 72       
140AGILE2018_Yi-Min Chang Chien (revised).pdf 0 0 0 0 1 0 0 17 11 0 29       
142 Agile 2018 Alegams short_authors-final.pdf 0 0 0 0 0 0 2 8 8 0 18       
143AGILE_2018_Living_Textbook - 14 FEB 2018_after_review.pdf 0 0 0 0 1 0 3 2 5 0 11       
144 poster-144.pdf 0 0 0 0 0 0 0 3 0 0 3       
145 Short Paper ID 145-An improved European LULC map derived by data integration_correction.pdf 0 0 0 0 0 1 3 48 7 0 59       
146 Short paper ID 146.pdf 0 0 0 0 1 0 6 18 5 0 30       
147 AGILE_2018_SCBA_of_HV_OGD_final.pdf 0 0 0 0 0 0 3 152 1 0 156       
148 ShortPaper_148txt.pdf 0 0 0 0 1 0 1 2 3 0 7       
149 ShortPaper_ID_149_txt.pdf 1 0 0 0 0 0 0 1 8 0 10       
150 paper_150_AGILE_2018_V1.1.pdf 0 0 0 1 0 17 2 17 13 0 50       
151 AGILE_2018_Mai.pdf 0 0 0 0 0 3 3 21 25 1 53       
152 sstein_cedeus-research-sdi_v0.5-final.pdf 3 0 0 0 27 0 2 110 13 1 156       
153 AGILE_2018_NKayhko_revised_April10th.pdf 0 0 0 0 5 0 0 15 1 2 23       
155 Fina_AGILE_2018_Msilanga.pdf 0 0 0 0 1 0 2 13 2 0 18       
157 AGILE_2018_Short PaperTheodomirMugiranezaFinal.pdf 0 0 0 0 1 1 1 14 5 0 22       
160 JuhaszHochmair_AGILE2018_revised_final.pdf 0 0 0 0 0 0 1 11 11 0 23       
161 Citizense poster manuscript 2018 (final).pdf 0 1 0 0 0 0 6 24 2 0 33       
162 AGILE_2018_PaperAndPoster_Poster162_revised_20180413.pdf 0 0 0 0 0 0 1 8 8 0 17       
163 AGILE2018-Davidovic_Mooney_Stoimenov_camera_ready.pdf 0 1 0 2 1 0 11 30 4 0 49       
164.pdf 0 0 0 0 0 27 0 0 7 0 34       
165 Paper 165_Spatial Patterns for Crime Spots (revised).pdf 0 0 0 0 0 2 1 10 11 0 24       
166 AGILE_2018_final_djerriri.pdf 0 0 0 0 2 18 7 24 3 0 54       
167_v2.pdf 0 0 0 0 3 0 2 9 0 0 14       
168 AGILE_2018.pdf 0 0 0 0 1 0 3 3 4 0 11       
170 AGILE_18.pdf 0 0 0 0 0 1 7 61 3 0 72       
171 171-revised-final.pdf 0 0 0 0 0 4 2 22 10 0 38       
172 AGILE18 short paper submission final ZG PM.pdf 0 4 0 0 0 0 4 52 20 0 80       
173 short-paper-173.pdf 10 1 0 3 1 1 2 16 2 0 36       
41_AGILE_2018_Agung_Indrajit-rev.pdf 0 0 0 0 1 0 9 89 0 0 99       
48 paper 48.pdf 0 0 0 0 0 0 0 34 11 0 45       
49 AGILE_poster_Final.pdf 0 0 0 0 0 0 1 5 1 0 7       
51 Short_Paper_ID_51.pdf 0 0 0 0 2 10 3 31 0 0 46       
52 Spatial Vision Analysis of Non Spatial Data.pdf 0 0 0 0 0 0 7 40 13 0 60       
53 AGILE_2018_ShortPaperID53_Dane.pdf 0 0 0 0 2 0 3 32 8 0 45       
54 PosterSubmission_DingMa3.pdf 0 0 0 0 0 0 1 12 0 0 13       
55 Collaboration between Science, Practice, Citizens final.pdf 0 0 0 0 1 1 5 17 4 0 28       
57_Heinzlef-Coping with urban floods_a special decision-support system to improve resilience.pdf 0 0 0 0 0 0 0 7 5 0 12       
58 agileSEnviroPoster58.pdf 0 0 0 0 4 1 3 14 2 0 24       
59 ShortPaper59_revised.pdf 0 0 0 0 0 11 2 50 0 0 63       
62 Revised_AGILE_Poster_20180408.pdf 0 0 0 1 0 0 0 10 2 0 13       
63 Revised_Document_Westerholt.pdf 0 0 0 0 0 0 10 18 15 0 43       
64 short_paper_64.pdf 0 0 0 0 0 2 2 2 3 0 9       
65 Paper_ID_65_AGILE2018_Landuse_Characterisation.pdf 0 0 0 0 1 0 1 26 6 1 35       
66 Kernel Density Estimation (KDE) vs. Hot-Spot Analysis - Detecting Criminal Hot Spots in the City of San Francisco_UPDATE.pdf 0 0 0 0 3 1 5 26 11 0 46       
67 AGILE_2018_Villette_Purves.pdf 0 0 0 0 0 0 4 13 7 0 24       
68 68 Brox et al. AGILE_short paper brain drain after review final 3.4.18.pdf 0 0 0 0 0 0 0 4 11 0 15       
69 AGILE_2018_Geocoding_Social_Media_Messages.pdf 0 0 0 0 1 2 12 18 11 0 44       
70 AGILE_Paper_Final_Lund_2018_Ongaya_Kizito.pdf 0 0 0 0 1 0 0 12 2 0 15       
71_finalized.pdf 1 0 0 4 2 1 1 91 1 0 101       
72 short_paper_72.pdf 0 0 0 0 0 0 4 18 7 0 29       
73 AGILE_2018_shortpaper_revised_for_Libreoffice.pdf 0 0 0 0 0 4 1 0 5 0 10       
74 Training SegNet for Cropland Classification of High Resolution Remote Sensing Images.pdf 0 0 0 0 0 2 3 14 17 0 36       
75 Simulating multiple land use changes by incorporating deep belief network into cellular automata, a case study in BEIJING-TIANJIN-HEBEI region, China.pdf 0 0 0 0 0 3 10 25 6 0 44       
76 AGILE_2018_short_review.pdf 0 0 0 0 5 0 19 86 3 0 113       
77 Poster 77 - SharingBuidingInformation version - 2018-03-27.pdf 0 0 0 0 0 0 9 27 2 0 38       
80 RSanya_AGILE2018_shortPaperID80.pdf 0 0 0 0 0 0 4 40 6 0 50       
81 AGILE_2018_81 revision.pdf 0 0 0 0 0 0 0 3 1 0 4       
82 seusn_poster_agile2018-poster82.pdf 0 0 0 0 0 0 0 1 3 0 4       
83 AGILE_20180405_PerOla.pdf 0 0 0 1 0 0 1 3 1 0 6       
84_Agile_Yuanxuan_Submission_V3.pdf 0 0 0 0 0 0 1 16 8 0 25       
86 richter etal agile2018_final.pdf 0 0 0 0 0 0 5 1 8 0 14       
87 Short Paper ID.pdf 0 0 0 0 2 0 12 12 12 0 38       
89 AGILE_2018_Rivised Paper.pdf 0 0 0 0 7 9 13 56 1 5 91       
90 AGILE_2018_poster90.pdf 0 0 0 0 0 0 0 0 1 0 1       
91 POSTER 91_Spatial estimation _xiaoqian LIU.pdf 0 0 0 0 0 0 1 20 4 0 25       
93 Agile_paper_sub.pdf 0 0 0 2 0 0 8 36 7 0 53       
94 Short_Paper_ID_94_AGILE_2018_UAS_Mission_Support_Final_2018_04_10.pdf 0 0 0 0 2 0 1 24 3 0 30       
96 Poster 96.pdf 0 0 0 0 0 0 0 1 2 0 3       
98 agile_2018_short_paper_final.pdf 0 1 0 0 0 19 6 18 16 0 60       
99 TopicWave_poster99_revision_final.pdf 0 0 0 0 0 0 3 13 0 0 16       
Petter_AGILE_2018_Education_Final.pdf 0 0 0 0 9 0 2 1 3 0 15       
Petter2_AGILE_2018_Final_Paper_Uganda.pdf 0 0 0 0 0 0 1 24 6 0 31       
Yanzi_Poster paper_AGILE_0605_1.pdf 0 0 0 0 0 2 11 4 4 0 21       
Total 20 12 0 90 207 377 635 3536 827 17 5.72e+03

What are the most frequently used words (not stems)?

# Count, for each given word, the number of distinct papers (`id`) in
# `no_stop_words_2018` that contain it.
#
# the_word: character vector of words to look up.
# Returns an integer vector with one element per entry of `the_word`, named by
# the words; `integer(0)` for empty input (`vapply()` keeps the type stable).
countPapersUsingWord <- function(the_word) {
  vapply(the_word, function(w) {
    no_stop_words_2018 %>%
      filter(word == w) %>%
      distinct(id) %>%
      nrow()
  }, FUN.VALUE = integer(1))
}

# The 20 most frequent words: total occurrences (`n`), the number of papers
# using the word (`# papers`), and the rank (`place`) as first column.
top_words_2018 <- no_stop_words_2018 %>%
  group_by(word) %>%
  tally() %>%
  arrange(desc(n)) %>%
  head(20) %>%
  mutate(`# papers` = countPapersUsingWord(word)) %>%
  # seq_len() also yields a valid (empty) rank column for an empty table
  add_column(place = seq_len(nrow(.)), .before = 1)

top_words_2018
place word n # papers
1 data 3303 122
2 spatial 1393 110
3 information 1199 118
4 analysis 860 110
5 model 807 89
6 time 779 95
7 map 757 86
8 study 697 96
9 results 661 106
10 urban 552 72
11 research 540 111
12 system 505 75
13 land 503 51
14 models 487 73
15 location 482 76
16 user 468 57
17 users 463 62
18 city 440 59
19 gis 438 78
20 true 428 112

What are the top word stems?

# Count, for each given word stem, the number of distinct papers (`id`) in
# `no_stop_stems_2018` that contain it.
#
# the_stem: character vector of word stems to look up.
# Returns an integer vector with one element per entry of `the_stem`, named by
# the stems; `integer(0)` for empty input (`vapply()` keeps the type stable).
countPapersUsingStem <- function(the_stem) {
  vapply(the_stem, function(s) {
    no_stop_stems_2018 %>%
      filter(word_stem == s) %>%
      distinct(id) %>%
      nrow()
  }, FUN.VALUE = integer(1))
}

# The 20 most frequent word stems: total occurrences (`n`), the number of
# papers using the stem (`# papers`), and the rank (`place`) as first column.
top_stems_2018 <- no_stop_stems_2018 %>%
  group_by(word_stem) %>%
  tally() %>%
  arrange(desc(n)) %>%
  head(20) %>%
  mutate(`# papers` = countPapersUsingStem(word_stem)) %>%
  # seq_len() also yields a valid (empty) rank column for an empty table
  add_column(place = seq_len(nrow(.)), .before = 1)

top_stems_2018
place word_stem n # papers
1 data 3303 122
2 model 1567 104
3 spatial 1541 110
4 map 1389 104
5 inform 1254 119
6 studi 940 108
7 result 939 116
8 user 931 72
9 time 861 99
10 analysi 860 110
11 system 850 97
12 locat 834 98
13 method 742 103
14 develop 692 110
15 research 668 113
16 geograph 640 98
17 process 635 99
18 citi 583 70
19 urban 583 76
20 approach 576 101

Word cloud based on word stems

# Draw a two-column figure: a word stem cloud on the left and a table of the
# top word stems on the right, each with a title cell above it.
#
# word_stem_data:    data frame with a `word_stem` column, one row per occurrence.
# top_stem_data:     table of top stems, drawn as-is via `grid.table()`.
# year:              label used in both titles.
# minimum_occurence: stems with fewer occurrences are left out of the cloud.
# fp_count:          number of full papers, shown in the caption.
# op_count:          number of short papers/posters, shown in the caption.
#
# Called for its plotting side effect; returns `NULL` invisibly. The graphical
# parameters are restored on exit, also when plotting fails part-way.
wordStemPlot <- function(word_stem_data, top_stem_data, year, minimum_occurence, fp_count, op_count) {

  cloud_words <- word_stem_data %>%
    group_by(word_stem) %>%
    tally() %>%
    filter(n >= minimum_occurence) %>%
    arrange(desc(n))

  def.par <- par(no.readonly = TRUE)
  on.exit(par(def.par), add = TRUE)
  par(mar = rep(0, 4))
  # 2 x 2 grid filled by row: cells 1 and 2 hold the titles, cell 3 the cloud
  # and cell 4 the table (use layout.show() on the result to inspect it)
  layout(mat = matrix(data = c(1, 2, 3, 4), nrow = 2, ncol = 2, byrow = TRUE),
         widths = c(lcm(8), lcm(8)),
         heights = c(lcm(2), lcm(11)))

  # cell 1: title and caption of the cloud
  # NOTE(review): the "of N" figure is the summed occurrence count of the
  # displayed stems, not the number of distinct stems - confirm the wording.
  plot.new()
  text(0.5, 0.5, paste0("Word stem cloud of AGILE ", year, " Submissions"), font = 2)
  text(0.5, 0.15, paste0("Based on ", fp_count, " full papers and ", op_count, " short papers/posters.\n",
                        "Showing ", nrow(cloud_words), " of ", sum(cloud_words$n),
                        " word stems occuring at least ", minimum_occurence, " times."), font = 1, cex = 0.7)

  # cell 2: title and caption of the table
  plot.new()
  text(0.5, 0.5, paste0("Top word stems of AGILE ", year, " Submissions"), font = 2)
  text(0.5, 0.15, paste0("Code available at https://github.com/nuest/\nreproducible-research-and-giscience"), font = 1, cex = 0.7)

  # cell 3: the cloud; `colors` is spelled out instead of relying on partial
  # argument matching
  wordcloud(cloud_words$word_stem, cloud_words$n,
            max.words = Inf,
            random.order = FALSE,
            fixed.asp = FALSE,
            rot.per = 0,
            colors = brewer.pal(8, "Dark2"))

  # cell 4: advance to the next base graphics cell and draw the grid table
  # into it, thx to https://stackoverflow.com/a/25194694/261210
  frame()
  vps <- baseViewports()
  pushViewport(vps$inner, vps$figure, vps$plot)
  grid.table(as.matrix(top_stem_data),
             theme = ttheme_minimal(base_size = 11,
                                    padding = unit(c(10, 5), "pt"))
             )
  popViewport(3)

  invisible(NULL)
}
# The minimum occurrence was determined by manual testing so that all words
# could be plotted.
wordStemPlot(
  word_stem_data = no_stop_stems_2018,
  top_stem_data = top_stems_2018,
  year = "2018",
  minimum_occurence = 200,
  fp_count = full_papers_2018,
  op_count = other_papers_2018
)