library(tidyr)
library(dplyr)
library(knitr)

kable(

Data

# Load the survey export (read.csv2 format), keeping text columns as
# factors, and wrap the result as a tibble.
data <- read.csv2("daten.csv", stringsAsFactors = TRUE) %>%
  tbl_df()
#table(data)
# Preview the first two responses.
head(data, 2)
## # A tibble: 2 x 7
##      ID
##   <int>
## 1     1
## 2     2
## # ... with 6 more variables: pckgs_last12months <fctr>,
## #   data_formats_in <fctr>, data_formats_intermed <fctr>,
## #   data_formats_out <fctr>, visualisations <fctr>, vis_pckgs <fctr>

Analysis

Used packages

# One row per respondent and named package: split the comma-separated
# answer into up to 12 columns, then stack those columns into long format.
packages <- data %>%
  separate(
    pckgs_last12months,
    into = paste0("pkg_", seq_len(12)),
    sep = ","
  ) %>%
  gather(key = pkg_prio, value = pkg, pkg_1:pkg_12) %>%
  select(ID, pkg_prio, pkg)

# Number of mentions per package, most frequently named first.
# NOTE(review): na.omit() removes only NA rows; the rendered table still
# shows an unlabeled category (presumably empty answers) -- confirm whether
# it should be counted.
packages_counts <- packages %>%
  count(pkg) %>%
  na.omit() %>%
  arrange(desc(n))
kable(packages_counts)
pkg n
sp 7
ggplot2 4
raster 4
rgdal 4
spacetime 4
devtools 3
maptools 2
RColorBrewer 2
scidb 2
zoo 2
1
bfast 1
BFAST 1
biclust 1
custer 1
dplyr 1
fpc 1
gstat 1
igraph 1
INLA 1
knitr 1
landsat 1
maps 1
mapview 1
move 1
OpenStreetMap 1
plyr 1
reshape 1
rJava 1
rmarkdown 1
RPostgreSQL 1
RStoolbox 1
SciDBR 1
scidbst 1
SDMtools 1
Snowball CC 1
sos4R 1
spate 1
SpatioTemporal 1
spatstat 1
stats 1
stringr 1
strucchange 1
tm 1
trajectories 1
WordCloud 1

Packages and their dependencies

What are common dependencies of the used packages?

By the way, CRAN currently has 8943 packages.

library(tools)
#library(miniCRAN) # https://cran.r-project.org/web/packages/miniCRAN/vignettes/miniCRAN-dependency-graph.html

# Dependency information for every package that was named more than once.
# The package index is fetched a single time and handed to all three
# queries, so every column is derived from the same snapshot instead of
# each call doing its own implicit available.packages() lookup.
# (The index comes from the repositories set in getOption("repos").)
cran_db <- utils::available.packages()

packages_deps <- packages_counts %>%
  filter(n > 1) %>%
  mutate(
    # direct dependencies
    deps = tools::package_dependencies(pkg, db = cran_db),
    # dependencies of dependencies, followed transitively
    deps_recursive = tools::package_dependencies(
      pkg,
      db = cran_db,
      recursive = TRUE
    ),
    # packages that (transitively) depend on the package
    reverse_recursive = tools::package_dependencies(
      pkg,
      db = cran_db,
      reverse = TRUE,
      recursive = TRUE
    )
  )
#tools::package_dependencies(c("sp", "ggplot2"))

#first_order_deps <- table(unlist(packages_deps$deps))
#recursive_deps <- table(unlist(packages_deps$deps_recursive))
# Flatten the per-package recursive dependency lists into one vector and
# count how often each dependency occurs, most frequent first.
recursive_deps <- packages_deps$deps_recursive %>%
  unlist() %>%
  tbl_df() %>%
  rename(pkg = value) %>%
  count(pkg) %>%
  arrange(desc(n))

# Combine the packages named more than once with the most relevant
# recursive dependencies (those occurring more often than the median
# dependency) and add up both counts per package.
packages_with_deps <- bind_rows(
  filter(packages_counts, n > 1),
  filter(recursive_deps, n > median(n))
) %>%
  group_by(pkg) %>%
  summarise(n = sum(n)) %>%
  arrange(desc(n))
kable(packages_with_deps)
pkg n
sp 11
graphics 9
stats 9
utils 9
grDevices 8
grid 8
methods 8
lattice 7
ggplot2 4
raster 4
rgdal 4
spacetime 4
zoo 4
devtools 3
digest 3
maptools 2
RColorBrewer 2
Rcpp 2
scidb 2
tools 2

Data formats

# One row per respondent and input format: split the comma-separated
# answer into up to 5 columns and stack them into long format.
formats_in <- data %>%
  separate(
    data_formats_in,
    into = paste0("fmt_", seq_len(5)),
    sep = ","
  ) %>%
  gather(key = format, value = format_in, fmt_1:fmt_5) %>%
  select(ID, format_in)

# Number of mentions per input format, most frequent first.
formats_in_grouped <- formats_in %>%
  count(format_in) %>%
  na.omit() %>%
  arrange(desc(n))
kable(formats_in_grouped)
format_in n
csv 4
txt 3
json 2
raster 2
shp 2
array 1
ascii 1
dataframe 1
geojson 1
GeoTIFF 1
HDF 1
kml 1
kmz 1
NetCDF 1
postgresql 1
scidb arrays 1
sdidbst objects 1
tif 1
xlsx 1
xml 1
xsl 1
# One row per respondent and intermediate format: split the
# comma-separated answer into up to 5 columns and stack them.
formats_intermed <- data %>%
  separate(
    data_formats_intermed,
    into = paste0("fmt_", seq_len(5)),
    sep = ","
  ) %>%
  gather(key = format, value = format_intermed, fmt_1:fmt_5) %>%
  select(ID, format_intermed)

# Number of mentions per intermediate format, most frequent first.
# NOTE(review): na.omit() removes only NA rows; the rendered table still
# shows an unlabeled category (presumably empty answers) -- confirm whether
# it should be counted.
formats_intermed_grouped <- formats_intermed %>%
  count(format_intermed) %>%
  na.omit() %>%
  arrange(desc(n))
kable(formats_intermed_grouped)
format_intermed n
5
dataframe 2
array 1
raster 1
rData 1
scidb array 1
vector 1
# One row per respondent and output format: split the comma-separated
# answer into up to 5 columns and stack them into long format.
# The gather key is named `format`, matching the sibling input and
# intermediate format pipelines (it was misspelled `formato`); the key
# column is dropped by select(), so the result is unchanged.
formats_out <- data %>%
  separate(
    data_formats_out,
    into = paste0("fmt_", seq_len(5)),
    sep = ","
  ) %>%
  gather(key = format, value = format_out, fmt_1:fmt_5) %>%
  select(ID, format_out)

# Number of mentions per output format, most frequent first.
# NOTE(review): na.omit() removes only NA rows; the rendered table still
# shows an unlabeled category (presumably empty answers) -- confirm whether
# it should be counted.
formats_out_grouped <- formats_out %>%
  group_by(format_out) %>%
  summarise(n = n()) %>%
  na.omit() %>%
  arrange(desc(n))
kable(formats_out_grouped)
format_out n
txt 3
histogram 2
maps 2
pdf 2
scatter plot 2
shp 2
time series 2
1
box-whisker plot 1
csv 1
hdf 1
jpg 1
list 1
png 1
R 1
raster 1
rasterstack 1
Rdata 1
tif 1

Visualisations

# One row per respondent and visualisation type: split the
# comma-separated answer into up to 10 columns and stack them.
visualisations <- data %>%
  separate(
    visualisations,
    into = paste0("vis_", seq_len(10)),
    sep = ","
  ) %>%
  gather(key = vis_prio, value = vis, vis_1:vis_10) %>%
  select(ID, vis)

# Number of mentions per visualisation type, most frequent first.
# NOTE(review): na.omit() removes only NA rows; the rendered table still
# shows an unlabeled category (presumably empty answers) -- confirm whether
# it should be counted.
vis_grouped <- visualisations %>%
  count(vis) %>%
  na.omit() %>%
  arrange(desc(n))
kable(vis_grouped)
vis n
histogram 5
bar chart 4
maps 4
time series 4
box-whisker plot 3
line chart 3
dot chart 2
raster 2
scatter plot 2
sp 2
zoo 2
1
function graph 1
graphics 1
pictures 1
scatter pot 1
spacetime 1
videos 1
word cloud 1
xts 1

Visualisation packages

# One row per respondent and visualisation package: split the
# comma-separated answer into up to 10 columns and stack them.
vis_packages <- data %>%
  separate(
    vis_pckgs,
    into = paste0("pkg_", seq_len(10)),
    sep = ","
  ) %>%
  gather(key = pkg_prio, value = pkg, pkg_1:pkg_10) %>%
  select(ID, pkg)

# Number of mentions per visualisation package, most frequent first.
# NOTE(review): na.omit() removes only NA rows; the rendered table still
# shows an unlabeled category (presumably empty answers) -- confirm whether
# it should be counted.
vis_pkg_grouped <- vis_packages %>%
  count(pkg) %>%
  na.omit() %>%
  arrange(desc(n))
kable(vis_pkg_grouped)
pkg n
4
ggplot2 4
sp 3
raster 2
RColorBrewer 2
animate 1
clu 1
igrpah 1
landsat 1
maptools 1
mapview 1
OpenStreetMap 1
rasterVis 1
RStoolbox 1
SDMtools 1
spacetime 1
trajectories 1
WordCloud 1
zoo 1

Discussion

Conclusions

Meta

# Record the R version, platform, locale and attached/loaded packages of
# this run.
sessionInfo()
## R version 3.3.1 (2016-06-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.1 LTS
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] tools     stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] knitr_1.14  dplyr_0.5.0 tidyr_0.6.0
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.6     packrat_0.4.7-1 digest_0.6.10   assertthat_0.1 
##  [5] R6_2.1.2        DBI_0.5         formatR_1.4     magrittr_1.5   
##  [9] evaluate_0.9    highr_0.6       stringi_1.1.1   lazyeval_0.2.0 
## [13] rmarkdown_1.0   stringr_1.0.0   yaml_2.1.13     htmltools_0.3.5
## [17] tibble_1.1
# Print the current system date and time.
date()
## [1] "Sat Aug 13 15:48:16 2016"