# ==============================================================================
# Descriptive statistics on SNA software tools
# ==============================================================================
# The script reads in the survey data
# Cleans the relevant variable
# Reduces the number of categories
# Creates the descriptive statistics
# Survey question: Q7 - Which are your preferred social network analysis software tools
# Original input data: 'NetCanvas - General Survey 1_May 16, 2017_03.18.csv'
# ==============================================================================
# (1) Loading packages.
library(car)
library(data.table)
# ==============================================================================
# (2) Set working directory and data
# NOTE: the former rm(list = ls()) call was removed on purpose. The sections of
# this script shown here assign every object before using it, so clearing the
# global environment added nothing and would silently delete unrelated objects
# of whoever sources this file.
whoareyou <- "MSS"
# Project folder on the shared drive; `whoareyou` selects the user-specific
# top-level directory.
root <- paste0(
  "S:/", whoareyou,
  "/Research/Projects/netCanvas/Outreach Core/Community Feedback/Community Survey/Survey Analysis/"
)
# The working directory is still switched to the project folder so that any
# relative path used further down keeps resolving as before. setwd() stops the
# script with an error if the folder is not reachable.
setwd(root)
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
d <- read.csv("inputData20170318.csv", header = TRUE, sep = ",")
# ==============================================================================
# (3) Cleaning up the mess in the software related questions manually
# (3.1) Recode rules shared by Q7_1, Q7_2 and Q7_3
# Each element maps one canonical software label (the element name) to the raw
# free-text answers that are collapsed into it. Matching is exact, i.e. case-
# and whitespace-sensitive, which is why variants such as ' R' and 'R ' are
# listed separately. The groups are applied in the order given here.
# Aliases are pasted into car::recode() rule text, so they must not contain a
# single quote, '=', ':' or ';'.
software_aliases <- list(
  R = c(
    "Statnet package in R (mostly sna and network packages)",
    "Egonet",
    "igraph",
    "igraph (R)",
    "R sna, igraph",
    "R statnet, network, sna",
    "R igraph",
    "RSiena",
    "egonet",
    "sna",
    "Rs SNA",
    "sienna",
    "EpiModel",
    "statnet",
    "Siena",
    "Packages in R (sna, igraph, network, RSiena)",
    "igraph in R",
    "EgoNet",
    "statnet/sna",
    "Statnet (R)",
    "R/sna",
    "ergm",
    "Rs ggplot2",
    "R Package",
    "statnet (R)",
    "Statnet",
    "R more generally",
    "network, sna and igraph (R applications)",
    "raw R",
    "R sna network rsiena",
    "Siena (R)",
    " R",
    "R ",
    "Igraph"
  ),
  Ucinet = c(
    "UCInet",
    "UCI Net",
    "ucinet",
    "UCINET",
    "UCINet",
    "Netdraw",
    "UciNet",
    "NetDraw"
  ),
  Python = c(
    "python igraph",
    "Python/networkx",
    "programming directly in java/C++/python",
    "NetworkX (Python)"
  ),
  Ora = c("ORA", "ora"),
  NodeXl = c("NodeXL", "nodexl", "Nodexl"),
  Pajek = c("PAJEK", "pajeck", "pajek"),
  Visone = c("visone", "VISONE"),
  Gephi = c("gephi", "GEPHI", "Gephi ", " Gephi"),
  Mpnet = c("MPNet", "PNET", "Pnet", "PNet"),
  Spss = c("SPSS/Stata")
)

# Apply every rule in `software_aliases` to `x` (a vector, factor or matrix of
# answers) and return the recoded object. One car::recode() call is issued per
# canonical label, with a rule of the form "c('alias1','alias2')='Label'", so
# type handling (e.g. factor in, factor out) is whatever car::recode() does.
recode_software <- function(x) {
  for (label in names(software_aliases)) {
    quoted <- paste0("'", software_aliases[[label]], "'", collapse = ",")
    x <- recode(x, paste0("c(", quoted, ")='", label, "'"))
  }
  x
}

# (3.2) Pooled, recoded answers of all three columns. This object is only a
# diagnostic aid (e.g. table(dd) lists answers that are not mapped yet); it is
# rebuilt from the cleaned columns in section (4).
dd <- recode_software(
  rbind(as.matrix(d$Q7_1), as.matrix(d$Q7_2), as.matrix(d$Q7_3))
)

# (3.3) Recode the three answer columns in place
d$Q7_1 <- recode_software(d$Q7_1)
d$Q7_2 <- recode_software(d$Q7_2)
d$Q7_3 <- recode_software(d$Q7_3)
# Overwrite six individual cells, addressed by (row, column) position, with an
# empty string. The original note for this step reads "netanalytics NA".
# NOTE(review): columns 21 and 22 are presumably two of the Q7 answer columns,
# and the blanked answers presumably end up as NA in section (4) - confirm
# against the input file. Positional indices silently point at the wrong cells
# if the input is re-exported, re-sorted or gains/loses a column.
d[c(62, 115, 143, 162, 175), 21] <- ""
d[179, 22] <- ""
# ==============================================================================
# (4) Descriptives on software use
# Helper: replace cells that are empty ("") or a single blank (" ") by NA,
# working column by column on a matrix.
blank_to_na <- function(m) {
  apply(m, 2, function(x) gsub("^$|^ $", NA, x))
}
# Missing first answers (Q7_1) are kept so that they show up as NA in the
# frequency table; missing second and third answers are dropped.
a <- blank_to_na(as.matrix(d$Q7_1))
b <- na.omit(blank_to_na(as.matrix(d$Q7_2)))
c <- na.omit(blank_to_na(as.matrix(d$Q7_3)))
# Stack all answers into one single-column matrix of mentions
dd <- rbind(a, b, c)
# table(dd, useNA = "always")
# Further reduction of categories: rarely mentioned tools become 'Other'
# (NOTE: 'Tableau' appears twice in the rule below; the repetition is harmless)
ddd <- recode(dd, "c('Atlas.ti',
'Cuttlefish',
'cytoscape',
'NetMiner',
'NVIVO',
'Ora',
'PARTNER',
'ptracker',
'RDS-A',
'SocNetV',
'Tableau',
'Tableau',
'VisuaLyzer')='Other'")
# General-purpose statistics packages are merged into one category
data <- recode(ddd, "c('SAS',
'Spss',
'Stata')='Spss/Sas/Stata'")
# Frequencies including the NA category, in ascending order of count
counts_with_na <- table(data, useNA = "always")
t <- counts_with_na[order(counts_with_na)]
dt <- data.table(t)
# Same frequencies without the NA category
counts_answered <- table(data)
tt <- counts_answered[order(counts_answered)]
dtt <- data.table(tt)
# Frequency table of how many times a software or software type was mentioned
# Note that 105 out of the 181 respondents did not answer the question at all (NA)
# (the listing below is a recorded run; it is not regenerated by the script)
dt
## data N
## 1: Mpnet 4
## 2: Visone 4
## 3: NodeXl 5
## 4: Python 5
## 5: Spss/Sas/Stata 6
## 6: Pajek 8
## 7: Gephi 12
## 8: Other 13
## 9: Ucinet 45
## 10: R 64
## 11: NA 105
# Plot the frequencies without the missing cases
plot(tt, main = "", sub = "", xlab = "Software", ylab = "N of times mentioned")

# ==============================================================================
# (5) file formats
# 'csv' and 'txt' file formats should be able to cover all of these software
# tools; however, the structure of these files highly depends on the specific
# software and/or the analysis