# ==============================================================================
# Descriptive statistics on SNA software tools
# ==============================================================================

# This script reads in the survey data,
# cleans the relevant variables,
# reduces the number of categories, and
# creates the descriptive statistics.

# Survey question: Q7 - Which are your preferred social network analysis software tools
# Original input data: 'NetCanvas - General Survey 1_May 16, 2017_03.18.csv'

# ==============================================================================

# (1) Loading packages.

  library(car)
  library(data.table)

# ==============================================================================
# (2) Set working directory and data

  # Start from an empty global environment so objects left over from an earlier
  # session cannot leak into the analysis. Attached packages are not affected.
  rm(list = ls())

  # `whoareyou` selects the per-user folder on the S: drive
  # (presumably the analyst's initials - confirm before running elsewhere).
  whoareyou <- "MSS"
  root <- paste0("S:/", whoareyou, "/Research/Projects/netCanvas/Outreach Core/Community Feedback/Community Survey/Survey Analysis/")
  setwd(root)

  # Read the survey export; the first row holds the column names.
  # `TRUE` is spelled out because `T` is an ordinary variable that can be
  # reassigned, whereas `TRUE` is a reserved word.
  d <- read.csv("inputData20170318.csv", header = TRUE, sep = ",")
  
# ==============================================================================
# (3) Cleaning up the mess in the software related questions manually
  
  
  # (3.1) Mapping from raw free-text answers to harmonised software names
  #
  # Each element is named after the harmonised category and lists every raw
  # spelling that is folded into it. Matching is exact (case- and
  # whitespace-sensitive), which is why variants such as ' R' and 'R ' are
  # listed explicitly. Answers that do not appear here are left untouched.
  # The mapping is defined once so that the pooled data and the three answer
  # columns cannot drift apart.
    software_groups <- list(
      R = c(
        "Statnet package in R (mostly sna and network packages)",
        "Egonet",
        "igraph",
        "igraph (R)",
        "R sna, igraph",
        "R statnet, network, sna",
        "R igraph",
        "RSiena",
        "egonet",
        "sna",
        "Rs SNA",
        "sienna",
        "EpiModel",
        "statnet",
        "Siena",
        "Packages in R (sna, igraph, network, RSiena)",
        "igraph in R",
        "EgoNet",
        "statnet/sna",
        "Statnet (R)",
        "R/sna",
        "ergm",
        "Rs ggplot2",
        "R Package",
        "statnet (R)",
        "Statnet",
        "R more generally",
        "network, sna and igraph (R applications)",
        "raw R",
        "R sna network rsiena",
        "Siena (R)",
        " R",
        "R ",
        "Igraph"
      ),
      Ucinet = c(
        "UCInet",
        "UCI Net",
        "ucinet",
        "UCINET",
        "UCINet",
        "Netdraw",
        "UciNet",
        "NetDraw"
      ),
      Python = c(
        "python igraph",
        "Python/networkx",
        "programming directly in java/C++/python",
        "NetworkX (Python)"
      ),
      Ora = c("ORA", "ora"),
      NodeXl = c("NodeXL", "nodexl", "Nodexl"),
      Pajek = c("PAJEK", "pajeck", "pajek"),
      Visone = c("visone", "VISONE"),
      Gephi = c("gephi", "GEPHI", "Gephi ", " Gephi"),
      Mpnet = c("MPNet", "PNET", "Pnet", "PNet"),
      Spss = c("SPSS/Stata")
    )

    # Flat lookup vector: names are the raw spellings, values the categories.
    software_lookup <- setNames(
      rep(names(software_groups), lengths(software_groups)),
      unlist(software_groups, use.names = FALSE)
    )

    # Replace every raw answer found in `lookup` by its harmonised category.
    #
    # x      : character vector, one-column character matrix or factor of
    #          answers. Dimensions are preserved; NA and unlisted values are
    #          returned unchanged.
    # lookup : named character vector (names = raw spelling, value = category).
    #
    # Returns an object of the same shape as `x`. Factor input is returned as
    # a factor with sorted levels, which is what car::recode() did for the
    # factor columns produced by read.csv() in R < 4.0.
    harmonise_software <- function(x, lookup = software_lookup) {
      was_factor <- is.factor(x)
      values <- if (was_factor) as.character(x) else x
      hit <- match(values, names(lookup))
      matched <- !is.na(hit)
      # Guarded so that an input without any match keeps its original type.
      if (any(matched)) {
        values[matched] <- unname(lookup[hit[matched]])
      }
      if (was_factor) as.factor(values) else values
    }

  # (3.2) Pooled answers of Q7_1, Q7_2 and Q7_3 after harmonisation
  #       (useful for checking the mapping, e.g. with table(dd))
    dd <- harmonise_software(
      rbind(as.matrix(d$Q7_1), as.matrix(d$Q7_2), as.matrix(d$Q7_3))
    )

  # (3.3) Harmonise Q7_1, Q7_2 and Q7_3 in the survey data itself
    d$Q7_1 <- harmonise_software(d$Q7_1)
    d$Q7_2 <- harmonise_software(d$Q7_2)
    d$Q7_3 <- harmonise_software(d$Q7_3)


  # netanalytics NA
  # Overwrite six individual cells with the empty string. Cells are addressed
  # by row and column position, so this only holds for the exact input file
  # read above.
  # NOTE(review): presumably columns 21 and 22 are Q7 answer columns holding
  # entries that should not count as a software mention - confirm against
  # names(d).
  d[c(62, 115, 143, 162, 175), 21] <- ""
  d[179, 22] <- ""
  
# ==============================================================================
# (4) Descriptives on software use
  
  # Turn empty ('') and single-blank (' ') answers into NA.
  # m: character vector or matrix; returned with the same dimensions.
  blank_to_na <- function(m) {
    m[m %in% c("", " ")] <- NA
    m
  }

  # The three answer slots as one-column matrices. The objects are deliberately
  # not called `c` or `t`, which would mask base::c() and base::t().
  first_choice  <- blank_to_na(as.matrix(d$Q7_1))
  second_choice <- blank_to_na(as.matrix(d$Q7_2))
  third_choice  <- blank_to_na(as.matrix(d$Q7_3))

  # Missing values are dropped from the second and third slot only; the NAs of
  # the first slot are kept on purpose so that they appear in the NA row of
  # the frequency table below.
  second_choice <- na.omit(second_choice)
  third_choice  <- na.omit(third_choice)

  # Pool all mentions into one long one-column matrix.
  dd <- rbind(first_choice, second_choice, third_choice)

  # table(dd, useNA = "always")

  # Further reduction of categories: the tools listed here are collapsed into
  # 'Other' ...
  ddd <- recode(dd, "c('Atlas.ti',
               'Cuttlefish',
               'cytoscape',
               'NetMiner',
               'NVIVO',
               'Ora',
               'PARTNER',
               'ptracker',
               'RDS-A',
               'SocNetV',
               'Tableau',
               'VisuaLyzer')='Other'")

  # ... and the general-purpose statistics packages into a single category.
  # The result must be called `data`: table() uses the object name as the
  # name of the first column of `dt` / `dtt`.
  data <- recode(ddd, "c('SAS',
               'Spss',
               'Stata')='Spss/Sas/Stata'")

  # Frequencies in ascending order, including the missing answers ...
  counts_with_na <- table(data, useNA = "always")
  dt <- data.table(counts_with_na[order(counts_with_na)])

  # ... and without them (`tt` is also what gets plotted).
  tt <- table(data)
  tt <- tt[order(tt)]
  dtt <- data.table(tt)
  
  # Frequency table of how many times a software or software type was mentioned,
  # in ascending order of frequency.
  # Note that 105 out of the 181 respondents did not answer the question at all (NA).
  # The NA row counts blank answers in Q7_1 only: blanks in Q7_2 and Q7_3 are
  # removed before the three columns are pooled.
  dt
##               data   N
##  1:          Mpnet   4
##  2:         Visone   4
##  3:         NodeXl   5
##  4:         Python   5
##  5: Spss/Sas/Stata   6
##  6:          Pajek   8
##  7:          Gephi  12
##  8:          Other  13
##  9:         Ucinet  45
## 10:              R  64
## 11:             NA 105
  # Plot the frequencies without the missing cases
  # (`tt` is the table built without the NA category).
  plot(tt, main="", sub="", xlab="Software", ylab="N of times mentioned")

# ==============================================================================
# (5) file formats
  
  # 'csv' and 'txt' file formats should be able to cover all of these software tools;
  # however, the structure of these files depends heavily on the specific software
  # and/or the analysis