# Load packages
library(haven)
## Warning: package 'haven' was built under R version 3.6.2
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.6.2
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.2
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## βœ“ ggplot2 3.3.5     βœ“ dplyr   1.0.7
## βœ“ tibble  3.1.6     βœ“ stringr 1.4.0
## βœ“ readr   2.1.1     βœ“ forcats 0.5.1
## βœ“ purrr   0.3.4
## Warning: package 'ggplot2' was built under R version 3.6.2
## Warning: package 'tibble' was built under R version 3.6.2
## Warning: package 'readr' was built under R version 3.6.2
## Warning: package 'purrr' was built under R version 3.6.2
## Warning: package 'dplyr' was built under R version 3.6.2
## Warning: package 'forcats' was built under R version 3.6.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(sjlabelled)
## Warning: package 'sjlabelled' was built under R version 3.6.2
## 
## Attaching package: 'sjlabelled'
## The following object is masked from 'package:forcats':
## 
##     as_factor
## The following object is masked from 'package:dplyr':
## 
##     as_label
## The following object is masked from 'package:ggplot2':
## 
##     as_label
## The following objects are masked from 'package:haven':
## 
##     as_factor, read_sas, read_spss, read_stata, write_sas, zap_labels
library(labelled)
## 
## Attaching package: 'labelled'
## The following objects are masked from 'package:sjlabelled':
## 
##     copy_labels, remove_labels, to_character, to_factor, val_labels
library(readr)
library(ggplot2)
library(igraph)
## Warning: package 'igraph' was built under R version 3.6.2
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
## 
##     compose, simplify
## The following object is masked from 'package:tibble':
## 
##     as_data_frame
## The following object is masked from 'package:tidyr':
## 
##     crossing
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library(factoextra)
## Warning: package 'factoextra' was built under R version 3.6.2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(graphlayouts)
## Warning: package 'graphlayouts' was built under R version 3.6.2
library(expss)
## Warning: package 'expss' was built under R version 3.6.2
## Loading required package: maditr
## 
## To get total summary skip 'by' argument: take_all(mtcars, mean)
## 
## Attaching package: 'maditr'
## The following objects are masked from 'package:dplyr':
## 
##     between, coalesce, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
## The following object is masked from 'package:readr':
## 
##     cols
## 
## Attaching package: 'expss'
## The following object is masked from 'package:igraph':
## 
##     %u%
## The following object is masked from 'package:labelled':
## 
##     is.labelled
## The following object is masked from 'package:sjlabelled':
## 
##     read_spss
## The following objects are masked from 'package:stringr':
## 
##     fixed, regex
## The following objects are masked from 'package:dplyr':
## 
##     compute, contains, na_if, recode, vars
## The following objects are masked from 'package:purrr':
## 
##     keep, modify, modify_if, when
## The following object is masked from 'package:ggplot2':
## 
##     vars
## The following objects are masked from 'package:tidyr':
## 
##     contains, nest
## The following objects are masked from 'package:haven':
## 
##     is.labelled, read_spss
library(networkD3)
library(magrittr)
## Warning: package 'magrittr' was built under R version 3.6.2
## 
## Attaching package: 'magrittr'
## The following objects are masked from 'package:expss':
## 
##     and, equals, not, or
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library(htmlwidgets)
## Warning: package 'htmlwidgets' was built under R version 3.6.2
## 
## Attaching package: 'htmlwidgets'
## The following object is masked from 'package:networkD3':
## 
##     JS
library(htmltools)
## Warning: package 'htmltools' was built under R version 3.6.2
# Load dataset 
bh <- read_csv('https://docs.google.com/spreadsheets/d/e/2PACX-1vRK1ZSJueUrd7uXUGrteX5QQgUiDhXnl8XfF9ijc4xcxuC3y21kr7QoCuwbQsz8xwtcX6859Gc_djBL/pub?output=csv')
## Rows: 695 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): gender, mode_shop_v, shopper_type, state_v, region, type_serv_v, f...
## dbl (12): id, age, mode_shop, freq_use_gen, marital, education, employment, ...
## 
## β„Ή Use `spec()` to retrieve the full column specification for this data.
## β„Ή Specify the column types or set `show_col_types = FALSE` to quiet this message.
print(bh)
## # A tibble: 695 Γ— 19
##        id   age gender mode_shop mode_shop_v freq_use_gen shopper_type   marital
##     <dbl> <dbl> <chr>      <dbl> <chr>              <dbl> <chr>            <dbl>
##  1 100020    50 Male           1 Smartphone             5 Heavy Shopper        1
##  2 100020    50 Male           1 Smartphone             5 Heavy Shopper        1
##  3 100020    50 Male           1 Smartphone             5 Heavy Shopper        1
##  4 100022    35 Male           3 Tablet                 4 Frequent Shop…       2
##  5 100022    35 Male           3 Tablet                 4 Frequent Shop…       2
##  6 100035    25 Male           2 Computer               6 Heavy Shopper        2
##  7 100035    25 Male           2 Computer               6 Heavy Shopper        2
##  8 100035    25 Male           2 Computer               6 Heavy Shopper        2
##  9 100045    40 Male           1 Smartphone             7 Heavy Shopper        2
## 10 100045    40 Male           1 Smartphone             7 Heavy Shopper        2
## # … with 685 more rows, and 11 more variables: education <dbl>,
## #   employment <dbl>, income <dbl>, state <dbl>, state_v <chr>, region <chr>,
## #   area <dbl>, type_serv <dbl>, type_serv_v <chr>, freq_use_serv <dbl>,
## #   freq_use_serv_v <chr>
dim(bh)
## [1] 695  19
names(bh)
##  [1] "id"              "age"             "gender"          "mode_shop"      
##  [5] "mode_shop_v"     "freq_use_gen"    "shopper_type"    "marital"        
##  [9] "education"       "employment"      "income"          "state"          
## [13] "state_v"         "region"          "area"            "type_serv"      
## [17] "type_serv_v"     "freq_use_serv"   "freq_use_serv_v"
bh <- data.frame(bh)
# Retrieves the participants-types of service used connections to be saved under an graph object called "g_bh" (edgelist); 
# "freq_use_serv" and "region" are edge. "id" and "type_serv" are vertex. 

g_bh <- graph.data.frame(bh[,c("id", "type_serv_v", "freq_use_serv", "region")])
g_bh
## IGRAPH a4f10c3 DN-- 211 695 -- 
## + attr: name (v/c), freq_use_serv (e/n), region (e/c)
## + edges from a4f10c3 (vertex names):
##  [1] 100020->Food & Beverages 100020->Parcel           100020->Rideshare       
##  [4] 100022->Convenience      100022->Rideshare        100035->Grocery         
##  [7] 100035->Home Project     100035->Pharmacy         100045->Alcohol         
## [10] 100045->Convenience      100045->Essential        100045->Food & Beverages
## [13] 100045->Grocery          100045->Home Project     100045->Homecare        
## [16] 100045->Laundry          100045->Pharmacy         100052->Alcohol         
## [19] 100052->Essential        100052->Food & Beverages 100052->Grocery         
## [22] 100052->Home Project     100052->Laundry          100052->Parcel          
## + ... omitted several edges
# Adds the two-mode structure to the graph "g_bh"

V(g_bh)$type <- V(g_bh)$name %in% bh$id
table(V(g_bh)$type)[2]
## TRUE 
##  200
i<-table(V(g_bh)$type)[2]
# Gets centrality measures

cent_bh <- data.frame(bet=betweenness(g_bh, normalized = T, directed = FALSE)/max(betweenness(g_bh, normalized = T, directed = FALSE)), eig = evcent(g_bh)$vector, degree = degree(g_bh, mode="total")) 
cent_bh$name <- rownames(cent_bh) 
head(cent_bh);tail(cent_bh)
##                 bet        eig degree   name
## 100020 0.0011950355 0.09192867      3 100020
## 100022 0.0005284214 0.05043208      2 100022
## 100035 0.0011999189 0.08567962      3 100035
## 100045 0.0091106071 0.21456374      9 100045
## 100052 0.0066798761 0.20216549      8 100052
## 100093 0.0000000000 0.05194816      1 100093
##                     bet       eig degree         name
## Home Project 0.03896414 0.2151311     30 Home Project
## Pharmacy     0.11004896 0.4341981     55     Pharmacy
## Alcohol      0.04154360 0.3365041     40      Alcohol
## Essential    0.16448031 0.5027003     67    Essential
## Homecare     0.01507444 0.1633278     22     Homecare
## Laundry      0.01895451 0.2513033     29      Laundry
# Gets time invariant actor attributes

actor_bh <- bh[!duplicated(bh$id), c("id", "mode_shop_v", "freq_use_gen", "area", "shopper_type")]
head(actor_bh)
##        id mode_shop_v freq_use_gen area     shopper_type
## 1  100020  Smartphone            5    1    Heavy Shopper
## 4  100022      Tablet            4    2 Frequent Shopper
## 6  100035    Computer            6    1    Heavy Shopper
## 9  100045  Smartphone            7    1    Heavy Shopper
## 18 100052  Smartphone            6    2    Heavy Shopper
## 26 100093  Smartphone            3    2 Frequent Shopper
# Interactive Visualization

V(g_bh)$label <- V(g_bh)$name
V(g_bh)$name <- 1:length(V(g_bh))
# Gets edge list from graph, also any other attribute at the edge level to be included in the mapping

links_bh <- as.data.frame(cbind(get.edgelist(g_bh, names=TRUE), E(g_bh)$freq_use_serv, E(g_bh)$region))

#Needs to be numeric
links_bh$V1 <- as.numeric(as.character(links_bh$V1))
links_bh$V2 <- as.numeric(as.character(links_bh$V2))
links_bh$V3 <- as.numeric(as.character(links_bh$V3))

head(links_bh)
##   V1  V2 V3      V4
## 1  1 201  4   South
## 2  1 202  2   South
## 3  1 203  5   South
## 4  2 204  5 Midwest
## 5  2 203  5 Midwest
## 6  3 205  6    West
str(links_bh)
## 'data.frame':    695 obs. of  4 variables:
##  $ V1: num  1 1 1 2 2 3 3 3 4 4 ...
##  $ V2: num  201 202 203 204 203 205 206 207 208 204 ...
##  $ V3: num  4 2 5 5 5 6 6 6 7 5 ...
##  $ V4: Factor w/ 4 levels "Midwest","Northeast",..: 3 3 3 1 1 4 4 4 3 3 ...
colnames(links_bh)<-c("source", "target", "value", "region")
# Indexing -- !Counts begin at zero in computer programming (Javascript)!
links_bh[,1:2] <- (links_bh[,1:2]-1)

dim(links_bh)
## [1] 695   4
# Add attributes at the actor level

V(g_bh)$mode_shop <- actor_bh$mode_shop_v[match(V(g_bh)$label, actor_bh$id)]
V(g_bh)$shopper_type <- actor_bh$shopper_type[match(V(g_bh)$label, actor_bh$id)]
V(g_bh)$freq_use_gen <- actor_bh$freq_use_gen[match(V(g_bh)$label, actor_bh$id)]
V(g_bh)$area <- actor_bh$area[match(V(g_bh)$label, actor_bh$id)]
V(g_bh)$size <- cent_bh$bet[match(V(g_bh)$label, cent_bh$name)]

summary(V(g_bh)$size)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000000 0.0007439 0.0017357 0.0147945 0.0041166 1.0000000
summary(actor_bh$shopper_type)
##    Length     Class      Mode 
##       200 character character
summary(actor_bh$mode_shop)
##    Length     Class      Mode 
##       200 character character
summary(actor_bh$freq_use_gen)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   3.000   4.000   3.885   5.000   7.000
summary(actor_bh$area)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   2.000   1.725   2.000   3.000
# Get vertex list (nodes list)

nodes_bh <- data.frame(name=c(
  paste("ID: ", V(g_bh)$label[1:i], 
        " Type of Shopper: ", V(g_bh)$freq_use_gen[1:i],
        " Mode of Shop: ", V(g_bh)$mode_shop[1:i], sep=""), 
        V(g_bh)$label[(i+1):length(V(g_bh)$label)]),
        freq_use_gen = V(g_bh)$freq_use_gen,
        size = abs(V(g_bh)$size))

#Name of 'Type of Service' rather than code
nodes_bh$name[(i+1):length(V(g_bh)$label)] <- bh$type_serv_v[match(nodes_bh$name[(i+1):length(V(g_bh)$label)], bh$type_serv_v)]
head(nodes_bh);tail(nodes_bh)
##                                                     name freq_use_gen
## 1 ID: 100020 Type of Shopper: 5 Mode of Shop: Smartphone            5
## 2     ID: 100022 Type of Shopper: 4 Mode of Shop: Tablet            4
## 3   ID: 100035 Type of Shopper: 6 Mode of Shop: Computer            6
## 4 ID: 100045 Type of Shopper: 7 Mode of Shop: Smartphone            7
## 5 ID: 100052 Type of Shopper: 6 Mode of Shop: Smartphone            6
## 6 ID: 100093 Type of Shopper: 3 Mode of Shop: Smartphone            3
##           size
## 1 0.0011950355
## 2 0.0005284214
## 3 0.0011999189
## 4 0.0091106071
## 5 0.0066798761
## 6 0.0000000000
##             name freq_use_gen       size
## 206 Home Project           NA 0.03896414
## 207     Pharmacy           NA 0.11004896
## 208      Alcohol           NA 0.04154360
## 209    Essential           NA 0.16448031
## 210     Homecare           NA 0.01507444
## 211      Laundry           NA 0.01895451
nodes_bh$group <- NA
nodes_bh$group[1:i] <- cut(as.numeric(nodes_bh$freq_use_gen[1:i]), c(0,3,5,8), right = FALSE)

table(is.na(nodes_bh$group)) 
## 
## FALSE  TRUE 
##   200    11
table(nodes_bh$group) 
## 
##  1  2  3 
## 40 91 69
head(nodes_bh[is.na(nodes_bh$group),],20)
##                 name freq_use_gen       size group
## 201 Food & Beverages           NA 0.66406360    NA
## 202           Parcel           NA 0.03005758    NA
## 203        Rideshare           NA 0.46113815    NA
## 204      Convenience           NA 0.08675819    NA
## 205          Grocery           NA 1.00000000    NA
## 206     Home Project           NA 0.03896414    NA
## 207         Pharmacy           NA 0.11004896    NA
## 208          Alcohol           NA 0.04154360    NA
## 209        Essential           NA 0.16448031    NA
## 210         Homecare           NA 0.01507444    NA
## 211          Laundry           NA 0.01895451    NA
nodes_bh$group <- ifelse(is.na(nodes_bh$group), "Type of Service", ifelse(nodes_bh$group==1, "Occasional Shopper", ifelse(nodes_bh$group==2, "Frequent Shopper", ifelse(nodes_bh$group==3, "Heavy Shopper", 9999))))

counts <- data.frame(table(nodes_bh$group))

counts$labels <- paste(counts$Var1, ", N= ", counts$Freq, sep="")
nodes_bh$groups <- counts$labels[match(nodes_bh$group, counts$Var1)] 
head(nodes_bh) 
##                                                     name freq_use_gen
## 1 ID: 100020 Type of Shopper: 5 Mode of Shop: Smartphone            5
## 2     ID: 100022 Type of Shopper: 4 Mode of Shop: Tablet            4
## 3   ID: 100035 Type of Shopper: 6 Mode of Shop: Computer            6
## 4 ID: 100045 Type of Shopper: 7 Mode of Shop: Smartphone            7
## 5 ID: 100052 Type of Shopper: 6 Mode of Shop: Smartphone            6
## 6 ID: 100093 Type of Shopper: 3 Mode of Shop: Smartphone            3
##           size            group                  groups
## 1 0.0011950355    Heavy Shopper    Heavy Shopper, N= 69
## 2 0.0005284214 Frequent Shopper Frequent Shopper, N= 91
## 3 0.0011999189    Heavy Shopper    Heavy Shopper, N= 69
## 4 0.0091106071    Heavy Shopper    Heavy Shopper, N= 69
## 5 0.0066798761    Heavy Shopper    Heavy Shopper, N= 69
## 6 0.0000000000 Frequent Shopper Frequent Shopper, N= 91
# Network Visualization
netviz_bh <- forceNetwork(Links = links_bh, Nodes = nodes_bh,
                     Source = 'source', Target = 'target',
                     NodeID = 'name',
                     Group = 'groups', # color nodes by group calculated earlier
                     charge = -200, # node repulsion
                     #linkDistance = JS("function(d) { return d.linkDistance; }"), 
                     linkDistance = JS("function(d){return d.value}"),
                     #linkWidth = JS("function(d) { return Math.sqrt(d.value)*2; }"),
                     linkWidth = 1,
                     opacity = 0.8,
                     Value = "value",
                     Nodesize = 'size', 
                     radiusCalculation = JS("Math.sqrt(d.nodesize*30)+4"),
                     zoom = T, 
                     fontSize=10,
                     bounded= F,
                     legend= TRUE,
                     linkColour = ifelse(links_bh$region == "Northeast", "#999999", ifelse(links_bh$region == "South", "#999999", "#999999")),
                     colourScale = JS("d3.scaleOrdinal(d3.schemeCategory10)"))
# HTML Style

HTMLaddons <- 
  "function(el, x) { 
d3.select('body').style('background-color', ' #4f4f4f ')
d3.selectAll('.legend text').style('fill', 'white') 
 d3.selectAll('.link').append('svg:title')
      .text(function(d) { return 'Frequency of Use: ' + d.value + ', Region: ' + d.region ; })
  var options = x.options;
  var svg = d3.select(el).select('svg')
  var node = svg.selectAll('.node');
  var link = svg.selectAll('link');
  var mouseout = d3.selectAll('.node').on('mouseout');
  function nodeSize(d) {
    if (options.nodesize) {
      return eval(options.radiusCalculation);
    } else {
      return 6;
    }
  }

  
d3.selectAll('.node').on('click', onclick)

  function onclick(d) {
    if (d3.select(this).on('mouseout') == mouseout) {
      d3.select(this).on('mouseout', mouseout_clicked);
    } else {
      d3.select(this).on('mouseout', mouseout);
    }
  }

  function mouseout_clicked(d) {
    node.style('opacity', +options.opacity);
    link.style('opacity', +options.opacity);

    d3.select(this).select('circle').transition()
      .duration(750)
      .attr('r', function(d){return nodeSize(d);});
    d3.select(this).select('text').transition()
    
      .duration(1250)
      .attr('x', 0)
      .style('font', options.fontSize + 'px ');
  }

}
"

# netviz_bh$x$links_bh$linkDistance <- (1/links_bh$value)*500 (Code NOT IN USED)
netviz_bh$x$links_bh$region <- links_bh$region
onRender(netviz_bh, HTMLaddons)