# Load packages
library(haven)
## Warning: package 'haven' was built under R version 3.6.2
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.6.2
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.2
## ββ Attaching packages βββββββββββββββββββββββββββββββββββββββ tidyverse 1.3.1 ββ
## β ggplot2 3.3.5 β dplyr 1.0.7
## β tibble 3.1.6 β stringr 1.4.0
## β readr 2.1.1 β forcats 0.5.1
## β purrr 0.3.4
## Warning: package 'ggplot2' was built under R version 3.6.2
## Warning: package 'tibble' was built under R version 3.6.2
## Warning: package 'readr' was built under R version 3.6.2
## Warning: package 'purrr' was built under R version 3.6.2
## Warning: package 'dplyr' was built under R version 3.6.2
## Warning: package 'forcats' was built under R version 3.6.2
## ββ Conflicts ββββββββββββββββββββββββββββββββββββββββββ tidyverse_conflicts() ββ
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(sjlabelled)
## Warning: package 'sjlabelled' was built under R version 3.6.2
##
## Attaching package: 'sjlabelled'
## The following object is masked from 'package:forcats':
##
## as_factor
## The following object is masked from 'package:dplyr':
##
## as_label
## The following object is masked from 'package:ggplot2':
##
## as_label
## The following objects are masked from 'package:haven':
##
## as_factor, read_sas, read_spss, read_stata, write_sas, zap_labels
library(labelled)
##
## Attaching package: 'labelled'
## The following objects are masked from 'package:sjlabelled':
##
## copy_labels, remove_labels, to_character, to_factor, val_labels
library(readr)
library(ggplot2)
library(igraph)
## Warning: package 'igraph' was built under R version 3.6.2
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
##
## compose, simplify
## The following object is masked from 'package:tibble':
##
## as_data_frame
## The following object is masked from 'package:tidyr':
##
## crossing
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(factoextra)
## Warning: package 'factoextra' was built under R version 3.6.2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(graphlayouts)
## Warning: package 'graphlayouts' was built under R version 3.6.2
library(expss)
## Warning: package 'expss' was built under R version 3.6.2
## Loading required package: maditr
##
## To get total summary skip 'by' argument: take_all(mtcars, mean)
##
## Attaching package: 'maditr'
## The following objects are masked from 'package:dplyr':
##
## between, coalesce, first, last
## The following object is masked from 'package:purrr':
##
## transpose
## The following object is masked from 'package:readr':
##
## cols
##
## Attaching package: 'expss'
## The following object is masked from 'package:igraph':
##
## %u%
## The following object is masked from 'package:labelled':
##
## is.labelled
## The following object is masked from 'package:sjlabelled':
##
## read_spss
## The following objects are masked from 'package:stringr':
##
## fixed, regex
## The following objects are masked from 'package:dplyr':
##
## compute, contains, na_if, recode, vars
## The following objects are masked from 'package:purrr':
##
## keep, modify, modify_if, when
## The following object is masked from 'package:ggplot2':
##
## vars
## The following objects are masked from 'package:tidyr':
##
## contains, nest
## The following objects are masked from 'package:haven':
##
## is.labelled, read_spss
library(networkD3)
library(magrittr)
## Warning: package 'magrittr' was built under R version 3.6.2
##
## Attaching package: 'magrittr'
## The following objects are masked from 'package:expss':
##
## and, equals, not, or
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(htmlwidgets)
## Warning: package 'htmlwidgets' was built under R version 3.6.2
##
## Attaching package: 'htmlwidgets'
## The following object is masked from 'package:networkD3':
##
## JS
library(htmltools)
## Warning: package 'htmltools' was built under R version 3.6.2
# Load dataset
bh <- read_csv('https://docs.google.com/spreadsheets/d/e/2PACX-1vRK1ZSJueUrd7uXUGrteX5QQgUiDhXnl8XfF9ijc4xcxuC3y21kr7QoCuwbQsz8xwtcX6859Gc_djBL/pub?output=csv')
## Rows: 695 Columns: 19
## ββ Column specification ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
## Delimiter: ","
## chr (7): gender, mode_shop_v, shopper_type, state_v, region, type_serv_v, f...
## dbl (12): id, age, mode_shop, freq_use_gen, marital, education, employment, ...
##
## βΉ Use `spec()` to retrieve the full column specification for this data.
## βΉ Specify the column types or set `show_col_types = FALSE` to quiet this message.
print(bh)
## # A tibble: 695 Γ 19
## id age gender mode_shop mode_shop_v freq_use_gen shopper_type marital
## <dbl> <dbl> <chr> <dbl> <chr> <dbl> <chr> <dbl>
## 1 100020 50 Male 1 Smartphone 5 Heavy Shopper 1
## 2 100020 50 Male 1 Smartphone 5 Heavy Shopper 1
## 3 100020 50 Male 1 Smartphone 5 Heavy Shopper 1
## 4 100022 35 Male 3 Tablet 4 Frequent Shop⦠2
## 5 100022 35 Male 3 Tablet 4 Frequent Shop⦠2
## 6 100035 25 Male 2 Computer 6 Heavy Shopper 2
## 7 100035 25 Male 2 Computer 6 Heavy Shopper 2
## 8 100035 25 Male 2 Computer 6 Heavy Shopper 2
## 9 100045 40 Male 1 Smartphone 7 Heavy Shopper 2
## 10 100045 40 Male 1 Smartphone 7 Heavy Shopper 2
## # β¦ with 685 more rows, and 11 more variables: education <dbl>,
## # employment <dbl>, income <dbl>, state <dbl>, state_v <chr>, region <chr>,
## # area <dbl>, type_serv <dbl>, type_serv_v <chr>, freq_use_serv <dbl>,
## # freq_use_serv_v <chr>
dim(bh)
## [1] 695 19
names(bh)
## [1] "id" "age" "gender" "mode_shop"
## [5] "mode_shop_v" "freq_use_gen" "shopper_type" "marital"
## [9] "education" "employment" "income" "state"
## [13] "state_v" "region" "area" "type_serv"
## [17] "type_serv_v" "freq_use_serv" "freq_use_serv_v"
bh <- data.frame(bh)
# Retrieves the participants-types of service used connections to be saved under an graph object called "g_bh" (edgelist);
# "freq_use_serv" and "region" are edge. "id" and "type_serv" are vertex.
g_bh <- graph.data.frame(bh[,c("id", "type_serv_v", "freq_use_serv", "region")])
g_bh
## IGRAPH a4f10c3 DN-- 211 695 --
## + attr: name (v/c), freq_use_serv (e/n), region (e/c)
## + edges from a4f10c3 (vertex names):
## [1] 100020->Food & Beverages 100020->Parcel 100020->Rideshare
## [4] 100022->Convenience 100022->Rideshare 100035->Grocery
## [7] 100035->Home Project 100035->Pharmacy 100045->Alcohol
## [10] 100045->Convenience 100045->Essential 100045->Food & Beverages
## [13] 100045->Grocery 100045->Home Project 100045->Homecare
## [16] 100045->Laundry 100045->Pharmacy 100052->Alcohol
## [19] 100052->Essential 100052->Food & Beverages 100052->Grocery
## [22] 100052->Home Project 100052->Laundry 100052->Parcel
## + ... omitted several edges
# Adds the two-mode structure to the graph "g_bh"
V(g_bh)$type <- V(g_bh)$name %in% bh$id
table(V(g_bh)$type)[2]
## TRUE
## 200
i<-table(V(g_bh)$type)[2]
# Gets centrality measures
cent_bh <- data.frame(bet=betweenness(g_bh, normalized = T, directed = FALSE)/max(betweenness(g_bh, normalized = T, directed = FALSE)), eig = evcent(g_bh)$vector, degree = degree(g_bh, mode="total"))
cent_bh$name <- rownames(cent_bh)
head(cent_bh);tail(cent_bh)
## bet eig degree name
## 100020 0.0011950355 0.09192867 3 100020
## 100022 0.0005284214 0.05043208 2 100022
## 100035 0.0011999189 0.08567962 3 100035
## 100045 0.0091106071 0.21456374 9 100045
## 100052 0.0066798761 0.20216549 8 100052
## 100093 0.0000000000 0.05194816 1 100093
## bet eig degree name
## Home Project 0.03896414 0.2151311 30 Home Project
## Pharmacy 0.11004896 0.4341981 55 Pharmacy
## Alcohol 0.04154360 0.3365041 40 Alcohol
## Essential 0.16448031 0.5027003 67 Essential
## Homecare 0.01507444 0.1633278 22 Homecare
## Laundry 0.01895451 0.2513033 29 Laundry
# Gets time invariant actor attributes
actor_bh <- bh[!duplicated(bh$id), c("id", "mode_shop_v", "freq_use_gen", "area", "shopper_type")]
head(actor_bh)
## id mode_shop_v freq_use_gen area shopper_type
## 1 100020 Smartphone 5 1 Heavy Shopper
## 4 100022 Tablet 4 2 Frequent Shopper
## 6 100035 Computer 6 1 Heavy Shopper
## 9 100045 Smartphone 7 1 Heavy Shopper
## 18 100052 Smartphone 6 2 Heavy Shopper
## 26 100093 Smartphone 3 2 Frequent Shopper
# Interactive Visualization
V(g_bh)$label <- V(g_bh)$name
V(g_bh)$name <- 1:length(V(g_bh))
# Gets edge list from graph, also any other attribute at the edge level to be included in the mapping
links_bh <- as.data.frame(cbind(get.edgelist(g_bh, names=TRUE), E(g_bh)$freq_use_serv, E(g_bh)$region))
#Needs to be numeric
links_bh$V1 <- as.numeric(as.character(links_bh$V1))
links_bh$V2 <- as.numeric(as.character(links_bh$V2))
links_bh$V3 <- as.numeric(as.character(links_bh$V3))
head(links_bh)
## V1 V2 V3 V4
## 1 1 201 4 South
## 2 1 202 2 South
## 3 1 203 5 South
## 4 2 204 5 Midwest
## 5 2 203 5 Midwest
## 6 3 205 6 West
str(links_bh)
## 'data.frame': 695 obs. of 4 variables:
## $ V1: num 1 1 1 2 2 3 3 3 4 4 ...
## $ V2: num 201 202 203 204 203 205 206 207 208 204 ...
## $ V3: num 4 2 5 5 5 6 6 6 7 5 ...
## $ V4: Factor w/ 4 levels "Midwest","Northeast",..: 3 3 3 1 1 4 4 4 3 3 ...
colnames(links_bh)<-c("source", "target", "value", "region")
# Indexing -- !Counts begin at zero in computer programming (Javascript)!
links_bh[,1:2] <- (links_bh[,1:2]-1)
dim(links_bh)
## [1] 695 4
# Add attributes at the actor level
V(g_bh)$mode_shop <- actor_bh$mode_shop_v[match(V(g_bh)$label, actor_bh$id)]
V(g_bh)$shopper_type <- actor_bh$shopper_type[match(V(g_bh)$label, actor_bh$id)]
V(g_bh)$freq_use_gen <- actor_bh$freq_use_gen[match(V(g_bh)$label, actor_bh$id)]
V(g_bh)$area <- actor_bh$area[match(V(g_bh)$label, actor_bh$id)]
V(g_bh)$size <- cent_bh$bet[match(V(g_bh)$label, cent_bh$name)]
summary(V(g_bh)$size)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000000 0.0007439 0.0017357 0.0147945 0.0041166 1.0000000
summary(actor_bh$shopper_type)
## Length Class Mode
## 200 character character
summary(actor_bh$mode_shop)
## Length Class Mode
## 200 character character
summary(actor_bh$freq_use_gen)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 3.000 4.000 3.885 5.000 7.000
summary(actor_bh$area)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 2.000 1.725 2.000 3.000
# Get vertex list (nodes list)
nodes_bh <- data.frame(name=c(
paste("ID: ", V(g_bh)$label[1:i],
" Type of Shopper: ", V(g_bh)$freq_use_gen[1:i],
" Mode of Shop: ", V(g_bh)$mode_shop[1:i], sep=""),
V(g_bh)$label[(i+1):length(V(g_bh)$label)]),
freq_use_gen = V(g_bh)$freq_use_gen,
size = abs(V(g_bh)$size))
#Name of 'Type of Service' rather than code
nodes_bh$name[(i+1):length(V(g_bh)$label)] <- bh$type_serv_v[match(nodes_bh$name[(i+1):length(V(g_bh)$label)], bh$type_serv_v)]
head(nodes_bh);tail(nodes_bh)
## name freq_use_gen
## 1 ID: 100020 Type of Shopper: 5 Mode of Shop: Smartphone 5
## 2 ID: 100022 Type of Shopper: 4 Mode of Shop: Tablet 4
## 3 ID: 100035 Type of Shopper: 6 Mode of Shop: Computer 6
## 4 ID: 100045 Type of Shopper: 7 Mode of Shop: Smartphone 7
## 5 ID: 100052 Type of Shopper: 6 Mode of Shop: Smartphone 6
## 6 ID: 100093 Type of Shopper: 3 Mode of Shop: Smartphone 3
## size
## 1 0.0011950355
## 2 0.0005284214
## 3 0.0011999189
## 4 0.0091106071
## 5 0.0066798761
## 6 0.0000000000
## name freq_use_gen size
## 206 Home Project NA 0.03896414
## 207 Pharmacy NA 0.11004896
## 208 Alcohol NA 0.04154360
## 209 Essential NA 0.16448031
## 210 Homecare NA 0.01507444
## 211 Laundry NA 0.01895451
nodes_bh$group <- NA
nodes_bh$group[1:i] <- cut(as.numeric(nodes_bh$freq_use_gen[1:i]), c(0,3,5,8), right = FALSE)
table(is.na(nodes_bh$group))
##
## FALSE TRUE
## 200 11
table(nodes_bh$group)
##
## 1 2 3
## 40 91 69
head(nodes_bh[is.na(nodes_bh$group),],20)
## name freq_use_gen size group
## 201 Food & Beverages NA 0.66406360 NA
## 202 Parcel NA 0.03005758 NA
## 203 Rideshare NA 0.46113815 NA
## 204 Convenience NA 0.08675819 NA
## 205 Grocery NA 1.00000000 NA
## 206 Home Project NA 0.03896414 NA
## 207 Pharmacy NA 0.11004896 NA
## 208 Alcohol NA 0.04154360 NA
## 209 Essential NA 0.16448031 NA
## 210 Homecare NA 0.01507444 NA
## 211 Laundry NA 0.01895451 NA
nodes_bh$group <- ifelse(is.na(nodes_bh$group), "Type of Service", ifelse(nodes_bh$group==1, "Occasional Shopper", ifelse(nodes_bh$group==2, "Frequent Shopper", ifelse(nodes_bh$group==3, "Heavy Shopper", 9999))))
counts <- data.frame(table(nodes_bh$group))
counts$labels <- paste(counts$Var1, ", N= ", counts$Freq, sep="")
nodes_bh$groups <- counts$labels[match(nodes_bh$group, counts$Var1)]
head(nodes_bh)
## name freq_use_gen
## 1 ID: 100020 Type of Shopper: 5 Mode of Shop: Smartphone 5
## 2 ID: 100022 Type of Shopper: 4 Mode of Shop: Tablet 4
## 3 ID: 100035 Type of Shopper: 6 Mode of Shop: Computer 6
## 4 ID: 100045 Type of Shopper: 7 Mode of Shop: Smartphone 7
## 5 ID: 100052 Type of Shopper: 6 Mode of Shop: Smartphone 6
## 6 ID: 100093 Type of Shopper: 3 Mode of Shop: Smartphone 3
## size group groups
## 1 0.0011950355 Heavy Shopper Heavy Shopper, N= 69
## 2 0.0005284214 Frequent Shopper Frequent Shopper, N= 91
## 3 0.0011999189 Heavy Shopper Heavy Shopper, N= 69
## 4 0.0091106071 Heavy Shopper Heavy Shopper, N= 69
## 5 0.0066798761 Heavy Shopper Heavy Shopper, N= 69
## 6 0.0000000000 Frequent Shopper Frequent Shopper, N= 91
# Network Visualization
netviz_bh <- forceNetwork(Links = links_bh, Nodes = nodes_bh,
Source = 'source', Target = 'target',
NodeID = 'name',
Group = 'groups', # color nodes by group calculated earlier
charge = -200, # node repulsion
#linkDistance = JS("function(d) { return d.linkDistance; }"),
linkDistance = JS("function(d){return d.value}"),
#linkWidth = JS("function(d) { return Math.sqrt(d.value)*2; }"),
linkWidth = 1,
opacity = 0.8,
Value = "value",
Nodesize = 'size',
radiusCalculation = JS("Math.sqrt(d.nodesize*30)+4"),
zoom = T,
fontSize=10,
bounded= F,
legend= TRUE,
linkColour = ifelse(links_bh$region == "Northeast", "#999999", ifelse(links_bh$region == "South", "#999999", "#999999")),
colourScale = JS("d3.scaleOrdinal(d3.schemeCategory10)"))
# HTML Style
HTMLaddons <-
"function(el, x) {
d3.select('body').style('background-color', ' #4f4f4f ')
d3.selectAll('.legend text').style('fill', 'white')
d3.selectAll('.link').append('svg:title')
.text(function(d) { return 'Frequency of Use: ' + d.value + ', Region: ' + d.region ; })
var options = x.options;
var svg = d3.select(el).select('svg')
var node = svg.selectAll('.node');
var link = svg.selectAll('link');
var mouseout = d3.selectAll('.node').on('mouseout');
function nodeSize(d) {
if (options.nodesize) {
return eval(options.radiusCalculation);
} else {
return 6;
}
}
d3.selectAll('.node').on('click', onclick)
function onclick(d) {
if (d3.select(this).on('mouseout') == mouseout) {
d3.select(this).on('mouseout', mouseout_clicked);
} else {
d3.select(this).on('mouseout', mouseout);
}
}
function mouseout_clicked(d) {
node.style('opacity', +options.opacity);
link.style('opacity', +options.opacity);
d3.select(this).select('circle').transition()
.duration(750)
.attr('r', function(d){return nodeSize(d);});
d3.select(this).select('text').transition()
.duration(1250)
.attr('x', 0)
.style('font', options.fontSize + 'px ');
}
}
"
# netviz_bh$x$links_bh$linkDistance <- (1/links_bh$value)*500 (Code NOT IN USED)
netviz_bh$x$links_bh$region <- links_bh$region
onRender(netviz_bh, HTMLaddons)