# Here you can find a first draft of Sankey graphs for website flow pages. This report is based on LWT data sent by Felipe in 2015.
#If you don't have these packages installed use:
#install.packages("igraph")
#install.packages("networkD3")
#install.packages("curl")
require(igraph) #load package for social network analysis
require(networkD3) #load package for sankey network
require(curl) #load package to download csv files
# To change the Sankey plot configuration, edit
# "~/Library/Frameworks/R.framework/Versions/<your version 3.xxx>/library/networkD3/htmlwidgets/lib/sankey.js".
# This version was made using the following sankey.js:
# "https://dl.dropboxusercontent.com/s/5386z4jwrymlubq/sankey.js"

# Import the bi-directional square matrix with labels into the "data" object.
data <- read.csv(
  curl("https://dl.dropboxusercontent.com/s/dz571ak1whb8lqd/grafo.csv"),
  sep = ";"
)

# Transform the data frame into an adjacency matrix.
matrix <- as.matrix(data)

# Code the matrix columns as zero-based numeric labels (0, 1, 2, ...).
# These labels later become the from/to ids of the link tables; sankeyNetwork()
# documents its Source/Target ids as zero-indexed.
colnames(matrix) <- as.character(seq_len(ncol(matrix)) - 1L)

# Import the original labels (page addresses).
label <- read.csv(
  curl("https://dl.dropboxusercontent.com/s/erdu802n7bsqk5e/label.csv"),
  sep = ";"
)

# Build the nodes table with the labels in their original order.
Nodes <- as.data.frame(label$Label)

# Name the single column explicitly: every sankeyNetwork() call in this script
# looks the labels up through NodeID = "label$Label".
names(Nodes) <- "label$Label"

# Generate the graph structure for the overall flow: directed, weighted, and
# keeping the diagonal (self-loops).
network1 <- graph_from_adjacency_matrix(
  matrix,
  mode = "directed",
  weighted = TRUE,
  diag = TRUE
)
# Edge table of the overall-flow graph (columns: from, to, weight).
Links1 <- get.data.frame(network1)
# Add a log-transformed copy of the edge weight.
# NOTE(review): log() maps a weight of 1 to 0 and weights below 1 to negative
# values - confirm this is acceptable for the link widths.
Links1[["logweight"]] <- log(Links1[["weight"]])
# Convert both endpoint columns to numeric.
Links1[c("from", "to")] <- lapply(Links1[c("from", "to")], as.numeric)
#Generating first sankey plot object
#sankeyNetwork(Links= Links1, Nodes = Nodes, Source = 'from', Target = 'to', Value = 'logweight', NodeID ="label$Label", fontSize=12, nodeWidth=30)
#This is not included as we need to check the JavaScript template for Sankey plots with recursive patterns

# Generate an igraph network for the directed flow without loops: undirected,
# weighted, diagonal dropped - only the upper matrix triangle is used.
# NOTE(review): the igraph documentation describes weighted "undirected" mode
# as expecting a symmetric matrix - confirm the flow matrix satisfies this.
network2 <- graph_from_adjacency_matrix(
  matrix,
  mode = "undirected",
  weighted = TRUE,
  diag = FALSE
)
#IMPORTANT: Hopefully this is the right one to show forward flow - if it is not the case use the following network object
#network2=graph_from_adjacency_matrix(matrix,mode="lower",weighted=TRUE,diag=F)

# Edge table of the loop-free graph (columns: from, to, weight).
Links2 <- get.data.frame(network2)

# Add a log-transformed copy of the edge weight.
# NOTE(review): log() maps a weight of 1 to 0 and weights below 1 to negative
# values - confirm this is acceptable for the link widths.
Links2$logweight <- log(Links2$weight)

# Convert both endpoint columns to numeric.
Links2$from <- as.numeric(Links2$from)
Links2$to <- as.numeric(Links2$to)
#Generating sankey plot object
sankeyNetwork(
  Links = Links2, Nodes = Nodes, Source = "from", Target = "to",
  Value = "logweight", NodeID = "label$Label", fontSize = 12, nodeWidth = 5
)

# sankeyFT: Sankey plot of every link that touches the selected nodes.
#   x: vector of node ids to select.
#   y: link table with columns from, to and logweight (e.g. Links2).
# Keeps the rows of y whose `from` OR `to` is in x, prints a caption with
# cat(), and returns the sankeyNetwork() result.
# Reads the node table from the `Nodes` object defined outside the function.
sankeyFT <- function(x, y) {
  touching <- y[y$from %in% x | y$to %in% x, ]
  cat("#Links Forward (From) and Backward (To),\n")
  sankeyNetwork(
    Links = touching, Nodes = Nodes, Source = "from", Target = "to",
    Value = "logweight", NodeID = "label$Label", fontSize = 12, nodeWidth = 30
  )
}

# sankeyF: Sankey plot of the links that start at the selected nodes.
#   x: vector of node ids to select.
#   y: link table with columns from, to and logweight (e.g. Links2).
# Keeps the rows of y whose `from` is in x, prints a caption with cat(), and
# returns the sankeyNetwork() result.
# NOTE(review): the caption says "no log transformation" but the plot still
# uses Value = "logweight" - confirm which one is intended.
sankeyF <- function(x, y) {
  outgoing <- y[y$from %in% x, ]
  cat("#Links From - no log transformation due to low level of observations\n")
  sankeyNetwork(
    Links = outgoing, Nodes = Nodes, Source = "from", Target = "to",
    Value = "logweight", NodeID = "label$Label", fontSize = 12, nodeWidth = 30
  )
}

# sankeyT: Sankey plot of the links that end at the selected nodes.
#   x: vector of node ids to select.
#   y: link table with columns from, to and logweight (e.g. Links2).
# Keeps the rows of y whose `to` is in x, prints a caption with cat(), and
# returns the sankeyNetwork() result.
# NOTE(review): the caption says "no log transformation" but the plot still
# uses Value = "logweight" - confirm which one is intended.
sankeyT <- function(x, y) {
  incoming <- y[y$to %in% x, ]
  cat("#Links To - no log transformation due to low level of observations\n")
  sankeyNetwork(
    Links = incoming, Nodes = Nodes, Source = "from", Target = "to",
    Value = "logweight", NodeID = "label$Label", fontSize = 12, nodeWidth = 30
  )
}

# Selecting links made by at least one node from Zero Step (Frontpages)
#sankeyFT(c(1:3,15,27,32:39),Links1)
#sankeyF(c(1:3,15,27,32:39), Links1)
#sankeyT(c(1:3,15,27,32:39), Links1)
#These plots are not included as we need to check the JavaScript template for Sankey plots with recursive patterns

# Plots for nodes 1:3, 15, 27 and 32:39 on the loop-free link table.
# Each "## ..." line is the caption the call prints.
sankeyFT(c(1:3, 15, 27, 32:39), Links2)
## #Links Forward (From) and Backward (To),
sankeyF(c(1:3, 15, 27, 32:39), Links2)
## #Links From - no log transformation due to low level of observations
sankeyT(c(1:3, 15, 27, 32:39), Links2)
## #Links To - no log transformation due to low level of observations
# Selecting links made by at least one node from First Step
#sankeyFT(c(16:26),Links1)
#sankeyF(c(16:26), Links1)
#sankeyT(c(16:26), Links1)
#These plots are not included as we need to check the JavaScript template for Sankey plots with recursive patterns

# Plots for nodes 16:26 on the loop-free link table.
# Each "## ..." line is the caption the call prints.
sankeyFT(16:26, Links2)
## #Links Forward (From) and Backward (To),
sankeyF(16:26, Links2)
## #Links From - no log transformation due to low level of observations
sankeyT(16:26, Links2)
## #Links To - no log transformation due to low level of observations
# Selecting links made by at least one node from First Step
#sankeyFT(c(4:14),Links1)
#sankeyF(c(4:14), Links1)
#sankeyT(c(4:14), Links1)
#These plots are not included as we need to check the JavaScript template for Sankey plots with recursive patterns

# Plots for nodes 4:14 on the loop-free link table.
# Each "## ..." line is the caption the call prints.
sankeyFT(4:14, Links2)
## #Links Forward (From) and Backward (To),
sankeyF(4:14, Links2)
## #Links From - no log transformation due to low level of observations
sankeyT(4:14, Links2)
## #Links To - no log transformation due to low level of observations
# Selecting links made by at least one node from First Step
#sankeyFT(c(28:38),Links1)
#sankeyF(c(28:38), Links1)
#sankeyT(c(28:38), Links1)
#These plots are not included as we need to check the JavaScript template for Sankey plots with recursive patterns

# Plots for nodes 28:38 on the loop-free link table.
# Each "## ..." line is the caption the call prints.
sankeyFT(28:38, Links2)
## #Links Forward (From) and Backward (To),
sankeyF(28:38, Links2)
## #Links From - no log transformation due to low level of observations
sankeyT(28:38, Links2)
## #Links To - no log transformation due to low level of observations