Displaying the Samsung data

# install.packages("mclust")
# install.packages("plyr")
# install.packages("stringr")
# install.packages("igraph")
# install.packages("stringi")
# install.packages("magrittr")
# install.packages("dplyr")
# install.packages("sna")
# install.packages("RColorBrewer")
# install.packages("visNetwork")
#source("http://bioconductor.org/biocLite.R")
#biocLite("RBGL")
#biocLite("graph")

Load and process data

library(mclust)
## Package 'mclust' version 5.4
## Type 'citation("mclust")' for citing this R package in publications.
library(RColorBrewer)       
library(sna)
## Loading required package: statnet.common
## 
## Attaching package: 'statnet.common'
## The following object is masked from 'package:base':
## 
##     order
## Loading required package: network
## network: Classes for Relational Data
## Version 1.13.0 created on 2015-08-31.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##                     Mark S. Handcock, University of California -- Los Angeles
##                     David R. Hunter, Penn State University
##                     Martina Morris, University of Washington
##                     Skye Bender-deMoll, University of Washington
##  For citation information, type citation("network").
##  Type help("network-package") to get started.
## sna: Tools for Social Network Analysis
## Version 2.4 created on 2016-07-23.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##  For citation information, type citation("sna").
##  Type help(package="sna") to get started.
library(graph)
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:statnet.common':
## 
##     order
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, cbind, colMeans,
##     colnames, colSums, do.call, duplicated, eval, evalq, Filter,
##     Find, get, grep, grepl, intersect, is.unsorted, lapply,
##     lengths, Map, mapply, match, mget, order, paste, pmax,
##     pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
##     rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
##     tapply, union, unique, unsplit, which, which.max, which.min
## 
## Attaching package: 'graph'
## The following object is masked from 'package:sna':
## 
##     degree
library(igraph)
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:graph':
## 
##     degree, edges, intersection, union
## The following objects are masked from 'package:BiocGenerics':
## 
##     normalize, union
## The following objects are masked from 'package:sna':
## 
##     betweenness, bonpow, closeness, components, degree,
##     dyad.census, evcent, hierarchy, is.connected, neighborhood,
##     triad.census
## The following objects are masked from 'package:network':
## 
##     %c%, %s%, add.edges, add.vertices, delete.edges,
##     delete.vertices, get.edge.attribute, get.edges,
##     get.vertex.attribute, is.bipartite, is.directed,
##     list.edge.attributes, list.vertex.attributes,
##     set.edge.attribute, set.vertex.attribute
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library(readr)
library(plyr)
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:graph':
## 
##     join
## The following object is masked from 'package:network':
## 
##     is.discrete
library(stringr)
## 
## Attaching package: 'stringr'
## The following object is masked from 'package:graph':
## 
##     boundary
library(stringi)
library(magrittr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:igraph':
## 
##     as_data_frame, groups, union
## The following object is masked from 'package:graph':
## 
##     union
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the saved Samsung tweets and keep only what the network analysis
# needs: a running row id, the posting account and the tweet text.
Samsung <- read_rds("Samsung.RDS")

# seq_along() rather than 1:length(): for an empty input 1:0 is c(1, 0),
# which makes data.frame() fail on mismatched row counts.
# The text columns are converted to character up front so they never become
# factors, whatever the stringsAsFactors default of the running R version is.
# The first column is now explicitly called `id`; the other two keep the
# names the original code produced.
tData <- data.frame(
  id = seq_along(Samsung$screenName),
  Samsung.screenName = as.character(Samsung$screenName),
  Samsung.tweettext = as.character(Samsung$tweettext),
  stringsAsFactors = FALSE
)

Function to create a list of poster–retweeter pairs

# Builds a weighted edge list of retweet relationships from a tweet table.
#
# Args:
#   tData: data frame with exactly three columns, in this order: a row id,
#          the screen name of the account that posted the tweet, and the
#          tweet text. Existing column names are ignored; the columns are
#          renamed by position.
#
# Returns:
#   A tibble with one row per unordered pair of accounts and the columns
#   `from` (the name of the pair that sorts first), `to` (the name that
#   sorts last) and `weight` (number of matching tweets linking the two).
#   The tibble has zero rows when no tweet matches the retweet pattern.
createList <- function(tData) {

  # "RT" or "via" followed by one or more @mentions marks a retweet
  retweet_pattern <- "(RT|via)((?:\\b\\W*@\\w+)+)"
  # Only the first handle of the matched mention run is used
  handle_pattern <- "@[a-zA-Z0-9_]{1,}"

  # Reads data; coerce to character so factor columns cannot leak into the
  # string comparisons below
  nData <-
    tData %>%
    set_colnames(c("id", "screenname", "tweet")) %>%
    as_tibble() %>%
    mutate(screenname = as.character(screenname),
           tweet = as.character(tweet))

  # Extracts poster information. The stringi functions are vectorised, so
  # the whole column is processed at once instead of row by row.
  retweeterPoster <-
    nData %>%
    mutate(mention = stri_extract_first_regex(tweet, retweet_pattern)) %>%
    filter(!is.na(mention)) %>%
    mutate(
      who_post = screenname,
      who_retweet = stri_replace_all_fixed(
        stri_extract_first_regex(mention, handle_pattern), "@", ""
      )
    ) %>%
    # `\w` in retweet_pattern accepts characters that handle_pattern does
    # not, so the handle can be NA even though the tweet matched; such rows
    # (and rows without a poster name) would otherwise become NA edges.
    filter(!is.na(who_post), !is.na(who_retweet)) %>%
    # Order each pair so that A-B and B-A collapse into the same edge
    mutate(from = pmin(who_post, who_retweet),
           to = pmax(who_post, who_retweet),
           combi = stri_c(from, to, sep = " ")) %>%
    group_by(combi) %>%
    summarize(from = from[1],
              to = to[1],
              weight = n()) %>%
    ungroup() %>%
    select(-combi)

  # Returns results
  retweeterPoster
}

# Build the weighted from/to edge list for the Samsung tweets
retweeterPoster <- createList(tData)

Create graph

# Turn the edge list into a weighted adjacency matrix: the first two columns
# are the edge endpoints, the tweet counts are the weights. The matrix is
# then wrapped as a graphNEL object.
# NOTE(review): createList() orders each pair alphabetically (from = min,
# to = max), so the direction encoded by edgemode = "directed" does not say
# who retweeted whom -- confirm that directed betweenness is what is wanted.
m <-
  retweeterPoster[, 1:2] %>%
  as.matrix() %>%
  ftM2adjM(W = retweeterPoster$weight, edgemode = "directed")
g1 <- as(m, "graphNEL")

# Per-node centrality scores computed by sna on the adjacency matrix
node <- data.frame(
  nodes(g1),
  betweenness = sna::betweenness(m),
  degree = sna::degree(m)
)

# Show the ten nodes with the highest degree
sortlist <- node[order(-node$degree), ]
head(sortlist, n = 10)
##          nodes.g1. betweenness degree
## 96   ilovemyiwatch       299.5     62
## 97   ILoveMyPebble       260.5     54
## 157       Techmeme        16.0     17
## 119     KathysDeep        21.0     12
## 233  Pebble_Knight         0.0      8
## 55  chutamasbarato         6.0      7
## 153 raymondochoa12        26.0      7
## 215        YouTube         0.0      7
## 25      angrysoofa         5.0      6
## 31            asda         0.0      5
# Split the nodes into N groups with Mclust, using log(degree) as the
# clustering variable. log(0) is -Inf, so those nodes are given size 1.
# NOTE(review): size 1 is larger than log(2), so a degree-0 node ends up
# "bigger" than nodes of degree 1 or 2 -- confirm this is intended.
N <- 3
node %<>%
  mutate(
    size = log(degree),
    size = ifelse(size == -Inf, 1, size),
    group = Mclust(size, G = N)$classification
  )

Visualize the network with visNetwork

library(visNetwork)

# Node table for visNetwork, one row per node of the graph.
# stringsAsFactors = FALSE keeps the constant "dot" and the colour names as
# plain character vectors.
gnode <- data.frame(
  id = node$nodes.g1.,
  shape = "dot",
  shadow = TRUE,                  # nodes drop a shadow
  title = node$nodes.g1.,         # tooltip text shown when hovering a node
  label = node$degree,            # node label is its degree centrality
  group = node$group,
  size = node$group * 3,          # node size grows with the cluster number
  color.background = c("slategrey", "tomato", "gold")[node$group],
  stringsAsFactors = FALSE
)

# Interactive plot: highlight a node's neighbours on selection and offer a
# drop-down to select nodes by cluster.
visNetwork(
  nodes = gnode,
  edges = setNames(retweeterPoster, c("from", "to", "weight"))
) %>%
  visOptions(highlightNearest = TRUE, selectedBy = "group")

Displaying the Samsung2 data (collected after one additional month)

Load and process data

library(mclust)
library(RColorBrewer)       
library(sna)
library(graph)
library(igraph)
library(readr)
library(plyr)
library(stringr)
library(stringi)
library(magrittr)
library(dplyr)

# Load the second, later snapshot of Samsung tweets and keep only what the
# network analysis needs: a running row id, the posting account and the
# tweet text.
Samsung2 <- read_rds("Samsung2.RDS")

# seq_along() rather than 1:length(): for an empty input 1:0 is c(1, 0),
# which makes data.frame() fail on mismatched row counts.
# The text columns are converted to character up front so they never become
# factors, whatever the stringsAsFactors default of the running R version is.
# The first column is now explicitly called `id`; the other two keep the
# names the original code produced.
tData <- data.frame(
  id = seq_along(Samsung2$screenName),
  Samsung2.screenName = as.character(Samsung2$screenName),
  Samsung2.tweettext = as.character(Samsung2$tweettext),
  stringsAsFactors = FALSE
)

Function to create a list of poster–retweeter pairs

# Builds a weighted edge list of retweet relationships from a tweet table.
#
# Args:
#   tData: data frame with exactly three columns, in this order: a row id,
#          the screen name of the account that posted the tweet, and the
#          tweet text. Existing column names are ignored; the columns are
#          renamed by position.
#
# Returns:
#   A tibble with one row per unordered pair of accounts and the columns
#   `from` (the name of the pair that sorts first), `to` (the name that
#   sorts last) and `weight` (number of matching tweets linking the two).
#   The tibble has zero rows when no tweet matches the retweet pattern.
createList <- function(tData) {

  # "RT" or "via" followed by one or more @mentions marks a retweet
  retweet_pattern <- "(RT|via)((?:\\b\\W*@\\w+)+)"
  # Only the first handle of the matched mention run is used
  handle_pattern <- "@[a-zA-Z0-9_]{1,}"

  # Reads data; coerce to character so factor columns cannot leak into the
  # string comparisons below
  nData <-
    tData %>%
    set_colnames(c("id", "screenname", "tweet")) %>%
    as_tibble() %>%
    mutate(screenname = as.character(screenname),
           tweet = as.character(tweet))

  # Extracts poster information. The stringi functions are vectorised, so
  # the whole column is processed at once instead of row by row.
  retweeterPoster <-
    nData %>%
    mutate(mention = stri_extract_first_regex(tweet, retweet_pattern)) %>%
    filter(!is.na(mention)) %>%
    mutate(
      who_post = screenname,
      who_retweet = stri_replace_all_fixed(
        stri_extract_first_regex(mention, handle_pattern), "@", ""
      )
    ) %>%
    # `\w` in retweet_pattern accepts characters that handle_pattern does
    # not, so the handle can be NA even though the tweet matched; such rows
    # (and rows without a poster name) would otherwise become NA edges.
    filter(!is.na(who_post), !is.na(who_retweet)) %>%
    # Order each pair so that A-B and B-A collapse into the same edge
    mutate(from = pmin(who_post, who_retweet),
           to = pmax(who_post, who_retweet),
           combi = stri_c(from, to, sep = " ")) %>%
    group_by(combi) %>%
    summarize(from = from[1],
              to = to[1],
              weight = n()) %>%
    ungroup() %>%
    select(-combi)

  # Returns results
  retweeterPoster
}

# Build the weighted from/to edge list for the Samsung2 tweets
retweeterPoster <- createList(tData)

Create graph

# Turn the edge list into a weighted adjacency matrix: the first two columns
# are the edge endpoints, the tweet counts are the weights. The matrix is
# then wrapped as a graphNEL object.
# NOTE(review): createList() orders each pair alphabetically (from = min,
# to = max), so the direction encoded by edgemode = "directed" does not say
# who retweeted whom -- confirm that directed betweenness is what is wanted.
m <-
  retweeterPoster[, 1:2] %>%
  as.matrix() %>%
  ftM2adjM(W = retweeterPoster$weight, edgemode = "directed")
g1 <- as(m, "graphNEL")

# Per-node centrality scores computed by sna on the adjacency matrix
node <- data.frame(
  nodes(g1),
  betweenness = sna::betweenness(m),
  degree = sna::degree(m)
)

# Show the ten nodes with the highest degree
sortlist <- node[order(-node$degree), ]
head(sortlist, n = 10)
##           nodes.g1. betweenness degree
## 103            Fact        2280    106
## 132   ilovemyiwatch         238     50
## 45         applenws         246     49
## 133   ILoveMyPebble         210     44
## 201 TheWorldStories          96     35
## 39   androidcentral          51     23
## 157     mayflashfly          54     15
## 202          tnkgrl          46     11
## 7         _WistJeAl           0      9
## 127    HuffPostTech           7      8
# Split the nodes into N groups with Mclust, using log(degree) as the
# clustering variable. log(0) is -Inf, so those nodes are given size 1.
# NOTE(review): size 1 is larger than log(2), so a degree-0 node ends up
# "bigger" than nodes of degree 1 or 2 -- confirm this is intended.
N <- 3
node %<>%
  mutate(
    size = log(degree),
    size = ifelse(size == -Inf, 1, size),
    group = Mclust(size, G = N)$classification
  )

Visualize the network with visNetwork

library(visNetwork)

# Node table for visNetwork, one row per node of the graph.
# stringsAsFactors = FALSE keeps the constant "dot" and the colour names as
# plain character vectors.
gnode <- data.frame(
  id = node$nodes.g1.,
  shape = "dot",
  shadow = TRUE,                  # nodes drop a shadow
  title = node$nodes.g1.,         # tooltip text shown when hovering a node
  label = node$degree,            # node label is its degree centrality
  group = node$group,
  size = node$group * 3,          # node size grows with the cluster number
  color.background = c("slategrey", "tomato", "gold")[node$group],
  stringsAsFactors = FALSE
)

# Interactive plot: highlight a node's neighbours on selection and offer a
# drop-down to select nodes by cluster.
visNetwork(
  nodes = gnode,
  edges = setNames(retweeterPoster, c("from", "to", "weight"))
) %>%
  visOptions(highlightNearest = TRUE, selectedBy = "group")

OBSERVATION

From the data extracted above, I observe that the bonding has become denser and the bridging between the nodes has also grown. The tables generated from both data sets also show that the betweenness and the degree of the clusters have increased. The betweenness has been increased by retweets on the nodes of a particular cluster.