# install.packages("mclust")
# install.packages("plyr")
# install.packages("stringr")
# install.packages("igraph")
# install.packages("stringi")
# install.packages("magrittr")
# install.packages("dplyr")
# install.packages("sna")
# install.packages("RColorBrewer")
# install.packages("visNetwork")
#source("http://bioconductor.org/biocLite.R")
#biocLite("RBGL")
#biocLite("graph")
library(mclust)
## Package 'mclust' version 5.4
## Type 'citation("mclust")' for citing this R package in publications.
library(RColorBrewer)
library(sna)
## Loading required package: statnet.common
##
## Attaching package: 'statnet.common'
## The following object is masked from 'package:base':
##
## order
## Loading required package: network
## network: Classes for Relational Data
## Version 1.13.0 created on 2015-08-31.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Martina Morris, University of Washington
## Skye Bender-deMoll, University of Washington
## For citation information, type citation("network").
## Type help("network-package") to get started.
## sna: Tools for Social Network Analysis
## Version 2.4 created on 2016-07-23.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## For citation information, type citation("sna").
## Type help(package="sna") to get started.
library(graph)
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:statnet.common':
##
## order
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, cbind, colMeans,
## colnames, colSums, do.call, duplicated, eval, evalq, Filter,
## Find, get, grep, grepl, intersect, is.unsorted, lapply,
## lengths, Map, mapply, match, mget, order, paste, pmax,
## pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
## rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
## tapply, union, unique, unsplit, which, which.max, which.min
##
## Attaching package: 'graph'
## The following object is masked from 'package:sna':
##
## degree
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:graph':
##
## degree, edges, intersection, union
## The following objects are masked from 'package:BiocGenerics':
##
## normalize, union
## The following objects are masked from 'package:sna':
##
## betweenness, bonpow, closeness, components, degree,
## dyad.census, evcent, hierarchy, is.connected, neighborhood,
## triad.census
## The following objects are masked from 'package:network':
##
## %c%, %s%, add.edges, add.vertices, delete.edges,
## delete.vertices, get.edge.attribute, get.edges,
## get.vertex.attribute, is.bipartite, is.directed,
## list.edge.attributes, list.vertex.attributes,
## set.edge.attribute, set.vertex.attribute
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(readr)
library(plyr)
##
## Attaching package: 'plyr'
## The following object is masked from 'package:graph':
##
## join
## The following object is masked from 'package:network':
##
## is.discrete
library(stringr)
##
## Attaching package: 'stringr'
## The following object is masked from 'package:graph':
##
## boundary
library(stringi)
library(magrittr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:igraph':
##
## as_data_frame, groups, union
## The following object is masked from 'package:graph':
##
## union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Samsung <- read_rds("Samsung.RDS")
tData <- data.frame(1:length(Samsung$screenName),Samsung$screenName,Samsung$tweettext)
tData$Samsung.screenName <- as.character(tData$Samsung.screenName)
tData$Samsung.tweettext <- as.character(tData$Samsung.tweettext)
createList <- function(tData) {
# Reads data
nData <-
tData %>%
set_colnames(c("id", "screenname", "tweet")) %>%
tbl_df()
# Extracts poster information
retweeterPoster <-
nData %>%
mutate(is_retweeted = stri_detect_regex(tweet, "(RT|via)((?:\\b\\W*@\\w+)+)")) %>%
filter(is_retweeted) %>%
rowwise() %>%
do({
# Gets retwitter
who_retweet <-
stri_extract_first_regex(.$tweet, "(RT|via)((?:\\b\\W*@\\w+)+)")[[1]] %>%
stri_extract_first_regex("@[a-zA-Z0-9_]{1,}") %>%
stri_replace_all_fixed("@", "")
# Returns pair
data_frame(who_post = .$screenname, who_retweet = who_retweet,
combi = stri_c(sort(c(.$screenname, who_retweet)), collapse = " "))
}) %>%
ungroup() %>%
group_by(combi) %>%
summarize(from = min(who_post, who_retweet),
to = max(who_post, who_retweet),
weight = n()) %>%
ungroup() %>%
select(-combi)
# Returns results
retweeterPoster
}
retweeterPoster <- createList(tData)
m <- ftM2adjM(ft = as.matrix(retweeterPoster[, 1:2]), W = retweeterPoster$weight, edgemode = "directed")
g1 <- as(m, "graphNEL")
# Calculate centrality
node <- data.frame(nodes(g1))
node$betweenness <- sna::betweenness(m)
node$degree <- sna::degree(m)
sortlist <- node[order(-node$degree),]
head(sortlist, 10)
## nodes.g1. betweenness degree
## 96 ilovemyiwatch 299.5 62
## 97 ILoveMyPebble 260.5 54
## 157 Techmeme 16.0 17
## 119 KathysDeep 21.0 12
## 233 Pebble_Knight 0.0 8
## 55 chutamasbarato 6.0 7
## 153 raymondochoa12 26.0 7
## 215 YouTube 0.0 7
## 25 angrysoofa 5.0 6
## 31 asda 0.0 5
# Defines clusters for nodes in 3 groups by degree centrality
node %<>%
mutate(size = log(node$degree)) %>%
mutate(size = ifelse(size == -Inf, 1, size))
N = 3
node %<>%
mutate(group = Mclust(size, G = N)$classification)
library(visNetwork)
gnode <- data.frame(node$nodes.g1.)
gnode<- setNames(gnode, "id")
gnode$shape <- "dot"
gnode$shadow <- TRUE # Nodes will drop shadow
gnode$title <- node$nodes.g1. #Click to show title
gnode$label <- node$degree # Node label by degree centrality
gnode$group <- node$group
gnode$size <- gnode$group*3 # Node size by group
gnode$color.background <- c("slategrey", "tomato", "gold")[gnode$group]
visNetwork(
gnode,
setNames(retweeterPoster, c("from", "to", "weight"))
) %>%
visOptions(highlightNearest = TRUE,
selectedBy = "group")
library(mclust)
library(RColorBrewer)
library(sna)
library(graph)
library(igraph)
library(readr)
library(plyr)
library(stringr)
library(stringi)
library(magrittr)
library(dplyr)
Samsung2 <- read_rds("Samsung2.RDS")
tData <- data.frame(1:length(Samsung2$screenName),Samsung2$screenName,Samsung2$tweettext)
tData$Samsung2.screenName <- as.character(tData$Samsung2.screenName)
tData$Samsung2.tweettext <- as.character(tData$Samsung2.tweettext)
createList <- function(tData) {
# Reads data
nData <-
tData %>%
set_colnames(c("id", "screenname", "tweet")) %>%
tbl_df()
# Extracts poster information
retweeterPoster <-
nData %>%
mutate(is_retweeted = stri_detect_regex(tweet, "(RT|via)((?:\\b\\W*@\\w+)+)")) %>%
filter(is_retweeted) %>%
rowwise() %>%
do({
# Gets retwitter
who_retweet <-
stri_extract_first_regex(.$tweet, "(RT|via)((?:\\b\\W*@\\w+)+)")[[1]] %>%
stri_extract_first_regex("@[a-zA-Z0-9_]{1,}") %>%
stri_replace_all_fixed("@", "")
# Returns pair
data_frame(who_post = .$screenname, who_retweet = who_retweet,
combi = stri_c(sort(c(.$screenname, who_retweet)), collapse = " "))
}) %>%
ungroup() %>%
group_by(combi) %>%
summarize(from = min(who_post, who_retweet),
to = max(who_post, who_retweet),
weight = n()) %>%
ungroup() %>%
select(-combi)
# Returns results
retweeterPoster
}
retweeterPoster <- createList(tData)
m <- ftM2adjM(ft = as.matrix(retweeterPoster[, 1:2]), W = retweeterPoster$weight, edgemode = "directed")
g1 <- as(m, "graphNEL")
# Calculate centrality
node <- data.frame(nodes(g1))
node$betweenness <- sna::betweenness(m)
node$degree <- sna::degree(m)
sortlist <- node[order(-node$degree),]
head(sortlist, 10)
## nodes.g1. betweenness degree
## 103 Fact 2280 106
## 132 ilovemyiwatch 238 50
## 45 applenws 246 49
## 133 ILoveMyPebble 210 44
## 201 TheWorldStories 96 35
## 39 androidcentral 51 23
## 157 mayflashfly 54 15
## 202 tnkgrl 46 11
## 7 _WistJeAl 0 9
## 127 HuffPostTech 7 8
# Defines clusters for nodes in 3 groups by degree centrality
node %<>%
mutate(size = log(node$degree)) %>%
mutate(size = ifelse(size == -Inf, 1, size))
N = 3
node %<>%
mutate(group = Mclust(size, G = N)$classification)
library(visNetwork)
gnode <- data.frame(node$nodes.g1.)
gnode<- setNames(gnode, "id")
gnode$shape <- "dot"
gnode$shadow <- TRUE # Nodes will drop shadow
gnode$title <- node$nodes.g1. #Click to show title
gnode$label <- node$degree # Node label by degree centrality
gnode$group <- node$group
gnode$size <- gnode$group*3 # Node size by group
gnode$color.background <- c("slategrey", "tomato", "gold")[gnode$group]
visNetwork(
gnode,
setNames(retweeterPoster, c("from", "to", "weight"))
) %>%
visOptions(highlightNearest = TRUE,
selectedBy = "group")
From the data extracted above, I analyze that the bonding has been become more densed and the bridging has also grown between the nodes. The tables generated from both the data also show the betweeness and the degree of the clusters is also increased. The betweenness has been increased by retweets on the nodes for a particular cluster.