#install.packages("mclust")
#install.packages("plyr")
#install.packages("stringr")
#install.packages("igraph")
#install.packages("stringi")
#install.packages("magrittr")
#install.packages("dplyr")
#install.packages("sna")
#install.packages("RColorBrewer")
#install.packages("visNetwork")
#source("http://bioconductor.org/biocLite.R")
#biocLite("RBGL")
#biocLite("graph")
library(mclust)
## Warning: package 'mclust' was built under R version 3.4.4
## Package 'mclust' version 5.4
## Type 'citation("mclust")' for citing this R package in publications.
library(RColorBrewer)
library(sna)
## Warning: package 'sna' was built under R version 3.4.4
## Loading required package: statnet.common
##
## Attaching package: 'statnet.common'
## The following object is masked from 'package:base':
##
## order
## Loading required package: network
## Warning: package 'network' was built under R version 3.4.4
## network: Classes for Relational Data
## Version 1.13.0.1 created on 2015-08-31.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Martina Morris, University of Washington
## Skye Bender-deMoll, University of Washington
## For citation information, type citation("network").
## Type help("network-package") to get started.
## sna: Tools for Social Network Analysis
## Version 2.4 created on 2016-07-23.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## For citation information, type citation("sna").
## Type help(package="sna") to get started.
library(graph)
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:statnet.common':
##
## order
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, cbind, colMeans,
## colnames, colSums, do.call, duplicated, eval, evalq, Filter,
## Find, get, grep, grepl, intersect, is.unsorted, lapply,
## lengths, Map, mapply, match, mget, order, paste, pmax,
## pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
## rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
## tapply, union, unique, unsplit, which, which.max, which.min
##
## Attaching package: 'graph'
## The following object is masked from 'package:sna':
##
## degree
library(igraph)
## Warning: package 'igraph' was built under R version 3.4.4
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:graph':
##
## degree, edges, intersection, union
## The following objects are masked from 'package:BiocGenerics':
##
## normalize, union
## The following objects are masked from 'package:sna':
##
## betweenness, bonpow, closeness, components, degree,
## dyad.census, evcent, hierarchy, is.connected, neighborhood,
## triad.census
## The following objects are masked from 'package:network':
##
## %c%, %s%, add.edges, add.vertices, delete.edges,
## delete.vertices, get.edge.attribute, get.edges,
## get.vertex.attribute, is.bipartite, is.directed,
## list.edge.attributes, list.vertex.attributes,
## set.edge.attribute, set.vertex.attribute
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(readr)
library(plyr)
## Warning: package 'plyr' was built under R version 3.4.4
##
## Attaching package: 'plyr'
## The following object is masked from 'package:graph':
##
## join
## The following object is masked from 'package:network':
##
## is.discrete
library(stringi)
## Warning: package 'stringi' was built under R version 3.4.4
library(stringr)
## Warning: package 'stringr' was built under R version 3.4.4
##
## Attaching package: 'stringr'
## The following object is masked from 'package:graph':
##
## boundary
library(magrittr)
## Warning: package 'magrittr' was built under R version 3.4.4
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:igraph':
##
## as_data_frame, groups, union
## The following object is masked from 'package:graph':
##
## union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Samsung <- read_rds("Samsung.RDS")
tData <- data.frame(1:length(Samsung$screenName),Samsung$screenName,Samsung$tweettext)
tData$Samsung.screenName <- as.character(tData$Samsung.screenName)
tData$Samsung.tweettext <- as.character(tData$Samsung.tweettext)
Samsung2 <- read_rds("Samsung2.RDS")
tData2 <- data.frame(1:length(Samsung2$screenName),Samsung2$screenName,Samsung2$tweettext)
tData2$Samsung2.screenName <- as.character(tData2$Samsung2.screenName)
tData2$Samsung2.tweettext <- as.character(tData2$Samsung2.tweettext)
createList <- function(tData) {
# Reads data
nData <- tData %>%
set_colnames(c("id", "screenname", "tweet")) %>%
tbl_df()
# Extracts poster information
retweeterPoster <- nData %>%
mutate(is_retweeted = stri_detect_regex(tweet, "(RT|via)((?:\\b\\W*@\\w+)+)")) %>%
filter(is_retweeted) %>%
rowwise() %>%
do({
# Gets retwitter
who_retweet <-
stri_extract_first_regex(.$tweet, "(RT|via)((?:\\b\\W*@\\w+)+)")[[1]] %>%
stri_extract_first_regex("@[a-zA-Z0-9_]{1,}") %>%
stri_replace_all_fixed("@", "")
# Returns pair
data_frame(who_post = .$screenname, who_retweet = who_retweet,
combi = stri_c(sort(c(.$screenname, who_retweet)), collapse = " "))
}) %>%
ungroup() %>%
group_by(combi) %>%
summarize(from = min(who_post, who_retweet),
to = max(who_post, who_retweet),
weight = n()) %>%
ungroup() %>%
select(-combi)
# Returns results
retweeterPoster
}
createList2 <- function(tData2) {
# Reads Data
nData2 <- tData2 %>%
set_colnames(c("id", "screenname", "tweet")) %>%
tbl_df()
# Extract poster information
retweeterPoster2 <- nData2 %>%
mutate(is_retweeted = stri_detect_regex(tweet, "(RT|via)((?:\\b\\W*@\\w+)+)")) %>%
filter(is_retweeted) %>%
rowwise() %>%
do({
# Gets retwitter
who_retweet2 <-
stri_extract_first_regex(.$tweet, "(RT|via)((?:\\b\\W*@\\w+)+)")[[1]] %>%
stri_extract_first_regex("@[a-zA-Z0-9_]{1,}") %>%
stri_replace_all_fixed("@", "")
# Returns pair
data_frame(who_post = .$screenname, who_retweet2 = who_retweet2,
combi = stri_c(sort(c(.$screenname, who_retweet2)), collapse = " "))
}) %>%
ungroup() %>%
group_by(combi) %>%
summarize(from = min(who_post, who_retweet2),
to = max(who_post, who_retweet2),
weight = n()) %>%
ungroup() %>%
select(-combi)
# Return results
retweeterPoster2
}
retweeterPoster <- createList(tData)
## Warning: package 'bindrcpp' was built under R version 3.4.4
retweeterPoster2 <- createList(tData2)
m <- ftM2adjM(ft = as.matrix(retweeterPoster[, 1:2]), W = retweeterPoster$weight, edgemode = "directed")
g1 <- as(m, "graphNEL")
m2 <- ftM2adjM(ft = as.matrix(retweeterPoster2[, 1:2]), W = retweeterPoster2$weight, edgemode = "directed")
g2 <- as(m2, "graphNEL")
# Calculate centrality
node <- data.frame(nodes(g1))
node$betweenness <- sna::betweenness(m)
node$degree <- sna::degree(m)
sortlist <- node[order(-node$degree),]
head(sortlist, 10)
node2 <- data.frame(nodes(g2))
node2$betweenness <- sna::betweenness(m2)
node2$degree <- sna::degree(m2)
sortlist <- node2[order(-node2$degree),]
head(sortlist, 10)
# Defines clusters for nodes in 3 groups by degree centrality
node %<>%
mutate(size = log(node$degree)) %>%
mutate(size = ifelse(size == -Inf, 1, size))
N = 3
node %<>%
mutate(group = Mclust(size, G = N)$classification)
node2 %<>%
mutate(size = log(node2$degree)) %>%
mutate(size = ifelse(size == -Inf, 1, size))
N = 3
node2 %<>%
mutate(group = Mclust(size, G = N)$classification)
library(visNetwork)
## Warning: package 'visNetwork' was built under R version 3.4.4
gnode <- data.frame(node$nodes.g1.)
gnode<- setNames(gnode, "id")
gnode$shape <- "dot"
gnode$shadow <- TRUE # Nodes will drop shadow
gnode$title <- node$nodes.g1. #Click to show title
gnode$label <- node$degree # Node label by degree centrality
gnode$group <- node$group
gnode$size <- gnode$group*3 # Node size by group
gnode$color.background <- c("slategrey", "tomato", "gold")[gnode$group]
gnode2 <- data.frame(node2$nodes.g2.)
gnode2<- setNames(gnode2, "id")
gnode2$shape <- "dot"
gnode2$shadow <- TRUE # Nodes will drop shadow
gnode2$title <- node2$nodes.g2. #Click to show title
gnode2$label <- node2$degree # Node label by degree centrality
gnode2$group <- node2$group
gnode2$size <- gnode2$group*3 # Node size by group
gnode2$color.background <- c("slategrey", "tomato", "gold")[gnode2$group]
visNetwork(
gnode,
setNames(retweeterPoster, c("from", "to", "weight"))
) %>%
visOptions(highlightNearest = TRUE,
selectedBy = "group")
visNetwork(
gnode2,
setNames(retweeterPoster, c("from", "to", "weight"))
) %>%
visOptions(highlightNearest = TRUE,
selectedBy = "group")