This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
#install.packages("mclust")
#install.packages("plyr")
#install.packages("stringr")
#install.packages("igraph")
#install.packages("stringi")
#install.packages("magrittr")
#install.packages("dplyr")
#install.packages("sna")
#install.packages("RColorBrewer")
#install.packages("visNetwork")
#source("http://bioconductor.org/biocLite.R")
#biocLite("RBGL")
#biocLite("graph")
library(mclust)
## Warning: package 'mclust' was built under R version 3.4.4
## Package 'mclust' version 5.4
## Type 'citation("mclust")' for citing this R package in publications.
library(RColorBrewer)
library(sna)
## Warning: package 'sna' was built under R version 3.4.4
## Loading required package: statnet.common
##
## Attaching package: 'statnet.common'
## The following object is masked from 'package:base':
##
## order
## Loading required package: network
## Warning: package 'network' was built under R version 3.4.4
## network: Classes for Relational Data
## Version 1.13.0 created on 2015-08-31.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Martina Morris, University of Washington
## Skye Bender-deMoll, University of Washington
## For citation information, type citation("network").
## Type help("network-package") to get started.
## sna: Tools for Social Network Analysis
## Version 2.4 created on 2016-07-23.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## For citation information, type citation("sna").
## Type help(package="sna") to get started.
library(graph)
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:statnet.common':
##
## order
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, cbind, colMeans,
## colnames, colSums, do.call, duplicated, eval, evalq, Filter,
## Find, get, grep, grepl, intersect, is.unsorted, lapply,
## lengths, Map, mapply, match, mget, order, paste, pmax,
## pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
## rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
## tapply, union, unique, unsplit, which, which.max, which.min
##
## Attaching package: 'graph'
## The following object is masked from 'package:sna':
##
## degree
library(igraph)
## Warning: package 'igraph' was built under R version 3.4.4
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:graph':
##
## degree, edges, intersection, union
## The following objects are masked from 'package:BiocGenerics':
##
## normalize, union
## The following objects are masked from 'package:sna':
##
## betweenness, bonpow, closeness, components, degree,
## dyad.census, evcent, hierarchy, is.connected, neighborhood,
## triad.census
## The following objects are masked from 'package:network':
##
## %c%, %s%, add.edges, add.vertices, delete.edges,
## delete.vertices, get.edge.attribute, get.edges,
## get.vertex.attribute, is.bipartite, is.directed,
## list.edge.attributes, list.vertex.attributes,
## set.edge.attribute, set.vertex.attribute
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(readr)
## Warning: package 'readr' was built under R version 3.4.4
library(plyr)
##
## Attaching package: 'plyr'
## The following object is masked from 'package:graph':
##
## join
## The following object is masked from 'package:network':
##
## is.discrete
library(stringr)
##
## Attaching package: 'stringr'
## The following object is masked from 'package:graph':
##
## boundary
library(stringi)
library(magrittr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:igraph':
##
## as_data_frame, groups, union
## The following object is masked from 'package:graph':
##
## union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the collected tweets and keep only what the network analysis needs:
# a running row id, the poster's screen name and the tweet text.
Samsung <- read_rds("Samsung.RDS")
tData <- data.frame(
  # seq_along() yields an empty id vector for an empty input, whereas
  # 1:length(x) would produce c(1, 0).
  id = seq_along(Samsung$screenName),
  Samsung.screenName = as.character(Samsung$screenName),
  Samsung.tweettext = as.character(Samsung$tweettext),
  # Keep both text columns as character on every R version.
  stringsAsFactors = FALSE
)
# Builds a weighted edge list of retweet relationships from a tweet table.
#
# Args:
#   tData: data frame with exactly three columns, used by position: a row
#     id, the poster's screen name and the tweet text.
#
# Returns:
#   A tibble with columns `from`, `to` and `weight`. Each row is one pair of
#   accounts (the poster and the first account mentioned after "RT"/"via").
#   The two names are stored in sorted order, and `weight` counts the tweets
#   that link the pair.
createList <- function(tData) {
  # "RT" or "via" followed by one or more @mentions.
  retweet_pattern <- "(RT|via)((?:\\b\\W*@\\w+)+)"

  tData %>%
    set_colnames(c("id", "screenname", "tweet")) %>%
    as_tibble() %>%
    mutate(
      screenname = as.character(screenname),
      # The stringi functions are vectorised, so the whole column is
      # processed in one pass instead of one call per row.
      marker = stri_extract_first_regex(tweet, retweet_pattern),
      handle = stri_extract_first_regex(marker, "@[a-zA-Z0-9_]{1,}"),
      who_retweet = stri_replace_all_fixed(handle, "@", "")
    ) %>%
    # Tweets without a retweet marker, or whose handle could not be
    # extracted, give NA here and cannot form an edge.
    filter(!is.na(screenname), !is.na(who_retweet)) %>%
    # Sort each pair so that A -> B and B -> A are counted as the same edge.
    transmute(
      from = pmin(screenname, who_retweet),
      to = pmax(screenname, who_retweet)
    ) %>%
    group_by(from, to) %>%
    summarize(weight = n()) %>%
    ungroup()
}
# Turn the retweet edge list into an adjacency matrix and a graphNEL graph.
retweeterPoster <- createList(tData)
m <- ftM2adjM(
  ft = as.matrix(retweeterPoster[, 1:2]),
  W = retweeterPoster$weight,
  edgemode = "directed"
)
g1 <- as(m, "graphNEL")

# Centrality scores per node, computed from the adjacency matrix.
node <- data.frame(nodes(g1))
node$betweenness <- sna::betweenness(m)
node$degree <- sna::degree(m)

# Preview the ten nodes with the highest degree.
sortlist <- node[order(-node$degree), ]
head(sortlist, 10)

# Node size is the log of the degree; log(0) = -Inf is replaced by 1.
node <- mutate(node, size = log(degree))
node <- mutate(node, size = ifelse(size == -Inf, 1, size))

# Split the nodes into N groups by fitting a mixture model to `size`.
N <- 3
node <- mutate(node, group = Mclust(size, G = N)$classification)
library(visNetwork)
## Warning: package 'visNetwork' was built under R version 3.4.4
# Node table for visNetwork, one row per graph node.
gnode <- data.frame(
  id = node$nodes.g1.,
  shape = "dot",
  shadow = TRUE,                 # nodes drop a shadow
  title = node$nodes.g1.,        # tooltip text: the account name
  label = node$degree,           # label each node with its degree
  group = node$group,
  size = node$group * 3,         # node size scales with the group number
  color.background = c("slategrey", "tomato", "gold")[node$group],
  stringsAsFactors = FALSE
)

# Draw the interactive network; edges come from the retweet edge list.
gnode %>%
  visNetwork(edges = setNames(retweeterPoster, c("from", "to", "weight"))) %>%
  visOptions(
    highlightNearest = TRUE,
    selectedBy = "group"
  )
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.