This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
Install the necessary packages once, then comment out the installation lines so they are not re-run on every execution.
# Load the saved tweets and keep only the columns needed for the retweet
# network: the author's screen name and the tweet text.
mydata <- read_rds("YCombinator.RDS")
n <- nrow(mydata)
tData <- mydata[, c("screenName", "text")]
# Prepend a running row id. seq_len() (unlike 1:n) yields an empty index when
# there are no rows, so an empty data set no longer breaks cbind(). The id
# column is named explicitly; createList() renames all columns by position.
tData <- cbind(id = seq_len(n), tData)
# Make sure both columns are plain character vectors (not factors).
tData$screenName <- as.character(tData$screenName)
tData$text <- as.character(tData$text)
#' Build a weighted, unordered retweet edge list from raw tweets.
#'
#' @param tData A three-column data frame; the columns are renamed by position
#'   to `id`, `screenname` and `tweet`.
#' @return A tibble with columns `from`, `to` and `weight`: one row per
#'   poster / retweeted-account pair, where `from` and `to` are the smaller
#'   and larger of the two names and `weight` is the number of matching
#'   tweets. Rows with a missing name on either side carry `NA` in `from` and
#'   `to`. A zero-row tibble with the same columns is returned when no tweet
#'   matches the retweet pattern.
createList <- function(tData) {
  # "RT" or "via" followed by one or more @mentions marks a retweet.
  # NOTE(review): there is no word boundary before (RT|via), so a word that
  # merely ends in "RT" right before a mention also matches - confirm whether
  # that is intended.
  retweet_pattern <- "(RT|via)((?:\\b\\W*@\\w+)+)"
  handle_pattern <- "@[a-zA-Z0-9_]{1,}"

  # Reads data
  nData <-
    tData %>%
    set_colnames(c("id", "screenname", "tweet")) %>%
    as_tibble()

  # Extract the retweet marker for all tweets in one vectorised pass; tweets
  # without a marker (or with a missing text) yield NA and are dropped.
  retweets <-
    nData %>%
    mutate(marker = stri_extract_first_regex(tweet, retweet_pattern)) %>%
    filter(!is.na(marker))

  # Nothing to aggregate: return an empty edge list with the usual columns.
  if (nrow(retweets) == 0L) {
    return(tibble(from = character(), to = character(), weight = integer()))
  }

  # Extracts poster information
  retweeterPoster <-
    retweets %>%
    transmute(
      who_post = screenname,
      # First @handle inside the marker, without the leading "@".
      who_retweet = stri_replace_all_fixed(
        stri_extract_first_regex(marker, handle_pattern),
        "@",
        ""
      ),
      # Order-independent pair key, so A->B and B->A fall into one group.
      # When no handle could be extracted the key is the poster alone.
      combi = ifelse(
        is.na(who_retweet),
        who_post,
        stri_c(
          pmin(who_post, who_retweet),
          pmax(who_post, who_retweet),
          sep = " "
        )
      )
    ) %>%
    group_by(combi) %>%
    dplyr::summarize(from = min(who_post, who_retweet),
                     to = max(who_post, who_retweet),
                     weight = n()) %>%
    ungroup() %>%
    select(-combi)

  # Returns results
  retweeterPoster
}
# Build the retweet edge list and discard every edge that has a missing value.
retweeterPoster <- na.omit(createList(tData))
#library(influenceR)
#g = graph_from_edgelist(as.matrix(retweeterPoster[, 1:2]), directed = FALSE)
#betweenness(g, snap = T)
#bridging(g)
#constraint(g)
#ens(g)
# Identify the top ten key players
# This function implements KPP-Pos, a metric intended to identify k nodes which optimize resource diffusion through the network.
#keyplayer(g, 10, prob = 0, tol = 1e-04, maxsec = 120, roundsec = 30)
# Build the weighted adjacency matrix of the retweet network.
# createList() stores each pair as from = min(name), to = max(name), so the
# column order only reflects alphabetical order, not who retweeted whom. The
# network is therefore treated as undirected; a directed reading would turn
# alphabetically late accounts into pure sinks with zero betweenness.
# NOTE: results recorded from earlier runs with directed settings (the
# betweenness column in particular) must be regenerated.
m <- ftM2adjM(
  ft = as.matrix(retweeterPoster[, 1:2]),
  W = retweeterPoster$weight,
  edgemode = "undirected"
)
g1 <- as(m, "graphNEL")
# Calculate centrality on the undirected graph (gmode = "graph"), so each
# edge is counted once even though the matrix is symmetric.
node <- data.frame(nodes(g1))
node$betweenness <- sna::betweenness(m, gmode = "graph")
node$degree <- sna::degree(m, gmode = "graph")
# Show the 20 accounts with the highest degree.
sortlist <- node[order(-node$degree), ]
head(sortlist, 20)
## nodes.g1. betweenness degree
## 155 garrytan 1797 83
## 309 TechCrunch 75 30
## 322 ycombinator 0 25
## 271 Recode 60 18
## 359 ugodre 0 17
## 220 KateClarkTweets 146 15
## 242 MaxCRoser 24 14
## 205 JijoSunny 290 12
## 362 techcrunch 0 10
## 367 VentureBeat 0 8
## 268 prosunjoyi 12 7
## 297 StartupLou 10 7
## 291 sheetal_sonar 3 6
## 350 thedatainc 0 6
## 464 WSJmarkets 0 6
## 1 _alexguillot 0 5
## 64 Bitfi6 0 5
## 151 ForbesME 9 5
## 230 LibbyMClark 6 5
## 375 tianhuil 0 5
# Node size is the log of degree centrality; the nodes are then split into
# N clusters by running Mclust on that size.
node$size <- log(node$degree)
# log(0) is -Inf; such nodes get size 1 instead.
node$size <- ifelse(node$size == -Inf, 1, node$size)
N <- 2
node$group <- Mclust(node$size, G = N)$classification
library(visNetwork)
## Warning: package 'visNetwork' was built under R version 3.5.3
# Node table for visNetwork: one row per account, styled by cluster.
gnode <- data.frame(
  id = node$nodes.g1.,
  shape = "dot",
  shadow = TRUE,                    # nodes drop a shadow
  title = node$nodes.g1.,           # account name used as the node title
  label = node$degree,              # node label by degree centrality
  group = node$group,
  size = node$group * 3,            # node size by group
  color.background = c("slategrey", "tomato", "gold")[node$group],
  stringsAsFactors = FALSE
)
# Render the interactive network: highlight a node's neighbours on selection
# and offer a drop-down to select nodes by cluster.
visOptions(
  visNetwork(
    nodes = gnode,
    edges = setNames(retweeterPoster, c("from", "to", "weight"))
  ),
  highlightNearest = TRUE,
  selectedBy = "group"
)