## [1] "Using direct authentication"
tw = twitteR::searchTwitter('workout', n = 1000, since = '2016-11-08', retryOnRateLimit = 1)
workout = twitteR::twListToDF(tw)
library('tm')
## Warning: package 'tm' was built under R version 3.4.4
## Loading required package: NLP
library('RColorBrewer')
library('wordcloud')
saveRDS(workout, "workout.RDS")
workout <- readRDS("workout.RDS")
workouttweet <- workout$text
clean.text = function(x)
{
x = gsub("[^[:graph:]]", " ",x)
x = tolower(x)
x = gsub("rt", "", x)
x = gsub("@\\w+", "", x)
x = gsub("[[:punct:]]", "", x)
x = gsub("[[:digit:]]", "", x)
x = gsub("http\\w+", "", x)
x = gsub("[ |\t]{2,}", "", x)
x = gsub("^ ", "", x)
x = gsub(" $", "", x)
return(x)
}
workouttweet = clean.text(workouttweet)
corpus = Corpus(VectorSource(workouttweet))
tdm = TermDocumentMatrix(
corpus,
control = list(
wordLengths=c(3,20),
removePunctuation = TRUE,
stopwords = c("the", "a", stopwords("english")),
removeNumbers = TRUE, tolower = TRUE) )
tdm = as.matrix(tdm)
word_freqs = sort(rowSums(tdm), decreasing=TRUE)
word_freqs = word_freqs[-(1:9)]
dm = data.frame(word=names(word_freqs), freq=word_freqs)
wordcloud(head(dm$word, 50), head(dm$freq, 50), scale=c(2, .9), random.order=FALSE, colors=brewer.pal(8, "Dark2"))
head(word_freqs, 20)
## need office car comfo outdoorsget
## 53 46 45 45 45
## poolready summerworkout like will can
## 45 45 40 39 38
## korang still stuff amalkanbuat badan
## 32 32 31 29 29
## berat dalam inilah jer kalau
## 29 29 29 29 29
pos.words = scan('positive-words.txt', what='character', comment.char=';')
neg.words = scan('negative-words.txt', what='character', comment.char=';')
require(plyr)
## Loading required package: plyr
## Warning: package 'plyr' was built under R version 3.4.4
##
## Attaching package: 'plyr'
## The following object is masked from 'package:twitteR':
##
## id
require(stringr)
## Loading required package: stringr
## Warning: package 'stringr' was built under R version 3.4.4
require(stringi)
## Loading required package: stringi
## Warning: package 'stringi' was built under R version 3.4.4
neg.words = c(neg.words, 'wtf', 'fail')
score.sentiment = function(sentences, pos.words, neg.words, .progress='none')
{
scores = laply(sentences, function(sentence, pos.words, neg.words) {
sentence = gsub('[[:punct:]]', '', sentence)
sentence = gsub('[[:cntrl:]]', '', sentence)
sentence = gsub('\\d+', '', sentence)
sentence = tolower(sentence)
word.list = str_split(sentence, '\\s+')
words = unlist(word.list)
pos.matches = match(words, pos.words)
neg.matches = match(words, neg.words)
pos.matches = !is.na(pos.matches)
neg.matches = !is.na(neg.matches)
score = sum(pos.matches) - sum(neg.matches)
return(score)
}, pos.words, neg.words, .progress=.progress )
scores.df = data.frame(score=scores, text=sentences)
return(scores.df)
}
sentiment.scores= score.sentiment(workouttweet, pos.words, neg.words, .progress='none')
score <- sentiment.scores$score
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following objects are masked from 'package:plyr':
##
## arrange, mutate, rename, summarise
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
p <- plot_ly(x = ~score, type = "histogram")
p
## Warning: package 'bindrcpp' was built under R version 3.4.4
topic.pos = subset(sentiment.scores, score > 0)
topic.neg = subset(sentiment.scores, score < 0)
topic.neu = subset(sentiment.scores, score == 0)
negN = nrow(topic.neg)
posN = nrow(topic.pos)
neuN = nrow(topic.neu)
dftemp=data.frame(topic=c("Postive tweets", "Negative tweets" , "Neutral tweets"),
number=c(posN, negN, neuN))
library(plotly)
p <- plot_ly(data=dftemp, labels = ~topic, values = ~number, type = 'pie') %>%
layout(title = 'Pie Chart of the percentage of Positive, Negative and Neutral tweets',
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))
p
#Network anaysis for workout tweets
library(mclust)
## Warning: package 'mclust' was built under R version 3.4.4
## Package 'mclust' version 5.4
## Type 'citation("mclust")' for citing this R package in publications.
library(RColorBrewer)
library(sna)
## Warning: package 'sna' was built under R version 3.4.4
## Loading required package: statnet.common
##
## Attaching package: 'statnet.common'
## The following object is masked from 'package:base':
##
## order
## Loading required package: network
## Warning: package 'network' was built under R version 3.4.4
## network: Classes for Relational Data
## Version 1.13.0.1 created on 2015-08-31.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Martina Morris, University of Washington
## Skye Bender-deMoll, University of Washington
## For citation information, type citation("network").
## Type help("network-package") to get started.
##
## Attaching package: 'network'
## The following object is masked from 'package:plyr':
##
## is.discrete
## sna: Tools for Social Network Analysis
## Version 2.4 created on 2016-07-23.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## For citation information, type citation("sna").
## Type help(package="sna") to get started.
library(graph)
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:statnet.common':
##
## order
## The following object is masked from 'package:twitteR':
##
## as.data.frame
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, cbind, colMeans,
## colnames, colSums, do.call, duplicated, eval, evalq, Filter,
## Find, get, grep, grepl, intersect, is.unsorted, lapply,
## lengths, Map, mapply, match, mget, order, paste, pmax,
## pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
## rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
## tapply, union, unique, unsplit, which, which.max, which.min
##
## Attaching package: 'graph'
## The following object is masked from 'package:sna':
##
## degree
## The following object is masked from 'package:stringr':
##
## boundary
## The following object is masked from 'package:plyr':
##
## join
library(igraph)
## Warning: package 'igraph' was built under R version 3.4.4
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:graph':
##
## degree, edges, intersection, union
## The following objects are masked from 'package:BiocGenerics':
##
## normalize, union
## The following objects are masked from 'package:sna':
##
## betweenness, bonpow, closeness, components, degree,
## dyad.census, evcent, hierarchy, is.connected, neighborhood,
## triad.census
## The following objects are masked from 'package:network':
##
## %c%, %s%, add.edges, add.vertices, delete.edges,
## delete.vertices, get.edge.attribute, get.edges,
## get.vertex.attribute, is.bipartite, is.directed,
## list.edge.attributes, list.vertex.attributes,
## set.edge.attribute, set.vertex.attribute
## The following object is masked from 'package:plotly':
##
## groups
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(readr)
## Warning: package 'readr' was built under R version 3.4.4
library(plyr)
library(stringr)
library(stringi)
library(magrittr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:igraph':
##
## as_data_frame, groups, union
## The following object is masked from 'package:graph':
##
## union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:twitteR':
##
## id, location
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
tData <- data.frame(1:length(workout$screenName),workout$screenName,workout$text)
tData$workout.screenName <- as.character(tData$workout.screenName)
tData$workout.text <- as.character(tData$workout.text)
createList <- function(tData) {
# Reads data
nData <-
tData %>%
set_colnames(c("id", "screenname", "tweet")) %>%
tbl_df()
# Extracts poster information
retweeterPoster <-
nData %>%
mutate(is_retweeted = stri_detect_regex(tweet, "(RT|via)((?:\\b\\W*@\\w+)+)")) %>%
filter(is_retweeted) %>%
rowwise() %>%
do({
# Gets retwitter
who_retweet <-
stri_extract_first_regex(.$tweet, "(RT|via)((?:\\b\\W*@\\w+)+)")[[1]] %>%
stri_extract_first_regex("@[a-zA-Z0-9_]{1,}") %>%
stri_replace_all_fixed("@", "")
# Returns pair
data_frame(who_post = .$screenname, who_retweet = who_retweet,
combi = stri_c(sort(c(.$screenname, who_retweet)), collapse = " "))
}) %>%
ungroup() %>%
group_by(combi) %>%
summarize(from = min(who_post, who_retweet),
to = max(who_post, who_retweet),
weight = n()) %>%
ungroup() %>%
select(-combi)
# Returns results
retweeterPoster
}
retweeterPoster <- createList(tData)
m <- ftM2adjM(ft = as.matrix(retweeterPoster[, 1:2]), W = retweeterPoster$weight, edgemode = "directed")
g1 <- as(m, "graphNEL")
node <- data.frame(nodes(g1))
node$betweenness <- sna::betweenness(m)
node$degree <- sna::degree(m)
sortlist <- node[order(-node$degree),]
head(sortlist, 10)
## nodes.g1. betweenness degree
## 217 InfoBoosting 345 45
## 346 twtjogging 58 31
## 317 RockiiRoadd 48 19
## 342 ThinkSarcasm 12 13
## 341 therealcoya 10 11
## 39 alyssabflowers 8 9
## 180 FitnessMagazine 18 9
## 279 muscle_fitness 3 9
## 154 drdishbball 0 8
## 145 DeluxeFitgirls 0 6
node %<>%
mutate(size = log(node$degree)) %>%
mutate(size = ifelse(size == -Inf, 1, size))
N = 3
node %<>%
mutate(group = Mclust(size, G = N)$classification)
library(visNetwork)
## Warning: package 'visNetwork' was built under R version 3.4.4
gnode <- data.frame(node$nodes.g1.)
gnode<- setNames(gnode, "id")
gnode$shape <- "dot"
gnode$shadow <- TRUE # Nodes will drop shadow
gnode$title <- node$nodes.g1. #Click to show title
gnode$label <- node$degree # Node label by degree centrality
gnode$group <- node$group
gnode$size <- gnode$group*3 # Node size by group
gnode$color.background <- c("slategrey", "tomato", "gold")[gnode$group]
visNetwork(
gnode,
setNames(retweeterPoster, c("from", "to", "weight"))
) %>%
visOptions(highlightNearest = TRUE,
selectedBy = "group")