# Load packages
library(rtweet)
library(tidyverse)
# Twitter authentication
# Creates the OAuth token through rtweet's create_token().
# The four credential objects passed below (consumer_key, consumer_secret,
# access_token, access_secret) are not defined anywhere in this script as
# shown, so they must already exist in the session when this call runs.
create_token(
  app             = "my_twitter_research_app",
  consumer_key    = consumer_key,
  consumer_secret = consumer_secret,
  access_token    = access_token,
  access_secret   = access_secret
)
## <Token>
## <oauth_endpoint>
## request: https://api.twitter.com/oauth/request_token
## authorize: https://api.twitter.com/oauth/authenticate
## access: https://api.twitter.com/oauth/access_token
## <oauth_app> my_twitter_research_app
## key: <redacted>
## secret: <hidden>
## <credentials> oauth_token, oauth_token_secret
## ---
# Retrieve tweets
# Search for up to 8000 statuses matching "Telkomsel", with
# tweet_mode = "extended".
tweets <- search_tweets("Telkomsel", n = 8000, tweet_mode = "extended")
## Searching for tweets...
## Finished collecting tweets!

# Keep one row per distinct tweet text; .keep_all = TRUE retains the
# remaining columns of the first row found for each text.
tweets <- distinct(tweets, text, .keep_all = TRUE)

# Time series of status counts aggregated in three-hour intervals.
# The title names the term that was actually searched for ("Telkomsel").
ts_plot(tweets, "3 hours") +
  theme_minimal() +
  theme(plot.title = ggplot2::element_text(face = "bold")) +
  labs(
    x = NULL, y = NULL,
    title = "Frequency of Telkomsel Twitter statuses from past 9 days",
    subtitle = "Twitter status (tweet) counts aggregated using three-hour intervals",
    caption = "\nSource: Data collected from Twitter's REST API via rtweet"
  )

# Show the last 20 rows of the de-duplicated data.
tail(tweets, 20)
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
# Build a corpus whose source is the character vector of tweet texts.
myCorpus <- Corpus(VectorSource(tweets$text))

# Text-cleaning helpers; each takes a character vector and returns the
# result of gsub() on it.
# Delete URLs: "http" followed by any run of non-whitespace characters.
removeURL <- function(x) {
  gsub("http[^[:space:]]*", "", x)
}
# Delete every character that is neither alphabetic ([:alpha:]) nor
# whitespace ([:space:]); this removes digits and punctuation.
removeNumPunct <- function(x) {
  gsub("[^[:alpha:][:space:]]*", "", x)
}

# Apply the transformations in order: lower-case, then URLs, then
# non-letter characters. The warnings recorded below were emitted by each
# tm_map() call when the script was run.
myCorpus <- tm_map(myCorpus, content_transformer(tolower))
## Warning in tm_map.SimpleCorpus(myCorpus, content_transformer(tolower)):
## transformation drops documents
myCorpus <- tm_map(myCorpus, content_transformer(removeURL))
## Warning in tm_map.SimpleCorpus(myCorpus, content_transformer(removeURL)):
## transformation drops documents
myCorpus <- tm_map(myCorpus, content_transformer(removeNumPunct))
## Warning in tm_map.SimpleCorpus(myCorpus,
## content_transformer(removeNumPunct)): transformation drops documents
# Remove stopwords.
# Start from tm's English list with "r" and "big" taken out of it (so those
# two words are kept in the text), then add a few extra terms to drop.
# NOTE(review): "indihome" is listed although the search term used earlier
# in this script is "Telkomsel" - confirm this is intended.
myStopwords <- c(
  setdiff(stopwords("english"), c("r", "big")),
  "use", "see", "used", "via", "amp", "indihome"
)
# Additional stopwords are read from the first column (V1) of a local file;
# presumably an Indonesian list, judging by the file name - verify.
stopwords_id <- read.table("E://stopwords-id.txt", header = FALSE)
myStopwords <- c(myStopwords, as.character(stopwords_id$V1), "hi", "yg")
myCorpus <- tm_map(myCorpus, removeWords, myStopwords)
## Warning in tm_map.SimpleCorpus(myCorpus, removeWords, myStopwords):
## transformation drops documents
# Remove extra whitespace.
myCorpus <- tm_map(myCorpus, stripWhitespace)
## Warning in tm_map.SimpleCorpus(myCorpus, stripWhitespace): transformation
## drops documents
# Keep a copy for stem completion later (the copy is not referenced again
# in the script as shown).
myCorpusCopy <- myCorpus
# Build the term-document matrix from the cleaned corpus.
# wordLengths = c(1, Inf) sets the accepted term length to one character
# and up.
tdm <- TermDocumentMatrix(myCorpus, control = list(wordLengths = c(1, Inf)))
tdm
## <<TermDocumentMatrix (terms: 4696, documents: 3100)>>
## Non-/sparse entries: 41766/14515834
## Sparsity : 100%
## Maximal term length: 41
## Weighting : term frequency (tf)
# Terms found with a lower frequency bound of 20.
freq.terms <- findFreqTerms(tdm, lowfreq = 20)
# Show at most the first 50 of them. head() is used instead of [1:50] so
# that the result is not padded with NA when fewer than 50 terms qualify.
head(freq.terms, 50)
## [1] "hai" "internet" "kaka"
## [4] "kasih" "mytelkomsel" "paket"
## [7] "telkomsel" "terima" "daniirmatria"
## [10] "dm" "kenny" "klik"
## [13] "langsung" "link" "silakan"
## [16] "terhubung" "twitter" "aplikasi"
## [19] "dialami" "ketidaknyamanan" "maaf"
## [22] "masuk" "mbak" "mohon"
## [25] "penipuan" "proses" "terkait"
## [28] "dibantu" "keluhan" "layanan"
## [31] "produk" "reva" "seputar"
## [34] "akses" "capture" "data"
## [37] "dicek" "hp" "infokan"
## [40] "jaringan" "kecepatan" "kejadian"
## [43] "kendala" "keterangan" "lokasi"
## [46] "neklampir" "nomor" "sinyal"
## [49] "stabil" "terjaga"
# Total count of each term across all documents (row sums of the dense
# term-document matrix).
term.freq <- rowSums(as.matrix(tdm))
# Keep only terms whose total is at least 150.
term.freq <- term.freq[term.freq >= 150]
# One row per retained term, with its total count.
df <- data.frame(term = names(term.freq), freq = term.freq)
# Horizontal bar chart of term counts; small axis text so labels fit.
ggplot(df, aes(x = term, y = freq)) +
  geom_col() +
  labs(x = "Terms", y = "Count") +
  coord_flip() +
  theme(axis.text = element_text(size = 7))
library(wordcloud)
## Loading required package: RColorBrewer
# Dense matrix form of the term-document matrix.
m <- as.matrix(tdm)
# Total count per term, sorted from most to least frequent.
# TRUE/FALSE are spelled out in this block: T and F are ordinary variables
# that can be reassigned, whereas TRUE and FALSE are reserved words.
word.freq <- sort(rowSums(m), decreasing = TRUE)
# Colours: the 9-colour "BuGn" palette with its first four entries dropped.
pal <- brewer.pal(9, "BuGn")[-(1:4)]
# Word cloud of terms with a total count of at least 100.
wordcloud(
  words = names(word.freq), freq = word.freq, min.freq = 100,
  random.order = FALSE, colors = pal
)