# Load packages
library(rtweet)
Paket rtweet digunakan untuk mengumpulkan data dari Twitter yang kemudian dianalisis lebih lanjut. Langkah selanjutnya adalah mendaftar ke API Twitter untuk mendapatkan akses.
Setelah token akses API Twitter diperoleh, token tersebut digunakan untuk mengakses data dalam dokumen R Markdown ini.
# Twitter authentication
# Registers the app's OAuth credentials with rtweet.
# The four credential variables are not defined in this part of the file;
# they are assumed to be set earlier (TODO confirm) and must never be
# hard-coded here.
# The result is assigned instead of being left as a bare expression so the
# token object is not auto-printed: its printed form includes the app's
# consumer key, which would otherwise end up in the rendered output.
twitter_token <- create_token(
  app = "my_twitter_research_app",
  consumer_key = consumer_key,
  consumer_secret = consumer_secret,
  access_token = access_token,
  access_secret = access_secret
)
# Retrieve tweets
# Request up to 10,000 statuses carrying the hashtag, with full-length text.
# No language filter is applied. An earlier `langs = "en"` argument was
# removed because `search_tweets()` has no `langs` parameter (the filter is
# spelled `lang`); the misspelled argument appears to have had no effect,
# since the terms extracted from the result are Indonesian.
# NOTE(review): add `lang = ...` here only if a language filter is wanted.
tweets <- search_tweets("#HariAyah", n = 10000, tweet_mode = "extended")
## Searching for tweets...
## Finished collecting tweets!
## Time series of tweet counts
# Aggregate the statuses into three-hour intervals, prepare the theme tweak
# and the labels as separate pieces, then add them in order to draw the chart.
tweet_timeline <- ts_plot(tweets, "3 hours")
bold_title <- ggplot2::theme(
  plot.title = ggplot2::element_text(face = "bold")
)
timeline_labels <- ggplot2::labs(
  x = NULL,
  y = NULL,
  title = "Frequency of #HariAyah Twitter statuses from past 9 days",
  subtitle = "Twitter status (tweet) counts aggregated using three-hour intervals",
  caption = "\nSource: Data collected from Twitter's REST API via rtweet"
)
tweet_timeline + ggplot2::theme_minimal() + bold_title + timeline_labels
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
# Wrap the tweet text (a character vector) in a tm corpus
tweet_source <- VectorSource(tweets$text)
myCorpus <- Corpus(tweet_source)
# Fold every document to lower case
lowercase_step <- content_transformer(tolower)
myCorpus <- tm_map(myCorpus, lowercase_step)
# tm reports "transformation drops documents" as a warning for this step.
# Strip URLs: delete "http" together with every non-whitespace character
# that follows it. Works element-wise on a character vector.
removeURL <- function(x) {
  gsub(pattern = "http[^[:space:]]*", replacement = "", x = x)
}
# Run the URL stripper over every document in the corpus
strip_urls <- content_transformer(removeURL)
myCorpus <- tm_map(myCorpus, strip_urls)
# tm reports "transformation drops documents" as a warning for this step.
# Remove every character that is neither alphabetic nor whitespace
# (digits, punctuation, symbols). `[:alpha:]` follows the current locale, so
# it is not limited to English letters.
# The quantifier is `+` rather than `*` so the pattern only matches actual
# runs of unwanted characters instead of also matching the empty string at
# every position.
removeNumPunct <- function(x) {
  gsub("[^[:alpha:][:space:]]+", "", x)
}
# Drop digits and punctuation from every document
strip_non_letters <- content_transformer(removeNumPunct)
myCorpus <- tm_map(myCorpus, strip_non_letters)

# Assemble the stopword list: tm's English list minus the words that should
# be kept, plus a few extra filler words.
# NOTE(review): the list is English while the frequent terms found in these
# tweets are Indonesian - confirm whether an Indonesian list is needed.
words_to_keep <- c("r", "big")
extra_stopwords <- c("use", "see", "used", "via", "amp")
english_stopwords <- setdiff(stopwords("english"), words_to_keep)
myStopwords <- c(english_stopwords, extra_stopwords)
myCorpus <- tm_map(myCorpus, removeWords, myStopwords)

# Collapse the extra whitespace left behind by the removals above
myCorpus <- tm_map(myCorpus, stripWhitespace)
# tm reports "transformation drops documents" as a warning for each of the
# three tm_map() steps in this section.

# Snapshot of the cleaned corpus, kept for stem completion later
myCorpusCopy <- myCorpus
# Build the term-document matrix from the cleaned corpus.
# wordLengths = c(1, Inf) sets the accepted term length range to 1..Inf.
tdm_control <- list(wordLengths = c(1, Inf))
tdm <- TermDocumentMatrix(myCorpus, control = tdm_control)
# Show the matrix summary
tdm
## <<TermDocumentMatrix (terms: 2655, documents: 2274)>>
## Non-/sparse entries: 40843/5996627
## Sparsity : 99%
## Maximal term length: 53
## Weighting : term frequency (tf)
# Terms that occur at least 20 times across the corpus
freq.terms <- findFreqTerms(tdm, lowfreq = 20)
# Show at most the first 50 of them. head() is used instead of
# freq.terms[1:50] so that a shorter result is not padded with NA entries.
head(freq.terms, 50)
## [1] "ayah" "beliau" "dgn" "di" "hari"
## [6] "hariayah" "kepada" "menulis" "selamat" "semua"
## [11] "sudah" "yg" "berniat" "cengeng" "jika"
## [16] "mendiang" "mengingat" "saya" "selalu" "soal"
## [21] "tapi" "urungkan" "apa" "asih" "beli"
## [26] "berlikuliku" "hidupku" "ikan" "ini" "inspirasi"
## [31] "jati" "kalian" "ke" "kusampaikan" "lembah"
## [36] "mau" "melewati" "menggunakan" "panggilan" "pantun"
## [41] "sampaikan" "tahun" "terimakasih" "ucapan" "untuk"
## [46] "wabah" "yang" "buatku" "dan" "disisimu"
# Total count of each term across all documents
term.freq <- rowSums(as.matrix(tdm))
# Keep only the terms that occur at least 1000 times
term.freq <- subset(term.freq, term.freq >= 1000)
df <- data.frame(term = names(term.freq), freq = term.freq)
# Horizontal bar chart of the remaining terms. Every ggplot2 function is
# namespace-qualified so the chart does not depend on ggplot2 being attached.
ggplot2::ggplot(df, ggplot2::aes(x = term, y = freq)) +
  ggplot2::geom_bar(stat = "identity") +
  ggplot2::xlab("Terms") +
  ggplot2::ylab("Count") +
  ggplot2::coord_flip() +
  ggplot2::theme(axis.text = ggplot2::element_text(size = 7))
library(wordcloud)
## Loading required package: RColorBrewer
# Dense copy of the term-document matrix (terms in rows)
m <- as.matrix(tdm)
# Total count per term, most frequent first
word.freq <- sort(rowSums(m), decreasing = TRUE)
# Colours: the 9-step "BuGn" palette with its first four entries dropped
pal <- brewer.pal(9, "BuGn")[-(1:4)]
# Draw the cloud for terms occurring at least 300 times.
# TRUE/FALSE are spelled out because T and F are ordinary variables that
# can be reassigned.
wordcloud(
  words = names(word.freq), freq = word.freq, min.freq = 300,
  random.order = FALSE, colors = pal
)