By: Erna Dwi Nurindah Sari (06211745000034)
In this assignment, we want to find out what triindonesia customers tweet about most on Twitter.
library(rtweet)
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.8
## v tidyr 0.8.1 v stringr 1.3.0
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts -------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x purrr::flatten() masks rtweet::flatten()
## x dplyr::lag() masks stats::lag()
create_token(
  app = "ernadns",
  consumer_key = "<CONSUMER_KEY>",        # fill in your own app credentials
  consumer_secret = "<CONSUMER_SECRET>",
  access_token = "<ACCESS_TOKEN>",
  access_secret = "<ACCESS_SECRET>")
## <Token>
## <oauth_endpoint>
## request: https://api.twitter.com/oauth/request_token
## authorize: https://api.twitter.com/oauth/authenticate
## access: https://api.twitter.com/oauth/access_token
## <oauth_app> ernadns
## key: <hidden>
## secret: <hidden>
## <credentials> oauth_token, oauth_token_secret
## ---
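Hardcoding live API keys in a published document leaks them to every reader. A safer pattern keeps the four values out of the script entirely, e.g. in ~/.Renviron, and reads them at run time (a sketch; the TWITTER_* variable names are my own convention, not something rtweet requires):

# read the secrets from environment variables instead of the script
create_token(
  app = "ernadns",
  consumer_key = Sys.getenv("TWITTER_CONSUMER_KEY"),
  consumer_secret = Sys.getenv("TWITTER_CONSUMER_SECRET"),
  access_token = Sys.getenv("TWITTER_ACCESS_TOKEN"),
  access_secret = Sys.getenv("TWITTER_ACCESS_SECRET"))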
# search the most recent tweets (up to 10,000) mentioning "triindonesia"
tweets <- search_tweets("triindonesia", n = 10000, tweet_mode = "extended")
## Searching for tweets...
## Finished collecting tweets!
# keep one row per unique tweet text (drops verbatim duplicates such as retweets)
tweets <- distinct(tweets, text, .keep_all = TRUE)
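distinct() only removes rows whose text is identical; since rtweet flags retweets explicitly in the is_retweet column, they can also be dropped outright (an optional variation, not applied below):

# optional: drop all retweets, keeping only original statuses
tweets <- dplyr::filter(tweets, !is_retweet)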
ts_plot(tweets, "3 hours") +
  theme_minimal() +
  theme(plot.title = ggplot2::element_text(face = "bold")) +
  labs(
    x = NULL, y = NULL,
    title = "Frequency of triindonesia Twitter statuses over the past 9 days",
    subtitle = "Twitter status (tweet) counts aggregated using three-hour intervals",
    caption = "\nSource: Data collected from Twitter's REST API via rtweet"
  )
The plot above covers 3 November through 11 November; the volume of tweets mentioning triindonesia peaked on 5 November.
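ts_plot() is a convenience wrapper; roughly the same series can be built by hand by flooring created_at into three-hour bins (a sketch using lubridate, which the tidyverse installs but does not attach):

library(lubridate)
tweets %>%
  mutate(interval = floor_date(created_at, "3 hours")) %>%  # bin timestamps into 3-hour windows
  count(interval) %>%                                       # tweets per window
  ggplot(aes(interval, n)) + geom_line() + theme_minimal()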
tail(tweets, 20)
## # A tibble: 20 x 88
## user_id status_id created_at screen_name text source
## <chr> <chr> <dttm> <chr> <chr> <chr>
## 1 533001~ 10583445~ 2018-11-02 13:06:14 DellayosyR @tri~ Twitt~
## 2 533001~ 10583446~ 2018-11-02 13:06:36 DellayosyR @tri~ Twitt~
## 3 980857~ 10583431~ 2018-11-02 13:00:33 ardimnida Wakt~ Twitt~
## 4 103040~ 10583377~ 2018-11-02 12:39:26 MB_alfiant~ @tri~ Twitt~
## 5 103040~ 10583288~ 2018-11-02 12:03:44 MB_alfiant~ @tri~ Twitt~
## 6 153289~ 10583376~ 2018-11-02 12:39:02 cocholava @tri~ Twitt~
## 7 958781~ 10583339~ 2018-11-02 12:24:16 rzpw15 @tri~ Twitt~
## 8 243116~ 10583082~ 2018-11-02 10:41:54 IamAffri "@tr~ Twitt~
## 9 243116~ 10583304~ 2018-11-02 12:10:14 IamAffri @tej~ Twitt~
## 10 442519~ 10583246~ 2018-11-02 11:47:17 boocinjaeh~ siny~ Twitt~
## 11 413314~ 10583239~ 2018-11-02 11:44:26 teje_sarwa~ "@Ia~ Twitt~
## 12 364753~ 10583217~ 2018-11-02 11:35:33 sincereloey "@tr~ Twitt~
## 13 814537~ 10583167~ 2018-11-02 11:15:48 agness_ky @tri~ Twitt~
## 14 961187~ 10583155~ 2018-11-02 11:11:00 Dzeus_prin~ "Kuo~ Faceb~
## 15 125960~ 10583151~ 2018-11-02 11:09:22 defalpha @tri~ Twitt~
## 16 103840~ 10583148~ 2018-11-02 11:08:12 tiara_f08 "Mak~ Twitt~
## 17 863078~ 10583117~ 2018-11-02 10:55:46 bluenisti_~ @tri~ Twitt~
## 18 967777~ 10583087~ 2018-11-02 10:44:03 NanangR1927 @tri~ Twitt~
## 19 108881~ 10583081~ 2018-11-02 10:41:37 destin_mah~ @tri~ Twitt~
## 20 536131~ 10582950~ 2018-11-02 09:49:32 Oktav_viani @tri~ Twitt~
## # ... with 82 more variables: display_text_width <dbl>,
## # reply_to_status_id <chr>, reply_to_user_id <chr>,
## # reply_to_screen_name <chr>, is_quote <lgl>, is_retweet <lgl>,
## # favorite_count <int>, retweet_count <int>, hashtags <list>,
## # symbols <list>, urls_url <list>, urls_t.co <list>,
## # urls_expanded_url <list>, media_url <list>, media_t.co <list>,
## # media_expanded_url <list>, media_type <list>, ext_media_url <list>,
## # ext_media_t.co <list>, ext_media_expanded_url <list>,
## # ext_media_type <chr>, mentions_user_id <list>,
## # mentions_screen_name <list>, lang <chr>, quoted_status_id <chr>,
## # quoted_text <chr>, quoted_created_at <dttm>, quoted_source <chr>,
## # quoted_favorite_count <int>, quoted_retweet_count <int>,
## # quoted_user_id <chr>, quoted_screen_name <chr>, quoted_name <chr>,
## # quoted_followers_count <int>, quoted_friends_count <int>,
## # quoted_statuses_count <int>, quoted_location <chr>,
## # quoted_description <chr>, quoted_verified <lgl>,
## # retweet_status_id <chr>, retweet_text <chr>,
## # retweet_created_at <dttm>, retweet_source <chr>,
## # retweet_favorite_count <int>, retweet_retweet_count <int>,
## # retweet_user_id <chr>, retweet_screen_name <chr>, retweet_name <chr>,
## # retweet_followers_count <int>, retweet_friends_count <int>,
## # retweet_statuses_count <int>, retweet_location <chr>,
## # retweet_description <chr>, retweet_verified <lgl>, place_url <chr>,
## # place_name <chr>, place_full_name <chr>, place_type <chr>,
## # country <chr>, country_code <chr>, geo_coords <list>,
## # coords_coords <list>, bbox_coords <list>, status_url <chr>,
## # name <chr>, location <chr>, description <chr>, url <chr>,
## # protected <lgl>, followers_count <int>, friends_count <int>,
## # listed_count <int>, statuses_count <int>, favourites_count <int>,
## # account_created_at <dttm>, verified <lgl>, profile_url <chr>,
## # profile_expanded_url <chr>, account_lang <chr>,
## # profile_banner_url <chr>, profile_background_url <chr>,
## # profile_image_url <chr>
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
# build a corpus, and specify the source to be character vectors
myCorpus <- Corpus(VectorSource(tweets$text))
# remove URLs
removeURL <- function(x) gsub("http[^[:space:]]*", "", x)
myCorpus <- tm_map(myCorpus, content_transformer(removeURL))
## Warning in tm_map.SimpleCorpus(myCorpus, content_transformer(removeURL)):
## transformation drops documents
# remove anything other than letters and whitespace (numbers, punctuation, emoji)
removeNumPunct <- function(x) gsub("[^[:alpha:][:space:]]*", "", x)
myCorpus <- tm_map(myCorpus, content_transformer(removeNumPunct))
## Warning in tm_map.SimpleCorpus(myCorpus,
## content_transformer(removeNumPunct)): transformation drops documents
myCorpus <- tm_map(myCorpus, stripWhitespace)
## Warning in tm_map.SimpleCorpus(myCorpus, stripWhitespace): transformation
## drops documents
myCorpusCopy <- myCorpus  # keep a backup of the cleaned corpus
# build the term-document matrix, keeping terms of any length (tm's default drops terms shorter than 3 characters)
tdm <- TermDocumentMatrix(myCorpus, control = list(wordLengths = c(1, Inf)))
tdm
## <<TermDocumentMatrix (terms: 6218, documents: 3405)>>
## Non-/sparse entries: 56288/21116002
## Sparsity : 100%
## Maximal term length: 76
## Weighting : term frequency (tf)
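Note that the cleaning pipeline above never lowercases the text or removes Indonesian stopwords, so "Kuota" and "kuota" count as separate terms and function words such as "di", "yang", and "ke" dominate the frequency lists below. Both steps could be added before building the matrix (a sketch, not applied here; the stopword vector is a small hand-picked sample, not an exhaustive Indonesian list):

# fold case so "Kuota" and "kuota" are counted as one term
myCorpus <- tm_map(myCorpus, content_transformer(tolower))
# drop a hand-picked set of Indonesian function words (illustrative only)
idStopwords <- c("di", "ke", "dari", "yang", "dan", "ini", "itu", "ada", "sama")
myCorpus <- tm_map(myCorpus, removeWords, idStopwords)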
# terms whose total frequency across all tweets is at least 20
freq.terms <- findFreqTerms(tdm, lowfreq = 20)
freq.terms[1:50]
## [1] "ada" "admin" "beli" "bisa"
## [5] "di" "kah" "kak" "triindonesia"
## [9] "ya" "yang" "aktif" "hari"
## [13] "isi" "masa" "nih" "ulang"
## [17] "hilang" "kalau" "kenapa" "mas"
## [21] "sinyal" "tri" "apa" "bgt"
## [25] "dapet" "dicek" "ini" "kuota"
## [29] "mb" "pas" "saya" "seperti"
## [33] "sering" "sms" "tp" "aktifkan"
## [37] "lalu" "pulsa" "amp" "ga"
## [41] "kok" "masuk" "min" "pagi"
## [45] "sampe" "sekarang" "dari" "dm"
## [49] "ke" "sama"
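Raw frequency says nothing about context; findAssocs() reports which terms tend to co-occur with a word of interest. For example, terms correlated with "sinyal" (signal) at 0.2 or above (the threshold is arbitrary):

# terms whose per-document counts correlate with "sinyal" at >= 0.2
findAssocs(tdm, "sinyal", 0.2)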
# total frequency of each term across the whole corpus
term.freq <- rowSums(as.matrix(tdm))
# keep only terms that occur at least 150 times
term.freq <- subset(term.freq, term.freq >= 150)
df <- data.frame(term = names(term.freq), freq = term.freq)
# horizontal bar chart of the high-frequency terms
ggplot(df, aes(x = term, y = freq)) + geom_bar(stat = "identity") +
  xlab("Terms") + ylab("Count") + coord_flip() +
  theme(axis.text = element_text(size = 7))
Based on the chart above, the five most frequent terms in tweets by triindonesia customers are triindonesia, kak, ya, dm, and kami. This is because the triindonesia Twitter account replies to every customer complaint with the greeting "kak" (an informal Indonesian honorific), and directs customers to send their complaints via DM (direct message) so that their privacy is better protected.
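As a small readability improvement, the bars can be ordered by frequency instead of alphabetically (a minor variation on the same plot):

# reorder terms by frequency so the most common term sits on top after coord_flip()
ggplot(df, aes(x = reorder(term, freq), y = freq)) + geom_bar(stat = "identity") +
  xlab("Terms") + ylab("Count") + coord_flip() +
  theme(axis.text = element_text(size = 7))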
library(wordcloud)
## Loading required package: RColorBrewer
m <- as.matrix(tdm)
# calculate the frequency of words and sort it by frequency
word.freq <- sort(rowSums(m), decreasing = T)
# drop the four lightest shades of the BuGn palette so pale words stay readable
pal <- brewer.pal(9, "BuGn")[-(1:4)]
wordcloud(words = names(word.freq), freq = word.freq, min.freq = 100,
random.order = F, colors = pal)
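wordcloud() places words with random jitter, so the layout differs between runs; fixing the RNG seed first makes the figure reproducible (the seed value is arbitrary):

# fix the seed so the word placement is identical on every knit
set.seed(1234)
wordcloud(words = names(word.freq), freq = word.freq, min.freq = 100,
          random.order = F, colors = pal)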