Technology has developed rapidly in today's millennial era, and wherever and whenever we are, we are hardly ever separated from it. Advanced technology, supported by the rapid growth of the internet, has made all kinds of activities much easier: buying, ordering, or even searching for job vacancies can now be done easily, and information can be accessed from anywhere. One of the fastest-growing areas is business, particularly online shops, many of which compete to attract customers. One of them is Shopee.
Shopee is an e-commerce platform headquartered in Singapore under Sea Group, which was founded in 2009 by Forrest Li (source: Wikipedia). Shopee was first launched in 2015 and has since expanded into Malaysia, Thailand, Taiwan, Vietnam, the Philippines, and Indonesia. In Indonesia, Shopee has been very successful in attracting customers, and it became even more popular in September 2018 with Goyang Shopee and the 9.9 Big Sale. In November, Shopee ran Goyang Shopee again together with the 11.11 Big Sale, offering even more attractive prizes.
With Shopee holding the 11.11 big sale, other e-commerce companies also tried to compete by running their own events in November. One of them is Lazada. Lazada is a Southeast Asian e-commerce company founded by Rocket Internet in 2012 and now owned by Alibaba Group. By 2014, Lazada was operating in Singapore, Malaysia, Vietnam, Thailand, the Philippines, and Indonesia. In November, Lazada held its Lazada 11.11 event to attract customers.
This assignment analyzes the Shopee and Lazada e-commerce platforms through tweets collected from Twitter.
# Load packages
library(rtweet)
library(tidyverse)
# Twitter authentication
create_token(
  app             = "my_twitter_research_app",
  consumer_key    = consumer_key,
  consumer_secret = consumer_secret,
  access_token    = access_token,
  access_secret   = access_secret
)
## <Token>
## <oauth_endpoint>
## request: https://api.twitter.com/oauth/request_token
## authorize: https://api.twitter.com/oauth/authenticate
## access: https://api.twitter.com/oauth/access_token
## <oauth_app> my_twitter_research_app
## key: 31YsTBHEZwGaQN72wNAnI0Ysx
## secret: <hidden>
## <credentials> oauth_token, oauth_token_secret
## ---
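The consumer key printed above would normally be kept out of the report; a safer pattern (an assumption, not part of the original setup) is to store the four credentials in environment variables, for example in ~/.Renviron, and read them at run time:
# hypothetical sketch: read credentials from environment variables
# instead of hard-coding them in the script
create_token(
  app             = "my_twitter_research_app",
  consumer_key    = Sys.getenv("TWITTER_CONSUMER_KEY"),
  consumer_secret = Sys.getenv("TWITTER_CONSUMER_SECRET"),
  access_token    = Sys.getenv("TWITTER_ACCESS_TOKEN"),
  access_secret   = Sys.getenv("TWITTER_ACCESS_SECRET")
)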
tweets <- search_tweets("shopee", n = 15000, tweet_mode="extended")
## Searching for tweets...
## This may take a few seconds...
## Finished collecting tweets!
tweets <- distinct(tweets, text, .keep_all=TRUE)
tweets1 <- search_tweets("lazada", n = 15000, tweet_mode="extended")
## Searching for tweets...
## This may take a few seconds...
## Warning: Rate limit exceeded - 88
## Warning: Rate limit exceeded
## Finished collecting tweets!
tweets1 <- distinct(tweets1, text, .keep_all=TRUE)
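Because the search endpoint is rate limited (see the warnings above) and only returns tweets from roughly the past week, it can help to persist each pull so later steps do not depend on the API; a minimal sketch (file names are illustrative, not from the original):
# optional: cache the collected tweets so the analysis can be rerun offline
saveRDS(tweets,  "tweets_shopee.rds")
saveRDS(tweets1, "tweets_lazada.rds")
# tweets <- readRDS("tweets_shopee.rds")   # reload in a later session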
## plot time series of tweets
ts_plot(tweets, "3 hours") +
  theme_minimal() +
  theme(plot.title = ggplot2::element_text(face = "bold")) +
  labs(
    x = NULL, y = NULL,
    title = "Frequency of Shopee Twitter statuses from past 9 days",
    subtitle = "Twitter status (tweet) counts aggregated using three-hour intervals",
    caption = "\nSource: Data collected from Twitter's REST API via rtweet"
  )
## plot time series of tweets
ts_plot(tweets1, "3 hours") +
  theme_minimal() +
  theme(plot.title = ggplot2::element_text(face = "bold")) +
  labs(
    x = NULL, y = NULL,
    title = "Frequency of Lazada Twitter statuses from past 9 days",
    subtitle = "Twitter status (tweet) counts aggregated using three-hour intervals",
    caption = "\nSource: Data collected from Twitter's REST API via rtweet"
  )
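The two series can also be drawn in one chart for a direct comparison, since ts_plot accepts a grouped data frame; a sketch (not part of the original analysis):
# combine both pulls, label the platform, and plot one line per group
bind_rows(
  mutate(tweets,  platform = "Shopee"),
  mutate(tweets1, platform = "Lazada")
) %>%
  group_by(platform) %>%
  ts_plot("3 hours") +
  theme_minimal() +
  labs(x = NULL, y = NULL,
       title = "Shopee vs Lazada tweet frequency (three-hour intervals)")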
tail(tweets, 20)
tail(tweets1, 20)
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
# build a corpus, and specify the source to be character vectors
myCorpus <- Corpus(VectorSource(tweets$text))
# convert to lower case
myCorpus <- tm_map(myCorpus, content_transformer(tolower))
## Warning in tm_map.SimpleCorpus(myCorpus, content_transformer(tolower)):
## transformation drops documents
# remove URLs
removeURL <- function(x) gsub("http[^[:space:]]*", "", x)
myCorpus <- tm_map(myCorpus, content_transformer(removeURL))
## Warning in tm_map.SimpleCorpus(myCorpus, content_transformer(removeURL)):
## transformation drops documents
# remove anything other than English letters or space
removeNumPunct <- function(x) gsub("[^[:alpha:][:space:]]*", "", x)
myCorpus <- tm_map(myCorpus, content_transformer(removeNumPunct))
## Warning in tm_map.SimpleCorpus(myCorpus,
## content_transformer(removeNumPunct)): transformation drops documents
# remove stopwords
myStopwords <- c(setdiff(stopwords('english'), c("r", "big")), "use", "see", "used", "via", "amp", "shopee")
stopwords_id <- read.table('stopwords-id.txt', header = FALSE)
myStopwords <- c(myStopwords, as.matrix(stopwords_id$V1), "hi", "yg")
myCorpus <- tm_map(myCorpus, removeWords, myStopwords)
## Warning in tm_map.SimpleCorpus(myCorpus, removeWords, myStopwords):
## transformation drops documents
# remove extra whitespace
myCorpus <- tm_map(myCorpus, stripWhitespace)
## Warning in tm_map.SimpleCorpus(myCorpus, stripWhitespace): transformation
## drops documents
# keep a copy for stem completion later
myCorpusCopy <- myCorpus
# build a corpus, and specify the source to be character vectors
myCorpus1 <- Corpus(VectorSource(tweets1$text))
# convert to lower case
myCorpus1 <- tm_map(myCorpus1, content_transformer(tolower))
## Warning in tm_map.SimpleCorpus(myCorpus1, content_transformer(tolower)):
## transformation drops documents
# remove URLs
removeURL <- function(x) gsub("http[^[:space:]]*", "", x)
myCorpus1 <- tm_map(myCorpus1, content_transformer(removeURL))
## Warning in tm_map.SimpleCorpus(myCorpus1, content_transformer(removeURL)):
## transformation drops documents
# remove anything other than English letters or space
removeNumPunct <- function(x) gsub("[^[:alpha:][:space:]]*", "", x)
myCorpus1 <- tm_map(myCorpus1, content_transformer(removeNumPunct))
## Warning in tm_map.SimpleCorpus(myCorpus1,
## content_transformer(removeNumPunct)): transformation drops documents
# remove stopwords
myStopwords1 <- c(setdiff(stopwords('english'), c("r", "big")), "use", "see", "used", "via", "amp", "lazada")
stopwords_id <- read.table('stopwords-id.txt', header = FALSE)
myStopwords1 <- c(myStopwords1, as.matrix(stopwords_id$V1), "hi", "yg")
myCorpus1 <- tm_map(myCorpus1, removeWords, myStopwords1)
## Warning in tm_map.SimpleCorpus(myCorpus1, removeWords, myStopwords1):
## transformation drops documents
# remove extra whitespace
myCorpus1 <- tm_map(myCorpus1, stripWhitespace)
## Warning in tm_map.SimpleCorpus(myCorpus1, stripWhitespace): transformation
## drops documents
# keep a copy for stem completion later
myCorpusCopy1 <- myCorpus1
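Since the same cleaning steps are applied to both corpora, the pipeline could be collapsed into a small helper; a hypothetical sketch, assuming removeURL and removeNumPunct as defined above:
clean_corpus <- function(texts, stopword_list) {
  corp <- Corpus(VectorSource(texts))
  corp <- tm_map(corp, content_transformer(tolower))
  corp <- tm_map(corp, content_transformer(removeURL))
  corp <- tm_map(corp, content_transformer(removeNumPunct))
  corp <- tm_map(corp, removeWords, stopword_list)
  tm_map(corp, stripWhitespace)
}
# equivalent to the two blocks above:
# myCorpus  <- clean_corpus(tweets$text,  myStopwords)
# myCorpus1 <- clean_corpus(tweets1$text, myStopwords1)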
tdm <- TermDocumentMatrix(myCorpus, control = list(wordLengths = c(1, Inf)))
tdm1 <- TermDocumentMatrix(myCorpus1, control = list(wordLengths = c(1, Inf)))
tdm
## <<TermDocumentMatrix (terms: 8597, documents: 3614)>>
## Non-/sparse entries: 32735/31036823
## Sparsity : 100%
## Maximal term length: 52
## Weighting : term frequency (tf)
tdm1
## <<TermDocumentMatrix (terms: 1603, documents: 588)>>
## Non-/sparse entries: 3757/938807
## Sparsity : 100%
## Maximal term length: 32
## Weighting : term frequency (tf)
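Before filtering by frequency, a small slice of each term-document matrix can be inspected to confirm the cleaning worked as intended (a quick check, not in the original):
inspect(tdm[1:10, 1:5])    # first 10 terms in the first 5 Shopee tweets
inspect(tdm1[1:10, 1:5])   # same for Lazada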
freq.terms <- findFreqTerms(tdm, lowfreq = 20)
freq.terms1 <- findFreqTerms(tdm1, lowfreq = 20)
freq.terms[1:50]
## [1] "aja" "bikin" "checkout" "giveaway"
## [5] "like" "oh" "ongkir" "rb"
## [9] "rt" "si" "udh" "lazada"
## [13] "sale" "free" "k" "newpreloved"
## [17] "song" "la" "nak" "belanja"
## [21] "follow" "guys" "orang" "voucher"
## [25] "bambam" "got" "shopeexbambam" "buy"
## [29] "dah" "je" "jual" "main"
## [33] "menjual" "murah" "ni" "suka"
## [37] "tu" "angkut" "banget" "bu"
## [41] "harga" "new" "np" "preloved"
## [45] "yaa" "yuk" "freeongkir" "poster"
## [49] "price" "take"
freq.terms1[1:50]
## [1] "bantu" "sale" "shopee" "rlthingy" "newpreloved"
## [6] "yuk" "halo" "mengirimkan" "rp" "temanmu"
## [11] "mixawaki" "ya" "back" "find" "link"
## [16] "wallet" "bantuin" "yaa" "ko" "sa"
## [21] "kard" "na" "super" NA NA
## [26] NA NA NA NA NA
## [31] NA NA NA NA NA
## [36] NA NA NA NA NA
## [41] NA NA NA NA NA
## [46] NA NA NA NA NA
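The NA entries appear because the Lazada matrix has fewer than 50 terms that reach the frequency threshold of 20, so indexing past the end of the vector returns NA; the actual count can be checked directly:
length(freq.terms1)   # number of Lazada terms with frequency >= 20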
term.freq <- rowSums(as.matrix(tdm))
term.freq <- subset(term.freq, term.freq >= 100)
df <- data.frame(term = names(term.freq), freq = term.freq)
term.freq1 <- rowSums(as.matrix(tdm1))
term.freq1 <- subset(term.freq1, term.freq1 >= 100)
df1 <- data.frame(term = names(term.freq1), freq = term.freq1)
ggplot(df, aes(x = term, y = freq)) +
  geom_bar(stat = "identity") +
  xlab("Terms") + ylab("Count") + coord_flip() +
  theme(axis.text = element_text(size = 7))
ggplot(df1, aes(x = term, y = freq)) +
  geom_bar(stat = "identity") +
  xlab("Terms") + ylab("Count") + coord_flip() +
  theme(axis.text = element_text(size = 7))
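In the charts above the terms appear in alphabetical order; if a frequency-sorted chart is preferred, the term axis can be reordered by count (a sketch, not from the original):
ggplot(df, aes(x = reorder(term, freq), y = freq)) +
  geom_bar(stat = "identity") +
  xlab("Terms") + ylab("Count") + coord_flip() +
  theme(axis.text = element_text(size = 7))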
library(wordcloud)
## Loading required package: RColorBrewer
m <- as.matrix(tdm)
# calculate the frequency of words and sort it by frequency
word.freq <- sort(rowSums(m), decreasing = T)
# colors
pal <- brewer.pal(9, "BuGn")[-(1:4)]
m1 <- as.matrix(tdm1)
# calculate the frequency of words and sort it by frequency
word.freq1 <- sort(rowSums(m1), decreasing = T)
# colors
pal <- brewer.pal(9, "BuGn")[-(1:4)]
wordcloud(words = names(word.freq), freq = word.freq, min.freq = 100,
          random.order = F, colors = pal)
wordcloud(words = names(word.freq1), freq = word.freq1, min.freq = 100,
          random.order = F, colors = pal)
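As a possible extension (not in the original analysis), the wordcloud package also provides comparison.cloud(), which contrasts the two platforms in a single cloud given a term-by-group frequency matrix:
# build a combined frequency matrix; terms missing from one platform get 0
all_terms <- union(names(word.freq), names(word.freq1))
comp <- cbind(Shopee = word.freq[all_terms], Lazada = word.freq1[all_terms])
comp[is.na(comp)] <- 0
rownames(comp) <- all_terms
comparison.cloud(comp, max.words = 100, random.order = FALSE)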