Technology has developed rapidly in today's millennial era, and wherever and whenever we are, we are hardly ever separated from it. Advanced technology, supported by the rapid growth of the internet, has made all kinds of activities much easier: buying, ordering, or even searching for job vacancies can now be done easily, and information can be accessed from anywhere. One of the fastest-growing areas is business, particularly online shops, many of which compete to attract customers. One of them is Shopee.
Shopee is an e-commerce platform headquartered in Singapore under Sea Group, which was founded in 2009 by Forrest Li (source: Wikipedia). Shopee was first launched in 2015 and has since expanded into Malaysia, Thailand, Taiwan, Vietnam, the Philippines, and Indonesia. In Indonesia, Shopee has been very successful in attracting customers, and it became even more popular in September 2018 with Goyang Shopee and the 9.9 Big Sale. In November, Shopee ran Goyang Shopee again together with the 11.11 Big Sale, offering even more attractive prizes.
With Shopee holding the 11.11 big sale, other e-commerce companies also tried to compete by running their own events in November. One of them is Lazada. Lazada is a Southeast Asian e-commerce company founded by Rocket Internet in 2012 and now owned by Alibaba Group. By 2014, Lazada was operating in Singapore, Malaysia, Vietnam, Thailand, the Philippines, and Indonesia. In November, Lazada held its Lazada 11.11 event to attract customers.
This assignment analyzes the Shopee and Lazada e-commerce platforms through tweets collected from Twitter.
# Load packages
library(rtweet)
library(tidyverse)
# Twitter authentication
create_token(
  app             = "my_twitter_research_app",
  consumer_key    = consumer_key,
  consumer_secret = consumer_secret,
  access_token    = access_token,
  access_secret   = access_secret
)
## <Token>
## <oauth_endpoint>
## request: https://api.twitter.com/oauth/request_token
## authorize: https://api.twitter.com/oauth/authenticate
## access: https://api.twitter.com/oauth/access_token
## <oauth_app> my_twitter_research_app
## key: 31YsTBHEZwGaQN72wNAnI0Ysx
## secret: <hidden>
## <credentials> oauth_token, oauth_token_secret
## ---
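The consumer key printed above would normally be kept out of the report; a safer pattern (an assumption, not part of the original setup) is to store the four credentials in environment variables, for example in ~/.Renviron, and read them at run time:
# hypothetical sketch: read credentials from environment variables
# instead of hard-coding them in the script
create_token(
  app             = "my_twitter_research_app",
  consumer_key    = Sys.getenv("TWITTER_CONSUMER_KEY"),
  consumer_secret = Sys.getenv("TWITTER_CONSUMER_SECRET"),
  access_token    = Sys.getenv("TWITTER_ACCESS_TOKEN"),
  access_secret   = Sys.getenv("TWITTER_ACCESS_SECRET")
)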
tweets <- search_tweets("shopee", n = 15000, tweet_mode="extended")
## Searching for tweets...
## This may take a few seconds...
## Finished collecting tweets!
tweets <- distinct(tweets, text, .keep_all=TRUE)
tweets1 <- search_tweets("lazada", n = 15000, tweet_mode="extended")
## Searching for tweets...
## This may take a few seconds...
## Warning: Rate limit exceeded - 88
## Warning: Rate limit exceeded
## Finished collecting tweets!
tweets1 <- distinct(tweets1, text, .keep_all=TRUE)
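Because the search endpoint is rate limited (see the warnings above) and only returns tweets from roughly the past week, it can help to persist each pull so later steps do not depend on the API; a minimal sketch (file names are illustrative, not from the original):
# optional: cache the collected tweets so the analysis can be rerun offline
saveRDS(tweets,  "tweets_shopee.rds")
saveRDS(tweets1, "tweets_lazada.rds")
# tweets <- readRDS("tweets_shopee.rds")   # reload in a later session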
## plot time series of tweets
ts_plot(tweets, "3 hours") +
  theme_minimal() +
  theme(plot.title = ggplot2::element_text(face = "bold")) +
  labs(
    x = NULL, y = NULL,
    title = "Frequency of Shopee Twitter statuses from past 9 days",
    subtitle = "Twitter status (tweet) counts aggregated using three-hour intervals",
    caption = "\nSource: Data collected from Twitter's REST API via rtweet"
  )
## plot time series of tweets
ts_plot(tweets1, "3 hours") +
  theme_minimal() +
  theme(plot.title = ggplot2::element_text(face = "bold")) +
  labs(
    x = NULL, y = NULL,
    title = "Frequency of Lazada Twitter statuses from past 9 days",
    subtitle = "Twitter status (tweet) counts aggregated using three-hour intervals",
    caption = "\nSource: Data collected from Twitter's REST API via rtweet"
  )
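The two series can also be drawn in one chart for a direct comparison, since ts_plot accepts a grouped data frame; a sketch (not part of the original analysis):
# combine both pulls, label the platform, and plot one line per group
bind_rows(
  mutate(tweets,  platform = "Shopee"),
  mutate(tweets1, platform = "Lazada")
) %>%
  group_by(platform) %>%
  ts_plot("3 hours") +
  theme_minimal() +
  labs(x = NULL, y = NULL,
       title = "Shopee vs Lazada tweet frequency (three-hour intervals)")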
tail(tweets, 20)
tail(tweets1, 20)
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
# build a corpus, and specify the source to be character vectors
myCorpus <- Corpus(VectorSource(tweets$text))
# convert to lower case
myCorpus <- tm_map(myCorpus, content_transformer(tolower))
## Warning in tm_map.SimpleCorpus(myCorpus, content_transformer(tolower)):
## transformation drops documents
# remove URLs
removeURL <- function(x) gsub("http[^[:space:]]*", "", x)
myCorpus <- tm_map(myCorpus, content_transformer(removeURL))
## Warning in tm_map.SimpleCorpus(myCorpus, content_transformer(removeURL)):
## transformation drops documents
# remove anything other than English letters or space
removeNumPunct <- function(x) gsub("[^[:alpha:][:space:]]*", "", x)
myCorpus <- tm_map(myCorpus, content_transformer(removeNumPunct))
## Warning in tm_map.SimpleCorpus(myCorpus,
## content_transformer(removeNumPunct)): transformation drops documents
# remove stopwords
myStopwords <- c(setdiff(stopwords('english'), c("r", "big")), "use", "see", "used", "via", "amp", "shopee")
stopwords_id <- read.table('stopwords-id.txt', header = FALSE)
myStopwords <- c(myStopwords, as.matrix(stopwords_id$V1), "hi", "yg")
myCorpus <- tm_map(myCorpus, removeWords, myStopwords)
## Warning in tm_map.SimpleCorpus(myCorpus, removeWords, myStopwords):
## transformation drops documents
# remove extra whitespace
myCorpus <- tm_map(myCorpus, stripWhitespace)
## Warning in tm_map.SimpleCorpus(myCorpus, stripWhitespace): transformation
## drops documents
# keep a copy for stem completion later
myCorpusCopy <- myCorpus
# build a corpus, and specify the source to be character vectors
myCorpus1 <- Corpus(VectorSource(tweets1$text))
# convert to lower case
myCorpus1 <- tm_map(myCorpus1, content_transformer(tolower))
## Warning in tm_map.SimpleCorpus(myCorpus1, content_transformer(tolower)):
## transformation drops documents
# remove URLs
removeURL <- function(x) gsub("http[^[:space:]]*", "", x)
myCorpus1 <- tm_map(myCorpus1, content_transformer(removeURL))
## Warning in tm_map.SimpleCorpus(myCorpus1, content_transformer(removeURL)):
## transformation drops documents
# remove anything other than English letters or space
removeNumPunct <- function(x) gsub("[^[:alpha:][:space:]]*", "", x)
myCorpus1 <- tm_map(myCorpus1, content_transformer(removeNumPunct))
## Warning in tm_map.SimpleCorpus(myCorpus1,
## content_transformer(removeNumPunct)): transformation drops documents
# remove stopwords
myStopwords1 <- c(setdiff(stopwords('english'), c("r", "big")), "use", "see", "used", "via", "amp", "lazada")
stopwords_id <- read.table('stopwords-id.txt', header = FALSE)
myStopwords1 <- c(myStopwords1, as.matrix(stopwords_id$V1), "hi", "yg")
myCorpus1 <- tm_map(myCorpus1, removeWords, myStopwords1)
## Warning in tm_map.SimpleCorpus(myCorpus1, removeWords, myStopwords1):
## transformation drops documents
# remove extra whitespace
myCorpus1 <- tm_map(myCorpus1, stripWhitespace)
## Warning in tm_map.SimpleCorpus(myCorpus1, stripWhitespace): transformation
## drops documents
# keep a copy for stem completion later
myCorpusCopy1 <- myCorpus1
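Since the same cleaning steps are applied to both corpora, the pipeline could be collapsed into a small helper; a hypothetical sketch, assuming removeURL and removeNumPunct as defined above:
clean_corpus <- function(texts, stopword_list) {
  corp <- Corpus(VectorSource(texts))
  corp <- tm_map(corp, content_transformer(tolower))
  corp <- tm_map(corp, content_transformer(removeURL))
  corp <- tm_map(corp, content_transformer(removeNumPunct))
  corp <- tm_map(corp, removeWords, stopword_list)
  tm_map(corp, stripWhitespace)
}
# equivalent to the two blocks above:
# myCorpus  <- clean_corpus(tweets$text,  myStopwords)
# myCorpus1 <- clean_corpus(tweets1$text, myStopwords1)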
tdm <- TermDocumentMatrix(myCorpus, control = list(wordLengths = c(1, Inf)))
tdm1 <- TermDocumentMatrix(myCorpus1, control = list(wordLengths = c(1, Inf)))
tdm
## <<TermDocumentMatrix (terms: 8597, documents: 3614)>>
## Non-/sparse entries: 32735/31036823
## Sparsity : 100%
## Maximal term length: 52
## Weighting : term frequency (tf)
tdm1
## <<TermDocumentMatrix (terms: 1603, documents: 588)>>
## Non-/sparse entries: 3757/938807
## Sparsity : 100%
## Maximal term length: 32
## Weighting : term frequency (tf)
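Before filtering by frequency, a small slice of each term-document matrix can be inspected to confirm the cleaning worked as intended (a quick check, not in the original):
inspect(tdm[1:10, 1:5])    # first 10 terms in the first 5 Shopee tweets
inspect(tdm1[1:10, 1:5])   # same for Lazada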
freq.terms <- findFreqTerms(tdm, lowfreq = 20)
freq.terms1 <- findFreqTerms(tdm1, lowfreq = 20)
freq.terms[1:50]
## [1] "aja" "bikin" "checkout" "giveaway"
## [5] "like" "oh" "ongkir" "rb"
## [9] "rt" "si" "udh" "lazada"
## [13] "sale" "free" "k" "newpreloved"
## [17] "song" "la" "nak" "belanja"
## [21] "follow" "guys" "orang" "voucher"
## [25] "bambam" "got" "shopeexbambam" "buy"
## [29] "dah" "je" "jual" "main"
## [33] "menjual" "murah" "ni" "suka"
## [37] "tu" "angkut" "banget" "bu"
## [41] "harga" "new" "np" "preloved"
## [45] "yaa" "yuk" "freeongkir" "poster"
## [49] "price" "take"
freq.terms1[1:50]
## [1] "bantu" "sale" "shopee" "rlthingy" "newpreloved"
## [6] "yuk" "halo" "mengirimkan" "rp" "temanmu"
## [11] "mixawaki" "ya" "back" "find" "link"
## [16] "wallet" "bantuin" "yaa" "ko" "sa"
## [21] "kard" "na" "super" NA NA
## [26] NA NA NA NA NA
## [31] NA NA NA NA NA
## [36] NA NA NA NA NA
## [41] NA NA NA NA NA
## [46] NA NA NA NA NA
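The NA entries appear because the Lazada matrix has fewer than 50 terms that reach the frequency threshold of 20, so indexing past the end of the vector returns NA; the actual count can be checked directly:
length(freq.terms1)   # number of Lazada terms with frequency >= 20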
term.freq <- rowSums(as.matrix(tdm))
term.freq <- subset(term.freq, term.freq >= 100)
df <- data.frame(term = names(term.freq), freq = term.freq)
term.freq1 <- rowSums(as.matrix(tdm1))
term.freq1 <- subset(term.freq1, term.freq1 >= 100)
df1 <- data.frame(term = names(term.freq1), freq = term.freq1)
ggplot(df, aes(x = term, y = freq)) +
  geom_bar(stat = "identity") +
  xlab("Terms") + ylab("Count") + coord_flip() +
  theme(axis.text = element_text(size = 7))
ggplot(df1, aes(x = term, y = freq)) +
  geom_bar(stat = "identity") +
  xlab("Terms") + ylab("Count") + coord_flip() +
  theme(axis.text = element_text(size = 7))
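In the charts above the terms appear in alphabetical order; if a frequency-sorted chart is preferred, the term axis can be reordered by count (a sketch, not from the original):
ggplot(df, aes(x = reorder(term, freq), y = freq)) +
  geom_bar(stat = "identity") +
  xlab("Terms") + ylab("Count") + coord_flip() +
  theme(axis.text = element_text(size = 7))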
library(wordcloud)
## Loading required package: RColorBrewer
m <- as.matrix(tdm)
# calculate the frequency of words and sort it by frequency
word.freq <- sort(rowSums(m), decreasing = T)
# colors
pal <- brewer.pal(9, "BuGn")[-(1:4)]
m1 <- as.matrix(tdm1)
# calculate the frequency of words and sort it by frequency
word.freq1 <- sort(rowSums(m1), decreasing = T)
# colors
pal <- brewer.pal(9, "BuGn")[-(1:4)]
wordcloud(words = names(word.freq), freq = word.freq, min.freq = 100,
          random.order = F, colors = pal)
wordcloud(words = names(word.freq1), freq = word.freq1, min.freq = 100,
          random.order = F, colors = pal)
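As a possible extension (not in the original analysis), the wordcloud package also provides comparison.cloud(), which contrasts the two platforms in a single cloud given a term-by-group frequency matrix:
# build a combined frequency matrix; terms missing from one platform get 0
all_terms <- union(names(word.freq), names(word.freq1))
comp <- cbind(Shopee = word.freq[all_terms], Lazada = word.freq1[all_terms])
comp[is.na(comp)] <- 0
rownames(comp) <- all_terms
comparison.cloud(comp, max.words = 100, random.order = FALSE)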