#========================================================
# ANALISIS SENTIMEN ULASAN ROBLOX
#========================================================
#========================================================
# 1. INSTALL PACKAGE
#========================================================
# install.packages("readxl")
# install.packages("tm")
# install.packages("SnowballC")
# install.packages("dplyr")
# install.packages("caret")
# install.packages("e1071")
# install.packages("ggplot2")
# install.packages("tokenizers")
# install.packages("knitr")
# install.packages("ggwordcloud")
#========================================================
# 2. LOAD LIBRARY
#========================================================
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(tm)
## Warning: package 'tm' was built under R version 4.4.3
## Loading required package: NLP
library(SnowballC)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 4.4.3
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
##
## Attaching package: 'e1071'
## The following object is masked from 'package:ggplot2':
##
## element
library(ggplot2)
library(tokenizers)
## Warning: package 'tokenizers' was built under R version 4.4.3
library(knitr)
## Warning: package 'knitr' was built under R version 4.4.3
library(ggwordcloud)
## Warning: package 'ggwordcloud' was built under R version 4.4.3
#========================================================
# 3. IMPORT DATA
#========================================================
roblox <- read_excel(
"D:/Universitas Negeri Padang/Semester 6/Data Mining/Tugas Data Text/Data Ulasan Roblox.xlsx"
)
#========================================================
# 4. MEMILIH KOLOM
#========================================================
roblox <- roblox %>%
select(ulasan, rating)
#========================================================
# 5. MENGHAPUS DATA KOSONG
#========================================================
roblox <- roblox %>%
filter(!is.na(ulasan))
#========================================================
# 6. PELABELAN SENTIMEN
#========================================================
roblox$sentimen <- ifelse(
roblox$rating >= 4,
"positif",
ifelse(
roblox$rating <= 2,
"negatif",
NA
)
)
# Menghapus data netral
roblox <- na.omit(roblox)
#========================================================
# 7. MEMBUAT CORPUS
#========================================================
corpus <- VCorpus(
VectorSource(roblox$ulasan)
)
#========================================================
# 8. CASE FOLDING
#========================================================
corpus <- tm_map(
corpus,
content_transformer(tolower)
)
#========================================================
# 9. MENGHAPUS TANDA BACA
#========================================================
corpus <- tm_map(
corpus,
removePunctuation
)
#========================================================
# 10. MENGHAPUS ANGKA
#========================================================
corpus <- tm_map(
corpus,
removeNumbers
)
#========================================================
# 11. MENGHAPUS KARAKTER KHUSUS
#========================================================
corpus <- tm_map(
corpus,
content_transformer(function(x){
gsub(
"[^[:alnum:][:space:]]",
" ",
x
)
})
)
#========================================================
# 12. MENGHAPUS SPASI BERLEBIH
#========================================================
corpus <- tm_map(
corpus,
stripWhitespace
)
#========================================================
# 13. MENGHAPUS STOPWORD
#========================================================
stop_id <- c(
"yang","dan","di","ke","dari",
"untuk","pada","dengan","adalah",
"itu","ini","karena","jadi",
"saya","aku","nya","atau",
"dalam","tidak","ada","sudah",
"agar","lebih","bisa","sangat"
)
corpus <- tm_map(
corpus,
removeWords,
stop_id
)
#========================================================
# 14. STEMMING
#========================================================
corpus <- tm_map(
corpus,
stemDocument
)
#========================================================
# 15. HASIL PREPROCESSING
#========================================================
cleaned_text <- sapply(
corpus,
as.character
)
#========================================================
# 16. TOKENISASI
#========================================================
tokens <- tokenize_words(
cleaned_text
)
tokens[1:5]
## $`1`
## [1] "saiki" "ra" "iso" "man" "cor"
##
## $`2`
## [1] "jelek" "sekali"
##
## $`3`
## [1] "bagus" "cinta" "roblox" "selalu"
##
## $`4`
## [1] "game" "burik" "jelek" "kebanyakan" "bug"
## [6] "masalah" "usia" "juga" "nggak" "pant"
## [11] "nihh" "game" "buang" "aja"
##
## $`5`
## [1] "makin" "lama" "makin" "ngaco" "main"
## [6] "game" "aja" "harus" "verifikasi" "email"
## [11] "sama" "aja" "kek" "mau" "nge"
## [16] "hack" "masa" "main" "game" "aja"
## [21] "make" "email" "kan" "lucuðÿ" "ðÿ"
#========================================================
# 17. PERBANDINGAN TEKS SEBELUM DAN SESUDAH
#========================================================
comparison <- data.frame(
sebelum = roblox$ulasan[1:5],
sesudah = cleaned_text[1:5]
)
kable(
comparison,
caption = "Perbandingan Sebelum dan Sesudah Preprocessing"
)
Perbandingan Sebelum dan Sesudah Preprocessing
| Saiki Ra ISO man cor |
saiki ra iso man cor |
| JELEK SEKALI |
jelek sekali |
| bagus cinta Roblox selalu |
bagus cinta roblox selalu |
| game burik jelek kebanyakan bug masalah usia juga nggak
pantes ada nihh game buang aja |
game burik jelek kebanyakan bug masalah usia juga nggak
pant nihh game buang aja |
| makin lama makin ngaco main game aja harus verifikasi
email itu sama aja kek mau nge hack masa main game aja make email kan
lucu😹😹 |
makin lama makin ngaco main game aja harus verifikasi
email sama aja kek mau nge hack masa main game aja make email kan lucuðÿ
ðÿ |
#========================================================
# 18. MEMBUAT DATA BERSIH
#========================================================
data_clean <- data.frame(
cleaned_text = cleaned_text,
sentimen = roblox$sentimen
)
head(data_clean)
## cleaned_text
## 1 saiki ra iso man cor
## 2 jelek sekali
## 3 bagus cinta roblox selalu
## 4 game burik jelek kebanyakan bug masalah usia juga nggak pant nihh game buang aja
## 5 makin lama makin ngaco main game aja harus verifikasi email sama aja kek mau nge hack masa main game aja make email kan lucuðÿ ðÿ
## 6 ive top up my app but the robux hasnt come in yet my money is gone and i cant get anythingðÿ ðÿ ðÿ ðÿ
## sentimen
## 1 negatif
## 2 negatif
## 3 positif
## 4 negatif
## 5 negatif
## 6 negatif
#========================================================
# 19. MEMBUAT DOCUMENT TERM MATRIX
#========================================================
corpus_clean <- VCorpus(
VectorSource(data_clean$cleaned_text)
)
dtm <- DocumentTermMatrix(
corpus_clean
)
#========================================================
# 20. MEMBATASI TERM
#========================================================
dtm <- removeSparseTerms(
dtm,
0.99
)
freq_terms <- findFreqTerms(
dtm,
lowfreq = 5
)
dtm <- dtm[
,
colnames(dtm) %in% freq_terms
]
#========================================================
# 21. TF-IDF
#========================================================
dtm_tfidf <- weightTfIdf(
dtm
)
## Warning in weightTfIdf(dtm): empty document(s): 1 11 47 50 56 58 63 71 75 96
## 102 127 155 156 158 159 191 216 225 236 239 245 256 258 264 271 301 307 332 354
## 360 365 388 397 407 419 440 451
tfidf_matrix <- as.matrix(
dtm_tfidf
)
#========================================================
# 22. BIGRAM
#========================================================
bigram <- tokenize_ngrams(
cleaned_text,
n = 2
)
bigram[1:5]
## $`1`
## [1] "saiki ra" "ra iso" "iso man" "man cor"
##
## $`2`
## [1] "jelek sekali"
##
## $`3`
## [1] "bagus cinta" "cinta roblox" "roblox selalu"
##
## $`4`
## [1] "game burik" "burik jelek" "jelek kebanyakan" "kebanyakan bug"
## [5] "bug masalah" "masalah usia" "usia juga" "juga nggak"
## [9] "nggak pant" "pant nihh" "nihh game" "game buang"
## [13] "buang aja"
##
## $`5`
## [1] "makin lama" "lama makin" "makin ngaco" "ngaco main"
## [5] "main game" "game aja" "aja harus" "harus verifikasi"
## [9] "verifikasi email" "email sama" "sama aja" "aja kek"
## [13] "kek mau" "mau nge" "nge hack" "hack masa"
## [17] "masa main" "main game" "game aja" "aja make"
## [21] "make email" "email kan" "kan lucuðÿ" "lucuðÿ ðÿ"
#========================================================
# 23. MEMBAGI DATA TRAINING DAN TESTING
#========================================================
set.seed(123)
index <- createDataPartition(
data_clean$sentimen,
p = 0.8,
list = FALSE
)
x_train <- tfidf_matrix[index, ]
x_test <- tfidf_matrix[-index, ]
y_train <- as.factor(
data_clean$sentimen[index]
)
y_test <- as.factor(
data_clean$sentimen[-index]
)
#========================================================
# 24. MENGUBAH DATA MENJADI DATA FRAME
#========================================================
x_train <- as.data.frame(x_train)
x_test <- as.data.frame(x_test)
#========================================================
# 25. MELATIH MODEL NAIVE BAYES
#========================================================
model_nb <- naiveBayes(
x = x_train,
y = y_train
)
# Menampilkan model Naive Bayes
model_nb
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = x_train, y = y_train)
##
## A-priori probabilities:
## y_train
## negatif positif
## 0.538874 0.461126
##
## Conditional probabilities:
## age
## y_train [,1] [,2]
## negatif 0.012589476 0.08025591
## positif 0.004475338 0.04221010
##
## aja
## y_train [,1] [,2]
## negatif 0.03563545 0.1417611
## positif 0.01318776 0.1498781
##
## akun
## y_train [,1] [,2]
## negatif 0.037767497 0.24247001
## positif 0.007074082 0.06285928
##
## anak
## y_train [,1] [,2]
## negatif 0.014761126 0.09248562
## positif 0.003099464 0.04064909
##
## and
## y_train [,1] [,2]
## negatif 0.02259014 0.1790483
## positif 0.02036292 0.1454516
##
## aneh
## y_train [,1] [,2]
## negatif 0.05072338 0.2637936
## positif 0.00000000 0.0000000
##
## anj
## y_train [,1] [,2]
## negatif 0.03289195 0.2713134
## positif 0.00000000 0.0000000
##
## apa
## y_train [,1] [,2]
## negatif 0.02739584 0.21278974
## positif 0.00614284 0.06262195
##
## are
## y_train [,1] [,2]
## negatif 0.011096096 0.09128297
## positif 0.002113132 0.02771347
##
## asik
## y_train [,1] [,2]
## negatif 0.002712379 0.03845461
## positif 0.012678795 0.16628083
##
## bagus
## y_train [,1] [,2]
## negatif 0.02678573 0.1904248
## positif 0.26803927 0.5984472
##
## baik
## y_train [,1] [,2]
## negatif 0.004744929 0.04833289
## positif 0.006339397 0.08314042
##
## balikin
## y_train [,1] [,2]
## negatif 0.013624076 0.11494963
## positif 0.007301423 0.09575726
##
## ban
## y_train [,1] [,2]
## negatif 0.023412113 0.2439016
## positif 0.005851751 0.0541080
##
## banget
## y_train [,1] [,2]
## negatif 0.04560838 0.1961054
## positif 0.09031087 0.3575135
##
## banyak
## y_train [,1] [,2]
## negatif 0.03521411 0.2499447
## positif 0.07116777 0.2487137
##
## baru
## y_train [,1] [,2]
## negatif 0.018308558 0.23238009
## positif 0.006383729 0.05923391
##
## begitu
## y_train [,1] [,2]
## negatif 0.013696068 0.10080303
## positif 0.003099464 0.04064909
##
## beli
## y_train [,1] [,2]
## negatif 0.021456938 0.14155359
## positif 0.008299717 0.08003098
##
## benerin
## y_train [,1] [,2]
## negatif 0.01258275 0.09368959
## positif 0.01216904 0.15959544
##
## bersama
## y_train [,1] [,2]
## negatif 0.001017142 0.01442048
## positif 0.042338118 0.50020761
##
## bgt
## y_train [,1] [,2]
## negatif 0.004649793 0.06592219
## positif 0.045643661 0.36478208
##
## bikin
## y_train [,1] [,2]
## negatif 0.014808772 0.13668444
## positif 0.008416873 0.07800109
##
## bintang
## y_train [,1] [,2]
## negatif 0.02054182 0.09701289
## positif 0.02920308 0.19991904
##
## buat
## y_train [,1] [,2]
## negatif 0.019774018 0.08113835
## positif 0.008633541 0.09044302
##
## bug
## y_train [,1] [,2]
## negatif 0.05110028 0.3668242
## positif 0.03103572 0.1546316
##
## bukan
## y_train [,1] [,2]
## negatif 0.012888517 0.1216196
## positif 0.001521455 0.0199537
##
## burik
## y_train [,1] [,2]
## negatif 0.05212036 0.46541
## positif 0.03409411 0.44714
##
## but
## y_train [,1] [,2]
## negatif 0.01689021 0.12288100
## positif 0.01155719 0.07997681
##
## cant
## y_train [,1] [,2]
## negatif 0.03674409 0.285907
## positif 0.01622538 0.167933
##
## chat
## y_train [,1] [,2]
## negatif 0.08478253 0.3072986
## positif 0.05133665 0.2058792
##
## coba
## y_train [,1] [,2]
## negatif 0.004042813 0.04610364
## positif 0.011141782 0.08753738
##
## cuma
## y_train [,1] [,2]
## negatif 0.025806398 0.21548676
## positif 0.004729453 0.06202619
##
## dah
## y_train [,1] [,2]
## negatif 0.005352306 0.03843373
## positif 0.000000000 0.00000000
##
## develop
## y_train [,1] [,2]
## negatif 0.003192561 0.03201142
## positif 0.004226265 0.05542694
##
## diperbaiki
## y_train [,1] [,2]
## negatif 0.012054462 0.09619345
## positif 0.003803638 0.04988425
##
## dong
## y_train [,1] [,2]
## negatif 0.013478231 0.10914539
## positif 0.004722948 0.04716193
##
## dulu
## y_train [,1] [,2]
## negatif 0.023125866 0.11606028
## positif 0.002064516 0.02707588
##
## eror
## y_train [,1] [,2]
## negatif 0.0463014 0.3180569
## positif 0.0000000 0.0000000
##
## error
## y_train [,1] [,2]
## negatif 0.02696314 0.1964125
## positif 0.00960147 0.1054096
##
## even
## y_train [,1] [,2]
## negatif 0.008137137 0.08137035
## positif 0.002113132 0.02771347
##
## fitur
## y_train [,1] [,2]
## negatif 0.01703986 0.1126794
## positif 0.02305244 0.2099215
##
## fix
## y_train [,1] [,2]
## negatif 0.01220571 0.1287225
## positif 0.00000000 0.0000000
##
## gabisa
## y_train [,1] [,2]
## negatif 0.08085078 0.42909643
## positif 0.01250893 0.09494928
##
## gajela
## y_train [,1] [,2]
## negatif 0.01567755 0.1332881
## positif 0.00000000 0.0000000
##
## gak
## y_train [,1] [,2]
## negatif 0.05783204 0.1764340
## positif 0.04638872 0.1998073
##
## game
## y_train [,1] [,2]
## negatif 0.09425212 0.2432882
## positif 0.15961421 0.3093285
##
## gamenya
## y_train [,1] [,2]
## negatif 0.02136779 0.18133625
## positif 0.01512198 0.09909258
##
## ganti
## y_train [,1] [,2]
## negatif 0.005640541 0.06047194
## positif 0.000000000 0.00000000
##
## gem
## y_train [,1] [,2]
## negatif 0.01227618 0.1395831
## positif 0.04151409 0.3150968
##
## geme
## y_train [,1] [,2]
## negatif 0.01162242 0.09914831
## positif 0.05084162 0.34574398
##
## get
## y_train [,1] [,2]
## negatif 0.01522807 0.1266431
## positif 0.00000000 0.0000000
##
## gimana
## y_train [,1] [,2]
## negatif 0.01960601 0.1247174
## positif 0.02164180 0.2027238
##
## gini
## y_train [,1] [,2]
## negatif 0.006570755 0.04426261
## positif 0.000000000 0.00000000
##
## gitu
## y_train [,1] [,2]
## negatif 0.013854936 0.1106049
## positif 0.003409411 0.0447140
##
## gua
## y_train [,1] [,2]
## negatif 0.033012881 0.15074214
## positif 0.002020011 0.02649219
##
## hacker
## y_train [,1] [,2]
## negatif 0.001049953 0.01488565
## positif 0.009309604 0.08891372
##
## hapus
## y_train [,1] [,2]
## negatif 0.035399409 0.24186264
## positif 0.004603204 0.06037046
##
## hari
## y_train [,1] [,2]
## negatif 0.019597689 0.15967520
## positif 0.005215302 0.06839804
##
## harus
## y_train [,1] [,2]
## negatif 0.02325711 0.1262613
## positif 0.01926203 0.1023164
##
## hilang
## y_train [,1] [,2]
## negatif 0.034357361 0.2357212
## positif 0.005370405 0.0704322
##
## ingin
## y_train [,1] [,2]
## negatif 0.004308505 0.03695251
## positif 0.001521130 0.01994943
##
## internet
## y_train [,1] [,2]
## negatif 0.024396889 0.23086052
## positif 0.003318829 0.04352603
##
## izin
## y_train [,1] [,2]
## negatif 0.006894295 0.04154044
## positif 0.011350687 0.10641832
##
## jangan
## y_train [,1] [,2]
## negatif 0.01889778 0.1014590
## positif 0.03112013 0.2126508
##
## jaringan
## y_train [,1] [,2]
## negatif 0.041850270 0.17926861
## positif 0.004040022 0.05298439
##
## jela
## y_train [,1] [,2]
## negatif 0.04458548 0.4204954
## positif 0.00000000 0.0000000
##
## jelek
## y_train [,1] [,2]
## negatif 0.042912713 0.20934125
## positif 0.001885343 0.02472605
##
## juga
## y_train [,1] [,2]
## negatif 0.02487951 0.1142057
## positif 0.02787729 0.1540298
##
## kalau
## y_train [,1] [,2]
## negatif 0.010421355 0.06886206
## positif 0.002731899 0.02528084
##
## kali
## y_train [,1] [,2]
## negatif 0.007743959 0.06526385
## positif 0.014897776 0.13860939
##
## kalian
## y_train [,1] [,2]
## negatif 0.002169903 0.03076369
## positif 0.001584849 0.02078510
##
## kalo
## y_train [,1] [,2]
## negatif 0.01443717 0.08125020
## positif 0.01187953 0.07798246
##
## kami
## y_train [,1] [,2]
## negatif 0.009985781 0.08341376
## positif 0.008069906 0.08204741
##
## kan
## y_train [,1] [,2]
## negatif 0.04658733 0.2384963
## positif 0.01954227 0.1865450
##
## karna
## y_train [,1] [,2]
## negatif 0.02784238 0.20173333
## positif 0.01289175 0.08967921
##
## kasih
## y_train [,1] [,2]
## negatif 0.02006638 0.1058915
## positif 0.02305254 0.1897839
##
## katanya
## y_train [,1] [,2]
## negatif 0.01438162 0.1021083
## positif 0.00000000 0.0000000
##
## kaya
## y_train [,1] [,2]
## negatif 0.003567634 0.03567927
## positif 0.003803638 0.04988425
##
## kayak
## y_train [,1] [,2]
## negatif 0.014295483 0.09432597
## positif 0.002347609 0.03078860
##
## kek
## y_train [,1] [,2]
## negatif 0.013197447 0.09463063
## positif 0.006818821 0.08942800
##
## kembali
## y_train [,1] [,2]
## negatif 0.02827009 0.2087321
## positif 0.00000000 0.0000000
##
## kembalikan
## y_train [,1] [,2]
## negatif 0.01419054 0.1009516
## positif 0.04802912 0.3991689
##
## kenapa
## y_train [,1] [,2]
## negatif 0.08829927 0.44115618
## positif 0.01849191 0.08986547
##
## kid
## y_train [,1] [,2]
## negatif 0.012258977 0.0952168
## positif 0.002347609 0.0307886
##
## kita
## y_train [,1] [,2]
## negatif 0.009388336 0.06301250
## positif 0.006441713 0.05034306
##
## knp
## y_train [,1] [,2]
## negatif 0.009094447 0.11609203
## positif 0.003169699 0.04157021
##
## kok
## y_train [,1] [,2]
## negatif 0.042253741 0.25542859
## positif 0.006823054 0.06654087
##
## koneksi
## y_train [,1] [,2]
## negatif 0.01985849 0.1722712
## positif 0.00000000 0.0000000
##
## kurang
## y_train [,1] [,2]
## negatif 0.013407717 0.10974681
## positif 0.002377274 0.03117766
##
## lag
## y_train [,1] [,2]
## negatif 0.04796093 0.2797024
## positif 0.02484684 0.2190398
##
## lagi
## y_train [,1] [,2]
## negatif 0.01726328 0.07606474
## positif 0.03844298 0.18674205
##
## lah
## y_train [,1] [,2]
## negatif 0.047539460 0.25607051
## positif 0.004548703 0.05965568
##
## lain
## y_train [,1] [,2]
## negatif 0.008090821 0.05089242
## positif 0.011559762 0.12048796
##
## lama
## y_train [,1] [,2]
## negatif 0.009824616 0.07509032
## positif 0.004251571 0.04772242
##
## lancar
## y_train [,1] [,2]
## negatif 0.026432196 0.13165370
## positif 0.003202387 0.04199892
##
## login
## y_train [,1] [,2]
## negatif 0.03495847 0.2073995
## positif 0.00000000 0.0000000
##
## maaf
## y_train [,1] [,2]
## negatif 0.01801277 0.1477161
## positif 0.00000000 0.0000000
##
## mabar
## y_train [,1] [,2]
## negatif 0.00000000 0.0000000
## positif 0.01498286 0.1055469
##
## main
## y_train [,1] [,2]
## negatif 0.04948029 0.1944355
## positif 0.02361013 0.1234539
##
## makin
## y_train [,1] [,2]
## negatif 0.050698403 0.25225823
## positif 0.001488429 0.01952056
##
## malah
## y_train [,1] [,2]
## negatif 0.04011572 0.13821607
## positif 0.01020670 0.06660103
##
## mana
## y_train [,1] [,2]
## negatif 0.034713468 0.41604865
## positif 0.004972057 0.05660971
##
## map
## y_train [,1] [,2]
## negatif 0.02039849 0.1825995
## positif 0.03485377 0.2720601
##
## masa
## y_train [,1] [,2]
## negatif 0.01939215 0.1111708
## positif 0.00000000 0.0000000
##
## masih
## y_train [,1] [,2]
## negatif 0.005890757 0.06493550
## positif 0.003668139 0.03961558
##
## masuk
## y_train [,1] [,2]
## negatif 0.040027279 0.14469314
## positif 0.002726311 0.03575524
##
## mau
## y_train [,1] [,2]
## negatif 0.05819452 0.28748612
## positif 0.02025484 0.09116203
##
## min
## y_train [,1] [,2]
## negatif 0.01033283 0.0895945
## positif 0.01981954 0.1902185
##
## minta
## y_train [,1] [,2]
## negatif 0.018939469 0.21589679
## positif 0.003521414 0.04618291
##
## mohon
## y_train [,1] [,2]
## negatif 0.008096958 0.07387773
## positif 0.015842695 0.12136363
##
## mulu
## y_train [,1] [,2]
## negatif 0.051904349 0.26492962
## positif 0.008069841 0.06121331
##
## nge
## y_train [,1] [,2]
## negatif 0.004767212 0.04905419
## positif 0.003169699 0.04157021
##
## ngeleg
## y_train [,1] [,2]
## negatif 0.026193831 0.20322693
## positif 0.003803638 0.04988425
##
## nggak
## y_train [,1] [,2]
## negatif 0.01982016 0.14979951
## positif 0.00140412 0.01841486
##
## ngobrol
## y_train [,1] [,2]
## negatif 0.005601918 0.04571896
## positif 0.002984393 0.02760045
##
## not
## y_train [,1] [,2]
## negatif 0.01612633 0.1487158
## positif 0.01267879 0.1662808
##
## orang
## y_train [,1] [,2]
## negatif 0.02350637 0.1163156
## positif 0.01080806 0.1268859
##
## padah
## y_train [,1] [,2]
## negatif 0.068667648 0.17921967
## positif 0.008138399 0.05741044
##
## pakai
## y_train [,1] [,2]
## negatif 0.008289714 0.06244451
## positif 0.000000000 0.00000000
##
## pake
## y_train [,1] [,2]
## negatif 0.02203426 0.12816437
## positif 0.01448532 0.09517604
##
## pas
## y_train [,1] [,2]
## negatif 0.02193118 0.11022972
## positif 0.01492340 0.08332469
##
## pembatasan
## y_train [,1] [,2]
## negatif 0.006062094 0.04810505
## positif 0.000000000 0.00000000
##
## peraturan
## y_train [,1] [,2]
## negatif 0.004940049 0.04029971
## positif 0.000000000 0.00000000
##
## perbaiki
## y_train [,1] [,2]
## negatif 0.019590580 0.1435963
## positif 0.009401889 0.0776209
##
## permainan
## y_train [,1] [,2]
## negatif 0.00000000 0.0000000
## positif 0.03596297 0.2876092
##
## play
## y_train [,1] [,2]
## negatif 0.027296359 0.18704646
## positif 0.003788234 0.04968222
##
## player
## y_train [,1] [,2]
## negatif 0.02624153 0.24241770
## positif 0.00280824 0.03682972
##
## punya
## y_train [,1] [,2]
## negatif 0.00442825 0.03606733
## positif 0.00000000 0.00000000
##
## ribet
## y_train [,1] [,2]
## negatif 0.02612449 0.2073772
## positif 0.00000000 0.0000000
##
## roblox
## y_train [,1] [,2]
## negatif 0.10751259 0.2310351
## positif 0.06804251 0.2647571
##
## robux
## y_train [,1] [,2]
## negatif 0.02521976 0.1424150
## positif 0.01679790 0.1302904
##
## rusak
## y_train [,1] [,2]
## negatif 0.02600384 0.2501118
## positif 0.00000000 0.0000000
##
## saat
## y_train [,1] [,2]
## negatif 0.007575435 0.06059058
## positif 0.016553085 0.21709167
##
## sama
## y_train [,1] [,2]
## negatif 0.02392654 0.1194404
## positif 0.03409271 0.1543004
##
## satu
## y_train [,1] [,2]
## negatif 0.007683113 0.05651572
## positif 0.008314449 0.07701370
##
## sekali
## y_train [,1] [,2]
## negatif 0.002585008 0.03664882
## positif 0.053704051 0.34546604
##
## sekarang
## y_train [,1] [,2]
## negatif 0.04079089 0.1680948
## positif 0.01221228 0.0737263
##
## selalu
## y_train [,1] [,2]
## negatif 0.035057638 0.39909253
## positif 0.008343308 0.07690337
##
## semoga
## y_train [,1] [,2]
## negatif 0.006374091 0.0519365
## positif 0.000000000 0.0000000
##
## semua
## y_train [,1] [,2]
## negatif 0.01882606 0.1759561
## positif 0.02765170 0.2119156
##
## sendiri
## y_train [,1] [,2]
## negatif 0.009276854 0.08220573
## positif 0.011318829 0.11989173
##
## seperti
## y_train [,1] [,2]
## negatif 0.008403385 0.06301916
## positif 0.000000000 0.00000000
##
## sere
## y_train [,1] [,2]
## negatif 0.02930514 0.20488010
## positif 0.00490003 0.06426329
##
## seru
## y_train [,1] [,2]
## negatif 0.01884957 0.1061446
## positif 0.26912264 0.6652773
##
## server
## y_train [,1] [,2]
## negatif 0.021347278 0.17198540
## positif 0.007301423 0.09575726
##
## setiap
## y_train [,1] [,2]
## negatif 0.025428553 0.25775168
## positif 0.005433769 0.07126321
##
## sih
## y_train [,1] [,2]
## negatif 0.009098958 0.06910625
## positif 0.024862403 0.14506048
##
## suka
## y_train [,1] [,2]
## negatif 0.03353192 0.1703275
## positif 0.09640327 0.3033615
##
## susah
## y_train [,1] [,2]
## negatif 0.007558483 0.06441868
## positif 0.001460285 0.01915145
##
## tahun
## y_train [,1] [,2]
## negatif 0.028973249 0.20145134
## positif 0.003927874 0.05151358
##
## tak
## y_train [,1] [,2]
## negatif 0.012503715 0.12150328
## positif 0.003169699 0.04157021
##
## tambah
## y_train [,1] [,2]
## negatif 0.005923532 0.05192854
## positif 0.007607277 0.09976850
##
## tapi
## y_train [,1] [,2]
## negatif 0.02412274 0.07371383
## positif 0.09353655 0.21450614
##
## tau
## y_train [,1] [,2]
## negatif 0.008478888 0.05620412
## positif 0.000000000 0.00000000
##
## teman
## y_train [,1] [,2]
## negatif 0.01075456 0.07969407
## positif 0.04400920 0.41577449
##
## temen
## y_train [,1] [,2]
## negatif 0.013254553 0.10077242
## positif 0.008191464 0.09284987
##
## terus
## y_train [,1] [,2]
## negatif 0.027163857 0.15741042
## positif 0.008474555 0.05523294
##
## tetap
## y_train [,1] [,2]
## negatif 0.010412197 0.08611348
## positif 0.007231474 0.07185856
##
## that
## y_train [,1] [,2]
## negatif 0.01583296 0.11820627
## positif 0.00280824 0.03682972
##
## the
## y_train [,1] [,2]
## negatif 0.03874305 0.2316878
## positif 0.01594155 0.1072924
##
## there
## y_train [,1] [,2]
## negatif 0.01164248 0.08654903
## positif 0.00737390 0.07540000
##
## this
## y_train [,1] [,2]
## negatif 0.02212828 0.2307447
## positif 0.01497728 0.1635799
##
## though
## y_train [,1] [,2]
## negatif 0.008137137 0.08137035
## positif 0.002113132 0.02771347
##
## tiba
## y_train [,1] [,2]
## negatif 0.002712379 0.03845461
## positif 0.021038440 0.15338148
##
## tolong
## y_train [,1] [,2]
## negatif 0.03118359 0.1414002
## positif 0.03683253 0.1549699
##
## top
## y_train [,1] [,2]
## negatif 0.007717004 0.0712449
## positif 0.027380336 0.2670229
##
## udah
## y_train [,1] [,2]
## negatif 0.02918195 0.09419364
## positif 0.04254743 0.29139825
##
## udh
## y_train [,1] [,2]
## negatif 0.001688644 0.02394066
## positif 0.003318829 0.04352603
##
## umur
## y_train [,1] [,2]
## negatif 0.027660572 0.14280101
## positif 0.002667608 0.02880994
##
## updat
## y_train [,1] [,2]
## negatif 0.13481325 0.3870046
## positif 0.04424352 0.1306901
##
## upgrad
## y_train [,1] [,2]
## negatif 0.03763921 0.2440619
## positif 0.01023105 0.1341790
##
## usia
## y_train [,1] [,2]
## negatif 0.023663571 0.23147010
## positif 0.006084519 0.07979772
##
## verif
## y_train [,1] [,2]
## negatif 0.007651754 0.06507571
## positif 0.017607068 0.16280310
##
## verifikasi
## y_train [,1] [,2]
## negatif 0.02003819 0.13148381
## positif 0.00240179 0.03149919
##
## verivikasi
## y_train [,1] [,2]
## negatif 0.01107466 0.09928476
## positif 0.00000000 0.00000000
##
## wajah
## y_train [,1] [,2]
## negatif 0.002958959 0.04195048
## positif 0.026879045 0.23515056
##
## walau
## y_train [,1] [,2]
## negatif 0.000000000 0.00000000
## positif 0.009684047 0.08479999
##
## walaupun
## y_train [,1] [,2]
## negatif 0.009509282 0.08686406
## positif 0.010143036 0.09670708
##
## whi
## y_train [,1] [,2]
## negatif 0.02056628 0.1801272
## positif 0.01216904 0.1595954
##
## wifi
## y_train [,1] [,2]
## negatif 0.038413570 0.1847079
## positif 0.002058678 0.0269993
##
## woi
## y_train [,1] [,2]
## negatif 0.01871541 0.1578842
## positif 0.00000000 0.0000000
##
## yah
## y_train [,1] [,2]
## negatif 0.014104371 0.16037001
## positif 0.004250278 0.04646562
##
## you
## y_train [,1] [,2]
## negatif 0.006508316 0.06638406
## positif 0.009906845 0.07558768
#========================================================
# 26. MELATIH MODEL SUPPORT VECTOR MACHINE (SVM)
#========================================================
model_svm <- svm(
x = x_train,
y = y_train,
kernel = "linear"
)
# Menampilkan model SVM
model_svm
##
## Call:
## svm.default(x = x_train, y = y_train, kernel = "linear")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 1
##
## Number of Support Vectors: 175
#========================================================
# 27. PREDIKSI DATA UJI
#========================================================
pred_nb <- predict(
model_nb,
x_test
)
pred_svm <- predict(
model_svm,
x_test
)
# Menampilkan hasil prediksi
head(pred_nb)
## [1] positif negatif negatif positif positif negatif
## Levels: negatif positif
head(pred_svm)
## 2 3 4 15 19 22
## negatif negatif negatif positif positif negatif
## Levels: negatif positif
#========================================================
# 28. CONFUSION MATRIX
#========================================================
cm_nb <- confusionMatrix(
pred_nb,
y_test
)
cm_svm <- confusionMatrix(
pred_svm,
y_test
)
print(cm_nb)
## Confusion Matrix and Statistics
##
## Reference
## Prediction negatif positif
## negatif 26 8
## positif 24 35
##
## Accuracy : 0.6559
## 95% CI : (0.5502, 0.7514)
## No Information Rate : 0.5376
## P-Value [Acc > NIR] : 0.01385
##
## Kappa : 0.3255
##
## Mcnemar's Test P-Value : 0.00801
##
## Sensitivity : 0.5200
## Specificity : 0.8140
## Pos Pred Value : 0.7647
## Neg Pred Value : 0.5932
## Prevalence : 0.5376
## Detection Rate : 0.2796
## Detection Prevalence : 0.3656
## Balanced Accuracy : 0.6670
##
## 'Positive' Class : negatif
##
print(cm_svm)
## Confusion Matrix and Statistics
##
## Reference
## Prediction negatif positif
## negatif 34 14
## positif 16 29
##
## Accuracy : 0.6774
## 95% CI : (0.5725, 0.7707)
## No Information Rate : 0.5376
## P-Value [Acc > NIR] : 0.004276
##
## Kappa : 0.3533
##
## Mcnemar's Test P-Value : 0.855132
##
## Sensitivity : 0.6800
## Specificity : 0.6744
## Pos Pred Value : 0.7083
## Neg Pred Value : 0.6444
## Prevalence : 0.5376
## Detection Rate : 0.3656
## Detection Prevalence : 0.5161
## Balanced Accuracy : 0.6772
##
## 'Positive' Class : negatif
##
#========================================================
# 29. METRIK EVALUASI
#========================================================
akurasi_nb <- cm_nb$overall["Accuracy"]
akurasi_svm <- cm_svm$overall["Accuracy"]
precision_nb <- cm_nb$byClass["Pos Pred Value"]
precision_svm <- cm_svm$byClass["Pos Pred Value"]
recall_nb <- cm_nb$byClass["Sensitivity"]
recall_svm <- cm_svm$byClass["Sensitivity"]
f1_nb <- 2 * (
(precision_nb * recall_nb) /
(precision_nb + recall_nb)
)
f1_svm <- 2 * (
(precision_svm * recall_svm) /
(precision_svm + recall_svm)
)
cat("Akurasi Naive Bayes :", akurasi_nb, "\n")
## Akurasi Naive Bayes : 0.655914
cat("Akurasi SVM :", akurasi_svm, "\n")
## Akurasi SVM : 0.6774194
cat("Precision Naive Bayes :", precision_nb, "\n")
## Precision Naive Bayes : 0.7647059
cat("Precision SVM :", precision_svm, "\n")
## Precision SVM : 0.7083333
cat("Recall Naive Bayes :", recall_nb, "\n")
## Recall Naive Bayes : 0.52
cat("Recall SVM :", recall_svm, "\n")
## Recall SVM : 0.68
cat("F1 Score Naive Bayes :", f1_nb, "\n")
## F1 Score Naive Bayes : 0.6190476
cat("F1 Score SVM :", f1_svm, "\n")
## F1 Score SVM : 0.6938776
#========================================================
# 30. TABEL CONFUSION MATRIX
#========================================================
table(
Actual = y_test,
Predicted = pred_svm
)
## Predicted
## Actual negatif positif
## negatif 34 16
## positif 14 29
#========================================================
# 31. HEATMAP CONFUSION MATRIX NAIVE BAYES
#========================================================
cm_nb_table <- table(
Actual = y_test,
Predicted = pred_nb
)
cm_nb_df <- as.data.frame(
cm_nb_table
)
ggplot(
cm_nb_df,
aes(
x = Predicted,
y = Actual,
fill = Freq
)
) +
geom_tile() +
geom_text(
aes(label = Freq),
color = "white",
size = 5
) +
labs(
title = "Heatmap Confusion Matrix Naive Bayes",
x = "Prediksi",
y = "Aktual"
) +
theme_minimal()

#========================================================
# 32. HEATMAP CONFUSION MATRIX SVM
#========================================================
cm_svm_table <- table(
Actual = y_test,
Predicted = pred_svm
)
cm_svm_df <- as.data.frame(
cm_svm_table
)
ggplot(
cm_svm_df,
aes(
x = Predicted,
y = Actual,
fill = Freq
)
) +
geom_tile() +
geom_text(
aes(label = Freq),
color = "white",
size = 5
) +
labs(
title = "Heatmap Confusion Matrix SVM",
x = "Prediksi",
y = "Aktual"
) +
theme_minimal()

#========================================================
# 33. CEK IMBALANCE CLASS
#========================================================
table(
data_clean$sentimen
)
##
## negatif positif
## 251 215
#========================================================
# 34. CEK OVERFITTING
#========================================================
pred_train <- predict(
model_svm,
x_train
)
train_acc <- mean(
pred_train == y_train
)
test_acc <- mean(
pred_svm == y_test
)
cat("Akurasi Training :", train_acc, "\n")
## Akurasi Training : 0.9544236
cat("Akurasi Testing :", test_acc, "\n")
## Akurasi Testing : 0.6774194
#========================================================
# 35. MENENTUKAN KATA PALING BERPENGARUH
#========================================================
svm_coef <- t(model_svm$coefs) %*% model_svm$SV
svm_coef <- as.vector(
svm_coef
)
coef_df <- data.frame(
kata = colnames(x_train),
koefisien = svm_coef
)
#========================================================
# 36. TOP 10 KATA POSITIF
#========================================================
top_positif <- coef_df %>%
arrange(desc(koefisien)) %>%
slice(1:10)
kable(
top_positif,
caption = "Top 10 Kata yang Berpengaruh pada Sentimen Positif"
)
Top 10 Kata yang Berpengaruh pada Sentimen Positif
| jelek |
1.1035393 |
| updat |
0.9125950 |
| jaringan |
0.8712268 |
| get |
0.6383577 |
| chat |
0.6027311 |
| bukan |
0.5404523 |
| masa |
0.4636324 |
| kayak |
0.4601234 |
| tahun |
0.4464002 |
| semoga |
0.4311776 |
#========================================================
# 37. TOP 10 KATA NEGATIF
#========================================================
top_negatif <- coef_df %>%
arrange(koefisien) %>%
slice(1:10)
kable(
top_negatif,
caption = "Top 10 Kata yang Berpengaruh pada Sentimen Negatif"
)
Top 10 Kata yang Berpengaruh pada Sentimen Negatif
| tapi |
-0.8629167 |
| seru |
-0.7517827 |
| sekali |
-0.7428487 |
| lagi |
-0.7000926 |
| fitur |
-0.4544341 |
| kek |
-0.4310310 |
| gem |
-0.3939529 |
| tiba |
-0.3930159 |
| sama |
-0.3577497 |
| permainan |
-0.3541397 |
#========================================================
# 38. WORDCLOUD SENTIMEN POSITIF
#========================================================
# install.packages("wordcloud2")
library(wordcloud2)
## Warning: package 'wordcloud2' was built under R version 4.4.3
positif <- data_clean %>%
filter(sentimen == "positif")
text_pos <- paste(
positif$cleaned_text,
collapse = " "
)
kata_pos <- table(
unlist(
strsplit(text_pos, " ")
)
)
df_pos <- data.frame(
kata = names(kata_pos),
frekuensi = as.numeric(kata_pos)
)
df_pos <- df_pos %>%
filter(kata != "") %>%
arrange(desc(frekuensi)) %>%
slice(1:100)
wordcloud2(
data = df_pos,
size = 0.8
)
#========================================================
# 39. WORDCLOUD SENTIMEN NEGATIF
#========================================================
negatif <- data_clean %>%
filter(sentimen == "negatif")
text_neg <- paste(
negatif$cleaned_text,
collapse = " "
)
kata_neg <- table(
unlist(
strsplit(text_neg, " ")
)
)
df_neg <- data.frame(
kata = names(kata_neg),
frekuensi = as.numeric(kata_neg)
)
df_neg <- df_neg %>%
filter(kata != "") %>%
arrange(desc(frekuensi)) %>%
slice(1:100)
wordcloud2(
data = df_neg,
size = 0.8
)
#========================================================
# 40. MENYIMPAN DATA BERSIH
#========================================================
write.csv(
data_clean,
"hasil_preprocessing_roblox.csv",
row.names = FALSE
)