# ============================================================
# ANALISIS SENTIMEN KOMENTAR TIKTOK - NADIEM MAKARIM
# ============================================================
# TAHAP 1: PERSIAPAN
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
## Warning: package 'stringr' was built under R version 4.5.3
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.5.3
library(tidyr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 4.5.3
## Loading required package: RColorBrewer
library(RColorBrewer)
library(e1071)
## Warning: package 'e1071' was built under R version 4.5.3
##
## Attaching package: 'e1071'
## The following object is masked from 'package:ggplot2':
##
## element
library(caret)
## Warning: package 'caret' was built under R version 4.5.3
## Loading required package: lattice
library(tm)
## Warning: package 'tm' was built under R version 4.5.3
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
library(showtext)
## Loading required package: sysfonts
## Loading required package: showtextdb
# TAHAP 2: BACA DATA
# Read every column as text. The sheet contains misaligned rows (URLs and
# timestamps in columns whose first rows look numeric/logical); with type
# guessing those cells are coerced to NA, one warning per cell. Only the
# `text` column is used downstream, and it is character either way.
df <- read_excel("UAS TM.xlsx", col_types = "text")
## Warning: Expecting numeric in Q1135 / R1135C17: got
## 'https://p19-common-sign.tiktokcdn-us.com/tos-alisg-avt-0068/6c84f815f107f50ada31b259f08137a9~tplv-tiktokx-cropcenter:100:100.jpg?dr=9640&refresh_token=bf628aea&x-expires=1779022800&x-signature=%2B2M%2BNzKkgZs0846VRCtdjezD2Uw%3D&t=4d5b0474&ps=13740610&shp=30310797&shcp=ff37627b&idc=useast8'
## Warning: Expecting numeric in Q1304 / R1304C17: got
## 'https://www.tiktok.com/@hukum.perubahan/video/7613662301048065298'
## Warning: Expecting logical in T1304 / R1304C20: got
## 'https://p16-common-sign.tiktokcdn-us.com/tos-alisg-avt-0068/e6a08aeec43ef5a586fd93c77f93ce88~tplv-tiktokx-cropcenter:100:100.jpg?dr=9640&refresh_token=00ce081c&x-expires=1779022800&x-signature=Qy3e0Qnh0cRCYHF6FslHr3fIBWg%3D&t=4d5b0474&ps=13740610&shp=30310797&shcp=ff37627b&idc=useast5'
## Warning: Expecting numeric in Q1987 / R1987C17: got '2026-05-16T11:57:36.000Z'
## Warning: Expecting logical in S1987 / R1987C19: got
## 'https://www.tiktok.com/@awbimax/video/7639644459986210069'
## Warning: Expecting logical in V1987 / R1987C22: got
## 'https://p16-common-sign.tiktokcdn-us.com/tos-alisg-avt-0068/49b4a87de55aa0c13864c608a083661c~tplv-tiktokx-cropcenter:100:100.jpg?dr=9640&refresh_token=1dbcefc6&x-expires=1779022800&x-signature=sq1iQcVyos3ATlGPC8%2Fa8%2FedX24%3D&t=4d5b0474&ps=13740610&shp=30310797&shcp=ff37627b&idc=useast8'
## Warning: Expecting numeric in Q3353 / R3353C17: got
## 'https://p16-common-sign.tiktokcdn-us.com/tos-alisg-avt-0068/d8ed80cc03e08eba5f9044bbbf698761~tplv-tiktokx-cropcenter:100:100.jpg?dr=9640&refresh_token=7c4ddc53&x-expires=1779022800&x-signature=p9wHCuRmgEaE1dOCjdX88O7WCCw%3D&t=4d5b0474&ps=13740610&shp=30310797&shcp=ff37627b&idc=useast8'
## Warning: Expecting numeric in Q3799 / R3799C17: got
## 'https://p16-common-sign.tiktokcdn.com/tos-alisg-avt-0068/c79d586927b8199bcc5428c68cf256e9~tplv-tiktokx-cropcenter:100:100.jpg?dr=14579&refresh_token=26d0f079&x-expires=1779022800&x-signature=hS31oE26lZCKw9PWCZnlaOxv2pY%3D&t=4d5b0474&ps=13740610&shp=30310797&shcp=ff37627b&idc=my2'
## Warning: Expecting numeric in Q3973 / R3973C17: got
## 'https://p16-common-sign.tiktokcdn-us.com/tos-alisg-avt-0068/18ff0cf74deba0c965fe4ba1e917081d~tplv-tiktokx-cropcenter:100:100.jpg?dr=9640&refresh_token=8d99dc8a&x-expires=1779022800&x-signature=2%2BXrW%2BMllzRq22p1ZZDBMchx%2BCA%3D&t=4d5b0474&ps=13740610&shp=30310797&shcp=ff37627b&idc=useast5'
## New names:
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...22`
# View() opens a spreadsheet viewer, which only makes sense in an interactive
# session; skip it when the script is knitted or run in batch mode.
if (interactive()) {
  View(df)
}
# Compact overview of column names, types and first values
glimpse(df)
## Rows: 4,527
## Columns: 22
## $ text <chr> "seandainya saya ketua MBG.", "ini lah mengapa orang…
## $ diggCount <chr> NA, "mending kerja diluar negeri", NA, "perxayalah..…
## $ replyCommentTotal <chr> NA, "gaji gede ga urusan sama penjara.", NA, "jauh d…
## $ createTimeISO <chr> NA, "5845", NA, "3081", "2026-05-14T02:20:05.000Z", …
## $ uniqueId <chr> NA, "26", NA, "62", "aw_rich1996", "2026-03-05T21:20…
## $ videoWebUrl <chr> NA, "2026-03-05T19:27:17.000Z", NA, "2026-03-05T13:4…
## $ uid <chr> NA, "maya_jkt", NA, "qq_ay", "6.7093563374809897E+18…
## $ cid <chr> NA, "https://www.tiktok.com/@hukum.perubahan/video/7…
## $ avatarThumbnail <chr> NA, "6.8414617610617395E+18", NA, "7.029302470825230…
## $ ...10 <chr> NA, "7.6138552551391703E+18", NA, "7.613767892466870…
## $ ...11 <chr> NA, "https://p19-common-sign.tiktokcdn-us.com/tos-al…
## $ ...12 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ ...13 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ ...14 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ ...15 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ ...16 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ ...17 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ ...18 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ ...19 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ ...20 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ ...21 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ ...22 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
# Peek at the first ten raw comments
head(df[["text"]], n = 10)
## [1] "seandainya saya ketua MBG."
## [2] "ini lah mengapa orang2 pinter males masuk pemerintahan"
## [3] "MOMMY UNIVERSE will help You pak Nadim."
## [4] "sy kenal nadiem dr sejak di US"
## [5] "Ada yg bisa bantuin bapak ini gak? 😭"
## [6] "4"
## [7] "PARA OJEK ON LINE KALIAN KEMANA SIIH..MASA NGK ADA YG BELA BAPAK KALIAN BAHKAN NADIEM ADALAH BAPAK KENDARAAN ON LINE INDONESIA"
## [8] "salah Nadim cuma 1 ."
## [9] "Nadiem pegang rahasia apa ya"
## [10] "Yang menurut saya lebih keliru lagi adalah kenapa pilih laptop chromebook yg berbasis android dan harus online pemakaiannnya karena tdk bisa instal office misalnya seperti di windows."
# TAHAP 3: PREPROCESSING
# Clean raw comment text before tokenisation.
#
# Steps: lower-case, drop URLs, drop @mentions and #hashtags, replace every
# remaining character that is not a-z or whitespace (digits, punctuation,
# emoji) with a space, then collapse repeated whitespace.
#
# Removed spans are replaced by a space rather than deleted, so words that
# were separated only by punctuation or digits ("siih..masa", "anak-anak")
# stay separate tokens instead of being glued together.
#
# x: character vector of raw comments.
# Returns a character vector of the same length.
bersihkan_teks <- function(x) {
  x %>%
    str_to_lower() %>%
    str_replace_all("http\\S+|www\\S+", " ") %>%
    str_replace_all("[@#]\\w+", " ") %>%
    str_replace_all("[^a-z\\s]", " ") %>%
    str_squish()
}
# Keep comments with more than 3 raw characters, clean them, then drop the
# ones that are empty after cleaning (e.g. only emoji, mentions or digits).
# Such rows carry no words and would otherwise be kept as empty documents.
df <- df %>%
  filter(!is.na(text), str_length(text) > 3) %>%
  mutate(text_bersih = bersihkan_teks(text)) %>%
  filter(text_bersih != "")
head(df %>% select(text, text_bersih), 5)
## # A tibble: 5 × 2
## text text_bersih
## <chr> <chr>
## 1 "seandainya saya ketua MBG." seandainya saya ketu…
## 2 "ini lah mengapa orang2 pinter males masuk pemerintahan" ini lah mengapa oran…
## 3 "MOMMY UNIVERSE will help You pak Nadim." mommy universe will …
## 4 "sy kenal nadiem dr sejak di US" sy kenal nadiem dr s…
## 5 "Ada yg bisa bantuin bapak ini gak? \U0001f62d" ada yg bisa bantuin …
# TAHAP 4: NORMALISASI TEKS
# Normalisation dictionary: names are informal / abbreviated tokens, values
# are their replacements. An empty value ("") means the token is dropped.
# Entries that map a word to itself are no-ops kept for readability.
# Every key appears exactly once (a repeated "wkwk" entry was removed; lookup
# by name only ever used the first occurrence, so results are unchanged).
kamus_normalisasi <- c(
  "gua" = "saya", "gue" = "saya", "gw" = "saya", "aku" = "saya", "sy" = "saya",
  "w" = "saya",
  "ngk" = "tidak",
  "lo" = "kamu", "lu" = "kamu", "elo" = "kamu",
  "dia" = "dia", "dy" = "dia", "dya" = "dia",
  "kita" = "kita", "kt" = "kita",
  "mereka" = "mereka", "mrk" = "mereka",
  "nggak" = "tidak", "ngga" = "tidak", "gak" = "tidak", "ga" = "tidak",
  "enggak" = "tidak", "engga" = "tidak", "kagak" = "tidak",
  "ndak" = "tidak", "ndk" = "tidak", "g" = "tidak", "gk" = "tidak",
  "tak" = "tidak", "tdk" = "tidak",
  "blm" = "belum", "blom" = "belum",
  "udh" = "sudah", "udah" = "sudah", "dah" = "sudah", "sdh" = "sudah",
  "emg" = "memang", "emang" = "memang",
  "krn" = "karena", "karna" = "karena", "krna" = "karena",
  "klo" = "kalau", "kalu" = "kalau", "kl" = "kalau", "klw" = "kalau",
  "tp" = "tapi", "tpi" = "tapi",
  "spy" = "supaya", "biar" = "supaya",
  "yg" = "yang", "yng" = "yang",
  "dgn" = "dengan", "dg" = "dengan", "sm" = "sama",
  "utk" = "untuk", "buat" = "untuk", "tuk" = "untuk",
  "dr" = "dari", "dri" = "dari",
  "pd" = "pada", "ke" = "ke",
  "jg" = "juga", "jga" = "juga",
  "sdg" = "sedang", "lagi" = "sedang", "lg" = "sedang",
  "msh" = "masih",
  "hrs" = "harus",
  "bs" = "bisa", "bsa" = "bisa",
  "mau" = "mau", "mo" = "mau",
  "aja" = "saja", "aj" = "saja",
  "doang" = "saja",
  "banget" = "sangat", "bgt" = "sangat", "bngt" = "sangat",
  "byk" = "banyak", "bnyk" = "banyak",
  "sgt" = "sangat",
  "skrg" = "sekarang", "skrng" = "sekarang",
  "dulu" = "dulu", "dl" = "dulu",
  "lbh" = "lebih",
  "krg" = "kurang",
  "bilang" = "berkata", "ngomong" = "berkata", "ngbln" = "berkata",
  "kerja" = "bekerja", "krja" = "bekerja",
  "makan" = "makan", "mkn" = "makan",
  "nonton" = "menonton", "ntn" = "menonton",
  "beli" = "membeli",
  "kasih" = "memberi", "ksh" = "memberi",
  "liat" = "melihat", "lht" = "melihat",
  "tau" = "tahu", "tw" = "tahu",
  "bikin" = "membuat", "bkin" = "membuat",
  "nyari" = "mencari",
  "nemu" = "menemukan",
  "nyebut" = "menyebut",
  "ngerti" = "mengerti",
  "mikir" = "berpikir", "mikirin" = "memikirkan",
  "ngerasa" = "merasa",
  "dapet" = "mendapat", "dpet" = "mendapat",
  "bayar" = "membayar",
  "pake" = "menggunakan", "make" = "menggunakan", "pk" = "menggunakan",
  "bagus" = "bagus", "keren" = "bagus", "mantap" = "bagus",
  "mantul" = "bagus", "josss" = "bagus", "joss" = "bagus",
  "jelek" = "jelek", "parah" = "parah", "ancur" = "hancur",
  "gila" = "gila", "gilak" = "gila",
  "susah" = "sulit",
  "gampang" = "mudah", "enteng" = "mudah",
  "males" = "malas", "mls" = "malas",
  "capek" = "lelah", "cape" = "lelah",
  "sedih" = "sedih",
  "seneng" = "senang", "senang" = "senang",
  "kesel" = "kesal", "bete" = "kesal",
  "marah" = "marah", "emosi" = "marah",
  "kaget" = "terkejut",
  "lucu" = "lucu", "ngakak" = "lucu", "wkwk" = "lucu", "wkwkwk" = "lucu",
  "mewek" = "menangis",
  "murah" = "murah", "mahal" = "mahal",
  "bener" = "benar", "bnr" = "benar", "bner" = "benar",
  "salah" = "salah", "slah" = "salah",
  "penting" = "penting", "pnting" = "penting",
  "hebat" = "hebat",
  "mantep" = "bagus",
  "pinter" = "pintar",
  "pemerintah" = "pemerintah", "pmrntah" = "pemerintah",
  "menteri" = "menteri", "mntri" = "menteri",
  "pendidikan" = "pendidikan", "pdidikan" = "pendidikan",
  "sekolah" = "sekolah", "sklh" = "sekolah",
  "kampus" = "kampus", "kmpus" = "kampus",
  "rakyat" = "rakyat", "rkyat" = "rakyat",
  "negara" = "negara", "nkri" = "negara",
  "uang" = "uang", "duit" = "uang",
  "gaji" = "gaji",
  "korupsi" = "korupsi", "korup" = "korupsi",
  "kebijakan" = "kebijakan",
  "haha" = "lucu", "hihi" = "lucu",
  "lol" = "lucu",
  "hmm" = "", "hmmm" = "", "eh" = "", "ah" = "", "oh" = "",
  "yah" = "", "ya" = "", "iya" = "ya",
  "dong" = "", "deh" = "", "sih" = "", "nih" = "",
  "kan" = "", "lah" = "",
  "woi" = "", "hey" = "", "hei" = "", "baekkk" = "baik"
)
# Replace informal tokens with their dictionary form.
#
# Each text is split on whitespace; a token that is a name in `kamus` is
# replaced by the corresponding value, tokens mapped to "" are dropped, and
# the remaining tokens are joined with single spaces.
#
# x:     character vector of texts (a single string works as before).
# kamus: named character vector, names = tokens to replace, values =
#        replacements. Defaults to the script-level `kamus_normalisasi`.
# Returns an unnamed character vector the same length as `x`; NA stays NA
# (instead of turning into the string "NA") and a zero-length input returns
# character(0) instead of raising an error.
normalisasi <- function(x, kamus = kamus_normalisasi) {
  normalkan_satu <- function(teks) {
    if (is.na(teks)) {
      return(NA_character_)
    }
    kata <- strsplit(teks, "\\s+")[[1]]
    # match() returns the first matching key, like name-based indexing
    posisi <- match(kata, names(kamus))
    ketemu <- !is.na(posisi)
    kata[ketemu] <- kamus[posisi[ketemu]]
    paste(kata[nzchar(kata)], collapse = " ")
  }
  vapply(as.character(x), normalkan_satu, character(1), USE.NAMES = FALSE)
}
# Normalise each cleaned comment. vapply() guarantees a character result
# (also for zero rows) and USE.NAMES = FALSE keeps the column unnamed;
# sapply() would attach each comment's full text as the element name.
df <- df %>%
  mutate(
    text_bersih = vapply(
      text_bersih, normalisasi, character(1), USE.NAMES = FALSE
    )
  )
# TAHAP 5: PENANGANAN NEGASI
# Negation words, and a regex that captures one of them (group 1) followed by
# whitespace and the next word (group 2).
kata_negasi <- c(
  "tidak", "tak", "bukan", "belum", "jangan", "ga", "gak",
  "nggak", "ngga", "enggak", "ndk", "ndak", "kagak", "gk"
)
pola_negasi <- sprintf(
  "\\b(%s)\\s+(\\w+)",
  paste(kata_negasi, collapse = "|")
)
# Join every negation word to the word that follows it with an underscore
# ("tidak bagus" -> "tidak_bagus"), using the script-level `pola_negasi`.
#
# x: character vector. Returns the vector with the replacements applied.
tangani_negasi <- function(x) {
  pengganti <- "\\1_\\2"
  str_replace_all(string = x, pattern = pola_negasi, replacement = pengganti)
}
# Apply negation joining to the normalised text column
df <- mutate(df, text_bersih = tangani_negasi(text_bersih))
# TAHAP 6: LABELING SENTIMEN (LEXICON-BASED)
# Lexicon word lists: tab-separated files with a header row; the two columns
# are named `word` and `skor` on read.
positif <- read.table(
  "C:/Users/fadhi/Downloads/positive.tsv",
  header = TRUE, sep = "\t", col.names = c("word", "skor")
)
negatif <- read.table(
  "C:/Users/fadhi/Downloads/negative.tsv",
  header = TRUE, sep = "\t", col.names = c("word", "skor")
)

# One row per (word, polarity). distinct() removes repeated rows so a word is
# not counted more than once per polarity when joined to the tokens.
kamus <- bind_rows(
  positif %>% mutate(sentimen = "positif"),
  negatif %>% mutate(sentimen = "negatif")
) %>%
  distinct(word, sentimen)

# Give each comment an id once, so the tokens and the final join share it
df <- df %>%
  mutate(id = row_number())

token <- df %>%
  select(id, text_bersih) %>%
  unnest_tokens(word, text_bersih)

# A word can occur in many comments and may be listed under both polarities,
# so many-to-many matches are expected here; declare that explicitly.
hasil_kata <- token %>%
  inner_join(kamus, by = "word", relationship = "many-to-many")

# Count positive and negative hits per comment. Summing the comparisons
# always yields both columns, even if one polarity never occurs in the data.
skor_per_komentar <- hasil_kata %>%
  summarise(
    positif = sum(sentimen == "positif"),
    negatif = sum(sentimen == "negatif"),
    .by = id
  ) %>%
  mutate(
    label = case_when(
      positif > negatif ~ "Positif",
      negatif > positif ~ "Negatif",
      TRUE ~ "Netral"
    )
  )

# Comments without any lexicon hit have no score row and become "Netral"
df <- df %>%
  left_join(skor_per_komentar %>% select(id, label), by = "id") %>%
  mutate(label = replace_na(label, "Netral"))
# Show how many comments fall under each lexicon label
writeLines("=== Distribusi Label Sentimen (Lexicon) ===")
## === Distribusi Label Sentimen (Lexicon) ===
print(table(df[["label"]]))
##
## Negatif Netral Positif
## 1885 1770 666
# Lexicon distribution chart: register the Poppins font under the family
# name "poppins" and let showtext render text on graphics devices.
font_add_google(name = "Poppins", family = "poppins")
showtext_auto(enable = TRUE)
# Count and percentage per label, with a fixed display order for the bars
df_lexicon <- mutate(
  count(df, label),
  persen = round(n / sum(n) * 100, 1),
  label = factor(label, levels = c("Positif", "Negatif", "Netral"))
)
# Bar chart of the lexicon label counts; each bar is annotated with its
# count and percentage.
ggplot(data = df_lexicon, mapping = aes(x = label, y = n, fill = label)) +
  geom_col(show.legend = FALSE, width = 0.5) +
  geom_text(
    mapping = aes(label = paste0(n, "\n(", persen, "%)")),
    family = "poppins", color = "#444441", size = 4, vjust = -0.5
  ) +
  scale_fill_manual(
    values = c(Positif = "#2C2C2A", Negatif = "#888780", Netral = "#D3D1C7")
  ) +
  # no padding below the bars, 15% headroom above for the annotations
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  labs(
    x = NULL,
    y = "Jumlah Komentar",
    title = "Distribusi Sentimen — Lexicon Based",
    subtitle = "Pelabelan awal menggunakan kamus kata positif & negatif"
  ) +
  theme_minimal(base_family = "poppins") +
  # titles and outer margin
  theme(
    plot.title = element_text(size = 14, face = "bold", color = "#2C2C2A"),
    plot.subtitle = element_text(
      size = 11, color = "#888780", margin = margin(b = 12)
    ),
    plot.margin = margin(20, 20, 20, 20)
  ) +
  # axis text
  theme(
    axis.text.x = element_text(size = 12, color = "#2C2C2A"),
    axis.text.y = element_text(size = 10, color = "#888780"),
    axis.title.y = element_text(size = 11, color = "#888780")
  ) +
  # keep only faint horizontal major grid lines
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_line(color = "#F1EFE8", linewidth = 0.5)
  )

# TAHAP 7: KLASIFIKASI SVM
corpus <- Corpus(VectorSource(df$text_bersih))
# Keep only terms that occur in at least 5 documents. In tm this is set via
# `bounds$global`; `minDocFreq` is not a recognised control option and was
# silently ignored.
dtm <- DocumentTermMatrix(corpus, control = list(
  bounds = list(global = c(5, Inf))
))
# Apply TF-IDF weighting as a separate step: passing a function inside
# `control` for this corpus type triggers tm's "custom functions are ignored"
# warning even though the weighting itself is applied.
# NOTE(review): documents with no remaining terms still raise tm's
# "empty document(s)" warning here.
dtm <- weightTfIdf(dtm)
# Drop terms that are absent from more than 99% of the documents
dtm <- removeSparseTerms(dtm, 0.99)
dtm_df <- as.data.frame(as.matrix(dtm))
writeLines("=== Top 10 Kata dengan TF-IDF Tertinggi ===")
## === Top 10 Kata dengan TF-IDF Tertinggi ===
# Sum the TF-IDF weight of every term over all documents and keep the ten
# largest totals.
top_tfidf <- head(
  arrange(
    data.frame(Kata = colnames(dtm_df), Total_TFIDF = colSums(dtm_df)),
    desc(Total_TFIDF)
  ),
  10
)
print(top_tfidf)
## Kata Total_TFIDF
## sticker sticker 361.2948
## pak pak 321.1924
## sangat sangat 279.5594
## yang yang 246.9411
## ini ini 242.6964
## saya saya 239.0225
## nadiem nadiem 228.8835
## negara negara 184.8011
## setuju setuju 178.4456
## benar benar 176.1601
# The feature matrix must line up row-by-row with `df`, and no term may be
# called "label", because that column name is used for the class below.
# Fail loudly instead of silently re-subsetting `df`.
stopifnot(
  nrow(dtm_df) == nrow(df),
  !"label" %in% colnames(dtm_df)
)
# Append the lexicon label as the (only non-numeric) target column
dtm_df$label <- as.factor(df$label)
# Fixed seed so the 80/20 train/test split is reproducible
set.seed(42)
idx <- createDataPartition(dtm_df$label, p = 0.8, list = FALSE)
data_train <- dtm_df[idx, ]
data_test <- dtm_df[-idx, ]
# Linear-kernel SVM using every term column as a predictor
model_svm <- svm(label ~ ., data = data_train, kernel = "linear", cost = 1)
prediksi <- predict(model_svm, data_test)
# Print the confusion matrix and evaluation statistics for the test split
writeLines("=== Evaluasi Model SVM ===")
## === Evaluasi Model SVM ===
print(confusionMatrix(data = prediksi, reference = data_test$label))
## Confusion Matrix and Statistics
##
## Reference
## Prediction Negatif Netral Positif
## Negatif 260 55 14
## Netral 112 284 85
## Positif 5 15 34
##
## Overall Statistics
##
## Accuracy : 0.669
## 95% CI : (0.6365, 0.7003)
## No Information Rate : 0.4363
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.4447
##
## Mcnemar's Test P-Value : 1.117e-15
##
## Statistics by Class:
##
## Class: Negatif Class: Netral Class: Positif
## Sensitivity 0.6897 0.8023 0.25564
## Specificity 0.8583 0.6137 0.97264
## Pos Pred Value 0.7903 0.5904 0.62963
## Neg Pred Value 0.7813 0.8172 0.87778
## Prevalence 0.4363 0.4097 0.15394
## Detection Rate 0.3009 0.3287 0.03935
## Detection Prevalence 0.3808 0.5567 0.06250
## Balanced Accuracy 0.7740 0.7080 0.61414
# Predict a label for every comment (training and test rows alike). The
# feature columns are selected by name, so this does not rely on `label`
# being the last column, and drop = FALSE keeps a data frame even when only
# one feature column remains.
df$label_svm <- predict(
  model_svm,
  dtm_df[, setdiff(colnames(dtm_df), "label"), drop = FALSE]
)
# Show the distribution of the SVM labels
cat("=== Distribusi Label Sentimen (SVM) ===\n")
## === Distribusi Label Sentimen (SVM) ===
print(table(df$label_svm))
##
## Negatif Netral Positif
## 1625 2423 273
# SVM distribution chart data: count and percentage per predicted label,
# with a fixed display order for the bars
df_svm <- mutate(
  count(df, label_svm),
  persen = round(n / sum(n) * 100, 1),
  label_svm = factor(label_svm, levels = c("Positif", "Negatif", "Netral"))
)
# Bar chart of the SVM-predicted label counts; each bar is annotated with its
# count and percentage.
ggplot(
  data = df_svm,
  mapping = aes(x = label_svm, y = n, fill = label_svm)
) +
  geom_col(show.legend = FALSE, width = 0.5) +
  geom_text(
    mapping = aes(label = paste0(n, "\n(", persen, "%)")),
    family = "poppins", color = "#444441", size = 4, vjust = -0.5
  ) +
  scale_fill_manual(
    values = c(Positif = "#2C2C2A", Negatif = "#888780", Netral = "#D3D1C7")
  ) +
  # no padding below the bars, 15% headroom above for the annotations
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  labs(
    x = NULL,
    y = "Jumlah Komentar",
    title = "Distribusi Sentimen — Klasifikasi SVM",
    subtitle = "Hasil akhir prediksi model Support Vector Machine"
  ) +
  theme_minimal(base_family = "poppins") +
  # titles and outer margin
  theme(
    plot.title = element_text(size = 14, face = "bold", color = "#2C2C2A"),
    plot.subtitle = element_text(
      size = 11, color = "#888780", margin = margin(b = 12)
    ),
    plot.margin = margin(20, 20, 20, 20)
  ) +
  # axis text
  theme(
    axis.text.x = element_text(size = 12, color = "#2C2C2A"),
    axis.text.y = element_text(size = 10, color = "#888780"),
    axis.title.y = element_text(size = 11, color = "#888780")
  ) +
  # keep only faint horizontal major grid lines
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_line(color = "#F1EFE8", linewidth = 0.5)
  )

# Confusion-matrix heatmap data: one row per (predicted, actual) pair with
# its count and its share of that actual class.
cm <- confusionMatrix(data = prediksi, reference = data_test$label)
cm_df <- setNames(
  as.data.frame(cm$table),
  c("Prediksi", "Aktual", "Jumlah")
)
cm_df <- ungroup(
  mutate(
    group_by(cm_df, Aktual),
    Persen = round(Jumlah / sum(Jumlah) * 100, 1)
  )
)
# Heatmap of the confusion matrix: actual class on x, predicted class on y,
# tile shade by count, with count and percentage printed in each tile.
ggplot(data = cm_df, mapping = aes(x = Aktual, y = Prediksi, fill = Jumlah)) +
  geom_tile(linewidth = 2, color = "white") +
  geom_text(
    mapping = aes(label = paste0(Jumlah, "\n(", Persen, "%)")),
    family = "poppins", fontface = "bold", size = 4.5, color = "white"
  ) +
  scale_fill_gradient(high = "#2C2C2A", low = "#B4B2A9") +
  labs(
    x = "Nilai Aktual",
    y = "Nilai Prediksi",
    fill = "Jumlah",
    title = "Confusion Matrix — Klasifikasi Sentimen SVM",
    subtitle = "Perbandingan nilai prediksi dan nilai aktual"
  ) +
  theme_minimal(base_family = "poppins") +
  # titles, legend title and outer margin
  theme(
    plot.title = element_text(size = 14, face = "bold", color = "#2C2C2A"),
    plot.subtitle = element_text(
      size = 11, color = "#888780", margin = margin(b = 12)
    ),
    legend.title = element_text(size = 10, color = "#888780"),
    plot.margin = margin(20, 20, 20, 20)
  ) +
  # axis text, no grid lines
  theme(
    axis.text = element_text(size = 12, color = "#2C2C2A"),
    axis.title = element_text(size = 11, color = "#888780"),
    panel.grid = element_blank()
  )

# Recompute the confusion matrix with mode = "everything" so that the
# per-class table also includes Precision, Recall and F1.
cm <- confusionMatrix(prediksi, data_test$label, mode = "everything")
# Print the per-class metrics (one row per class).
print(cm$byClass)
## Sensitivity Specificity Pos Pred Value Neg Pred Value Precision
## Class: Negatif 0.6896552 0.8583162 0.7902736 0.7813084 0.7902736
## Class: Netral 0.8022599 0.6137255 0.5904366 0.8172324 0.5904366
## Class: Positif 0.2556391 0.9726402 0.6296296 0.8777778 0.6296296
## Recall F1 Prevalence Detection Rate
## Class: Negatif 0.6896552 0.7365439 0.4363426 0.30092593
## Class: Netral 0.8022599 0.6802395 0.4097222 0.32870370
## Class: Positif 0.2556391 0.3636364 0.1539352 0.03935185
## Detection Prevalence Balanced Accuracy
## Class: Negatif 0.380787 0.7739857
## Class: Netral 0.556713 0.7079927
## Class: Positif 0.062500 0.6141397
# Default-mode confusion matrix summary for the test split
print(confusionMatrix(data = prediksi, reference = data_test$label))
## Confusion Matrix and Statistics
##
## Reference
## Prediction Negatif Netral Positif
## Negatif 260 55 14
## Netral 112 284 85
## Positif 5 15 34
##
## Overall Statistics
##
## Accuracy : 0.669
## 95% CI : (0.6365, 0.7003)
## No Information Rate : 0.4363
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.4447
##
## Mcnemar's Test P-Value : 1.117e-15
##
## Statistics by Class:
##
## Class: Negatif Class: Netral Class: Positif
## Sensitivity 0.6897 0.8023 0.25564
## Specificity 0.8583 0.6137 0.97264
## Pos Pred Value 0.7903 0.5904 0.62963
## Neg Pred Value 0.7813 0.8172 0.87778
## Prevalence 0.4363 0.4097 0.15394
## Detection Rate 0.3009 0.3287 0.03935
## Detection Prevalence 0.3808 0.5567 0.06250
## Balanced Accuracy 0.7740 0.7080 0.61414
# TAHAP 8: VISUALISASI WORD CLOUD
# Words excluded from the word clouds: function words, forms of address,
# names tied to the topic, and other frequent tokens.
stopwords_id <- c(
  # function words, pronouns, question words
  "yang", "dan", "di", "ke", "dari", "dengan", "untuk", "pada",
  "adalah", "ini", "itu", "juga", "sudah", "atau", "tapi", "karena",
  "kalau", "bisa", "saya", "kamu", "dia", "kita", "mereka", "kami",
  "ada", "tidak", "akan", "lebih", "banyak", "sama", "saja", "masih",
  "dalam", "oleh", "atas", "bawah", "antara", "lagi", "saat", "sini",
  "sana", "mana", "apa", "siapa", "kenapa", "gimana", "bagaimana",
  "memang", "sangat", "sekali", "seperti", "semua", "setiap", "harus",
  "mau", "maka", "jadi", "ketika", "waktu", "lalu", "setelah", "sebelum",
  # forms of address
  "pak", "bapak", "mas", "bang", "kak",
  # names and titles
  "nadiem", "nadim", "makarim", "beliau", "nya", "presiden",
  # other frequent tokens
  "baik", "ing", "bim", "kok", "kalo", "tuh",
  "makin", "selalu", "dulu", "jujur", "semangat", "pasti",
  "tolong", "bantu", "membuat", "terjadi", "hidup", "dunia",
  "negeri", "negri", "org", "uppo", "ceo", "mntri",
  # conjunctions and adverbs
  "bahwa", "namun", "serta", "pun", "agar", "hingga",
  "sejak", "selama", "sambil", "malah", "justru", "bahkan",
  "apalagi", "padahal", "sedangkan", "meski", "walaupun",
  "meskipun", "apakah", "apabila", "biasanya", "hanya", "cuma",
  "telah", "sedang", "baru"
)
# Greyscale palette per sentiment class; all three classes share the same
# six shades, ordered dark to light.
warna_sentimen <- setNames(
  rep(
    list(c("#2C2C2A", "#444441", "#5F5E5A", "#888780", "#B4B2A9", "#D3D1C7")),
    times = 3
  ),
  c("Positif", "Negatif", "Netral")
)
library(wordcloud2)
## Warning: package 'wordcloud2' was built under R version 4.5.3
library(htmlwidgets)
# Put a centred <h3> heading in front of an htmlwidget.
#
# wc:    the widget to decorate (here: a wordcloud2 object).
# judul: heading text.
# Returns the value of prependContent(), i.e. the widget with the heading
# prepended.
tambah_judul <- function(wc, judul) {
  gaya <- paste0(
    "text-align:center; font-family:'Poppins',sans-serif;\n",
    "font-size:18px; font-weight:bold; color:#2C2C2A; margin-bottom:8px;"
  )
  kepala <- htmltools::tags$h3(judul, style = gaya)
  prependContent(wc, kepala)
}
# Word frequencies for one SVM class.
#
# data:   data frame with `label_svm` and `text_bersih` columns.
# kelas:  class to keep ("Positif", "Negatif" or "Netral").
# n_kata: maximum number of words returned.
# Returns a data frame with columns `word` and `freq`, most frequent first.
# Stop words, negation-joined tokens ("tidak_...") and words of one or two
# characters are excluded. The negation prefix pattern is built from
# `kata_negasi`, so it always covers the same words as the negation step.
hitung_kata_sentimen <- function(data, kelas, n_kata = 100) {
  pola_awalan_negasi <- paste0("^(", paste(kata_negasi, collapse = "|"), ")_")
  data %>%
    filter(label_svm == kelas) %>%
    unnest_tokens(word, text_bersih) %>%
    filter(
      !word %in% stopwords_id,
      !str_detect(word, pola_awalan_negasi)
    ) %>%
    count(word, sort = TRUE) %>%
    filter(nchar(word) > 2) %>%
    rename(freq = n) %>%
    head(n_kata)
}

# Greyscale square word cloud for a word/frequency table.
#
# kata:   data frame with `word` and `freq` columns.
# ukuran: value passed to wordcloud2() as `size`.
buat_wordcloud <- function(kata, ukuran) {
  palet <- c("#2C2C2A", "#444441", "#5F5E5A", "#888780", "#B4B2A9")
  wordcloud2(
    data = kata,
    shape = "square",
    size = ukuran,
    # cycle through the palette so every word gets a colour
    color = rep(palet, length.out = nrow(kata)),
    backgroundColor = "white",
    fontFamily = "Poppins",
    rotateRatio = 0.2
  )
}

# Word Cloud — Positif
kata_positif <- hitung_kata_sentimen(df, "Positif")
wc_positif <- buat_wordcloud(kata_positif, ukuran = 1.2)
tambah_judul(wc_positif, "Word Cloud — Positif")

# Word Cloud — Negatif
kata_negatif <- hitung_kata_sentimen(df, "Negatif")
wc_negatif <- buat_wordcloud(kata_negatif, ukuran = 0.6)
tambah_judul(wc_negatif, "Word Cloud — Negatif")

# Word Cloud — Netral
kata_netral <- hitung_kata_sentimen(df, "Netral")
wc_netral <- buat_wordcloud(kata_netral, ukuran = 0.6)
tambah_judul(wc_netral, "Word Cloud — Netral")
## Word Cloud — Netral