# =========================
# 1. LIBRARY & SETUP PYTHON
# =========================
# Point reticulate at the project's conda interpreter. Both variables are set
# before library(reticulate) is attached so they are in place when Python is
# initialised.
# NOTE(review): the interpreter path is machine-specific.
Sys.setenv(
  RETICULATE_PYTHON = "C:/Users/ASUS/miniforge3/envs/env_skripsi/python.exe",
  # Presumably stops reticulate from provisioning its own managed
  # environment -- TODO confirm against the reticulate documentation.
  RETICULATE_USE_MANAGED_VENV = "false"
)
library(reticulate)
## Warning: package 'reticulate' was built under R version 4.5.2
py_config()
## python: C:/Users/ASUS/miniforge3/envs/env_skripsi/python.exe
## libpython: C:/Users/ASUS/miniforge3/envs/env_skripsi/python310.dll
## pythonhome: C:/Users/ASUS/miniforge3/envs/env_skripsi
## version: 3.10.20 | packaged by conda-forge | (main, Mar 5 2026, 16:36:49) [MSC v.1944 64 bit (AMD64)]
## Architecture: 64bit
## numpy: C:/Users/ASUS/miniforge3/envs/env_skripsi/Lib/site-packages/numpy
## numpy_version: 2.2.6
##
## NOTE: Python version was forced by RETICULATE_PYTHON
library(tensorflow)
## Warning: package 'tensorflow' was built under R version 4.5.3
library(keras)
## Warning: package 'keras' was built under R version 4.5.3
## The keras package is deprecated. Please use the keras3 package instead.
## Alternatively, to continue using legacy keras, call `py_require_legacy_keras()`.
library(keras3)
## Warning: package 'keras3' was built under R version 4.5.3
## Registered S3 methods overwritten by 'keras3':
## method from
## as.data.frame.keras_training_history keras
## plot.keras_training_history keras
## print.keras_training_history keras
## r_to_py.R6ClassGenerator keras
##
## Attaching package: 'keras3'
## The following objects are masked from 'package:keras':
##
## %<-active%, %py_class%, activation_elu, activation_exponential,
## activation_gelu, activation_hard_sigmoid, activation_linear,
## activation_relu, activation_selu, activation_sigmoid,
## activation_softmax, activation_softplus, activation_softsign,
## activation_tanh, adapt, application_densenet121,
## application_densenet169, application_densenet201,
## application_efficientnet_b0, application_efficientnet_b1,
## application_efficientnet_b2, application_efficientnet_b3,
## application_efficientnet_b4, application_efficientnet_b5,
## application_efficientnet_b6, application_efficientnet_b7,
## application_inception_resnet_v2, application_inception_v3,
## application_mobilenet, application_mobilenet_v2,
## application_mobilenet_v3_large, application_mobilenet_v3_small,
## application_nasnetlarge, application_nasnetmobile,
## application_resnet101, application_resnet101_v2,
## application_resnet152, application_resnet152_v2,
## application_resnet50, application_resnet50_v2, application_vgg16,
## application_vgg19, application_xception, bidirectional,
## callback_backup_and_restore, callback_csv_logger,
## callback_early_stopping, callback_lambda,
## callback_learning_rate_scheduler, callback_model_checkpoint,
## callback_reduce_lr_on_plateau, callback_remote_monitor,
## callback_tensorboard, clone_model, constraint_maxnorm,
## constraint_minmaxnorm, constraint_nonneg, constraint_unitnorm,
## count_params, custom_metric, dataset_boston_housing,
## dataset_cifar10, dataset_cifar100, dataset_fashion_mnist,
## dataset_imdb, dataset_imdb_word_index, dataset_mnist,
## dataset_reuters, dataset_reuters_word_index, freeze_weights,
## from_config, get_config, get_file, get_layer, get_vocabulary,
## get_weights, image_array_save, image_dataset_from_directory,
## image_load, image_to_array, imagenet_decode_predictions,
## imagenet_preprocess_input, initializer_constant,
## initializer_glorot_normal, initializer_glorot_uniform,
## initializer_he_normal, initializer_he_uniform,
## initializer_identity, initializer_lecun_normal,
## initializer_lecun_uniform, initializer_ones,
## initializer_orthogonal, initializer_random_normal,
## initializer_random_uniform, initializer_truncated_normal,
## initializer_variance_scaling, initializer_zeros, install_keras,
## keras, keras_model, keras_model_sequential, Layer,
## layer_activation, layer_activation_elu,
## layer_activation_leaky_relu, layer_activation_parametric_relu,
## layer_activation_relu, layer_activation_softmax,
## layer_activity_regularization, layer_add, layer_additive_attention,
## layer_alpha_dropout, layer_attention, layer_average,
## layer_average_pooling_1d, layer_average_pooling_2d,
## layer_average_pooling_3d, layer_batch_normalization,
## layer_category_encoding, layer_center_crop, layer_concatenate,
## layer_conv_1d, layer_conv_1d_transpose, layer_conv_2d,
## layer_conv_2d_transpose, layer_conv_3d, layer_conv_3d_transpose,
## layer_conv_lstm_1d, layer_conv_lstm_2d, layer_conv_lstm_3d,
## layer_cropping_1d, layer_cropping_2d, layer_cropping_3d,
## layer_dense, layer_depthwise_conv_1d, layer_depthwise_conv_2d,
## layer_discretization, layer_dot, layer_dropout, layer_embedding,
## layer_flatten, layer_gaussian_dropout, layer_gaussian_noise,
## layer_global_average_pooling_1d, layer_global_average_pooling_2d,
## layer_global_average_pooling_3d, layer_global_max_pooling_1d,
## layer_global_max_pooling_2d, layer_global_max_pooling_3d,
## layer_gru, layer_hashing, layer_input, layer_integer_lookup,
## layer_lambda, layer_layer_normalization, layer_lstm, layer_masking,
## layer_max_pooling_1d, layer_max_pooling_2d, layer_max_pooling_3d,
## layer_maximum, layer_minimum, layer_multi_head_attention,
## layer_multiply, layer_normalization, layer_permute,
## layer_random_brightness, layer_random_contrast, layer_random_crop,
## layer_random_flip, layer_random_rotation, layer_random_translation,
## layer_random_zoom, layer_repeat_vector, layer_rescaling,
## layer_reshape, layer_resizing, layer_rnn, layer_separable_conv_1d,
## layer_separable_conv_2d, layer_simple_rnn,
## layer_spatial_dropout_1d, layer_spatial_dropout_2d,
## layer_spatial_dropout_3d, layer_string_lookup, layer_subtract,
## layer_text_vectorization, layer_unit_normalization,
## layer_upsampling_1d, layer_upsampling_2d, layer_upsampling_3d,
## layer_zero_padding_1d, layer_zero_padding_2d,
## layer_zero_padding_3d, learning_rate_schedule_cosine_decay,
## learning_rate_schedule_cosine_decay_restarts,
## learning_rate_schedule_exponential_decay,
## learning_rate_schedule_inverse_time_decay,
## learning_rate_schedule_piecewise_constant_decay,
## learning_rate_schedule_polynomial_decay, loss_binary_crossentropy,
## loss_categorical_crossentropy, loss_categorical_hinge,
## loss_cosine_similarity, loss_hinge, loss_huber, loss_kl_divergence,
## loss_mean_absolute_error, loss_mean_absolute_percentage_error,
## loss_mean_squared_error, loss_mean_squared_logarithmic_error,
## loss_poisson, loss_sparse_categorical_crossentropy,
## loss_squared_hinge, mark_active, metric_auc,
## metric_binary_accuracy, metric_binary_crossentropy,
## metric_categorical_accuracy, metric_categorical_crossentropy,
## metric_categorical_hinge, metric_cosine_similarity,
## metric_false_negatives, metric_false_positives, metric_hinge,
## metric_mean, metric_mean_absolute_error,
## metric_mean_absolute_percentage_error, metric_mean_iou,
## metric_mean_squared_error, metric_mean_squared_logarithmic_error,
## metric_mean_wrapper, metric_poisson, metric_precision,
## metric_precision_at_recall, metric_recall,
## metric_recall_at_precision, metric_root_mean_squared_error,
## metric_sensitivity_at_specificity,
## metric_sparse_categorical_accuracy,
## metric_sparse_categorical_crossentropy,
## metric_sparse_top_k_categorical_accuracy,
## metric_specificity_at_sensitivity, metric_squared_hinge,
## metric_sum, metric_top_k_categorical_accuracy,
## metric_true_negatives, metric_true_positives, new_callback_class,
## new_layer_class, new_learning_rate_schedule_class, new_loss_class,
## new_metric_class, new_model_class, normalize, optimizer_adadelta,
## optimizer_adagrad, optimizer_adam, optimizer_adamax,
## optimizer_ftrl, optimizer_nadam, optimizer_rmsprop, optimizer_sgd,
## pad_sequences, pop_layer, predict_on_batch, regularizer_l1,
## regularizer_l1_l2, regularizer_l2, regularizer_orthogonal,
## set_vocabulary, set_weights, shape, test_on_batch,
## text_dataset_from_directory, time_distributed,
## timeseries_dataset_from_array, to_categorical, train_on_batch,
## unfreeze_weights, use_backend, with_custom_object_scope, zip_lists
## The following objects are masked from 'package:tensorflow':
##
## set_random_seed, shape
library(tm)
## Warning: package 'tm' was built under R version 4.5.2
## Loading required package: NLP
library(wordcloud)
## Loading required package: RColorBrewer
library(wordcloud2)
## Warning: package 'wordcloud2' was built under R version 4.5.3
library(RColorBrewer)
library(readr)
library(readxl)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ purrr 1.0.4
## ✔ forcats 1.0.1 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::annotate() masks NLP::annotate()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.5.3
library(textclean)
## Warning: package 'textclean' was built under R version 4.5.3
library(dplyr)
library(stringr)
library(stopwords)
## Warning: package 'stopwords' was built under R version 4.5.2
##
## Attaching package: 'stopwords'
##
## The following object is masked from 'package:tm':
##
## stopwords
library(caret)
## Warning: package 'caret' was built under R version 4.5.3
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
##
## The following object is masked from 'package:tensorflow':
##
## train
library(ggplot2)
library(SnowballC)
## Warning: package 'SnowballC' was built under R version 4.5.2
# =========================
# 2. INPUT DATA TRAINING
# =========================
# Load the raw training comments (semicolon-separated, Latin-1 encoded) and
# give every row a stable integer key that is used later to re-attach the
# stemmed text to its document.
# NOTE(review): setwd() with a machine-specific path is kept on purpose: the
# script later writes "wordcloud.png" relative to the working directory.
setwd("~/Desktop/RISET DAN KAJIAN STRATEGIS")
df <- read.csv(
  "datariset.csv",
  sep = ";",
  fileEncoding = "latin1",
  stringsAsFactors = FALSE
)
# The cleaning step reads df$text; fail here with a clear message if the
# column is missing instead of deep inside the pipeline.
stopifnot("text" %in% names(df))
# seq_len() is safe for a zero-row file, where 1:nrow(df) would yield c(1, 0)
# and the assignment would fail.
df$doc_id <- seq_len(nrow(df))
# =========================
# 3. PREPROCESSING
# =========================
# Normalise the raw comment text one step at a time. Each assignment below
# feeds the next one, so the order matters.
df <- df %>%
  mutate(
    text_clean = tolower(text),
    # textclean helpers: handle URLs and HTML markup/entities
    text_clean = replace_url(text_clean),
    text_clean = replace_html(text_clean),
    # drop @mentions and #hashtags entirely
    text_clean = gsub("@\\w+", "", text_clean),
    text_clean = gsub("#\\w+", "", text_clean),
    # anything that is not a lowercase letter or a space becomes a space
    text_clean = gsub("[^a-z ]", " ", text_clean),
    # collapse whitespace runs, then strip both ends
    text_clean = gsub("\\s+", " ", text_clean),
    text_clean = trimws(text_clean)
  )
# =========================
# 4. TOKENIZING & STEMMING
# =========================
# Custom stopword list (Indonesian function words, slang, abbreviations,
# interjections and a few domain-specific terms) removed from the tokens
# before stemming.
# Notes for maintainers:
# - The list contains duplicates (e.g. "blm", "yg", "jadi", "setuju",
#   "kasih"); this is harmless because it is only used with %in%.
# - "Allah" and "ALLAH" can never match: the text is lowercased during
#   cleaning, so only the lowercase entry is effective.
# - "t4" can never match: digits are replaced by spaces during cleaning.
# - Tokens of three characters or fewer are removed by the later
#   nchar(kata) > 3 filter anyway, so the short entries are redundant.
stopword <- c(
"aku","saya","gue","gw","gua","ane","kita","kami","kamu","lu","lo","loe","kalian",
"kau","dia","mereka","beliau","kelen","kak","bang","bro","sis","dek","pak","ibu","bu",
"mas","om","tante","bapak","bapaknya","bg","ka","ini","itu","sini","situ","sana","di",
"ke","dari","pada","dalam","luar","atas","bawah","tengah","depan","belakang","antara",
"t4","dan","atau","tapi","tetapi","namun","karena","karna","sebab","sehingga","supaya",
"agar","maka","jika","kalau","kalo","klo","bila","apabila","bahwa","walaupun","meskipun",
"padahal","sedangkan","ketika","saat","waktu","setelah","sebelum","selama","sejak",
"hingga","sampai","sampe","serta","maupun","untuk","buat","dengan","tanpa","oleh",
"tentang","mengenai","terhadap","kepada","bagi","per","secara","seperti","sebagai",
"berdasarkan","sudah","udah","udh","belum","blm","blum","masih","lagi","terus","trus",
"selalu","sering","kadang","pernah","tidak","gak","ga","gk","ngga","nggak","enggak","tak",
"bukan","bkn","jangan","harus","wajib","pasti","mungkin","hampir","sangat","banget","bgt",
"sekali","amat","terlalu","cukup","hanya","saja","aja","cuma","cuman","doang","juga","jg",
"pun","lah","loh","deh","nih","tuh","sih","dong","kan","kah","pula","bahkan","malah",
"justru","emang","memang","emng","emg","lagian","sebenarnya","sebenernya","seharusnya",
"harusnya","sekarang","skrg","dulu","nanti","kini","tadi","besok","kemarin","bisa","dapat",
"boleh","mau","akan","ingin","minta","perlu","sempat","langsung","apa","siapa","mana",
"dimana","kemana","kenapa","mengapa","kapan","bagaimana","gimana","berapa","ada","banyak",
"semua","beberapa","setiap","tiap","lain","lainnya","satu","dua","tiga","sama","beda",
"sendiri","begini","begitu","gini","gitu","gtu","kayak","kaya","kek","ibaratnya","contoh",
"misalnya","jelas","jadi","jd","jdi","hal","cara","wkwk","haha","hehe","hihi","lol","nah",
"yah","yaah","wah","wow","aduh","duh","eh","ih","ah","oh","hayoloh","hadeh","haduuh","haduh",
"lha","lho","woi","oi","hai","hei","halo","hey","astagfirullah","subhanallah","alhamdulillah",
"aamiin","amiin","ya","iya","iyaa","yap","yep","oke","ok","oks","nope","sip","siap","baik",
"betul","bener","beneran","benar","setuju","iyalah","dah","yg","yng","dgn","utk","tdk","sdh",
"blm","krn","kpd","tsb","dst","dll","tp","tpi","bs","lg","sm","skrg","org","bnyk","bgt",
"udh","emg","gk","gak","klo","klu","jd","jdi","tu","ni","dg","d","k","ny","msh","min",
"mimin","bos","orang","tempat","hari","tahun","bulan","jam","menit","kali","masalah","situasi",
"kondisi","urusan","info","berita","video","konten","postingan","status","jadi","berarti",
"makanya","soalnya","soal","intinya","pokoknya","maksudnya","artinya","katanya","bilangnya",
"konon","kabarnya","ceritanya","setuju","sependapat","sepakat","lanjut","skip","bye","selamat",
"thanks","makasih","terima","kasih","yang","yg","kata","bikin","lihat","liat","makan","biasa",
"pake","kena","salah","jual","masuk","bodoh","tolol","dongo","smpe","maaf","asli","manusia",
"endara","para","goblok","allah","Allah","ALLAH","lebih","cari","kasih"
)
# Split every cleaned document into word tokens, discard stopwords and tokens
# of three characters or fewer, then stem what is left.
df_token_clean <- df %>%
  unnest_tokens(kata, text_clean) %>%
  filter(!(kata %in% stopword), nchar(kata) > 3) %>%
  mutate(kata = wordStem(kata, language = "indonesian"))

# Glue the surviving stems back together, one string per document.
df_text_bersih <- df_token_clean %>%
  group_by(doc_id) %>%
  summarise(text_clean = paste(kata, collapse = " "))

# Replace the regex-cleaned text with the stemmed version. Documents that lost
# every token have no match in the join (NA) and are removed by the filter.
df <- df %>%
  select(-text_clean) %>%
  left_join(df_text_bersih, by = "doc_id") %>%
  filter(!is.na(text_clean), text_clean != "")
# =========================
# 5. LABELING DATA TRAINING
# =========================
# Sentiment lexicons: tab-separated files read as (word, weight). Rows whose
# weight cannot be parsed as a number are dropped.
# NOTE(review): the paths are machine-specific.
kata_positif <- read.csv(
  "C:/Users/ASUS/Downloads/positive.tsv",
  sep = "\t",
  col.names = c("kata", "bobot")
) %>%
  mutate(bobot = as.numeric(bobot)) %>%
  filter(!is.na(bobot))
kata_negatif <- read.csv(
  "C:/Users/ASUS/Downloads/negative.tsv",
  sep = "\t",
  col.names = c("kata", "bobot")
) %>%
  mutate(bobot = as.numeric(bobot)) %>%
  filter(!is.na(bobot))
kamus <- bind_rows(kata_positif, kata_negatif)

# The combined lexicon lists some words more than once, which made the token
# join many-to-many (dplyr warned about it). Collapsing to one summed weight
# per word keeps every document total identical while letting the join be
# declared many-to-one, so an unexpected fan-out now fails loudly.
bobot_per_kata <- kamus %>%
  group_by(kata) %>%
  summarise(bobot = sum(bobot), .groups = "drop")

df <- df %>% mutate(id_baris = row_number())
df_token <- df %>% unnest_tokens(kata, text_clean)

# Sum of lexicon weights per document; documents without any lexicon word are
# absent here and receive a score of 0 below.
df_skor <- df_token %>%
  inner_join(bobot_per_kata, by = "kata", relationship = "many-to-one") %>%
  group_by(id_baris) %>%
  summarise(total_skor = sum(bobot, na.rm = TRUE), .groups = "drop")

# The sign of the total score decides the label.
df_final <- df %>%
  left_join(df_skor, by = "id_baris") %>%
  mutate(
    total_skor = replace_na(total_skor, 0),
    label = case_when(
      total_skor > 0 ~ "Positif",
      total_skor < 0 ~ "Negatif",
      TRUE ~ "Netral"
    )
  )
# =========================
# 6. TOKENIZER & ONE-HOT ENCODING
# =========================
# Vocabulary size passed to the tokenizer and fixed (padded) sequence length.
max_words <- 5000L
max_len <- 50L

# Fit the word index on the stemmed training text and turn every document
# into a padded integer sequence of length max_len.
# NOTE(review): the tokenizer is fitted on all rows before the train/test
# split, so the vocabulary also reflects the test documents.
tokenizer <- text_tokenizer(num_words = max_words)
tokenizer %>% fit_text_tokenizer(df_final$text_clean)
x_data <- texts_to_sequences(tokenizer, df_final$text_clean) %>%
  pad_sequences(maxlen = max_len)

# Zero-based class codes: 0 = Negatif, 1 = Netral, 2 = Positif.
label_index <- as.integer(
  factor(df_final$label, levels = c("Negatif", "Netral", "Positif"))
) - 1
# A label outside the three known levels would otherwise yield an all-zero row.
stopifnot(!anyNA(label_index))

# One-hot encode with a single matrix-index assignment instead of a row loop;
# this is also safe when there are no rows (1:length(x) would yield c(1, 0)).
num_classes <- 3L
y_data <- matrix(0, nrow = length(label_index), ncol = num_classes)
y_data[cbind(seq_along(label_index), label_index + 1)] <- 1

# Round-trip through numpy to fix the element types used for training.
np <- import("numpy")
x_data <- np$array(x_data, dtype = "int32")
y_data <- np$array(y_data, dtype = "float32")
# =========================
# 7. SPLIT DATA
# =========================
# Reproducible random 80/20 split of the rows into training and test sets.
# NOTE(review): the split is not stratified by class.
set.seed(42)
n <- nrow(x_data)
# sample.int(n, k) draws the same indices as sample(1:n, k); floor() makes the
# truncation of the 80% size explicit.
train_index <- sample.int(n, size = floor(0.8 * n))
# Explicit test indices: negative indexing with an empty train_index would
# select no rows at all instead of all rows.
test_index <- setdiff(seq_len(n), train_index)
# drop = FALSE keeps matrices even when a subset has a single row.
x_train <- x_data[train_index, , drop = FALSE]
x_test <- x_data[test_index, , drop = FALSE]
y_train <- y_data[train_index, , drop = FALSE]
y_test <- y_data[test_index, , drop = FALSE]
# =========================
# 8. MODEL BI-LSTM
# =========================
# Build the classifier layer by layer on a sequential model, calling the
# Python-side add()/build()/compile() methods directly.
model <- keras_model_sequential()
# Embedding: token ids (vocabulary capped at max_words) -> 128-dim vectors.
# NOTE(review): `input_length` is reportedly deprecated in Keras 3 -- confirm
# whether it can simply be removed.
model$add(layer_embedding(
input_dim = as.integer(max_words),
output_dim = as.integer(128),
input_length = as.integer(max_len)
))
# Bidirectional LSTM, 64 units per direction. return_sequences = TRUE keeps
# the per-timestep outputs so the following LSTM still receives a sequence.
model$add(keras$layers$Bidirectional(
layer_lstm(
units = as.integer(64),
return_sequences = TRUE,
dropout = 0.2
)
))
# Second LSTM reduces the sequence to a single 32-dim vector.
model$add(layer_lstm(units = as.integer(32)))
# Dense head with dropout, then a softmax over the num_classes labels.
model$add(layer_dense(units = as.integer(32), activation = "relu"))
model$add(layer_dropout(rate = 0.5))
model$add(layer_dense(units = as.integer(num_classes), activation = "softmax"))
# Build with an unspecified batch size and sequences of length max_len.
model$build(input_shape = tuple(NULL, max_len))
# categorical_crossentropy matches the one-hot encoded targets.
model$compile(
loss = "categorical_crossentropy",
optimizer = "adam",
metrics = list("accuracy")
)
model$summary()
## Model: "sequential"
## ┌─────────────────────────────────┬────────────────────────┬───────────────┐
## │ Layer (type) │ Output Shape │ Param # │
## ├─────────────────────────────────┼────────────────────────┼───────────────┤
## │ embedding (Embedding) │ (None, 50, 128) │ 640,000 │
## ├─────────────────────────────────┼────────────────────────┼───────────────┤
## │ bidirectional (Bidirectional) │ (None, 50, 128) │ 98,816 │
## ├─────────────────────────────────┼────────────────────────┼───────────────┤
## │ lstm_1 (LSTM) │ (None, 32) │ 20,608 │
## ├─────────────────────────────────┼────────────────────────┼───────────────┤
## │ dense (Dense) │ (None, 32) │ 1,056 │
## ├─────────────────────────────────┼────────────────────────┼───────────────┤
## │ dropout (Dropout) │ (None, 32) │ 0 │
## ├─────────────────────────────────┼────────────────────────┼───────────────┤
## │ dense_1 (Dense) │ (None, 3) │ 99 │
## └─────────────────────────────────┴────────────────────────┴───────────────┘
## Total params: 760,579 (2.90 MB)
## Trainable params: 760,579 (2.90 MB)
## Non-trainable params: 0 (0.00 B)
# =========================
# 9. TRAINING
# =========================
# Early stopping on validation loss: give up after 3 epochs without
# improvement and restore the best weights seen.
early_stop <- callback_early_stopping(
  monitor = "val_loss",
  patience = 3,
  restore_best_weights = TRUE
)

# Train for at most 10 epochs in mini-batches of 32, with 20% of the training
# rows held out for validation.
history <- fit(
  model,
  x_train,
  y_train,
  epochs = 10,
  batch_size = 32,
  validation_split = 0.2,
  callbacks = list(early_stop)
)
## Epoch 1/10
## 65/65 - 21s - 328ms/step - accuracy: 0.5510 - loss: 0.9942 - val_accuracy: 0.6337 - val_loss: 0.8825
## Epoch 2/10
## 65/65 - 19s - 290ms/step - accuracy: 0.6951 - loss: 0.7511 - val_accuracy: 0.7151 - val_loss: 0.7664
## Epoch 3/10
## 65/65 - 11s - 165ms/step - accuracy: 0.8049 - loss: 0.4828 - val_accuracy: 0.7151 - val_loss: 0.7273
## Epoch 4/10
## 65/65 - 11s - 172ms/step - accuracy: 0.8908 - loss: 0.3010 - val_accuracy: 0.7326 - val_loss: 0.9960
## Epoch 5/10
## 65/65 - 10s - 156ms/step - accuracy: 0.9568 - loss: 0.1690 - val_accuracy: 0.7287 - val_loss: 0.9535
## Epoch 6/10
## 65/65 - 10s - 157ms/step - accuracy: 0.9617 - loss: 0.1415 - val_accuracy: 0.7403 - val_loss: 1.0047
# =========================
# 10. PREDIKSI & EVALUASI DATA TEST
# =========================
# Class probabilities for the held-out rows (one column per class).
y_pred <- predict(model, x_test)
## 21/21 - 2s - 114ms/step

# Row-wise arg-max, shifted to zero-based codes
# (0 = Negatif, 1 = Netral, 2 = Positif) for predictions and one-hot truth.
y_pred_class <- apply(y_pred, MARGIN = 1, FUN = which.max) - 1
y_true_class <- apply(y_test, MARGIN = 1, FUN = which.max) - 1

# Map the codes to labelled factors sharing the same level order.
pred_factor <- factor(
  y_pred_class,
  levels = 0:2,
  labels = c("Negatif", "Netral", "Positif")
)
actual_factor <- factor(
  y_true_class,
  levels = 0:2,
  labels = c("Negatif", "Netral", "Positif")
)

# caret confusion matrix: predictions first, reference second.
cm <- confusionMatrix(data = pred_factor, reference = actual_factor)
cm
## Confusion Matrix and Statistics
##
## Reference
## Prediction Negatif Netral Positif
## Negatif 318 33 37
## Netral 16 135 45
## Positif 25 15 21
##
## Overall Statistics
##
## Accuracy : 0.7349
## 95% CI : (0.699, 0.7686)
## No Information Rate : 0.5566
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5298
##
## Mcnemar's Test P-Value : 3.633e-05
##
## Statistics by Class:
##
## Class: Negatif Class: Netral Class: Positif
## Sensitivity 0.8858 0.7377 0.20388
## Specificity 0.7552 0.8680 0.92620
## Pos Pred Value 0.8196 0.6888 0.34426
## Neg Pred Value 0.8405 0.8931 0.85959
## Prevalence 0.5566 0.2837 0.15969
## Detection Rate 0.4930 0.2093 0.03256
## Detection Prevalence 0.6016 0.3039 0.09457
## Balanced Accuracy 0.8205 0.8028 0.56504
# Table: cross-tabulate predicted vs. actual labels in long format.
cm_table <- table(Prediksi = pred_factor, Aktual = actual_factor)
cm_df <- as.data.frame(cm_table)

# Plot: heatmap of the confusion matrix with the counts printed in each cell.
cm_df %>%
  ggplot(aes(x = Aktual, y = Prediksi, fill = Freq)) +
  geom_tile(color = "white") +
  geom_text(
    aes(label = Freq),
    size = 5,
    color = "white",
    fontface = "bold"
  ) +
  scale_fill_gradient(low = "#ff9999", high = "#660000") +
  labs(x = "Aktual", y = "Prediksi", fill = "Frekuensi") +
  theme_minimal()

# Overall accuracy and per-class precision / recall / F1 taken from the caret
# confusion-matrix object.
accuracy <- cm[["overall"]]["Accuracy"]
precision <- cm[["byClass"]][, "Precision"]
recall <- cm[["byClass"]][, "Recall"]
f1_score <- cm[["byClass"]][, "F1"]

# Macro averages: unweighted mean over the classes, skipping NA entries.
precision_macro <- mean(precision, na.rm = TRUE)
recall_macro <- mean(recall, na.rm = TRUE)
f1_macro <- mean(f1_score, na.rm = TRUE)

# Console report.
cat("=== DATA TEST ===\n")
## === DATA TEST ===
cat("Accuracy :", accuracy, "\n\n")
## Accuracy : 0.7348837
cat("Precision:\n")
print(precision)
## Precision:
## Class: Negatif Class: Netral Class: Positif
## 0.8195876 0.6887755 0.3442623
cat("\nRecall:\n")
print(recall)
##
## Recall:
## Class: Negatif Class: Netral Class: Positif
## 0.8857939 0.7377049 0.2038835
cat("\nF1-score:\n")
print(f1_score)
##
## F1-score:
## Class: Negatif Class: Netral Class: Positif
## 0.8514056 0.7124011 0.2560976
cat("\nMacro Average:\n")
##
## Macro Average:
cat("Precision :", precision_macro, "\n")
## Precision : 0.6175418
cat("Recall :", recall_macro, "\n")
## Recall : 0.6091274
cat("F1-score :", f1_macro, "\n")
## F1-score : 0.6066347
# =========================
# 11. VISUALISASI WORDCLOUD
# =========================
# Term frequencies over the whole cleaned corpus (all documents pooled into
# one string and counted through a tm term-document matrix).
text_all <- paste(df_final$text_clean, collapse = " ")
corpus <- Corpus(VectorSource(text_all))
tdm <- TermDocumentMatrix(corpus)
m <- as.matrix(tdm)
freq <- sort(rowSums(m), decreasing = TRUE)
df_word <- data.frame(word = names(freq), freq = freq)

# Interactive HTML word cloud (viewer / knitted document).
wordcloud2(df_word, size = 0.7, color = "random-light", backgroundColor = "black")

# wordcloud2() returns an HTML widget and draws nothing on an R graphics
# device, so wrapping it in png()/dev.off() wrote an empty image. The PNG file
# is rendered with the base-graphics wordcloud::wordcloud() instead, on a
# black background with light colours to keep a similar look.
png("wordcloud.png", width = 1600, height = 1000, res = 150)
par(bg = "black", mar = c(0, 0, 0, 0))
wordcloud::wordcloud(
  words = df_word$word,
  freq = df_word$freq,
  max.words = 200,
  random.order = FALSE,
  colors = brewer.pal(8, "Pastel2")
)
dev.off()
## png
## 2
# =========================
# 12. PREDIKSI DATA BARU
# =========================
# Read the new, unseen comments.
# NOTE(review): the path is machine-specific.
data_baru <- read_excel("C:/Users/ASUS/Downloads/data_baru.xlsx")
## New names:
## • `` -> `...1`
## • `` -> `...2`
# The text is expected in a column called `Comment`.
stopifnot("Comment" %in% names(data_baru))

# Step 1: same character-level cleaning as the training data.
data_baru <- data_baru %>%
  mutate(
    id_prediksi = row_number(),
    text_clean = Comment %>%
      tolower() %>%
      replace_url() %>%
      replace_html() %>%
      gsub("@\\w+", "", .) %>%
      gsub("#\\w+", "", .) %>%
      gsub("[^a-z ]", " ", .) %>%
      gsub("\\s+", " ", .) %>%
      trimws()
  )

# Step 2: same token-level processing as the training data. The tokenizer was
# fitted on stopword-filtered, stemmed text, so feeding it unstemmed words
# would leave most of them outside its vocabulary and distort the predictions.
data_baru_bersih <- data_baru %>%
  select(id_prediksi, text_clean) %>%
  unnest_tokens(kata, text_clean) %>%
  filter(!(kata %in% stopword), nchar(kata) > 3) %>%
  mutate(kata = wordStem(kata, language = "indonesian")) %>%
  group_by(id_prediksi) %>%
  summarise(text_clean = paste(kata, collapse = " "), .groups = "drop")

# Unlike training, rows that lose every token are kept (as empty strings) so
# that each input row still gets a prediction.
data_baru <- data_baru %>%
  select(-text_clean) %>%
  left_join(data_baru_bersih, by = "id_prediksi") %>%
  mutate(text_clean = replace_na(text_clean, "")) %>%
  select(-id_prediksi)

# Encode with the tokenizer fitted on the training data and pad to max_len.
x_baru <- texts_to_sequences(tokenizer, data_baru$text_clean) %>%
  pad_sequences(maxlen = max_len) %>%
  np$array(dtype = "int32")

pred_baru <- model %>% predict(x_baru)
## 4/4 - 0s - 48ms/step

# Row-wise arg-max gives 1-based class positions: 1 = Negatif, 2 = Netral,
# 3 = Positif (same column order as the one-hot training targets).
pred_class_baru <- apply(pred_baru, 1, which.max)
label_pred_baru <- factor(
  pred_class_baru,
  levels = c(1, 2, 3),
  labels = c("Negatif", "Netral", "Positif")
)

# Attach the predicted label and the three class probabilities.
data_baru$prediksi <- label_pred_baru
data_baru$prob_negatif <- pred_baru[, 1]
data_baru$prob_netral <- pred_baru[, 2]
data_baru$prob_positif <- pred_baru[, 3]
# =========================
# 13. LABELING DATA BARU
# =========================
# Reference labels for the new data, produced by lexicon scoring.
# NOTE: `label_asli` is derived from the sign of the lexicon score, not from
# manual annotation, so the evaluation that uses it measures agreement between
# the model and the lexicon rule.
kamus_baru <- bind_rows(kata_positif, kata_negatif)

# The combined lexicon lists some words more than once, which made the token
# join many-to-many (dplyr warned about it). One summed weight per word gives
# identical document totals and lets the join be declared many-to-one.
bobot_per_kata_baru <- kamus_baru %>%
  group_by(kata) %>%
  summarise(bobot = sum(bobot, na.rm = TRUE), .groups = "drop")

data_baru <- data_baru %>% mutate(id_baru = row_number())
data_baru_token <- data_baru %>% unnest_tokens(kata, text_clean)

# Tokens without a lexicon entry get NA weight and are ignored by na.rm.
data_baru_skor <- data_baru_token %>%
  left_join(bobot_per_kata_baru, by = "kata", relationship = "many-to-one") %>%
  group_by(id_baru) %>%
  summarise(total_skor = sum(bobot, na.rm = TRUE), .groups = "drop")

# Rows that produced no tokens are missing from the scores and default to 0.
data_baru <- data_baru %>%
  left_join(data_baru_skor, by = "id_baru") %>%
  mutate(total_skor = replace_na(total_skor, 0)) %>%
  mutate(label_asli = case_when(
    total_skor > 0 ~ "Positif",
    total_skor < 0 ~ "Negatif",
    TRUE ~ "Netral"
  ))
# =========================
# 14. EVALUASI DATA BARU
# =========================
# Compare the model's predictions on the new data with the lexicon-derived
# labels, using the same level order for both factors.
actual_baru <- factor(
  data_baru$label_asli,
  levels = c("Negatif", "Netral", "Positif")
)
pred_baru_factor <- factor(
  data_baru$prediksi,
  levels = c("Negatif", "Netral", "Positif")
)
cm_baru <- confusionMatrix(data = pred_baru_factor, reference = actual_baru)

# Overall accuracy and per-class metrics from the caret result.
accuracy_baru <- cm_baru[["overall"]]["Accuracy"]
precision_baru <- cm_baru[["byClass"]][, "Precision"]
recall_baru <- cm_baru[["byClass"]][, "Recall"]
f1_baru <- cm_baru[["byClass"]][, "F1"]

# Macro averages: unweighted mean over the classes, skipping NA entries.
precision_macro_baru <- mean(precision_baru, na.rm = TRUE)
recall_macro_baru <- mean(recall_baru, na.rm = TRUE)
f1_macro_baru <- mean(f1_baru, na.rm = TRUE)

# Console report.
cat("\n=== DATA BARU ===\n")
##
## === DATA BARU ===
cat("Accuracy :", accuracy_baru, "\n\n")
## Accuracy : 0.7
cat("Precision per kelas:\n")
print(precision_baru)
## Precision per kelas:
## Class: Negatif Class: Netral Class: Positif
## 0.8026316 0.3684211 0.4000000
cat("\nRecall per kelas:\n")
print(recall_baru)
##
## Recall per kelas:
## Class: Negatif Class: Netral Class: Positif
## 0.8714286 0.5833333 0.1111111
cat("\nF1-score per kelas:\n")
print(f1_baru)
##
## F1-score per kelas:
## Class: Negatif Class: Netral Class: Positif
## 0.8356164 0.4516129 0.1739130
cat("\nMacro Average:\n")
##
## Macro Average:
cat("Precision :", precision_macro_baru, "\n")
## Precision : 0.5236842
cat("Recall :", recall_macro_baru, "\n")
## Recall : 0.5219577
cat("F1-score :", f1_macro_baru, "\n")
## F1-score : 0.4870475