# ========================================
# --- 1. PERSIAPAN PAKET ---
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(e1071)
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
## The following object is masked from 'package:dplyr':
##
## combine
library(rpart)
library(writexl)
# Fix the RNG seed before any random step runs.
set.seed(seed = 42)
# --- 2. LOAD DATA ---
# NOTE(review): the working directory is hard-coded to an absolute Windows
# path, so the script only runs where this folder exists — consider a
# project-relative path instead.
setwd(dir = "E:/uas data mining")
# Both workbooks are read by relative name from the directory set above.
df_train <- readxl::read_excel(path = "datatraining.xlsx")
df_test <- readxl::read_excel(path = "datatesting.xlsx")
# --- 3. PREPROCESSING ---
# Convert the categorical columns of the training data to factors.
kategorik_vars <- c(
  "jenis_kelamin", "dukungan_orang_tua", "fasilitas_belajar",
  "minat_pada_pelajaran", "kesulitan_ekonomi", "motivasi_belajar"
)
df_train[kategorik_vars] <- lapply(df_train[kategorik_vars], as.factor)

# The test data does not have the motivasi_belajar column yet, so only the
# categorical predictors are converted there. The list is derived from
# kategorik_vars so the column names are written down in one place only.
kategorik_prediktor <- setdiff(kategorik_vars, "motivasi_belajar")

# Build the test factors with the *training* levels instead of calling
# as.factor() on the test data alone: a category that does not occur in the
# test rows would otherwise give the test factor a different level set
# than the one the models are fitted on.
faktor_test <- Map(
  function(nilai, acuan) factor(nilai, levels = levels(acuan)),
  df_test[kategorik_prediktor],
  df_train[kategorik_prediktor]
)

# A value that exists in the test data but not in the training data turns into
# NA above; stop with a clear message rather than passing NA to predict().
kolom_bermasalah <- kategorik_prediktor[vapply(
  kategorik_prediktor,
  function(kolom) any(is.na(faktor_test[[kolom]]) & !is.na(df_test[[kolom]])),
  logical(1)
)]
if (length(kolom_bermasalah) > 0) {
  stop(
    "Test data contains categories not present in the training data: ",
    paste(kolom_bermasalah, collapse = ", "),
    call. = FALSE
  )
}
df_test[kategorik_prediktor] <- faktor_test
# --- 4. SPLIT THE TRAINING DATA (80:20) ---
# createDataPartition() selects the row indices for the 80% training share,
# based on the target column; every remaining row goes to the validation set.
splitIndex <- createDataPartition(
  y = df_train[["motivasi_belajar"]],
  p = 0.8,
  list = FALSE
)
trainSet <- df_train[splitIndex, ]
valSet <- df_train[-splitIndex, ]
# --- 5. TRAINING MODEL ---
## 1. Decision Tree
# Fit a classification tree with motivasi_belajar as the target and every
# other column of trainSet as a predictor, then predict the validation rows.
model_dt <- rpart(
  motivasi_belajar ~ .,
  data = trainSet,
  method = "class"
)
pred_dt <- predict(object = model_dt, newdata = valSet, type = "class")
cat("\n--- Decision Tree ---\n")
##
## --- Decision Tree ---
# Compare the predicted classes with the true validation labels.
confusionMatrix(pred_dt, valSet[["motivasi_belajar"]]) %>%
  print()
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3
## 1 9 2 0
## 2 7 19 2
## 3 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.7179
## 95% CI : (0.5513, 0.85)
## No Information Rate : 0.5385
## P-Value [Acc > NIR] : 0.01707
##
## Kappa : 0.4333
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 0.5625 0.9048 0.00000
## Specificity 0.9130 0.5000 1.00000
## Pos Pred Value 0.8182 0.6786 NaN
## Neg Pred Value 0.7500 0.8182 0.94872
## Prevalence 0.4103 0.5385 0.05128
## Detection Rate 0.2308 0.4872 0.00000
## Detection Prevalence 0.2821 0.7179 0.00000
## Balanced Accuracy 0.7378 0.7024 0.50000
## 2. Random Forest
# Fit a random forest on the same formula and training rows, with the
# package defaults, then predict the validation rows.
model_rf <- randomForest(
  motivasi_belajar ~ .,
  data = trainSet
)
pred_rf <- predict(object = model_rf, newdata = valSet)
cat("\n--- Random Forest ---\n")
##
## --- Random Forest ---
# Compare the predicted classes with the true validation labels.
confusionMatrix(pred_rf, valSet[["motivasi_belajar"]]) %>%
  print()
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3
## 1 9 4 0
## 2 7 17 2
## 3 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.6667
## 95% CI : (0.4978, 0.8091)
## No Information Rate : 0.5385
## P-Value [Acc > NIR] : 0.07304
##
## Kappa : 0.339
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 0.5625 0.8095 0.00000
## Specificity 0.8261 0.5000 1.00000
## Pos Pred Value 0.6923 0.6538 NaN
## Neg Pred Value 0.7308 0.6923 0.94872
## Prevalence 0.4103 0.5385 0.05128
## Detection Rate 0.2308 0.4359 0.00000
## Detection Prevalence 0.3333 0.6667 0.00000
## Balanced Accuracy 0.6943 0.6548 0.50000
## 3. SVM
# Fit a support vector machine on the same formula and training rows, with
# the package defaults, then predict the validation rows.
model_svm <- svm(
  motivasi_belajar ~ .,
  data = trainSet
)
pred_svm <- predict(object = model_svm, newdata = valSet)
cat("\n--- SVM ---\n")
##
## --- SVM ---
# Compare the predicted classes with the true validation labels.
confusionMatrix(pred_svm, valSet[["motivasi_belajar"]]) %>%
  print()
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3
## 1 10 6 0
## 2 6 15 2
## 3 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.641
## 95% CI : (0.4718, 0.788)
## No Information Rate : 0.5385
## P-Value [Acc > NIR] : 0.1301
##
## Kappa : 0.3018
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 0.6250 0.7143 0.00000
## Specificity 0.7391 0.5556 1.00000
## Pos Pred Value 0.6250 0.6522 NaN
## Neg Pred Value 0.7391 0.6250 0.94872
## Prevalence 0.4103 0.5385 0.05128
## Detection Rate 0.2564 0.3846 0.00000
## Detection Prevalence 0.4103 0.5897 0.00000
## Balanced Accuracy 0.6821 0.6349 0.50000
# --- 6. PREDICT THE TEST DATA ---
# Score the test rows with each fitted model, in the order
# decision tree, random forest, SVM.
pred_dt_test <- predict(object = model_dt, newdata = df_test, type = "class")
pred_rf_test <- predict(object = model_rf, newdata = df_test)
pred_svm_test <- predict(object = model_svm, newdata = df_test)
# Append the three prediction vectors to the test data as new columns.
df_hasil <- mutate(
  df_test,
  prediksi_DT = pred_dt_test,
  prediksi_RF = pred_rf_test,
  prediksi_SVM = pred_svm_test
)
# --- 7. SHOW THE PREDICTION ROWS ---
cat("\n--- Hasil Prediksi ke-15 Siswa ---\n")
##
## --- Hasil Prediksi ke-15 Siswa ---
# Print only the three prediction columns, one per model.
df_hasil %>%
  select(prediksi_DT, prediksi_RF, prediksi_SVM) %>%
  print()
## # A tibble: 15 × 3
## prediksi_DT prediksi_RF prediksi_SVM
## <fct> <fct> <fct>
## 1 2 2 2
## 2 2 2 2
## 3 2 1 1
## 4 2 2 2
## 5 1 1 1
## 6 2 2 2
## 7 2 2 2
## 8 2 2 2
## 9 2 2 2
## 10 2 2 2
## 11 2 1 1
## 12 1 1 1
## 13 2 2 2
## 14 2 2 1
## 15 1 1 1