# ========================================
#
# UAS Data Mining - Genap 2024/2025
#
# Klasifikasi Motivasi Belajar Siswa
#
# Algoritma: SVM, Random Forest, Decision Tree
#
# ========================================

# --- 1. PERSIAPAN PAKET ---
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(e1071)
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
## The following object is masked from 'package:dplyr':
## 
##     combine
library(rpart)
library(writexl)

# Seed the random number generator once, before the data split and the model
# fitting further down.
set.seed(42)
# --- 2. LOAD DATA ---
# Both workbooks are addressed by bare file name, so the working directory is
# pointed at the folder that holds them first.
# NOTE(review): this absolute path is machine-specific - confirm it exists
# before running the script elsewhere.
setwd("E:/uas data mining")

df_test <- read_excel(path = "datatesting.xlsx")
df_train <- read_excel(path = "datatraining.xlsx")
# --- 3. PREPROCESSING ---

# Categorical columns to convert to factors. The last entry, motivasi_belajar,
# is the target and is not yet present in the test data.
kategorik_vars <- c("jenis_kelamin", "dukungan_orang_tua", "fasilitas_belajar",
                    "minat_pada_pelajaran", "kesulitan_ekonomi", "motivasi_belajar")
df_train[kategorik_vars] <- lapply(df_train[kategorik_vars], as.factor)

# Predictor-only subset of the categorical columns, derived from the list above
# so the column names are written down in exactly one place.
feature_vars <- setdiff(kategorik_vars, "motivasi_belajar")

# Rebuild one test column as a factor that carries the training levels.
# An independent as.factor() on the test data would derive its levels from the
# values that happen to occur there, which can differ from the training factor
# and make predict() fail or mislabel categories.
#   values    - the raw test column
#   reference - the matching training column, already a factor
#   column    - column name, used only in the warning text
# Returns a factor with levels(reference); values unseen in training become NA
# and are reported with a warning rather than silently dropped.
align_levels <- function(values, reference, column) {
  unseen <- setdiff(unique(as.character(values)), c(levels(reference), NA))
  if (length(unseen) > 0) {
    warning(
      "Column '", column, "' in the test data has values not seen in training: ",
      paste(unseen, collapse = ", "), "; they become NA.",
      call. = FALSE
    )
  }
  factor(values, levels = levels(reference))
}

df_test[feature_vars] <- Map(
  align_levels,
  df_test[feature_vars],
  df_train[feature_vars],
  feature_vars
)
# --- 4. SPLIT DATA TRAINING (80:20) ---
# Draw the row indices for the 80% training part based on the target column;
# list = FALSE makes the indices usable directly for row subsetting.
splitIndex <- createDataPartition(
  y = df_train[["motivasi_belajar"]],
  p = 0.8,
  list = FALSE
)
# The rows that were not drawn form the validation part.
valSet <- df_train[-splitIndex, ]
trainSet <- df_train[splitIndex, ]
# --- 5. TRAINING MODEL ---

## 1. Decision Tree
# Fit a classification tree on the training part, with every other column as
# a predictor of motivasi_belajar.
model_dt <- rpart(
  motivasi_belajar ~ .,
  data = trainSet,
  method = "class"
)
# Predicted class labels for the held-out validation rows.
pred_dt <- predict(model_dt, newdata = valSet, type = "class")
cat("\n--- Decision Tree ---\n")
## 
## --- Decision Tree ---
# Compare the predictions against the true validation labels.
print(confusionMatrix(data = pred_dt, reference = valSet$motivasi_belajar))
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  1  2  3
##          1  9  2  0
##          2  7 19  2
##          3  0  0  0
## 
## Overall Statistics
##                                         
##                Accuracy : 0.7179        
##                  95% CI : (0.5513, 0.85)
##     No Information Rate : 0.5385        
##     P-Value [Acc > NIR] : 0.01707       
##                                         
##                   Kappa : 0.4333        
##                                         
##  Mcnemar's Test P-Value : NA            
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.5625   0.9048  0.00000
## Specificity            0.9130   0.5000  1.00000
## Pos Pred Value         0.8182   0.6786      NaN
## Neg Pred Value         0.7500   0.8182  0.94872
## Prevalence             0.4103   0.5385  0.05128
## Detection Rate         0.2308   0.4872  0.00000
## Detection Prevalence   0.2821   0.7179  0.00000
## Balanced Accuracy      0.7378   0.7024  0.50000
## 2. Random Forest
# Fit a random forest on the training part with the package defaults.
model_rf <- randomForest(
  motivasi_belajar ~ .,
  data = trainSet
)
# Predicted class labels for the held-out validation rows.
pred_rf <- predict(model_rf, newdata = valSet)
cat("\n--- Random Forest ---\n")
## 
## --- Random Forest ---
# Compare the predictions against the true validation labels.
print(confusionMatrix(data = pred_rf, reference = valSet$motivasi_belajar))
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  1  2  3
##          1  9  4  0
##          2  7 17  2
##          3  0  0  0
## 
## Overall Statistics
##                                           
##                Accuracy : 0.6667          
##                  95% CI : (0.4978, 0.8091)
##     No Information Rate : 0.5385          
##     P-Value [Acc > NIR] : 0.07304         
##                                           
##                   Kappa : 0.339           
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.5625   0.8095  0.00000
## Specificity            0.8261   0.5000  1.00000
## Pos Pred Value         0.6923   0.6538      NaN
## Neg Pred Value         0.7308   0.6923  0.94872
## Prevalence             0.4103   0.5385  0.05128
## Detection Rate         0.2308   0.4359  0.00000
## Detection Prevalence   0.3333   0.6667  0.00000
## Balanced Accuracy      0.6943   0.6548  0.50000
## 3. SVM
# Fit a support vector machine on the training part with the package defaults.
model_svm <- svm(
  motivasi_belajar ~ .,
  data = trainSet
)
# Predicted class labels for the held-out validation rows.
pred_svm <- predict(model_svm, newdata = valSet)
cat("\n--- SVM ---\n")
## 
## --- SVM ---
# Compare the predictions against the true validation labels.
print(confusionMatrix(data = pred_svm, reference = valSet$motivasi_belajar))
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  1  2  3
##          1 10  6  0
##          2  6 15  2
##          3  0  0  0
## 
## Overall Statistics
##                                          
##                Accuracy : 0.641          
##                  95% CI : (0.4718, 0.788)
##     No Information Rate : 0.5385         
##     P-Value [Acc > NIR] : 0.1301         
##                                          
##                   Kappa : 0.3018         
##                                          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.6250   0.7143  0.00000
## Specificity            0.7391   0.5556  1.00000
## Pos Pred Value         0.6250   0.6522      NaN
## Neg Pred Value         0.7391   0.6250  0.94872
## Prevalence             0.4103   0.5385  0.05128
## Detection Rate         0.2564   0.3846  0.00000
## Detection Prevalence   0.4103   0.5897  0.00000
## Balanced Accuracy      0.6821   0.6349  0.50000
# --- 6. PREDICT ON THE TEST DATA ---

# Score the test rows with each of the three fitted models.
pred_dt_test <- predict(model_dt, newdata = df_test, type = "class")
pred_rf_test <- predict(model_rf, newdata = df_test)
pred_svm_test <- predict(model_svm, newdata = df_test)

# Attach the predictions to the test data, one column per model.
# NOTE(review): this assumes every prediction vector has one entry per test
# row - confirm the test data has no missing predictor values.
df_hasil <- mutate(
  df_test,
  prediksi_DT  = pred_dt_test,
  prediksi_RF  = pred_rf_test,
  prediksi_SVM = pred_svm_test
)
# --- 7. SHOW THE 15 ROWS OF PREDICTIONS ---
cat("\n--- Hasil Prediksi ke-15 Siswa ---\n")
## 
## --- Hasil Prediksi ke-15 Siswa ---
# Print only the three prediction columns, side by side for comparison.
print(select(df_hasil, prediksi_DT, prediksi_RF, prediksi_SVM))
## # A tibble: 15 × 3
##    prediksi_DT prediksi_RF prediksi_SVM
##    <fct>       <fct>       <fct>       
##  1 2           2           2           
##  2 2           2           2           
##  3 2           1           1           
##  4 2           2           2           
##  5 1           1           1           
##  6 2           2           2           
##  7 2           2           2           
##  8 2           2           2           
##  9 2           2           2           
## 10 2           2           2           
## 11 2           1           1           
## 12 1           1           1           
## 13 2           2           2           
## 14 2           2           1           
## 15 1           1           1