library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Loading required package: lattice
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(e1071)         # SVM
## Warning: package 'e1071' was built under R version 4.4.3
library(randomForest)  # Random Forest
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(rpart)         # Decision Tree
## Warning: package 'rpart' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:randomForest':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
setwd("D:/Data PC/Documents/UAS Data Mining")
train <- read_excel("datatraining.xlsx")
test <- read_excel("datatesting.xlsx")
# Convert the categorical columns to factors
categorical_cols <- c("jenis_kelamin", "dukungan_orang_tua", "fasilitas_belajar",
                      "minat_pada_pelajaran", "kesulitan_ekonomi", "motivasi_belajar")

train[categorical_cols] <- lapply(train[categorical_cols], as.factor)
# The test file does not include motivasi_belajar (the target we predict later),
# so only the first five categorical columns are converted there.
test[categorical_cols[1:5]] <- lapply(test[categorical_cols[1:5]], as.factor)

# Center and scale the numeric columns (mainly for the SVM)
num_cols <- c("usia", "nilai_rata_rata", "jam_belajar_per_hari",
              "kehadiran_persen", "jarak_rumah_sekolah")

scaler <- preProcess(train[, num_cols], method = c("center", "scale"))
train_scaled <- train
test_scaled <- test

train_scaled[, num_cols] <- predict(scaler, train[, num_cols])
test_scaled[, num_cols] <- predict(scaler, test[, num_cols])
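# Optional sanity check (a minimal sketch, assuming train and test share the
# same category labels): unseen factor levels in the test data would break
# predict() for randomForest and svm later on.
for (col in categorical_cols[1:5]) {
  stopifnot(all(levels(test_scaled[[col]]) %in% levels(train_scaled[[col]])))
}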
# Hold out 20% of the training data as a validation set (stratified on the target)
set.seed(42)
split <- createDataPartition(train_scaled$motivasi_belajar, p = 0.8, list = FALSE)
train_set <- train_scaled[split, ]
val_set <- train_scaled[-split, ]
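# Quick check (optional): createDataPartition() stratifies on the outcome, so
# the class proportions of motivasi_belajar should be similar in both partitions.
prop.table(table(train_set$motivasi_belajar))
prop.table(table(val_set$motivasi_belajar))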
# Train the three classifiers on the training partition
model_tree <- rpart(motivasi_belajar ~ ., data = train_set, method = "class")
model_rf <- randomForest(motivasi_belajar ~ ., data = train_set)
model_svm <- svm(motivasi_belajar ~ ., data = train_set, kernel = "linear")
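# Optional diagnostic: variable importance from the random forest
# (MeanDecreaseGini); not required for the predictions below.
importance(model_rf)
varImpPlot(model_rf)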
# Predict on the validation set and compare the models with confusion matrices
val_pred_tree <- predict(model_tree, val_set, type = "class")
val_pred_rf <- predict(model_rf, val_set)
val_pred_svm <- predict(model_svm, val_set)

confusionMatrix(val_pred_tree, val_set$motivasi_belajar)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  1  2  3
##          1  9  2  0
##          2  7 19  2
##          3  0  0  0
## 
## Overall Statistics
##                                         
##                Accuracy : 0.7179        
##                  95% CI : (0.5513, 0.85)
##     No Information Rate : 0.5385        
##     P-Value [Acc > NIR] : 0.01707       
##                                         
##                   Kappa : 0.4333        
##                                         
##  Mcnemar's Test P-Value : NA            
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.5625   0.9048  0.00000
## Specificity            0.9130   0.5000  1.00000
## Pos Pred Value         0.8182   0.6786      NaN
## Neg Pred Value         0.7500   0.8182  0.94872
## Prevalence             0.4103   0.5385  0.05128
## Detection Rate         0.2308   0.4872  0.00000
## Detection Prevalence   0.2821   0.7179  0.00000
## Balanced Accuracy      0.7378   0.7024  0.50000
confusionMatrix(val_pred_rf, val_set$motivasi_belajar)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  1  2  3
##          1  9  3  0
##          2  7 18  2
##          3  0  0  0
## 
## Overall Statistics
##                                           
##                Accuracy : 0.6923          
##                  95% CI : (0.5243, 0.8298)
##     No Information Rate : 0.5385          
##     P-Value [Acc > NIR] : 0.0372          
##                                           
##                   Kappa : 0.3858          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.5625   0.8571  0.00000
## Specificity            0.8696   0.5000  1.00000
## Pos Pred Value         0.7500   0.6667      NaN
## Neg Pred Value         0.7407   0.7500  0.94872
## Prevalence             0.4103   0.5385  0.05128
## Detection Rate         0.2308   0.4615  0.00000
## Detection Prevalence   0.3077   0.6923  0.00000
## Balanced Accuracy      0.7160   0.6786  0.50000
confusionMatrix(val_pred_svm, val_set$motivasi_belajar)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  1  2  3
##          1  9  2  0
##          2  7 18  2
##          3  0  1  0
## 
## Overall Statistics
##                                           
##                Accuracy : 0.6923          
##                  95% CI : (0.5243, 0.8298)
##     No Information Rate : 0.5385          
##     P-Value [Acc > NIR] : 0.0372          
##                                           
##                   Kappa : 0.3969          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.5625   0.8571  0.00000
## Specificity            0.9130   0.5000  0.97297
## Pos Pred Value         0.8182   0.6667  0.00000
## Neg Pred Value         0.7500   0.7500  0.94737
## Prevalence             0.4103   0.5385  0.05128
## Detection Rate         0.2308   0.4615  0.00000
## Detection Prevalence   0.2821   0.6923  0.02564
## Balanced Accuracy      0.7378   0.6786  0.48649
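# Optional summary (a sketch): put the three validation accuracies side by side;
# the values come from the confusionMatrix() calls above, nothing new is fitted.
akurasi_val <- data.frame(
  Model = c("DecisionTree", "RandomForest", "SVM"),
  Accuracy = c(
    confusionMatrix(val_pred_tree, val_set$motivasi_belajar)$overall["Accuracy"],
    confusionMatrix(val_pred_rf,   val_set$motivasi_belajar)$overall["Accuracy"],
    confusionMatrix(val_pred_svm,  val_set$motivasi_belajar)$overall["Accuracy"]
  )
)
akurasi_val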
# Predict motivasi_belajar for the test set with each model
pred_test_tree <- predict(model_tree, test_scaled, type = "class")
pred_test_rf   <- predict(model_rf, test_scaled)
pred_test_svm  <- predict(model_svm, test_scaled)
# Combine the three sets of predictions into one results table
hasil <- data.frame(
  No = 1:nrow(test),
  SVM = pred_test_svm,
  RandomForest = pred_test_rf,
  DecisionTree = pred_test_tree
)

# Save the results to a CSV file (optional)
write.csv(hasil, "hasil_prediksi_uas.csv", row.names = FALSE)

# Display the results
print(hasil)
##    No SVM RandomForest DecisionTree
## 1   1   2            2            2
## 2   2   2            2            2
## 3   3   1            1            2
## 4   4   2            2            2
## 5   5   1            1            1
## 6   6   2            2            2
## 7   7   2            2            2
## 8   8   2            2            2
## 9   9   2            2            2
## 10 10   2            2            2
## 11 11   1            1            2
## 12 12   1            1            1
## 13 13   2            2            2
## 14 14   1            2            2
## 15 15   1            1            1
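# Optional follow-up (a sketch): fraction of test rows where all three models
# agree, as a quick consistency check on the predictions above.
mean(as.character(hasil$SVM) == as.character(hasil$RandomForest) &
     as.character(hasil$RandomForest) == as.character(hasil$DecisionTree))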