library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Loading required package: lattice
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(e1071) # SVM
## Warning: package 'e1071' was built under R version 4.4.3
library(randomForest) # Random Forest
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
library(rpart) # Decision Tree
## Warning: package 'rpart' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:randomForest':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the training and testing workbooks from the project folder.
# NOTE(review): setwd() on an absolute, machine-specific path makes the script
# non-portable; the relative paths below (and the CSV written later in the
# script) all resolve against this directory.
data_dir <- "D:/Data PC/Documents/UAS Data Mining"
setwd(data_dir)
train <- read_excel(path = "datatraining.xlsx")
test <- read_excel(path = "datatesting.xlsx")
# Convert the categorical columns to factors.
categorical_cols <- c("jenis_kelamin", "dukungan_orang_tua", "fasilitas_belajar",
                      "minat_pada_pelajaran", "kesulitan_ekonomi", "motivasi_belajar")
train[categorical_cols] <- lapply(train[categorical_cols], as.factor)

# Only the predictor columns are converted in the test data; the target
# "motivasi_belajar" is excluded by name rather than by position
# (presumably the test sheet does not contain it -- confirm).
predictor_cat_cols <- setdiff(categorical_cols, "motivasi_belajar")

# Reuse the training levels so the test factors have exactly the same level
# set and ordering as the factors the models are fitted on. Building them
# independently with as.factor() can yield different levels when a category
# is absent from (or only present in) the test data.
# Values never seen in training become NA.
test[predictor_cat_cols] <- Map(
  function(values, reference) factor(values, levels = levels(reference)),
  test[predictor_cat_cols],
  train[predictor_cat_cols]
)
# Numeric columns to centre and scale (needed for the SVM).
num_cols <- c("usia", "nilai_rata_rata", "jam_belajar_per_hari",
              "kehadiran_persen", "jarak_rumah_sekolah")

# Draw the 80/20 train/validation partition on the target first, so the
# scaler below can be fitted without looking at the validation rows.
set.seed(42)
split <- createDataPartition(train$motivasi_belajar, p = 0.8, list = FALSE)

# Fit centring/scaling on the training partition only; fitting it on every
# row would leak validation-set statistics into the preprocessing.
scaler <- preProcess(train[split, num_cols], method = c("center", "scale"))

# Apply the same fitted transformation to all training rows and to the test
# data; non-numeric columns are carried over unchanged.
train_scaled <- train
test_scaled <- test
train_scaled[, num_cols] <- predict(scaler, train[, num_cols])
test_scaled[, num_cols] <- predict(scaler, test[, num_cols])

train_set <- train_scaled[split, ]
val_set <- train_scaled[-split, ]
# Fit the three classifiers on the training partition, each predicting
# motivasi_belajar from every other column.
# Keep this order: the fits run after the earlier set.seed() call, so
# reordering them may change the random forest that is grown.
model_tree <- rpart(
  formula = motivasi_belajar ~ .,
  data = train_set,
  method = "class"
)
model_rf <- randomForest(
  formula = motivasi_belajar ~ .,
  data = train_set
)
model_svm <- svm(
  formula = motivasi_belajar ~ .,
  data = train_set,
  kernel = "linear"
)
# Predict the validation partition with each model, then print the
# confusion matrix of the decision tree against the true labels.
val_pred_tree <- predict(object = model_tree, newdata = val_set, type = "class")
val_pred_rf <- predict(object = model_rf, newdata = val_set)
val_pred_svm <- predict(object = model_svm, newdata = val_set)
confusionMatrix(
  data = val_pred_tree,
  reference = val_set[["motivasi_belajar"]]
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3
## 1 9 2 0
## 2 7 19 2
## 3 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.7179
## 95% CI : (0.5513, 0.85)
## No Information Rate : 0.5385
## P-Value [Acc > NIR] : 0.01707
##
## Kappa : 0.4333
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 0.5625 0.9048 0.00000
## Specificity 0.9130 0.5000 1.00000
## Pos Pred Value 0.8182 0.6786 NaN
## Neg Pred Value 0.7500 0.8182 0.94872
## Prevalence 0.4103 0.5385 0.05128
## Detection Rate 0.2308 0.4872 0.00000
## Detection Prevalence 0.2821 0.7179 0.00000
## Balanced Accuracy 0.7378 0.7024 0.50000
# Confusion matrix of the random forest on the validation partition.
confusionMatrix(
  data = val_pred_rf,
  reference = val_set[["motivasi_belajar"]]
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3
## 1 9 3 0
## 2 7 18 2
## 3 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.6923
## 95% CI : (0.5243, 0.8298)
## No Information Rate : 0.5385
## P-Value [Acc > NIR] : 0.0372
##
## Kappa : 0.3858
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 0.5625 0.8571 0.00000
## Specificity 0.8696 0.5000 1.00000
## Pos Pred Value 0.7500 0.6667 NaN
## Neg Pred Value 0.7407 0.7500 0.94872
## Prevalence 0.4103 0.5385 0.05128
## Detection Rate 0.2308 0.4615 0.00000
## Detection Prevalence 0.3077 0.6923 0.00000
## Balanced Accuracy 0.7160 0.6786 0.50000
# Confusion matrix of the linear SVM on the validation partition.
confusionMatrix(
  data = val_pred_svm,
  reference = val_set[["motivasi_belajar"]]
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3
## 1 9 2 0
## 2 7 18 2
## 3 0 1 0
##
## Overall Statistics
##
## Accuracy : 0.6923
## 95% CI : (0.5243, 0.8298)
## No Information Rate : 0.5385
## P-Value [Acc > NIR] : 0.0372
##
## Kappa : 0.3969
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 0.5625 0.8571 0.00000
## Specificity 0.9130 0.5000 0.97297
## Pos Pred Value 0.8182 0.6667 0.00000
## Neg Pred Value 0.7500 0.7500 0.94737
## Prevalence 0.4103 0.5385 0.05128
## Detection Rate 0.2308 0.4615 0.00000
## Detection Prevalence 0.2821 0.6923 0.02564
## Balanced Accuracy 0.7378 0.6786 0.48649
# Predict the (scaled) test data with each fitted model.
pred_test_tree <- predict(model_tree, test_scaled, type = "class")
pred_test_rf <- predict(model_rf, test_scaled)
pred_test_svm <- predict(model_svm, test_scaled)

# Collect the predictions side by side, one row per test observation.
# seq_len() is used instead of 1:nrow(test) so that an empty test set gives
# an empty index rather than c(1, 0).
hasil <- data.frame(
  No = seq_len(nrow(test)),
  SVM = pred_test_svm,
  RandomForest = pred_test_rf,
  DecisionTree = pred_test_tree
)

# Save to a CSV file (optional)
write.csv(hasil, "hasil_prediksi_uas.csv", row.names = FALSE)

# Show the results
print(hasil)
## No SVM RandomForest DecisionTree
## 1 1 2 2 2
## 2 2 2 2 2
## 3 3 1 1 2
## 4 4 2 2 2
## 5 5 1 1 1
## 6 6 2 2 2
## 7 7 2 2 2
## 8 8 2 2 2
## 9 9 2 2 2
## 10 10 2 2 2
## 11 11 1 1 2
## 12 12 1 1 1
## 13 13 2 2 2
## 14 14 1 2 2
## 15 15 1 1 1