# Instal paket yang diperlukan (jika belum terinstal)
# install.packages(c("tidyverse", "caret", "e1071", "randomForest", "rpart", "openxlsx"))
# Muat paket yang diperlukan
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
##
## The following object is masked from 'package:dplyr':
##
## combine
##
## The following object is masked from 'package:ggplot2':
##
## margin
library(rpart)
## Warning: package 'rpart' was built under R version 4.4.3
library(openxlsx)
## Warning: package 'openxlsx' was built under R version 4.4.3
library(kernlab)
##
## Attaching package: 'kernlab'
##
## The following object is masked from 'package:purrr':
##
## cross
##
## The following object is masked from 'package:ggplot2':
##
## alpha
## 1. Preprocessing Data
# Read the training and testing data (first sheet of each workbook)
training_path <- "C:/Users/ASUS/Downloads/datatraining.xlsx"
testing_path <- "C:/Users/ASUS/Downloads/datatesting.xlsx"
data_training <- read.xlsx(training_path, sheet = 1)
data_testing <- read.xlsx(testing_path, sheet = 1)
# Convert the coded categorical predictors to labelled factors.
#
# data: a data frame containing the columns jenis_kelamin, dukungan_orang_tua,
#       fasilitas_belajar, minat_pada_pelajaran and kesulitan_ekonomi stored
#       as numeric codes.
# Returns `data` with those five columns replaced by factors; codes outside
# the listed levels become NA (standard factor() behaviour).
convert_to_factor <- function(data) {
  # Small wrapper so each column states only its codes and their labels
  label_codes <- function(x, codes, labels) {
    factor(x, levels = codes, labels = labels)
  }

  mutate(
    data,
    jenis_kelamin = label_codes(jenis_kelamin, c(0, 1), c("Perempuan", "Laki-laki")),
    dukungan_orang_tua = label_codes(dukungan_orang_tua, 1:3, c("Rendah", "Sedang", "Tinggi")),
    fasilitas_belajar = label_codes(fasilitas_belajar, 1:3, c("Kurang", "Cukup", "Baik")),
    minat_pada_pelajaran = label_codes(minat_pada_pelajaran, 1:3, c("Rendah", "Sedang", "Tinggi")),
    kesulitan_ekonomi = label_codes(kesulitan_ekonomi, c(0, 1), c("Tidak", "Ya"))
  )
}
# Training data: convert the predictors, then label the target variable too
training_converted <- convert_to_factor(data_training)
data_training <- mutate(
  training_converted,
  motivasi_belajar = factor(
    motivasi_belajar,
    levels = 1:3,
    labels = c("Rendah", "Sedang", "Tinggi")
  )
)
# Testing data: only the predictors are converted
data_testing <- convert_to_factor(data_testing)
# Check that the resulting structure is as expected
str(data_training)
## 'data.frame': 200 obs. of 11 variables:
## $ usia : num 15 19 15 15 16 18 16 16 15 21 ...
## $ jenis_kelamin : Factor w/ 2 levels "Perempuan","Laki-laki": 1 1 1 2 2 2 2 1 2 2 ...
## $ nilai_rata_rata : num 85.2 60.9 60.5 78.6 66.2 85.9 45.2 91.6 74.1 88.2 ...
## $ dukungan_orang_tua : Factor w/ 3 levels "Rendah","Sedang",..: 1 2 2 3 2 2 1 2 2 3 ...
## $ fasilitas_belajar : Factor w/ 3 levels "Kurang","Cukup",..: 2 3 1 2 1 1 3 2 1 3 ...
## $ jam_belajar_per_hari: num 4.1 3.8 6.1 5.1 5.5 3.7 2.8 6.2 4.6 3 ...
## $ kehadiran_persen : num 79.7 89.8 86.7 78.5 81 85.4 78.9 78.1 96.8 82.1 ...
## $ minat_pada_pelajaran: Factor w/ 3 levels "Rendah","Sedang",..: 2 2 1 3 1 2 1 2 2 3 ...
## $ kesulitan_ekonomi : Factor w/ 2 levels "Tidak","Ya": 1 2 2 2 2 2 1 1 1 2 ...
## $ jarak_rumah_sekolah : num 11.5 6.3 15.9 9.8 3.5 1 6.4 4.9 6.5 9.5 ...
## $ motivasi_belajar : Factor w/ 3 levels "Rendah","Sedang",..: 1 2 1 2 1 1 1 2 1 3 ...
# Inspect the converted testing data; unlike the training data, no
# motivasi_belajar conversion was applied to it above.
str(data_testing)
## 'data.frame': 15 obs. of 10 variables:
## $ usia : num 15 19 17 20 16 21 21 19 17 18 ...
## $ jenis_kelamin : Factor w/ 2 levels "Perempuan","Laki-laki": 2 1 1 2 2 1 1 2 1 1 ...
## $ nilai_rata_rata : num 90.2 79.6 66.9 85.6 65.9 70.8 86.3 84.3 79.5 74.4 ...
## $ dukungan_orang_tua : Factor w/ 3 levels "Rendah","Sedang",..: 2 1 2 3 3 2 3 3 2 1 ...
## $ fasilitas_belajar : Factor w/ 3 levels "Kurang","Cukup",..: 2 1 3 1 1 3 2 3 2 2 ...
## $ jam_belajar_per_hari: num 5.4 4.6 2.9 3.9 5.3 1.9 4.4 1.5 2 5.5 ...
## $ kehadiran_persen : num 78.3 88.2 76.3 89.4 74 100 100 92.1 84.5 90.3 ...
## $ minat_pada_pelajaran: Factor w/ 3 levels "Rendah","Sedang",..: 2 3 1 1 1 1 1 1 3 3 ...
## $ kesulitan_ekonomi : Factor w/ 2 levels "Tidak","Ya": 2 1 1 1 2 2 1 1 1 1 ...
## $ jarak_rumah_sekolah : num 4.7 10.8 11.8 5.7 3.1 7.1 8.2 5.4 4.2 10.7 ...
## 2. Training Model
# Prepare 5-fold cross-validation
set.seed(123) # For reproducibility
# Build the folds once and pass them through `index`, so every train() call
# that uses this control object is resampled on exactly the same partitions.
# Without this, each train() call draws its own folds from the advancing RNG
# state and the per-fold comparison made later by resamples() is not
# like-for-like.
cv_folds <- createFolds(
  data_training$motivasi_belajar,
  k = 5,
  returnTrain = TRUE
)
train_control <- trainControl(method = "cv", number = 5, index = cv_folds)
### a. Support Vector Machine (SVM)
# SVM with a radial kernel ("svmRadial"): all other columns predict
# motivasi_belajar, predictors are centred and scaled via preProcess, and
# tuneLength = 5 sets the size of the tuning search.
model_svm <- train(
  motivasi_belajar ~ .,
  data = data_training,
  method = "svmRadial",
  preProcess = c("center", "scale"),
  tuneLength = 5,
  trControl = train_control
)
### b. Random Forest
# Random forest ("rf") on the same formula and resampling setup; ntree = 100
# is not a train() tuning argument and is forwarded to the underlying fit.
model_rf <- train(
  motivasi_belajar ~ .,
  data = data_training,
  method = "rf",
  tuneLength = 5,
  ntree = 100,
  trControl = train_control
)
### c. Decision Tree
# Decision tree ("rpart") on the same formula and resampling setup
model_dt <- train(
  motivasi_belajar ~ .,
  data = data_training,
  method = "rpart",
  tuneLength = 5,
  trControl = train_control
)
## 3. Prediksi pada Data Testing
# Predict the testing data with each fitted model
models_for_prediction <- list(svm = model_svm, rf = model_rf, dt = model_dt)
test_predictions <- lapply(
  models_for_prediction,
  predict,
  newdata = data_testing
)
predictions_svm <- test_predictions$svm
predictions_rf <- test_predictions$rf
predictions_dt <- test_predictions$dt
# Convert predicted class labels to numeric codes (1 = Rendah, 2 = Sedang,
# 3 = Tinggi by default).
#
# pred:   factor or character vector of predicted labels.
# levels: labels in code order; the position of a label is its numeric code.
#         Defaults to the three motivation classes used in this analysis.
# Returns a numeric vector the same length as `pred`; labels that are not in
# `levels` become NA.
#
# This function was previously defined twice in a row with identical bodies;
# a single definition is kept.
convert_pred_to_numeric <- function(pred,
                                    levels = c("Rendah", "Sedang", "Tinggi")) {
  as.numeric(factor(pred, levels = levels))
}
# Numeric codes of the predictions, one vector per model
numeric_predictions <- lapply(
  list(svm = predictions_svm, rf = predictions_rf, dt = predictions_dt),
  convert_pred_to_numeric
)
predictions_svm_num <- numeric_predictions$svm
predictions_rf_num <- numeric_predictions$rf
predictions_dt_num <- numeric_predictions$dt
## 4. Menyiapkan Hasil Akhir
# Read the prediction template (sheet "SVM") that receives the SVM results
template_prediksi <- read.xlsx("C:/Users/ASUS/Downloads/Paste Hasil Prediksi.xlsx", sheet = "SVM")
# Overwrite the prediction cells of each template.
# The row range follows the length of each prediction vector instead of a
# hard-coded 1:15, so the script keeps working if the testing set changes size.
# NOTE(review): the SVM template is filled from column 3 onwards while the
# Random Forest and Decision Tree templates are filled from column 2 onwards;
# confirm this matches the actual layout of the three template files.
# SVM
template_svm <- template_prediksi
template_svm[seq_along(predictions_svm_num), 3:ncol(template_svm)] <- predictions_svm_num
# Random Forest
template_rf <- read.xlsx("C:/Users/ASUS/Downloads/Random Forest.xlsx", sheet = 1)
template_rf[seq_along(predictions_rf_num), 2:ncol(template_rf)] <- predictions_rf_num
# Decision Tree
template_dt <- read.xlsx("C:/Users/ASUS/Downloads/DCT.xlsx", sheet = 1)
template_dt[seq_along(predictions_dt_num), 2:ncol(template_dt)] <- predictions_dt_num
# Create a workbook with one sheet per model, written in this order
wb <- createWorkbook()
result_sheets <- list(
  "SVM" = template_svm,
  "Random Forest" = template_rf,
  "Decision Tree" = template_dt
)
for (sheet_name in names(result_sheets)) {
  addWorksheet(wb, sheet_name)
  writeData(wb, sheet_name, result_sheets[[sheet_name]])
}
# Save the workbook as an Excel file, replacing any existing file
saveWorkbook(wb, "Hasil_Prediksi_Final.xlsx", overwrite = TRUE)
## 5. Evaluasi Model (Opsional)
# Collect the cross-validation results of the three fitted models
cv_models <- list(
  SVM = model_svm,
  RandomForest = model_rf,
  DecisionTree = model_dt
)
results <- resamples(cv_models)
# Summary of the evaluation results
summary(results)
##
## Call:
## summary.resamples(object = results)
##
## Models: SVM, RandomForest, DecisionTree
## Number of resamples: 5
##
## Accuracy
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## SVM 0.700 0.7179487 0.75 0.7398093 0.7560976 0.775 0
## RandomForest 0.650 0.6500000 0.70 0.7100000 0.7750000 0.775 0
## DecisionTree 0.575 0.5853659 0.60 0.6152783 0.6410256 0.675 0
##
## Kappa
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## SVM 0.4385965 0.4576485 0.5226730 0.5106382 0.5514223 0.5828505 0
## RandomForest 0.3212121 0.3450292 0.4319527 0.4435986 0.5588235 0.5609756 0
## DecisionTree 0.2006881 0.2046784 0.2514620 0.2718495 0.3106061 0.3918129 0
# Visualize the model comparison: dot plot of the resampling results
# collected in `results`
dotplot(results)

# Show the predictions of all three models side by side for verification.
# Row numbers follow the number of predictions instead of a hard-coded 1:15,
# so the table still builds if the testing set changes size.
data.frame(
  No = seq_along(predictions_svm),
  SVM = predictions_svm,
  RandomForest = predictions_rf,
  DecisionTree = predictions_dt
)
## No SVM RandomForest DecisionTree
## 1 1 Sedang Sedang Sedang
## 2 2 Sedang Sedang Sedang
## 3 3 Rendah Rendah Rendah
## 4 4 Sedang Sedang Sedang
## 5 5 Rendah Rendah Sedang
## 6 6 Sedang Sedang Sedang
## 7 7 Sedang Sedang Sedang
## 8 8 Sedang Sedang Sedang
## 9 9 Sedang Sedang Rendah
## 10 10 Sedang Sedang Sedang
## 11 11 Rendah Rendah Rendah
## 12 12 Rendah Rendah Rendah
## 13 13 Sedang Sedang Sedang
## 14 14 Sedang Sedang Rendah
## 15 15 Rendah Rendah Rendah