# MODEL RANDOM FOREST
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
##
## The following object is masked from 'package:dplyr':
##
## combine
##
## The following object is masked from 'package:ggplot2':
##
## margin
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
# Read the training set from the Excel file, then auto-print it for inspection.
data_train <- read_xlsx("C:/Users/Asus/Downloads/datatraining.xlsx")
data_train
## # A tibble: 200 × 11
## usia jenis_kelamin nilai_rata_rata dukungan_orang_tua fasilitas_belajar
## <dbl> <chr> <dbl> <chr> <chr>
## 1 15 0 85.2 1 2
## 2 19 0 60.9 2 3
## 3 15 0 60.5 2 1
## 4 15 1 78.6 3 2
## 5 16 1 66.2 2 1
## 6 18 1 85.9 2 1
## 7 16 1 45.2 1 3
## 8 16 0 91.6 2 2
## 9 15 1 74.1 2 1
## 10 21 1 88.2 3 3
## # ℹ 190 more rows
## # ℹ 6 more variables: jam_belajar_per_hari <dbl>, kehadiran_persen <dbl>,
## # minat_pada_pelajaran <chr>, kesulitan_ekonomi <chr>,
## # jarak_rumah_sekolah <dbl>, motivasi_belajar <chr>
# Read the test set from the Excel file, then auto-print it for inspection.
data_test <- read_xlsx(path = "C:/Users/Asus/Downloads/datatesting.xlsx")
data_test
## # A tibble: 15 × 10
## usia jenis_kelamin nilai_rata_rata dukungan_orang_tua fasilitas_belajar
## <dbl> <chr> <dbl> <chr> <chr>
## 1 15 1 90.2 2 2
## 2 19 0 79.6 1 1
## 3 17 0 66.9 2 3
## 4 20 1 85.6 3 1
## 5 16 1 65.9 3 1
## 6 21 0 70.8 2 3
## 7 21 0 86.3 3 2
## 8 19 1 84.3 3 3
## 9 17 0 79.5 2 2
## 10 18 0 74.4 1 2
## 11 16 1 76.9 3 2
## 12 15 1 81.9 2 2
## 13 17 1 83.2 3 2
## 14 17 0 65.2 3 3
## 15 20 1 62 2 1
## # ℹ 5 more variables: jam_belajar_per_hari <dbl>, kehadiran_persen <dbl>,
## # minat_pada_pelajaran <chr>, kesulitan_ekonomi <chr>,
## # jarak_rumah_sekolah <dbl>
# Categorical variables; the last entry (motivasi_belajar) is the target.
kategorik <- c(
  "jenis_kelamin", "dukungan_orang_tua", "fasilitas_belajar",
  "minat_pada_pelajaran", "kesulitan_ekonomi", "motivasi_belajar"
)

# Preprocess the training data: categorical columns become factors.
data_train <- data_train %>%
  mutate(across(all_of(kategorik), as.factor))

# Preprocess the test data (no target column).
# The factor levels are taken from the training data rather than derived
# independently: a small test set may not contain every category, and factors
# whose levels differ from the training ones can break or distort prediction.
data_test <- data_test %>%
  mutate(across(
    all_of(setdiff(kategorik, "motivasi_belajar")),
    function(x) {
      level_latih <- levels(data_train[[cur_column()]])
      # Fail loudly on categories the model never saw instead of silently
      # turning them into NA.
      stopifnot(all(x %in% c(level_latih, NA)))
      factor(x, levels = level_latih)
    }
  ))

# Fit the random forest on all predictors; the seed fixes the random draws.
set.seed(123)
model_rf <- randomForest(
  motivasi_belajar ~ .,
  data = data_train,
  importance = TRUE
)

# Predictor columns (everything except the target), in training order.
predictors <- setdiff(names(data_train), "motivasi_belajar")
data_test_ready <- data_test[, predictors]

# Predict learning motivation for the test rows.
prediksi_rf <- predict(model_rf, newdata = data_test_ready)

# Show the predictions next to a couple of example identifying columns.
hasil_rf <- data_test %>%
  select(usia, jenis_kelamin) %>%
  mutate(prediksi_motivasi = prediksi_rf)
print(hasil_rf)
## # A tibble: 15 × 3
## usia jenis_kelamin prediksi_motivasi
## <dbl> <fct> <fct>
## 1 15 1 2
## 2 19 0 2
## 3 17 0 1
## 4 20 1 2
## 5 16 1 1
## 6 21 0 2
## 7 21 0 2
## 8 19 1 2
## 9 17 0 2
## 10 18 0 2
## 11 16 1 1
## 12 15 1 1
## 13 17 1 2
## 14 17 0 2
## 15 20 1 1
# MODEL SVM
library(tidyverse)
library(readr)
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
library(dplyr)
library(readxl)
# Re-read the training set so this section starts from the raw spreadsheet,
# then auto-print it for inspection.
data_train <- read_xlsx("C:/Users/Asus/Downloads/datatraining.xlsx")
data_train
## # A tibble: 200 × 11
## usia jenis_kelamin nilai_rata_rata dukungan_orang_tua fasilitas_belajar
## <dbl> <chr> <dbl> <chr> <chr>
## 1 15 0 85.2 1 2
## 2 19 0 60.9 2 3
## 3 15 0 60.5 2 1
## 4 15 1 78.6 3 2
## 5 16 1 66.2 2 1
## 6 18 1 85.9 2 1
## 7 16 1 45.2 1 3
## 8 16 0 91.6 2 2
## 9 15 1 74.1 2 1
## 10 21 1 88.2 3 3
## # ℹ 190 more rows
## # ℹ 6 more variables: jam_belajar_per_hari <dbl>, kehadiran_persen <dbl>,
## # minat_pada_pelajaran <chr>, kesulitan_ekonomi <chr>,
## # jarak_rumah_sekolah <dbl>, motivasi_belajar <chr>
# Re-read the test set from the Excel file, then auto-print it for inspection.
data_test <- read_xlsx(path = "C:/Users/Asus/Downloads/datatesting.xlsx")
data_test
## # A tibble: 15 × 10
## usia jenis_kelamin nilai_rata_rata dukungan_orang_tua fasilitas_belajar
## <dbl> <chr> <dbl> <chr> <chr>
## 1 15 1 90.2 2 2
## 2 19 0 79.6 1 1
## 3 17 0 66.9 2 3
## 4 20 1 85.6 3 1
## 5 16 1 65.9 3 1
## 6 21 0 70.8 2 3
## 7 21 0 86.3 3 2
## 8 19 1 84.3 3 3
## 9 17 0 79.5 2 2
## 10 18 0 74.4 1 2
## 11 16 1 76.9 3 2
## 12 15 1 81.9 2 2
## 13 17 1 83.2 3 2
## 14 17 0 65.2 3 3
## 15 20 1 62 2 1
## # ℹ 5 more variables: jam_belajar_per_hari <dbl>, kehadiran_persen <dbl>,
## # minat_pada_pelajaran <chr>, kesulitan_ekonomi <chr>,
## # jarak_rumah_sekolah <dbl>
# Categorical variables; the last entry (motivasi_belajar) is the target.
kategori <- c(
  "jenis_kelamin", "dukungan_orang_tua", "fasilitas_belajar",
  "minat_pada_pelajaran", "kesulitan_ekonomi", "motivasi_belajar"
)

# Training data: categorical columns become factors.
data_train <- data_train %>%
  mutate(across(all_of(kategori), as.factor))

# Test data (no target column).
# The factor levels are taken from the training data rather than derived
# independently: a small test set may not contain every category, and the
# test predictors must be encoded the same way as the training predictors.
data_test <- data_test %>%
  mutate(across(
    all_of(setdiff(kategori, "motivasi_belajar")),
    function(x) {
      level_latih <- levels(data_train[[cur_column()]])
      # Fail loudly on categories the model never saw instead of silently
      # turning them into NA.
      stopifnot(all(x %in% c(level_latih, NA)))
      factor(x, levels = level_latih)
    }
  ))

# Fit a radial-kernel SVM on all predictors (seed set for reproducibility).
set.seed(123)
model_svm <- svm(motivasi_belajar ~ ., data = data_train, kernel = "radial")

# Predictor columns (everything except the target), in training order.
predictors <- setdiff(names(data_train), "motivasi_belajar")
data_test_aligned <- data_test[, predictors]

# Predict learning motivation for the test rows.
prediksi_svm <- predict(model_svm, newdata = data_test_aligned)

# Show the predictions next to a couple of identifying columns.
hasil_svm <- data_test %>%
  select(usia, jenis_kelamin) %>%
  mutate(prediksi_motivasi = prediksi_svm)
print(hasil_svm)
## # A tibble: 15 × 3
## usia jenis_kelamin prediksi_motivasi
## <dbl> <fct> <fct>
## 1 15 1 2
## 2 19 0 2
## 3 17 0 1
## 4 20 1 2
## 5 16 1 1
## 6 21 0 2
## 7 21 0 2
## 8 19 1 2
## 9 17 0 2
## 10 18 0 2
## 11 16 1 1
## 12 15 1 1
## 13 17 1 2
## 14 17 0 1
## 15 20 1 1
# MODEL DECISION TREE
library(rpart) # Untuk model decision tree
## Warning: package 'rpart' was built under R version 4.4.3
library(readxl) # Untuk membaca file Excel
library(dplyr) # Untuk manipulasi data
library(rpart.plot) # Untuk visualisasi pohon (opsional)
## Warning: package 'rpart.plot' was built under R version 4.4.3
# Re-read the training set so this section starts from the raw spreadsheet,
# then auto-print it for inspection.
data_train <- read_xlsx("C:/Users/Asus/Downloads/datatraining.xlsx")
data_train
## # A tibble: 200 × 11
## usia jenis_kelamin nilai_rata_rata dukungan_orang_tua fasilitas_belajar
## <dbl> <chr> <dbl> <chr> <chr>
## 1 15 0 85.2 1 2
## 2 19 0 60.9 2 3
## 3 15 0 60.5 2 1
## 4 15 1 78.6 3 2
## 5 16 1 66.2 2 1
## 6 18 1 85.9 2 1
## 7 16 1 45.2 1 3
## 8 16 0 91.6 2 2
## 9 15 1 74.1 2 1
## 10 21 1 88.2 3 3
## # ℹ 190 more rows
## # ℹ 6 more variables: jam_belajar_per_hari <dbl>, kehadiran_persen <dbl>,
## # minat_pada_pelajaran <chr>, kesulitan_ekonomi <chr>,
## # jarak_rumah_sekolah <dbl>, motivasi_belajar <chr>
# Re-read the test set from the Excel file, then auto-print it for inspection.
data_test <- read_xlsx(path = "C:/Users/Asus/Downloads/datatesting.xlsx")
data_test
## # A tibble: 15 × 10
## usia jenis_kelamin nilai_rata_rata dukungan_orang_tua fasilitas_belajar
## <dbl> <chr> <dbl> <chr> <chr>
## 1 15 1 90.2 2 2
## 2 19 0 79.6 1 1
## 3 17 0 66.9 2 3
## 4 20 1 85.6 3 1
## 5 16 1 65.9 3 1
## 6 21 0 70.8 2 3
## 7 21 0 86.3 3 2
## 8 19 1 84.3 3 3
## 9 17 0 79.5 2 2
## 10 18 0 74.4 1 2
## 11 16 1 76.9 3 2
## 12 15 1 81.9 2 2
## 13 17 1 83.2 3 2
## 14 17 0 65.2 3 3
## 15 20 1 62 2 1
## # ℹ 5 more variables: jam_belajar_per_hari <dbl>, kehadiran_persen <dbl>,
## # minat_pada_pelajaran <chr>, kesulitan_ekonomi <chr>,
## # jarak_rumah_sekolah <dbl>
# Names of the categorical columns; the target (motivasi_belajar) comes last.
kategori <- c(
  "jenis_kelamin",
  "dukungan_orang_tua",
  "fasilitas_belajar",
  "minat_pada_pelajaran",
  "kesulitan_ekonomi",
  "motivasi_belajar"
)

# Convert the categorical columns to factors. The test set has no target
# column, so the last name is dropped for it.
data_train <- mutate(data_train, across(all_of(kategori), as.factor))
data_test <- mutate(data_test, across(all_of(head(kategori, -1)), as.factor))

# Fit a classification tree on every predictor and draw it.
model_tree <- rpart(
  formula = motivasi_belajar ~ .,
  data = data_train,
  method = "class"
)
rpart.plot(model_tree)

# Keep only the predictor columns (no target), in training order.
predictors <- setdiff(names(data_train), "motivasi_belajar")
data_test_aligned <- data_test %>%
  select(all_of(predictors))

# Predict the class label for each test row.
prediksi_tree <- predict(
  model_tree,
  newdata = data_test_aligned,
  type = "class"
)

# Show the predictions next to a couple of identifying columns.
hasil_tree <- mutate(
  select(data_test, usia, jenis_kelamin),
  prediksi_motivasi = prediksi_tree
)
print(hasil_tree)
## # A tibble: 15 × 3
## usia jenis_kelamin prediksi_motivasi
## <dbl> <fct> <fct>
## 1 15 1 2
## 2 19 0 2
## 3 17 0 1
## 4 20 1 2
## 5 16 1 1
## 6 21 0 2
## 7 21 0 2
## 8 19 1 3
## 9 17 0 1
## 10 18 0 2
## 11 16 1 2
## 12 15 1 1
## 13 17 1 1
## 14 17 0 2
## 15 20 1 1