# MODEL RANDOM FOREST
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
## 
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(readxl)   
## Warning: package 'readxl' was built under R version 4.4.3
# Load the labelled training set from Excel.
# NOTE(review): the absolute path is specific to one machine; adjust it (or
# switch to a project-relative path) before running elsewhere.
data_train <- read_xlsx("C:/Users/Asus/Downloads/datatraining.xlsx")
# Auto-print the tibble to inspect column names and types.
data_train
## # A tibble: 200 × 11
##     usia jenis_kelamin nilai_rata_rata dukungan_orang_tua fasilitas_belajar
##    <dbl> <chr>                   <dbl> <chr>              <chr>            
##  1    15 0                        85.2 1                  2                
##  2    19 0                        60.9 2                  3                
##  3    15 0                        60.5 2                  1                
##  4    15 1                        78.6 3                  2                
##  5    16 1                        66.2 2                  1                
##  6    18 1                        85.9 2                  1                
##  7    16 1                        45.2 1                  3                
##  8    16 0                        91.6 2                  2                
##  9    15 1                        74.1 2                  1                
## 10    21 1                        88.2 3                  3                
## # ℹ 190 more rows
## # ℹ 6 more variables: jam_belajar_per_hari <dbl>, kehadiran_persen <dbl>,
## #   minat_pada_pelajaran <chr>, kesulitan_ekonomi <chr>,
## #   jarak_rumah_sekolah <dbl>, motivasi_belajar <chr>
# Load the test set from Excel. In the rendered output below it has the same
# predictor columns as the training set but no motivasi_belajar column.
# NOTE(review): the absolute path is specific to one machine.
data_test <- read_xlsx("C:/Users/Asus/Downloads/datatesting.xlsx")
# Auto-print the tibble to inspect column names and types.
data_test
## # A tibble: 15 × 10
##     usia jenis_kelamin nilai_rata_rata dukungan_orang_tua fasilitas_belajar
##    <dbl> <chr>                   <dbl> <chr>              <chr>            
##  1    15 1                        90.2 2                  2                
##  2    19 0                        79.6 1                  1                
##  3    17 0                        66.9 2                  3                
##  4    20 1                        85.6 3                  1                
##  5    16 1                        65.9 3                  1                
##  6    21 0                        70.8 2                  3                
##  7    21 0                        86.3 3                  2                
##  8    19 1                        84.3 3                  3                
##  9    17 0                        79.5 2                  2                
## 10    18 0                        74.4 1                  2                
## 11    16 1                        76.9 3                  2                
## 12    15 1                        81.9 2                  2                
## 13    17 1                        83.2 3                  2                
## 14    17 0                        65.2 3                  3                
## 15    20 1                        62   2                  1                
## # ℹ 5 more variables: jam_belajar_per_hari <dbl>, kehadiran_persen <dbl>,
## #   minat_pada_pelajaran <chr>, kesulitan_ekonomi <chr>,
## #   jarak_rumah_sekolah <dbl>
# Categorical variables. They are read from Excel as character columns holding
# codes, so they must be converted to factors before modelling.
kategorik <- c("jenis_kelamin", "dukungan_orang_tua", "fasilitas_belajar",
               "minat_pada_pelajaran", "kesulitan_ekonomi", "motivasi_belajar")

# Categorical predictors = every categorical column except the target. Chosen
# by name so the result does not depend on the target being listed last.
kategorik_prediktor <- setdiff(kategorik, "motivasi_belajar")

# Training data: convert every categorical column (target included) to factor.
data_train <- data_train %>%
  mutate(across(all_of(kategorik), as.factor))

# Test data (no target column): rebuild each factor with the TRAINING levels.
# A separate as.factor() would derive the levels from whichever codes happen
# to occur in the small test set; predict() on a randomForest rejects
# predictors whose factor levels differ from those seen during training.
for (kolom in kategorik_prediktor) {
  level_train <- levels(data_train[[kolom]])
  nilai_test <- as.character(data_test[[kolom]])
  kode_baru <- setdiff(unique(nilai_test[!is.na(nilai_test)]), level_train)
  if (length(kode_baru) > 0) {
    # Fail early with a clear message instead of a cryptic predict() error.
    stop(
      "Column '", kolom, "' in the test data has codes not seen in training: ",
      paste(kode_baru, collapse = ", "),
      call. = FALSE
    )
  }
  data_test[[kolom]] <- factor(nilai_test, levels = level_train)
}

# Fit the random forest; the seed makes the bootstrap samples reproducible.
set.seed(123)
model_rf <- randomForest(
  motivasi_belajar ~ .,
  data = data_train,
  importance = TRUE
)

# Predictor columns (everything except the target), in training order.
predictors <- setdiff(names(data_train), "motivasi_belajar")
data_test_ready <- data_test[, predictors]

# Predict learning motivation for the test rows.
prediksi_rf <- predict(model_rf, newdata = data_test_ready)
hasil_rf <- data_test %>%
  select(usia, jenis_kelamin) %>%  # example identifying columns
  mutate(prediksi_motivasi = prediksi_rf)

print(hasil_rf)
## # A tibble: 15 × 3
##     usia jenis_kelamin prediksi_motivasi
##    <dbl> <fct>         <fct>            
##  1    15 1             2                
##  2    19 0             2                
##  3    17 0             1                
##  4    20 1             2                
##  5    16 1             1                
##  6    21 0             2                
##  7    21 0             2                
##  8    19 1             2                
##  9    17 0             2                
## 10    18 0             2                
## 11    16 1             1                
## 12    15 1             1                
## 13    17 1             2                
## 14    17 0             2                
## 15    20 1             1
# MODEL SVM
library(tidyverse)
library(readr)
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
library(dplyr)
library(readxl)   
# Reload the labelled training set so this section starts from the raw
# (character-coded) columns rather than the factors created earlier.
# NOTE(review): the absolute path is specific to one machine.
data_train <- read_xlsx("C:/Users/Asus/Downloads/datatraining.xlsx")
# Auto-print the tibble to inspect column names and types.
data_train
## # A tibble: 200 × 11
##     usia jenis_kelamin nilai_rata_rata dukungan_orang_tua fasilitas_belajar
##    <dbl> <chr>                   <dbl> <chr>              <chr>            
##  1    15 0                        85.2 1                  2                
##  2    19 0                        60.9 2                  3                
##  3    15 0                        60.5 2                  1                
##  4    15 1                        78.6 3                  2                
##  5    16 1                        66.2 2                  1                
##  6    18 1                        85.9 2                  1                
##  7    16 1                        45.2 1                  3                
##  8    16 0                        91.6 2                  2                
##  9    15 1                        74.1 2                  1                
## 10    21 1                        88.2 3                  3                
## # ℹ 190 more rows
## # ℹ 6 more variables: jam_belajar_per_hari <dbl>, kehadiran_persen <dbl>,
## #   minat_pada_pelajaran <chr>, kesulitan_ekonomi <chr>,
## #   jarak_rumah_sekolah <dbl>, motivasi_belajar <chr>
# Reload the test set from Excel; in the rendered output below it has no
# motivasi_belajar column.
# NOTE(review): the absolute path is specific to one machine.
data_test <- read_xlsx("C:/Users/Asus/Downloads/datatesting.xlsx")
# Auto-print the tibble to inspect column names and types.
data_test
## # A tibble: 15 × 10
##     usia jenis_kelamin nilai_rata_rata dukungan_orang_tua fasilitas_belajar
##    <dbl> <chr>                   <dbl> <chr>              <chr>            
##  1    15 1                        90.2 2                  2                
##  2    19 0                        79.6 1                  1                
##  3    17 0                        66.9 2                  3                
##  4    20 1                        85.6 3                  1                
##  5    16 1                        65.9 3                  1                
##  6    21 0                        70.8 2                  3                
##  7    21 0                        86.3 3                  2                
##  8    19 1                        84.3 3                  3                
##  9    17 0                        79.5 2                  2                
## 10    18 0                        74.4 1                  2                
## 11    16 1                        76.9 3                  2                
## 12    15 1                        81.9 2                  2                
## 13    17 1                        83.2 3                  2                
## 14    17 0                        65.2 3                  3                
## 15    20 1                        62   2                  1                
## # ℹ 5 more variables: jam_belajar_per_hari <dbl>, kehadiran_persen <dbl>,
## #   minat_pada_pelajaran <chr>, kesulitan_ekonomi <chr>,
## #   jarak_rumah_sekolah <dbl>
# Categorical variables. They are read from Excel as character columns holding
# codes, so they must be converted to factors before modelling.
kategori <- c("jenis_kelamin", "dukungan_orang_tua", "fasilitas_belajar",
              "minat_pada_pelajaran", "kesulitan_ekonomi", "motivasi_belajar")

# Categorical predictors = every categorical column except the target. Chosen
# by name so the result does not depend on the target being listed last.
kategori_prediktor <- setdiff(kategori, "motivasi_belajar")

# Training data: convert every categorical column (target included) to factor.
data_train <- data_train %>%
  mutate(across(all_of(kategori), as.factor))

# Test data (no target column): rebuild each factor with the TRAINING levels,
# so the encoding of the test predictors cannot differ from the encoding the
# model was fitted on just because a code is absent from the small test set.
for (kolom in kategori_prediktor) {
  level_train <- levels(data_train[[kolom]])
  nilai_test <- as.character(data_test[[kolom]])
  kode_baru <- setdiff(unique(nilai_test[!is.na(nilai_test)]), level_train)
  if (length(kode_baru) > 0) {
    # Fail early with a clear message instead of a mismatch inside predict().
    stop(
      "Column '", kolom, "' in the test data has codes not seen in training: ",
      paste(kode_baru, collapse = ", "),
      call. = FALSE
    )
  }
  data_test[[kolom]] <- factor(nilai_test, levels = level_train)
}

# Fit an SVM classifier with a radial kernel; seed kept for reproducibility.
set.seed(123)
model_svm <- svm(motivasi_belajar ~ ., data = data_train, kernel = "radial")

# Predictor columns (everything except the target), in training order.
predictors <- setdiff(names(data_train), "motivasi_belajar")
data_test_aligned <- data_test[, predictors]

# Predict learning motivation for the test rows.
prediksi_svm <- predict(model_svm, newdata = data_test_aligned)
hasil_svm <- data_test %>%
  select(usia, jenis_kelamin) %>%
  mutate(prediksi_motivasi = prediksi_svm)

print(hasil_svm)
## # A tibble: 15 × 3
##     usia jenis_kelamin prediksi_motivasi
##    <dbl> <fct>         <fct>            
##  1    15 1             2                
##  2    19 0             2                
##  3    17 0             1                
##  4    20 1             2                
##  5    16 1             1                
##  6    21 0             2                
##  7    21 0             2                
##  8    19 1             2                
##  9    17 0             2                
## 10    18 0             2                
## 11    16 1             1                
## 12    15 1             1                
## 13    17 1             2                
## 14    17 0             1                
## 15    20 1             1
# MODEL DECISION TREE
library(rpart)        # Untuk model decision tree
## Warning: package 'rpart' was built under R version 4.4.3
library(readxl)       # Untuk membaca file Excel
library(dplyr)        # Untuk manipulasi data
library(rpart.plot)   # Untuk visualisasi pohon (opsional)
## Warning: package 'rpart.plot' was built under R version 4.4.3
# Reload the labelled training set so this section starts from the raw
# (character-coded) columns rather than the factors created earlier.
# NOTE(review): the absolute path is specific to one machine.
data_train <- read_xlsx("C:/Users/Asus/Downloads/datatraining.xlsx")
# Auto-print the tibble to inspect column names and types.
data_train
## # A tibble: 200 × 11
##     usia jenis_kelamin nilai_rata_rata dukungan_orang_tua fasilitas_belajar
##    <dbl> <chr>                   <dbl> <chr>              <chr>            
##  1    15 0                        85.2 1                  2                
##  2    19 0                        60.9 2                  3                
##  3    15 0                        60.5 2                  1                
##  4    15 1                        78.6 3                  2                
##  5    16 1                        66.2 2                  1                
##  6    18 1                        85.9 2                  1                
##  7    16 1                        45.2 1                  3                
##  8    16 0                        91.6 2                  2                
##  9    15 1                        74.1 2                  1                
## 10    21 1                        88.2 3                  3                
## # ℹ 190 more rows
## # ℹ 6 more variables: jam_belajar_per_hari <dbl>, kehadiran_persen <dbl>,
## #   minat_pada_pelajaran <chr>, kesulitan_ekonomi <chr>,
## #   jarak_rumah_sekolah <dbl>, motivasi_belajar <chr>
# Reload the test set from Excel; in the rendered output below it has no
# motivasi_belajar column.
# NOTE(review): the absolute path is specific to one machine.
data_test <- read_xlsx("C:/Users/Asus/Downloads/datatesting.xlsx")
# Auto-print the tibble to inspect column names and types.
data_test
## # A tibble: 15 × 10
##     usia jenis_kelamin nilai_rata_rata dukungan_orang_tua fasilitas_belajar
##    <dbl> <chr>                   <dbl> <chr>              <chr>            
##  1    15 1                        90.2 2                  2                
##  2    19 0                        79.6 1                  1                
##  3    17 0                        66.9 2                  3                
##  4    20 1                        85.6 3                  1                
##  5    16 1                        65.9 3                  1                
##  6    21 0                        70.8 2                  3                
##  7    21 0                        86.3 3                  2                
##  8    19 1                        84.3 3                  3                
##  9    17 0                        79.5 2                  2                
## 10    18 0                        74.4 1                  2                
## 11    16 1                        76.9 3                  2                
## 12    15 1                        81.9 2                  2                
## 13    17 1                        83.2 3                  2                
## 14    17 0                        65.2 3                  3                
## 15    20 1                        62   2                  1                
## # ℹ 5 more variables: jam_belajar_per_hari <dbl>, kehadiran_persen <dbl>,
## #   minat_pada_pelajaran <chr>, kesulitan_ekonomi <chr>,
## #   jarak_rumah_sekolah <dbl>
# Categorical variables. They are read from Excel as character columns holding
# codes, so they must be converted to factors before modelling.
kategori <- c("jenis_kelamin", "dukungan_orang_tua", "fasilitas_belajar",
              "minat_pada_pelajaran", "kesulitan_ekonomi", "motivasi_belajar")

# Categorical predictors = every categorical column except the target. Chosen
# by name so the result does not depend on the target being listed last.
kategori_prediktor <- setdiff(kategori, "motivasi_belajar")

# Training data: convert every categorical column (target included) to factor.
data_train <- data_train %>%
  mutate(across(all_of(kategori), as.factor))

# Test data (no target column): rebuild each factor with the TRAINING levels,
# so the test predictors carry exactly the level set the tree was grown on,
# even when a code is absent from the small test set.
for (kolom in kategori_prediktor) {
  level_train <- levels(data_train[[kolom]])
  nilai_test <- as.character(data_test[[kolom]])
  kode_baru <- setdiff(unique(nilai_test[!is.na(nilai_test)]), level_train)
  if (length(kode_baru) > 0) {
    # Fail early with a clear message instead of an error inside predict().
    stop(
      "Column '", kolom, "' in the test data has codes not seen in training: ",
      paste(kode_baru, collapse = ", "),
      call. = FALSE
    )
  }
  data_test[[kolom]] <- factor(nilai_test, levels = level_train)
}

# Grow a classification tree on all predictors and draw it.
model_tree <- rpart(motivasi_belajar ~ ., data = data_train, method = "class")
rpart.plot(model_tree)

# Predictor columns (everything except the target), in training order.
predictors <- setdiff(names(data_train), "motivasi_belajar")
data_test_aligned <- data_test[, predictors]

# Predict the class label (not class probabilities) for the test rows.
prediksi_tree <- predict(
  model_tree,
  newdata = data_test_aligned,
  type = "class"
)
hasil_tree <- data_test %>%
  select(usia, jenis_kelamin) %>%
  mutate(prediksi_motivasi = prediksi_tree)

print(hasil_tree)
## # A tibble: 15 × 3
##     usia jenis_kelamin prediksi_motivasi
##    <dbl> <fct>         <fct>            
##  1    15 1             2                
##  2    19 0             2                
##  3    17 0             1                
##  4    20 1             2                
##  5    16 1             1                
##  6    21 0             2                
##  7    21 0             2                
##  8    19 1             3                
##  9    17 0             1                
## 10    18 0             2                
## 11    16 1             2                
## 12    15 1             1                
## 13    17 1             1                
## 14    17 0             2                
## 15    20 1             1