#Preprocessing

library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
## 
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(rpart)
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.4.3
# Make the project folder the working directory so the relative file names
# used by this script resolve against it.
# NOTE(review): this absolute path is machine-specific; the script stops here
# on any computer where the folder does not exist.
setwd(dir = "C:/Users/LENOVO/Documents/DATA MINING/UAS")

# Read the training and testing workbooks.
train <- read_excel(path = "datatraining.xlsx")
test <- read_excel(path = "datatesting.xlsx")

# Print the column names and storage types of the training data.
str(train)
## tibble [200 × 11] (S3: tbl_df/tbl/data.frame)
##  $ usia                : num [1:200] 15 19 15 15 16 18 16 16 15 21 ...
##  $ jenis_kelamin       : chr [1:200] "0" "0" "0" "1" ...
##  $ nilai_rata_rata     : num [1:200] 85.2 60.9 60.5 78.6 66.2 85.9 45.2 91.6 74.1 88.2 ...
##  $ dukungan_orang_tua  : chr [1:200] "1" "2" "2" "3" ...
##  $ fasilitas_belajar   : chr [1:200] "2" "3" "1" "2" ...
##  $ jam_belajar_per_hari: num [1:200] 4.1 3.8 6.1 5.1 5.5 3.7 2.8 6.2 4.6 3 ...
##  $ kehadiran_persen    : num [1:200] 79.7 89.8 86.7 78.5 81 85.4 78.9 78.1 96.8 82.1 ...
##  $ minat_pada_pelajaran: chr [1:200] "2" "2" "1" "3" ...
##  $ kesulitan_ekonomi   : chr [1:200] "0" "1" "1" "1" ...
##  $ jarak_rumah_sekolah : num [1:200] 11.5 6.3 15.9 9.8 3.5 1 6.4 4.9 6.5 9.5 ...
##  $ motivasi_belajar    : chr [1:200] "1" "2" "1" "2" ...
# Columns to convert to factors in the training data, including the target
# `motivasi_belajar`.
cols_factor_train <- c(
  "jenis_kelamin", "dukungan_orang_tua", "fasilitas_belajar",
  "minat_pada_pelajaran", "kesulitan_ekonomi", "motivasi_belajar"
)
# The test data gets the same conversion for every column except the target.
# Deriving this list from the one above keeps the two from drifting apart.
cols_factor_test <- setdiff(cols_factor_train, "motivasi_belajar")

train[cols_factor_train] <- lapply(train[cols_factor_train], as.factor)

# Find test columns holding a category that never occurs in the training
# data; such values cannot be represented on the training levels.
unseen_cols <- cols_factor_test[vapply(
  cols_factor_test,
  function(col) {
    !all(as.character(test[[col]]) %in% c(levels(train[[col]]), NA))
  },
  logical(1)
)]
if (length(unseen_cols) > 0) {
  warning(
    "Test data contains categories not seen in training (set to NA): ",
    paste(unseen_cols, collapse = ", "),
    call. = FALSE
  )
}

# Build the test factors on the *training* levels rather than on whatever
# values happen to appear in the test file, so both data sets share identical
# level sets when passed to predict().
test[cols_factor_test] <- Map(
  function(values, reference) factor(values, levels = levels(reference)),
  test[cols_factor_test],
  train[cols_factor_test]
)

# Count the missing cells in each data set and report the totals.
n_missing_train <- sum(is.na(train))
cat("Jumlah data kosong pada training:", n_missing_train, "\n")
## Jumlah data kosong pada training: 0
n_missing_test <- sum(is.na(test))
cat("Jumlah data kosong pada testing:", n_missing_test, "\n")
## Jumlah data kosong pada testing: 0

# Model training ----

# Seed the random number generator once, before any model is fitted.
set.seed(123)

# Decision tree: classification tree of `motivasi_belajar` on all other
# columns of the training data.
model_dt <- rpart(
  motivasi_belajar ~ .,
  data = train,
  method = "class"
)

# Random forest with 100 trees on the same formula and data.
model_rf <- randomForest(
  motivasi_belajar ~ .,
  data = train,
  ntree = 100
)

# Support vector machine with a radial kernel on the same formula and data.
model_svm <- svm(
  motivasi_belajar ~ .,
  data = train,
  kernel = "radial"
)

# Prediction ----

# Predict the target for the test data with each fitted model.
pred_dt <- predict(model_dt, newdata = test, type = "class")
pred_rf <- predict(model_rf, newdata = test)
pred_svm <- predict(model_svm, newdata = test)

# Put the three prediction vectors side by side, one column per model.
hasil_prediksi <- data.frame(
  DecisionTree = pred_dt,
  RandomForest = pred_rf,
  SVM          = pred_svm
)

# Show the combined predictions.
print(hasil_prediksi)
##    DecisionTree RandomForest SVM
## 1             2            2   2
## 2             2            2   2
## 3             1            1   1
## 4             2            2   2
## 5             1            1   1
## 6             2            2   2
## 7             2            2   2
## 8             3            2   2
## 9             1            2   2
## 10            2            2   2
## 11            2            1   1
## 12            1            1   1
## 13            1            2   2
## 14            2            2   1
## 15            1            1   1

# Save the final result ----

# Write the combined predictions to a CSV file without a row-name column.
# The relative file name resolves against the current working directory.
write.csv(
  x = hasil_prediksi,
  file = "hasil_prediksi.csv",
  row.names = FALSE
)