# 1. Install & Load Libraries
# Packages this script depends on.
packages <- c("readxl", "e1071", "randomForest", "rpart", "caret", "dplyr", "openxlsx")
# installed.packages() returns a character matrix whose row names are the
# package names. Matching against the whole matrix would also compare against
# unrelated cells (Depends, Imports, ...), so compare against the names only.
installed <- packages %in% rownames(installed.packages())
# Install only the packages that are missing.
if (any(!installed)) install.packages(packages[!installed])
# Attach every package; character.only = TRUE because the names are strings.
lapply(packages, library, character.only = TRUE)
## Warning: package 'readxl' was built under R version 4.4.3
## Warning: package 'e1071' was built under R version 4.4.3
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## Warning: package 'rpart' was built under R version 4.4.3
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
## 
##     margin
## Loading required package: lattice
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:randomForest':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Warning: package 'openxlsx' was built under R version 4.4.3
## [[1]]
## [1] "readxl"    "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [7] "methods"   "base"     
## 
## [[2]]
## [1] "e1071"     "readxl"    "stats"     "graphics"  "grDevices" "utils"    
## [7] "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "randomForest" "e1071"        "readxl"       "stats"        "graphics"    
##  [6] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[4]]
##  [1] "rpart"        "randomForest" "e1071"        "readxl"       "stats"       
##  [6] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [11] "base"        
## 
## [[5]]
##  [1] "caret"        "lattice"      "ggplot2"      "rpart"        "randomForest"
##  [6] "e1071"        "readxl"       "stats"        "graphics"     "grDevices"   
## [11] "utils"        "datasets"     "methods"      "base"        
## 
## [[6]]
##  [1] "dplyr"        "caret"        "lattice"      "ggplot2"      "rpart"       
##  [6] "randomForest" "e1071"        "readxl"       "stats"        "graphics"    
## [11] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[7]]
##  [1] "openxlsx"     "dplyr"        "caret"        "lattice"      "ggplot2"     
##  [6] "rpart"        "randomForest" "e1071"        "readxl"       "stats"       
## [11] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [16] "base"
# 2. Import Data
# Load the training and testing workbooks from disk.
data_train <- readxl::read_excel(
  path = "C:/Users/Aulia Puspita/Downloads/datatraining.xlsx"
)
data_test <- readxl::read_excel(
  path = "C:/Users/Aulia Puspita/Downloads/datatesting.xlsx"
)
# 3. Preprocessing
# Names of the columns that must be stored as factors (categorical variables)
faktor_kolom <- c(
  "jenis_kelamin",
  "dukungan_orang_tua",
  "fasilitas_belajar",
  "minat_pada_pelajaran",
  "kesulitan_ekonomi",
  "motivasi_belajar"
)

# Convert those columns of the training set to factors
data_train <- dplyr::mutate(
  data_train,
  dplyr::across(dplyr::all_of(faktor_kolom), as.factor)
)
# Inspect the structure of the training data
str(data_train)
## tibble [200 × 11] (S3: tbl_df/tbl/data.frame)
##  $ usia                : num [1:200] 15 19 15 15 16 18 16 16 15 21 ...
##  $ jenis_kelamin       : Factor w/ 2 levels "0","1": 1 1 1 2 2 2 2 1 2 2 ...
##  $ nilai_rata_rata     : num [1:200] 85.2 60.9 60.5 78.6 66.2 85.9 45.2 91.6 74.1 88.2 ...
##  $ dukungan_orang_tua  : Factor w/ 3 levels "1","2","3": 1 2 2 3 2 2 1 2 2 3 ...
##  $ fasilitas_belajar   : Factor w/ 3 levels "1","2","3": 2 3 1 2 1 1 3 2 1 3 ...
##  $ jam_belajar_per_hari: num [1:200] 4.1 3.8 6.1 5.1 5.5 3.7 2.8 6.2 4.6 3 ...
##  $ kehadiran_persen    : num [1:200] 79.7 89.8 86.7 78.5 81 85.4 78.9 78.1 96.8 82.1 ...
##  $ minat_pada_pelajaran: Factor w/ 3 levels "1","2","3": 2 2 1 3 1 2 1 2 2 3 ...
##  $ kesulitan_ekonomi   : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 1 1 1 2 ...
##  $ jarak_rumah_sekolah : num [1:200] 11.5 6.3 15.9 9.8 3.5 1 6.4 4.9 6.5 9.5 ...
##  $ motivasi_belajar    : Factor w/ 3 levels "1","2","3": 1 2 1 2 1 1 1 2 1 3 ...
# Convert the categorical columns of the test set to factors
# (all factor columns except motivasi_belajar).
faktor_kolom_test <- setdiff(faktor_kolom, "motivasi_belajar")
# Fail early if the test set lacks one of the expected columns.
stopifnot(all(faktor_kolom_test %in% names(data_test)))
data_test[faktor_kolom_test] <- lapply(data_test[faktor_kolom_test], as.factor)

# Align the factor levels of the test set with the training set
# (important for prediction).
for (col in faktor_kolom_test) {
  # Re-levelling turns any category that was not seen in training into NA.
  # Report it instead of letting it happen silently, since the downstream
  # predict() calls may not handle NA predictors as intended.
  level_baru <- setdiff(levels(data_test[[col]]), levels(data_train[[col]]))
  if (length(level_baru) > 0) {
    warning(
      "Column '", col, "' in data_test has values not present in data_train ",
      "(set to NA): ", paste(level_baru, collapse = ", "),
      call. = FALSE
    )
  }
  data_test[[col]] <- factor(data_test[[col]], levels = levels(data_train[[col]]))
}
# 4. Model Training
# Fix the random seed so that the randomised fitting steps (notably the random
# forest) give the same models, and therefore the same predictions, on every run.
set.seed(123)

## Decision Tree: classification tree of motivasi_belajar on all other columns
model_dt <- rpart(motivasi_belajar ~ ., data = data_train, method = "class")

## Random Forest: same formula, package default settings
model_rf <- randomForest(motivasi_belajar ~ ., data = data_train)

## SVM: same formula, package default settings
model_svm <- svm(motivasi_belajar ~ ., data = data_train)


# 5. Prediction
# Apply each fitted model to the test set.

## Decision Tree (type = "class" requests predicted class labels)
pred_dt <- predict(object = model_dt, newdata = data_test, type = "class")

## Random Forest
pred_rf <- predict(object = model_rf, newdata = data_test)

## SVM
pred_svm <- predict(object = model_svm, newdata = data_test)
# 6. Combine and Save Results
# One row per test observation, with the predicted class from each model.
hasil_prediksi <- data.frame(
  # seq_len() yields an empty sequence for an empty test set, whereas
  # 1:nrow() would yield c(1, 0) and break the data.frame construction.
  No = seq_len(nrow(data_test)),
  Prediksi_DecisionTree = pred_dt,
  Prediksi_RandomForest = pred_rf,
  Prediksi_SVM = pred_svm
)
# Display the results
print(hasil_prediksi)
##    No Prediksi_DecisionTree Prediksi_RandomForest Prediksi_SVM
## 1   1                     2                     2            2
## 2   2                     2                     2            2
## 3   3                     1                     1            1
## 4   4                     2                     2            2
## 5   5                     1                     1            1
## 6   6                     2                     2            2
## 7   7                     2                     2            2
## 8   8                     3                     2            2
## 9   9                     1                     2            2
## 10 10                     2                     2            2
## 11 11                     2                     1            1
## 12 12                     1                     1            1
## 13 13                     1                     2            2
## 14 14                     2                     2            1
## 15 15                     1                     1            1