#Load Library

library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Loading required package: lattice
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
library(rpart)
## Warning: package 'rpart' was built under R version 4.4.3
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin

# Import Data

# Locations of the two Excel workbooks read by this script.
path_training <- "C:/Users/canti/Downloads/datatraining.xlsx"
path_testing <- "C:/Users/canti/Downloads/datatesting.xlsx"

# Load each workbook with readxl; the results are stored as datatraining
# and datatesting for the rest of the script.
datatraining <- read_excel(path_training)
datatesting <- read_excel(path_testing)

# Preprocessing Data

# Categorical columns of the training set. motivasi_belajar is included
# here; it is the response variable of the models fitted further down.
factor_cols <- c(
  "jenis_kelamin", "dukungan_orang_tua", "fasilitas_belajar",
  "minat_pada_pelajaran", "kesulitan_ekonomi", "motivasi_belajar"
)

# Columns that are coerced to double.
numeric_cols <- c(
  "nilai_rata_rata", "jam_belajar_per_hari",
  "kehadiran_persen", "jarak_rumah_sekolah"
)

# Convert the categorical columns to factors, one column at a time.
for (col in factor_cols) {
  datatraining[[col]] <- as.factor(datatraining[[col]])
}

# usia is coerced to integer; the remaining measurements to numeric.
datatraining[["usia"]] <- as.integer(datatraining[["usia"]])
datatraining[numeric_cols] <- lapply(datatraining[numeric_cols], as.numeric)

# Check the resulting structure
str(datatraining)
## tibble [200 × 11] (S3: tbl_df/tbl/data.frame)
##  $ usia                : int [1:200] 15 19 15 15 16 18 16 16 15 21 ...
##  $ jenis_kelamin       : Factor w/ 2 levels "0","1": 1 1 1 2 2 2 2 1 2 2 ...
##  $ nilai_rata_rata     : num [1:200] 85.2 60.9 60.5 78.6 66.2 85.9 45.2 91.6 74.1 88.2 ...
##  $ dukungan_orang_tua  : Factor w/ 3 levels "1","2","3": 1 2 2 3 2 2 1 2 2 3 ...
##  $ fasilitas_belajar   : Factor w/ 3 levels "1","2","3": 2 3 1 2 1 1 3 2 1 3 ...
##  $ jam_belajar_per_hari: num [1:200] 4.1 3.8 6.1 5.1 5.5 3.7 2.8 6.2 4.6 3 ...
##  $ kehadiran_persen    : num [1:200] 79.7 89.8 86.7 78.5 81 85.4 78.9 78.1 96.8 82.1 ...
##  $ minat_pada_pelajaran: Factor w/ 3 levels "1","2","3": 2 2 1 3 1 2 1 2 2 3 ...
##  $ kesulitan_ekonomi   : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 1 1 1 2 ...
##  $ jarak_rumah_sekolah : num [1:200] 11.5 6.3 15.9 9.8 3.5 1 6.4 4.9 6.5 9.5 ...
##  $ motivasi_belajar    : Factor w/ 3 levels "1","2","3": 1 2 1 2 1 1 1 2 1 3 ...
# Categorical columns of the testing set (the same list as for training,
# minus motivasi_belajar, which is not converted here).
factor_cols_test <- c(
  "jenis_kelamin", "dukungan_orang_tua", "fasilitas_belajar",
  "minat_pada_pelajaran", "kesulitan_ekonomi"
)

# Columns that are coerced to double.
numeric_cols_test <- c(
  "nilai_rata_rata", "jam_belajar_per_hari",
  "kehadiran_persen", "jarak_rumah_sekolah"
)

# Convert the categorical columns to factors, one column at a time.
for (col in factor_cols_test) {
  datatesting[[col]] <- as.factor(datatesting[[col]])
}

# usia is coerced to integer; the remaining measurements to numeric.
datatesting[["usia"]] <- as.integer(datatesting[["usia"]])
datatesting[numeric_cols_test] <- lapply(
  datatesting[numeric_cols_test],
  as.numeric
)

# Check the resulting structure
str(datatesting)
## tibble [15 × 10] (S3: tbl_df/tbl/data.frame)
##  $ usia                : int [1:15] 15 19 17 20 16 21 21 19 17 18 ...
##  $ jenis_kelamin       : Factor w/ 2 levels "0","1": 2 1 1 2 2 1 1 2 1 1 ...
##  $ nilai_rata_rata     : num [1:15] 90.2 79.6 66.9 85.6 65.9 70.8 86.3 84.3 79.5 74.4 ...
##  $ dukungan_orang_tua  : Factor w/ 3 levels "1","2","3": 2 1 2 3 3 2 3 3 2 1 ...
##  $ fasilitas_belajar   : Factor w/ 3 levels "1","2","3": 2 1 3 1 1 3 2 3 2 2 ...
##  $ jam_belajar_per_hari: num [1:15] 5.4 4.6 2.9 3.9 5.3 1.9 4.4 1.5 2 5.5 ...
##  $ kehadiran_persen    : num [1:15] 78.3 88.2 76.3 89.4 74 100 100 92.1 84.5 90.3 ...
##  $ minat_pada_pelajaran: Factor w/ 3 levels "1","2","3": 2 3 1 1 1 1 1 1 3 3 ...
##  $ kesulitan_ekonomi   : Factor w/ 2 levels "0","1": 2 1 1 1 2 2 1 1 1 1 ...
##  $ jarak_rumah_sekolah : num [1:15] 4.7 10.8 11.8 5.7 3.1 7.1 8.2 5.4 4.2 10.7 ...
# Per-column summary of the training set: quantiles and mean for the
# numeric columns, level counts for the factor columns.
summary(datatraining)
##       usia       jenis_kelamin nilai_rata_rata  dukungan_orang_tua
##  Min.   :15.00   0: 95         Min.   : 41.50   1: 39             
##  1st Qu.:16.00   1:105         1st Qu.: 66.95   2:111             
##  Median :18.00                 Median : 74.40   3: 50             
##  Mean   :17.78                 Mean   : 74.14                     
##  3rd Qu.:20.00                 3rd Qu.: 80.78                     
##  Max.   :21.00                 Max.   :100.00                     
##  fasilitas_belajar jam_belajar_per_hari kehadiran_persen minat_pada_pelajaran
##  1:52              Min.   :1.000        Min.   : 66.30   1:65                
##  2:88              1st Qu.:2.900        1st Qu.: 79.70   2:76                
##  3:60              Median :4.050        Median : 86.10   3:59                
##                    Mean   :4.095        Mean   : 85.53                       
##                    3rd Qu.:5.100        3rd Qu.: 90.80                       
##                    Max.   :8.800        Max.   :100.00                       
##  kesulitan_ekonomi jarak_rumah_sekolah motivasi_belajar
##  0:112             Min.   : 1.000      1: 81           
##  1: 88             1st Qu.: 5.475      2:105           
##                    Median : 8.300      3: 14           
##                    Mean   : 8.315                      
##                    3rd Qu.:11.300                      
##                    Max.   :20.500
# Per-column summary of the testing set: quantiles and mean for the
# numeric columns, level counts for the factor columns.
summary(datatesting)
##       usia       jenis_kelamin nilai_rata_rata dukungan_orang_tua
##  Min.   :15.00   0:7           Min.   :62.00   1:2               
##  1st Qu.:16.50   1:8           1st Qu.:68.85   2:6               
##  Median :17.00                 Median :79.50   3:7               
##  Mean   :17.87                 Mean   :76.85                     
##  3rd Qu.:19.50                 3rd Qu.:83.75                     
##  Max.   :21.00                 Max.   :90.20                     
##  fasilitas_belajar jam_belajar_per_hari kehadiran_persen minat_pada_pelajaran
##  1:4               Min.   :1.400        Min.   : 64.90   1:9                 
##  2:7               1st Qu.:1.900        1st Qu.: 77.15   2:3                 
##  3:4               Median :2.900        Median : 84.50   3:3                 
##                    Mean   :3.287        Mean   : 83.64                       
##                    3rd Qu.:4.500        3rd Qu.: 89.85                       
##                    Max.   :5.500        Max.   :100.00                       
##  kesulitan_ekonomi jarak_rumah_sekolah
##  0:10              Min.   : 3.100     
##  1: 5              1st Qu.: 5.050     
##                    Median : 8.000     
##                    Mean   : 7.667     
##                    3rd Qu.:10.050     
##                    Max.   :12.500
# Count the missing values (NA) in each column of the training set.
colSums(is.na(datatraining))
##                 usia        jenis_kelamin      nilai_rata_rata 
##                    0                    0                    0 
##   dukungan_orang_tua    fasilitas_belajar jam_belajar_per_hari 
##                    0                    0                    0 
##     kehadiran_persen minat_pada_pelajaran    kesulitan_ekonomi 
##                    0                    0                    0 
##  jarak_rumah_sekolah     motivasi_belajar 
##                    0                    0
# Count the missing values (NA) in each column of the testing set.
colSums(is.na(datatesting))
##                 usia        jenis_kelamin      nilai_rata_rata 
##                    0                    0                    0 
##   dukungan_orang_tua    fasilitas_belajar jam_belajar_per_hari 
##                    0                    0                    0 
##     kehadiran_persen minat_pada_pelajaran    kesulitan_ekonomi 
##                    0                    0                    0 
##  jarak_rumah_sekolah 
##                    0
# Align the factor columns of the testing set with the training set

# Predictor factor columns: every categorical column except
# motivasi_belajar, which is dropped from the list used for the testing set.
factor_cols_test <- setdiff(factor_cols, "motivasi_belajar")

# Re-encode each testing column using the levels of the matching training
# column, so both tables share the same level set and ordering.
for (col in factor_cols_test) {
  raw_values <- as.character(datatesting[[col]])
  aligned <- factor(raw_values, levels = levels(datatraining[[col]]))

  # factor() silently maps any value that is not listed in `levels` to NA.
  # Report such values instead of letting them disappear unnoticed.
  unseen <- unique(raw_values[is.na(aligned) & !is.na(raw_values)])
  if (length(unseen) > 0) {
    warning(
      "Column '", col, "' in datatesting has values not present in ",
      "datatraining (converted to NA): ",
      paste(unseen, collapse = ", "),
      call. = FALSE
    )
  }

  datatesting[[col]] <- aligned
}

# Training Model

# Fix the random number generator state before fitting. randomForest()
# relies on random sampling, so without a seed the fitted forest (and the
# predictions derived from it) can differ from one run to the next.
set.seed(123)

# Decision Tree: classification tree for motivasi_belajar using all other
# columns of datatraining as predictors.
model_dt <- rpart(motivasi_belajar ~ ., data = datatraining, method = "class")

# Random Forest: same formula, package default settings.
model_rf <- randomForest(motivasi_belajar ~ ., data = datatraining)

# SVM: same formula, package default settings.
model_svm <- svm(motivasi_belajar ~ ., data = datatraining)

# Prediction

# Decision Tree: type = "class" requests predicted class labels.
pred_dt <- predict(model_dt, datatesting, type = "class")

# Random Forest
pred_rf <- predict(model_rf, datatesting)

# SVM
pred_svm <- predict(model_svm, datatesting)

# Collect the three sets of predictions side by side, one row per testing
# observation. seq_len() is used for the row counter because 1:nrow(x)
# would yield c(1, 0) for an empty table.
hasil_prediksi <- data.frame(
  No = seq_len(nrow(datatesting)),
  Prediksi_DecisionTree = pred_dt,
  Prediksi_RandomForest = pred_rf,
  Prediksi_SVM = pred_svm
)

print(hasil_prediksi)
##    No Prediksi_DecisionTree Prediksi_RandomForest Prediksi_SVM
## 1   1                     2                     2            2
## 2   2                     2                     2            2
## 3   3                     1                     1            1
## 4   4                     2                     2            2
## 5   5                     1                     1            1
## 6   6                     2                     2            2
## 7   7                     2                     2            2
## 8   8                     3                     2            2
## 9   9                     1                     2            2
## 10 10                     2                     2            2
## 11 11                     2                     1            1
## 12 12                     1                     1            1
## 13 13                     1                     2            2
## 14 14                     2                     2            1
## 15 15                     1                     1            1