# Memuat library yang diperlukan
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Loading required package: lattice
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(rpart)
## Warning: package 'rpart' was built under R version 4.4.3
# Read the testing and training data sets from their Excel workbooks.
# NOTE(review): these absolute, user-specific paths tie the script to one
# machine; consider a project-relative location.
datatesting <- read_excel("C:/Users/ASUS/Downloads/datatesting.xlsx")
datatraining <- read_excel("C:/Users/ASUS/Downloads/datatraining.xlsx")

# View() opens a GUI data viewer, so only call it when a user is present.
# Batch runs (Rscript, knitting on a headless machine) skip it instead of
# depending on a display being available.
if (interactive()) {
  View(datatesting)
  View(datatraining)
}

# Clean and format a raw data set for modelling.
#
# Converts the categorical predictors (and the target `motivasi_belajar`,
# when that column is present) to factors, and the continuous predictors to
# numeric. Columns not listed below are returned untouched.
#
# Args:
#   data: A data frame (or tibble) that contains every column named in
#     `categorical_vars` and `numeric_vars` below.
#
# Returns:
#   `data` with the same columns, where the listed columns have been re-typed.
#
# Raises:
#   An error naming the missing column(s) when a required predictor is absent.
preprocess_data <- function(data) {
  categorical_vars <- c(
    "jenis_kelamin", "dukungan_orang_tua", "fasilitas_belajar",
    "minat_pada_pelajaran", "kesulitan_ekonomi"
  )
  numeric_vars <- c(
    "usia", "nilai_rata_rata", "jam_belajar_per_hari",
    "kehadiran_persen", "jarak_rumah_sekolah"
  )

  # Fail early with a readable message; otherwise a missing column only
  # surfaces as a replacement-length error during assignment.
  missing_vars <- setdiff(c(categorical_vars, numeric_vars), colnames(data))
  if (length(missing_vars) > 0) {
    stop(
      "preprocess_data(): missing required column(s): ",
      paste(missing_vars, collapse = ", "),
      call. = FALSE
    )
  }

  # Categorical predictors become factors.
  data[categorical_vars] <- lapply(data[categorical_vars], as.factor)

  # The target only exists in the training data, so convert it conditionally.
  if ("motivasi_belajar" %in% colnames(data)) {
    data$motivasi_belajar <- as.factor(data$motivasi_belajar)
  }

  # as.numeric() on a factor returns the internal level codes rather than the
  # values the labels spell out, so factors are converted via character first.
  to_numeric <- function(column) {
    if (is.factor(column)) {
      column <- as.character(column)
    }
    as.numeric(column)
  }
  data[numeric_vars] <- lapply(data[numeric_vars], to_numeric)

  data
}

# Apply the shared preprocessing step to the training and testing data sets.
datatraining_clean <- preprocess_data(datatraining)
datatesting_clean <- preprocess_data(datatesting)

# Inspect the structure of the cleaned data (column types and factor levels).
# Lines starting with "##" are console output recorded from an earlier run.
# In that output the training set has 11 columns including the target
# motivasi_belajar, while the testing set has 10 columns and no target.
str(datatraining_clean)
## tibble [200 × 11] (S3: tbl_df/tbl/data.frame)
##  $ usia                : num [1:200] 15 19 15 15 16 18 16 16 15 21 ...
##  $ jenis_kelamin       : Factor w/ 2 levels "0","1": 1 1 1 2 2 2 2 1 2 2 ...
##  $ nilai_rata_rata     : num [1:200] 85.2 60.9 60.5 78.6 66.2 85.9 45.2 91.6 74.1 88.2 ...
##  $ dukungan_orang_tua  : Factor w/ 3 levels "1","2","3": 1 2 2 3 2 2 1 2 2 3 ...
##  $ fasilitas_belajar   : Factor w/ 3 levels "1","2","3": 2 3 1 2 1 1 3 2 1 3 ...
##  $ jam_belajar_per_hari: num [1:200] 4.1 3.8 6.1 5.1 5.5 3.7 2.8 6.2 4.6 3 ...
##  $ kehadiran_persen    : num [1:200] 79.7 89.8 86.7 78.5 81 85.4 78.9 78.1 96.8 82.1 ...
##  $ minat_pada_pelajaran: Factor w/ 3 levels "1","2","3": 2 2 1 3 1 2 1 2 2 3 ...
##  $ kesulitan_ekonomi   : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 1 1 1 2 ...
##  $ jarak_rumah_sekolah : num [1:200] 11.5 6.3 15.9 9.8 3.5 1 6.4 4.9 6.5 9.5 ...
##  $ motivasi_belajar    : Factor w/ 3 levels "1","2","3": 1 2 1 2 1 1 1 2 1 3 ...
str(datatesting_clean)
## tibble [15 × 10] (S3: tbl_df/tbl/data.frame)
##  $ usia                : num [1:15] 15 19 17 20 16 21 21 19 17 18 ...
##  $ jenis_kelamin       : Factor w/ 2 levels "0","1": 2 1 1 2 2 1 1 2 1 1 ...
##  $ nilai_rata_rata     : num [1:15] 90.2 79.6 66.9 85.6 65.9 70.8 86.3 84.3 79.5 74.4 ...
##  $ dukungan_orang_tua  : Factor w/ 3 levels "1","2","3": 2 1 2 3 3 2 3 3 2 1 ...
##  $ fasilitas_belajar   : Factor w/ 3 levels "1","2","3": 2 1 3 1 1 3 2 3 2 2 ...
##  $ jam_belajar_per_hari: num [1:15] 5.4 4.6 2.9 3.9 5.3 1.9 4.4 1.5 2 5.5 ...
##  $ kehadiran_persen    : num [1:15] 78.3 88.2 76.3 89.4 74 100 100 92.1 84.5 90.3 ...
##  $ minat_pada_pelajaran: Factor w/ 3 levels "1","2","3": 2 3 1 1 1 1 1 1 3 3 ...
##  $ kesulitan_ekonomi   : Factor w/ 2 levels "0","1": 2 1 1 1 2 2 1 1 1 1 ...
##  $ jarak_rumah_sekolah : num [1:15] 4.7 10.8 11.8 5.7 3.1 7.1 8.2 5.4 4.2 10.7 ...
# Count missing values across all cells of each cleaned data set
# (the recorded runs report 0 for both).
sum(is.na(datatraining_clean))
## [1] 0
sum(is.na(datatesting_clean))
## [1] 0
# SVM model: fit motivasi_belajar against all other columns of the cleaned
# training data using a radial kernel.
# NOTE(review): probability = TRUE is passed here, but the predict() call on
# svm_model later in this file does not request probabilities; confirm whether
# the probability model is actually needed.
svm_model <- svm(motivasi_belajar ~ ., 
                 data = datatraining_clean, 
                 kernel = "radial",
                 probability = TRUE)

# Model summary (lines starting with "##" are recorded console output).
summary(svm_model)
## 
## Call:
## svm(formula = motivasi_belajar ~ ., data = datatraining_clean, kernel = "radial", 
##     probability = TRUE)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
## 
## Number of Support Vectors:  177
## 
##  ( 69 94 14 )
## 
## 
## Number of Classes:  3 
## 
## Levels: 
##  1 2 3
# Random forest model: fit motivasi_belajar against all other columns of the
# cleaned training data with 500 trees, keeping variable-importance measures.
set.seed(123)  # For reproducibility
rf_model <- randomForest(motivasi_belajar ~ ., 
                        data = datatraining_clean,
                        ntree = 500,
                        importance = TRUE)

# Model summary (lines starting with "##" are recorded console output).
# NOTE(review): in the recorded confusion matrix all 14 class-3 observations
# are assigned to class 2 (class.error = 1), so class 3 is never predicted.
print(rf_model)
## 
## Call:
##  randomForest(formula = motivasi_belajar ~ ., data = datatraining_clean,      ntree = 500, importance = TRUE) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 3
## 
##         OOB estimate of  error rate: 31.5%
## Confusion matrix:
##    1  2 3 class.error
## 1 55 26 0   0.3209877
## 2 23 82 0   0.2190476
## 3  0 14 0   1.0000000
# Plot the variable importance of the fitted forest.
varImpPlot(rf_model)

# Decision tree model: fit a classification tree (method = "class") for
# motivasi_belajar against all other columns of the cleaned training data.
dt_model <- rpart(motivasi_belajar ~ .,
                 data = datatraining_clean,
                 method = "class")

# Model summary (lines starting with "##" are recorded console output).
# The recorded tree splits first on kehadiran_persen at 87.2.
print(dt_model)
## n= 200 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 200 95 2 (0.40500000 0.52500000 0.07000000)  
##    2) kehadiran_persen< 87.2 110 48 1 (0.56363636 0.40909091 0.02727273)  
##      4) nilai_rata_rata< 88.9 98 36 1 (0.63265306 0.34693878 0.02040816)  
##        8) jam_belajar_per_hari< 4.95 73 21 1 (0.71232877 0.26027397 0.02739726)  
##         16) dukungan_orang_tua=1,2 58 12 1 (0.79310345 0.20689655 0.00000000) *
##         17) dukungan_orang_tua=3 15  8 2 (0.40000000 0.46666667 0.13333333) *
##        9) jam_belajar_per_hari>=4.95 25 10 2 (0.40000000 0.60000000 0.00000000)  
##         18) nilai_rata_rata< 77.9 16  6 1 (0.62500000 0.37500000 0.00000000) *
##         19) nilai_rata_rata>=77.9 9  0 2 (0.00000000 1.00000000 0.00000000) *
##      5) nilai_rata_rata>=88.9 12  1 2 (0.00000000 0.91666667 0.08333333) *
##    3) kehadiran_persen>=87.2 90 30 2 (0.21111111 0.66666667 0.12222222)  
##      6) fasilitas_belajar=1,2 59 21 2 (0.30508475 0.64406780 0.05084746)  
##       12) kesulitan_ekonomi=1 18  8 1 (0.55555556 0.38888889 0.05555556) *
##       13) kesulitan_ekonomi=0 41 10 2 (0.19512195 0.75609756 0.04878049) *
##      7) fasilitas_belajar=3 31  9 2 (0.03225806 0.70967742 0.25806452)  
##       14) nilai_rata_rata< 77.85 22  4 2 (0.04545455 0.81818182 0.13636364) *
##       15) nilai_rata_rata>=77.85 9  4 3 (0.00000000 0.44444444 0.55555556) *
# Visualise the decision tree.
# NOTE(review): rpart.plot is loaded here rather than with the other
# library() calls at the top of the file; consider moving it there.
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.4.3
rpart.plot(dt_model)

# Predict the class of every row of the cleaned testing data with each of the
# three fitted models.
svm_pred <- predict(svm_model, newdata = datatesting_clean)
rf_pred <- predict(rf_model, newdata = datatesting_clean)
dt_pred <- predict(dt_model, newdata = datatesting_clean, type = "class")

# Combine the predictions with a few identifying columns, one row per student.
# seq_len() is used instead of 1:nrow() so that an empty testing set yields an
# empty index rather than c(1, 0).
hasil_prediksi <- data.frame(
  Siswa = seq_len(nrow(datatesting_clean)),
  Usia = datatesting_clean$usia,
  Jenis_Kelamin = datatesting_clean$jenis_kelamin,
  Nilai_Rata_Rata = datatesting_clean$nilai_rata_rata,
  SVM = svm_pred,
  Random_Forest = rf_pred,
  Decision_Tree = dt_pred
)

# Show the prediction table (lines starting with "##" are recorded output).
print(hasil_prediksi)
##    Siswa Usia Jenis_Kelamin Nilai_Rata_Rata SVM Random_Forest Decision_Tree
## 1      1   15             1            90.2   2             2             2
## 2      2   19             0            79.6   2             2             2
## 3      3   17             0            66.9   1             1             1
## 4      4   20             1            85.6   2             2             2
## 5      5   16             1            65.9   1             1             1
## 6      6   21             0            70.8   2             2             2
## 7      7   21             0            86.3   2             2             2
## 8      8   19             1            84.3   2             2             3
## 9      9   17             0            79.5   2             2             1
## 10    10   18             0            74.4   2             2             2
## 11    11   16             1            76.9   1             1             2
## 12    12   15             1            81.9   1             1             1
## 13    13   17             1            83.2   2             2             1
## 14    14   17             0            65.2   1             2             2
## 15    15   20             1            62.0   1             1             1
# Save the prediction table as a CSV file in the current working directory.
write.csv(hasil_prediksi, "hasil_prediksi_motivasi_belajar.csv", row.names = FALSE)
# 10-fold cross-validation to evaluate the three model types.
#
# The RNG is re-seeded with the same value immediately before every train()
# call so that the resampling is reproducible and the models are evaluated on
# the same fold assignment. Without a seed the folds were redrawn for each
# model: the "Summary of sample sizes" lines recorded below differ per model.
#
# Lines starting with "##" are console output recorded from an earlier run
# made before the seeds were added; a seeded run will not reproduce these
# exact numbers.
ctrl <- trainControl(method = "cv", number = 10)

# Evaluate the SVM with a radial basis function kernel.
set.seed(123)
svm_cv <- train(
  motivasi_belajar ~ .,
  data = datatraining_clean,
  method = "svmRadial",
  trControl = ctrl
)
print(svm_cv)
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 200 samples
##  10 predictor
##   3 classes: '1', '2', '3' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 180, 181, 180, 180, 178, 180, ... 
## Resampling results across tuning parameters:
## 
##   C     Accuracy   Kappa    
##   0.25  0.6924402  0.3893807
##   0.50  0.7217943  0.4574149
##   1.00  0.7225120  0.4615129
## 
## Tuning parameter 'sigma' was held constant at a value of 0.04153763
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.04153763 and C = 1.
# Evaluate the random forest.
set.seed(123)
rf_cv <- train(
  motivasi_belajar ~ .,
  data = datatraining_clean,
  method = "rf",
  trControl = ctrl
)
print(rf_cv)
## Random Forest 
## 
## 200 samples
##  10 predictor
##   3 classes: '1', '2', '3' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 179, 180, 181, 179, 179, 181, ... 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##    2    0.7259774  0.4725787
##    7    0.6961654  0.4137575
##   13    0.6666165  0.3527847
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
# Evaluate the decision tree (CART).
set.seed(123)
dt_cv <- train(
  motivasi_belajar ~ .,
  data = datatraining_clean,
  method = "rpart",
  trControl = ctrl
)
print(dt_cv)
## CART 
## 
## 200 samples
##  10 predictor
##   3 classes: '1', '2', '3' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 180, 179, 179, 180, 180, 181, ... 
## Resampling results across tuning parameters:
## 
##   cp          Accuracy   Kappa     
##   0.05263158  0.5837845  0.19869718
##   0.11578947  0.6040727  0.25556514
##   0.17894737  0.5397870  0.06657325
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.1157895.