library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Loading required package: lattice
library(e1071)
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
## The following object is masked from 'package:dplyr':
## 
##     combine
library(rpart)
## Warning: package 'rpart' was built under R version 4.4.3
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.4.3
library(openxlsx)
## Warning: package 'openxlsx' was built under R version 4.4.3
# Read the training and testing workbooks from disk.
data_train <- read_excel(
  path = "C:/Users/HP/Documents/Semester 6/Data Mining/UAS/datatraining.xlsx"
)
data_test <- read_excel(
  path = "C:/Users/HP/Documents/Semester 6/Data Mining/UAS/datatesting.xlsx"
)

# The target must be a factor so the models below treat this as classification.
data_train[["motivasi_belajar"]] <- as.factor(data_train[["motivasi_belajar"]])

# Split the training data 80/20 into a fitting set and a hold-out set.
# The seed is fixed so the partition is the same on every run.
set.seed(123)
trainIndex <- createDataPartition(
  y = data_train[["motivasi_belajar"]],
  p = 0.8,
  list = FALSE
)
train_set <- data_train[trainIndex, ]
test_set <- data_train[-trainIndex, ]

# 5-fold cross-validated rpart model fitted through caret.
# NOTE(review): this is fitted on the full `data_train` (hold-out rows
# included), not on `train_set`, and `model` is not used in the visible part
# of the script -- confirm whether this is intentional.
control <- trainControl(method = "cv", number = 5)
model <- train(
  motivasi_belajar ~ .,
  data = data_train,
  method = "rpart",
  trControl = control
)


# Train the decision tree (classification) on the 80% fitting set.
model_dt <- rpart(
  formula = motivasi_belajar ~ .,
  data = train_set,
  method = "class"
)

# 5. Evaluate the model on the 20% hold-out set.
pred_test <- predict(
  model_dt,
  newdata = test_set,
  type = "class"
)
confusionMatrix(
  data = pred_test,
  reference = test_set[["motivasi_belajar"]]
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  1  2  3
##          1 10  4  0
##          2  6 17  2
##          3  0  0  0
## 
## Overall Statistics
##                                           
##                Accuracy : 0.6923          
##                  95% CI : (0.5243, 0.8298)
##     No Information Rate : 0.5385          
##     P-Value [Acc > NIR] : 0.0372          
##                                           
##                   Kappa : 0.3938          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.6250   0.8095  0.00000
## Specificity            0.8261   0.5556  1.00000
## Pos Pred Value         0.7143   0.6800      NaN
## Neg Pred Value         0.7600   0.7143  0.94872
## Prevalence             0.4103   0.5385  0.05128
## Detection Rate         0.2564   0.4359  0.00000
## Detection Prevalence   0.3590   0.6410  0.00000
## Balanced Accuracy      0.7255   0.6825  0.50000
# 6. Predict the class of every row in the testing data with the decision tree.
pred_final <- predict(model_dt, newdata = data_test, type = "class")

# Show the results.
# IDs are derived from the actual number of testing rows instead of a
# hard-coded 1:15: with a different row count data.frame() would either fail
# ("differing number of rows") or silently recycle the IDs.
hasil_prediksi <- data.frame(
  ID = seq_len(nrow(data_test)),
  motivasi_belajar_prediksi = pred_final
)
print(hasil_prediksi)
##    ID motivasi_belajar_prediksi
## 1   1                         2
## 2   2                         2
## 3   3                         2
## 4   4                         1
## 5   5                         1
## 6   6                         2
## 7   7                         1
## 8   8                         2
## 9   9                         1
## 10 10                         2
## 11 11                         1
## 12 12                         2
## 13 13                         2
## 14 14                         2
## 15 15                         1
# Random forest
# (randomForest is already attached with the other packages at the top of the
# script, so it is not loaded again here.)

# Train the random forest on the 80% fitting set.
model_rf <- randomForest(
  motivasi_belajar ~ .,
  data = train_set
)

# Evaluate the model on the 20% hold-out set.
pred_rf <- predict(model_rf, newdata = test_set)
confusionMatrix(
  data = pred_rf,
  reference = test_set[["motivasi_belajar"]]
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  1  2  3
##          1 10  1  0
##          2  6 20  2
##          3  0  0  0
## 
## Overall Statistics
##                                           
##                Accuracy : 0.7692          
##                  95% CI : (0.6067, 0.8887)
##     No Information Rate : 0.5385          
##     P-Value [Acc > NIR] : 0.002543        
##                                           
##                   Kappa : 0.5363          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.6250   0.9524  0.00000
## Specificity            0.9565   0.5556  1.00000
## Pos Pred Value         0.9091   0.7143      NaN
## Neg Pred Value         0.7857   0.9091  0.94872
## Prevalence             0.4103   0.5385  0.05128
## Detection Rate         0.2564   0.5128  0.00000
## Detection Prevalence   0.2821   0.7179  0.00000
## Balanced Accuracy      0.7908   0.7540  0.50000
# Predict the class of every row in the testing data with the random forest.
pred_final_rf <- predict(model_rf, newdata = data_test)

# Show the results.
# IDs follow the real row count of `data_test` rather than a hard-coded 1:15,
# so a testing file of another size cannot break or silently recycle the IDs.
hasil_rf <- data.frame(
  ID = seq_len(nrow(data_test)),
  motivasi_belajar_prediksi = pred_final_rf
)
print(hasil_rf)
##    ID motivasi_belajar_prediksi
## 1   1                         2
## 2   2                         2
## 3   3                         1
## 4   4                         2
## 5   5                         2
## 6   6                         2
## 7   7                         2
## 8   8                         2
## 9   9                         2
## 10 10                         2
## 11 11                         1
## 12 12                         1
## 13 13                         2
## 14 14                         2
## 15 15                         1
# SVM (support vector machine) with a radial kernel, fitted on the 80% set.
model_svm <- svm(
  motivasi_belajar ~ .,
  data = train_set,
  kernel = "radial"
)

# Evaluate the model on the 20% hold-out set.
pred_svm <- predict(model_svm, newdata = test_set)
confusionMatrix(
  data = pred_svm,
  reference = test_set[["motivasi_belajar"]]
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  1  2  3
##          1  8  1  0
##          2  8 20  2
##          3  0  0  0
## 
## Overall Statistics
##                                         
##                Accuracy : 0.7179        
##                  95% CI : (0.5513, 0.85)
##     No Information Rate : 0.5385        
##     P-Value [Acc > NIR] : 0.01707       
##                                         
##                   Kappa : 0.4257        
##                                         
##  Mcnemar's Test P-Value : NA            
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.5000   0.9524  0.00000
## Specificity            0.9565   0.4444  1.00000
## Pos Pred Value         0.8889   0.6667      NaN
## Neg Pred Value         0.7333   0.8889  0.94872
## Prevalence             0.4103   0.5385  0.05128
## Detection Rate         0.2051   0.5128  0.00000
## Detection Prevalence   0.2308   0.7692  0.00000
## Balanced Accuracy      0.7283   0.6984  0.50000
# Predict the class of the testing data with the SVM.
pred_final_svm <- predict(model_svm, newdata = data_test)

# Show the results.
# IDs are generated from nrow(data_test) instead of a hard-coded 1:15. If the
# prediction vector ever has a different length than the testing data,
# data.frame() stops with an error instead of pairing predictions with the
# wrong IDs.
hasil_svm <- data.frame(
  ID = seq_len(nrow(data_test)),
  motivasi_belajar_prediksi = pred_final_svm
)
print(hasil_svm)
##    ID motivasi_belajar_prediksi
## 1   1                         2
## 2   2                         2
## 3   3                         1
## 4   4                         2
## 5   5                         1
## 6   6                         2
## 7   7                         2
## 8   8                         2
## 9   9                         2
## 10 10                         2
## 11 11                         1
## 12 12                         1
## 13 13                         2
## 14 14                         1
## 15 15                         1