library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Loading required package: lattice
library(e1071)
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
## The following object is masked from 'package:dplyr':
##
## combine
library(rpart)
## Warning: package 'rpart' was built under R version 4.4.3
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.4.3
library(openxlsx)
## Warning: package 'openxlsx' was built under R version 4.4.3
# Read the labelled training sheet and the testing sheet that is scored later.
data_train <- read_excel(
  "C:/Users/HP/Documents/Semester 6/Data Mining/UAS/datatraining.xlsx"
)
data_test <- read_excel(
  "C:/Users/HP/Documents/Semester 6/Data Mining/UAS/datatesting.xlsx"
)
# The target column is stored as a factor so it is handled as a class label.
data_train <- mutate(data_train, motivasi_belajar = as.factor(motivasi_belajar))
# Hold-out split of the labelled data: 80% for fitting, 20% for evaluation.
# The seed is fixed first so that the random partition is repeatable.
set.seed(123)
# list = FALSE makes createDataPartition() return the selected row indices as
# a matrix rather than a list, so they can be used for row subsetting below.
trainIndex <- createDataPartition(data_train$motivasi_belajar, p = 0.8, list = FALSE)
train_set <- data_train[trainIndex, ]
test_set <- data_train[-trainIndex, ]
# Resampling settings for caret::train(): 5-fold cross-validation.
control <- trainControl(method = "cv", number = 5)
# Cross-validated rpart fit through caret.
# NOTE(review): this is fitted on the full `data_train`, which still contains
# the rows held out in `test_set`, and `model` is not referenced again in the
# code visible here -- confirm whether `data = train_set` was intended.
model <- train(motivasi_belajar ~ ., data = data_train, method = "rpart", trControl = control)
# Decision tree ----
# Fit a classification tree on the training partition.
model_dt <- rpart(
  motivasi_belajar ~ .,
  data = train_set,
  method = "class"
)
# Hold-out evaluation: predicted class labels against the true labels.
pred_test <- predict(object = model_dt, newdata = test_set, type = "class")
confusionMatrix(data = pred_test, reference = test_set$motivasi_belajar)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3
## 1 10 4 0
## 2 6 17 2
## 3 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.6923
## 95% CI : (0.5243, 0.8298)
## No Information Rate : 0.5385
## P-Value [Acc > NIR] : 0.0372
##
## Kappa : 0.3938
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 0.6250 0.8095 0.00000
## Specificity 0.8261 0.5556 1.00000
## Pos Pred Value 0.7143 0.6800 NaN
## Neg Pred Value 0.7600 0.7143 0.94872
## Prevalence 0.4103 0.5385 0.05128
## Detection Rate 0.2564 0.4359 0.00000
## Detection Prevalence 0.3590 0.6410 0.00000
## Balanced Accuracy 0.7255 0.6825 0.50000
# Score the testing sheet with the decision tree (class labels).
pred_final <- predict(model_dt, newdata = data_test, type = "class")
# data.frame() recycles columns whose lengths divide evenly, so a length
# mismatch could otherwise produce a silently misaligned table.
stopifnot(length(pred_final) == nrow(data_test))
# Result table: one row per testing record. IDs are derived from the data
# instead of a hard-coded 1:15, so the script still works if the sheet grows
# or shrinks.
hasil_prediksi <- data.frame(
  ID = seq_len(nrow(data_test)),
  motivasi_belajar_prediksi = pred_final
)
print(hasil_prediksi)
## ID motivasi_belajar_prediksi
## 1 1 2
## 2 2 2
## 3 3 2
## 4 4 1
## 5 5 1
## 6 6 2
## 7 7 1
## 8 8 2
## 9 9 1
## 10 10 2
## 11 11 1
## 12 12 2
## 13 13 2
## 14 14 2
## 15 15 1
# Random forest ----
library(randomForest)
# Fit a forest on the training partition using the package defaults.
# No seed is set at this point, so the fit depends on the RNG state left
# behind by the earlier calls.
model_rf <- randomForest(
  motivasi_belajar ~ .,
  data = train_set
)
# Hold-out evaluation: predicted class labels against the true labels.
pred_rf <- predict(object = model_rf, newdata = test_set)
confusionMatrix(data = pred_rf, reference = test_set$motivasi_belajar)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3
## 1 10 1 0
## 2 6 20 2
## 3 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.7692
## 95% CI : (0.6067, 0.8887)
## No Information Rate : 0.5385
## P-Value [Acc > NIR] : 0.002543
##
## Kappa : 0.5363
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 0.6250 0.9524 0.00000
## Specificity 0.9565 0.5556 1.00000
## Pos Pred Value 0.9091 0.7143 NaN
## Neg Pred Value 0.7857 0.9091 0.94872
## Prevalence 0.4103 0.5385 0.05128
## Detection Rate 0.2564 0.5128 0.00000
## Detection Prevalence 0.2821 0.7179 0.00000
## Balanced Accuracy 0.7908 0.7540 0.50000
# Score the testing sheet with the random forest.
pred_final_rf <- predict(model_rf, newdata = data_test)
# data.frame() recycles columns whose lengths divide evenly, so a length
# mismatch could otherwise produce a silently misaligned table.
stopifnot(length(pred_final_rf) == nrow(data_test))
# Result table: one row per testing record. IDs are derived from the data
# instead of a hard-coded 1:15, so the script still works if the sheet grows
# or shrinks.
hasil_rf <- data.frame(
  ID = seq_len(nrow(data_test)),
  motivasi_belajar_prediksi = pred_final_rf
)
print(hasil_rf)
## ID motivasi_belajar_prediksi
## 1 1 2
## 2 2 2
## 3 3 1
## 4 4 2
## 5 5 2
## 6 6 2
## 7 7 2
## 8 8 2
## 9 9 2
## 10 10 2
## 11 11 1
## 12 12 1
## 13 13 2
## 14 14 2
## 15 15 1
# Support vector machine ----
# Fit an SVM with a radial kernel on the training partition.
model_svm <- svm(
  motivasi_belajar ~ .,
  data = train_set,
  kernel = "radial"
)
# Hold-out evaluation: predicted class labels against the true labels.
pred_svm <- predict(object = model_svm, newdata = test_set)
confusionMatrix(data = pred_svm, reference = test_set$motivasi_belajar)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3
## 1 8 1 0
## 2 8 20 2
## 3 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.7179
## 95% CI : (0.5513, 0.85)
## No Information Rate : 0.5385
## P-Value [Acc > NIR] : 0.01707
##
## Kappa : 0.4257
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 0.5000 0.9524 0.00000
## Specificity 0.9565 0.4444 1.00000
## Pos Pred Value 0.8889 0.6667 NaN
## Neg Pred Value 0.7333 0.8889 0.94872
## Prevalence 0.4103 0.5385 0.05128
## Detection Rate 0.2051 0.5128 0.00000
## Detection Prevalence 0.2308 0.7692 0.00000
## Balanced Accuracy 0.7283 0.6984 0.50000
# Score the testing sheet with the SVM.
pred_final_svm <- predict(model_svm, newdata = data_test)
# predict() on an SVM can return fewer values than rows (for example when rows
# with missing values are dropped), and data.frame() recycles columns whose
# lengths divide evenly; fail loudly instead of building a misaligned table.
stopifnot(length(pred_final_svm) == nrow(data_test))
# Result table: one row per testing record. IDs are derived from the data
# instead of a hard-coded 1:15, so the script still works if the sheet grows
# or shrinks.
hasil_svm <- data.frame(
  ID = seq_len(nrow(data_test)),
  motivasi_belajar_prediksi = pred_final_svm
)
print(hasil_svm)
## ID motivasi_belajar_prediksi
## 1 1 2
## 2 2 2
## 3 3 1
## 4 4 2
## 5 5 1
## 6 6 2
## 7 7 2
## 8 8 2
## 9 9 2
## 10 10 2
## 11 11 1
## 12 12 1
## 13 13 2
## 14 14 1
## 15 15 1