UAS Data Mining

library(readxl)

## Warning: package 'readxl' was built under R version 4.4.3

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(caret)

## Warning: package 'caret' was built under R version 4.4.3

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 4.4.3

## Loading required package: lattice

library(e1071)
library(randomForest)

## Warning: package 'randomForest' was built under R version 4.4.3

## randomForest 4.7-1.2

## Type rfNews() to see new features/changes/bug fixes.

## 
## Attaching package: 'randomForest'

## The following object is masked from 'package:ggplot2':
## 
##     margin

## The following object is masked from 'package:dplyr':
## 
##     combine

library(rpart)

## Warning: package 'rpart' was built under R version 4.4.3

library(rpart.plot)

## Warning: package 'rpart.plot' was built under R version 4.4.3

library(openxlsx)

## Warning: package 'openxlsx' was built under R version 4.4.3

# Read the two Excel workbooks used by the rest of the script: the training
# data and the rows that will be scored at the end.
# NOTE(review): both paths are absolute paths on one machine; the script only
# runs as-is where these files exist at exactly these locations.
data_train <- readxl::read_excel(
  path = "C:/Users/HP/Documents/Semester 6/Data Mining/UAS/datatraining.xlsx"
)
data_test <- readxl::read_excel(
  path = "C:/Users/HP/Documents/Semester 6/Data Mining/UAS/datatesting.xlsx"
)

# Store the target as a factor so it is handled as a categorical outcome.
data_train$motivasi_belajar <- as.factor(data_train$motivasi_belajar)

# Split the training data 80/20 into a fitting partition (train_set) and a
# hold-out partition (test_set). The seed makes the split repeatable.
set.seed(123)
trainIndex <- createDataPartition(
  data_train$motivasi_belajar,
  p = 0.8,
  list = FALSE
)
train_set <- data_train[trainIndex, ]
test_set <- data_train[-trainIndex, ]

# 5-fold cross-validated rpart fit via caret. It is fitted on train_set only
# (not the full data_train) so that the rows in test_set remain unseen by
# every model in this script and can serve as an honest hold-out.
control <- trainControl(method = "cv", number = 5)
model <- train(
  motivasi_belajar ~ .,
  data = train_set,
  method = "rpart",
  trControl = control
)


# TRAIN THE DECISION TREE MODEL
# Classification tree of motivasi_belajar on all other columns of train_set.
model_dt <- rpart(
  formula = motivasi_belajar ~ .,
  data = train_set,
  method = "class"
)

# 5. MODEL EVALUATION
# Predict class labels for the hold-out partition and compare them with the
# observed labels.
pred_test <- predict(model_dt, newdata = test_set, type = "class")
confusionMatrix(data = pred_test, reference = test_set[["motivasi_belajar"]])

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  1  2  3
##          1 10  4  0
##          2  6 17  2
##          3  0  0  0
## 
## Overall Statistics
##                                           
##                Accuracy : 0.6923          
##                  95% CI : (0.5243, 0.8298)
##     No Information Rate : 0.5385          
##     P-Value [Acc > NIR] : 0.0372          
##                                           
##                   Kappa : 0.3938          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.6250   0.8095  0.00000
## Specificity            0.8261   0.5556  1.00000
## Pos Pred Value         0.7143   0.6800      NaN
## Neg Pred Value         0.7600   0.7143  0.94872
## Prevalence             0.4103   0.5385  0.05128
## Detection Rate         0.2564   0.4359  0.00000
## Detection Prevalence   0.3590   0.6410  0.00000
## Balanced Accuracy      0.7255   0.6825  0.50000

# 6. PREDICT THE TESTING DATA
# Class predictions from the decision tree for every row of data_test.
pred_final <- predict(model_dt, newdata = data_test, type = "class")

# Show the result.
# IDs are derived from the number of rows in data_test rather than a
# hard-coded 1:15, so the table stays correct if the testing file changes
# size (a fixed 1:15 would either error or be silently recycled).
stopifnot(length(pred_final) == nrow(data_test))
hasil_prediksi <- data.frame(
  ID = seq_len(nrow(data_test)),
  motivasi_belajar_prediksi = pred_final
)
print(hasil_prediksi)

##    ID motivasi_belajar_prediksi
## 1   1                         2
## 2   2                         2
## 3   3                         2
## 4   4                         1
## 5   5                         1
## 6   6                         2
## 7   7                         1
## 8   8                         2
## 9   9                         1
## 10 10                         2
## 11 11                         1
## 12 12                         2
## 13 13                         2
## 14 14                         2
## 15 15                         1

# RANDOM FOREST
library(randomForest)

# Train the random forest on the fitting partition, using every other column
# as a predictor of motivasi_belajar.
# NOTE(review): no seed is set in this block, so the fitted forest depends on
# the random-number state left behind by the earlier steps of the script.
model_rf <- randomForest(
  motivasi_belajar ~ .,
  data = train_set
)

# Evaluate the model on the hold-out partition.
pred_rf <- predict(model_rf, newdata = test_set)
confusionMatrix(data = pred_rf, reference = test_set[["motivasi_belajar"]])

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  1  2  3
##          1 10  1  0
##          2  6 20  2
##          3  0  0  0
## 
## Overall Statistics
##                                           
##                Accuracy : 0.7692          
##                  95% CI : (0.6067, 0.8887)
##     No Information Rate : 0.5385          
##     P-Value [Acc > NIR] : 0.002543        
##                                           
##                   Kappa : 0.5363          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.6250   0.9524  0.00000
## Specificity            0.9565   0.5556  1.00000
## Pos Pred Value         0.9091   0.7143      NaN
## Neg Pred Value         0.7857   0.9091  0.94872
## Prevalence             0.4103   0.5385  0.05128
## Detection Rate         0.2564   0.5128  0.00000
## Detection Prevalence   0.2821   0.7179  0.00000
## Balanced Accuracy      0.7908   0.7540  0.50000

# Predict the testing data with the random forest.
pred_final_rf <- predict(model_rf, newdata = data_test)

# Show the result.
# IDs are derived from the number of rows in data_test rather than a
# hard-coded 1:15, so the table stays correct if the testing file changes
# size (a fixed 1:15 would either error or be silently recycled).
stopifnot(length(pred_final_rf) == nrow(data_test))
hasil_rf <- data.frame(
  ID = seq_len(nrow(data_test)),
  motivasi_belajar_prediksi = pred_final_rf
)
print(hasil_rf)

##    ID motivasi_belajar_prediksi
## 1   1                         2
## 2   2                         2
## 3   3                         1
## 4   4                         2
## 5   5                         2
## 6   6                         2
## 7   7                         2
## 8   8                         2
## 9   9                         2
## 10 10                         2
## 11 11                         1
## 12 12                         1
## 13 13                         2
## 14 14                         2
## 15 15                         1

# SVM (Support Vector Machine)
# Radial-kernel SVM of motivasi_belajar on all other columns of train_set.
model_svm <- svm(
  motivasi_belajar ~ .,
  data = train_set,
  kernel = "radial"
)

# Evaluate the model on the hold-out partition.
pred_svm <- predict(model_svm, newdata = test_set)
confusionMatrix(data = pred_svm, reference = test_set[["motivasi_belajar"]])

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  1  2  3
##          1  8  1  0
##          2  8 20  2
##          3  0  0  0
## 
## Overall Statistics
##                                         
##                Accuracy : 0.7179        
##                  95% CI : (0.5513, 0.85)
##     No Information Rate : 0.5385        
##     P-Value [Acc > NIR] : 0.01707       
##                                         
##                   Kappa : 0.4257        
##                                         
##  Mcnemar's Test P-Value : NA            
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.5000   0.9524  0.00000
## Specificity            0.9565   0.4444  1.00000
## Pos Pred Value         0.8889   0.6667      NaN
## Neg Pred Value         0.7333   0.8889  0.94872
## Prevalence             0.4103   0.5385  0.05128
## Detection Rate         0.2051   0.5128  0.00000
## Detection Prevalence   0.2308   0.7692  0.00000
## Balanced Accuracy      0.7283   0.6984  0.50000

# Predict the testing data with the SVM.
pred_final_svm <- predict(model_svm, newdata = data_test)

# Show the result.
# IDs are derived from the number of rows in data_test rather than a
# hard-coded 1:15. The explicit length check fails loudly if the prediction
# vector and data_test disagree in length (SVM prediction can drop rows with
# missing predictors), instead of letting IDs be mismatched or recycled.
stopifnot(length(pred_final_svm) == nrow(data_test))
hasil_svm <- data.frame(
  ID = seq_len(nrow(data_test)),
  motivasi_belajar_prediksi = pred_final_svm
)
print(hasil_svm)

##    ID motivasi_belajar_prediksi
## 1   1                         2
## 2   2                         2
## 3   3                         1
## 4   4                         2
## 5   5                         1
## 6   6                         2
## 7   7                         2
## 8   8                         2
## 9   9                         2
## 10 10                         2
## 11 11                         1
## 12 12                         1
## 13 13                         2
## 14 14                         1
## 15 15                         1

UAS Data Mining

Wanda Almira M.K

2025-05-28