# Load libraries
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 4.4.3
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
library(rpart)
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
# Input data: read the testing and training workbooks from disk.
data_testing <- read_xlsx(
  path = "C:/Users/HP Pavilion 14/Documents/Data Mining/datatesting.xlsx"
)
data_training <- read_xlsx(
  path = "C:/Users/HP Pavilion 14/Documents/Data Mining/datatraining.xlsx"
)
# Per-column summary of the training data (recorded output follows).
summary(data_training)
## usia jenis_kelamin nilai_rata_rata dukungan_orang_tua
## Min. :15.00 Length:200 Min. : 41.50 Length:200
## 1st Qu.:16.00 Class :character 1st Qu.: 66.95 Class :character
## Median :18.00 Mode :character Median : 74.40 Mode :character
## Mean :17.78 Mean : 74.14
## 3rd Qu.:20.00 3rd Qu.: 80.78
## Max. :21.00 Max. :100.00
## fasilitas_belajar jam_belajar_per_hari kehadiran_persen minat_pada_pelajaran
## Length:200 Min. :1.000 Min. : 66.30 Length:200
## Class :character 1st Qu.:2.900 1st Qu.: 79.70 Class :character
## Mode :character Median :4.050 Median : 86.10 Mode :character
## Mean :4.095 Mean : 85.53
## 3rd Qu.:5.100 3rd Qu.: 90.80
## Max. :8.800 Max. :100.00
## kesulitan_ekonomi jarak_rumah_sekolah motivasi_belajar
## Length:200 Min. : 1.000 Length:200
## Class :character 1st Qu.: 5.475 Class :character
## Mode :character Median : 8.300 Mode :character
## Mean : 8.315
## 3rd Qu.:11.300
## Max. :20.500
# Preprocessing: convert the categorical training columns, including the
# response motivasi_belajar, to factors.
kolom_faktor_training <- c(
  "jenis_kelamin", "dukungan_orang_tua", "fasilitas_belajar",
  "minat_pada_pelajaran", "kesulitan_ekonomi", "motivasi_belajar"
)
data_training[kolom_faktor_training] <- lapply(
  data_training[kolom_faktor_training],
  as.factor
)
# Inspect the resulting column types (recorded output follows).
str(data_training)
## tibble [200 × 11] (S3: tbl_df/tbl/data.frame)
## $ usia : num [1:200] 15 19 15 15 16 18 16 16 15 21 ...
## $ jenis_kelamin : Factor w/ 2 levels "0","1": 1 1 1 2 2 2 2 1 2 2 ...
## $ nilai_rata_rata : num [1:200] 85.2 60.9 60.5 78.6 66.2 85.9 45.2 91.6 74.1 88.2 ...
## $ dukungan_orang_tua : Factor w/ 3 levels "1","2","3": 1 2 2 3 2 2 1 2 2 3 ...
## $ fasilitas_belajar : Factor w/ 3 levels "1","2","3": 2 3 1 2 1 1 3 2 1 3 ...
## $ jam_belajar_per_hari: num [1:200] 4.1 3.8 6.1 5.1 5.5 3.7 2.8 6.2 4.6 3 ...
## $ kehadiran_persen : num [1:200] 79.7 89.8 86.7 78.5 81 85.4 78.9 78.1 96.8 82.1 ...
## $ minat_pada_pelajaran: Factor w/ 3 levels "1","2","3": 2 2 1 3 1 2 1 2 2 3 ...
## $ kesulitan_ekonomi : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 1 1 1 2 ...
## $ jarak_rumah_sekolah : num [1:200] 11.5 6.3 15.9 9.8 3.5 1 6.4 4.9 6.5 9.5 ...
## $ motivasi_belajar : Factor w/ 3 levels "1","2","3": 1 2 1 2 1 1 1 2 1 3 ...
# Convert the categorical predictor columns of the testing data to factors.
# The levels are taken from the corresponding training columns instead of being
# derived from the testing values alone: a small testing file may not contain
# every category, and factors with different level sets can make predict()
# fail or encode categories differently from the fitted models.
kolom_faktor_testing <- c(
  "jenis_kelamin", "dukungan_orang_tua", "fasilitas_belajar",
  "minat_pada_pelajaran", "kesulitan_ekonomi"
)
for (kolom in kolom_faktor_testing) {
  nilai_testing <- data_testing[[kolom]]
  # as.factor() is a no-op on a column that is already a factor, so this also
  # works if the training column has not been converted yet.
  level_training <- levels(as.factor(data_training[[kolom]]))
  nilai_baru <- setdiff(
    unique(as.character(nilai_testing[!is.na(nilai_testing)])),
    level_training
  )
  if (length(nilai_baru) > 0) {
    # Values never seen in training cannot be scored; they become NA below.
    warning(
      "Column '", kolom, "' in data_testing has values not present in ",
      "data_training: ", paste(nilai_baru, collapse = ", "),
      call. = FALSE
    )
  }
  data_testing[[kolom]] <- factor(nilai_testing, levels = level_training)
}
# Inspect the resulting column types (recorded output follows).
str(data_testing)
## tibble [15 × 10] (S3: tbl_df/tbl/data.frame)
## $ usia : num [1:15] 15 19 17 20 16 21 21 19 17 18 ...
## $ jenis_kelamin : Factor w/ 2 levels "0","1": 2 1 1 2 2 1 1 2 1 1 ...
## $ nilai_rata_rata : num [1:15] 90.2 79.6 66.9 85.6 65.9 70.8 86.3 84.3 79.5 74.4 ...
## $ dukungan_orang_tua : Factor w/ 3 levels "1","2","3": 2 1 2 3 3 2 3 3 2 1 ...
## $ fasilitas_belajar : Factor w/ 3 levels "1","2","3": 2 1 3 1 1 3 2 3 2 2 ...
## $ jam_belajar_per_hari: num [1:15] 5.4 4.6 2.9 3.9 5.3 1.9 4.4 1.5 2 5.5 ...
## $ kehadiran_persen : num [1:15] 78.3 88.2 76.3 89.4 74 100 100 92.1 84.5 90.3 ...
## $ minat_pada_pelajaran: Factor w/ 3 levels "1","2","3": 2 3 1 1 1 1 1 1 3 3 ...
## $ kesulitan_ekonomi : Factor w/ 2 levels "0","1": 2 1 1 1 2 2 1 1 1 1 ...
## $ jarak_rumah_sekolah : num [1:15] 4.7 10.8 11.8 5.7 3.1 7.1 8.2 5.4 4.2 10.7 ...
# Split data: stratified 80/20 partition of data_training on the response,
# with a fixed seed so the partition is reproducible.
set.seed(123)
index <- createDataPartition(
  y = data_training[["motivasi_belajar"]],
  p = 0.8,
  list = FALSE
)
# Rows selected by the partition form the training part; the rest are held out.
valid_set <- data_training[-index, ]
train_set <- data_training[index, ]
# Train the SVM model (linear kernel) on train_set only.
# Fitting on the full data_training would include the rows of valid_set in the
# training data, so the confusion matrix would not measure held-out
# performance.
model_svm <- svm(motivasi_belajar ~ ., data = train_set, kernel = "linear")
# Evaluate on the held-out validation rows.
prediksi_svm <- predict(model_svm, newdata = valid_set)
confusion_matrix <- confusionMatrix(prediksi_svm, valid_set$motivasi_belajar)
print(confusion_matrix)
# NOTE: the output recorded after this block was produced by a model fitted on
# all of data_training; re-run the script to refresh it.
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3
## 1 9 1 0
## 2 7 20 2
## 3 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.7436
## 95% CI : (0.5787, 0.8696)
## No Information Rate : 0.5385
## P-Value [Acc > NIR] : 0.006998
##
## Kappa : 0.4814
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 0.5625 0.9524 0.00000
## Specificity 0.9565 0.5000 1.00000
## Pos Pred Value 0.9000 0.6897 NaN
## Neg Pred Value 0.7586 0.9000 0.94872
## Prevalence 0.4103 0.5385 0.05128
## Detection Rate 0.2308 0.5128 0.00000
## Detection Prevalence 0.2564 0.7436 0.00000
## Balanced Accuracy 0.7595 0.7262 0.50000
# Prediction: score the testing data with the SVM model and store the result
# in the prediksi_motivasi_belajar column.
svm_pred_test <- predict(object = model_svm, newdata = data_testing)
data_testing[["prediksi_motivasi_belajar"]] <- svm_pred_test
# Show only the prediction column (recorded output follows).
print(data_testing[, "prediksi_motivasi_belajar"])
## # A tibble: 15 × 1
## prediksi_motivasi_belajar
## <fct>
## 1 2
## 2 2
## 3 1
## 4 2
## 5 1
## 6 2
## 7 2
## 8 2
## 9 2
## 10 2
## 11 1
## 12 1
## 13 2
## 14 1
## 15 1
# Train the random forest (100 trees, 2 candidate variables per split) on
# train_set only.
# Fitting on the full data_training would include the rows of valid_set in the
# training data, which makes the validation accuracy meaninglessly optimistic.
model_rf <- randomForest(
  motivasi_belajar ~ .,
  data = train_set,
  ntree = 100,
  mtry = 2,
  importance = TRUE
)
# Evaluate on the held-out validation rows.
prediksi_rf <- predict(model_rf, newdata = valid_set)
confusion_matrix <- confusionMatrix(prediksi_rf, valid_set$motivasi_belajar)
print(confusion_matrix)
# NOTE: the output recorded after this block was produced by a model fitted on
# all of data_training; re-run the script to refresh it.
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3
## 1 16 0 0
## 2 0 21 0
## 3 0 0 2
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.9097, 1)
## No Information Rate : 0.5385
## P-Value [Acc > NIR] : 3.274e-11
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 1.0000 1.0000 1.00000
## Specificity 1.0000 1.0000 1.00000
## Pos Pred Value 1.0000 1.0000 1.00000
## Neg Pred Value 1.0000 1.0000 1.00000
## Prevalence 0.4103 0.5385 0.05128
## Detection Rate 0.4103 0.5385 0.05128
## Detection Prevalence 0.4103 0.5385 0.05128
## Balanced Accuracy 1.0000 1.0000 1.00000
# Prediction: score the testing data with the random forest and store the
# result in the prediksi_motivasi_belajar column (replacing its prior content).
rf_pred_test <- predict(object = model_rf, newdata = data_testing)
data_testing[["prediksi_motivasi_belajar"]] <- rf_pred_test
# Show only the prediction column (recorded output follows).
print(data_testing[, "prediksi_motivasi_belajar"])
## # A tibble: 15 × 1
## prediksi_motivasi_belajar
## <fct>
## 1 2
## 2 2
## 3 1
## 4 2
## 5 1
## 6 2
## 7 2
## 8 2
## 9 2
## 10 2
## 11 1
## 12 1
## 13 2
## 14 2
## 15 1
# Train the decision tree: fit a classification tree on train_set.
model_dt <- rpart(
  formula = motivasi_belajar ~ .,
  data = train_set,
  method = "class"
)
# Predict class labels for the held-out rows and compare with the true labels.
prediksi_dt <- predict(object = model_dt, newdata = valid_set, type = "class")
confusion_matrix <- confusionMatrix(
  data = prediksi_dt,
  reference = valid_set[["motivasi_belajar"]]
)
print(confusion_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3
## 1 10 4 0
## 2 6 17 2
## 3 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.6923
## 95% CI : (0.5243, 0.8298)
## No Information Rate : 0.5385
## P-Value [Acc > NIR] : 0.0372
##
## Kappa : 0.3938
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 0.6250 0.8095 0.00000
## Specificity 0.8261 0.5556 1.00000
## Pos Pred Value 0.7143 0.6800 NaN
## Neg Pred Value 0.7600 0.7143 0.94872
## Prevalence 0.4103 0.5385 0.05128
## Detection Rate 0.2564 0.4359 0.00000
## Detection Prevalence 0.3590 0.6410 0.00000
## Balanced Accuracy 0.7255 0.6825 0.50000
# Prediction: score the testing data with the decision tree and store the
# class labels in the prediksi_motivasi_belajar column (replacing its prior
# content).
dt_pred_test <- predict(object = model_dt, newdata = data_testing, type = "class")
data_testing[["prediksi_motivasi_belajar"]] <- dt_pred_test
# Show only the prediction column (recorded output follows).
print(data_testing[, "prediksi_motivasi_belajar"])
## # A tibble: 15 × 1
## prediksi_motivasi_belajar
## <fct>
## 1 2
## 2 2
## 3 2
## 4 1
## 5 1
## 6 2
## 7 1
## 8 2
## 9 1
## 10 2
## 11 1
## 12 2
## 13 2
## 14 2
## 15 1