# Preprocessing
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
##
## The following object is masked from 'package:dplyr':
##
## combine
##
## The following object is masked from 'package:ggplot2':
##
## margin
library(rpart)
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.4.3
# Load the data ----
# The data folder defaults to the original author's location. Set the
# UAS_DATA_DIR environment variable to run the script on another machine
# without editing the code.
data_dir <- Sys.getenv("UAS_DATA_DIR", unset = "")
if (!nzchar(data_dir)) {
  data_dir <- "C:/Users/LENOVO/Documents/DATA MINING/UAS"
}
if (!dir.exists(data_dir)) {
  stop("Data directory not found: ", data_dir, call. = FALSE)
}
# The working directory is still switched because the relative file names
# below, and the relative CSV path written at the end of the script, resolve
# against it.
setwd(data_dir)
train <- read_excel("datatraining.xlsx")
test <- read_excel("datatesting.xlsx")
# Show the column types of the training data as read from the spreadsheet.
str(train)
## tibble [200 × 11] (S3: tbl_df/tbl/data.frame)
## $ usia : num [1:200] 15 19 15 15 16 18 16 16 15 21 ...
## $ jenis_kelamin : chr [1:200] "0" "0" "0" "1" ...
## $ nilai_rata_rata : num [1:200] 85.2 60.9 60.5 78.6 66.2 85.9 45.2 91.6 74.1 88.2 ...
## $ dukungan_orang_tua : chr [1:200] "1" "2" "2" "3" ...
## $ fasilitas_belajar : chr [1:200] "2" "3" "1" "2" ...
## $ jam_belajar_per_hari: num [1:200] 4.1 3.8 6.1 5.1 5.5 3.7 2.8 6.2 4.6 3 ...
## $ kehadiran_persen : num [1:200] 79.7 89.8 86.7 78.5 81 85.4 78.9 78.1 96.8 82.1 ...
## $ minat_pada_pelajaran: chr [1:200] "2" "2" "1" "3" ...
## $ kesulitan_ekonomi : chr [1:200] "0" "1" "1" "1" ...
## $ jarak_rumah_sekolah : num [1:200] 11.5 6.3 15.9 9.8 3.5 1 6.4 4.9 6.5 9.5 ...
## $ motivasi_belajar : chr [1:200] "1" "2" "1" "2" ...
# Columns that hold categorical codes and are converted to factors.
# The test list is the training list without `motivasi_belajar`, the
# response used in the model formulas.
cols_factor_train <- c(
  "jenis_kelamin", "dukungan_orang_tua", "fasilitas_belajar",
  "minat_pada_pelajaran", "kesulitan_ekonomi", "motivasi_belajar"
)
cols_factor_test <- c(
  "jenis_kelamin", "dukungan_orang_tua", "fasilitas_belajar",
  "minat_pada_pelajaran", "kesulitan_ekonomi"
)
train[cols_factor_train] <- lapply(train[cols_factor_train], as.factor)
# Build the test factors from the TRAINING levels. Converting the test columns
# on their own derives levels from whatever values happen to occur there, so a
# category absent from the test file would give a factor with fewer levels and
# make predict() fail or misalign for models that require matching levels.
test[cols_factor_test] <- lapply(cols_factor_test, function(col) {
  train_levels <- levels(train[[col]])
  unseen <- setdiff(unique(as.character(test[[col]])), c(train_levels, NA))
  if (length(unseen) > 0) {
    # Values never seen in training cannot be scored; they become NA below.
    warning(
      "Column '", col, "' in the test data has values not present in ",
      "training (set to NA): ", paste(unseen, collapse = ", "),
      call. = FALSE
    )
  }
  factor(test[[col]], levels = train_levels)
})
# Missing-value check: count the NA cells in each data set, then report them.
n_missing_train <- sum(is.na(train))
n_missing_test <- sum(is.na(test))
cat("Jumlah data kosong pada training:", n_missing_train, "\n")
## Jumlah data kosong pada training: 0
cat("Jumlah data kosong pada testing:", n_missing_test, "\n")
## Jumlah data kosong pada testing: 0
# Model training ----
# A single seed is set up front and the three fits run in a fixed order;
# keep this order, since reordering the calls would change the random draws
# each model receives.
set.seed(123)

# Decision tree (rpart, classification method)
model_dt <- rpart(
  motivasi_belajar ~ .,
  data = train,
  method = "class"
)

# Random forest with 100 trees
model_rf <- randomForest(
  motivasi_belajar ~ .,
  data = train,
  ntree = 100
)

# Support vector machine with a radial kernel
model_svm <- svm(
  motivasi_belajar ~ .,
  data = train,
  kernel = "radial"
)
# Prediction ----
# Score the test data with each fitted model. The decision tree is asked for
# class labels explicitly via `type = "class"`.
pred_dt <- predict(model_dt, newdata = test, type = "class")
pred_rf <- predict(model_rf, newdata = test)
pred_svm <- predict(model_svm, newdata = test)
# Combine the prediction results: one column per model, one row per test
# observation, then print the table.
hasil_prediksi <- data.frame(
  DecisionTree = pred_dt,
  RandomForest = pred_rf,
  SVM = pred_svm
)
print(hasil_prediksi)
## DecisionTree RandomForest SVM
## 1 2 2 2
## 2 2 2 2
## 3 1 1 1
## 4 2 2 2
## 5 1 1 1
## 6 2 2 2
## 7 2 2 2
## 8 3 2 2
## 9 1 2 2
## 10 2 2 2
## 11 2 1 1
## 12 1 1 1
## 13 1 2 2
## 14 2 2 1
## 15 1 1 1
# Save the final result ----
# Write the combined predictions to a CSV file without a row-name column.
write.csv(
  x = hasil_prediksi,
  file = "hasil_prediksi.csv",
  row.names = FALSE
)