Pertama panggil library yang diperlukan

library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Loading required package: lattice
library(e1071)
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
## The following object is masked from 'package:dplyr':
## 
##     combine
library(rpart)

Kedua, panggil data yang sudah disiapkan. Di sini ada dua data, yaitu data training dan data testing. Tampilkan ringkasan data untuk melihat apakah ada data yang hilang (NA).

# Directory holding both Excel workbooks; kept in one place so the location
# only has to be changed once.
data_dir <- "D:/kuliah/Data Mining/UAS"
datatraining <- read_excel(file.path(data_dir, "datatraining.xlsx"))
datatesting <- read_excel(file.path(data_dir, "datatesting.xlsx"))

# summary() only prints Length/Class/Mode for character columns, so missing
# values in those columns would go unnoticed; check every column explicitly.
if (anyNA(datatraining) || anyNA(datatesting)) {
  warning(
    "Missing values (NA) found in the training or testing data.",
    call. = FALSE
  )
}

# Per-column summary of the training data.
summary(datatraining)
##       usia       jenis_kelamin      nilai_rata_rata  dukungan_orang_tua
##  Min.   :15.00   Length:200         Min.   : 41.50   Length:200        
##  1st Qu.:16.00   Class :character   1st Qu.: 66.95   Class :character  
##  Median :18.00   Mode  :character   Median : 74.40   Mode  :character  
##  Mean   :17.78                      Mean   : 74.14                     
##  3rd Qu.:20.00                      3rd Qu.: 80.78                     
##  Max.   :21.00                      Max.   :100.00                     
##  fasilitas_belajar  jam_belajar_per_hari kehadiran_persen minat_pada_pelajaran
##  Length:200         Min.   :1.000        Min.   : 66.30   Length:200          
##  Class :character   1st Qu.:2.900        1st Qu.: 79.70   Class :character    
##  Mode  :character   Median :4.050        Median : 86.10   Mode  :character    
##                     Mean   :4.095        Mean   : 85.53                       
##                     3rd Qu.:5.100        3rd Qu.: 90.80                       
##                     Max.   :8.800        Max.   :100.00                       
##  kesulitan_ekonomi  jarak_rumah_sekolah motivasi_belajar  
##  Length:200         Min.   : 1.000      Length:200        
##  Class :character   1st Qu.: 5.475      Class :character  
##  Mode  :character   Median : 8.300      Mode  :character  
##                     Mean   : 8.315                        
##                     3rd Qu.:11.300                        
##                     Max.   :20.500
# Per-column summary of the testing data: quartiles for numeric columns,
# length/class/mode for character columns.
summary(datatesting)
##       usia       jenis_kelamin      nilai_rata_rata dukungan_orang_tua
##  Min.   :15.00   Length:15          Min.   :62.00   Length:15         
##  1st Qu.:16.50   Class :character   1st Qu.:68.85   Class :character  
##  Median :17.00   Mode  :character   Median :79.50   Mode  :character  
##  Mean   :17.87                      Mean   :76.85                     
##  3rd Qu.:19.50                      3rd Qu.:83.75                     
##  Max.   :21.00                      Max.   :90.20                     
##  fasilitas_belajar  jam_belajar_per_hari kehadiran_persen minat_pada_pelajaran
##  Length:15          Min.   :1.400        Min.   : 64.90   Length:15           
##  Class :character   1st Qu.:1.900        1st Qu.: 77.15   Class :character    
##  Mode  :character   Median :2.900        Median : 84.50   Mode  :character    
##                     Mean   :3.287        Mean   : 83.64                       
##                     3rd Qu.:4.500        3rd Qu.: 89.85                       
##                     Max.   :5.500        Max.   :100.00                       
##  kesulitan_ekonomi  jarak_rumah_sekolah
##  Length:15          Min.   : 3.100     
##  Class :character   1st Qu.: 5.050     
##  Mode  :character   Median : 8.000     
##                     Mean   : 7.667     
##                     3rd Qu.:10.050     
##                     Max.   :12.500

Jika tidak ada data yang hilang, kita tampilkan variabel apa saja yang ada pada data.

# Compact overview of the training data: one line per column with its type
# and first values.
dplyr::glimpse(datatraining)
## Rows: 200
## Columns: 11
## $ usia                 <dbl> 15, 19, 15, 15, 16, 18, 16, 16, 15, 21, 21, 18, 1…
## $ jenis_kelamin        <chr> "0", "0", "0", "1", "1", "1", "1", "0", "1", "1",…
## $ nilai_rata_rata      <dbl> 85.2, 60.9, 60.5, 78.6, 66.2, 85.9, 45.2, 91.6, 7…
## $ dukungan_orang_tua   <chr> "1", "2", "2", "3", "2", "2", "1", "2", "2", "3",…
## $ fasilitas_belajar    <chr> "2", "3", "1", "2", "1", "1", "3", "2", "1", "3",…
## $ jam_belajar_per_hari <dbl> 4.1, 3.8, 6.1, 5.1, 5.5, 3.7, 2.8, 6.2, 4.6, 3.0,…
## $ kehadiran_persen     <dbl> 79.7, 89.8, 86.7, 78.5, 81.0, 85.4, 78.9, 78.1, 9…
## $ minat_pada_pelajaran <chr> "2", "2", "1", "3", "1", "2", "1", "2", "2", "3",…
## $ kesulitan_ekonomi    <chr> "0", "1", "1", "1", "1", "1", "0", "0", "0", "1",…
## $ jarak_rumah_sekolah  <dbl> 11.5, 6.3, 15.9, 9.8, 3.5, 1.0, 6.4, 4.9, 6.5, 9.…
## $ motivasi_belajar     <chr> "1", "2", "1", "2", "1", "1", "1", "2", "1", "3",…
# Compact overview of the testing data: one line per column with its type
# and first values.
dplyr::glimpse(datatesting)
## Rows: 15
## Columns: 10
## $ usia                 <dbl> 15, 19, 17, 20, 16, 21, 21, 19, 17, 18, 16, 15, 1…
## $ jenis_kelamin        <chr> "1", "0", "0", "1", "1", "0", "0", "1", "0", "0",…
## $ nilai_rata_rata      <dbl> 90.2, 79.6, 66.9, 85.6, 65.9, 70.8, 86.3, 84.3, 7…
## $ dukungan_orang_tua   <chr> "2", "1", "2", "3", "3", "2", "3", "3", "2", "1",…
## $ fasilitas_belajar    <chr> "2", "1", "3", "1", "1", "3", "2", "3", "2", "2",…
## $ jam_belajar_per_hari <dbl> 5.4, 4.6, 2.9, 3.9, 5.3, 1.9, 4.4, 1.5, 2.0, 5.5,…
## $ kehadiran_persen     <dbl> 78.3, 88.2, 76.3, 89.4, 74.0, 100.0, 100.0, 92.1,…
## $ minat_pada_pelajaran <chr> "2", "3", "1", "1", "1", "1", "1", "1", "3", "3",…
## $ kesulitan_ekonomi    <chr> "1", "0", "0", "0", "1", "1", "0", "0", "0", "0",…
## $ jarak_rumah_sekolah  <dbl> 4.7, 10.8, 11.8, 5.7, 3.1, 7.1, 8.2, 5.4, 4.2, 10…

Pastikan setiap variabel pada dataset sudah bertipe factor, num, atau int: ubah variabel kategorik yang masih bertipe chr menjadi factor.

# Categorical columns arrive as character; encode them as factors so the
# classifiers treat them as categories (this includes the target,
# motivasi_belajar).
train_factor_cols <- c(
  "jenis_kelamin", "dukungan_orang_tua", "fasilitas_belajar",
  "minat_pada_pelajaran", "kesulitan_ekonomi", "motivasi_belajar"
)
datatraining[train_factor_cols] <- lapply(
  datatraining[train_factor_cols],
  as.factor
)

# Inspect the resulting column types.
str(datatraining)
## tibble [200 × 11] (S3: tbl_df/tbl/data.frame)
##  $ usia                : num [1:200] 15 19 15 15 16 18 16 16 15 21 ...
##  $ jenis_kelamin       : Factor w/ 2 levels "0","1": 1 1 1 2 2 2 2 1 2 2 ...
##  $ nilai_rata_rata     : num [1:200] 85.2 60.9 60.5 78.6 66.2 85.9 45.2 91.6 74.1 88.2 ...
##  $ dukungan_orang_tua  : Factor w/ 3 levels "1","2","3": 1 2 2 3 2 2 1 2 2 3 ...
##  $ fasilitas_belajar   : Factor w/ 3 levels "1","2","3": 2 3 1 2 1 1 3 2 1 3 ...
##  $ jam_belajar_per_hari: num [1:200] 4.1 3.8 6.1 5.1 5.5 3.7 2.8 6.2 4.6 3 ...
##  $ kehadiran_persen    : num [1:200] 79.7 89.8 86.7 78.5 81 85.4 78.9 78.1 96.8 82.1 ...
##  $ minat_pada_pelajaran: Factor w/ 3 levels "1","2","3": 2 2 1 3 1 2 1 2 2 3 ...
##  $ kesulitan_ekonomi   : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 1 1 1 2 ...
##  $ jarak_rumah_sekolah : num [1:200] 11.5 6.3 15.9 9.8 3.5 1 6.4 4.9 6.5 9.5 ...
##  $ motivasi_belajar    : Factor w/ 3 levels "1","2","3": 1 2 1 2 1 1 1 2 1 3 ...
# Categorical predictors of the testing data (the target column is absent
# from this data set).
test_factor_cols <- c(
  "jenis_kelamin", "dukungan_orang_tua", "fasilitas_belajar",
  "minat_pada_pelajaran", "kesulitan_ekonomi"
)

# Encode each column with the SAME level set as the training data. Deriving
# the levels from the testing data alone would drop any category that happens
# not to occur in it, and the mismatched factors can break or distort
# predict() for the fitted models.
datatesting[test_factor_cols] <- lapply(
  stats::setNames(nm = test_factor_cols),
  function(col) {
    train_levels <- levels(as.factor(datatraining[[col]]))
    values <- datatesting[[col]]

    # A category never seen during training cannot be scored; fail loudly
    # instead of silently turning it into NA.
    unseen <- unique(values[!is.na(values) & !(values %in% train_levels)])
    if (length(unseen) > 0) {
      stop(
        "Column '", col, "' of the testing data has categories not present ",
        "in the training data: ", paste(unseen, collapse = ", "),
        call. = FALSE
      )
    }

    factor(values, levels = train_levels)
  }
)

# Inspect the resulting column types.
str(datatesting)
## tibble [15 × 10] (S3: tbl_df/tbl/data.frame)
##  $ usia                : num [1:15] 15 19 17 20 16 21 21 19 17 18 ...
##  $ jenis_kelamin       : Factor w/ 2 levels "0","1": 2 1 1 2 2 1 1 2 1 1 ...
##  $ nilai_rata_rata     : num [1:15] 90.2 79.6 66.9 85.6 65.9 70.8 86.3 84.3 79.5 74.4 ...
##  $ dukungan_orang_tua  : Factor w/ 3 levels "1","2","3": 2 1 2 3 3 2 3 3 2 1 ...
##  $ fasilitas_belajar   : Factor w/ 3 levels "1","2","3": 2 1 3 1 1 3 2 3 2 2 ...
##  $ jam_belajar_per_hari: num [1:15] 5.4 4.6 2.9 3.9 5.3 1.9 4.4 1.5 2 5.5 ...
##  $ kehadiran_persen    : num [1:15] 78.3 88.2 76.3 89.4 74 100 100 92.1 84.5 90.3 ...
##  $ minat_pada_pelajaran: Factor w/ 3 levels "1","2","3": 2 3 1 1 1 1 1 1 3 3 ...
##  $ kesulitan_ekonomi   : Factor w/ 2 levels "0","1": 2 1 1 1 2 2 1 1 1 1 ...
##  $ jarak_rumah_sekolah : num [1:15] 4.7 10.8 11.8 5.7 3.1 7.1 8.2 5.4 4.2 10.7 ...

Lakukan standardisasi (center dan scale) pada variabel numerik.

# Numeric predictors; only these columns are centred and scaled.
numeric_vars <- c(
  "usia",
  "nilai_rata_rata",
  "jam_belajar_per_hari",
  "kehadiran_persen",
  "jarak_rumah_sekolah"
)

# Unscaled copies, used by the random forest and the decision tree.
train <- datatraining
test <- datatesting

# Estimate centring/scaling parameters from the training data only, then
# apply that same transformation to both data sets for the SVM.
preProc <- preProcess(
  datatraining[, numeric_vars],
  method = c("center", "scale")
)

train_svm <- datatraining
train_svm[, numeric_vars] <- predict(preProc, datatraining[, numeric_vars])

test_svm <- datatesting
test_svm[, numeric_vars] <- predict(preProc, datatesting[, numeric_vars])

Lakukan test Support Vector Machine (SVM)

# Fit an SVM on the standardised training data, using every other column as
# a predictor of motivasi_belajar.
svm_model <- svm(
  motivasi_belajar ~ .,
  data = train_svm
)

# Predicted class for each row of the standardised testing data.
svm_pred <- predict(svm_model, newdata = test_svm)

Lakukan test RandomForest

# Fix the RNG seed before fitting: randomForest() draws bootstrap samples and
# random feature subsets, so without a seed each run can grow a different
# forest and return different predictions.
set.seed(123)

# Grow 100 trees on the unscaled training data, using every other column as
# a predictor of motivasi_belajar.
rf_model <- randomForest(motivasi_belajar ~ ., data = train, ntree = 100)

# Predicted class for each row of the testing data.
rf_pred <- predict(rf_model, newdata = test)

Lakukan test Decision Tree

# Fit a classification tree on the unscaled training data, using every other
# column as a predictor of motivasi_belajar.
dt_model <- rpart(
  motivasi_belajar ~ .,
  data = train,
  method = "class"
)

# type = "class" returns the predicted label rather than class probabilities.
dt_pred <- predict(dt_model, newdata = test, type = "class")

Tampilkan hasil dari test SVM, test Random Forest dan test Decision Tree

# Put the three models' predictions side by side, one row per testing
# observation, so disagreements between models are easy to spot.
results <- data.frame(
  SVM = svm_pred, RandomForest = rf_pred, DecisionTree = dt_pred
)

print(results)
##    SVM RandomForest DecisionTree
## 1    2            2            2
## 2    2            2            2
## 3    1            1            1
## 4    2            2            2
## 5    1            1            1
## 6    2            2            2
## 7    2            2            2
## 8    2            2            3
## 9    2            1            1
## 10   2            2            2
## 11   1            1            2
## 12   1            1            1
## 13   2            2            1
## 14   1            2            2
## 15   1            1            1