library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(mice)
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(e1071)
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
library(ggplot2)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Read the "Training" and "Testing" sheets of the same workbook. An untouched
# copy of the testing sheet is kept so its Country column can be re-attached
# after that column is dropped for modelling.
train.data <- readxl::read_excel(
  path = "Level Risiko Investasi.xlsx",
  sheet = "Training"
)
test.data <- readxl::read_excel(
  path = "Level Risiko Investasi.xlsx",
  sheet = "Testing"
)
test.data.ori <- test.data
dplyr::glimpse(train.data)
## Rows: 100
## Columns: 16
## $ Country <chr> "AD", "AE", "AE-AZ", "AE-RK", "AM", "AO", "AR", "AT", "AU…
## $ X1 <dbl> 17.5000, 18.2000, 18.7000, NA, 14.0000, NA, 23.2527, 18.5…
## $ X2 <dbl> 38674.616, 40105.120, 76037.997, 27882.829, 4251.398, 203…
## $ X3 <dbl> 172.75400, 103.52280, 31.03626, 24.78532, 89.61882, 57.05…
## $ X4 <dbl> 0.68000, 1.76600, 2.63056, 1.29416, 1.44000, 22.35646, 36…
## $ X5 <dbl> 1.2206, 0.8698, 1.4893, 1.7530, 0.2562, 3.3422, 0.9657, 0…
## $ X6 <dbl> 1.78560, 2.65884, 1.85034, 2.23192, 4.74800, -0.87800, -0…
## $ X7 <dbl> -2.0843, -0.7254, -1.9008, -1.1355, 2.3318, -5.2032, -3.7…
## $ X8 <dbl> 55.00000, 102.52738, 102.52738, 102.52738, 166.80851, 34.…
## $ X9 <dbl> -26.52000, -13.59890, -56.24160, 24.78532, 47.27262, 15.4…
## $ X10 <dbl> 2.857862, 352.910575, 199.928422, 10.108892, 12.645460, 6…
## $ X11 <dbl> 8.0000, 8.1550, 8.1550, NA, 6.6000, 10.3000, 10.6000, 2.0…
## $ X12 <dbl> 23.08410, 24.85976, 20.39940, 21.69104, 19.40300, 31.1238…
## $ X13 <dbl> 26.94344, 32.47740, 31.03926, 17.30888, 15.11172, 20.5721…
## $ X14 <dbl> 3.0000, 2.4500, NA, NA, 18.5000, 10.5000, 11.0500, 6.0000…
## $ `Risk Level` <chr> "low", "low", "low", "low", "high", "high", "high", "low"…
# Column overview of the testing sheet (it has no Risk Level column).
dplyr::glimpse(test.data)
## Rows: 17
## Columns: 15
## $ Country <chr> "SE", "SG", "SI", "SK", "SM", "SV", "TH", "TN", "TR", "TW", "U…
## $ X1 <dbl> 23.2000, 16.8056, 18.2857, 19.6715, 11.9000, NA, 19.8000, 12.9…
## $ X2 <dbl> 60338.0204, 62432.9952, 28684.1682, 21042.7221, 49356.2618, 39…
## $ X3 <dbl> 175.42230, 409.69700, 103.06040, 102.73060, 60.15464, 65.55750…
## $ X4 <dbl> 1.62000, 0.10510, 0.84352, 1.17400, 0.89594, 0.39400, 0.34500,…
## $ X5 <dbl> 0.6755, 0.9068, 0.0746, 0.0734, 0.5865, 0.5042, 0.3153, 1.1173…
## $ X6 <dbl> 2.47168, 2.77600, 3.55290, 3.21976, 1.75420, 2.44734, 3.44058,…
## $ X7 <dbl> 0.3526, 0.2912, 1.9299, 1.2325, -1.1342, -0.1248, 1.2787, -1.5…
## $ X8 <dbl> 185.64097, 94.00211, 72.30708, 111.78982, 88.60514, 88.88685, …
## $ X9 <dbl> 64.14972, -200.98100, 16.23838, 33.35258, -145.43800, 27.33332…
## $ X10 <dbl> 537.609866, 339.988210, 52.761781, 102.567122, 1.490827, 24.63…
## $ X11 <dbl> 0.5000, 1.3095, 3.0176, 2.5300, 63.5000, 1.5706, 3.2000, 13.60…
## $ X12 <dbl> 25.11320, 26.76784, 19.90742, 22.83084, 17.79208, 16.78238, 23…
## $ X13 <dbl> 27.95256, 47.25374, 25.76882, 20.95780, 23.21144, 14.52982, 32…
## $ X14 <dbl> 8.6000, 3.0000, 5.0000, 7.0000, 7.3000, 9.0000, 2.0000, 17.000…
# Compact structure listing of the raw training data.
utils::str(train.data)
## tibble [100 × 16] (S3: tbl_df/tbl/data.frame)
## $ Country : chr [1:100] "AD" "AE" "AE-AZ" "AE-RK" ...
## $ X1 : num [1:100] 17.5 18.2 18.7 NA 14 ...
## $ X2 : num [1:100] 38675 40105 76038 27883 4251 ...
## $ X3 : num [1:100] 172.8 103.5 31 24.8 89.6 ...
## $ X4 : num [1:100] 0.68 1.77 2.63 1.29 1.44 ...
## $ X5 : num [1:100] 1.221 0.87 1.489 1.753 0.256 ...
## $ X6 : num [1:100] 1.79 2.66 1.85 2.23 4.75 ...
## $ X7 : num [1:100] -2.084 -0.725 -1.901 -1.135 2.332 ...
## $ X8 : num [1:100] 55 103 103 103 167 ...
## $ X9 : num [1:100] -26.5 -13.6 -56.2 24.8 47.3 ...
## $ X10 : num [1:100] 2.86 352.91 199.93 10.11 12.65 ...
## $ X11 : num [1:100] 8 8.15 8.15 NA 6.6 ...
## $ X12 : num [1:100] 23.1 24.9 20.4 21.7 19.4 ...
## $ X13 : num [1:100] 26.9 32.5 31 17.3 15.1 ...
## $ X14 : num [1:100] 3 2.45 NA NA 18.5 ...
## $ Risk Level: chr [1:100] "low" "low" "low" "low" ...
# Compact structure listing of the raw testing data.
utils::str(test.data)
## tibble [17 × 15] (S3: tbl_df/tbl/data.frame)
## $ Country: chr [1:17] "SE" "SG" "SI" "SK" ...
## $ X1 : num [1:17] 23.2 16.8 18.3 19.7 11.9 ...
## $ X2 : num [1:17] 60338 62433 28684 21043 49356 ...
## $ X3 : num [1:17] 175.4 409.7 103.1 102.7 60.2 ...
## $ X4 : num [1:17] 1.62 0.105 0.844 1.174 0.896 ...
## $ X5 : num [1:17] 0.6755 0.9068 0.0746 0.0734 0.5865 ...
## $ X6 : num [1:17] 2.47 2.78 3.55 3.22 1.75 ...
## $ X7 : num [1:17] 0.353 0.291 1.93 1.232 -1.134 ...
## $ X8 : num [1:17] 185.6 94 72.3 111.8 88.6 ...
## $ X9 : num [1:17] 64.1 -201 16.2 33.4 -145.4 ...
## $ X10 : num [1:17] 537.61 339.99 52.76 102.57 1.49 ...
## $ X11 : num [1:17] 0.5 1.31 3.02 2.53 63.5 ...
## $ X12 : num [1:17] 25.1 26.8 19.9 22.8 17.8 ...
## $ X13 : num [1:17] 28 47.3 25.8 21 23.2 ...
## $ X14 : num [1:17] 8.6 3 5 7 7.3 9 2 17 13.2 3.7 ...
# Per-column summary statistics (including NA counts) for the training data.
base::summary(train.data)
## Country X1 X2 X3
## Length:100 Min. : 4.20 Min. : 434.5 Min. : 13.63
## Class :character 1st Qu.:15.93 1st Qu.: 4265.9 1st Qu.: 42.96
## Mode :character Median :18.58 Median : 11659.1 Median : 70.42
## Mean :18.97 Mean : 22641.6 Mean : 191.94
## 3rd Qu.:21.80 3rd Qu.: 34815.2 3rd Qu.: 130.63
## Max. :47.50 Max. :124340.4 Max. :6908.35
## NA's :12
## X4 X5 X6 X7
## Min. :-0.151 Min. :-0.8862 Min. :-5.135 Min. :-9.84530
## 1st Qu.: 0.869 1st Qu.: 0.4419 1st Qu.: 1.765 1st Qu.:-1.18720
## Median : 1.700 Median : 1.1402 Median : 2.984 Median : 0.07155
## Mean : 3.263 Mean : 1.2019 Mean : 3.076 Mean : 0.10804
## 3rd Qu.: 3.939 3rd Qu.: 1.9502 3rd Qu.: 4.305 3rd Qu.: 1.94108
## Max. :36.703 Max. : 4.4021 Max. :10.076 Max. : 6.07120
##
## X8 X9 X10 X11
## Min. : 34.82 Min. :-1955.72 Min. : 1.171 Min. : 0.3357
## 1st Qu.: 76.95 1st Qu.: -14.11 1st Qu.: 32.813 1st Qu.: 1.9250
## Median : 90.19 Median : 12.67 Median : 106.872 Median : 3.9000
## Mean : 99.94 Mean : -13.58 Mean : 582.318 Mean : 5.5346
## 3rd Qu.:113.39 3rd Qu.: 36.67 3rd Qu.: 366.370 3rd Qu.: 7.9500
## Max. :359.14 Max. : 456.49 Max. :14866.703 Max. :26.9780
## NA's :7 NA's :17
## X12 X13 X14 Risk Level
## Min. :12.67 Min. :10.95 Min. : 0.120 Length:100
## 1st Qu.:20.79 1st Qu.:19.06 1st Qu.: 4.818 Class :character
## Median :23.40 Median :24.28 Median : 6.800 Mode :character
## Mean :24.96 Mean :24.48 Mean : 8.441
## 3rd Qu.:28.38 3rd Qu.:29.36 3rd Qu.:10.500
## Max. :46.83 Max. :55.09 Max. :24.650
## NA's :11
# Per-column summary statistics (including NA counts) for the testing data.
base::summary(test.data)
## Country X1 X2 X3
## Length:17 Min. :11.90 Min. : 786.9 Min. : 30.05
## Class :character 1st Qu.:15.76 1st Qu.: 3955.1 1st Qu.: 48.51
## Mode :character Median :17.52 Median : 8653.0 Median : 65.56
## Mean :17.42 Mean :22330.4 Mean : 92.59
## 3rd Qu.:19.70 3rd Qu.:31854.3 3rd Qu.:103.06
## Max. :23.20 Max. :69324.7 Max. :409.70
## NA's :1
## X4 X5 X6 X7
## Min. : 0.1051 Min. :-0.3906 Min. :0.340 Min. :-2.3230
## 1st Qu.: 0.8435 1st Qu.: 0.3153 1st Qu.:1.754 1st Qu.:-0.1248
## Median : 1.6200 Median : 0.6255 Median :2.539 Median : 0.4867
## Mean : 4.4949 Mean : 0.8249 Mean :2.994 Mean : 0.8826
## 3rd Qu.: 5.5560 3rd Qu.: 1.1173 3rd Qu.:3.553 3rd Qu.: 1.8906
## Max. :19.1730 Max. : 3.6551 Max. :6.946 Max. : 5.2762
##
## X8 X9 X10 X11
## Min. : 49.06 Min. :-200.98 Min. : 1.491 Min. : 0.500
## 1st Qu.: 72.28 1st Qu.: -42.56 1st Qu.: 52.762 1st Qu.: 1.571
## Median : 88.89 Median : 15.04 Median : 155.582 Median : 2.530
## Mean : 96.54 Mean : -18.76 Mean : 1463.386 Mean :11.258
## 3rd Qu.:109.52 3rd Qu.: 28.57 3rd Qu.: 501.644 3rd Qu.: 3.336
## Max. :185.64 Max. : 64.46 Max. :20935.000 Max. :63.500
## NA's :2 NA's :4
## X12 X13 X14
## Min. :16.45 Min. : 8.882 Min. : 2.000
## 1st Qu.:17.79 1st Qu.:17.208 1st Qu.: 4.675
## Median :22.03 Median :23.211 Median : 7.150
## Mean :21.91 Mean :23.693 Mean : 8.963
## 3rd Qu.:24.86 3rd Qu.:27.953 3rd Qu.: 9.700
## Max. :31.60 Max. :47.254 Max. :33.700
## NA's :1
# Make the column names syntactically valid ("Risk Level" becomes
# "Risk.Level") and store the response as a factor for classification.
names(train.data) <- make.names(names(train.data))
train.data[["Risk.Level"]] <- factor(train.data[["Risk.Level"]])
utils::str(train.data)
## tibble [100 × 16] (S3: tbl_df/tbl/data.frame)
## $ Country : chr [1:100] "AD" "AE" "AE-AZ" "AE-RK" ...
## $ X1 : num [1:100] 17.5 18.2 18.7 NA 14 ...
## $ X2 : num [1:100] 38675 40105 76038 27883 4251 ...
## $ X3 : num [1:100] 172.8 103.5 31 24.8 89.6 ...
## $ X4 : num [1:100] 0.68 1.77 2.63 1.29 1.44 ...
## $ X5 : num [1:100] 1.221 0.87 1.489 1.753 0.256 ...
## $ X6 : num [1:100] 1.79 2.66 1.85 2.23 4.75 ...
## $ X7 : num [1:100] -2.084 -0.725 -1.901 -1.135 2.332 ...
## $ X8 : num [1:100] 55 103 103 103 167 ...
## $ X9 : num [1:100] -26.5 -13.6 -56.2 24.8 47.3 ...
## $ X10 : num [1:100] 2.86 352.91 199.93 10.11 12.65 ...
## $ X11 : num [1:100] 8 8.15 8.15 NA 6.6 ...
## $ X12 : num [1:100] 23.1 24.9 20.4 21.7 19.4 ...
## $ X13 : num [1:100] 26.9 32.5 31 17.3 15.1 ...
## $ X14 : num [1:100] 3 2.45 NA NA 18.5 ...
## $ Risk.Level: Factor w/ 2 levels "high","low": 2 2 2 2 1 1 1 2 2 1 ...
# Drop the Country identifier so it is not used as a predictor.
# Columns are selected by name with setdiff(): this is a no-op when "Country"
# is absent, whereas `-which(names(x) == "Country")` evaluates to integer(0)
# in that case and would silently drop every column.
train.data <- train.data[, setdiff(names(train.data), "Country"), drop = FALSE]
test.data <- test.data[, setdiff(names(test.data), "Country"), drop = FALSE]
# Number of missing values per predictor in the testing data.
colSums(is.na(test.data))
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
## 1 0 0 0 0 0 0 2 0 0 4 0 0 1
# Plot the missing-value pattern of the training data, then build 20 imputed
# data sets with predictive mean matching; the seed fixes the random draws.
VIM::aggr(train.data)

train.data_imp <- mice::mice(
  data = train.data,
  m = 20,
  method = "pmm",
  seed = 500
)
##
## iter imp variable
## 1 1 X1 X8 X11 X14
## 1 2 X1 X8 X11 X14
## 1 3 X1 X8 X11 X14
## 1 4 X1 X8 X11 X14
## 1 5 X1 X8 X11 X14
## 1 6 X1 X8 X11 X14
## 1 7 X1 X8 X11 X14
## 1 8 X1 X8 X11 X14
## 1 9 X1 X8 X11 X14
## 1 10 X1 X8 X11 X14
## 1 11 X1 X8 X11 X14
## 1 12 X1 X8 X11 X14
## 1 13 X1 X8 X11 X14
## 1 14 X1 X8 X11 X14
## 1 15 X1 X8 X11 X14
## 1 16 X1 X8 X11 X14
## 1 17 X1 X8 X11 X14
## 1 18 X1 X8 X11 X14
## 1 19 X1 X8 X11 X14
## 1 20 X1 X8 X11 X14
## 2 1 X1 X8 X11 X14
## 2 2 X1 X8 X11 X14
## 2 3 X1 X8 X11 X14
## 2 4 X1 X8 X11 X14
## 2 5 X1 X8 X11 X14
## 2 6 X1 X8 X11 X14
## 2 7 X1 X8 X11 X14
## 2 8 X1 X8 X11 X14
## 2 9 X1 X8 X11 X14
## 2 10 X1 X8 X11 X14
## 2 11 X1 X8 X11 X14
## 2 12 X1 X8 X11 X14
## 2 13 X1 X8 X11 X14
## 2 14 X1 X8 X11 X14
## 2 15 X1 X8 X11 X14
## 2 16 X1 X8 X11 X14
## 2 17 X1 X8 X11 X14
## 2 18 X1 X8 X11 X14
## 2 19 X1 X8 X11 X14
## 2 20 X1 X8 X11 X14
## 3 1 X1 X8 X11 X14
## 3 2 X1 X8 X11 X14
## 3 3 X1 X8 X11 X14
## 3 4 X1 X8 X11 X14
## 3 5 X1 X8 X11 X14
## 3 6 X1 X8 X11 X14
## 3 7 X1 X8 X11 X14
## 3 8 X1 X8 X11 X14
## 3 9 X1 X8 X11 X14
## 3 10 X1 X8 X11 X14
## 3 11 X1 X8 X11 X14
## 3 12 X1 X8 X11 X14
## 3 13 X1 X8 X11 X14
## 3 14 X1 X8 X11 X14
## 3 15 X1 X8 X11 X14
## 3 16 X1 X8 X11 X14
## 3 17 X1 X8 X11 X14
## 3 18 X1 X8 X11 X14
## 3 19 X1 X8 X11 X14
## 3 20 X1 X8 X11 X14
## 4 1 X1 X8 X11 X14
## 4 2 X1 X8 X11 X14
## 4 3 X1 X8 X11 X14
## 4 4 X1 X8 X11 X14
## 4 5 X1 X8 X11 X14
## 4 6 X1 X8 X11 X14
## 4 7 X1 X8 X11 X14
## 4 8 X1 X8 X11 X14
## 4 9 X1 X8 X11 X14
## 4 10 X1 X8 X11 X14
## 4 11 X1 X8 X11 X14
## 4 12 X1 X8 X11 X14
## 4 13 X1 X8 X11 X14
## 4 14 X1 X8 X11 X14
## 4 15 X1 X8 X11 X14
## 4 16 X1 X8 X11 X14
## 4 17 X1 X8 X11 X14
## 4 18 X1 X8 X11 X14
## 4 19 X1 X8 X11 X14
## 4 20 X1 X8 X11 X14
## 5 1 X1 X8 X11 X14
## 5 2 X1 X8 X11 X14
## 5 3 X1 X8 X11 X14
## 5 4 X1 X8 X11 X14
## 5 5 X1 X8 X11 X14
## 5 6 X1 X8 X11 X14
## 5 7 X1 X8 X11 X14
## 5 8 X1 X8 X11 X14
## 5 9 X1 X8 X11 X14
## 5 10 X1 X8 X11 X14
## 5 11 X1 X8 X11 X14
## 5 12 X1 X8 X11 X14
## 5 13 X1 X8 X11 X14
## 5 14 X1 X8 X11 X14
## 5 15 X1 X8 X11 X14
## 5 16 X1 X8 X11 X14
## 5 17 X1 X8 X11 X14
## 5 18 X1 X8 X11 X14
## 5 19 X1 X8 X11 X14
## 5 20 X1 X8 X11 X14
# Use imputation number 18 (of the 20 generated) as the completed training
# data and re-plot the missingness pattern on the completed data.
train.data_comp <- mice::complete(data = train.data_imp, action = 18)
VIM::aggr(train.data_comp)

# For each variable that mice imputed (X1, X8, X11, X14), draw the density of
# the observed values and overlay the density after imputation as a dotted
# purple line, so the two distributions can be compared by eye.
# `TRUE` is written out because `T` is an ordinary variable that can be
# reassigned; the plot titles are identical to the hand-written versions.
for (var_name in c("X1", "X8", "X11", "X14")) {
  plot(
    density(train.data[[var_name]], na.rm = TRUE),
    main = paste0("Data ", var_name, " With NA")
  )
  lines(
    density(train.data_comp[[var_name]], na.rm = TRUE),
    col = "purple",
    lty = 3
  )
}

# Plot the missing-value pattern of the testing data, then build 20 imputed
# data sets with predictive mean matching (same settings as for training).
# NOTE(review): the testing sheet is imputed on its own 17 rows, independently
# of the training data, and mice reports logged events for this call. Confirm
# whether the imputation model should be derived from the training data.
VIM::aggr(test.data)

test.data_imp <- mice::mice(
  data = test.data,
  m = 20,
  method = "pmm",
  seed = 500
)
##
## iter imp variable
## 1 1 X1 X8 X11 X14
## 1 2 X1 X8 X11 X14
## 1 3 X1 X8 X11 X14
## 1 4 X1 X8 X11 X14
## 1 5 X1 X8 X11 X14
## 1 6 X1 X8 X11 X14
## 1 7 X1 X8 X11 X14
## 1 8 X1 X8 X11 X14
## 1 9 X1 X8 X11 X14
## 1 10 X1 X8 X11 X14
## 1 11 X1 X8 X11 X14
## 1 12 X1 X8 X11 X14
## 1 13 X1 X8 X11 X14
## 1 14 X1 X8 X11 X14
## 1 15 X1 X8 X11 X14
## 1 16 X1 X8 X11 X14
## 1 17 X1 X8 X11 X14
## 1 18 X1 X8 X11 X14
## 1 19 X1 X8 X11 X14
## 1 20 X1 X8 X11 X14
## 2 1 X1 X8 X11 X14
## 2 2 X1 X8 X11 X14
## 2 3 X1 X8 X11 X14
## 2 4 X1 X8 X11 X14
## 2 5 X1 X8 X11 X14
## 2 6 X1 X8 X11 X14
## 2 7 X1 X8 X11 X14
## 2 8 X1 X8 X11 X14
## 2 9 X1 X8 X11 X14
## 2 10 X1 X8 X11 X14
## 2 11 X1 X8 X11 X14
## 2 12 X1 X8 X11 X14
## 2 13 X1 X8 X11 X14
## 2 14 X1 X8 X11 X14
## 2 15 X1 X8 X11 X14
## 2 16 X1 X8 X11 X14
## 2 17 X1 X8 X11 X14
## 2 18 X1 X8 X11 X14
## 2 19 X1 X8 X11 X14
## 2 20 X1 X8 X11 X14
## 3 1 X1 X8 X11 X14
## 3 2 X1 X8 X11 X14
## 3 3 X1 X8 X11 X14
## 3 4 X1 X8 X11 X14
## 3 5 X1 X8 X11 X14
## 3 6 X1 X8 X11 X14
## 3 7 X1 X8 X11 X14
## 3 8 X1 X8 X11 X14
## 3 9 X1 X8 X11 X14
## 3 10 X1 X8 X11 X14
## 3 11 X1 X8 X11 X14
## 3 12 X1 X8 X11 X14
## 3 13 X1 X8 X11 X14
## 3 14 X1 X8 X11 X14
## 3 15 X1 X8 X11 X14
## 3 16 X1 X8 X11 X14
## 3 17 X1 X8 X11 X14
## 3 18 X1 X8 X11 X14
## 3 19 X1 X8 X11 X14
## 3 20 X1 X8 X11 X14
## 4 1 X1 X8 X11 X14
## 4 2 X1 X8 X11 X14
## 4 3 X1 X8 X11 X14
## 4 4 X1 X8 X11 X14
## 4 5 X1 X8 X11 X14
## 4 6 X1 X8 X11 X14
## 4 7 X1 X8 X11 X14
## 4 8 X1 X8 X11 X14
## 4 9 X1 X8 X11 X14
## 4 10 X1 X8 X11 X14
## 4 11 X1 X8 X11 X14
## 4 12 X1 X8 X11 X14
## 4 13 X1 X8 X11 X14
## 4 14 X1 X8 X11 X14
## 4 15 X1 X8 X11 X14
## 4 16 X1 X8 X11 X14
## 4 17 X1 X8 X11 X14
## 4 18 X1 X8 X11 X14
## 4 19 X1 X8 X11 X14
## 4 20 X1 X8 X11 X14
## 5 1 X1 X8 X11 X14
## 5 2 X1 X8 X11 X14
## 5 3 X1 X8 X11 X14
## 5 4 X1 X8 X11 X14
## 5 5 X1 X8 X11 X14
## 5 6 X1 X8 X11 X14
## 5 7 X1 X8 X11 X14
## 5 8 X1 X8 X11 X14
## 5 9 X1 X8 X11 X14
## 5 10 X1 X8 X11 X14
## 5 11 X1 X8 X11 X14
## 5 12 X1 X8 X11 X14
## 5 13 X1 X8 X11 X14
## 5 14 X1 X8 X11 X14
## 5 15 X1 X8 X11 X14
## 5 16 X1 X8 X11 X14
## 5 17 X1 X8 X11 X14
## 5 18 X1 X8 X11 X14
## 5 19 X1 X8 X11 X14
## 5 20 X1 X8 X11 X14
## Warning: Number of logged events: 124
# Use imputation number 15 (of the 20 generated) as the completed testing
# data and re-plot the missingness pattern on the completed data.
test.data_comp <- mice::complete(data = test.data_imp, action = 15)
VIM::aggr(test.data_comp)

# For each variable that mice imputed in the testing data (X1, X8, X11, X14),
# draw the density of the observed values and overlay the density after
# imputation as a dotted purple line.
# `TRUE` is written out because `T` is an ordinary variable that can be
# reassigned; the plot titles are identical to the hand-written versions.
for (var_name in c("X1", "X8", "X11", "X14")) {
  plot(
    density(test.data[[var_name]], na.rm = TRUE),
    main = paste0("Data ", var_name, " With NA")
  )
  lines(
    density(test.data_comp[[var_name]], na.rm = TRUE),
    col = "purple",
    lty = 3
  )
}

# Column overview of the completed (imputed) training data.
dplyr::glimpse(train.data_comp)
## Rows: 100
## Columns: 15
## $ X1 <dbl> 17.5000, 18.2000, 18.7000, 24.8200, 14.0000, 26.0000, 23.25…
## $ X2 <dbl> 38674.616, 40105.120, 76037.997, 27882.829, 4251.398, 2033.…
## $ X3 <dbl> 172.75400, 103.52280, 31.03626, 24.78532, 89.61882, 57.0556…
## $ X4 <dbl> 0.68000, 1.76600, 2.63056, 1.29416, 1.44000, 22.35646, 36.7…
## $ X5 <dbl> 1.2206, 0.8698, 1.4893, 1.7530, 0.2562, 3.3422, 0.9657, 0.7…
## $ X6 <dbl> 1.78560, 2.65884, 1.85034, 2.23192, 4.74800, -0.87800, -0.2…
## $ X7 <dbl> -2.0843, -0.7254, -1.9008, -1.1355, 2.3318, -5.2032, -3.729…
## $ X8 <dbl> 55.00000, 102.52738, 102.52738, 102.52738, 166.80851, 34.81…
## $ X9 <dbl> -26.52000, -13.59890, -56.24160, 24.78532, 47.27262, 15.449…
## $ X10 <dbl> 2.857862, 352.910575, 199.928422, 10.108892, 12.645460, 62.…
## $ X11 <dbl> 8.0000, 8.1550, 8.1550, 0.9250, 6.6000, 10.3000, 10.6000, 2…
## $ X12 <dbl> 23.08410, 24.85976, 20.39940, 21.69104, 19.40300, 31.12380,…
## $ X13 <dbl> 26.94344, 32.47740, 31.03926, 17.30888, 15.11172, 20.57210,…
## $ X14 <dbl> 3.0000, 2.4500, 4.9000, 15.0000, 18.5000, 10.5000, 11.0500,…
## $ Risk.Level <fct> low, low, low, low, high, high, high, low, low, high, high,…
# Column overview of the completed (imputed) testing data.
dplyr::glimpse(test.data_comp)
## Rows: 17
## Columns: 14
## $ X1 <dbl> 23.2000, 16.8056, 18.2857, 19.6715, 11.9000, 14.1400, 19.8000, 12.…
## $ X2 <dbl> 60338.0204, 62432.9952, 28684.1682, 21042.7221, 49356.2618, 3989.1…
## $ X3 <dbl> 175.42230, 409.69700, 103.06040, 102.73060, 60.15464, 65.55750, 33…
## $ X4 <dbl> 1.62000, 0.10510, 0.84352, 1.17400, 0.89594, 0.39400, 0.34500, 5.5…
## $ X5 <dbl> 0.6755, 0.9068, 0.0746, 0.0734, 0.5865, 0.5042, 0.3153, 1.1173, 1.…
## $ X6 <dbl> 2.47168, 2.77600, 3.55290, 3.21976, 1.75420, 2.44734, 3.44058, 1.6…
## $ X7 <dbl> 0.3526, 0.2912, 1.9299, 1.2325, -1.1342, -0.1248, 1.2787, -1.5047,…
## $ X8 <dbl> 185.64097, 94.00211, 72.30708, 111.78982, 88.60514, 88.88685, 100.…
## $ X9 <dbl> 64.14972, -200.98100, 16.23838, 33.35258, -145.43800, 27.33332, -4…
## $ X10 <dbl> 537.609866, 339.988210, 52.761781, 102.567122, 1.490827, 24.638720…
## $ X11 <dbl> 0.5000, 1.3095, 3.0176, 2.5300, 63.5000, 1.5706, 3.2000, 13.6000, …
## $ X12 <dbl> 25.11320, 26.76784, 19.90742, 22.83084, 17.79208, 16.78238, 23.059…
## $ X13 <dbl> 27.95256, 47.25374, 25.76882, 20.95780, 23.21144, 14.52982, 32.479…
## $ X14 <dbl> 8.6000, 3.0000, 5.0000, 7.0000, 7.3000, 9.0000, 2.0000, 17.0000, 1…
# Tally the observations in each risk class, then draw the tallies as a bar
# chart with the count printed above each bar.
risk_counts <- summarize(
  group_by(train.data_comp, Risk.Level),
  count = n()
)
ggplot(
  data = risk_counts,
  mapping = aes(x = Risk.Level, y = count, fill = Risk.Level)
) +
  geom_col() +
  geom_text(mapping = aes(label = count), vjust = -0.5, size = 5) +
  scale_fill_manual(values = c("low" = "green", "high" = "purple")) +
  labs(title = "Frekuensi Risk Level", x = "Risk Level", y = "Frekuensi") +
  theme_minimal()

# Split the completed training data 80/20 on Risk.Level, fit a linear-kernel
# SVM on the larger part and predict the held-out part. The seed makes the
# split reproducible.
set.seed(123)
train_index <- createDataPartition(
  train.data_comp$Risk.Level,
  p = 0.8,
  list = FALSE
)
train_data <- train.data_comp[train_index, ]
test_data <- train.data_comp[-train_index, ]
svm_model <- svm(Risk.Level ~ ., data = train_data, kernel = "linear")
svm_pred <- predict(svm_model, newdata = test_data)
# Hold-out accuracy as a proportion between 0 and 1.
akurasi_svm <- mean(svm_pred == test_data$Risk.Level)
# A proportion becomes a percentage by multiplying by 100, so a perfect
# hold-out score is reported as 100 %.
print(paste("SVM Akurasi Model:", round(akurasi_svm * 100, 2), "%"))
## [1] "SVM Akurasi Model: 100 %"
# Cross-tabulate the hold-out predictions against the true labels and print
# the resulting table with caret's accompanying statistics. Both inputs are
# already factors, so they are passed as they are.
confusion_matrix <- confusionMatrix(
  data = svm_pred,
  reference = test_data$Risk.Level
)
print(confusion_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction high low
## high 10 0
## low 0 9
##
## Accuracy : 1
## 95% CI : (0.8235, 1)
## No Information Rate : 0.5263
## P-Value [Acc > NIR] : 5.055e-06
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Sensitivity : 1.0000
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 1.0000
## Prevalence : 0.5263
## Detection Rate : 0.5263
## Detection Prevalence : 0.5263
## Balanced Accuracy : 1.0000
##
## 'Positive' Class : high
##
# Turn the confusion table into long format (Prediction, Reference, Freq) and
# draw it as a heatmap with the cell counts printed on the tiles.
confusion_data <- as.data.frame(confusion_matrix$table)
ggplot(data = confusion_data, mapping = aes(x = Reference, y = Prediction)) +
  geom_tile(mapping = aes(fill = Freq), color = "white") +
  geom_text(mapping = aes(label = Freq), vjust = 1) +
  scale_fill_gradient(low = "white", high = "purple") +
  labs(title = "Confusion Matrix", x = "Actual", y = "Predicted") +
  theme_minimal()

# Side-by-side listing of the true and predicted class for every held-out row.
predictions <- data.frame(
  Actual = test_data$Risk.Level,
  Predicted = svm_pred
)
print(predictions)
## Actual Predicted
## 1 low low
## 5 high high
## 6 high high
## 12 high high
## 22 low low
## 30 high high
## 32 low low
## 42 high high
## 50 low low
## 51 low low
## 58 high high
## 62 low low
## 66 high high
## 68 low low
## 71 high high
## 73 high high
## 75 low low
## 93 high high
## 94 low low
# Apply the fitted SVM to the completed testing sheet and show the predicted
# class for each of its rows.
svm_pred.excel <- predict(object = svm_model, newdata = test.data_comp)
print(svm_pred.excel)
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## low low low low high high low high low high high high low high high low
## 17
## high
## Levels: high low
# Store the predicted class next to the predictors of the testing sheet.
test.data_comp$Prediksi.Risk.Level <- svm_pred.excel
# The testing sheet has no Risk Level column, so there are no true labels to
# score the predictions against. Comparing the predictions with themselves
# would always yield 1 and says nothing about the model, so the accuracy is
# recorded as missing instead of being reported as a number.
accuracy_svm.excel <- NA_real_
print("SVM Akurasi untuk Data Excel: tidak tersedia (data Testing tidak memiliki label Risk Level)")
## [1] "SVM Akurasi untuk Data Excel: tidak tersedia (data Testing tidak memiliki label Risk Level)"
# Put the country codes back as the first column, wrap each predicted class in
# a bold coloured cell (purple background for "high", green otherwise) and
# render the whole frame as a dark-themed table with a small font.
test.data_comp <- data.frame(
  Country = test.data.ori$Country,
  test.data_comp,
  check.names = FALSE
)
test.data_comp <- test.data_comp %>%
  mutate(
    Prediksi.Risk.Level = cell_spec(
      Prediksi.Risk.Level,
      background = ifelse(Prediksi.Risk.Level == "high", "purple", "green"),
      bold = TRUE
    )
  )
# escape = FALSE keeps the HTML produced by cell_spec() from being escaped.
test.data_comp %>%
  kbl(escape = FALSE) %>%
  kable_material_dark() %>%
  row_spec(row = 0, bold = TRUE, color = "white") %>%
  kable_styling(font_size = 9)
Country
|
X1
|
X2
|
X3
|
X4
|
X5
|
X6
|
X7
|
X8
|
X9
|
X10
|
X11
|
X12
|
X13
|
X14
|
Prediksi.Risk.Level
|
SE
|
23.2000
|
60338.0204
|
175.42230
|
1.62000
|
0.6755
|
2.47168
|
0.3526
|
185.64097
|
64.14972
|
537.609866
|
0.5000
|
25.11320
|
27.95256
|
8.6000
|
low
|
SG
|
16.8056
|
62432.9952
|
409.69700
|
0.10510
|
0.9068
|
2.77600
|
0.2912
|
94.00211
|
-200.98100
|
339.988210
|
1.3095
|
26.76784
|
47.25374
|
3.0000
|
low
|
SI
|
18.2857
|
28684.1682
|
103.06040
|
0.84352
|
0.0746
|
3.55290
|
1.9299
|
72.30708
|
16.23838
|
52.761781
|
3.0176
|
19.90742
|
25.76882
|
5.0000
|
low
|
SK
|
19.6715
|
21042.7221
|
102.73060
|
1.17400
|
0.0734
|
3.21976
|
1.2325
|
111.78982
|
33.35258
|
102.567122
|
2.5300
|
22.83084
|
20.95780
|
7.0000
|
low
|
SM
|
11.9000
|
49356.2618
|
60.15464
|
0.89594
|
0.5865
|
1.75420
|
-1.1342
|
88.60514
|
-145.43800
|
1.490827
|
63.5000
|
17.79208
|
23.21144
|
7.3000
|
high
|
SV
|
14.1400
|
3989.1913
|
65.55750
|
0.39400
|
0.5042
|
2.44734
|
-0.1248
|
88.88685
|
27.33332
|
24.638720
|
1.5706
|
16.78238
|
14.52982
|
9.0000
|
high
|
TH
|
19.8000
|
7450.5523
|
33.22256
|
0.34500
|
0.3153
|
3.44058
|
1.2787
|
100.19298
|
-42.56340
|
501.644054
|
3.2000
|
23.05990
|
32.47950
|
2.0000
|
low
|
TN
|
12.9000
|
3616.8650
|
85.26668
|
5.55600
|
1.1173
|
1.60820
|
-1.5047
|
134.47988
|
64.46288
|
39.218118
|
13.6000
|
18.80654
|
8.88180
|
17.0000
|
high
|
TR
|
18.0000
|
8652.9973
|
51.65878
|
11.65444
|
1.4844
|
4.15702
|
1.8070
|
116.52826
|
28.56998
|
720.244499
|
2.5300
|
28.55834
|
26.32778
|
13.2000
|
low
|
TW
|
14.1400
|
31854.2815
|
48.51016
|
0.72360
|
0.1015
|
2.53870
|
2.7686
|
71.08310
|
-189.14000
|
668.122597
|
49.0000
|
22.02936
|
33.80118
|
3.7000
|
high
|
UA
|
22.0000
|
3955.0704
|
103.90710
|
19.17300
|
-0.3906
|
0.34000
|
1.8906
|
72.25639
|
-5.46582
|
155.581868
|
49.0000
|
17.79388
|
16.04966
|
9.5000
|
high
|
UG
|
21.5689
|
786.8776
|
42.26784
|
4.29470
|
3.6551
|
5.73874
|
0.4207
|
69.45587
|
21.88058
|
33.538172
|
3.3357
|
24.85996
|
19.47826
|
17.0000
|
high
|
US
|
16.3000
|
69324.7338
|
104.17110
|
1.55316
|
0.6255
|
2.45554
|
0.4867
|
134.47988
|
47.70210
|
20935.000000
|
1.0000
|
17.40650
|
17.20802
|
5.6146
|
low
|
UY
|
17.0400
|
15968.2306
|
73.01010
|
8.00348
|
0.3592
|
0.82090
|
-0.7169
|
49.05568
|
-16.23150
|
53.628838
|
2.5300
|
16.44666
|
17.57796
|
10.3000
|
high
|
UZ
|
18.4000
|
1872.6699
|
30.04996
|
12.29840
|
1.6065
|
5.84000
|
3.0735
|
72.25639
|
-45.16010
|
57.707193
|
2.1000
|
31.60162
|
29.24286
|
6.0000
|
high
|
VN
|
12.0977
|
3886.5162
|
34.52492
|
2.79600
|
0.8506
|
6.94570
|
5.2762
|
86.56201
|
7.39622
|
351.683014
|
1.6900
|
23.54764
|
25.80812
|
2.5000
|
low
|
ZA
|
16.6000
|
6404.6725
|
50.79576
|
4.98278
|
1.4772
|
0.78940
|
-2.3230
|
107.24859
|
15.04496
|
302.141270
|
3.3357
|
19.11740
|
16.25316
|
33.7000
|
high
|