library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the "Training" and "Testing" sheets from the same Excel workbook.
train_data <- read_excel(
  path = "Level Risiko Investasi.xlsx",
  sheet = "Training"
)
test_data <- read_excel(
  path = "Level Risiko Investasi.xlsx",
  sheet = "Testing"
)

# Show a preview of the training tibble.
print(x = train_data)
## # A tibble: 100 × 16
## Country X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AD 17.5 38675. 173. 0.68 1.22 1.79 -2.08 55 -26.5 2.86
## 2 AE 18.2 40105. 104. 1.77 0.870 2.66 -0.725 103. -13.6 353.
## 3 AE-AZ 18.7 76038. 31.0 2.63 1.49 1.85 -1.90 103. -56.2 200.
## 4 AE-RK NA 27883. 24.8 1.29 1.75 2.23 -1.14 103. 24.8 10.1
## 5 AM 14 4251. 89.6 1.44 0.256 4.75 2.33 167. 47.3 12.6
## 6 AO NA 2034. 57.1 22.4 3.34 -0.878 -5.20 34.8 15.4 62.5
## 7 AR 23.3 9203. 43.3 36.7 0.966 -0.237 -3.73 NA -5.01 375.
## 8 AT 18.6 53174. 159. 1.52 0.726 1.88 -0.300 116. 15.4 430.
## 9 AU 15.7 63972. 122. 1.65 1.48 2.45 0.0306 192. 58.0 1359.
## 10 AW 33.5 24643. 92.8 1.22 0.797 2.06 -4.72 80.5 28.1 2.38
## # ℹ 90 more rows
## # ℹ 5 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>,
## # `Risk Level` <chr>
# Show a preview of the testing tibble.
print(x = test_data)
## # A tibble: 17 × 15
## Country X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SE 23.2 60338. 175. 1.62 0.676 2.47 0.353 186. 64.1 538.
## 2 SG 16.8 62433. 410. 0.105 0.907 2.78 0.291 94.0 -201. 340.
## 3 SI 18.3 28684. 103. 0.844 0.0746 3.55 1.93 72.3 16.2 52.8
## 4 SK 19.7 21043. 103. 1.17 0.0734 3.22 1.23 112. 33.4 103.
## 5 SM 11.9 49356. 60.2 0.896 0.586 1.75 -1.13 88.6 -145. 1.49
## 6 SV NA 3989. 65.6 0.394 0.504 2.45 -0.125 88.9 27.3 24.6
## 7 TH 19.8 7451. 33.2 0.345 0.315 3.44 1.28 100. -42.6 502.
## 8 TN 12.9 3617. 85.3 5.56 1.12 1.61 -1.50 134. 64.5 39.2
## 9 TR 18 8653. 51.7 11.7 1.48 4.16 1.81 117. 28.6 720.
## 10 TW 14.1 31854. 48.5 0.724 0.102 2.54 2.77 71.1 -189. 668.
## 11 UA 22 3955. 104. 19.2 -0.391 0.34 1.89 72.3 -5.47 156.
## 12 UG 21.6 787. 42.3 4.29 3.66 5.74 0.421 69.5 21.9 33.5
## 13 US 16.3 69325. 104. 1.55 0.626 2.46 0.487 NA 47.7 20935
## 14 UY 17.0 15968. 73.0 8.00 0.359 0.821 -0.717 49.1 -16.2 53.6
## 15 UZ 18.4 1873. 30.0 12.3 1.61 5.84 3.07 NA -45.2 57.7
## 16 VN 12.1 3887. 34.5 2.80 0.851 6.95 5.28 86.6 7.40 352.
## 17 ZA 16.6 6405. 50.8 4.98 1.48 0.789 -2.32 107. 15.0 302.
## # ℹ 4 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
# Make the column names syntactically valid in both tables
# (e.g. "Risk Level" becomes "Risk.Level").
names(train_data) <- make.names(names(train_data))
names(test_data) <- make.names(names(test_data))

# Store the target column as a factor, then inspect the resulting structure.
train_data[["Risk.Level"]] <- as.factor(train_data[["Risk.Level"]])
str(object = train_data)
## tibble [100 × 16] (S3: tbl_df/tbl/data.frame)
## $ Country : chr [1:100] "AD" "AE" "AE-AZ" "AE-RK" ...
## $ X1 : num [1:100] 17.5 18.2 18.7 NA 14 ...
## $ X2 : num [1:100] 38675 40105 76038 27883 4251 ...
## $ X3 : num [1:100] 172.8 103.5 31 24.8 89.6 ...
## $ X4 : num [1:100] 0.68 1.77 2.63 1.29 1.44 ...
## $ X5 : num [1:100] 1.221 0.87 1.489 1.753 0.256 ...
## $ X6 : num [1:100] 1.79 2.66 1.85 2.23 4.75 ...
## $ X7 : num [1:100] -2.084 -0.725 -1.901 -1.135 2.332 ...
## $ X8 : num [1:100] 55 103 103 103 167 ...
## $ X9 : num [1:100] -26.5 -13.6 -56.2 24.8 47.3 ...
## $ X10 : num [1:100] 2.86 352.91 199.93 10.11 12.65 ...
## $ X11 : num [1:100] 8 8.15 8.15 NA 6.6 ...
## $ X12 : num [1:100] 23.1 24.9 20.4 21.7 19.4 ...
## $ X13 : num [1:100] 26.9 32.5 31 17.3 15.1 ...
## $ X14 : num [1:100] 3 2.45 NA NA 18.5 ...
## $ Risk.Level: Factor w/ 2 levels "high","low": 2 2 2 2 1 1 1 2 2 1 ...
library(ggplot2)
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
# Plot the missing-value pattern of the raw training data.
aggr(x = train_data)

library(mice)
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
# Multiply impute the training data: 10 imputed data sets using predictive
# mean matching, with a fixed seed so the run is repeatable.
train_data_imp <- mice(
  data = train_data,
  m = 10,
  method = "pmm",
  seed = 123
)
##
## iter imp variable
## 1 1 X1 X8 X11 X14
## 1 2 X1 X8 X11 X14
## 1 3 X1 X8 X11 X14
## 1 4 X1 X8 X11 X14
## 1 5 X1 X8 X11 X14
## 1 6 X1 X8 X11 X14
## 1 7 X1 X8 X11 X14
## 1 8 X1 X8 X11 X14
## 1 9 X1 X8 X11 X14
## 1 10 X1 X8 X11 X14
## 2 1 X1 X8 X11 X14
## 2 2 X1 X8 X11 X14
## 2 3 X1 X8 X11 X14
## 2 4 X1 X8 X11 X14
## 2 5 X1 X8 X11 X14
## 2 6 X1 X8 X11 X14
## 2 7 X1 X8 X11 X14
## 2 8 X1 X8 X11 X14
## 2 9 X1 X8 X11 X14
## 2 10 X1 X8 X11 X14
## 3 1 X1 X8 X11 X14
## 3 2 X1 X8 X11 X14
## 3 3 X1 X8 X11 X14
## 3 4 X1 X8 X11 X14
## 3 5 X1 X8 X11 X14
## 3 6 X1 X8 X11 X14
## 3 7 X1 X8 X11 X14
## 3 8 X1 X8 X11 X14
## 3 9 X1 X8 X11 X14
## 3 10 X1 X8 X11 X14
## 4 1 X1 X8 X11 X14
## 4 2 X1 X8 X11 X14
## 4 3 X1 X8 X11 X14
## 4 4 X1 X8 X11 X14
## 4 5 X1 X8 X11 X14
## 4 6 X1 X8 X11 X14
## 4 7 X1 X8 X11 X14
## 4 8 X1 X8 X11 X14
## 4 9 X1 X8 X11 X14
## 4 10 X1 X8 X11 X14
## 5 1 X1 X8 X11 X14
## 5 2 X1 X8 X11 X14
## 5 3 X1 X8 X11 X14
## 5 4 X1 X8 X11 X14
## 5 5 X1 X8 X11 X14
## 5 6 X1 X8 X11 X14
## 5 7 X1 X8 X11 X14
## 5 8 X1 X8 X11 X14
## 5 9 X1 X8 X11 X14
## 5 10 X1 X8 X11 X14
## Warning: Number of logged events: 1
# Take the 10th imputed data set as the completed training table, then
# re-plot its missing-value pattern.
train_data_comp <- complete(data = train_data_imp, action = 10)
aggr(x = train_data_comp)

# Plot the missing-value pattern of the raw testing data.
aggr(test_data)

# Impute the testing data with mice's default settings (5 imputed data sets).
# A fixed seed is supplied, as for the training imputation, so the imputed
# values are identical on every run instead of depending on the RNG state.
test_data_imp <- mice(test_data, seed = 123)
##
## iter imp variable
## 1 1 X1 X8 X11 X14
## 1 2 X1 X8 X11 X14
## 1 3 X1 X8 X11 X14
## 1 4 X1 X8 X11 X14
## 1 5 X1 X8 X11 X14
## 2 1 X1 X8 X11 X14
## 2 2 X1 X8 X11 X14
## 2 3 X1 X8 X11 X14
## 2 4 X1 X8 X11 X14
## 2 5 X1 X8 X11 X14
## 3 1 X1 X8 X11 X14
## 3 2 X1 X8 X11 X14
## 3 3 X1 X8 X11 X14
## 3 4 X1 X8 X11 X14
## 3 5 X1 X8 X11 X14
## 4 1 X1 X8 X11 X14
## 4 2 X1 X8 X11 X14
## 4 3 X1 X8 X11 X14
## 4 4 X1 X8 X11 X14
## 4 5 X1 X8 X11 X14
## 5 1 X1 X8 X11 X14
## 5 2 X1 X8 X11 X14
## 5 3 X1 X8 X11 X14
## 5 4 X1 X8 X11 X14
## 5 5 X1 X8 X11 X14
## Warning: Number of logged events: 31
# Use the first imputed data set (the default `action`) as the completed
# testing table, then re-plot its missing-value pattern.
test_data_comp <- complete(data = test_data_imp, action = 1L)
aggr(x = test_data_comp)

library(reshape2)
library(corrplot)
## corrplot 0.94 loaded
# Fix the RNG, then let caret choose about 80% of the completed training
# rows (partitioned on the target) for model fitting.
set.seed(seed = 123)
train_index <- createDataPartition(
  y = train_data_comp$Risk.Level,
  p = 0.8,
  list = FALSE
)

# NOTE: from here on `train_data` / `test_data` are the two parts of the
# completed training sheet; the objects read from the workbook are replaced.
train_data <- train_data_comp[train_index, ]
test_data <- train_data_comp[-train_index, ]
str(object = train_data)
## 'data.frame': 81 obs. of 16 variables:
## $ Country : chr "AE" "AE-AZ" "AE-RK" "AR" ...
## $ X1 : num 18.2 18.7 22.7 23.3 18.6 ...
## $ X2 : num 40105 76038 27883 9203 53174 ...
## $ X3 : num 103.5 31 24.8 43.3 159.4 ...
## $ X4 : num 1.77 2.63 1.29 36.7 1.52 ...
## $ X5 : num 0.87 1.489 1.753 0.966 0.726 ...
## $ X6 : num 2.659 1.85 2.232 -0.237 1.88 ...
## $ X7 : num -0.725 -1.901 -1.135 -3.73 -0.3 ...
## $ X8 : num 102.5 102.5 102.5 50.1 116.4 ...
## $ X9 : num -13.6 -56.24 24.79 -5.01 15.37 ...
## $ X10 : num 352.9 199.9 10.1 375.2 430 ...
## $ X11 : num 8.15 8.15 6.2 10.6 2.02 ...
## $ X12 : num 24.9 20.4 21.7 16.7 24.8 ...
## $ X13 : num 32.5 31 17.3 13.8 26.9 ...
## $ X14 : num 2.45 4.9 7.5 11.05 6 ...
## $ Risk.Level: Factor w/ 2 levels "high","low": 2 2 2 1 2 2 1 1 2 2 ...
library(e1071)
# Drop the "Country" identifier so it is not used as a predictor.
# A logical mask is used rather than `-which(...)`: if the column were ever
# absent, `-which(...)` would be an empty index and select zero columns,
# whereas the mask simply keeps every column. `drop = FALSE` guarantees the
# result stays a data frame.
train_data <- train_data[, names(train_data) != "Country", drop = FALSE]
test_data <- test_data[, names(test_data) != "Country", drop = FALSE]
# Fit a linear-kernel SVM on all remaining columns, then predict the
# hold-out rows.
svm_model <- svm(
  Risk.Level ~ .,
  data = train_data,
  kernel = "linear"
)
svm_pred <- predict(object = svm_model, newdata = test_data)

# Accuracy = share of hold-out rows whose predicted label equals the truth.
accuracy_svm <- sum(svm_pred == test_data[["Risk.Level"]]) / nrow(test_data)
print(paste("SVM Akurasi:", accuracy_svm))
## [1] "SVM Akurasi: 0.947368421052632"
# Cross-tabulate predictions against the true hold-out labels and print
# caret's summary. The result is stored under the name `confusionMatrix`
# (the same name as the caret function), which is what later code reads.
confusionMatrix <- confusionMatrix(
  data = as.factor(svm_pred),
  reference = as.factor(test_data$Risk.Level)
)
print(x = confusionMatrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction high low
## high 10 1
## low 0 8
##
## Accuracy : 0.9474
## 95% CI : (0.7397, 0.9987)
## No Information Rate : 0.5263
## P-Value [Acc > NIR] : 9.149e-05
##
## Kappa : 0.8939
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 1.0000
## Specificity : 0.8889
## Pos Pred Value : 0.9091
## Neg Pred Value : 1.0000
## Prevalence : 0.5263
## Detection Rate : 0.5263
## Detection Prevalence : 0.5789
## Balanced Accuracy : 0.9444
##
## 'Positive' Class : high
##
# Convert the confusion-matrix count table into a data frame.
confusion_data <- as.data.frame(confusionMatrix[["table"]])
library(ggplot2)
# Build a 5 x 5 grid (x varies slowest, y fastest) with a random integer
# between 1 and 100 in `Freq` for each of the 25 cells.
df <- data.frame(
  x = rep(seq_len(5), each = 5),
  y = rep(seq_len(5), times = 5),
  Freq = sample(seq_len(100), size = 25, replace = TRUE)
)