library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load both sheets of the investment-risk workbook: "Training" and "Testing"
# are read into separate tibbles.
train_data <- readxl::read_excel(
  path = "Level Risiko Investasi.xlsx",
  sheet = "Training"
)
test_data <- readxl::read_excel(
  path = "Level Risiko Investasi.xlsx",
  sheet = "Testing"
)

# Preview the training tibble.
print(x = train_data)
## # A tibble: 100 × 16
##    Country    X1     X2    X3    X4    X5     X6      X7    X8     X9     X10
##    <chr>   <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>   <dbl> <dbl>  <dbl>   <dbl>
##  1 AD       17.5 38675. 173.   0.68 1.22   1.79  -2.08    55   -26.5     2.86
##  2 AE       18.2 40105. 104.   1.77 0.870  2.66  -0.725  103.  -13.6   353.  
##  3 AE-AZ    18.7 76038.  31.0  2.63 1.49   1.85  -1.90   103.  -56.2   200.  
##  4 AE-RK    NA   27883.  24.8  1.29 1.75   2.23  -1.14   103.   24.8    10.1 
##  5 AM       14    4251.  89.6  1.44 0.256  4.75   2.33   167.   47.3    12.6 
##  6 AO       NA    2034.  57.1 22.4  3.34  -0.878 -5.20    34.8  15.4    62.5 
##  7 AR       23.3  9203.  43.3 36.7  0.966 -0.237 -3.73    NA    -5.01  375.  
##  8 AT       18.6 53174. 159.   1.52 0.726  1.88  -0.300  116.   15.4   430.  
##  9 AU       15.7 63972. 122.   1.65 1.48   2.45   0.0306 192.   58.0  1359.  
## 10 AW       33.5 24643.  92.8  1.22 0.797  2.06  -4.72    80.5  28.1     2.38
## # ℹ 90 more rows
## # ℹ 5 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>,
## #   `Risk Level` <chr>
# Preview the tibble that was read from the "Testing" sheet.
print(x = test_data)
## # A tibble: 17 × 15
##    Country    X1     X2    X3     X4      X5    X6     X7    X8      X9      X10
##    <chr>   <dbl>  <dbl> <dbl>  <dbl>   <dbl> <dbl>  <dbl> <dbl>   <dbl>    <dbl>
##  1 SE       23.2 60338. 175.   1.62   0.676  2.47   0.353 186.    64.1    538.  
##  2 SG       16.8 62433. 410.   0.105  0.907  2.78   0.291  94.0 -201.     340.  
##  3 SI       18.3 28684. 103.   0.844  0.0746 3.55   1.93   72.3   16.2     52.8 
##  4 SK       19.7 21043. 103.   1.17   0.0734 3.22   1.23  112.    33.4    103.  
##  5 SM       11.9 49356.  60.2  0.896  0.586  1.75  -1.13   88.6 -145.       1.49
##  6 SV       NA    3989.  65.6  0.394  0.504  2.45  -0.125  88.9   27.3     24.6 
##  7 TH       19.8  7451.  33.2  0.345  0.315  3.44   1.28  100.   -42.6    502.  
##  8 TN       12.9  3617.  85.3  5.56   1.12   1.61  -1.50  134.    64.5     39.2 
##  9 TR       18    8653.  51.7 11.7    1.48   4.16   1.81  117.    28.6    720.  
## 10 TW       14.1 31854.  48.5  0.724  0.102  2.54   2.77   71.1 -189.     668.  
## 11 UA       22    3955. 104.  19.2   -0.391  0.34   1.89   72.3   -5.47   156.  
## 12 UG       21.6   787.  42.3  4.29   3.66   5.74   0.421  69.5   21.9     33.5 
## 13 US       16.3 69325. 104.   1.55   0.626  2.46   0.487  NA     47.7  20935   
## 14 UY       17.0 15968.  73.0  8.00   0.359  0.821 -0.717  49.1  -16.2     53.6 
## 15 UZ       18.4  1873.  30.0 12.3    1.61   5.84   3.07   NA    -45.2     57.7 
## 16 VN       12.1  3887.  34.5  2.80   0.851  6.95   5.28   86.6    7.40   352.  
## 17 ZA       16.6  6405.  50.8  4.98   1.48   0.789 -2.32  107.    15.0    302.  
## # ℹ 4 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
# Turn every header into a syntactically valid R name so the columns can be
# used in formulas and with `$` (e.g. `Risk Level` becomes Risk.Level).
names(train_data) <- make.names(names(train_data))
names(test_data) <- make.names(names(test_data))

# Encode the outcome as a factor, then inspect the resulting column types.
train_data[["Risk.Level"]] <- as.factor(train_data[["Risk.Level"]])
str(object = train_data)
## tibble [100 × 16] (S3: tbl_df/tbl/data.frame)
##  $ Country   : chr [1:100] "AD" "AE" "AE-AZ" "AE-RK" ...
##  $ X1        : num [1:100] 17.5 18.2 18.7 NA 14 ...
##  $ X2        : num [1:100] 38675 40105 76038 27883 4251 ...
##  $ X3        : num [1:100] 172.8 103.5 31 24.8 89.6 ...
##  $ X4        : num [1:100] 0.68 1.77 2.63 1.29 1.44 ...
##  $ X5        : num [1:100] 1.221 0.87 1.489 1.753 0.256 ...
##  $ X6        : num [1:100] 1.79 2.66 1.85 2.23 4.75 ...
##  $ X7        : num [1:100] -2.084 -0.725 -1.901 -1.135 2.332 ...
##  $ X8        : num [1:100] 55 103 103 103 167 ...
##  $ X9        : num [1:100] -26.5 -13.6 -56.2 24.8 47.3 ...
##  $ X10       : num [1:100] 2.86 352.91 199.93 10.11 12.65 ...
##  $ X11       : num [1:100] 8 8.15 8.15 NA 6.6 ...
##  $ X12       : num [1:100] 23.1 24.9 20.4 21.7 19.4 ...
##  $ X13       : num [1:100] 26.9 32.5 31 17.3 15.1 ...
##  $ X14       : num [1:100] 3 2.45 NA NA 18.5 ...
##  $ Risk.Level: Factor w/ 2 levels "high","low": 2 2 2 2 1 1 1 2 2 1 ...
library(ggplot2)
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
## 
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
## 
##     sleep
# Plot the missing-value pattern of the training data (VIM aggregation plot).
VIM::aggr(x = train_data)

library(mice)
## 
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
## 
##     filter
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
# Multiple imputation of the training data: 10 imputed data sets produced by
# predictive mean matching, with a fixed seed so the run can be reproduced.
train_data_imp <- mice::mice(
  data = train_data,
  m = 10,
  method = "pmm",
  seed = 123
)
## 
##  iter imp variable
##   1   1  X1  X8  X11  X14
##   1   2  X1  X8  X11  X14
##   1   3  X1  X8  X11  X14
##   1   4  X1  X8  X11  X14
##   1   5  X1  X8  X11  X14
##   1   6  X1  X8  X11  X14
##   1   7  X1  X8  X11  X14
##   1   8  X1  X8  X11  X14
##   1   9  X1  X8  X11  X14
##   1   10  X1  X8  X11  X14
##   2   1  X1  X8  X11  X14
##   2   2  X1  X8  X11  X14
##   2   3  X1  X8  X11  X14
##   2   4  X1  X8  X11  X14
##   2   5  X1  X8  X11  X14
##   2   6  X1  X8  X11  X14
##   2   7  X1  X8  X11  X14
##   2   8  X1  X8  X11  X14
##   2   9  X1  X8  X11  X14
##   2   10  X1  X8  X11  X14
##   3   1  X1  X8  X11  X14
##   3   2  X1  X8  X11  X14
##   3   3  X1  X8  X11  X14
##   3   4  X1  X8  X11  X14
##   3   5  X1  X8  X11  X14
##   3   6  X1  X8  X11  X14
##   3   7  X1  X8  X11  X14
##   3   8  X1  X8  X11  X14
##   3   9  X1  X8  X11  X14
##   3   10  X1  X8  X11  X14
##   4   1  X1  X8  X11  X14
##   4   2  X1  X8  X11  X14
##   4   3  X1  X8  X11  X14
##   4   4  X1  X8  X11  X14
##   4   5  X1  X8  X11  X14
##   4   6  X1  X8  X11  X14
##   4   7  X1  X8  X11  X14
##   4   8  X1  X8  X11  X14
##   4   9  X1  X8  X11  X14
##   4   10  X1  X8  X11  X14
##   5   1  X1  X8  X11  X14
##   5   2  X1  X8  X11  X14
##   5   3  X1  X8  X11  X14
##   5   4  X1  X8  X11  X14
##   5   5  X1  X8  X11  X14
##   5   6  X1  X8  X11  X14
##   5   7  X1  X8  X11  X14
##   5   8  X1  X8  X11  X14
##   5   9  X1  X8  X11  X14
##   5   10  X1  X8  X11  X14
## Warning: Number of logged events: 1
# Extract one completed data set (the 10th imputation) and re-plot the
# missingness pattern to check the result of the imputation.
train_data_comp <- mice::complete(data = train_data_imp, action = 10)
VIM::aggr(x = train_data_comp)

# Plot the missing-value pattern of the "Testing" data before imputing it.
aggr(test_data)

# Impute with mice's default settings. A fixed seed (the same value used for
# the training imputation) makes the imputed values reproducible between runs;
# without it every run of the script yields different imputations.
test_data_imp <- mice(test_data, seed = 123)
## 
##  iter imp variable
##   1   1  X1  X8  X11  X14
##   1   2  X1  X8  X11  X14
##   1   3  X1  X8  X11  X14
##   1   4  X1  X8  X11  X14
##   1   5  X1  X8  X11  X14
##   2   1  X1  X8  X11  X14
##   2   2  X1  X8  X11  X14
##   2   3  X1  X8  X11  X14
##   2   4  X1  X8  X11  X14
##   2   5  X1  X8  X11  X14
##   3   1  X1  X8  X11  X14
##   3   2  X1  X8  X11  X14
##   3   3  X1  X8  X11  X14
##   3   4  X1  X8  X11  X14
##   3   5  X1  X8  X11  X14
##   4   1  X1  X8  X11  X14
##   4   2  X1  X8  X11  X14
##   4   3  X1  X8  X11  X14
##   4   4  X1  X8  X11  X14
##   4   5  X1  X8  X11  X14
##   5   1  X1  X8  X11  X14
##   5   2  X1  X8  X11  X14
##   5   3  X1  X8  X11  X14
##   5   4  X1  X8  X11  X14
##   5   5  X1  X8  X11  X14
## Warning: Number of logged events: 31
# Extract the first completed data set (complete()'s default action) and
# re-plot the missingness pattern to check the result of the imputation.
test_data_comp <- mice::complete(data = test_data_imp, action = 1L)
VIM::aggr(x = test_data_comp)

library(reshape2)
library(corrplot)
## corrplot 0.94 loaded
# Hold out part of the imputed training data for validation: an 80/20 split
# stratified on Risk.Level, seeded so the same rows are chosen on every run.
set.seed(seed = 123)
train_index <- caret::createDataPartition(
  y = train_data_comp$Risk.Level,
  p = 0.8,
  list = FALSE
)

# NOTE: from here on train_data / test_data hold the two parts of this split,
# replacing the objects that were read from the workbook.
train_data <- train_data_comp[train_index, ]
test_data <- train_data_comp[-train_index, ]
str(object = train_data)
## 'data.frame':    81 obs. of  16 variables:
##  $ Country   : chr  "AE" "AE-AZ" "AE-RK" "AR" ...
##  $ X1        : num  18.2 18.7 22.7 23.3 18.6 ...
##  $ X2        : num  40105 76038 27883 9203 53174 ...
##  $ X3        : num  103.5 31 24.8 43.3 159.4 ...
##  $ X4        : num  1.77 2.63 1.29 36.7 1.52 ...
##  $ X5        : num  0.87 1.489 1.753 0.966 0.726 ...
##  $ X6        : num  2.659 1.85 2.232 -0.237 1.88 ...
##  $ X7        : num  -0.725 -1.901 -1.135 -3.73 -0.3 ...
##  $ X8        : num  102.5 102.5 102.5 50.1 116.4 ...
##  $ X9        : num  -13.6 -56.24 24.79 -5.01 15.37 ...
##  $ X10       : num  352.9 199.9 10.1 375.2 430 ...
##  $ X11       : num  8.15 8.15 6.2 10.6 2.02 ...
##  $ X12       : num  24.9 20.4 21.7 16.7 24.8 ...
##  $ X13       : num  32.5 31 17.3 13.8 26.9 ...
##  $ X14       : num  2.45 4.9 7.5 11.05 6 ...
##  $ Risk.Level: Factor w/ 2 levels "high","low": 2 2 2 1 2 2 1 1 2 2 ...
library(e1071)
# Drop the Country identifier so it is not used as a predictor. A logical mask
# is used instead of `-which(...)`: if the column were absent, `which()` would
# return integer(0) and the negative index would silently select zero columns.
train_data <- train_data[, names(train_data) != "Country", drop = FALSE]
test_data <- test_data[, names(test_data) != "Country", drop = FALSE]

# Fit a linear-kernel SVM on all remaining columns and predict the held-out
# rows.
svm_model <- svm(Risk.Level ~ ., data = train_data, kernel = "linear")
svm_pred <- predict(svm_model, newdata = test_data)

# Accuracy = share of held-out rows whose predicted class matches the label.
accuracy_svm <- mean(svm_pred == test_data$Risk.Level)
print(paste("SVM Akurasi:", accuracy_svm))
## [1] "SVM Akurasi: 0.947368421052632"
# Cross-tabulate the predictions against the true labels and print caret's
# summary statistics.
# NOTE(review): the result is stored under the same name as caret's
# confusionMatrix() function; consider a distinct name in a later cleanup.
confusionMatrix <- caret::confusionMatrix(
  data = as.factor(svm_pred),
  reference = as.factor(test_data$Risk.Level)
)
print(x = confusionMatrix)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction high low
##       high   10   1
##       low     0   8
##                                           
##                Accuracy : 0.9474          
##                  95% CI : (0.7397, 0.9987)
##     No Information Rate : 0.5263          
##     P-Value [Acc > NIR] : 9.149e-05       
##                                           
##                   Kappa : 0.8939          
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.8889          
##          Pos Pred Value : 0.9091          
##          Neg Pred Value : 1.0000          
##              Prevalence : 0.5263          
##          Detection Rate : 0.5263          
##    Detection Prevalence : 0.5789          
##       Balanced Accuracy : 0.9444          
##                                           
##        'Positive' Class : high            
## 
# Flatten the confusion-matrix counts into a long data frame
# (one row per Prediction/Reference pair together with its Freq).
confusion_data <- as.data.frame(confusionMatrix[["table"]])
library(ggplot2)
# Build a 5 x 5 grid in long format (x changes every five rows, y cycles
# 1..5) and attach a random integer between 1 and 100 to each of the 25 cells.
df <- data.frame(
  x = rep(seq_len(5), each = 5),
  y = rep(seq_len(5), times = 5),
  Freq = sample(x = seq_len(100), size = 25, replace = TRUE)
)