library(readxl)
library(caTools)
library(rpart)
library(rpart.plot)
# Load the investment-risk workbook into `data` and preview its first six rows.
# NOTE(review): this absolute Windows path only resolves on the original
# author's machine; consider a project-relative path.
data <- read_excel(
  path = "C:/Users/Hp/Downloads/Level Risiko Investasi.xlsx"
)
head(data, n = 6L)
## # A tibble: 6 × 16
## Country X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AD 17.5 38675. 173. 0.68 1.22 1.79 -2.08 55 -26.5 2.86 8
## 2 AE 18.2 40105. 104. 1.77 0.870 2.66 -0.725 103. -13.6 353. 8.15
## 3 AE-AZ 18.7 76038. 31.0 2.63 1.49 1.85 -1.90 103. -56.2 200. 8.15
## 4 AE-RK NA 27883. 24.8 1.29 1.75 2.23 -1.14 103. 24.8 10.1 NA
## 5 AM 14 4251. 89.6 1.44 0.256 4.75 2.33 167. 47.3 12.6 6.6
## 6 AO NA 2034. 57.1 22.4 3.34 -0.878 -5.20 34.8 15.4 62.5 10.3
## # ℹ 4 more variables: X12 <dbl>, X13 <dbl>, X14 <dbl>, `Risk Level` <chr>
# Keep only complete rows, then partition them into training and test sets
# using a seeded (reproducible) split keyed on the `Risk Level` column.
data <- na.omit(data)
set.seed(123)
# NOTE(review): SplitRatio = 0.434 leaves fewer training rows (29) than test
# rows (38) in the recorded run; confirm this ratio is intentional.
split <- sample.split(data[["Risk Level"]], SplitRatio = 0.434)
# `split` is a logical mask: TRUE rows form the training set, the rest the
# test set.
train_data <- data[split, ]
test_data <- data[!split, ]
# Fit a classification tree for `Risk Level` on predictors X1..X14 using the
# training rows, draw the tree, then classify the test rows.
model <- rpart(
  formula = `Risk Level` ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 +
    X8 + X9 + X10 + X11 + X12 + X13 + X14,
  data = train_data,
  method = "class"
)
rpart.plot(model)
# type = "class" requests predicted class labels for each test row.
predictions <- predict(
  object = model,
  newdata = test_data,
  type = "class"
)
print(predictions)
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## low high low low high high high low high high low low low high low low
## 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
## high low high low high high low low high low high high high high high high
## 33 34 35 36 37 38
## high high low high high high
## Levels: high low
# Accuracy: share of test rows whose predicted label equals the recorded
# `Risk Level`.
accuracy <- sum(predictions == test_data[["Risk Level"]]) /
  length(predictions)
print(paste("Accuracy: ", accuracy))
## [1] "Accuracy: 0.815789473684211"
# Show the first six predicted labels.
head(predictions, n = 6L)
## 1 2 3 4 5 6
## low high low low high high
## Levels: high low
# Confusion matrix: rows are the predicted labels, columns the recorded ones.
table(
  Predicted = predictions,
  Actual = test_data[["Risk Level"]]
)
## Actual
## Predicted high low
## high 17 6
## low 1 14
# Recode X1 in the full dataset as a factor, then display the test rows.
# NOTE(review): train_data/test_data were created before this conversion, so
# they are unaffected (the printed X1 is still <dbl>); confirm that treating
# the numeric X1 as categorical is intended for any later steps.
data[["X1"]] <- factor(data[["X1"]])
print(test_data)
## # A tibble: 38 × 16
## Country X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AE 18.2 40105. 104. 1.77 0.870 2.66 -0.725 103. -13.6 353.
## 2 AM 14 4251. 89.6 1.44 0.256 4.75 2.33 167. 47.3 12.6
## 3 AU 15.7 63972. 122. 1.65 1.48 2.45 0.0306 192. 58.0 1359.
## 4 AW 33.5 24643. 92.8 1.22 0.797 2.06 -4.72 80.5 28.1 2.38
## 5 BG 22.7 11289. 70.3 0.779 -0.710 3.62 2.70 73.0 -13.0 69.1
## 6 CL 14.3 15986. 65.8 2.98 1.25 1.97 -0.892 117. 14.0 253.
## 7 CV 19.4 3466. 115. 0.379 1.16 3.92 -0.404 67.5 51.4 1.70
## 8 DK 22.6 67566. 156. 0.54 0.361 2.69 1.31 359. -5.59 355.
## 9 DO 18.6 8172. 42.5 2.22 0.921 6.06 2.40 80.1 21.0 79.0
## 10 EG 20.1 3756. 31.9 16.2 2.05 4.45 2.24 53.3 11.9 362.
## # ℹ 28 more rows
## # ℹ 5 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>,
## # `Risk Level` <chr>
# Display the training rows that were used to fit the tree.
print(train_data)
## # A tibble: 29 × 16
## Country X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AD 17.5 38675. 173. 0.68 1.22 1.79 -2.08 55 -26.5 2.86
## 2 AT 18.6 53174. 159. 1.52 0.726 1.88 -0.300 116. 15.4 430.
## 3 BD 4.2 2324. 19.7 5.81 1.06 7.39 6.07 78.4 4.92 347.
## 4 BJ 10.5 1421. 49.6 0.225 2.77 4.88 2.21 81.5 20.6 15.4
## 5 CA 16.1 51705. 118. 1.67 1.19 1.80 -0.588 107. 45.5 1722.
## 6 CH 19.3 89771. 276. 0.00116 0.840 1.89 0.173 82.5 -152. 749.
## 7 CN 14.7 12227. 13.6 2 0.458 6.64 5.27 94.2 -27.0 14867.
## 8 CO 17.2 5860. 47.6 4.71 1.38 2.45 -0.888 117. 9.93 271.
## 9 CR 13.3 11955. 45.0 1.35 0.996 3.25 0.703 125. 1.75 61.5
## 10 CZ 21.4 27045. 79.2 1.58 0.207 3.72 1.32 76.5 -16.6 244.
## # ℹ 19 more rows
## # ℹ 5 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>,
## # `Risk Level` <chr>
Insight dari hasil di atas adalah: