# Load the raw CSV and preview its first six rows.
data <- read.csv(file = "Level Risiko Investasi.csv")
head(data, n = 6L)
## Country X1 X2 X3 X4 X5 X6 X7 X8
## 1 SE 23.2000 60338.020 175.42230 1.62000 0.6755 2.47168 0.3526 185.64097
## 2 SG 16.8056 62432.995 409.69700 0.10510 0.9068 2.77600 0.2912 94.00211
## 3 SI 18.2857 28684.168 103.06040 0.84352 0.0746 3.55290 1.9299 72.30708
## 4 SK 19.6715 21042.722 102.73060 1.17400 0.0734 3.21976 1.2325 111.78982
## 5 SM 11.9000 49356.262 60.15464 0.89594 0.5865 1.75420 -1.1342 88.60514
## 6 SV NA 3989.191 65.55750 0.39400 0.5042 2.44734 -0.1248 88.88685
## X9 X10 X11 X12 X13 X14
## 1 64.14972 537.609866 0.5000 25.11320 27.95256 8.6
## 2 -200.98100 339.988210 1.3095 26.76784 47.25374 3.0
## 3 16.23838 52.761781 3.0176 19.90742 25.76882 5.0
## 4 33.35258 102.567122 2.5300 22.83084 20.95780 7.0
## 5 -145.43800 1.490827 63.5000 17.79208 23.21144 7.3
## 6 27.33332 24.638720 1.5706 16.78238 14.52982 9.0
## DATA CLEANING
# Count the missing values across every column of the raw data.
total_missing_values_all_columns <- sum(is.na(data))
cat(
  "Total missing values across all columns:\n",
  total_missing_values_all_columns,
  "\n\n"
)
## Total missing values across all columns:
## 8
# Drop every row that contains at least one missing value.
data_clean <- na.omit(object = data)
# Inspect the column types of the raw data frame.
str(object = data)
## 'data.frame': 17 obs. of 15 variables:
## $ Country: chr "SE" "SG" "SI" "SK" ...
## $ X1 : num 23.2 16.8 18.3 19.7 11.9 ...
## $ X2 : num 60338 62433 28684 21043 49356 ...
## $ X3 : num 175.4 409.7 103.1 102.7 60.2 ...
## $ X4 : num 1.62 0.105 0.844 1.174 0.896 ...
## $ X5 : num 0.6755 0.9068 0.0746 0.0734 0.5865 ...
## $ X6 : num 2.47 2.78 3.55 3.22 1.75 ...
## $ X7 : num 0.353 0.291 1.93 1.232 -1.134 ...
## $ X8 : num 185.6 94 72.3 111.8 88.6 ...
## $ X9 : num 64.1 -201 16.2 33.4 -145.4 ...
## $ X10 : num 537.61 339.99 52.76 102.57 1.49 ...
## $ X11 : num 0.5 1.31 3.02 2.53 63.5 ...
## $ X12 : num 25.1 26.8 19.9 22.8 17.8 ...
## $ X13 : num 28 47.3 25.8 21 23.2 ...
## $ X14 : num 8.6 3 5 7 7.3 9 2 17 13.2 3.7 ...
# Convert 'Country' to a factor in the raw data. This does not reach
# `data_clean`, which was derived from `data` earlier and loses the column below.
data$Country <- as.factor(data$Country)
# Drop the 'Country' identifier by name rather than by position, so a change in
# column order cannot silently remove a predictor instead.
data_clean <- data_clean[, setdiff(names(data_clean), "Country"), drop = FALSE]
# Save the data with the incomplete rows removed.
write.csv(data_clean, "Level Risiko Investasi unmissing.csv", row.names = FALSE)
print(data_clean)
## X1 X2 X3 X4 X5 X6 X7 X8
## 1 23.2000 60338.020 175.42230 1.62000 0.6755 2.47168 0.3526 185.64097
## 2 16.8056 62432.995 409.69700 0.10510 0.9068 2.77600 0.2912 94.00211
## 3 18.2857 28684.168 103.06040 0.84352 0.0746 3.55290 1.9299 72.30708
## 4 19.6715 21042.722 102.73060 1.17400 0.0734 3.21976 1.2325 111.78982
## 5 11.9000 49356.262 60.15464 0.89594 0.5865 1.75420 -1.1342 88.60514
## 7 19.8000 7450.552 33.22256 0.34500 0.3153 3.44058 1.2787 100.19298
## 8 12.9000 3616.865 85.26668 5.55600 1.1173 1.60820 -1.5047 134.47988
## 11 22.0000 3955.070 103.90710 19.17300 -0.3906 0.34000 1.8906 72.25639
## 16 12.0977 3886.516 34.52492 2.79600 0.8506 6.94570 5.2762 86.56201
## X9 X10 X11 X12 X13 X14
## 1 64.14972 537.609866 0.5000 25.11320 27.95256 8.6
## 2 -200.98100 339.988210 1.3095 26.76784 47.25374 3.0
## 3 16.23838 52.761781 3.0176 19.90742 25.76882 5.0
## 4 33.35258 102.567122 2.5300 22.83084 20.95780 7.0
## 5 -145.43800 1.490827 63.5000 17.79208 23.21144 7.3
## 7 -42.56340 501.644054 3.2000 23.05990 32.47950 2.0
## 8 64.46288 39.218118 13.6000 18.80654 8.88180 17.0
## 11 -5.46582 155.581868 49.0000 17.79388 16.04966 9.5
## 16 7.39622 351.683014 1.6900 23.54764 25.80812 2.5
## DATA CLEANING
# Re-count the missing values, this time on the cleaned data frame.
total_missing_values_all_columns <- sum(is.na(data_clean))
cat(
  "Total missing values across all columns:\n",
  total_missing_values_all_columns,
  "\n\n"
)
## Total missing values across all columns:
## 0
# Outlier cleaning: reload the file that was saved after dropping missing rows.
data <- read.csv(
  file = "Level Risiko Investasi unmissing.csv",
  na.strings = ""
)
head(data, n = 6L)
## X1 X2 X3 X4 X5 X6 X7 X8
## 1 23.2000 60338.020 175.42230 1.62000 0.6755 2.47168 0.3526 185.64097
## 2 16.8056 62432.995 409.69700 0.10510 0.9068 2.77600 0.2912 94.00211
## 3 18.2857 28684.168 103.06040 0.84352 0.0746 3.55290 1.9299 72.30708
## 4 19.6715 21042.722 102.73060 1.17400 0.0734 3.21976 1.2325 111.78982
## 5 11.9000 49356.262 60.15464 0.89594 0.5865 1.75420 -1.1342 88.60514
## 6 19.8000 7450.552 33.22256 0.34500 0.3153 3.44058 1.2787 100.19298
## X9 X10 X11 X12 X13 X14
## 1 64.14972 537.609866 0.5000 25.11320 27.95256 8.6
## 2 -200.98100 339.988210 1.3095 26.76784 47.25374 3.0
## 3 16.23838 52.761781 3.0176 19.90742 25.76882 5.0
## 4 33.35258 102.567122 2.5300 22.83084 20.95780 7.0
## 5 -145.43800 1.490827 63.5000 17.79208 23.21144 7.3
## 6 -42.56340 501.644054 3.2000 23.05990 32.47950 2.0
# Flag outliers with the interquartile-range (IQR) rule and replace them by NA.
#
# A value is treated as an outlier when it lies more than `k` IQRs below the
# first quartile or above the third quartile.
#
# x: numeric vector; NAs already present are ignored when the quartiles are
#    computed and are left as NA in the result.
# k: multiplier applied to the IQR to build the fences (default 1.5).
# Returns a vector of the same length as `x` with the outliers set to NA.
remove_outliers <- function(x, k = 1.5) {
  # One quantile() call yields both quartiles; unname() drops the "25%"/"75%"
  # labels so they do not leak into the bounds.
  quartiles <- unname(quantile(x, probs = c(0.25, 0.75), na.rm = TRUE))
  spread <- quartiles[2] - quartiles[1]
  lower_bound <- quartiles[1] - k * spread
  upper_bound <- quartiles[2] + k * spread
  x[x < lower_bound | x > upper_bound] <- NA
  x
}
# Apply the IQR rule to every numeric column and store the result in
# `data_cleaned`. The 'Country' column was already dropped before this CSV was
# written, so no column is skipped (skipping the first would leave X1 unchecked).
data_cleaned <- data
is_numeric_col <- vapply(data_cleaned, is.numeric, logical(1))
data_cleaned[is_numeric_col] <- lapply(data[is_numeric_col], remove_outliers)
# Outliers are now NA; drop every row that contains at least one of them.
# NOTE: this is row-wise deletion, so on a small sample it can discard most rows.
data_cleaned <- na.omit(data_cleaned)
# Keep `data` pointing at the cleaned rows for the analysis steps further down.
data <- data_cleaned
# Save the outlier-free data as CSV.
write.csv(data_cleaned, "Level Risiko Investasi Cleaned Finallll.csv", row.names = FALSE)
# Display the data after removing missing values and outliers.
# NOTE(review): the rendered output pasted below this point predates this fix;
# re-run the script to refresh it.
print(data_cleaned)
## X1 X2 X3 X4 X5 X6 X7 X8
## 1 23.2000 60338.020 175.42230 1.62000 0.6755 2.47168 0.3526 185.64097
## 2 16.8056 62432.995 409.69700 0.10510 0.9068 2.77600 0.2912 94.00211
## 3 18.2857 28684.168 103.06040 0.84352 0.0746 3.55290 1.9299 72.30708
## 4 19.6715 21042.722 102.73060 1.17400 0.0734 3.21976 1.2325 111.78982
## 5 11.9000 49356.262 60.15464 0.89594 0.5865 1.75420 -1.1342 88.60514
## 6 19.8000 7450.552 33.22256 0.34500 0.3153 3.44058 1.2787 100.19298
## 7 12.9000 3616.865 85.26668 5.55600 1.1173 1.60820 -1.5047 134.47988
## 8 22.0000 3955.070 103.90710 19.17300 -0.3906 0.34000 1.8906 72.25639
## 9 12.0977 3886.516 34.52492 2.79600 0.8506 6.94570 5.2762 86.56201
## X9 X10 X11 X12 X13 X14
## 1 64.14972 537.609866 0.5000 25.11320 27.95256 8.6
## 2 -200.98100 339.988210 1.3095 26.76784 47.25374 3.0
## 3 16.23838 52.761781 3.0176 19.90742 25.76882 5.0
## 4 33.35258 102.567122 2.5300 22.83084 20.95780 7.0
## 5 -145.43800 1.490827 63.5000 17.79208 23.21144 7.3
## 6 -42.56340 501.644054 3.2000 23.05990 32.47950 2.0
## 7 64.46288 39.218118 13.6000 18.80654 8.88180 17.0
## 8 -5.46582 155.581868 49.0000 17.79388 16.04966 9.5
## 9 7.39622 351.683014 1.6900 23.54764 25.80812 2.5
# Show the structure (column types) of the cleaned data frame.
str(object = data_cleaned)
## 'data.frame': 9 obs. of 14 variables:
## $ X1 : num 23.2 16.8 18.3 19.7 11.9 ...
## $ X2 : num 60338 62433 28684 21043 49356 ...
## $ X3 : num 175.4 409.7 103.1 102.7 60.2 ...
## $ X4 : num 1.62 0.105 0.844 1.174 0.896 ...
## $ X5 : num 0.6755 0.9068 0.0746 0.0734 0.5865 ...
## $ X6 : num 2.47 2.78 3.55 3.22 1.75 ...
## $ X7 : num 0.353 0.291 1.93 1.232 -1.134 ...
## $ X8 : num 185.6 94 72.3 111.8 88.6 ...
## $ X9 : num 64.1 -201 16.2 33.4 -145.4 ...
## $ X10: num 537.61 339.99 52.76 102.57 1.49 ...
## $ X11: num 0.5 1.31 3.02 2.53 63.5 ...
## $ X12: num 25.1 26.8 19.9 22.8 17.8 ...
## $ X13: num 28 47.3 25.8 21 23.2 ...
## $ X14: num 8.6 3 5 7 7.3 2 17 9.5 2.5
# Fit a simple linear regression of X14 on X1 and print the coefficients.
linear_model <- lm(formula = X14 ~ X1, data = data)
print(x = linear_model)
##
## Call:
## lm(formula = X14 ~ X1, data = data)
##
## Coefficients:
## (Intercept) X1
## 8.48071 -0.09209
# Draw the default diagnostic plots for the fitted linear model.
plot(x = linear_model)




# Pearson correlation matrix across all columns of the cleaned data.
correlation_matrix <- cor(x = data_cleaned, method = "pearson")
print(x = correlation_matrix)
## X1 X2 X3 X4 X5 X6
## X1 1.00000000 0.132873061 0.1900699 0.2744029 -0.6166061 -0.33430178
## X2 0.13287306 1.000000000 0.6850229 -0.4686016 0.2656109 -0.16512981
## X3 0.19006994 0.685022908 1.0000000 -0.1492817 0.2594287 -0.18488443
## X4 0.27440289 -0.468601587 -0.1492817 1.0000000 -0.5145210 -0.51211884
## X5 -0.61660610 0.265610879 0.2594287 -0.5145210 1.0000000 0.30326915
## X6 -0.33430178 -0.165129811 -0.1848844 -0.5121188 0.3032692 1.00000000
## X7 0.04581372 -0.406296146 -0.2447768 0.1139032 -0.2520888 0.74858977
## X8 0.28889728 0.340810650 0.1260536 -0.2439023 0.4376938 -0.12339747
## X9 0.29402195 -0.501954252 -0.5286786 0.2273147 -0.1555573 0.09089035
## X10 0.43582164 0.186048129 0.2186606 -0.2099671 0.1881472 0.34390742
## X11 -0.21788380 -0.008869658 -0.2541867 0.5064816 -0.2924356 -0.58733939
## X12 0.24836320 0.438680457 0.5886052 -0.5285666 0.3690163 0.48898111
## X13 0.13737308 0.608548454 0.6793410 -0.5132112 0.1882858 0.30687493
## X14 -0.08419649 -0.189553013 -0.1433857 0.4148337 0.1537423 -0.61455701
## X7 X8 X9 X10 X11 X12
## X1 0.04581372 0.28889728 0.294021955 0.435821637 -0.217883803 0.2483632
## X2 -0.40629615 0.34081065 -0.501954252 0.186048129 -0.008869658 0.4386805
## X3 -0.24477685 0.12605361 -0.528678586 0.218660631 -0.254186655 0.5886052
## X4 0.11390319 -0.24390225 0.227314705 -0.209967085 0.506481578 -0.5285666
## X5 -0.25208882 0.43769379 -0.155557318 0.188147245 -0.292435631 0.3690163
## X6 0.74858977 -0.12339747 0.090890352 0.343907417 -0.587339395 0.4889811
## X7 1.00000000 -0.38687877 0.204944063 0.312500195 -0.329991062 0.2517648
## X8 -0.38687877 1.00000000 0.438431821 0.433076606 -0.345532165 0.3736531
## X9 0.20494406 0.43843182 1.000000000 0.009695843 -0.320913067 -0.1752875
## X10 0.31250020 0.43307661 0.009695843 1.000000000 -0.523225109 0.7635011
## X11 -0.32999106 -0.34553216 -0.320913067 -0.523225109 1.000000000 -0.7546298
## X12 0.25176479 0.37365306 -0.175287482 0.763501083 -0.754629805 1.0000000
## X13 0.13817175 -0.08547333 -0.677952919 0.568612383 -0.380118801 0.7719893
## X14 -0.59275118 0.41321980 0.468778031 -0.477860761 0.293524282 -0.5409865
## X13 X14
## X1 0.13737308 -0.08419649
## X2 0.60854845 -0.18955301
## X3 0.67934096 -0.14338572
## X4 -0.51321116 0.41483373
## X5 0.18828577 0.15374234
## X6 0.30687493 -0.61455701
## X7 0.13817175 -0.59275118
## X8 -0.08547333 0.41321980
## X9 -0.67795292 0.46877803
## X10 0.56861238 -0.47786076
## X11 -0.38011880 0.29352428
## X12 0.77198932 -0.54098654
## X13 1.00000000 -0.77642300
## X14 -0.77642300 1.00000000
## Boxplots
# Boxplots of X1 and X14, drawn side by side to compare their distributions
# and to list any outlying values under each plot.
par(mfrow = c(1, 2))
# boxplot.stats()$out holds the outlying values themselves, not row numbers,
# so the subtitles are labelled "Outlier values".
boxplot(
  data$X1,
  main = "X1 (Variable 1)",
  sub = paste(
    "Outlier values:",
    paste(boxplot.stats(data$X1)$out, collapse = ", ")
  )
)
boxplot(
  data$X14,
  main = "X14 (Variable 14)",
  sub = paste(
    "Outlier values:",
    paste(boxplot.stats(data$X14)$out, collapse = ", ")
  )
)
