library(readxl)
library(kernlab)
finansial <- read_excel("C:/Users/Acer/Downloads/Indikator Finansial.xlsx")
data_fin <- finansial[, 9:ncol(finansial)]
data_fin[] <- lapply(data_fin, function(x) as.numeric(as.character(x)))
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
set.seed(2025)
if (nrow(data_fin) > 1500) {
data_fin <- data_fin[sample(1:nrow(data_fin), 1500), ]
}
# Imputasi NA dengan rata-rata kolom (mean)
for (j in 1:ncol(data_fin)) {
data_fin[[j]][is.na(data_fin[[j]])] <- mean(data_fin[[j]], na.rm = TRUE)
}
# Standarisasi Data
X <- scale(data_fin, center = TRUE, scale = TRUE)
# Matriks Korelasi & Eigenvalue/Eigenvector
S <- cor(X)
eig <- eigen(S, symmetric = TRUE)
lambda <- eig$values
E <- eig$vectors
# Proporsi dan Kumulatif Varians
proporsi_varians <- lambda / sum(lambda)
proporsi_kumulatif <- cumsum(proporsi_varians)
# Tabel Hasil PCA
hasil_pca <- data.frame(
Komponen = paste0("PC", 1:length(lambda)),
Eigenvalue = round(lambda, 4),
Proporsi = round(proporsi_varians * 100, 2),
Kumulatif = round(proporsi_kumulatif * 100, 2)
)
print(hasil_pca)
## Komponen Eigenvalue Proporsi Kumulatif
## 1 PC1 13.7578 21.17 21.17
## 2 PC2 6.4332 9.90 31.06
## 3 PC3 3.0417 4.68 35.74
## 4 PC4 2.8054 4.32 40.06
## 5 PC5 2.3622 3.63 43.69
## 6 PC6 2.2928 3.53 47.22
## 7 PC7 2.0629 3.17 50.39
## 8 PC8 1.9790 3.04 53.44
## 9 PC9 1.8060 2.78 56.22
## 10 PC10 1.6827 2.59 58.81
## 11 PC11 1.5448 2.38 61.18
## 12 PC12 1.3368 2.06 63.24
## 13 PC13 1.2716 1.96 65.20
## 14 PC14 1.1782 1.81 67.01
## 15 PC15 1.1538 1.78 68.78
## 16 PC16 1.1228 1.73 70.51
## 17 PC17 1.0967 1.69 72.20
## 18 PC18 1.0546 1.62 73.82
## 19 PC19 1.0492 1.61 75.43
## 20 PC20 0.9950 1.53 76.97
## 21 PC21 0.9772 1.50 78.47
## 22 PC22 0.9370 1.44 79.91
## 23 PC23 0.9306 1.43 81.34
## 24 PC24 0.8926 1.37 82.72
## 25 PC25 0.8678 1.34 84.05
## 26 PC26 0.8528 1.31 85.36
## 27 PC27 0.8116 1.25 86.61
## 28 PC28 0.8000 1.23 87.84
## 29 PC29 0.7446 1.15 88.99
## 30 PC30 0.7380 1.14 90.12
## 31 PC31 0.6809 1.05 91.17
## 32 PC32 0.6485 1.00 92.17
## 33 PC33 0.6128 0.94 93.11
## 34 PC34 0.5702 0.88 93.99
## 35 PC35 0.5677 0.87 94.86
## 36 PC36 0.4801 0.74 95.60
## 37 PC37 0.3954 0.61 96.21
## 38 PC38 0.3788 0.58 96.79
## 39 PC39 0.3146 0.48 97.28
## 40 PC40 0.2849 0.44 97.71
## 41 PC41 0.2550 0.39 98.11
## 42 PC42 0.2228 0.34 98.45
## 43 PC43 0.1714 0.26 98.71
## 44 PC44 0.1649 0.25 98.97
## 45 PC45 0.1530 0.24 99.20
## 46 PC46 0.1195 0.18 99.39
## 47 PC47 0.0870 0.13 99.52
## 48 PC48 0.0725 0.11 99.63
## 49 PC49 0.0567 0.09 99.72
## 50 PC50 0.0512 0.08 99.80
## 51 PC51 0.0365 0.06 99.85
## 52 PC52 0.0273 0.04 99.89
## 53 PC53 0.0231 0.04 99.93
## 54 PC54 0.0159 0.02 99.95
## 55 PC55 0.0109 0.02 99.97
## 56 PC56 0.0075 0.01 99.98
## 57 PC57 0.0049 0.01 99.99
## 58 PC58 0.0034 0.01 100.00
## 59 PC59 0.0023 0.00 100.00
## 60 PC60 0.0001 0.00 100.00
## 61 PC61 0.0001 0.00 100.00
## 62 PC62 0.0000 0.00 100.00
## 63 PC63 0.0000 0.00 100.00
## 64 PC64 0.0000 0.00 100.00
## 65 PC65 0.0000 0.00 100.00
# Menentukan jumlah komponen utama
k_kaiser <- sum(lambda > 1)
k_80pct <- which(proporsi_kumulatif >= 0.80)[1]
cat("Jumlah komponen (Eigenvalue > 1):", k_kaiser, "\n")
## Jumlah komponen (Eigenvalue > 1): 19
cat("Jumlah komponen untuk ≥80% varians:", k_80pct, "\n")
## Jumlah komponen untuk ≥80% varians: 23
# Hitung Skor Komponen Utama
k <- min(26, ncol(data_fin))
scores <- as.data.frame(X %*% E[, 1:k])
colnames(scores) <- paste0("PC", 1:k)
head(scores)
## PC1 PC2 PC3 PC4 PC5 PC6
## 1 0.7485310 -0.04236954 -0.27597338 0.8520014 -1.018803700 -0.6751042
## 2 -11.1731065 0.44942543 -1.50455246 -0.2350548 0.720265067 1.8135337
## 3 1.0581054 -0.02878859 -0.33455750 0.1252107 -1.594524199 -1.6012604
## 4 0.9374362 0.12486712 0.06945476 -0.9328719 -0.163241715 -2.0627747
## 5 0.7401604 0.28662130 0.21966776 1.2213603 0.001728331 0.2657497
## 6 0.4541911 -0.13261935 -0.23684117 -3.2465732 0.803541433 0.4455517
## PC7 PC8 PC9 PC10 PC11 PC12
## 1 -0.19177838 -0.73488550 0.3071442 -0.7891857 0.15247290 0.68411557
## 2 -0.01348882 0.66968851 -1.3910519 0.6739856 -0.34433299 -0.05886018
## 3 -0.34539846 -0.55978797 0.1498560 0.8095349 0.19278085 0.45050479
## 4 -0.03190972 0.01889037 1.4369634 4.0661782 0.54884436 -0.35482173
## 5 0.12268973 0.05537398 0.1931371 0.1279154 0.07374756 -1.35216708
## 6 -1.92092318 1.82634163 -2.1543532 -0.6392836 -1.32670965 -3.04457722
## PC13 PC14 PC15 PC16 PC17 PC18 PC19
## 1 -0.1312486 -0.1904549 0.5972920 0.5343700 0.08048659 -0.4466967 0.1216806
## 2 -1.3823970 -0.2988558 1.0419702 0.6388585 0.18710260 -0.7290443 -0.5602862
## 3 -0.5871203 1.0878038 -1.1662327 -0.2837367 0.72000942 0.7510729 1.0124730
## 4 -0.9750995 1.1009721 -1.5339169 -0.9488523 1.25621694 0.4409423 -0.5016894
## 5 0.1301991 -0.5845561 0.2980020 0.2729400 0.33611534 0.1351051 -0.2458140
## 6 -2.9414352 -0.4206907 -0.7269408 -1.5735701 -4.19818828 0.2335003 -0.2858774
## PC20 PC21 PC22 PC23 PC24 PC25
## 1 0.16329379 -0.67933446 0.2581280 -0.8448291 0.90876920 0.7768300
## 2 -0.06519262 0.46884876 0.1377125 -0.7188082 -0.36929245 0.2673866
## 3 0.34699452 0.82770562 -0.2778670 1.3583068 0.33478112 -0.5585827
## 4 0.20824803 -0.25611163 -1.0050883 0.2581901 -0.21565455 0.1942550
## 5 0.02887120 0.01785662 -0.4973721 -0.1587088 -0.27862435 0.4863967
## 6 -0.58917083 0.86301787 0.5751357 1.1855990 -0.06906087 -0.3413605
## PC26
## 1 -0.38980546
## 2 0.00614413
## 3 -0.35752063
## 4 -0.10408961
## 5 0.20847277
## 6 -2.00389859
#KERNEL PCA
X <- scale(data_fin)
#Definisi fungsi kernel
rbf_kernel <- function(X, gamma = NULL) {
if (is.null(gamma)) gamma <- 1 / ncol(X)
if (!requireNamespace("proxy", quietly = TRUE)) install.packages("proxy")
dist_matrix <- as.matrix(proxy::dist(X, method = "euclidean"))^2
K <- exp(-gamma * dist_matrix)
return(K)
}
poly_kernel <- function(X, degree = 3, coef0 = 1) {
K <- (tcrossprod(X) + coef0)^degree
return(K)
}
linear_kernel <- function(X) {
K <- tcrossprod(X)
return(K)
}
#menghitung tiga tipe kernel
K_linear <- linear_kernel(X)
K_poly <- poly_kernel(X, degree = 3)
K_rbf <- rbf_kernel(X, gamma = 1/ncol(X))
#pusatkan matriks kernel
n <- nrow(X)
one_n <- matrix(1, n, n) / n
center_kernel <- function(K) {
K_centered <- K - one_n %*% K - K %*% one_n + one_n %*% K %*% one_n
return(K_centered)
}
Kc_linear <- center_kernel(K_linear)
Kc_poly <- center_kernel(K_poly)
Kc_rbf <- center_kernel(K_rbf)
#Eigen decomposition
eig_linear <- eigen(Kc_linear, symmetric = TRUE)
eig_poly <- eigen(Kc_poly, symmetric = TRUE)
eig_rbf <- eigen(Kc_rbf, symmetric = TRUE)
#normalisasi vektor eigen
normalize_eig <- function(eig) {
values <- eig$values
vectors <- eig$vectors
values[values < 0] <- 0
vectors_norm <- vectors / sqrt(values + 1e-9)
list(values = values, vectors = vectors_norm)
}
eig_linear_n <- normalize_eig(eig_linear)
eig_poly_n <- normalize_eig(eig_poly)
eig_rbf_n <- normalize_eig(eig_rbf)
#Skor eigen
k1 <- 1
k2 <- 2
scores_linear_1 <- Kc_linear %*% eig_linear_n$vectors[, 1:k1]
scores_poly_1 <- Kc_poly %*% eig_poly_n$vectors[, 1:k1]
scores_rbf_1 <- Kc_rbf %*% eig_rbf_n$vectors[, 1:k1]
scores_linear_2 <- Kc_linear %*% eig_linear_n$vectors[, 1:k2]
scores_poly_2 <- Kc_poly %*% eig_poly_n$vectors[, 1:k2]
scores_rbf_2 <- Kc_rbf %*% eig_rbf_n$vectors[, 1:k2]
#Proporsi varians
var_linear <- eig_linear$values / sum(eig_linear$values)
var_poly <- eig_poly$values / sum(eig_poly$values)
var_rbf <- eig_rbf$values / sum(eig_rbf$values)
data.frame(
Kernel = c("Linear", "Polynomial", "RBF"),
PC1 = c(var_linear[1], var_poly[1], var_rbf[1]),
PC2 = c(var_linear[2], var_poly[2], var_rbf[2]),
PC3 = c(var_linear[3], var_poly[3], var_rbf[3]),
PC4 = c(var_linear[4], var_poly[4], var_rbf[4]),
PC5 = c(var_linear[5], var_poly[5], var_rbf[5]),
PC6 = c(var_linear[6], var_poly[6], var_rbf[6]),
PC7 = c(var_linear[7], var_poly[7], var_rbf[7]),
PC8 = c(var_linear[8], var_poly[8], var_rbf[8]),
PC9 = c(var_linear[9], var_poly[9], var_rbf[9]),
PC10 = c(var_linear[10], var_poly[10], var_rbf[10]),
PC11 = c(var_linear[11], var_poly[11], var_rbf[11]),
PC12 = c(var_linear[12], var_poly[12], var_rbf[12]),
PC13 = c(var_linear[13], var_poly[13], var_rbf[13]),
PC14 = c(var_linear[14], var_poly[14], var_rbf[14]),
PC15 = c(var_linear[15], var_poly[15], var_rbf[15]),
PC16 = c(var_linear[16], var_poly[16], var_rbf[16]),
PC17 = c(var_linear[17], var_poly[17], var_rbf[17]),
PC18 = c(var_linear[18], var_poly[18], var_rbf[18]),
PC19 = c(var_linear[19], var_poly[19], var_rbf[19]),
PC20 = c(var_linear[20], var_poly[20], var_rbf[20]),
PC21 = c(var_linear[21], var_poly[21], var_rbf[21]),
PC22= c(var_linear[22], var_poly[22], var_rbf[22]),
PC23 = c(var_linear[23], var_poly[23], var_rbf[23]),
Kumulatif_5PC = c(cumsum(var_linear)[23],
cumsum(var_poly)[23],
cumsum(var_rbf)[23])
)
## Kernel PC1 PC2 PC3 PC4 PC5 PC6
## 1 Linear 0.2116581 0.09897279 0.04679511 0.04315963 0.03634225 0.03527378
## 2 Polynomial 0.4833901 0.34066618 0.05531847 0.03118602 0.02795949 0.01951701
## 3 RBF 0.1204128 0.07286036 0.07118427 0.04589449 0.03744253 0.03181374
## PC7 PC8 PC9 PC10 PC11 PC12
## 1 0.03173742 0.030446349 0.027785256 0.025887502 0.023766603 0.020566237
## 2 0.01671829 0.005342146 0.004780421 0.002886779 0.002815669 0.002289874
## 3 0.02684122 0.025643919 0.023921245 0.022489037 0.019607424 0.018761766
## PC13 PC14 PC15 PC16 PC17 PC18
## 1 0.019563447 0.018126139 0.01775113 0.0172741933 0.0168728845 0.0162247803
## 2 0.001416312 0.001070451 0.00074828 0.0005916521 0.0005192147 0.0003673101
## 3 0.017292040 0.016683163 0.01415243 0.0132879508 0.0123648173 0.0114989691
## PC19 PC20 PC21 PC22 PC23
## 1 0.0161419202 0.0153082213 0.0150333295 0.0144152711 0.0143165672
## 2 0.0003648702 0.0003408887 0.0002915625 0.0002295272 0.0002160557
## 3 0.0103679186 0.0097931390 0.0089631850 0.0080953760 0.0077261117
## Kumulatif_5PC
## 1 0.8134189
## 2 0.9990266
## 3 0.6470980