library(readxl)
library(kernlab)

finansial <- read_excel("C:/Users/Acer/Downloads/Indikator Finansial.xlsx")

data_fin <- finansial[, 9:ncol(finansial)]
data_fin[] <- lapply(data_fin, function(x) as.numeric(as.character(x)))
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
set.seed(2025)
if (nrow(data_fin) > 1500) {
  data_fin <- data_fin[sample(1:nrow(data_fin), 1500), ]
}

# Imputasi NA dengan rata-rata kolom (mean)
for (j in 1:ncol(data_fin)) {
  data_fin[[j]][is.na(data_fin[[j]])] <- mean(data_fin[[j]], na.rm = TRUE)
}

# Standarisasi Data
X <- scale(data_fin, center = TRUE, scale = TRUE)

# Matriks Korelasi & Eigenvalue/Eigenvector
S <- cor(X)
eig <- eigen(S, symmetric = TRUE)
lambda <- eig$values
E <- eig$vectors

# Proporsi dan Kumulatif Varians
proporsi_varians <- lambda / sum(lambda)
proporsi_kumulatif <- cumsum(proporsi_varians)

# Tabel Hasil PCA 
hasil_pca <- data.frame(
  Komponen = paste0("PC", 1:length(lambda)),
  Eigenvalue = round(lambda, 4),
  Proporsi = round(proporsi_varians * 100, 2),
  Kumulatif = round(proporsi_kumulatif * 100, 2)
)
print(hasil_pca)
##    Komponen Eigenvalue Proporsi Kumulatif
## 1       PC1    13.7578    21.17     21.17
## 2       PC2     6.4332     9.90     31.06
## 3       PC3     3.0417     4.68     35.74
## 4       PC4     2.8054     4.32     40.06
## 5       PC5     2.3622     3.63     43.69
## 6       PC6     2.2928     3.53     47.22
## 7       PC7     2.0629     3.17     50.39
## 8       PC8     1.9790     3.04     53.44
## 9       PC9     1.8060     2.78     56.22
## 10     PC10     1.6827     2.59     58.81
## 11     PC11     1.5448     2.38     61.18
## 12     PC12     1.3368     2.06     63.24
## 13     PC13     1.2716     1.96     65.20
## 14     PC14     1.1782     1.81     67.01
## 15     PC15     1.1538     1.78     68.78
## 16     PC16     1.1228     1.73     70.51
## 17     PC17     1.0967     1.69     72.20
## 18     PC18     1.0546     1.62     73.82
## 19     PC19     1.0492     1.61     75.43
## 20     PC20     0.9950     1.53     76.97
## 21     PC21     0.9772     1.50     78.47
## 22     PC22     0.9370     1.44     79.91
## 23     PC23     0.9306     1.43     81.34
## 24     PC24     0.8926     1.37     82.72
## 25     PC25     0.8678     1.34     84.05
## 26     PC26     0.8528     1.31     85.36
## 27     PC27     0.8116     1.25     86.61
## 28     PC28     0.8000     1.23     87.84
## 29     PC29     0.7446     1.15     88.99
## 30     PC30     0.7380     1.14     90.12
## 31     PC31     0.6809     1.05     91.17
## 32     PC32     0.6485     1.00     92.17
## 33     PC33     0.6128     0.94     93.11
## 34     PC34     0.5702     0.88     93.99
## 35     PC35     0.5677     0.87     94.86
## 36     PC36     0.4801     0.74     95.60
## 37     PC37     0.3954     0.61     96.21
## 38     PC38     0.3788     0.58     96.79
## 39     PC39     0.3146     0.48     97.28
## 40     PC40     0.2849     0.44     97.71
## 41     PC41     0.2550     0.39     98.11
## 42     PC42     0.2228     0.34     98.45
## 43     PC43     0.1714     0.26     98.71
## 44     PC44     0.1649     0.25     98.97
## 45     PC45     0.1530     0.24     99.20
## 46     PC46     0.1195     0.18     99.39
## 47     PC47     0.0870     0.13     99.52
## 48     PC48     0.0725     0.11     99.63
## 49     PC49     0.0567     0.09     99.72
## 50     PC50     0.0512     0.08     99.80
## 51     PC51     0.0365     0.06     99.85
## 52     PC52     0.0273     0.04     99.89
## 53     PC53     0.0231     0.04     99.93
## 54     PC54     0.0159     0.02     99.95
## 55     PC55     0.0109     0.02     99.97
## 56     PC56     0.0075     0.01     99.98
## 57     PC57     0.0049     0.01     99.99
## 58     PC58     0.0034     0.01    100.00
## 59     PC59     0.0023     0.00    100.00
## 60     PC60     0.0001     0.00    100.00
## 61     PC61     0.0001     0.00    100.00
## 62     PC62     0.0000     0.00    100.00
## 63     PC63     0.0000     0.00    100.00
## 64     PC64     0.0000     0.00    100.00
## 65     PC65     0.0000     0.00    100.00
# Menentukan jumlah komponen utama
k_kaiser <- sum(lambda > 1)
k_80pct <- which(proporsi_kumulatif >= 0.80)[1] 

cat("Jumlah komponen (Eigenvalue > 1):", k_kaiser, "\n")
## Jumlah komponen (Eigenvalue > 1): 19
cat("Jumlah komponen untuk ≥80% varians:", k_80pct, "\n")
## Jumlah komponen untuk ≥80% varians: 23
# Hitung Skor Komponen Utama
k <- min(26, ncol(data_fin)) 
scores <- as.data.frame(X %*% E[, 1:k])
colnames(scores) <- paste0("PC", 1:k)

head(scores) 
##           PC1         PC2         PC3        PC4          PC5        PC6
## 1   0.7485310 -0.04236954 -0.27597338  0.8520014 -1.018803700 -0.6751042
## 2 -11.1731065  0.44942543 -1.50455246 -0.2350548  0.720265067  1.8135337
## 3   1.0581054 -0.02878859 -0.33455750  0.1252107 -1.594524199 -1.6012604
## 4   0.9374362  0.12486712  0.06945476 -0.9328719 -0.163241715 -2.0627747
## 5   0.7401604  0.28662130  0.21966776  1.2213603  0.001728331  0.2657497
## 6   0.4541911 -0.13261935 -0.23684117 -3.2465732  0.803541433  0.4455517
##           PC7         PC8        PC9       PC10        PC11        PC12
## 1 -0.19177838 -0.73488550  0.3071442 -0.7891857  0.15247290  0.68411557
## 2 -0.01348882  0.66968851 -1.3910519  0.6739856 -0.34433299 -0.05886018
## 3 -0.34539846 -0.55978797  0.1498560  0.8095349  0.19278085  0.45050479
## 4 -0.03190972  0.01889037  1.4369634  4.0661782  0.54884436 -0.35482173
## 5  0.12268973  0.05537398  0.1931371  0.1279154  0.07374756 -1.35216708
## 6 -1.92092318  1.82634163 -2.1543532 -0.6392836 -1.32670965 -3.04457722
##         PC13       PC14       PC15       PC16        PC17       PC18       PC19
## 1 -0.1312486 -0.1904549  0.5972920  0.5343700  0.08048659 -0.4466967  0.1216806
## 2 -1.3823970 -0.2988558  1.0419702  0.6388585  0.18710260 -0.7290443 -0.5602862
## 3 -0.5871203  1.0878038 -1.1662327 -0.2837367  0.72000942  0.7510729  1.0124730
## 4 -0.9750995  1.1009721 -1.5339169 -0.9488523  1.25621694  0.4409423 -0.5016894
## 5  0.1301991 -0.5845561  0.2980020  0.2729400  0.33611534  0.1351051 -0.2458140
## 6 -2.9414352 -0.4206907 -0.7269408 -1.5735701 -4.19818828  0.2335003 -0.2858774
##          PC20        PC21       PC22       PC23        PC24       PC25
## 1  0.16329379 -0.67933446  0.2581280 -0.8448291  0.90876920  0.7768300
## 2 -0.06519262  0.46884876  0.1377125 -0.7188082 -0.36929245  0.2673866
## 3  0.34699452  0.82770562 -0.2778670  1.3583068  0.33478112 -0.5585827
## 4  0.20824803 -0.25611163 -1.0050883  0.2581901 -0.21565455  0.1942550
## 5  0.02887120  0.01785662 -0.4973721 -0.1587088 -0.27862435  0.4863967
## 6 -0.58917083  0.86301787  0.5751357  1.1855990 -0.06906087 -0.3413605
##          PC26
## 1 -0.38980546
## 2  0.00614413
## 3 -0.35752063
## 4 -0.10408961
## 5  0.20847277
## 6 -2.00389859
#KERNEL PCA
X <- scale(data_fin)

#Definisi fungsi kernel
rbf_kernel <- function(X, gamma = NULL) {
  if (is.null(gamma)) gamma <- 1 / ncol(X)
  if (!requireNamespace("proxy", quietly = TRUE)) install.packages("proxy")
  dist_matrix <- as.matrix(proxy::dist(X, method = "euclidean"))^2

  K <- exp(-gamma * dist_matrix)
  return(K)
}

poly_kernel <- function(X, degree = 3, coef0 = 1) {
  K <- (tcrossprod(X) + coef0)^degree
  return(K)
}

linear_kernel <- function(X) {
  K <- tcrossprod(X)
  return(K)
}

#menghitung tiga tipe kernel
K_linear <- linear_kernel(X)
K_poly   <- poly_kernel(X, degree = 3)
K_rbf    <- rbf_kernel(X, gamma = 1/ncol(X))

#pusatkan matriks kernel
n <- nrow(X)
one_n <- matrix(1, n, n) / n

center_kernel <- function(K) {
  K_centered <- K - one_n %*% K - K %*% one_n + one_n %*% K %*% one_n
  return(K_centered)
}

Kc_linear <- center_kernel(K_linear)
Kc_poly   <- center_kernel(K_poly)
Kc_rbf    <- center_kernel(K_rbf)

#Eigen decomposition
eig_linear <- eigen(Kc_linear, symmetric = TRUE)
eig_poly   <- eigen(Kc_poly, symmetric = TRUE)
eig_rbf    <- eigen(Kc_rbf, symmetric = TRUE)

#normalisasi vektor eigen
normalize_eig <- function(eig) {
  values <- eig$values
  vectors <- eig$vectors
  values[values < 0] <- 0
  vectors_norm <- vectors / sqrt(values + 1e-9)
  list(values = values, vectors = vectors_norm)
}

eig_linear_n <- normalize_eig(eig_linear)
eig_poly_n   <- normalize_eig(eig_poly)
eig_rbf_n    <- normalize_eig(eig_rbf)

#Skor eigen
k1 <- 1
k2 <- 2

scores_linear_1 <- Kc_linear %*% eig_linear_n$vectors[, 1:k1]
scores_poly_1   <- Kc_poly   %*% eig_poly_n$vectors[, 1:k1]
scores_rbf_1    <- Kc_rbf    %*% eig_rbf_n$vectors[, 1:k1]

scores_linear_2 <- Kc_linear %*% eig_linear_n$vectors[, 1:k2]
scores_poly_2   <- Kc_poly   %*% eig_poly_n$vectors[, 1:k2]
scores_rbf_2    <- Kc_rbf    %*% eig_rbf_n$vectors[, 1:k2]

#Proporsi varians
var_linear <- eig_linear$values / sum(eig_linear$values)
var_poly   <- eig_poly$values / sum(eig_poly$values)
var_rbf    <- eig_rbf$values / sum(eig_rbf$values)

data.frame(
  Kernel = c("Linear", "Polynomial", "RBF"),
  PC1 = c(var_linear[1], var_poly[1], var_rbf[1]),
  PC2 = c(var_linear[2], var_poly[2], var_rbf[2]),
  PC3 = c(var_linear[3], var_poly[3], var_rbf[3]),
  PC4 = c(var_linear[4], var_poly[4], var_rbf[4]),
  PC5 = c(var_linear[5], var_poly[5], var_rbf[5]),
  PC6 = c(var_linear[6], var_poly[6], var_rbf[6]),
  PC7 = c(var_linear[7], var_poly[7], var_rbf[7]),
  PC8 = c(var_linear[8], var_poly[8], var_rbf[8]),
  PC9 = c(var_linear[9], var_poly[9], var_rbf[9]),
  PC10 = c(var_linear[10], var_poly[10], var_rbf[10]),
  PC11 = c(var_linear[11], var_poly[11], var_rbf[11]),
  PC12 = c(var_linear[12], var_poly[12], var_rbf[12]),
  PC13 = c(var_linear[13], var_poly[13], var_rbf[13]),
  PC14 = c(var_linear[14], var_poly[14], var_rbf[14]),
  PC15 = c(var_linear[15], var_poly[15], var_rbf[15]),
  PC16 = c(var_linear[16], var_poly[16], var_rbf[16]),
  PC17 = c(var_linear[17], var_poly[17], var_rbf[17]),
  PC18 = c(var_linear[18], var_poly[18], var_rbf[18]),
  PC19 = c(var_linear[19], var_poly[19], var_rbf[19]),
  PC20 = c(var_linear[20], var_poly[20], var_rbf[20]),
  PC21 = c(var_linear[21], var_poly[21], var_rbf[21]),
  PC22= c(var_linear[22], var_poly[22], var_rbf[22]),
  PC23 = c(var_linear[23], var_poly[23], var_rbf[23]),
  Kumulatif_5PC = c(cumsum(var_linear)[23],
                    cumsum(var_poly)[23],
                    cumsum(var_rbf)[23])
)
##       Kernel       PC1        PC2        PC3        PC4        PC5        PC6
## 1     Linear 0.2116581 0.09897279 0.04679511 0.04315963 0.03634225 0.03527378
## 2 Polynomial 0.4833901 0.34066618 0.05531847 0.03118602 0.02795949 0.01951701
## 3        RBF 0.1204128 0.07286036 0.07118427 0.04589449 0.03744253 0.03181374
##          PC7         PC8         PC9        PC10        PC11        PC12
## 1 0.03173742 0.030446349 0.027785256 0.025887502 0.023766603 0.020566237
## 2 0.01671829 0.005342146 0.004780421 0.002886779 0.002815669 0.002289874
## 3 0.02684122 0.025643919 0.023921245 0.022489037 0.019607424 0.018761766
##          PC13        PC14       PC15         PC16         PC17         PC18
## 1 0.019563447 0.018126139 0.01775113 0.0172741933 0.0168728845 0.0162247803
## 2 0.001416312 0.001070451 0.00074828 0.0005916521 0.0005192147 0.0003673101
## 3 0.017292040 0.016683163 0.01415243 0.0132879508 0.0123648173 0.0114989691
##           PC19         PC20         PC21         PC22         PC23
## 1 0.0161419202 0.0153082213 0.0150333295 0.0144152711 0.0143165672
## 2 0.0003648702 0.0003408887 0.0002915625 0.0002295272 0.0002160557
## 3 0.0103679186 0.0097931390 0.0089631850 0.0080953760 0.0077261117
##   Kumulatif_5PC
## 1     0.8134189
## 2     0.9990266
## 3     0.6470980