data_finansial <- finansial[, 9:ncol(finansial)] %>%
mutate(across(where(is.character), ~ na_if(.x, "NA"))) %>%
mutate(across(where(is.character), ~ na_if(.x, "N/A"))) %>%
mutate(across(where(is.character), ~ na_if(.x, "")))
# Konversi kolom ke numeric dan kembalikan jadi data.frame
data_finansial <- data_finansial %>%
mutate(across(everything(), ~ as.numeric(as.character(.))))
if (nrow(data_finansial) > 1500) {
set.seed(2025)
data_finansial <- data_finansial[sample(1:nrow(data_finansial), 1500), ]
}
cat("Struktur data setelah perbaikan:\n")## Struktur data setelah perbaikan:
## tibble [1,500 × 65] (S3: tbl_df/tbl/data.frame)
## $ # of shares outstanding : num [1:1500] 0 1022.2 6.9 34.2 6.1 ...
## $ Market Cap : num [1:1500] 0 57930.7 22 7.7 26.9 ...
## $ Total Debt : num [1:1500] 0 5591 1.5 35.1 14.4 ...
## $ Firm Value : num [1:1500] 0 63521.7 23.5 42.8 41.3 ...
## $ Enterprise Value : num [1:1500] 0 61256.7 21.9 42 40.8 ...
## $ Cash : num [1:1500] 0 2265 1.6 0.8 0.5 ...
## $ Revenues: Last yr : num [1:1500] NA 37445 1.4 13.7 62.7 ...
## $ Trailing 12-mth Revenues : num [1:1500] NA 41302 3.3 11.5 73.7 ...
## $ Current PE : num [1:1500] NA 20.8 NA NA 11.7 ...
## $ Trailing PE : num [1:1500] NA 20.8 NA NA 14.2 ...
## $ Forward EPS : num [1:1500] -1 3.34 NA NA NA 1.3 2.81 1.57 1.45 0.39 ...
## $ Forward PE : num [1:1500] NA 16.7 NA NA NA ...
## $ PEG Ratio : num [1:1500] NA 1.89 NA NA NA 2.4 1.41 1.06 3.45 NA ...
## $ PBV Ratio : num [1:1500] NA 4.14 6.47 NA 1.1 15.4 3.1 1.36 3.73 0.9 ...
## $ PS Ratio : num [1:1500] NA 1.55 15.71 0.56 0.43 ...
## $ EV/EBIT : num [1:1500] NA 11.24 NA NA 6.32 ...
## $ EV/EBITDA : num [1:1500] NA 9.53 NA NA 4.66 ...
## $ EV/ Invested Capital : num [1:1500] NA 3.53 6.64 8.08 1.07 ...
## $ Value/BV of Capital : num [1:1500] NA 3.24 4.8 NA 1.06 15.4 2.83 1.27 2.1 0.92 ...
## $ EV/Sales : num [1:1500] NA 1.64 15.64 3.07 0.65 ...
## $ EV/ Trailing Sales : num [1:1500] NA 1.48 6.64 3.65 0.55 8.32 NA 1.09 1.35 0.55 ...
## $ Growth in EPS: Last 5 years : num [1:1500] NA 0.19 NA NA NA 0.53 0.19 0.12 -0.04 -0.15 ...
## $ Expected Growth in EPS: next 5 years : num [1:1500] 0.02 0.11 NA NA NA 0.29 0.14 0.09 0.05 NA ...
## $ Expected Growth in Revenues: next 5 years: num [1:1500] NA 0.09 NA NA NA 0.3 NA 0.06 -0.02 NA ...
## $ Growth in Revenue- last year : num [1:1500] 0 0.21 -0.46 0.69 0.32 0.52 0 0.04 -0.13 0.09 ...
## $ 3-yr Regression Beta : num [1:1500] 0 0.83 0.51 4 0 0.8 0.22 1.5 0.9 0.34 ...
## $ Value Line Beta : num [1:1500] 0 1.15 0.95 2 0.85 1.2 0.9 1.4 0.9 0.8 ...
## $ HiLo risk : num [1:1500] 0.33 0.1 0.79 0.99 0.45 0.19 0.17 0.18 0.11 0.23 ...
## $ 3-yr Standard Deviation (Stock Price) : num [1:1500] 0 0.17 1.61 2.35 0 0.49 0.16 0.29 0.21 0.31 ...
## $ Reinvestment : num [1:1500] 0 -10.98 -0.57 -2.59 0.42 ...
## $ Correlation : num [1:1500] NA 1 0.06 0.34 NA 0.32 0.28 1.05 0.84 0.22 ...
## $ Payout Ratio : num [1:1500] NA 0.32 NA NA 0 0 0.19 0.25 0.6 NA ...
## $ Reinvestment Rate : num [1:1500] NA 0 0.26 0.22 0.1 0.35 NA 0.01 -0.26 0.65 ...
## $ ROE : num [1:1500] NA 0.2 -0.85 NA 0.09 0.22 0.16 0.14 0.22 0 ...
## $ ROC : num [1:1500] NA 0.23 -0.66 -2.26 0.11 0.79 NA 0.14 0.16 0.09 ...
## $ Net Margin : num [1:1500] NA 0.07 -2.07 -0.94 0.04 0.17 NA 0.09 0.06 0 ...
## $ Pre-tax Operating Margin : num [1:1500] NA 0.15 -1.55 -0.86 0.1 0.23 NA 0.16 0.13 0.06 ...
## $ Invested Capital : num [1:1500] 0 17334 3.3 5.2 38.3 ...
## $ BV of Assets : num [1:1500] 0 40035 7.4 12.6 54.8 ...
## $ Non-cash WC : num [1:1500] 0 1670 -0.2 2.4 9.8 ...
## $ Chg in non-cash WC : num [1:1500] 0 172.02 -0.27 -0.39 1.72 ...
## $ Net Income : num [1:1500] 0 2788 -2.9 -12.9 2.3 ...
## $ EBIT : num [1:1500] NA 5448.25 -2.18 -11.77 6.46 ...
## $ EBIT(1-t) : num [1:1500] NA 4008.93 -2.18 -11.77 4.26 ...
## $ EBITDA : num [1:1500] NA 6426.25 -1.78 -9.47 8.76 ...
## $ FCFF : num [1:1500] NA 4019.91 -1.6 -9.19 3.84 ...
## $ Eff Tax Rate : num [1:1500] 0 0.26 0 0 0.34 0.18 0.35 0.29 0.28 0 ...
## $ Non-cash WC as % of Revenues : num [1:1500] NA 0.04 -0.14 0.18 0.16 0.04 NA -0.14 0.12 0.2 ...
## $ Cash as % of Firm Value : num [1:1500] NA 0.04 0.07 0.02 0.01 0.05 0.15 0.05 0.05 0.01 ...
## $ Cash as % of Revenues : num [1:1500] NA 0.05 0.48 0.07 0.01 0.39 NA 0.05 0.08 0.01 ...
## $ Cash as % of Total Assets : num [1:1500] NA 0.06 0.22 0.06 0.01 0.55 0.06 0.02 0.08 0.01 ...
## $ Capital Expenditures : num [1:1500] 0 795 0.1 0.1 1 ...
## $ Depreciation : num [1:1500] 0 978 0.4 2.3 2.3 ...
## $ SG&A Expenses : num [1:1500] 0 5754 2.4 20.2 11.7 ...
## $ Trailing Revenues : num [1:1500] 0 41302 3.3 11.5 73.7 ...
## $ Trailing Net Income : num [1:1500] 0 2788 -0.8 -23.2 1.9 ...
## $ Dividends : num [1:1500] 0 900 0 0 0 ...
## $ Intangible Assets/Total Assets : num [1:1500] NA 0.3 0 0 0.38 0.04 0 0.34 0.32 0.12 ...
## $ Fixed Assets/Total Assets : num [1:1500] NA 0.13 0.72 0.21 0.24 0.16 0.01 0.04 0.2 0.44 ...
## $ Market D/E : num [1:1500] NA 0.1 0.07 4.56 0.54 0 0.05 0.25 0.4 0.29 ...
## $ Market Debt to Capital : num [1:1500] NA 0.09 0.06 0.82 0.35 0 0.05 0.2 0.29 0.23 ...
## $ Book Debt to Capital : num [1:1500] NA 0.29 0.31 NA 0.37 0 0.13 0.25 0.6 0.21 ...
## $ Dividend Yield : num [1:1500] 0 0.02 0 0 0 0 0.01 0.03 0.04 0 ...
## $ Insider Holdings : num [1:1500] NA 0.02 NA NA NA 0.02 0.18 0.06 0.02 NA ...
## $ Institutional Holdings : num [1:1500] 0.01 0.79 0.01 0.01 0.08 0.89 0.58 0.78 0.86 0.81 ...
na_summary <- data_finansial %>%
summarise(across(everything(), ~sum(is.na(.)))) %>%
pivot_longer(everything(), names_to = "Variabel", values_to = "Jumlah_NA") %>%
arrange(desc(Jumlah_NA))
cat("\n Top 10 Jumlah NA terbanyak per variabel:\n")##
## Top 10 Jumlah NA terbanyak per variabel:
## # A tibble: 10 × 2
## Variabel Jumlah_NA
## <chr> <int>
## 1 Insider Holdings 1218
## 2 Expected Growth in Revenues: next 5 years 1206
## 3 PEG Ratio 1042
## 4 Growth in EPS: Last 5 years 1038
## 5 Forward PE 1007
## 6 Expected Growth in EPS: next 5 years 968
## 7 Forward EPS 933
## 8 EV/EBIT 705
## 9 Trailing PE 682
## 10 EV/EBITDA 671
# Kolom dengan >50% missing
na_percent <- colMeans(is.na(data_finansial))
na_over50 <- names(na_percent[na_percent > 0.5])
cat("\n Kolom dengan >50% missing values:\n")##
## Kolom dengan >50% missing values:
## [1] "Forward EPS"
## [2] "Forward PE"
## [3] "PEG Ratio"
## [4] "Growth in EPS: Last 5 years"
## [5] "Expected Growth in EPS: next 5 years"
## [6] "Expected Growth in Revenues: next 5 years"
## [7] "Insider Holdings"
# Handling Missing Value
# Drop kolom > 50% missing
na_percent <- colMeans(is.na(data_finansial))
drop_cols <- names(na_percent[na_percent > 0.5])
data_finansial_handled <- data_finansial %>%
select(-all_of(drop_cols))
# Imputasi mean
num_cols <- data_finansial_handled %>%
select(where(is.numeric)) %>%
colnames()
data_finansial_handled <- data_finansial_handled %>%
mutate(across(all_of(num_cols), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .)))
# Jumlah variabel & NA setelah handling
n_final <- ncol(data_finansial)
na_after <- data_finansial %>%
summarise(across(everything(), ~sum(is.na(.)))) %>%
pivot_longer(everything(), names_to = "Variabel", values_to = "Jumlah_NA") %>%
arrange(desc(Jumlah_NA))
cat("\n Jumlah variabel setelah handling Missing Value:", n_final, "\n")##
## Jumlah variabel setelah handling Missing Value: 65
##
## Jumlah NA setelah handling:
## [1] 0
num_data <- data_finansial_handled %>% select(where(is.numeric))
# Fungsi untuk capping berbasis IQR
iqr_capping <- function(x) {
if (is.numeric(x)) {
Q1 <- quantile(x, 0.25, na.rm = TRUE)
Q3 <- quantile(x, 0.75, na.rm = TRUE)
IQR_val <- Q3 - Q1
lower <- Q1 - 1.5 * IQR_val
upper <- Q3 + 1.5 * IQR_val
x <- pmin(pmax(x, lower), upper)
}
return(x)
}
# Fungsi hitung jumlah outlier per variabel
outlier_count <- function(x) {
Q1 <- quantile(x, 0.25, na.rm = TRUE)
Q3 <- quantile(x, 0.75, na.rm = TRUE)
IQR_val <- Q3 - Q1
lower <- Q1 - 1.5 * IQR_val
upper <- Q3 + 1.5 * IQR_val
sum(x < lower | x > upper, na.rm = TRUE) }
# Jumlah outlier sebelum handling
outlier_table_before <- num_data %>%
summarise(across(everything(), outlier_count)) %>%
pivot_longer(cols = everything(), names_to = "Variabel", values_to = "Jumlah_Outlier") %>%
arrange(desc(Jumlah_Outlier))
cat("\n Jumlah outlier per variabel (sebelum handling):\n")##
## Jumlah outlier per variabel (sebelum handling):
## # A tibble: 58 × 2
## Variabel Jumlah_Outlier
## <chr> <int>
## 1 Reinvestment 457
## 2 Chg in non-cash WC 446
## 3 Non-cash WC 356
## 4 Dividend Yield 354
## 5 Dividends 349
## 6 Trailing Net Income 289
## 7 Net Income 271
## 8 Capital Expenditures 257
## 9 Total Debt 252
## 10 BV of Assets 245
## # ℹ 48 more rows
# Handling Outlier
# Terapkan IQR capping ke kolom numerik
data_finansial_capped <- data_finansial_handled %>%
mutate(across(where(is.numeric), iqr_capping))
# Jumlah outlier setelah handling
num_data_capped <- data_finansial_capped %>% select(where(is.numeric))
outlier_table_after <- num_data_capped %>%
summarise(across(everything(), outlier_count)) %>%
pivot_longer(cols = everything(), names_to = "Variabel", values_to = "Jumlah_Outlier") %>%
arrange(desc(Jumlah_Outlier))
cat("\n Jumlah outlier per variabel (setelah handling):\n")##
## Jumlah outlier per variabel (setelah handling):
## # A tibble: 58 × 2
## Variabel Jumlah_Outlier
## <chr> <int>
## 1 # of shares outstanding 0
## 2 Market Cap 0
## 3 Total Debt 0
## 4 Firm Value 0
## 5 Enterprise Value 0
## 6 Cash 0
## 7 Revenues: Last yr 0
## 8 Trailing 12-mth Revenues 0
## 9 Current PE 0
## 10 Trailing PE 0
## # ℹ 48 more rows
# Cek variabel konstan (sd = 0)
const_cols <- sapply(num_data_capped, function(x) sd(x, na.rm = TRUE) == 0)
if (any(const_cols)) {
cat("Variabel konstan ditemukan dan dihapus:\n")
print(names(num_data_capped)[const_cols])
num_data_capped <- num_data_capped[, !const_cols]
} else {
cat("Tidak ada variabel konstan.\n")
}## Variabel konstan ditemukan dan dihapus:
## [1] "Dividend Yield"
# Standarisasi (Z-score scaling)
num_scaled <- scale(data_finansial_handled)
num_scaled <- as.data.frame(num_scaled)
# Cek hasil scaling
cat("\nRingkasan hasil scaling:\n")##
## Ringkasan hasil scaling:
## # of shares outstanding Market Cap Total Debt Firm Value
## Min. :-0.2557 Min. :-0.2093 Min. :-0.08583 Min. :-0.1878
## 1st Qu.:-0.2380 1st Qu.:-0.2082 1st Qu.:-0.08583 1st Qu.:-0.1869
## Median :-0.2002 Median :-0.1994 Median :-0.08538 Median :-0.1798
## Mean : 0.0000 Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.:-0.1098 3rd Qu.:-0.1386 3rd Qu.:-0.07432 3rd Qu.:-0.1315
## Max. :16.3234 Max. :26.4852 Max. :35.33988 Max. :24.0613
## Enterprise Value Cash Revenues: Last yr Trailing 12-mth Revenues
## Min. :-0.7000 Min. :-0.1549 Min. :-0.2312 Min. :-0.2216
## 1st Qu.:-0.1850 1st Qu.:-0.1545 1st Qu.:-0.2274 1st Qu.:-0.2174
## Median :-0.1784 Median :-0.1495 Median :-0.1894 Median :-0.1783
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.:-0.1315 3rd Qu.:-0.1181 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :24.2500 Max. :23.7390 Max. :23.5912 Max. :27.0347
## Current PE Trailing PE PBV Ratio PS Ratio
## Min. :-0.4525 Min. :-0.6534 Min. :-0.2227 Min. :-0.05503
## 1st Qu.:-0.2283 1st Qu.:-0.2846 1st Qu.:-0.1620 1st Qu.:-0.05383
## Median : 0.0000 Median : 0.0000 Median :-0.1096 Median :-0.05120
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 0.00000
## Max. :27.6004 Max. :29.8730 Max. :32.1849 Max. :38.04876
## EV/EBIT EV/EBITDA EV/ Invested Capital Value/BV of Capital
## Min. :-0.3141 Min. :-0.2711 Min. :-0.04084 Min. :-0.4460
## 1st Qu.:-0.1803 1st Qu.:-0.1655 1st Qu.:-0.03918 1st Qu.:-0.2739
## Median : 0.0000 Median : 0.0000 Median :-0.03767 Median :-0.1501
## Mean : 0.0000 Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.:-0.02300 3rd Qu.: 0.0000
## Max. :24.9322 Max. :26.3784 Max. :38.66865 Max. :23.7342
## EV/Sales EV/ Trailing Sales Growth in Revenue- last year
## Min. :-0.05104 Min. :-0.07603 Min. :-2.0060
## 1st Qu.:-0.04992 1st Qu.:-0.07126 1st Qu.:-0.2594
## Median :-0.04758 Median :-0.06135 Median :-0.2594
## Mean : 0.00000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.00000 3rd Qu.: 0.00000 3rd Qu.: 0.1132
## Max. :38.37839 Max. :37.84282 Max. : 9.7547
## 3-yr Regression Beta Value Line Beta HiLo risk
## Min. :-5.8357 Min. :-1.58853 Min. :-1.5486
## 1st Qu.:-0.6637 1st Qu.:-0.63008 1st Qu.:-0.7370
## Median :-0.2899 Median :-0.05502 Median :-0.1791
## Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.3620 3rd Qu.: 0.52005 3rd Qu.: 0.3281
## Max. : 7.6896 Max. : 8.09180 Max. : 3.3715
## 3-yr Standard Deviation (Stock Price) Reinvestment Correlation
## Min. :-0.5986 Min. :-35.33536 Min. :-3.7591
## 1st Qu.:-0.5986 1st Qu.: 0.02163 1st Qu.:-0.5218
## Median :-0.2265 Median : 0.02194 Median : 0.0000
## Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.1489 3rd Qu.: 0.02283 3rd Qu.: 0.4469
## Max. :13.0612 Max. : 10.05075 Max. : 6.5737
## Payout Ratio Reinvestment Rate ROE ROC
## Min. :-0.5908 Min. :-11.61729 Min. :-28.9584 Min. :-1.07516
## 1st Qu.:-0.5908 1st Qu.: -0.08557 1st Qu.: 0.0000 1st Qu.:-0.03630
## Median : 0.0000 Median : -0.03857 Median : 0.1351 Median :-0.03337
## Mean : 0.0000 Mean : 0.00000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.0000 3rd Qu.: 0.00000 3rd Qu.: 0.1730 3rd Qu.: 0.00000
## Max. :25.5474 Max. : 24.16164 Max. : 1.8459 Max. :38.42675
## Net Margin Pre-tax Operating Margin Invested Capital
## Min. :-23.0629 Min. :-14.4475 Min. :-1.1487
## 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.:-0.1285
## Median : 0.1304 Median : 0.2000 Median :-0.1253
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.1348 3rd Qu.: 0.2219 3rd Qu.:-0.1029
## Max. : 0.1869 Max. : 0.3890 Max. :30.0505
## BV of Assets Non-cash WC Chg in non-cash WC Net Income
## Min. :-0.1522 Min. :-2.33085 Min. :-37.02071 Min. :-15.2562
## 1st Qu.:-0.1519 1st Qu.:-0.05652 1st Qu.: 0.02083 1st Qu.: -0.1374
## Median :-0.1478 Median :-0.05635 Median : 0.02084 Median : -0.1341
## Mean : 0.0000 Mean : 0.00000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.:-0.1271 3rd Qu.:-0.04941 3rd Qu.: 0.02122 3rd Qu.: -0.1033
## Max. :24.0429 Max. :33.93193 Max. : 10.45705 Max. : 27.2283
## EBIT EBIT(1-t) EBITDA FCFF
## Min. :-0.3459 Min. :-0.4108 Min. :-0.2907 Min. :-5.9831
## 1st Qu.:-0.2239 1st Qu.:-0.2302 1st Qu.:-0.2145 1st Qu.:-0.1155
## Median :-0.1912 Median :-0.1966 Median :-0.1846 Median :-0.1054
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :24.0587 Max. :21.8475 Max. :19.9817 Max. :32.2962
## Eff Tax Rate Non-cash WC as % of Revenues Cash as % of Firm Value
## Min. :-0.8917 Min. :-35.90702 Min. :-0.2001
## 1st Qu.:-0.8917 1st Qu.: 0.00000 1st Qu.:-0.1787
## Median :-0.8068 Median : 0.06408 Median :-0.1148
## Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 1.0323 3rd Qu.: 0.08348 3rd Qu.: 0.0000
## Max. : 2.5035 Max. : 2.64336 Max. :35.3293
## Cash as % of Revenues Cash as % of Total Assets Capital Expenditures
## Min. :-0.07420 Min. :-0.8462 Min. :-0.1610
## 1st Qu.:-0.07309 1st Qu.:-0.7183 1st Qu.:-0.1610
## Median :-0.06785 Median :-0.3772 Median :-0.1594
## Mean : 0.00000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.00000 3rd Qu.: 0.2623 3rd Qu.:-0.1374
## Max. :37.68493 Max. : 3.4169 Max. :17.7089
## Depreciation SG&A Expenses Trailing Revenues Trailing Net Income
## Min. :-0.11353 Min. :-0.1883 Min. :-0.1643 Min. :-5.3485
## 1st Qu.:-0.11353 1st Qu.:-0.1871 1st Qu.:-0.1643 1st Qu.:-0.1491
## Median :-0.11217 Median :-0.1801 Median :-0.1603 Median :-0.1467
## Mean : 0.00000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.:-0.09736 3rd Qu.:-0.1467 3rd Qu.:-0.1238 3rd Qu.:-0.1131
## Max. :30.49859 Max. :17.4104 Max. :26.9656 Max. :30.0294
## Dividends Intangible Assets/Total Assets Fixed Assets/Total Assets
## Min. :-0.1477 Min. :-0.6993 Min. :-0.8999
## 1st Qu.:-0.1477 1st Qu.:-0.6993 1st Qu.:-0.7687
## Median :-0.1477 Median :-0.4227 Median :-0.3312
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.:-0.1461 3rd Qu.: 0.1857 3rd Qu.: 0.3251
## Max. :21.4405 Max. : 4.6111 Max. : 3.4750
## Market D/E Market Debt to Capital Book Debt to Capital
## Min. :-0.08499 Min. :-0.8546 Min. :-1.1279
## 1st Qu.:-0.08499 1st Qu.:-0.8546 1st Qu.:-0.9548
## Median :-0.08228 Median :-0.2999 Median : 0.0000
## Mean : 0.00000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.:-0.07361 3rd Qu.: 0.3402 3rd Qu.: 0.5601
## Max. :22.20268 Max. : 3.4126 Max. : 3.1571
## Dividend Yield Institutional Holdings
## Min. :-0.09479 Min. :-0.9732
## 1st Qu.:-0.09479 1st Qu.:-0.9732
## Median :-0.09479 Median :-0.3577
## Mean : 0.00000 Mean : 0.0000
## 3rd Qu.:-0.09479 3rd Qu.: 0.8735
## Max. :38.12419 Max. : 2.1047
##
## Standar deviasi tiap kolom (harus ≈ 1):
## # of shares outstanding Market Cap
## 1 1
## Total Debt Firm Value
## 1 1
## Enterprise Value Cash
## 1 1
## Revenues: Last yr Trailing 12-mth Revenues
## 1 1
## Current PE Trailing PE
## 1 1
## PBV Ratio PS Ratio
## 1 1
## EV/EBIT EV/EBITDA
## 1 1
## EV/ Invested Capital Value/BV of Capital
## 1 1
## EV/Sales EV/ Trailing Sales
## 1 1
## Growth in Revenue- last year 3-yr Regression Beta
## 1 1
## Value Line Beta HiLo risk
## 1 1
## 3-yr Standard Deviation (Stock Price) Reinvestment
## 1 1
## Correlation Payout Ratio
## 1 1
## Reinvestment Rate ROE
## 1 1
## ROC Net Margin
## 1 1
## Pre-tax Operating Margin Invested Capital
## 1 1
## BV of Assets Non-cash WC
## 1 1
## Chg in non-cash WC Net Income
## 1 1
## EBIT EBIT(1-t)
## 1 1
## EBITDA FCFF
## 1 1
## Eff Tax Rate Non-cash WC as % of Revenues
## 1 1
## Cash as % of Firm Value Cash as % of Revenues
## 1 1
## Cash as % of Total Assets Capital Expenditures
## 1 1
## Depreciation SG&A Expenses
## 1 1
## Trailing Revenues Trailing Net Income
## 1 1
## Dividends Intangible Assets/Total Assets
## 1 1
## Fixed Assets/Total Assets Market D/E
## 1 1
## Market Debt to Capital Book Debt to Capital
## 1 1
## Dividend Yield Institutional Holdings
## 1 1
## 'data.frame': 1500 obs. of 58 variables:
## $ # of shares outstanding : num -0.256 2.23 -0.239 -0.173 -0.241 ...
## $ Market Cap : num -0.209 4.209 -0.208 -0.209 -0.207 ...
## $ Total Debt : num -0.0858 0.3549 -0.0857 -0.0831 -0.0847 ...
## $ Firm Value : num -0.188 2.926 -0.187 -0.186 -0.186 ...
## $ Enterprise Value : num -0.186 3.072 -0.185 -0.183 -0.184 ...
## $ Cash : num -0.155 0.875 -0.154 -0.154 -0.155 ...
## $ Revenues: Last yr : num 0 3.148 -0.231 -0.23 -0.226 ...
## $ Trailing 12-mth Revenues : num 0 3.233 -0.221 -0.221 -0.215 ...
## $ Current PE : num 0 -0.205 0 0 -0.316 ...
## $ Trailing PE : num 0 -0.224 0 0 -0.361 ...
## $ PBV Ratio : num 0 -0.0674 0.02 0 -0.1815 ...
## $ PS Ratio : num 0 -0.0529 -0.0331 -0.0542 -0.0544 ...
## $ EV/EBIT : num 0 -0.168 0 0 -0.234 ...
## $ EV/EBITDA : num 0 -0.136 0 0 -0.206 ...
## $ EV/ Invested Capital : num 0 -0.0368 -0.0331 -0.0315 -0.0396 ...
## $ Value/BV of Capital : num 0 -0.0262 0.1771 0 -0.3104 ...
## $ EV/Sales : num 0 -0.0492 -0.0332 -0.0475 -0.0503 ...
## $ EV/ Trailing Sales : num 0 -0.0687 -0.0431 -0.0579 -0.0733 ...
## $ Growth in Revenue- last year : num -0.259 0.23 -1.331 1.348 0.486 ...
## $ 3-yr Regression Beta : num -0.6637 0.0578 -0.2204 2.8132 -0.6637 ...
## $ Value Line Beta : num -1.5885 0.6159 0.2325 2.2453 0.0408 ...
## $ HiLo risk : num -0.0269 -1.1935 2.3064 3.3208 0.5818 ...
## $ 3-yr Standard Deviation (Stock Price): num -0.599 -0.373 1.541 2.524 -0.599 ...
## $ Reinvestment : num 0.0219 0.0183 0.0218 0.0211 0.0221 ...
## $ Correlation : num 0 1.838 -1.275 -0.348 0 ...
## $ Payout Ratio : num 0 0.364 0 0 -0.591 ...
## $ Reinvestment Rate : num 0 -0.0763 -0.0378 -0.0438 -0.0615 ...
## $ ROE : num 0 0.202 -0.24 0 0.156 ...
## $ ROC : num 0 -0.0326 -0.0543 -0.0933 -0.0356 ...
## $ Net Margin : num 0 0.136 0.0161 0.0794 0.1343 ...
## $ Pre-tax Operating Margin : num 0 0.2239 -0.1144 0.0229 0.2139 ...
## $ Invested Capital : num -0.129 1.034 -0.128 -0.128 -0.126 ...
## $ BV of Assets : num -0.152 1.066 -0.152 -0.152 -0.151 ...
## $ Non-cash WC : num -0.0564 0.4926 -0.0564 -0.0556 -0.0531 ...
## $ Chg in non-cash WC : num 0.0208 0.0793 0.0207 0.0207 0.0214 ...
## $ Net Income : num -0.136 2.876 -0.139 -0.15 -0.134 ...
## $ EBIT : num 0 2.674 -0.226 -0.231 -0.221 ...
## $ EBIT(1-t) : num 0 2.926 -0.233 -0.24 -0.228 ...
## $ EBITDA : num 0 2.127 -0.216 -0.219 -0.212 ...
## $ FCFF : num 0 1.091 -0.116 -0.118 -0.114 ...
## $ Eff Tax Rate : num -0.892 0.58 -0.892 -0.892 1.032 ...
## $ Non-cash WC as % of Revenues : num 0 0.0715 0.0447 0.0924 0.0895 ...
## $ Cash as % of Firm Value : num 0 -0.157 -0.125 -0.179 -0.189 ...
## $ Cash as % of Revenues : num 0 -0.0728 -0.061 -0.0723 -0.0739 ...
## $ Cash as % of Total Assets : num 0 -0.5904 0.0917 -0.5904 -0.8035 ...
## $ Capital Expenditures : num -0.161 0.91 -0.161 -0.161 -0.16 ...
## $ Depreciation : num -0.114 0.774 -0.113 -0.111 -0.111 ...
## $ SG&A Expenses : num -0.188 2.977 -0.187 -0.177 -0.182 ...
## $ Trailing Revenues : num -0.164 3.274 -0.164 -0.163 -0.158 ...
## $ Trailing Net Income : num -0.148 3.173 -0.149 -0.176 -0.146 ...
## $ Dividends : num -0.148 2.468 -0.148 -0.148 -0.148 ...
## $ Intangible Assets/Total Assets : num 0 0.96 -0.699 -0.699 1.403 ...
## $ Fixed Assets/Total Assets : num 0 -0.3312 2.25 0.0188 0.1501 ...
## $ Market D/E : num 0 -0.08318 -0.08372 -0.00268 -0.07524 ...
## $ Market Debt to Capital : num 0 -0.471 -0.599 2.645 0.639 ...
## $ Book Debt to Capital : num 0 0.127 0.214 0 0.474 ...
## $ Dividend Yield : num -0.0948 0.1211 -0.0948 -0.0948 -0.0948 ...
## $ Institutional Holdings : num -0.942 1.458 -0.942 -0.942 -0.727 ...
## # of shares outstanding Market Cap Total Debt Firm Value
## Min. :-0.2557 Min. :-0.2093 Min. :-0.08583 Min. :-0.1878
## 1st Qu.:-0.2380 1st Qu.:-0.2082 1st Qu.:-0.08583 1st Qu.:-0.1869
## Median :-0.2002 Median :-0.1994 Median :-0.08538 Median :-0.1798
## Mean : 0.0000 Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.:-0.1098 3rd Qu.:-0.1386 3rd Qu.:-0.07432 3rd Qu.:-0.1315
## Max. :16.3234 Max. :26.4852 Max. :35.33988 Max. :24.0613
## Enterprise Value Cash Revenues: Last yr Trailing 12-mth Revenues
## Min. :-0.7000 Min. :-0.1549 Min. :-0.2312 Min. :-0.2216
## 1st Qu.:-0.1850 1st Qu.:-0.1545 1st Qu.:-0.2274 1st Qu.:-0.2174
## Median :-0.1784 Median :-0.1495 Median :-0.1894 Median :-0.1783
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.:-0.1315 3rd Qu.:-0.1181 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :24.2500 Max. :23.7390 Max. :23.5912 Max. :27.0347
## Current PE Trailing PE PBV Ratio PS Ratio
## Min. :-0.4525 Min. :-0.6534 Min. :-0.2227 Min. :-0.05503
## 1st Qu.:-0.2283 1st Qu.:-0.2846 1st Qu.:-0.1620 1st Qu.:-0.05383
## Median : 0.0000 Median : 0.0000 Median :-0.1096 Median :-0.05120
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 0.00000
## Max. :27.6004 Max. :29.8730 Max. :32.1849 Max. :38.04876
## EV/EBIT EV/EBITDA EV/ Invested Capital Value/BV of Capital
## Min. :-0.3141 Min. :-0.2711 Min. :-0.04084 Min. :-0.4460
## 1st Qu.:-0.1803 1st Qu.:-0.1655 1st Qu.:-0.03918 1st Qu.:-0.2739
## Median : 0.0000 Median : 0.0000 Median :-0.03767 Median :-0.1501
## Mean : 0.0000 Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.:-0.02300 3rd Qu.: 0.0000
## Max. :24.9322 Max. :26.3784 Max. :38.66865 Max. :23.7342
## EV/Sales EV/ Trailing Sales Growth in Revenue- last year
## Min. :-0.05104 Min. :-0.07603 Min. :-2.0060
## 1st Qu.:-0.04992 1st Qu.:-0.07126 1st Qu.:-0.2594
## Median :-0.04758 Median :-0.06135 Median :-0.2594
## Mean : 0.00000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.00000 3rd Qu.: 0.00000 3rd Qu.: 0.1132
## Max. :38.37839 Max. :37.84282 Max. : 9.7547
## 3-yr Regression Beta Value Line Beta HiLo risk
## Min. :-5.8357 Min. :-1.58853 Min. :-1.5486
## 1st Qu.:-0.6637 1st Qu.:-0.63008 1st Qu.:-0.7370
## Median :-0.2899 Median :-0.05502 Median :-0.1791
## Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.3620 3rd Qu.: 0.52005 3rd Qu.: 0.3281
## Max. : 7.6896 Max. : 8.09180 Max. : 3.3715
## 3-yr Standard Deviation (Stock Price) Reinvestment Correlation
## Min. :-0.5986 Min. :-35.33536 Min. :-3.7591
## 1st Qu.:-0.5986 1st Qu.: 0.02163 1st Qu.:-0.5218
## Median :-0.2265 Median : 0.02194 Median : 0.0000
## Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.1489 3rd Qu.: 0.02283 3rd Qu.: 0.4469
## Max. :13.0612 Max. : 10.05075 Max. : 6.5737
## Payout Ratio Reinvestment Rate ROE ROC
## Min. :-0.5908 Min. :-11.61729 Min. :-28.9584 Min. :-1.07516
## 1st Qu.:-0.5908 1st Qu.: -0.08557 1st Qu.: 0.0000 1st Qu.:-0.03630
## Median : 0.0000 Median : -0.03857 Median : 0.1351 Median :-0.03337
## Mean : 0.0000 Mean : 0.00000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.0000 3rd Qu.: 0.00000 3rd Qu.: 0.1730 3rd Qu.: 0.00000
## Max. :25.5474 Max. : 24.16164 Max. : 1.8459 Max. :38.42675
## Net Margin Pre-tax Operating Margin Invested Capital
## Min. :-23.0629 Min. :-14.4475 Min. :-1.1487
## 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.:-0.1285
## Median : 0.1304 Median : 0.2000 Median :-0.1253
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.1348 3rd Qu.: 0.2219 3rd Qu.:-0.1029
## Max. : 0.1869 Max. : 0.3890 Max. :30.0505
## BV of Assets Non-cash WC Chg in non-cash WC Net Income
## Min. :-0.1522 Min. :-2.33085 Min. :-37.02071 Min. :-15.2562
## 1st Qu.:-0.1519 1st Qu.:-0.05652 1st Qu.: 0.02083 1st Qu.: -0.1374
## Median :-0.1478 Median :-0.05635 Median : 0.02084 Median : -0.1341
## Mean : 0.0000 Mean : 0.00000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.:-0.1271 3rd Qu.:-0.04941 3rd Qu.: 0.02122 3rd Qu.: -0.1033
## Max. :24.0429 Max. :33.93193 Max. : 10.45705 Max. : 27.2283
## EBIT EBIT(1-t) EBITDA FCFF
## Min. :-0.3459 Min. :-0.4108 Min. :-0.2907 Min. :-5.9831
## 1st Qu.:-0.2239 1st Qu.:-0.2302 1st Qu.:-0.2145 1st Qu.:-0.1155
## Median :-0.1912 Median :-0.1966 Median :-0.1846 Median :-0.1054
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :24.0587 Max. :21.8475 Max. :19.9817 Max. :32.2962
## Eff Tax Rate Non-cash WC as % of Revenues Cash as % of Firm Value
## Min. :-0.8917 Min. :-35.90702 Min. :-0.2001
## 1st Qu.:-0.8917 1st Qu.: 0.00000 1st Qu.:-0.1787
## Median :-0.8068 Median : 0.06408 Median :-0.1148
## Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 1.0323 3rd Qu.: 0.08348 3rd Qu.: 0.0000
## Max. : 2.5035 Max. : 2.64336 Max. :35.3293
## Cash as % of Revenues Cash as % of Total Assets Capital Expenditures
## Min. :-0.07420 Min. :-0.8462 Min. :-0.1610
## 1st Qu.:-0.07309 1st Qu.:-0.7183 1st Qu.:-0.1610
## Median :-0.06785 Median :-0.3772 Median :-0.1594
## Mean : 0.00000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.00000 3rd Qu.: 0.2623 3rd Qu.:-0.1374
## Max. :37.68493 Max. : 3.4169 Max. :17.7089
## Depreciation SG&A Expenses Trailing Revenues Trailing Net Income
## Min. :-0.11353 Min. :-0.1883 Min. :-0.1643 Min. :-5.3485
## 1st Qu.:-0.11353 1st Qu.:-0.1871 1st Qu.:-0.1643 1st Qu.:-0.1491
## Median :-0.11217 Median :-0.1801 Median :-0.1603 Median :-0.1467
## Mean : 0.00000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.:-0.09736 3rd Qu.:-0.1467 3rd Qu.:-0.1238 3rd Qu.:-0.1131
## Max. :30.49859 Max. :17.4104 Max. :26.9656 Max. :30.0294
## Dividends Intangible Assets/Total Assets Fixed Assets/Total Assets
## Min. :-0.1477 Min. :-0.6993 Min. :-0.8999
## 1st Qu.:-0.1477 1st Qu.:-0.6993 1st Qu.:-0.7687
## Median :-0.1477 Median :-0.4227 Median :-0.3312
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.:-0.1461 3rd Qu.: 0.1857 3rd Qu.: 0.3251
## Max. :21.4405 Max. : 4.6111 Max. : 3.4750
## Market D/E Market Debt to Capital Book Debt to Capital
## Min. :-0.08499 Min. :-0.8546 Min. :-1.1279
## 1st Qu.:-0.08499 1st Qu.:-0.8546 1st Qu.:-0.9548
## Median :-0.08228 Median :-0.2999 Median : 0.0000
## Mean : 0.00000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.:-0.07361 3rd Qu.: 0.3402 3rd Qu.: 0.5601
## Max. :22.20268 Max. : 3.4126 Max. : 3.1571
## Dividend Yield Institutional Holdings
## Min. :-0.09479 Min. :-0.9732
## 1st Qu.:-0.09479 1st Qu.:-0.9732
## Median :-0.09479 Median :-0.3577
## Mean : 0.00000 Mean : 0.0000
## 3rd Qu.:-0.09479 3rd Qu.: 0.8735
## Max. :38.12419 Max. : 2.1047
Setelah membentuk dua variabel indikator finansial, seseorang ingin mengetahui posisi masing-masing perusahaan berdasarkan indikator tersebut.
## [1] 58 58
eig <- eigen(S, symmetric = TRUE)
lambda <- eig$values # akar ciri (eigenvalues)
E <- eig$vectors # vektor ciri (eigenvectors) # Proporsi ragam tiap komponen dan kumulatifnya
prop <- lambda / sum(lambda)
cumprop <- cumsum(prop)
#Hasil PCA
hasil_pca <- data.frame(
Komponen = paste0("PC", 1:length(lambda)),
Eigenvalue = round(lambda, 4),
Proporsi = round(prop * 100, 2),
Kumulatif = round(cumprop * 100, 2)
)
hasil_pca## Komponen Eigenvalue Proporsi Kumulatif
## 1 PC1 13.6308 23.50 23.50
## 2 PC2 6.2822 10.83 34.33
## 3 PC3 3.0218 5.21 39.54
## 4 PC4 2.6593 4.59 44.13
## 5 PC5 2.3568 4.06 48.19
## 6 PC6 2.1813 3.76 51.95
## 7 PC7 1.9480 3.36 55.31
## 8 PC8 1.9281 3.32 58.64
## 9 PC9 1.7034 2.94 61.57
## 10 PC10 1.6565 2.86 64.43
## 11 PC11 1.2514 2.16 66.59
## 12 PC12 1.1856 2.04 68.63
## 13 PC13 1.1587 2.00 70.63
## 14 PC14 1.1327 1.95 72.58
## 15 PC15 1.0775 1.86 74.44
## 16 PC16 1.0449 1.80 76.24
## 17 PC17 1.0105 1.74 77.98
## 18 PC18 1.0013 1.73 79.71
## 19 PC19 0.9654 1.66 81.37
## 20 PC20 0.9008 1.55 82.93
## 21 PC21 0.8938 1.54 84.47
## 22 PC22 0.8677 1.50 85.96
## 23 PC23 0.8132 1.40 87.36
## 24 PC24 0.7922 1.37 88.73
## 25 PC25 0.7463 1.29 90.02
## 26 PC26 0.7370 1.27 91.29
## 27 PC27 0.6622 1.14 92.43
## 28 PC28 0.6372 1.10 93.53
## 29 PC29 0.5811 1.00 94.53
## 30 PC30 0.4847 0.84 95.37
## 31 PC31 0.4087 0.70 96.07
## 32 PC32 0.3861 0.67 96.74
## 33 PC33 0.3008 0.52 97.25
## 34 PC34 0.2852 0.49 97.75
## 35 PC35 0.2300 0.40 98.14
## 36 PC36 0.2102 0.36 98.51
## 37 PC37 0.1748 0.30 98.81
## 38 PC38 0.1628 0.28 99.09
## 39 PC39 0.1198 0.21 99.29
## 40 PC40 0.0911 0.16 99.45
## 41 PC41 0.0738 0.13 99.58
## 42 PC42 0.0595 0.10 99.68
## 43 PC43 0.0516 0.09 99.77
## 44 PC44 0.0366 0.06 99.83
## 45 PC45 0.0276 0.05 99.88
## 46 PC46 0.0237 0.04 99.92
## 47 PC47 0.0159 0.03 99.95
## 48 PC48 0.0110 0.02 99.97
## 49 PC49 0.0075 0.01 99.98
## 50 PC50 0.0050 0.01 99.99
## 51 PC51 0.0035 0.01 100.00
## 52 PC52 0.0023 0.00 100.00
## 53 PC53 0.0001 0.00 100.00
## 54 PC54 0.0001 0.00 100.00
## 55 PC55 0.0000 0.00 100.00
## 56 PC56 0.0000 0.00 100.00
## 57 PC57 0.0000 0.00 100.00
## 58 PC58 0.0000 0.00 100.00
#Menentukan jumlah komponen
prop <- lambda / sum(lambda)
cumprop <- cumsum(prop)
k_kaiser <- sum(lambda > 1)
k_kaiser## [1] 18
## [1] 19
## [1] 23.5
#Skor komponen utama
Xz <- num_scaled
k <- min(30,ncol(num_scaled))
lambda_k <- lambda[1:k]
E_k <- E[, 1:k]
scores2 <- as.matrix(Xz) %*% E_k
scores2 <- as.data.frame(scores2)
colnames(scores2) <- paste0("PC", 1:k)
head(scores2)## PC1 PC2 PC3 PC4 PC5 PC6
## 1 0.6362235 -0.03890899 -0.2284873 -0.6379098 1.2011215 -0.1295692
## 2 -11.0263099 0.44732892 -1.6219859 0.6122046 -1.1481243 1.3660981
## 3 1.0626468 -0.03574855 -0.3354528 -0.1234948 1.8747847 -1.3808034
## 4 0.9351954 0.13733629 0.2050495 0.7104460 0.5516308 -2.2514047
## 5 0.7502610 0.26064006 0.1019040 -1.2652652 -0.0171203 0.4727001
## 6 0.3029955 0.04137765 -0.2156327 1.8718580 -1.0941279 -0.4240099
## PC7 PC8 PC9 PC10 PC11 PC12
## 1 0.5713106 0.0991562 0.000812676 0.6204053 0.1825705 0.2939312
## 2 -0.4707741 0.3018366 -1.382709015 -0.3522240 -0.4909652 0.3785711
## 3 0.4450929 -0.3386706 0.040214417 -0.7958995 0.4229812 -1.5533995
## 4 -0.5303199 -1.1292634 0.636775822 -4.2234736 -0.4055567 -1.5112857
## 5 0.1054478 0.0149494 0.229097179 -0.2067430 -1.2851666 0.7191739
## 6 -0.4243253 0.7654672 -0.801023528 1.1207332 -0.2634748 -0.5167057
## PC13 PC14 PC15 PC16 PC17 PC18
## 1 -0.3309680 -0.03567005 0.2246902465 0.2597683 -0.38473794 0.06845173
## 2 0.2719057 1.33165232 -0.2478410986 0.7002579 -0.06245786 -0.40400691
## 3 0.5973448 -0.48311343 0.3286183346 -1.0600654 1.12714191 0.23707735
## 4 0.9315681 -0.80140445 -1.0939852292 -0.5651896 0.09088823 -0.22895935
## 5 0.2730401 0.18409972 -0.0009416184 -0.1141876 -0.32815988 -0.29208019
## 6 0.1952191 -0.10235748 -0.5072469159 -0.2994742 -0.48506277 0.47948098
## PC19 PC20 PC21 PC22 PC23 PC24
## 1 0.027195686 0.52298935 0.0751210 -0.2362871 0.73538989 -0.456241229
## 2 0.312815599 0.21560251 0.4218280 0.2876800 0.24813439 -0.097363315
## 3 -1.463418083 0.04527495 -0.5726276 0.0339438 0.36364923 0.063890692
## 4 -0.804445466 -0.68646352 0.7350047 -0.3716603 0.05325169 0.447730927
## 5 0.004550777 -0.62184217 0.3685203 -0.1578313 0.62097403 0.004790563
## 6 0.906056971 -0.25411781 -0.5163555 0.5987621 -0.49536609 0.678923464
## PC25 PC26 PC27 PC28 PC29 PC30
## 1 0.4042329677 0.28807504 0.08486737 0.036615724 -0.696549 0.08419631
## 2 0.0368647273 0.20943136 -0.07764022 -0.008692929 -0.389751 0.21870139
## 3 -1.3869021313 0.04711376 -0.44609198 -0.399362313 0.355995 -0.64649579
## 4 0.6846600224 -0.32384818 -0.10844186 -0.244831898 1.692749 -0.70374407
## 5 0.0004955636 0.12575152 0.11656000 -0.377990592 0.993171 -0.25366311
## 6 -0.5780086527 -1.05216359 -0.39812122 0.483707353 -0.510688 0.30188643
#KERNEL PCA
X <- as.matrix(num_scaled)
mode(X) <- "numeric"
#Definisi fungsi kernel
rbf_kernel <- function(X, gamma = NULL) {
if (is.null(gamma)) gamma <- 1 / ncol(X)
if (!requireNamespace("proxy", quietly = TRUE)) install.packages("proxy")
dist_matrix <- as.matrix(proxy::dist(X, method = "euclidean"))^2
K <- exp(-gamma * dist_matrix)
return(K)
}
poly_kernel <- function(X, degree = 3, coef0 = 1) {
K <- (tcrossprod(X) + coef0)^degree
return(K)
}
linear_kernel <- function(X) {
K <- tcrossprod(X)
return(K)
}
#menghitung tiga tipe kernel
K_linear <- linear_kernel(X)
K_poly <- poly_kernel(X, degree = 3)
K_rbf <- rbf_kernel(X, gamma = 1/ncol(X))
#pusatkan matriks kernel
n <- nrow(X)
one_n <- matrix(1, n, n) / n
center_kernel <- function(K) {
K_centered <- K - one_n %*% K - K %*% one_n + one_n %*% K %*% one_n
return(K_centered)
}
Kc_linear <- center_kernel(K_linear)
Kc_poly <- center_kernel(K_poly)
Kc_rbf <- center_kernel(K_rbf)
#Eigen decomposition
eig_linear <- eigen(Kc_linear, symmetric = TRUE)
eig_poly <- eigen(Kc_poly, symmetric = TRUE)
eig_rbf <- eigen(Kc_rbf, symmetric = TRUE)
#normalisasi vektor eigen
normalize_eig <- function(eig) {
values <- eig$values
vectors <- eig$vectors
values[values < 0] <- 0
vectors_norm <- vectors / sqrt(values + 1e-9)
list(values = values, vectors = vectors_norm)
}
eig_linear_n <- normalize_eig(eig_linear)
eig_poly_n <- normalize_eig(eig_poly)
eig_rbf_n <- normalize_eig(eig_rbf)
#Skor eigen
k1 <- 1
k2 <- 2
scores_linear_1 <- Kc_linear %*% eig_linear_n$vectors[, 1:k1]
scores_poly_1 <- Kc_poly %*% eig_poly_n$vectors[, 1:k1]
scores_rbf_1 <- Kc_rbf %*% eig_rbf_n$vectors[, 1:k1]
scores_linear_2 <- Kc_linear %*% eig_linear_n$vectors[, 1:k2]
scores_poly_2 <- Kc_poly %*% eig_poly_n$vectors[, 1:k2]
scores_rbf_2 <- Kc_rbf %*% eig_rbf_n$vectors[, 1:k2]
#Proporsi varians
var_linear <- eig_linear$values / sum(eig_linear$values)
var_poly <- eig_poly$values / sum(eig_poly$values)
var_rbf <- eig_rbf$values / sum(eig_rbf$values)
data.frame(
Kernel = c("Linear", "Polynomial", "RBF"),
PC1 = c(var_linear[1], var_poly[1], var_rbf[1]),
PC2 = c(var_linear[2], var_poly[2], var_rbf[2]),
PC3 = c(var_linear[3], var_poly[3], var_rbf[3]),
PC4 = c(var_linear[4], var_poly[4], var_rbf[4]),
PC5 = c(var_linear[5], var_poly[5], var_rbf[5]),
PC6 = c(var_linear[6], var_poly[6], var_rbf[6]),
PC7 = c(var_linear[7], var_poly[7], var_rbf[7]),
PC8 = c(var_linear[8], var_poly[8], var_rbf[8]),
PC9 = c(var_linear[9], var_poly[9], var_rbf[9]),
PC10 = c(var_linear[10], var_poly[10], var_rbf[10]),
PC11 = c(var_linear[11], var_poly[11], var_rbf[11]),
PC12 = c(var_linear[12], var_poly[12], var_rbf[12]),
PC13 = c(var_linear[13], var_poly[13], var_rbf[13]),
PC14 = c(var_linear[14], var_poly[14], var_rbf[14]),
PC15 = c(var_linear[15], var_poly[15], var_rbf[15]),
PC16 = c(var_linear[16], var_poly[16], var_rbf[16]),
PC17 = c(var_linear[17], var_poly[17], var_rbf[17]),
PC18 = c(var_linear[18], var_poly[18], var_rbf[18]),
Kumulatif_18PC = c(cumsum(var_linear)[18],
cumsum(var_poly)[18],
cumsum(var_rbf)[18])
)## Kernel PC1 PC2 PC3 PC4 PC5 PC6
## 1 Linear 0.2350141 0.10831351 0.05209934 0.04585033 0.04063458 0.03760867
## 2 Polynomial 0.4726871 0.35280416 0.05652690 0.03126117 0.02905487 0.02008050
## 3 RBF 0.1206419 0.09264346 0.08592908 0.05472602 0.04666290 0.03919768
## PC7 PC8 PC9 PC10 PC11 PC12
## 1 0.03358650 0.033243380 0.029369557 0.028561045 0.021575514 0.020440572
## 2 0.01635002 0.005554057 0.004832184 0.002705458 0.002380718 0.001472497
## 3 0.03174300 0.030204267 0.024717001 0.022467871 0.020744486 0.019449738
## PC13 PC14 PC15 PC16 PC17 PC18
## 1 0.0199770970 0.0195289105 0.0185778264 0.0180151159 0.0174226973 0.017263174
## 2 0.0007368118 0.0005398184 0.0004745973 0.0003818573 0.0003544145 0.000303130
## 3 0.0166943907 0.0166150527 0.0130464182 0.0117134581 0.0093376612 0.009118077
## Kumulatif_18PC
## 1 0.7970819
## 2 0.9985002
## 3 0.6656525
# Plot Kernel PCA dengan Polynomial Kernel
set.seed(2025)
sampel_index <- sample(1:nrow(finansial), 1500)
num_scaled <- finansial[sampel_index, 9:ncol(finansial)]
num_scaled[] <- lapply(num_scaled, function(x) as.numeric(as.character(x)))## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
company_names <- finansial$`Company Name`[sampel_index]
# Membuat data frame untuk hasil Kernel PCA (Polynomial)
scores_poly_df <- data.frame(
PC1 = scores_poly_2[, 1],
PC2 = scores_poly_2[, 2],
Perusahaan = company_names
)
# Plot scatter PC1 vs PC2 untuk Kernel Polynomial
ggplot(scores_poly_df, aes(x = PC1, y = PC2, label = Perusahaan)) +
geom_point(color = "darkorange", size = 3) +
geom_text(aes(label = Perusahaan), hjust = 0.5, vjust = -0.8, size = 3) +
theme_minimal(base_size = 12) +
labs(
title = "Posisi Perusahaan Berdasarkan Kernel PCA (Polynomial Kernel)",
x = "Komponen Utama 1 (PC1)",
y = "Komponen Utama 2 (PC2)"
) +
theme(plot.title = element_text(hjust = 0.5, face = "bold"))## Warning: package 'ggrepel' was built under R version 4.4.3
library(scales)
# Pastikan data frame bersih
scores_poly_df <- scores_poly_df %>%
filter(!is.na(PC1), !is.na(PC2), !is.na(Perusahaan))
# Skala ulang supaya lebih stabil untuk plotting
scores_poly_df$PC1_scaled <- rescale(scores_poly_df$PC1, to = c(-1, 1))
scores_poly_df$PC2_scaled <- rescale(scores_poly_df$PC2, to = c(-1, 1))
# Plot versi stabil
ggplot(scores_poly_df, aes(x = PC1_scaled, y = PC2_scaled, color = Perusahaan, label = Perusahaan)) +
geom_point(size = 3, alpha = 0.8) +
geom_text_repel(size = 3, max.overlaps = 10, show.legend = FALSE, force = 3) +
scale_color_viridis_d(option = "plasma", end = 0.9) +
theme_minimal(base_size = 12) +
labs(
title = "Posisi Perusahaan Berdasarkan Kernel PCA (Polynomial Kernel)",
x = "Komponen Utama 1 (PC1, diskalakan)",
y = "Komponen Utama 2 (PC2, diskalakan)",
color = "Perusahaan"
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"),
legend.position = "none"
)## Warning: ggrepel: 1496 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
set.seed(2025)
scores_poly_df$Cluster <- kmeans(scores_poly_df[, c("PC1", "PC2")], centers = 4)$cluster
ggplot(scores_poly_df, aes(x = PC1, y = PC2, color = factor(Cluster))) +
geom_point(size = 3, alpha = 0.9) +
geom_text_repel(aes(label = Perusahaan), size = 3, show.legend = FALSE, max.overlaps = 15) +
scale_color_viridis_d(option = "plasma", end = 0.9) +
theme_minimal(base_size = 12) +
labs(
title = "Posisi Perusahaan Berdasarkan Kernel PCA (Polynomial Kernel)",
x = "Komponen Utama 1 (PC1)",
y = "Komponen Utama 2 (PC2)",
color = "Cluster"
) +
theme(plot.title = element_text(hjust = 0.5, face = "bold"))## Warning: ggrepel: 1496 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps