Library

library(readxl)
library(dplyr)
library(readr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(GGally)
library(kableExtra)
library(kernlab)
library(factoextra)
library(gridExtra)
library(scales)
library(kernlab)
library(plotly)

A. Load Data

finansial <- read_excel("D:/UNY/MySta/SEM 5/ML/dataset_ML/Indikator Finansial.xlsx")

B. Data Preprocessing

2.1. Struktur Tipe Data

data_finansial <- finansial[, 9:ncol(finansial)] %>%
  mutate(across(where(is.character), ~ na_if(.x, "NA"))) %>%
  mutate(across(where(is.character), ~ na_if(.x, "N/A"))) %>%
  mutate(across(where(is.character), ~ na_if(.x, "")))

# Konversi kolom ke numeric dan kembalikan jadi data.frame
data_finansial <- data_finansial %>%
  mutate(across(everything(), ~ as.numeric(as.character(.))))

if (nrow(data_finansial) > 1500) {
 set.seed(2025)
 data_finansial <- data_finansial[sample(1:nrow(data_finansial), 1500), ]
 }

cat("Struktur data setelah perbaikan:\n")
## Struktur data setelah perbaikan:
str(data_finansial)
## tibble [1,500 × 65] (S3: tbl_df/tbl/data.frame)
##  $ # of shares outstanding                  : num [1:1500] 0 1022.2 6.9 34.2 6.1 ...
##  $ Market Cap                               : num [1:1500] 0 57930.7 22 7.7 26.9 ...
##  $ Total Debt                               : num [1:1500] 0 5591 1.5 35.1 14.4 ...
##  $ Firm Value                               : num [1:1500] 0 63521.7 23.5 42.8 41.3 ...
##  $ Enterprise Value                         : num [1:1500] 0 61256.7 21.9 42 40.8 ...
##  $ Cash                                     : num [1:1500] 0 2265 1.6 0.8 0.5 ...
##  $ Revenues: Last yr                        : num [1:1500] NA 37445 1.4 13.7 62.7 ...
##  $ Trailing 12-mth Revenues                 : num [1:1500] NA 41302 3.3 11.5 73.7 ...
##  $ Current PE                               : num [1:1500] NA 20.8 NA NA 11.7 ...
##  $ Trailing PE                              : num [1:1500] NA 20.8 NA NA 14.2 ...
##  $ Forward EPS                              : num [1:1500] -1 3.34 NA NA NA 1.3 2.81 1.57 1.45 0.39 ...
##  $ Forward PE                               : num [1:1500] NA 16.7 NA NA NA ...
##  $ PEG Ratio                                : num [1:1500] NA 1.89 NA NA NA 2.4 1.41 1.06 3.45 NA ...
##  $ PBV Ratio                                : num [1:1500] NA 4.14 6.47 NA 1.1 15.4 3.1 1.36 3.73 0.9 ...
##  $ PS Ratio                                 : num [1:1500] NA 1.55 15.71 0.56 0.43 ...
##  $ EV/EBIT                                  : num [1:1500] NA 11.24 NA NA 6.32 ...
##  $ EV/EBITDA                                : num [1:1500] NA 9.53 NA NA 4.66 ...
##  $ EV/ Invested Capital                     : num [1:1500] NA 3.53 6.64 8.08 1.07 ...
##  $ Value/BV of Capital                      : num [1:1500] NA 3.24 4.8 NA 1.06 15.4 2.83 1.27 2.1 0.92 ...
##  $ EV/Sales                                 : num [1:1500] NA 1.64 15.64 3.07 0.65 ...
##  $ EV/ Trailing Sales                       : num [1:1500] NA 1.48 6.64 3.65 0.55 8.32 NA 1.09 1.35 0.55 ...
##  $ Growth in EPS: Last 5 years              : num [1:1500] NA 0.19 NA NA NA 0.53 0.19 0.12 -0.04 -0.15 ...
##  $ Expected Growth in EPS: next 5 years     : num [1:1500] 0.02 0.11 NA NA NA 0.29 0.14 0.09 0.05 NA ...
##  $ Expected Growth in Revenues: next 5 years: num [1:1500] NA 0.09 NA NA NA 0.3 NA 0.06 -0.02 NA ...
##  $ Growth in Revenue- last year             : num [1:1500] 0 0.21 -0.46 0.69 0.32 0.52 0 0.04 -0.13 0.09 ...
##  $ 3-yr Regression Beta                     : num [1:1500] 0 0.83 0.51 4 0 0.8 0.22 1.5 0.9 0.34 ...
##  $ Value Line Beta                          : num [1:1500] 0 1.15 0.95 2 0.85 1.2 0.9 1.4 0.9 0.8 ...
##  $ HiLo risk                                : num [1:1500] 0.33 0.1 0.79 0.99 0.45 0.19 0.17 0.18 0.11 0.23 ...
##  $ 3-yr Standard Deviation (Stock Price)    : num [1:1500] 0 0.17 1.61 2.35 0 0.49 0.16 0.29 0.21 0.31 ...
##  $ Reinvestment                             : num [1:1500] 0 -10.98 -0.57 -2.59 0.42 ...
##  $ Correlation                              : num [1:1500] NA 1 0.06 0.34 NA 0.32 0.28 1.05 0.84 0.22 ...
##  $ Payout Ratio                             : num [1:1500] NA 0.32 NA NA 0 0 0.19 0.25 0.6 NA ...
##  $ Reinvestment Rate                        : num [1:1500] NA 0 0.26 0.22 0.1 0.35 NA 0.01 -0.26 0.65 ...
##  $ ROE                                      : num [1:1500] NA 0.2 -0.85 NA 0.09 0.22 0.16 0.14 0.22 0 ...
##  $ ROC                                      : num [1:1500] NA 0.23 -0.66 -2.26 0.11 0.79 NA 0.14 0.16 0.09 ...
##  $ Net Margin                               : num [1:1500] NA 0.07 -2.07 -0.94 0.04 0.17 NA 0.09 0.06 0 ...
##  $ Pre-tax Operating Margin                 : num [1:1500] NA 0.15 -1.55 -0.86 0.1 0.23 NA 0.16 0.13 0.06 ...
##  $ Invested Capital                         : num [1:1500] 0 17334 3.3 5.2 38.3 ...
##  $ BV of Assets                             : num [1:1500] 0 40035 7.4 12.6 54.8 ...
##  $ Non-cash WC                              : num [1:1500] 0 1670 -0.2 2.4 9.8 ...
##  $ Chg in non-cash WC                       : num [1:1500] 0 172.02 -0.27 -0.39 1.72 ...
##  $ Net Income                               : num [1:1500] 0 2788 -2.9 -12.9 2.3 ...
##  $ EBIT                                     : num [1:1500] NA 5448.25 -2.18 -11.77 6.46 ...
##  $ EBIT(1-t)                                : num [1:1500] NA 4008.93 -2.18 -11.77 4.26 ...
##  $ EBITDA                                   : num [1:1500] NA 6426.25 -1.78 -9.47 8.76 ...
##  $ FCFF                                     : num [1:1500] NA 4019.91 -1.6 -9.19 3.84 ...
##  $ Eff Tax Rate                             : num [1:1500] 0 0.26 0 0 0.34 0.18 0.35 0.29 0.28 0 ...
##  $ Non-cash WC as % of Revenues             : num [1:1500] NA 0.04 -0.14 0.18 0.16 0.04 NA -0.14 0.12 0.2 ...
##  $ Cash as % of Firm Value                  : num [1:1500] NA 0.04 0.07 0.02 0.01 0.05 0.15 0.05 0.05 0.01 ...
##  $ Cash as % of Revenues                    : num [1:1500] NA 0.05 0.48 0.07 0.01 0.39 NA 0.05 0.08 0.01 ...
##  $ Cash as % of Total Assets                : num [1:1500] NA 0.06 0.22 0.06 0.01 0.55 0.06 0.02 0.08 0.01 ...
##  $ Capital Expenditures                     : num [1:1500] 0 795 0.1 0.1 1 ...
##  $ Depreciation                             : num [1:1500] 0 978 0.4 2.3 2.3 ...
##  $ SG&A Expenses                            : num [1:1500] 0 5754 2.4 20.2 11.7 ...
##  $ Trailing Revenues                        : num [1:1500] 0 41302 3.3 11.5 73.7 ...
##  $ Trailing Net Income                      : num [1:1500] 0 2788 -0.8 -23.2 1.9 ...
##  $ Dividends                                : num [1:1500] 0 900 0 0 0 ...
##  $ Intangible Assets/Total Assets           : num [1:1500] NA 0.3 0 0 0.38 0.04 0 0.34 0.32 0.12 ...
##  $ Fixed Assets/Total Assets                : num [1:1500] NA 0.13 0.72 0.21 0.24 0.16 0.01 0.04 0.2 0.44 ...
##  $ Market D/E                               : num [1:1500] NA 0.1 0.07 4.56 0.54 0 0.05 0.25 0.4 0.29 ...
##  $ Market Debt to Capital                   : num [1:1500] NA 0.09 0.06 0.82 0.35 0 0.05 0.2 0.29 0.23 ...
##  $ Book Debt to Capital                     : num [1:1500] NA 0.29 0.31 NA 0.37 0 0.13 0.25 0.6 0.21 ...
##  $ Dividend Yield                           : num [1:1500] 0 0.02 0 0 0 0 0.01 0.03 0.04 0 ...
##  $ Insider Holdings                         : num [1:1500] NA 0.02 NA NA NA 0.02 0.18 0.06 0.02 NA ...
##  $ Institutional Holdings                   : num [1:1500] 0.01 0.79 0.01 0.01 0.08 0.89 0.58 0.78 0.86 0.81 ...

2.2. Missing Value

na_summary <- data_finansial %>%
  summarise(across(everything(), ~sum(is.na(.)))) %>%
  pivot_longer(everything(), names_to = "Variabel", values_to = "Jumlah_NA") %>%
  arrange(desc(Jumlah_NA))

cat("\n Top 10 Jumlah NA terbanyak per variabel:\n")
## 
##  Top 10 Jumlah NA terbanyak per variabel:
print(head(na_summary, 10))
## # A tibble: 10 × 2
##    Variabel                                  Jumlah_NA
##    <chr>                                         <int>
##  1 Insider Holdings                               1218
##  2 Expected Growth in Revenues: next 5 years      1206
##  3 PEG Ratio                                      1042
##  4 Growth in EPS: Last 5 years                    1038
##  5 Forward PE                                     1007
##  6 Expected Growth in EPS: next 5 years            968
##  7 Forward EPS                                     933
##  8 EV/EBIT                                         705
##  9 Trailing PE                                     682
## 10 EV/EBITDA                                       671
# Kolom dengan >50% missing 
na_percent <- colMeans(is.na(data_finansial))
na_over50 <- names(na_percent[na_percent > 0.5])

cat("\n Kolom dengan >50% missing values:\n")
## 
##  Kolom dengan >50% missing values:
print(na_over50)
## [1] "Forward EPS"                              
## [2] "Forward PE"                               
## [3] "PEG Ratio"                                
## [4] "Growth in EPS: Last 5 years"              
## [5] "Expected Growth in EPS: next 5 years"     
## [6] "Expected Growth in Revenues: next 5 years"
## [7] "Insider Holdings"
# Handling Missing Value

# Drop kolom > 50% missing
na_percent <- colMeans(is.na(data_finansial))
drop_cols <- names(na_percent[na_percent > 0.5])
data_finansial_handled <- data_finansial %>%
  select(-all_of(drop_cols))

# Imputasi mean
num_cols <- data_finansial_handled %>%
  select(where(is.numeric)) %>%
  colnames()

data_finansial_handled <- data_finansial_handled %>%
  mutate(across(all_of(num_cols), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .)))


# Jumlah variabel & NA setelah handling
n_final <- ncol(data_finansial)
na_after <- data_finansial %>%
  summarise(across(everything(), ~sum(is.na(.)))) %>%
  pivot_longer(everything(), names_to = "Variabel", values_to = "Jumlah_NA") %>%
  arrange(desc(Jumlah_NA))

cat("\n Jumlah variabel setelah handling Missing Value:", n_final, "\n")
## 
##  Jumlah variabel setelah handling Missing Value: 65
cat("\n Jumlah NA setelah handling:\n")
## 
##  Jumlah NA setelah handling:
sum(is.na(n_final))
## [1] 0

2.3. Outlier

num_data <- data_finansial_handled %>% select(where(is.numeric))

# Fungsi untuk capping berbasis IQR
iqr_capping <- function(x) {
  if (is.numeric(x)) {
    Q1 <- quantile(x, 0.25, na.rm = TRUE)
    Q3 <- quantile(x, 0.75, na.rm = TRUE)
    IQR_val <- Q3 - Q1
    lower <- Q1 - 1.5 * IQR_val
    upper <- Q3 + 1.5 * IQR_val
    x <- pmin(pmax(x, lower), upper)
  }
  return(x)
}


# Fungsi hitung jumlah outlier per variabel 
outlier_count <- function(x) { 
  Q1 <- quantile(x, 0.25, na.rm = TRUE) 
  Q3 <- quantile(x, 0.75, na.rm = TRUE) 
  IQR_val <- Q3 - Q1 
  lower <- Q1 - 1.5 * IQR_val 
  upper <- Q3 + 1.5 * IQR_val 
  sum(x < lower | x > upper, na.rm = TRUE) }

# Jumlah outlier sebelum handling
outlier_table_before <- num_data %>%
  summarise(across(everything(), outlier_count)) %>%
  pivot_longer(cols = everything(), names_to = "Variabel", values_to = "Jumlah_Outlier") %>%
  arrange(desc(Jumlah_Outlier))

cat("\n Jumlah outlier per variabel (sebelum handling):\n")
## 
##  Jumlah outlier per variabel (sebelum handling):
print(outlier_table_before)
## # A tibble: 58 × 2
##    Variabel             Jumlah_Outlier
##    <chr>                         <int>
##  1 Reinvestment                    457
##  2 Chg in non-cash WC              446
##  3 Non-cash WC                     356
##  4 Dividend Yield                  354
##  5 Dividends                       349
##  6 Trailing Net Income             289
##  7 Net Income                      271
##  8 Capital Expenditures            257
##  9 Total Debt                      252
## 10 BV of Assets                    245
## # ℹ 48 more rows
# Handling Outlier

# Terapkan IQR capping ke kolom numerik
data_finansial_capped <- data_finansial_handled %>%
  mutate(across(where(is.numeric), iqr_capping))

# Jumlah outlier setelah handling
num_data_capped <- data_finansial_capped %>% select(where(is.numeric))

outlier_table_after <- num_data_capped %>%
  summarise(across(everything(), outlier_count)) %>%
  pivot_longer(cols = everything(), names_to = "Variabel", values_to = "Jumlah_Outlier") %>%
  arrange(desc(Jumlah_Outlier))

cat("\n Jumlah outlier per variabel (setelah handling):\n")
## 
##  Jumlah outlier per variabel (setelah handling):
print(head(outlier_table_after,62))
## # A tibble: 58 × 2
##    Variabel                 Jumlah_Outlier
##    <chr>                             <int>
##  1 # of shares outstanding               0
##  2 Market Cap                            0
##  3 Total Debt                            0
##  4 Firm Value                            0
##  5 Enterprise Value                      0
##  6 Cash                                  0
##  7 Revenues: Last yr                     0
##  8 Trailing 12-mth Revenues              0
##  9 Current PE                            0
## 10 Trailing PE                           0
## # ℹ 48 more rows

2.4. Standarisasi

# Cek variabel konstan (sd = 0) 
const_cols <- sapply(num_data_capped, function(x) sd(x, na.rm = TRUE) == 0)
if (any(const_cols)) {
  cat("Variabel konstan ditemukan dan dihapus:\n")
  print(names(num_data_capped)[const_cols])
  num_data_capped <- num_data_capped[, !const_cols]
} else {
  cat("Tidak ada variabel konstan.\n")
}
## Variabel konstan ditemukan dan dihapus:
## [1] "Dividend Yield"
# Standarisasi (Z-score scaling)
num_scaled <- scale(data_finansial_handled)
num_scaled <- as.data.frame(num_scaled)

# Cek hasil scaling
cat("\nRingkasan hasil scaling:\n")
## 
## Ringkasan hasil scaling:
print(summary(num_scaled))
##  # of shares outstanding   Market Cap        Total Debt         Firm Value     
##  Min.   :-0.2557         Min.   :-0.2093   Min.   :-0.08583   Min.   :-0.1878  
##  1st Qu.:-0.2380         1st Qu.:-0.2082   1st Qu.:-0.08583   1st Qu.:-0.1869  
##  Median :-0.2002         Median :-0.1994   Median :-0.08538   Median :-0.1798  
##  Mean   : 0.0000         Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000  
##  3rd Qu.:-0.1098         3rd Qu.:-0.1386   3rd Qu.:-0.07432   3rd Qu.:-0.1315  
##  Max.   :16.3234         Max.   :26.4852   Max.   :35.33988   Max.   :24.0613  
##  Enterprise Value       Cash         Revenues: Last yr Trailing 12-mth Revenues
##  Min.   :-0.7000   Min.   :-0.1549   Min.   :-0.2312   Min.   :-0.2216         
##  1st Qu.:-0.1850   1st Qu.:-0.1545   1st Qu.:-0.2274   1st Qu.:-0.2174         
##  Median :-0.1784   Median :-0.1495   Median :-0.1894   Median :-0.1783         
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000         
##  3rd Qu.:-0.1315   3rd Qu.:-0.1181   3rd Qu.: 0.0000   3rd Qu.: 0.0000         
##  Max.   :24.2500   Max.   :23.7390   Max.   :23.5912   Max.   :27.0347         
##    Current PE       Trailing PE        PBV Ratio          PS Ratio       
##  Min.   :-0.4525   Min.   :-0.6534   Min.   :-0.2227   Min.   :-0.05503  
##  1st Qu.:-0.2283   1st Qu.:-0.2846   1st Qu.:-0.1620   1st Qu.:-0.05383  
##  Median : 0.0000   Median : 0.0000   Median :-0.1096   Median :-0.05120  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.00000  
##  3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.: 0.00000  
##  Max.   :27.6004   Max.   :29.8730   Max.   :32.1849   Max.   :38.04876  
##     EV/EBIT          EV/EBITDA       EV/ Invested Capital Value/BV of Capital
##  Min.   :-0.3141   Min.   :-0.2711   Min.   :-0.04084     Min.   :-0.4460    
##  1st Qu.:-0.1803   1st Qu.:-0.1655   1st Qu.:-0.03918     1st Qu.:-0.2739    
##  Median : 0.0000   Median : 0.0000   Median :-0.03767     Median :-0.1501    
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.00000     Mean   : 0.0000    
##  3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.:-0.02300     3rd Qu.: 0.0000    
##  Max.   :24.9322   Max.   :26.3784   Max.   :38.66865     Max.   :23.7342    
##     EV/Sales        EV/ Trailing Sales Growth in Revenue- last year
##  Min.   :-0.05104   Min.   :-0.07603   Min.   :-2.0060             
##  1st Qu.:-0.04992   1st Qu.:-0.07126   1st Qu.:-0.2594             
##  Median :-0.04758   Median :-0.06135   Median :-0.2594             
##  Mean   : 0.00000   Mean   : 0.00000   Mean   : 0.0000             
##  3rd Qu.: 0.00000   3rd Qu.: 0.00000   3rd Qu.: 0.1132             
##  Max.   :38.37839   Max.   :37.84282   Max.   : 9.7547             
##  3-yr Regression Beta Value Line Beta      HiLo risk      
##  Min.   :-5.8357      Min.   :-1.58853   Min.   :-1.5486  
##  1st Qu.:-0.6637      1st Qu.:-0.63008   1st Qu.:-0.7370  
##  Median :-0.2899      Median :-0.05502   Median :-0.1791  
##  Mean   : 0.0000      Mean   : 0.00000   Mean   : 0.0000  
##  3rd Qu.: 0.3620      3rd Qu.: 0.52005   3rd Qu.: 0.3281  
##  Max.   : 7.6896      Max.   : 8.09180   Max.   : 3.3715  
##  3-yr Standard Deviation (Stock Price)  Reinvestment        Correlation     
##  Min.   :-0.5986                       Min.   :-35.33536   Min.   :-3.7591  
##  1st Qu.:-0.5986                       1st Qu.:  0.02163   1st Qu.:-0.5218  
##  Median :-0.2265                       Median :  0.02194   Median : 0.0000  
##  Mean   : 0.0000                       Mean   :  0.00000   Mean   : 0.0000  
##  3rd Qu.: 0.1489                       3rd Qu.:  0.02283   3rd Qu.: 0.4469  
##  Max.   :13.0612                       Max.   : 10.05075   Max.   : 6.5737  
##   Payout Ratio     Reinvestment Rate        ROE                ROC          
##  Min.   :-0.5908   Min.   :-11.61729   Min.   :-28.9584   Min.   :-1.07516  
##  1st Qu.:-0.5908   1st Qu.: -0.08557   1st Qu.:  0.0000   1st Qu.:-0.03630  
##  Median : 0.0000   Median : -0.03857   Median :  0.1351   Median :-0.03337  
##  Mean   : 0.0000   Mean   :  0.00000   Mean   :  0.0000   Mean   : 0.00000  
##  3rd Qu.: 0.0000   3rd Qu.:  0.00000   3rd Qu.:  0.1730   3rd Qu.: 0.00000  
##  Max.   :25.5474   Max.   : 24.16164   Max.   :  1.8459   Max.   :38.42675  
##    Net Margin       Pre-tax Operating Margin Invested Capital 
##  Min.   :-23.0629   Min.   :-14.4475         Min.   :-1.1487  
##  1st Qu.:  0.0000   1st Qu.:  0.0000         1st Qu.:-0.1285  
##  Median :  0.1304   Median :  0.2000         Median :-0.1253  
##  Mean   :  0.0000   Mean   :  0.0000         Mean   : 0.0000  
##  3rd Qu.:  0.1348   3rd Qu.:  0.2219         3rd Qu.:-0.1029  
##  Max.   :  0.1869   Max.   :  0.3890         Max.   :30.0505  
##   BV of Assets      Non-cash WC       Chg in non-cash WC    Net Income      
##  Min.   :-0.1522   Min.   :-2.33085   Min.   :-37.02071   Min.   :-15.2562  
##  1st Qu.:-0.1519   1st Qu.:-0.05652   1st Qu.:  0.02083   1st Qu.: -0.1374  
##  Median :-0.1478   Median :-0.05635   Median :  0.02084   Median : -0.1341  
##  Mean   : 0.0000   Mean   : 0.00000   Mean   :  0.00000   Mean   :  0.0000  
##  3rd Qu.:-0.1271   3rd Qu.:-0.04941   3rd Qu.:  0.02122   3rd Qu.: -0.1033  
##  Max.   :24.0429   Max.   :33.93193   Max.   : 10.45705   Max.   : 27.2283  
##       EBIT           EBIT(1-t)           EBITDA             FCFF        
##  Min.   :-0.3459   Min.   :-0.4108   Min.   :-0.2907   Min.   :-5.9831  
##  1st Qu.:-0.2239   1st Qu.:-0.2302   1st Qu.:-0.2145   1st Qu.:-0.1155  
##  Median :-0.1912   Median :-0.1966   Median :-0.1846   Median :-0.1054  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.: 0.0000  
##  Max.   :24.0587   Max.   :21.8475   Max.   :19.9817   Max.   :32.2962  
##   Eff Tax Rate     Non-cash WC as % of Revenues Cash as % of Firm Value
##  Min.   :-0.8917   Min.   :-35.90702            Min.   :-0.2001        
##  1st Qu.:-0.8917   1st Qu.:  0.00000            1st Qu.:-0.1787        
##  Median :-0.8068   Median :  0.06408            Median :-0.1148        
##  Mean   : 0.0000   Mean   :  0.00000            Mean   : 0.0000        
##  3rd Qu.: 1.0323   3rd Qu.:  0.08348            3rd Qu.: 0.0000        
##  Max.   : 2.5035   Max.   :  2.64336            Max.   :35.3293        
##  Cash as % of Revenues Cash as % of Total Assets Capital Expenditures
##  Min.   :-0.07420      Min.   :-0.8462           Min.   :-0.1610     
##  1st Qu.:-0.07309      1st Qu.:-0.7183           1st Qu.:-0.1610     
##  Median :-0.06785      Median :-0.3772           Median :-0.1594     
##  Mean   : 0.00000      Mean   : 0.0000           Mean   : 0.0000     
##  3rd Qu.: 0.00000      3rd Qu.: 0.2623           3rd Qu.:-0.1374     
##  Max.   :37.68493      Max.   : 3.4169           Max.   :17.7089     
##   Depreciation      SG&A Expenses     Trailing Revenues Trailing Net Income
##  Min.   :-0.11353   Min.   :-0.1883   Min.   :-0.1643   Min.   :-5.3485    
##  1st Qu.:-0.11353   1st Qu.:-0.1871   1st Qu.:-0.1643   1st Qu.:-0.1491    
##  Median :-0.11217   Median :-0.1801   Median :-0.1603   Median :-0.1467    
##  Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000    
##  3rd Qu.:-0.09736   3rd Qu.:-0.1467   3rd Qu.:-0.1238   3rd Qu.:-0.1131    
##  Max.   :30.49859   Max.   :17.4104   Max.   :26.9656   Max.   :30.0294    
##    Dividends       Intangible Assets/Total Assets Fixed Assets/Total Assets
##  Min.   :-0.1477   Min.   :-0.6993                Min.   :-0.8999          
##  1st Qu.:-0.1477   1st Qu.:-0.6993                1st Qu.:-0.7687          
##  Median :-0.1477   Median :-0.4227                Median :-0.3312          
##  Mean   : 0.0000   Mean   : 0.0000                Mean   : 0.0000          
##  3rd Qu.:-0.1461   3rd Qu.: 0.1857                3rd Qu.: 0.3251          
##  Max.   :21.4405   Max.   : 4.6111                Max.   : 3.4750          
##    Market D/E       Market Debt to Capital Book Debt to Capital
##  Min.   :-0.08499   Min.   :-0.8546        Min.   :-1.1279     
##  1st Qu.:-0.08499   1st Qu.:-0.8546        1st Qu.:-0.9548     
##  Median :-0.08228   Median :-0.2999        Median : 0.0000     
##  Mean   : 0.00000   Mean   : 0.0000        Mean   : 0.0000     
##  3rd Qu.:-0.07361   3rd Qu.: 0.3402        3rd Qu.: 0.5601     
##  Max.   :22.20268   Max.   : 3.4126        Max.   : 3.1571     
##  Dividend Yield     Institutional Holdings
##  Min.   :-0.09479   Min.   :-0.9732       
##  1st Qu.:-0.09479   1st Qu.:-0.9732       
##  Median :-0.09479   Median :-0.3577       
##  Mean   : 0.00000   Mean   : 0.0000       
##  3rd Qu.:-0.09479   3rd Qu.: 0.8735       
##  Max.   :38.12419   Max.   : 2.1047
cat("\nStandar deviasi tiap kolom (harus ≈ 1):\n")
## 
## Standar deviasi tiap kolom (harus ≈ 1):
print(sapply(num_scaled, sd, na.rm = TRUE))
##               # of shares outstanding                            Market Cap 
##                                     1                                     1 
##                            Total Debt                            Firm Value 
##                                     1                                     1 
##                      Enterprise Value                                  Cash 
##                                     1                                     1 
##                     Revenues: Last yr              Trailing 12-mth Revenues 
##                                     1                                     1 
##                            Current PE                           Trailing PE 
##                                     1                                     1 
##                             PBV Ratio                              PS Ratio 
##                                     1                                     1 
##                               EV/EBIT                             EV/EBITDA 
##                                     1                                     1 
##                  EV/ Invested Capital                   Value/BV of Capital 
##                                     1                                     1 
##                              EV/Sales                    EV/ Trailing Sales 
##                                     1                                     1 
##          Growth in Revenue- last year                  3-yr Regression Beta 
##                                     1                                     1 
##                       Value Line Beta                             HiLo risk 
##                                     1                                     1 
## 3-yr Standard Deviation (Stock Price)                          Reinvestment 
##                                     1                                     1 
##                           Correlation                          Payout Ratio 
##                                     1                                     1 
##                     Reinvestment Rate                                   ROE 
##                                     1                                     1 
##                                   ROC                            Net Margin 
##                                     1                                     1 
##              Pre-tax Operating Margin                      Invested Capital 
##                                     1                                     1 
##                          BV of Assets                           Non-cash WC 
##                                     1                                     1 
##                    Chg in non-cash WC                            Net Income 
##                                     1                                     1 
##                                  EBIT                             EBIT(1-t) 
##                                     1                                     1 
##                                EBITDA                                  FCFF 
##                                     1                                     1 
##                          Eff Tax Rate          Non-cash WC as % of Revenues 
##                                     1                                     1 
##               Cash as % of Firm Value                 Cash as % of Revenues 
##                                     1                                     1 
##             Cash as % of Total Assets                  Capital Expenditures 
##                                     1                                     1 
##                          Depreciation                         SG&A Expenses 
##                                     1                                     1 
##                     Trailing Revenues                   Trailing Net Income 
##                                     1                                     1 
##                             Dividends        Intangible Assets/Total Assets 
##                                     1                                     1 
##             Fixed Assets/Total Assets                            Market D/E 
##                                     1                                     1 
##                Market Debt to Capital                  Book Debt to Capital 
##                                     1                                     1 
##                        Dividend Yield                Institutional Holdings 
##                                     1                                     1

2.5. Final Preprocessing

# Update Var num ke df Utama
data_final <- num_scaled
str(data_final)
## 'data.frame':    1500 obs. of  58 variables:
##  $ # of shares outstanding              : num  -0.256 2.23 -0.239 -0.173 -0.241 ...
##  $ Market Cap                           : num  -0.209 4.209 -0.208 -0.209 -0.207 ...
##  $ Total Debt                           : num  -0.0858 0.3549 -0.0857 -0.0831 -0.0847 ...
##  $ Firm Value                           : num  -0.188 2.926 -0.187 -0.186 -0.186 ...
##  $ Enterprise Value                     : num  -0.186 3.072 -0.185 -0.183 -0.184 ...
##  $ Cash                                 : num  -0.155 0.875 -0.154 -0.154 -0.155 ...
##  $ Revenues: Last yr                    : num  0 3.148 -0.231 -0.23 -0.226 ...
##  $ Trailing 12-mth Revenues             : num  0 3.233 -0.221 -0.221 -0.215 ...
##  $ Current PE                           : num  0 -0.205 0 0 -0.316 ...
##  $ Trailing PE                          : num  0 -0.224 0 0 -0.361 ...
##  $ PBV Ratio                            : num  0 -0.0674 0.02 0 -0.1815 ...
##  $ PS Ratio                             : num  0 -0.0529 -0.0331 -0.0542 -0.0544 ...
##  $ EV/EBIT                              : num  0 -0.168 0 0 -0.234 ...
##  $ EV/EBITDA                            : num  0 -0.136 0 0 -0.206 ...
##  $ EV/ Invested Capital                 : num  0 -0.0368 -0.0331 -0.0315 -0.0396 ...
##  $ Value/BV of Capital                  : num  0 -0.0262 0.1771 0 -0.3104 ...
##  $ EV/Sales                             : num  0 -0.0492 -0.0332 -0.0475 -0.0503 ...
##  $ EV/ Trailing Sales                   : num  0 -0.0687 -0.0431 -0.0579 -0.0733 ...
##  $ Growth in Revenue- last year         : num  -0.259 0.23 -1.331 1.348 0.486 ...
##  $ 3-yr Regression Beta                 : num  -0.6637 0.0578 -0.2204 2.8132 -0.6637 ...
##  $ Value Line Beta                      : num  -1.5885 0.6159 0.2325 2.2453 0.0408 ...
##  $ HiLo risk                            : num  -0.0269 -1.1935 2.3064 3.3208 0.5818 ...
##  $ 3-yr Standard Deviation (Stock Price): num  -0.599 -0.373 1.541 2.524 -0.599 ...
##  $ Reinvestment                         : num  0.0219 0.0183 0.0218 0.0211 0.0221 ...
##  $ Correlation                          : num  0 1.838 -1.275 -0.348 0 ...
##  $ Payout Ratio                         : num  0 0.364 0 0 -0.591 ...
##  $ Reinvestment Rate                    : num  0 -0.0763 -0.0378 -0.0438 -0.0615 ...
##  $ ROE                                  : num  0 0.202 -0.24 0 0.156 ...
##  $ ROC                                  : num  0 -0.0326 -0.0543 -0.0933 -0.0356 ...
##  $ Net Margin                           : num  0 0.136 0.0161 0.0794 0.1343 ...
##  $ Pre-tax Operating Margin             : num  0 0.2239 -0.1144 0.0229 0.2139 ...
##  $ Invested Capital                     : num  -0.129 1.034 -0.128 -0.128 -0.126 ...
##  $ BV of Assets                         : num  -0.152 1.066 -0.152 -0.152 -0.151 ...
##  $ Non-cash WC                          : num  -0.0564 0.4926 -0.0564 -0.0556 -0.0531 ...
##  $ Chg in non-cash WC                   : num  0.0208 0.0793 0.0207 0.0207 0.0214 ...
##  $ Net Income                           : num  -0.136 2.876 -0.139 -0.15 -0.134 ...
##  $ EBIT                                 : num  0 2.674 -0.226 -0.231 -0.221 ...
##  $ EBIT(1-t)                            : num  0 2.926 -0.233 -0.24 -0.228 ...
##  $ EBITDA                               : num  0 2.127 -0.216 -0.219 -0.212 ...
##  $ FCFF                                 : num  0 1.091 -0.116 -0.118 -0.114 ...
##  $ Eff Tax Rate                         : num  -0.892 0.58 -0.892 -0.892 1.032 ...
##  $ Non-cash WC as % of Revenues         : num  0 0.0715 0.0447 0.0924 0.0895 ...
##  $ Cash as % of Firm Value              : num  0 -0.157 -0.125 -0.179 -0.189 ...
##  $ Cash as % of Revenues                : num  0 -0.0728 -0.061 -0.0723 -0.0739 ...
##  $ Cash as % of Total Assets            : num  0 -0.5904 0.0917 -0.5904 -0.8035 ...
##  $ Capital Expenditures                 : num  -0.161 0.91 -0.161 -0.161 -0.16 ...
##  $ Depreciation                         : num  -0.114 0.774 -0.113 -0.111 -0.111 ...
##  $ SG&A Expenses                        : num  -0.188 2.977 -0.187 -0.177 -0.182 ...
##  $ Trailing Revenues                    : num  -0.164 3.274 -0.164 -0.163 -0.158 ...
##  $ Trailing Net Income                  : num  -0.148 3.173 -0.149 -0.176 -0.146 ...
##  $ Dividends                            : num  -0.148 2.468 -0.148 -0.148 -0.148 ...
##  $ Intangible Assets/Total Assets       : num  0 0.96 -0.699 -0.699 1.403 ...
##  $ Fixed Assets/Total Assets            : num  0 -0.3312 2.25 0.0188 0.1501 ...
##  $ Market D/E                           : num  0 -0.08318 -0.08372 -0.00268 -0.07524 ...
##  $ Market Debt to Capital               : num  0 -0.471 -0.599 2.645 0.639 ...
##  $ Book Debt to Capital                 : num  0 0.127 0.214 0 0.474 ...
##  $ Dividend Yield                       : num  -0.0948 0.1211 -0.0948 -0.0948 -0.0948 ...
##  $ Institutional Holdings               : num  -0.942 1.458 -0.942 -0.942 -0.727 ...
summary(data_final)
##  # of shares outstanding   Market Cap        Total Debt         Firm Value     
##  Min.   :-0.2557         Min.   :-0.2093   Min.   :-0.08583   Min.   :-0.1878  
##  1st Qu.:-0.2380         1st Qu.:-0.2082   1st Qu.:-0.08583   1st Qu.:-0.1869  
##  Median :-0.2002         Median :-0.1994   Median :-0.08538   Median :-0.1798  
##  Mean   : 0.0000         Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000  
##  3rd Qu.:-0.1098         3rd Qu.:-0.1386   3rd Qu.:-0.07432   3rd Qu.:-0.1315  
##  Max.   :16.3234         Max.   :26.4852   Max.   :35.33988   Max.   :24.0613  
##  Enterprise Value       Cash         Revenues: Last yr Trailing 12-mth Revenues
##  Min.   :-0.7000   Min.   :-0.1549   Min.   :-0.2312   Min.   :-0.2216         
##  1st Qu.:-0.1850   1st Qu.:-0.1545   1st Qu.:-0.2274   1st Qu.:-0.2174         
##  Median :-0.1784   Median :-0.1495   Median :-0.1894   Median :-0.1783         
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000         
##  3rd Qu.:-0.1315   3rd Qu.:-0.1181   3rd Qu.: 0.0000   3rd Qu.: 0.0000         
##  Max.   :24.2500   Max.   :23.7390   Max.   :23.5912   Max.   :27.0347         
##    Current PE       Trailing PE        PBV Ratio          PS Ratio       
##  Min.   :-0.4525   Min.   :-0.6534   Min.   :-0.2227   Min.   :-0.05503  
##  1st Qu.:-0.2283   1st Qu.:-0.2846   1st Qu.:-0.1620   1st Qu.:-0.05383  
##  Median : 0.0000   Median : 0.0000   Median :-0.1096   Median :-0.05120  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.00000  
##  3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.: 0.00000  
##  Max.   :27.6004   Max.   :29.8730   Max.   :32.1849   Max.   :38.04876  
##     EV/EBIT          EV/EBITDA       EV/ Invested Capital Value/BV of Capital
##  Min.   :-0.3141   Min.   :-0.2711   Min.   :-0.04084     Min.   :-0.4460    
##  1st Qu.:-0.1803   1st Qu.:-0.1655   1st Qu.:-0.03918     1st Qu.:-0.2739    
##  Median : 0.0000   Median : 0.0000   Median :-0.03767     Median :-0.1501    
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.00000     Mean   : 0.0000    
##  3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.:-0.02300     3rd Qu.: 0.0000    
##  Max.   :24.9322   Max.   :26.3784   Max.   :38.66865     Max.   :23.7342    
##     EV/Sales        EV/ Trailing Sales Growth in Revenue- last year
##  Min.   :-0.05104   Min.   :-0.07603   Min.   :-2.0060             
##  1st Qu.:-0.04992   1st Qu.:-0.07126   1st Qu.:-0.2594             
##  Median :-0.04758   Median :-0.06135   Median :-0.2594             
##  Mean   : 0.00000   Mean   : 0.00000   Mean   : 0.0000             
##  3rd Qu.: 0.00000   3rd Qu.: 0.00000   3rd Qu.: 0.1132             
##  Max.   :38.37839   Max.   :37.84282   Max.   : 9.7547             
##  3-yr Regression Beta Value Line Beta      HiLo risk      
##  Min.   :-5.8357      Min.   :-1.58853   Min.   :-1.5486  
##  1st Qu.:-0.6637      1st Qu.:-0.63008   1st Qu.:-0.7370  
##  Median :-0.2899      Median :-0.05502   Median :-0.1791  
##  Mean   : 0.0000      Mean   : 0.00000   Mean   : 0.0000  
##  3rd Qu.: 0.3620      3rd Qu.: 0.52005   3rd Qu.: 0.3281  
##  Max.   : 7.6896      Max.   : 8.09180   Max.   : 3.3715  
##  3-yr Standard Deviation (Stock Price)  Reinvestment        Correlation     
##  Min.   :-0.5986                       Min.   :-35.33536   Min.   :-3.7591  
##  1st Qu.:-0.5986                       1st Qu.:  0.02163   1st Qu.:-0.5218  
##  Median :-0.2265                       Median :  0.02194   Median : 0.0000  
##  Mean   : 0.0000                       Mean   :  0.00000   Mean   : 0.0000  
##  3rd Qu.: 0.1489                       3rd Qu.:  0.02283   3rd Qu.: 0.4469  
##  Max.   :13.0612                       Max.   : 10.05075   Max.   : 6.5737  
##   Payout Ratio     Reinvestment Rate        ROE                ROC          
##  Min.   :-0.5908   Min.   :-11.61729   Min.   :-28.9584   Min.   :-1.07516  
##  1st Qu.:-0.5908   1st Qu.: -0.08557   1st Qu.:  0.0000   1st Qu.:-0.03630  
##  Median : 0.0000   Median : -0.03857   Median :  0.1351   Median :-0.03337  
##  Mean   : 0.0000   Mean   :  0.00000   Mean   :  0.0000   Mean   : 0.00000  
##  3rd Qu.: 0.0000   3rd Qu.:  0.00000   3rd Qu.:  0.1730   3rd Qu.: 0.00000  
##  Max.   :25.5474   Max.   : 24.16164   Max.   :  1.8459   Max.   :38.42675  
##    Net Margin       Pre-tax Operating Margin Invested Capital 
##  Min.   :-23.0629   Min.   :-14.4475         Min.   :-1.1487  
##  1st Qu.:  0.0000   1st Qu.:  0.0000         1st Qu.:-0.1285  
##  Median :  0.1304   Median :  0.2000         Median :-0.1253  
##  Mean   :  0.0000   Mean   :  0.0000         Mean   : 0.0000  
##  3rd Qu.:  0.1348   3rd Qu.:  0.2219         3rd Qu.:-0.1029  
##  Max.   :  0.1869   Max.   :  0.3890         Max.   :30.0505  
##   BV of Assets      Non-cash WC       Chg in non-cash WC    Net Income      
##  Min.   :-0.1522   Min.   :-2.33085   Min.   :-37.02071   Min.   :-15.2562  
##  1st Qu.:-0.1519   1st Qu.:-0.05652   1st Qu.:  0.02083   1st Qu.: -0.1374  
##  Median :-0.1478   Median :-0.05635   Median :  0.02084   Median : -0.1341  
##  Mean   : 0.0000   Mean   : 0.00000   Mean   :  0.00000   Mean   :  0.0000  
##  3rd Qu.:-0.1271   3rd Qu.:-0.04941   3rd Qu.:  0.02122   3rd Qu.: -0.1033  
##  Max.   :24.0429   Max.   :33.93193   Max.   : 10.45705   Max.   : 27.2283  
##       EBIT           EBIT(1-t)           EBITDA             FCFF        
##  Min.   :-0.3459   Min.   :-0.4108   Min.   :-0.2907   Min.   :-5.9831  
##  1st Qu.:-0.2239   1st Qu.:-0.2302   1st Qu.:-0.2145   1st Qu.:-0.1155  
##  Median :-0.1912   Median :-0.1966   Median :-0.1846   Median :-0.1054  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.: 0.0000  
##  Max.   :24.0587   Max.   :21.8475   Max.   :19.9817   Max.   :32.2962  
##   Eff Tax Rate     Non-cash WC as % of Revenues Cash as % of Firm Value
##  Min.   :-0.8917   Min.   :-35.90702            Min.   :-0.2001        
##  1st Qu.:-0.8917   1st Qu.:  0.00000            1st Qu.:-0.1787        
##  Median :-0.8068   Median :  0.06408            Median :-0.1148        
##  Mean   : 0.0000   Mean   :  0.00000            Mean   : 0.0000        
##  3rd Qu.: 1.0323   3rd Qu.:  0.08348            3rd Qu.: 0.0000        
##  Max.   : 2.5035   Max.   :  2.64336            Max.   :35.3293        
##  Cash as % of Revenues Cash as % of Total Assets Capital Expenditures
##  Min.   :-0.07420      Min.   :-0.8462           Min.   :-0.1610     
##  1st Qu.:-0.07309      1st Qu.:-0.7183           1st Qu.:-0.1610     
##  Median :-0.06785      Median :-0.3772           Median :-0.1594     
##  Mean   : 0.00000      Mean   : 0.0000           Mean   : 0.0000     
##  3rd Qu.: 0.00000      3rd Qu.: 0.2623           3rd Qu.:-0.1374     
##  Max.   :37.68493      Max.   : 3.4169           Max.   :17.7089     
##   Depreciation      SG&A Expenses     Trailing Revenues Trailing Net Income
##  Min.   :-0.11353   Min.   :-0.1883   Min.   :-0.1643   Min.   :-5.3485    
##  1st Qu.:-0.11353   1st Qu.:-0.1871   1st Qu.:-0.1643   1st Qu.:-0.1491    
##  Median :-0.11217   Median :-0.1801   Median :-0.1603   Median :-0.1467    
##  Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000    
##  3rd Qu.:-0.09736   3rd Qu.:-0.1467   3rd Qu.:-0.1238   3rd Qu.:-0.1131    
##  Max.   :30.49859   Max.   :17.4104   Max.   :26.9656   Max.   :30.0294    
##    Dividends       Intangible Assets/Total Assets Fixed Assets/Total Assets
##  Min.   :-0.1477   Min.   :-0.6993                Min.   :-0.8999          
##  1st Qu.:-0.1477   1st Qu.:-0.6993                1st Qu.:-0.7687          
##  Median :-0.1477   Median :-0.4227                Median :-0.3312          
##  Mean   : 0.0000   Mean   : 0.0000                Mean   : 0.0000          
##  3rd Qu.:-0.1461   3rd Qu.: 0.1857                3rd Qu.: 0.3251          
##  Max.   :21.4405   Max.   : 4.6111                Max.   : 3.4750          
##    Market D/E       Market Debt to Capital Book Debt to Capital
##  Min.   :-0.08499   Min.   :-0.8546        Min.   :-1.1279     
##  1st Qu.:-0.08499   1st Qu.:-0.8546        1st Qu.:-0.9548     
##  Median :-0.08228   Median :-0.2999        Median : 0.0000     
##  Mean   : 0.00000   Mean   : 0.0000        Mean   : 0.0000     
##  3rd Qu.:-0.07361   3rd Qu.: 0.3402        3rd Qu.: 0.5601     
##  Max.   :22.20268   Max.   : 3.4126        Max.   : 3.1571     
##  Dividend Yield     Institutional Holdings
##  Min.   :-0.09479   Min.   :-0.9732       
##  1st Qu.:-0.09479   1st Qu.:-0.9732       
##  Median :-0.09479   Median :-0.3577       
##  Mean   : 0.00000   Mean   : 0.0000       
##  3rd Qu.:-0.09479   3rd Qu.: 0.8735       
##  Max.   :38.12419   Max.   : 2.1047

Tugas P13

Soal 1

Setelah membentuk dua variabel indikator finansial, seseorang ingin mengetahui posisi masing-masing perusahaan berdasarkan indikator tersebut.

nomor 1a

  1. Buatlah plot untuk menyajikan posisi masing-masing perusahaan pada kedua indikator tersebut. Apa simpulan anda?
S <- cor(num_scaled)
dim(S)
## [1] 58 58
eig <- eigen(S, symmetric = TRUE) 
lambda <- eig$values    # akar ciri (eigenvalues) 
E <- eig$vectors        # vektor ciri (eigenvectors) 
# Proporsi ragam tiap komponen dan kumulatifnya 
prop <- lambda / sum(lambda) 
cumprop <- cumsum(prop) 
 
#Hasil PCA
 hasil_pca <- data.frame(
 Komponen = paste0("PC", 1:length(lambda)),
 Eigenvalue = round(lambda, 4),
 Proporsi = round(prop * 100, 2),
 Kumulatif = round(cumprop * 100, 2)
 )
 hasil_pca
##    Komponen Eigenvalue Proporsi Kumulatif
## 1       PC1    13.6308    23.50     23.50
## 2       PC2     6.2822    10.83     34.33
## 3       PC3     3.0218     5.21     39.54
## 4       PC4     2.6593     4.59     44.13
## 5       PC5     2.3568     4.06     48.19
## 6       PC6     2.1813     3.76     51.95
## 7       PC7     1.9480     3.36     55.31
## 8       PC8     1.9281     3.32     58.64
## 9       PC9     1.7034     2.94     61.57
## 10     PC10     1.6565     2.86     64.43
## 11     PC11     1.2514     2.16     66.59
## 12     PC12     1.1856     2.04     68.63
## 13     PC13     1.1587     2.00     70.63
## 14     PC14     1.1327     1.95     72.58
## 15     PC15     1.0775     1.86     74.44
## 16     PC16     1.0449     1.80     76.24
## 17     PC17     1.0105     1.74     77.98
## 18     PC18     1.0013     1.73     79.71
## 19     PC19     0.9654     1.66     81.37
## 20     PC20     0.9008     1.55     82.93
## 21     PC21     0.8938     1.54     84.47
## 22     PC22     0.8677     1.50     85.96
## 23     PC23     0.8132     1.40     87.36
## 24     PC24     0.7922     1.37     88.73
## 25     PC25     0.7463     1.29     90.02
## 26     PC26     0.7370     1.27     91.29
## 27     PC27     0.6622     1.14     92.43
## 28     PC28     0.6372     1.10     93.53
## 29     PC29     0.5811     1.00     94.53
## 30     PC30     0.4847     0.84     95.37
## 31     PC31     0.4087     0.70     96.07
## 32     PC32     0.3861     0.67     96.74
## 33     PC33     0.3008     0.52     97.25
## 34     PC34     0.2852     0.49     97.75
## 35     PC35     0.2300     0.40     98.14
## 36     PC36     0.2102     0.36     98.51
## 37     PC37     0.1748     0.30     98.81
## 38     PC38     0.1628     0.28     99.09
## 39     PC39     0.1198     0.21     99.29
## 40     PC40     0.0911     0.16     99.45
## 41     PC41     0.0738     0.13     99.58
## 42     PC42     0.0595     0.10     99.68
## 43     PC43     0.0516     0.09     99.77
## 44     PC44     0.0366     0.06     99.83
## 45     PC45     0.0276     0.05     99.88
## 46     PC46     0.0237     0.04     99.92
## 47     PC47     0.0159     0.03     99.95
## 48     PC48     0.0110     0.02     99.97
## 49     PC49     0.0075     0.01     99.98
## 50     PC50     0.0050     0.01     99.99
## 51     PC51     0.0035     0.01    100.00
## 52     PC52     0.0023     0.00    100.00
## 53     PC53     0.0001     0.00    100.00
## 54     PC54     0.0001     0.00    100.00
## 55     PC55     0.0000     0.00    100.00
## 56     PC56     0.0000     0.00    100.00
## 57     PC57     0.0000     0.00    100.00
## 58     PC58     0.0000     0.00    100.00
#Menentukan jumlah komponen
 prop <- lambda / sum(lambda)
 cumprop <- cumsum(prop)
 k_kaiser <- sum(lambda > 1)
 k_kaiser
## [1] 18
 k_80pct <- which(cumprop >= 0.80)[1]
 k_80pct
## [1] 19
 round(prop[1]*100,2)
## [1] 23.5
#Skor komponen utama
 Xz <- num_scaled
 k <- min(30,ncol(num_scaled))
 lambda_k <- lambda[1:k]
 E_k <- E[, 1:k]
 scores2 <- as.matrix(Xz) %*% E_k
 scores2 <- as.data.frame(scores2)
 colnames(scores2) <- paste0("PC", 1:k)
 head(scores2)
##           PC1         PC2        PC3        PC4        PC5        PC6
## 1   0.6362235 -0.03890899 -0.2284873 -0.6379098  1.2011215 -0.1295692
## 2 -11.0263099  0.44732892 -1.6219859  0.6122046 -1.1481243  1.3660981
## 3   1.0626468 -0.03574855 -0.3354528 -0.1234948  1.8747847 -1.3808034
## 4   0.9351954  0.13733629  0.2050495  0.7104460  0.5516308 -2.2514047
## 5   0.7502610  0.26064006  0.1019040 -1.2652652 -0.0171203  0.4727001
## 6   0.3029955  0.04137765 -0.2156327  1.8718580 -1.0941279 -0.4240099
##          PC7        PC8          PC9       PC10       PC11       PC12
## 1  0.5713106  0.0991562  0.000812676  0.6204053  0.1825705  0.2939312
## 2 -0.4707741  0.3018366 -1.382709015 -0.3522240 -0.4909652  0.3785711
## 3  0.4450929 -0.3386706  0.040214417 -0.7958995  0.4229812 -1.5533995
## 4 -0.5303199 -1.1292634  0.636775822 -4.2234736 -0.4055567 -1.5112857
## 5  0.1054478  0.0149494  0.229097179 -0.2067430 -1.2851666  0.7191739
## 6 -0.4243253  0.7654672 -0.801023528  1.1207332 -0.2634748 -0.5167057
##         PC13        PC14          PC15       PC16        PC17        PC18
## 1 -0.3309680 -0.03567005  0.2246902465  0.2597683 -0.38473794  0.06845173
## 2  0.2719057  1.33165232 -0.2478410986  0.7002579 -0.06245786 -0.40400691
## 3  0.5973448 -0.48311343  0.3286183346 -1.0600654  1.12714191  0.23707735
## 4  0.9315681 -0.80140445 -1.0939852292 -0.5651896  0.09088823 -0.22895935
## 5  0.2730401  0.18409972 -0.0009416184 -0.1141876 -0.32815988 -0.29208019
## 6  0.1952191 -0.10235748 -0.5072469159 -0.2994742 -0.48506277  0.47948098
##           PC19        PC20       PC21       PC22        PC23         PC24
## 1  0.027195686  0.52298935  0.0751210 -0.2362871  0.73538989 -0.456241229
## 2  0.312815599  0.21560251  0.4218280  0.2876800  0.24813439 -0.097363315
## 3 -1.463418083  0.04527495 -0.5726276  0.0339438  0.36364923  0.063890692
## 4 -0.804445466 -0.68646352  0.7350047 -0.3716603  0.05325169  0.447730927
## 5  0.004550777 -0.62184217  0.3685203 -0.1578313  0.62097403  0.004790563
## 6  0.906056971 -0.25411781 -0.5163555  0.5987621 -0.49536609  0.678923464
##            PC25        PC26        PC27         PC28      PC29        PC30
## 1  0.4042329677  0.28807504  0.08486737  0.036615724 -0.696549  0.08419631
## 2  0.0368647273  0.20943136 -0.07764022 -0.008692929 -0.389751  0.21870139
## 3 -1.3869021313  0.04711376 -0.44609198 -0.399362313  0.355995 -0.64649579
## 4  0.6846600224 -0.32384818 -0.10844186 -0.244831898  1.692749 -0.70374407
## 5  0.0004955636  0.12575152  0.11656000 -0.377990592  0.993171 -0.25366311
## 6 -0.5780086527 -1.05216359 -0.39812122  0.483707353 -0.510688  0.30188643
#KERNEL PCA
X <- as.matrix(num_scaled)
mode(X) <- "numeric"

 #Definisi fungsi kernel
 rbf_kernel <- function(X, gamma = NULL) {
  if (is.null(gamma)) gamma <- 1 / ncol(X)
  if (!requireNamespace("proxy", quietly = TRUE)) install.packages("proxy")
  dist_matrix <- as.matrix(proxy::dist(X, method = "euclidean"))^2
  
  K <- exp(-gamma * dist_matrix)
  return(K)
 }
 poly_kernel <- function(X, degree = 3, coef0 = 1) {
  K <- (tcrossprod(X) + coef0)^degree
  return(K)
 }
 linear_kernel <- function(X) {
  K <- tcrossprod(X)
  return(K)
 }
 #menghitung tiga tipe kernel
 K_linear <- linear_kernel(X)
 K_poly   <- poly_kernel(X, degree = 3)
 K_rbf    <- rbf_kernel(X, gamma = 1/ncol(X))
 #pusatkan matriks kernel
 n <- nrow(X)
 one_n <- matrix(1, n, n) / n
 center_kernel <- function(K) {
  K_centered <- K - one_n %*% K - K %*% one_n + one_n %*% K %*% one_n
  return(K_centered)
 }
 Kc_linear <- center_kernel(K_linear)
 Kc_poly   <- center_kernel(K_poly)
 Kc_rbf    <- center_kernel(K_rbf)
 #Eigen decomposition
 eig_linear <- eigen(Kc_linear, symmetric = TRUE)
 eig_poly   <- eigen(Kc_poly, symmetric = TRUE)
 eig_rbf    <- eigen(Kc_rbf, symmetric = TRUE)
 #normalisasi vektor eigen
 normalize_eig <- function(eig) {
  values <- eig$values
  vectors <- eig$vectors
  values[values < 0] <- 0
  vectors_norm <- vectors / sqrt(values + 1e-9)
  list(values = values, vectors = vectors_norm)
 }
 
 eig_linear_n <- normalize_eig(eig_linear)
 eig_poly_n   <- normalize_eig(eig_poly)
 eig_rbf_n    <- normalize_eig(eig_rbf)
 #Skor eigen
 k1 <- 1
 k2 <- 2
 scores_linear_1 <- Kc_linear %*% eig_linear_n$vectors[, 1:k1]
 scores_poly_1   <- Kc_poly   %*% eig_poly_n$vectors[, 1:k1]
 scores_rbf_1    <- Kc_rbf    %*% eig_rbf_n$vectors[, 1:k1]
 scores_linear_2 <- Kc_linear %*% eig_linear_n$vectors[, 1:k2]
 scores_poly_2   <- Kc_poly   %*% eig_poly_n$vectors[, 1:k2]
 scores_rbf_2    <- Kc_rbf    %*% eig_rbf_n$vectors[, 1:k2]
 #Proporsi varians
 var_linear <- eig_linear$values / sum(eig_linear$values)
 var_poly   <- eig_poly$values / sum(eig_poly$values)
 var_rbf    <- eig_rbf$values / sum(eig_rbf$values)
 data.frame(
  Kernel = c("Linear", "Polynomial", "RBF"),
  PC1 = c(var_linear[1], var_poly[1], var_rbf[1]),
  PC2 = c(var_linear[2], var_poly[2], var_rbf[2]),
  PC3 = c(var_linear[3], var_poly[3], var_rbf[3]),
  PC4 = c(var_linear[4], var_poly[4], var_rbf[4]),
  PC5 = c(var_linear[5], var_poly[5], var_rbf[5]),
  PC6 = c(var_linear[6], var_poly[6], var_rbf[6]),
  PC7 = c(var_linear[7], var_poly[7], var_rbf[7]),
  PC8 = c(var_linear[8], var_poly[8], var_rbf[8]),
  PC9 = c(var_linear[9], var_poly[9], var_rbf[9]),
  PC10 = c(var_linear[10], var_poly[10], var_rbf[10]),
  PC11 = c(var_linear[11], var_poly[11], var_rbf[11]),
  PC12 = c(var_linear[12], var_poly[12], var_rbf[12]),
  PC13 = c(var_linear[13], var_poly[13], var_rbf[13]),
  PC14 = c(var_linear[14], var_poly[14], var_rbf[14]),
  PC15 = c(var_linear[15], var_poly[15], var_rbf[15]),
  PC16 = c(var_linear[16], var_poly[16], var_rbf[16]),
  PC17 = c(var_linear[17], var_poly[17], var_rbf[17]),
  PC18 = c(var_linear[18], var_poly[18], var_rbf[18]),
  Kumulatif_18PC = c(cumsum(var_linear)[18],
                     cumsum(var_poly)[18],
                     cumsum(var_rbf)[18])
 )
##       Kernel       PC1        PC2        PC3        PC4        PC5        PC6
## 1     Linear 0.2350141 0.10831351 0.05209934 0.04585033 0.04063458 0.03760867
## 2 Polynomial 0.4726871 0.35280416 0.05652690 0.03126117 0.02905487 0.02008050
## 3        RBF 0.1206419 0.09264346 0.08592908 0.05472602 0.04666290 0.03919768
##          PC7         PC8         PC9        PC10        PC11        PC12
## 1 0.03358650 0.033243380 0.029369557 0.028561045 0.021575514 0.020440572
## 2 0.01635002 0.005554057 0.004832184 0.002705458 0.002380718 0.001472497
## 3 0.03174300 0.030204267 0.024717001 0.022467871 0.020744486 0.019449738
##           PC13         PC14         PC15         PC16         PC17        PC18
## 1 0.0199770970 0.0195289105 0.0185778264 0.0180151159 0.0174226973 0.017263174
## 2 0.0007368118 0.0005398184 0.0004745973 0.0003818573 0.0003544145 0.000303130
## 3 0.0166943907 0.0166150527 0.0130464182 0.0117134581 0.0093376612 0.009118077
##   Kumulatif_18PC
## 1      0.7970819
## 2      0.9985002
## 3      0.6656525
 # Plot Kernel PCA dengan Polynomial Kernel
set.seed(2025)
sampel_index <- sample(1:nrow(finansial), 1500)
num_scaled <- finansial[sampel_index, 9:ncol(finansial)]
num_scaled[] <- lapply(num_scaled, function(x) as.numeric(as.character(x)))
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
company_names <- finansial$`Company Name`[sampel_index]
# Membuat data frame untuk hasil Kernel PCA (Polynomial)
 scores_poly_df <- data.frame(
 PC1 = scores_poly_2[, 1],
 PC2 = scores_poly_2[, 2],
 Perusahaan = company_names
 )
 # Plot scatter PC1 vs PC2 untuk Kernel Polynomial
 ggplot(scores_poly_df, aes(x = PC1, y = PC2, label = Perusahaan)) +
 geom_point(color = "darkorange", size = 3) +
 geom_text(aes(label = Perusahaan), hjust = 0.5, vjust = -0.8, size = 3) +
 theme_minimal(base_size = 12) +
 labs(
 title = "Posisi Perusahaan Berdasarkan Kernel PCA (Polynomial Kernel)",
 x = "Komponen Utama 1 (PC1)",
 y = "Komponen Utama 2 (PC2)"
 ) +
 theme(plot.title = element_text(hjust = 0.5, face = "bold"))

library(ggplot2)
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.4.3
library(scales)

# Pastikan data frame bersih
scores_poly_df <- scores_poly_df %>%
  filter(!is.na(PC1), !is.na(PC2), !is.na(Perusahaan))

# Skala ulang supaya lebih stabil untuk plotting
scores_poly_df$PC1_scaled <- rescale(scores_poly_df$PC1, to = c(-1, 1))
scores_poly_df$PC2_scaled <- rescale(scores_poly_df$PC2, to = c(-1, 1))

# Plot versi stabil
ggplot(scores_poly_df, aes(x = PC1_scaled, y = PC2_scaled, color = Perusahaan, label = Perusahaan)) +
  geom_point(size = 3, alpha = 0.8) +
  geom_text_repel(size = 3, max.overlaps = 10, show.legend = FALSE, force = 3) +
  scale_color_viridis_d(option = "plasma", end = 0.9) +
  theme_minimal(base_size = 12) +
  labs(
    title = "Posisi Perusahaan Berdasarkan Kernel PCA (Polynomial Kernel)",
    x = "Komponen Utama 1 (PC1, diskalakan)",
    y = "Komponen Utama 2 (PC2, diskalakan)",
    color = "Perusahaan"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "none"
  )
## Warning: ggrepel: 1496 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

set.seed(2025)
scores_poly_df$Cluster <- kmeans(scores_poly_df[, c("PC1", "PC2")], centers = 4)$cluster

ggplot(scores_poly_df, aes(x = PC1, y = PC2, color = factor(Cluster))) +
  geom_point(size = 3, alpha = 0.9) +
  geom_text_repel(aes(label = Perusahaan), size = 3, show.legend = FALSE, max.overlaps = 15) +
  scale_color_viridis_d(option = "plasma", end = 0.9) +
  theme_minimal(base_size = 12) +
  labs(
    title = "Posisi Perusahaan Berdasarkan Kernel PCA (Polynomial Kernel)",
    x = "Komponen Utama 1 (PC1)",
    y = "Komponen Utama 2 (PC2)",
    color = "Cluster"
  ) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
## Warning: ggrepel: 1496 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps