## 1. 專案背景 (Introduction)

## 本專案旨在透過**主成分分析 (PCA)**,將 2017 年上市公司的 16 項複雜財務指標,濃縮為少數幾個關鍵維度,以協助投資人快速識別具有「高獲利」與「高效率」潛力的優質企業。

## 2. 資料匯入與整理 (Import & Tidy)

## 我們使用《R for Data Science》建議的 Tidyverse 流程進行資料清洗。
# Attach the packages this analysis relies on: tidyverse supplies read_csv(),
# the %>% pipe, the dplyr/tidyr verbs, tibble() and ggplot2; ggrepel supplies
# geom_text_repel(), which the labelled plots use.
library(tidyverse)
library(ggrepel)

# Read the raw indicator table (the path is relative to the working
# directory) and list its column names.
financial <- read_csv("2017_financial index_163 comp.csv")
names(financial)
##  [1] "comp_id"               "roe"                   "roa"                  
##  [4] "profit_margin_rate"    "gross_margin_rate"     "expense_rate"         
##  [7] "asset_turnover"        "inventory_turnover"    "equity_turnnover"     
## [10] "rev_growth_rate"       "margin_growth_rate"    "op_profit_growth_rate"
## [13] "cash_reinv_rate"       "asset_growth_rate"     "current_ratio"        
## [16] "quick_rartio"          "debt_ratio"
# Data cleaning.
# Three indicator columns are converted to character and re-parsed with
# parse_number() so they end up numeric; the column names (including the
# spelling "quick_rartio") are exactly the headers found in the data file.
# Afterwards every row that has a missing value in any column is dropped.
fin_clean <- financial %>%
  mutate(
    across(
      c(op_profit_growth_rate, current_ratio, quick_rartio),
      ~ parse_number(as.character(.x))
    )
  ) %>%
  drop_na()

# Show the first rows of the cleaned data as a formatted table.
knitr::kable(head(fin_clean))
comp_id roe roa profit_margin_rate gross_margin_rate expense_rate asset_turnover inventory_turnover equity_turnnover rev_growth_rate margin_growth_rate op_profit_growth_rate cash_reinv_rate asset_growth_rate current_ratio quick_rartio debt_ratio
2303 3.06 2.21 4.40 18.12 14.83 0.38 6.93 0.69 0.96 -10.93 6.05 4.66 1.93 158.03 118.71 45.69
2330 23.56 17.84 39.45 50.62 11.04 0.50 7.88 0.67 3.11 4.21 2.01 11.06 5.59 238.97 215.17 23.55
2337 25.68 14.29 16.82 36.95 20.12 0.86 2.54 1.59 41.75 116.49 1708.73 5.31 24.33 187.85 110.85 44.21
2342 -3.41 -0.72 3.86 16.86 13.00 0.66 8.87 1.55 12.73 13.25 22.41 0.56 17.54 184.99 158.19 51.42
2344 10.90 7.69 13.99 34.30 20.31 0.61 3.99 0.89 13.07 35.83 79.26 5.91 29.60 229.31 173.05 30.06
2408 37.01 28.27 34.22 44.87 10.66 0.38 5.16 0.50 31.91 91.77 119.72 7.94 10.08 424.44 366.56 12.38

## 3. 主成分分析 (Transformation: PCA)

由於財務指標間的單位差異巨大(如 ROE 為百分比,週轉率為小數),我們設定 scale. = TRUE 進行標準化,避免特定變數主導模型。

# Fit the PCA on the indicator columns only: the comp_id column is left out
# of the input (it stays available in fin_clean). scale. = TRUE asks prcomp()
# to scale every variable to unit variance before the decomposition.
pca_model <- fin_clean %>%
  select(-comp_id) %>%
  prcomp(scale. = TRUE)

# Print each component's standard deviation, proportion of variance and
# cumulative proportion.
summary(pca_model)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.0330 1.8739 1.4399 1.16215 1.01323 0.91896 0.87186
## Proportion of Variance 0.2583 0.2195 0.1296 0.08441 0.06417 0.05278 0.04751
## Cumulative Proportion  0.2583 0.4778 0.6074 0.69177 0.75594 0.80872 0.85623
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.77645 0.73927 0.67853 0.56103 0.50353 0.27912 0.15178
## Proportion of Variance 0.03768 0.03416 0.02878 0.01967 0.01585 0.00487 0.00144
## Cumulative Proportion  0.89391 0.92806 0.95684 0.97651 0.99236 0.99723 0.99867
##                           PC15    PC16
## Standard deviation     0.12692 0.07223
## Proportion of Variance 0.00101 0.00033
## Cumulative Proportion  0.99967 1.00000

## 4. 視覺化分析 (Visualization)

4.1 陡坡圖 (Scree Plot)

決定要保留多少主成分。

# Build the plotting data: one row per principal component, holding its
# index (pc), its share of the total variance (var_pct) and the running
# total of those shares (cum_var).
pca_var <- tibble(
  # seq_along() stays correct even for a zero-length vector, unlike 1:length()
  pc = seq_along(pca_model$sdev),
  var_pct = pca_model$sdev^2 / sum(pca_model$sdev^2),
  cum_var = cumsum(var_pct)
)

# Scree plot: bars show each component's share of variance, the red line and
# points show the cumulative share, and the dashed orange line marks the 0.8
# level.
ggplot(pca_var, aes(x = pc)) +
  geom_col(aes(y = var_pct), fill = "steelblue", alpha = 0.7) +
  # Line thickness is set with linewidth; using size for lines is deprecated
  # since ggplot2 3.4.0.
  geom_line(aes(y = cum_var), color = "red", linewidth = 1) +
  geom_point(aes(y = cum_var), color = "red") +
  geom_hline(yintercept = 0.8, linetype = "dashed", color = "orange") +
  labs(title = "Scree Plot: 累積解釋力", x = "主成分", y = "解釋變異比例") +
  theme_minimal()

4.2 變數負荷圖 (Loading Plot)

定義主成分的商業意義。

發現:

- PC1 (橫軸):與 ROE、ROA 高度相關 → 定義為「獲利指標」。
- PC2 (縱軸):與資產週轉率高度相關 → 定義為「效率指標」。

# Pull the loading matrix out of the PCA fit as a tibble, moving the matrix
# row names (the variable names) into a "variable" column.
pca_loadings <- pca_model$rotation %>%
  as_tibble(rownames = "variable")

# Loading plot: one arrow per variable, drawn from the origin to the
# variable's (PC1, PC2) loading, plus a repelled text label for each variable.
ggplot(pca_loadings, aes(x = PC1, y = PC2)) +
  geom_segment(
    aes(xend = PC1, yend = PC2),
    # Fixed start point: every arrow begins at (0, 0).
    x = 0,
    y = 0,
    arrow = arrow(length = unit(0.3, "cm")),
    color = "darkblue",
    alpha = 0.5
  ) +
  geom_text_repel(aes(label = variable), color = "darkred", size = 3) +
  coord_fixed() +
  labs(
    title = "Loading Plot: 變數與主成分的關係",
    x = "PC1 (獲利)",
    y = "PC2 (效率)"
  ) +
  theme_minimal()

4.3 企業戰略地圖 (Score Plot)

將所有公司投影到這兩個新維度上。

# Put the PCA scores next to the cleaned data so each company keeps its
# comp_id alongside its component scores. The columns are bound by row
# position; pca_model was fitted on the rows of fin_clean, so they line up.
fin_scores <- bind_cols(fin_clean, as_tibble(pca_model$x))

# Companies to label on the map: those whose PC2 score is larger than 3 in
# absolute value, i.e. extreme in either direction.
# NOTE(review): the sign of a principal component is arbitrary, so check the
# loadings before reading one end of PC2 as "good"; narrow the condition to a
# single sign if only one direction should be highlighted.
top_efficiency <- filter(fin_scores, abs(PC2) > 3)

# Score plot: every company as a point in the PC1/PC2 plane, with dashed grey
# reference lines through zero on both axes and red labels for the companies
# selected above.
ggplot(fin_scores, aes(x = PC1, y = PC2)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey") +
  geom_point(color = "steelblue", alpha = 0.6) +
  geom_text_repel(
    data = top_efficiency,
    aes(label = comp_id),
    color = "red",
    size = 4
  ) +
  labs(
    title = "企業戰略地圖",
    subtitle = "尋找兼具獲利與效率的投資標的",
    x = "獲利指標 (PC1)",
    y = "效率指標 (PC2)"
  ) +
  theme_minimal()

## 5. 結論 (Conclusion)

透過 PCA 分析,我們成功將 16 維的財務數據降維,並發現 3219 (倚強科) 與 4967 (十銓) 在效率與獲利指標上表現突出,值得投資人進一步深入研究。