meta_tab.xlsx 並把 Lag 層級表映射成短/長名說明:一次安裝並載入後面會用到的套件(含 ranger 做
Random Forest)。
# ---- 安裝/載入套件 ----
pkgs <- c("data.table","readxl","lubridate","forecast","glmnet","ipred",
"ggplot2","scales","ranger")
ins <- pkgs[!pkgs %in% installed.packages()[,"Package"]]
if(length(ins)) install.packages(ins, repos="https://cloud.r-project.org")
invisible(lapply(pkgs, library, character.only=TRUE))
說明:train_len 決定 rolling 視窗長度;h
是步數(預設 1);target_col 換成要預測的欄名。
# ---- 參數 ----
excel_path <- "~/Documents/文件 - Travis' MBA/OneDrive/Documents/1. R/Research/Oct_2025/scratch_3/existing_excel_files/wide_tab.xlsx" # Excel 路徑
sheet_name <- 1 # 工作表
time_col <- "ym" # 時間欄(像 2000-M1)
target_col <- "A100101010_1" # <--- 請改成你的目標欄名
h <- 1 # 預測步數(1..12)
train_len <- 60 # <--- rolling window 長度(例:60 個月)
p_max <- 6 # 每個變數要做幾階 lag
elastic_alpha <- 0.5 # Elastic Net 的 alpha
adapt_gamma <- 1.0 # Adaptive 權重 1/|beta|^gamma
var_exp_cut <- 0.95 # PCR 主成分保留的累積解釋比例
說明:把 "YYYY-Mm" 轉成該月第一天的
Date,再依時間排序。
# ---- 讀檔 ----
raw <- as.data.table(readxl::read_excel(excel_path, sheet = sheet_name))
setnames(raw, old=names(raw), new=make.names(names(raw)))
time_col <- make.names(time_col)
target_col <- make.names(target_col)
stopifnot(time_col %in% names(raw), target_col %in% names(raw))
# ---- 專門解析 "YYYY-Mm" 格式成 Date ----
parse_ym_M <- function(v){
v <- as.character(v)
yr <- as.integer(sub("^(\\d{4})-M.*$", "\\1", v)) # 抓年
mm <- as.integer(sub("^\\d{4}-M(\\d{1,2})$", "\\1", v)) # 抓月
if (any(is.na(yr)) || any(is.na(mm))) {
stop("時間欄位必須長得像 '2025-M1' 或 '2025-M11'")
}
as.Date(sprintf("%04d-%02d-01", yr, mm)) # 轉換成該月第一天
}
# 先把 ym 轉成日期欄
raw[, (time_col) := parse_ym_M(get(time_col))]
# 正確的排序寫法(升冪)
data.table::setorderv(raw, cols = time_col, order = 1L, na.last = TRUE)
# ---- 變數轉換(使其平穩)----
cat("\n步驟 2.5:變數轉換\n")
##
## 步驟 2.5:變數轉換
cat("----------------------------------------\n")
## ----------------------------------------
meta_path = "/Users/traviscua/Library/CloudStorage/OneDrive-個人/Documents/1. R/Research/Oct_2025/scratch_5/existing_excel_files/meta_tab.xlsx"
# 讀取 meta_tab 取得 tcode
meta_data <- readxl::read_excel(meta_path)
tcode_mapping <- setNames(meta_data$tcode, meta_data$var_label)
# 定義轉換函數
transform_variable <- function(x, tcode) {
if(tcode == 1) return(x)
else if(tcode == 2) return(c(NA, diff(x)))
else if(tcode == 3) {
temp <- c(NA, diff(x))
return(c(NA, diff(temp, na.rm=FALSE)))
}
else if(tcode == 4) return(log(x))
else if(tcode == 5) return(c(NA, diff(log(x))))
else if(tcode == 6) {
temp <- c(NA, diff(log(x)))
return(c(NA, diff(temp, na.rm=FALSE)))
}
else if(tcode == 7) {
pct <- x / c(NA, x[-length(x)]) - 1
return(c(NA, diff(pct)))
}
else return(x)
}
# 排除時間欄和目標欄
transform_vars <- setdiff(names(raw), c(time_col, target_col))
# 逐一轉換變數
for(v in transform_vars) {
tcode <- tcode_mapping[v]
if(!is.na(tcode)) {
original_mean <- mean(raw[[v]], na.rm=TRUE)
raw[[v]] <- transform_variable(raw[[v]], tcode)
transformed_mean <- mean(raw[[v]], na.rm=TRUE)
if(v %in% head(transform_vars, 5)) { # 只印前5個
cat(sprintf("變數 %s (tcode=%d): 原始均值=%.2f, 轉換後均值=%.4f\n",
v, tcode, original_mean, transformed_mean))
}
}
}
## 變數 A030101015_1 (tcode=1): 原始均值=92.67, 轉換後均值=92.6686
## 變數 A030102015_1 (tcode=1): 原始均值=91.50, 轉換後均值=91.5047
## 變數 A030301015_1 (tcode=1): 原始均值=90.90, 轉換後均值=90.8957
## 變數 A030302015_1 (tcode=1): 原始均值=101.38, 轉換後均值=101.3802
## 變數 A040107010_1_1 (tcode=1): 原始均值=58.39, 轉換後均值=58.3933
cat("轉換後前3列:\n")
## 轉換後前3列:
print(head(raw[, 1:min(6, ncol(raw))], 3))
## ym A030101015_1 A030102015_1 A030301015_1 A030302015_1
## <Date> <num> <num> <num> <num>
## 1: 2000-01-01 80.91 79.87 69.75 76.76
## 2: 2000-02-01 82.11 79.63 69.69 76.48
## 3: 2000-03-01 80.70 79.39 70.09 76.89
## A040107010_1_1
## <num>
## 1: 57.92
## 2: 57.41
## 3: 57.43
cat("\n")
# ---- 計算 PCA 因子 ----
cat("步驟 2.6:計算 PCA 因子\n")
## 步驟 2.6:計算 PCA 因子
cat("----------------------------------------\n")
## ----------------------------------------
# 提取數值變數(排除時間欄和目標欄)
numeric_vars <- setdiff(names(raw), c(time_col, target_col))
numeric_data <- as.data.frame(raw[, ..numeric_vars]) # 方法1:用 ..
# 移除含 NA 的列
complete_rows <- complete.cases(numeric_data)
clean_data <- numeric_data[complete_rows, ]
cat("用於 PCA 的樣本數:", sum(complete_rows), "/", nrow(raw), "\n")
## 用於 PCA 的樣本數: 308 / 308
# 標準化
scaled_data <- scale(clean_data)
# 執行 PCA
pca_result <- prcomp(scaled_data, center=FALSE, scale.=FALSE)
# 提取前4個主成分
factors <- pca_result$x[, 1:4]
# 查看解釋變異比例
var_explained <- summary(pca_result)$importance[2, 1:4]
cat("前4個因子解釋變異:\n")
## 前4個因子解釋變異:
cat(sprintf(" Factor1: %.1f%%\n", var_explained[1]*100))
## Factor1: 63.8%
cat(sprintf(" Factor2: %.1f%%\n", var_explained[2]*100))
## Factor2: 10.8%
cat(sprintf(" Factor3: %.1f%%\n", var_explained[3]*100))
## Factor3: 5.4%
cat(sprintf(" Factor4: %.1f%%\n", var_explained[4]*100))
## Factor4: 4.6%
cat(sprintf(" 累計: %.1f%%\n\n", sum(var_explained)*100))
## 累計: 84.6%
# 放回原始數據框(前面的 NA 列保持 NA)
factor_df <- as.data.frame(matrix(NA, nrow=nrow(raw), ncol=4))
names(factor_df) <- paste0("Factor", 1:4)
factor_df[complete_rows, ] <- factors
# 合併到 raw
raw <- cbind(raw, factor_df)
cat("加入因子後維度:", nrow(raw), "列 ×", ncol(raw), "欄\n")
## 加入因子後維度: 308 列 × 45 欄
cat("因子前3列:\n")
## 因子前3列:
print(head(factor_df, 3))
## Factor1 Factor2 Factor3 Factor4
## 1 -7.583645 4.578565 2.783557 -0.4334360
## 2 -7.982886 4.249492 1.982869 -0.9461896
## 3 -7.847674 3.855892 4.230689 0.5986163
cat("\n")
說明:對所有數值欄(包含目標 y)都做
L1..Lp 的 lag。後續 ML 只會用 lag 當特徵。
# ---- 產生 lag 特徵 ----
num_vars <- setdiff(names(raw), time_col)
make_lags <- function(dt, vars, pmax){
dt <- copy(dt)
for (v in vars){
for (L in 1:pmax){
dt[, paste0(v,"_L",L) := shift(get(v), n=L, type="lag")]
}
}
dt
}
dat <- make_lags(raw, vars=num_vars, pmax=p_max)
lag_vars <- grep("(_L[0-9]+)$", names(dat), value=TRUE)
說明:評分用 RMSE、MAE、MAD(MAD=median absolute error);建立三個模型家族的暫存列表。
# ---- 指標 ----
rmse <- function(e) sqrt(mean(e^2, na.rm=TRUE))
mae <- function(e) mean(abs(e), na.rm=TRUE)
madm <- function(e) median(abs(e), na.rm=TRUE)
# ---- 結果容器 ----
collect_ts <- list() # RandomWalk / AR / ARMA
collect_lasso <- list() # Lasso / Ridge / EN / Ada* / PCR
collect_ml <- list() # Bagging Tree / Random Forest
說明:每次只用最近 train_len 期做訓練,預測
h 期後的下一點。
# ---- rolling 索引 ----
n <- nrow(dat)
idx_train_end <- seq(from = train_len, to = n - h, by = 1)
if (!length(idx_train_end)) stop("資料太短,請降低 train_len 或確認資料長度。")
for (te in idx_train_end){
train_idx <- (te - train_len + 1):te # 固定長度視窗
test_idx <- te + h
tr <- dat[train_idx]
te_row <- dat[test_idx]
# ===== (A) 時間序列族:RW / AR / ARMA =====
y_tr <- tr[[target_col]]
y_te <- te_row[[target_col]]
yhat_rw <- tail(y_tr, 1)
fit_ar <- tryCatch(forecast::auto.arima(y_tr, d=0, max.q=0, seasonal=FALSE,
stepwise=TRUE, approximation=TRUE), error=function(e) NULL)
yhat_ar <- tryCatch(as.numeric(forecast::forecast(fit_ar, h=h)$mean[h]), error=function(e) yhat_rw)
fit_arma <- tryCatch(forecast::auto.arima(y_tr, d=0, seasonal=FALSE,
stepwise=TRUE, approximation=TRUE), error=function(e) NULL)
yhat_arma <- tryCatch(as.numeric(forecast::forecast(fit_arma, h=h)$mean[h]), error=function(e) yhat_rw)
collect_ts[[length(collect_ts)+1]] <- data.table(
date = te_row[[time_col]],
model = c("RandomWalk","AR","ARMA"),
y = y_te,
yhat = c(yhat_rw, yhat_ar, yhat_arma)
)
# ===== (B) Lasso 家族 + PCR =====
X_tr <- as.matrix(tr[, ..lag_vars])
X_te <- as.matrix(te_row[, ..lag_vars])
good <- colSums(is.na(X_tr)) == 0
X_tr <- X_tr[, good, drop=FALSE]
X_te <- X_te[, good, drop=FALSE]
y_tr2 <- y_tr
# ---- 新增:過濾近零變異 + glmnet 公共參數 ----
if(ncol(X_tr) > 0){
sd_ok <- apply(X_tr, 2, sd, na.rm=TRUE) > 1e-8
X_tr <- X_tr[, sd_ok, drop=FALSE]
X_te <- X_te[, sd_ok, drop=FALSE]
}
glmnet_common <- list(
standardize = TRUE,
maxit = 1e6, # 提高收斂上限
nlambda = 80, # 多一點 λ
lambda.min.ratio = 1e-3 # 避免過小 λ
)
if (ncol(X_tr) >= 1){
# Lasso
fit_lasso <- tryCatch(
do.call(cv.glmnet, c(list(x=X_tr, y=y_tr2, alpha=1), glmnet_common)),
error=function(e) NULL
)
yhat_lasso <- if(!is.null(fit_lasso)) as.numeric(predict(fit_lasso, X_te, s="lambda.1se")) else NA_real_
# Ridge
fit_ridge <- tryCatch(
do.call(cv.glmnet, c(list(x=X_tr, y=y_tr2, alpha=0), glmnet_common)),
error=function(e) NULL
)
yhat_ridge <- if(!is.null(fit_ridge)) as.numeric(predict(fit_ridge, X_te, s="lambda.1se")) else NA_real_
# Elastic Net
fit_en <- tryCatch(
do.call(cv.glmnet, c(list(x=X_tr, y=y_tr2, alpha=elastic_alpha), glmnet_common)),
error=function(e) NULL
)
yhat_en <- if(!is.null(fit_en)) as.numeric(predict(fit_en, X_te, s="lambda.1se")) else NA_real_
# Adaptive 權重(以 Ridge 初估)
w_al <- tryCatch({
fit0 <- do.call(cv.glmnet, c(list(x=X_tr, y=y_tr2, alpha=0), glmnet_common))
b0 <- as.numeric(coef(fit0, s="lambda.1se"))[-1]
w <- 1/(abs(b0)^adapt_gamma + 1e-6)
pmin(w, 1e4) # 上限裁切,避免極端
}, error=function(e) rep(1, ncol(X_tr)))
fit_ada_lasso <- tryCatch(
do.call(cv.glmnet, c(list(x=X_tr, y=y_tr2, alpha=1, penalty.factor=w_al), glmnet_common)),
error=function(e) NULL
)
yhat_ada_lasso <- if(!is.null(fit_ada_lasso)) as.numeric(predict(fit_ada_lasso, X_te, s="lambda.1se")) else NA_real_
fit_ada_en <- tryCatch(
do.call(cv.glmnet, c(list(x=X_tr, y=y_tr2, alpha=elastic_alpha, penalty.factor=w_al), glmnet_common)),
error=function(e) NULL
)
yhat_ada_en <- if(!is.null(fit_ada_en)) as.numeric(predict(fit_ada_en, X_te, s="lambda.1se")) else NA_real_
# PCR(保留到 var_exp_cut)
pca <- tryCatch(prcomp(X_tr, center=TRUE, scale.=TRUE), error=function(e) NULL)
yhat_pcr <- NA_real_
if(!is.null(pca)){
var_exp <- cumsum(pca$sdev^2)/sum(pca$sdev^2)
k <- which(var_exp >= var_exp_cut)[1]
Z_tr <- pca$x[,1:k,drop=FALSE]
Z_te <- scale(X_te, center=pca$center, scale=pca$scale) %*% pca$rotation[,1:k,drop=FALSE]
fit_pcr <- tryCatch(lm(y_tr2 ~ Z_tr), error=function(e) NULL)
if(!is.null(fit_pcr)) yhat_pcr <- as.numeric(cbind(1, Z_te) %*% coef(fit_pcr))
}
collect_lasso[[length(collect_lasso)+1]] <- data.table(
date = te_row[[time_col]],
model = c("Lasso","Ridge","ElasticNet","AdaLasso","AdaElasticNet","PCR"),
y = y_te,
yhat = c(yhat_lasso,yhat_ridge,yhat_en,yhat_ada_lasso,yhat_ada_en,yhat_pcr)
)
}
# ===== (C) 非線性:Bagging Tree + Random Forest =====
if (ncol(X_tr) >= 1){
tr_df <- data.frame(y = y_tr2, X_tr)
te_df <- data.frame(X_te)
# Bagging
fit_bag <- tryCatch(ipred::bagging(y ~ ., data=tr_df, coob=TRUE, nbagg=100),
error=function(e) NULL)
yhat_bag <- if(!is.null(fit_bag)) as.numeric(predict(fit_bag, newdata=te_df)) else NA_real_
# Random Forest(ranger)
fit_rf <- tryCatch(
ranger::ranger(y ~ ., data=tr_df,
num.trees=500,
mtry=max(1, floor(sqrt(ncol(X_tr)))),
min.node.size=5),
error=function(e) NULL
)
yhat_rf <- if(!is.null(fit_rf)) as.numeric(predict(fit_rf, data=te_df)$predictions) else NA_real_
collect_ml[[length(collect_ml)+1]] <- rbind(
data.table(date = te_row[[time_col]], model="BaggingTree", y=y_te, yhat=yhat_bag),
data.table(date = te_row[[time_col]], model="RandomForest", y=y_te, yhat=yhat_rf)
)
}
}
# ---- rolling 索引 ----
n <- nrow(dat)
idx_train_end <- seq(from = train_len, to = n - h, by = 1)
if (!length(idx_train_end)) stop("資料太短,請降低 train_len 或確認資料長度。")
for (te in idx_train_end){
train_idx <- (te - train_len + 1):te # 固定長度視窗
test_idx <- te + h
tr <- dat[train_idx]
te_row <- dat[test_idx]
# ===== (A) 時間序列族:RW / AR / ARMA =====
y_tr <- tr[[target_col]]
y_te <- te_row[[target_col]]
yhat_rw <- tail(y_tr, 1)
fit_ar <- tryCatch(forecast::auto.arima(y_tr, d=0, max.q=0, seasonal=FALSE,
stepwise=TRUE, approximation=TRUE), error=function(e) NULL)
yhat_ar <- tryCatch(as.numeric(forecast::forecast(fit_ar, h=h)$mean[h]), error=function(e) yhat_rw)
fit_arma <- tryCatch(forecast::auto.arima(y_tr, d=0, seasonal=FALSE,
stepwise=TRUE, approximation=TRUE), error=function(e) NULL)
yhat_arma <- tryCatch(as.numeric(forecast::forecast(fit_arma, h=h)$mean[h]), error=function(e) yhat_rw)
collect_ts[[length(collect_ts)+1]] <- data.table(
date = te_row[[time_col]],
model = c("RandomWalk","AR","ARMA"),
y = y_te,
yhat = c(yhat_rw, yhat_ar, yhat_arma)
)
# ===== (B) Lasso 家族 + PCR =====
X_tr <- as.matrix(tr[, ..lag_vars])
X_te <- as.matrix(te_row[, ..lag_vars])
good <- colSums(is.na(X_tr)) == 0
X_tr <- X_tr[, good, drop=FALSE]
X_te <- X_te[, good, drop=FALSE]
y_tr2 <- y_tr
if (ncol(X_tr) >= 1){
# Lasso / Ridge / Elastic Net
fit_lasso <- tryCatch(cv.glmnet(X_tr, y_tr2, alpha=1, standardize=TRUE), error=function(e) NULL)
yhat_lasso <- if(!is.null(fit_lasso)) as.numeric(predict(fit_lasso, X_te, s="lambda.min")) else NA_real_
fit_ridge <- tryCatch(cv.glmnet(X_tr, y_tr2, alpha=0, standardize=TRUE), error=function(e) NULL)
yhat_ridge <- if(!is.null(fit_ridge)) as.numeric(predict(fit_ridge, X_te, s="lambda.min")) else NA_real_
fit_en <- tryCatch(cv.glmnet(X_tr, y_tr2, alpha=elastic_alpha, standardize=TRUE), error=function(e) NULL)
yhat_en <- if(!is.null(fit_en)) as.numeric(predict(fit_en, X_te, s="lambda.min")) else NA_real_
# Adaptive 權重(以 Ridge 初估)
w_al <- tryCatch({
fit0 <- cv.glmnet(X_tr, y_tr2, alpha=0, standardize=TRUE)
b0 <- as.numeric(coef(fit0, s="lambda.min"))[-1]
1/(abs(b0)^adapt_gamma + 1e-6)
}, error=function(e) rep(1, ncol(X_tr)))
fit_ada_lasso <- tryCatch(cv.glmnet(X_tr, y_tr2, alpha=1, penalty.factor=w_al, standardize=TRUE), error=function(e) NULL)
yhat_ada_lasso <- if(!is.null(fit_ada_lasso)) as.numeric(predict(fit_ada_lasso, X_te, s="lambda.min")) else NA_real_
fit_ada_en <- tryCatch(cv.glmnet(X_tr, y_tr2, alpha=elastic_alpha, penalty.factor=w_al, standardize=TRUE), error=function(e) NULL)
yhat_ada_en <- if(!is.null(fit_ada_en)) as.numeric(predict(fit_ada_en, X_te, s="lambda.min")) else NA_real_
# PCR(保留到 var_exp_cut)
pca <- tryCatch(prcomp(X_tr, center=TRUE, scale.=TRUE), error=function(e) NULL)
yhat_pcr <- NA_real_
if(!is.null(pca)){
var_exp <- cumsum(pca$sdev^2)/sum(pca$sdev^2)
k <- which(var_exp >= var_exp_cut)[1]
Z_tr <- pca$x[,1:k,drop=FALSE]
Z_te <- scale(X_te, center=pca$center, scale=pca$scale) %*% pca$rotation[,1:k,drop=FALSE]
fit_pcr <- tryCatch(lm(y_tr2 ~ Z_tr), error=function(e) NULL)
if(!is.null(fit_pcr)) yhat_pcr <- as.numeric(cbind(1, Z_te) %*% coef(fit_pcr))
}
collect_lasso[[length(collect_lasso)+1]] <- data.table(
date = te_row[[time_col]],
model = c("Lasso","Ridge","ElasticNet","AdaLasso","AdaElasticNet","PCR"),
y = y_te,
yhat = c(yhat_lasso,yhat_ridge,yhat_en,yhat_ada_lasso,yhat_ada_en,yhat_pcr)
)
}
# ===== (C) 非線性:Bagging Tree + Random Forest =====
if (ncol(X_tr) >= 1){
tr_df <- data.frame(y = y_tr2, X_tr)
te_df <- data.frame(X_te)
# Bagging
fit_bag <- tryCatch(ipred::bagging(y ~ ., data=tr_df, coob=TRUE, nbagg=100),
error=function(e) NULL)
yhat_bag <- if(!is.null(fit_bag)) as.numeric(predict(fit_bag, newdata=te_df)) else NA_real_
# Random Forest(ranger)
fit_rf <- tryCatch(
ranger::ranger(y ~ ., data=tr_df,
num.trees=500,
mtry=max(1, floor(sqrt(ncol(X_tr)))),
min.node.size=5),
error=function(e) NULL
)
yhat_rf <- if(!is.null(fit_rf)) as.numeric(predict(fit_rf, data=te_df)$predictions) else NA_real_
collect_ml[[length(collect_ml)+1]] <- rbind(
data.table(date = te_row[[time_col]], model="BaggingTree", y=y_te, yhat=yhat_bag),
data.table(date = te_row[[time_col]], model="RandomForest", y=y_te, yhat=yhat_rf)
)
}
}
說明:把所有樣本外預測合併,計算 RMSE/MAE/MAD,輸出表。
# ---- 合併滾動預測 ----
roll_ts <- rbindlist(collect_ts, fill=TRUE)
roll_lasso <- rbindlist(collect_lasso, fill=TRUE)
roll_ml <- rbindlist(collect_ml, fill=TRUE)
metrics <- rbindlist(list(roll_ts, roll_lasso, roll_ml), fill=TRUE)
metrics <- metrics[!is.na(yhat)]
score <- metrics[, .(
RMSE = rmse(y - yhat),
MAE = mae(y - yhat),
MAD = madm(y - yhat)
), by=.(model)]
# 建立輸出資料夾:gen_excel_files/P6_彙整_評分表_輸出/當日日期
out_root <- "generated_excel_files/P6_彙整_評分表_輸出"
out_dir <- file.path(out_root, format(Sys.Date(), "%Y-%m-%d"))
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
# 分別輸出兩個 xlsx 檔
writexl::write_xlsx(score, path = file.path(out_dir, "model_metrics_rmse_mae_mad.xlsx"))
writexl::write_xlsx(metrics, path = file.path(out_dir, "oos_predictions_by_model.xlsx"))
# 合併兩個 Sheet
writexl::write_xlsx(
list(
model_metrics_rmse_mae_mad = score,
oos_predictions_by_model = metrics
),
path = file.path(out_dir, "forecast_outputs.xlsx")
)
score
## Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
## Use 'xfun::attr2()' instead.
## See help("Deprecated")
| model | RMSE | MAE | MAD |
|---|---|---|---|
| RandomWalk | 154157.9 | 107254.10 | 73629.11 |
| AR | 124158.5 | 81819.52 | 52235.10 |
| ARMA | 125746.4 | 84037.73 | 53110.68 |
| Lasso | 109885.3 | 62706.62 | 35915.94 |
| Ridge | 107119.8 | 68700.99 | 44959.05 |
| ElasticNet | 109768.6 | 62890.77 | 36247.97 |
| AdaLasso | 131001.9 | 72621.01 | 42770.10 |
| AdaElasticNet | 128469.3 | 71500.17 | 42041.21 |
| PCR | 105831.8 | 62681.73 | 38680.44 |
| BaggingTree | 104787.9 | 67566.52 | 44180.77 |
| RandomForest | 114081.1 | 80815.21 | 60400.57 |
說明:一張時間序列家族、一張 Lasso 家族、一張非線性(Bagging+RF)。
# ---- 作圖 ----
plot_series <- function(dt, title_str){
ggplot(dt, aes(x=date)) +
geom_line(aes(y=y), linewidth=0.6) +
geom_line(aes(y=yhat, color=model), linewidth=0.6) +
scale_x_date(date_labels="%Y-%m") +
labs(x=NULL, y=target_col, title=title_str) +
theme_minimal()
}
p1 <- plot_series(roll_ts, "Time Series Model (RW/AR/ARMA) OOS")
p2 <- plot_series(roll_lasso, "Lasso Family (Ridge/EN/Adaptive/PCR) OOS")
p3 <- plot_series(roll_ml, "Nonlinear ML models (Bagging & Random Forest) OOS")
out_dir <- file.path("generated_figures/三張樣本外走勢圖", format(Sys.Date(), "%Y-%m-%d"))
if (!dir.exists(out_dir)) dir.create(out_dir, recursive = TRUE)
ggsave(file.path(out_dir, "plot_timeseries.png"), p1, width = 22, height = 7, units = "in", dpi = 600)
ggsave(file.path(out_dir, "plot_lasso.png"), p2, width = 22, height = 7, units = "in", dpi = 600)
ggsave(file.path(out_dir, "plot_ml.png"), p3, width = 22, height = 7, units = "in", dpi = 600)
p1; p2; p3
說明:仍用 rolling 的最後一個視窗來擬合,再預測
last + h;並輸出各模型的單點預測值。
# ---- 一次多步預測的步數 ----
H_out <- 12 # 想要幾個月就寫幾個月(例:12 = 一整年)
說明:仍用 rolling 的最後視窗來擬合;預測日期用月加法產生。
# ---- 取 rolling 的最後視窗 ----
last_time <- max(dat[[time_col]])
end_idx <- nrow(dat)
start_idx <- max(1, end_idx - train_len + 1)
final_tr <- dat[start_idx:end_idx]
# ---- 目標向量與特徵矩陣(同前)----
y_all <- final_tr[[target_col]]
X_all <- as.matrix(final_tr[, ..lag_vars])
good <- colSums(is.na(X_all)) == 0
X_all <- X_all[, good, drop=FALSE]
# 預測當下可用的特徵(來自全資料最後一列的 lag)
last_row <- tail(dat, 1)
x_origin <- as.matrix(last_row[, ..lag_vars])[, good, drop=FALSE]
# 預測的日期(t+1 ... t+H_out)
future_dates <- sapply(1:H_out, function(hh) last_time %m+% months(hh))
說明:RW/AR/ARMA 可以一口氣輸出 h=H_out 的路徑。
# ---- 時間序列:一次多步 ----
next_rw_path <- rep(tail(y_all,1), H_out)
fit_ar_all <- tryCatch(forecast::auto.arima(y_all, d=0, max.q=0, seasonal=FALSE), error=function(e) NULL)
next_ar_path <- tryCatch(as.numeric(forecast::forecast(fit_ar_all, h=H_out)$mean), error=function(e) next_rw_path)
fit_arma_all <- tryCatch(forecast::auto.arima(y_all, d=0, seasonal=FALSE), error=function(e) NULL)
next_arma_path <- tryCatch(as.numeric(forecast::forecast(fit_arma_all, h=H_out)$mean), error=function(e) next_rw_path)
extra_ts <- data.table(
date = as.Date(future_dates, origin = "1970-01-01"),
horizon = 1:H_out,
model = rep(c("RandomWalk","AR","ARMA"), each = H_out),
yhat = c(next_rw_path, next_ar_path, next_arma_path)
)
說明:對每個 h,把目標改成「h 步之後的 y」(lead
h),用當下的 lag 特徵訓練,然後用
x_origin 預測 t+h。這樣不需要知道未來 X
的路徑,也避免遞迴誤差累積。
# ---- 公共設定(穩定收斂)----
glmnet_common <- list(standardize=TRUE, maxit=1e6, nlambda=80, lambda.min.ratio=1e-3)
# ---- 建 H_out 組 training pairs: (X_all_trim, y_h) ----
train_list <- lapply(1:H_out, function(hh){
# y 需能往前對齊:去掉最後 hh 期
y_h <- shift(y_all, type="lead", n=hh)
keep <- !is.na(y_h)
list(y = y_h[keep], X = X_all[keep,, drop=FALSE], h = hh)
})
# ---- 對每個 h 訓練與預測 ----
res_lasso <- list()
for (item in train_list){
y_tr2 <- item$y
X_tr2 <- item$X
hh <- item$h
# 過濾近零變異
sd_ok <- apply(X_tr2, 2, sd, na.rm=TRUE) > 1e-8
X_tr2 <- X_tr2[, sd_ok, drop=FALSE]
x0 <- x_origin[, sd_ok, drop=FALSE]
# 如果沒有特徵就跳過
if (ncol(X_tr2) < 1){
res_lasso[[length(res_lasso)+1]] <- data.table(
date = future_dates[hh], horizon=hh,
model = c("Lasso","Ridge","ElasticNet","AdaLasso","AdaElasticNet","PCR"),
yhat = NA_real_)
next
}
# Lasso / Ridge / EN
fit_lasso <- tryCatch(do.call(cv.glmnet, c(list(x=X_tr2, y=y_tr2, alpha=1), glmnet_common)), error=function(e) NULL)
yhat_lasso <- if(!is.null(fit_lasso)) as.numeric(predict(fit_lasso, x0, s="lambda.1se")) else NA_real_
fit_ridge <- tryCatch(do.call(cv.glmnet, c(list(x=X_tr2, y=y_tr2, alpha=0), glmnet_common)), error=function(e) NULL)
yhat_ridge <- if(!is.null(fit_ridge)) as.numeric(predict(fit_ridge, x0, s="lambda.1se")) else NA_real_
fit_en <- tryCatch(do.call(cv.glmnet, c(list(x=X_tr2, y=y_tr2, alpha=elastic_alpha), glmnet_common)), error=function(e) NULL)
yhat_en <- if(!is.null(fit_en)) as.numeric(predict(fit_en, x0, s="lambda.1se")) else NA_real_
# Adaptive 權重(ridge 初估 + 上限裁切)
w_al <- tryCatch({
fit0 <- do.call(cv.glmnet, c(list(x=X_tr2, y=y_tr2, alpha=0), glmnet_common))
b0 <- as.numeric(coef(fit0, s="lambda.1se"))[-1]
w <- 1/(abs(b0)^adapt_gamma + 1e-6)
pmin(w, 1e4)
}, error=function(e) rep(1, ncol(X_tr2)))
fit_ada_lasso <- tryCatch(do.call(cv.glmnet, c(list(x=X_tr2, y=y_tr2, alpha=1, penalty.factor=w_al), glmnet_common)), error=function(e) NULL)
yhat_ada_lasso <- if(!is.null(fit_ada_lasso)) as.numeric(predict(fit_ada_lasso, x0, s="lambda.1se")) else NA_real_
fit_ada_en <- tryCatch(do.call(cv.glmnet, c(list(x=X_tr2, y=y_tr2, alpha=elastic_alpha, penalty.factor=w_al), glmnet_common)), error=function(e) NULL)
yhat_ada_en <- if(!is.null(fit_ada_en)) as.numeric(predict(fit_ada_en, x0, s="lambda.1se")) else NA_real_
# PCR
yhat_pcr <- NA_real_
pca <- tryCatch(prcomp(X_tr2, center=TRUE, scale.=TRUE), error=function(e) NULL)
if(!is.null(pca)){
var_exp <- cumsum(pca$sdev^2)/sum(pca$sdev^2)
k <- which(var_exp >= var_exp_cut)[1]
Z_tr <- pca$x[,1:k,drop=FALSE]
fit_pcr <- tryCatch(lm(y_tr2 ~ Z_tr), error=function(e) NULL)
if(!is.null(fit_pcr)){
z0 <- scale(x0, center=pca$center, scale=pca$scale) %*% pca$rotation[,1:k,drop=FALSE]
yhat_pcr <- as.numeric(cbind(1, z0) %*% coef(fit_pcr))
}
}
res_lasso[[length(res_lasso)+1]] <- data.table(
date = future_dates[hh], horizon = hh,
model = c("Lasso","Ridge","ElasticNet","AdaLasso","AdaElasticNet","PCR"),
yhat = c(yhat_lasso,yhat_ridge,yhat_en,yhat_ada_lasso,yhat_ada_en,yhat_pcr)
)
}
extra_lasso <- rbindlist(res_lasso, fill=TRUE)
說明:跟上面相同概念,每個 h 一個模型;特徵是
t 時點的 lag。
res_ml <- list()
for (item in train_list){
y_tr2 <- item$y
X_tr2 <- item$X
hh <- item$h
# 過濾近零變異
sd_ok <- apply(X_tr2, 2, sd, na.rm=TRUE) > 1e-8
X_tr2 <- X_tr2[, sd_ok, drop=FALSE]
x0 <- x_origin[, sd_ok, drop=FALSE]
if (ncol(X_tr2) < 1){
res_ml[[length(res_ml)+1]] <- data.table(
date=future_dates[hh], horizon=hh,
model=c("BaggingTree","RandomForest"),
yhat=c(NA_real_, NA_real_))
next
}
tr_df <- data.frame(y=y_tr2, X_tr2)
te_df <- data.frame(x0)
# Bagging
fit_bag <- tryCatch(ipred::bagging(y ~ ., data=tr_df, coob=FALSE, nbagg=100), error=function(e) NULL)
yhat_bag <- if(!is.null(fit_bag)) as.numeric(predict(fit_bag, newdata=te_df)) else NA_real_
# Random Forest
fit_rf <- tryCatch(
ranger::ranger(y ~ ., data=tr_df, num.trees=500,
mtry=max(1, floor(sqrt(ncol(X_tr2)))), min.node.size=5),
error=function(e) NULL
)
yhat_rf <- if(!is.null(fit_rf)) as.numeric(predict(fit_rf, data=te_df)$predictions) else NA_real_
res_ml[[length(res_ml)+1]] <- data.table(
date=future_dates[hh], horizon=hh,
model=c("BaggingTree","RandomForest"),
yhat=c(yhat_bag,yhat_rf)
)
}
extra_ml <- rbindlist(res_ml, fill=TRUE)
# 排序不變
extra_all <- rbindlist(list(extra_ts, extra_lasso, extra_ml), fill=TRUE)
setorderv(extra_all, c("horizon","model"))
# 輸出到 generated_excel_files/P8_Nowcast/YYYY-MM-DD/extra_multi_step_forecasts_H{H_out}.xlsx
if (!requireNamespace("writexl", quietly = TRUE)) install.packages("writexl")
out_root <- file.path("generated_excel_files/P8_Nowcast")
out_dir <- file.path(out_root, format(Sys.Date(), "%Y-%m-%d"))
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
writexl::write_xlsx(
extra_all,
path = file.path(out_dir, sprintf("extra_multi_step_forecasts_H%d.xlsx", H_out))
)
extra_all
## Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
## Use 'xfun::attr2()' instead.
## See help("Deprecated")
| date | horizon | model | yhat |
|---|---|---|---|
| 2025-09-01 | 1 | AR | 250525.9 |
| 2025-09-01 | 1 | ARMA | 216172.0 |
| 2025-09-01 | 1 | AdaElasticNet | 300390.6 |
| 2025-09-01 | 1 | AdaLasso | 292688.9 |
| 2025-09-01 | 1 | BaggingTree | 308917.7 |
| 2025-09-01 | 1 | ElasticNet | 313995.0 |
| 2025-09-01 | 1 | Lasso | 279658.4 |
| 2025-09-01 | 1 | PCR | 377705.3 |
| 2025-09-01 | 1 | RandomForest | 388379.1 |
| 2025-09-01 | 1 | RandomWalk | 153710.0 |
| 2025-09-01 | 1 | Ridge | 313481.4 |
| 2025-10-01 | 2 | AR | 271017.5 |
| 2025-10-01 | 2 | ARMA | 276167.5 |
| 2025-10-01 | 2 | AdaElasticNet | 281481.2 |
| 2025-10-01 | 2 | AdaLasso | 282675.2 |
| 2025-10-01 | 2 | BaggingTree | 294894.8 |
| 2025-10-01 | 2 | ElasticNet | 288813.9 |
| 2025-10-01 | 2 | Lasso | 291642.0 |
| 2025-10-01 | 2 | PCR | 268775.1 |
| 2025-10-01 | 2 | RandomForest | 387878.6 |
| 2025-10-01 | 2 | RandomWalk | 153710.0 |
| 2025-10-01 | 2 | Ridge | 307453.2 |
| 2025-11-01 | 3 | AR | 275354.7 |
| 2025-11-01 | 3 | ARMA | 276167.5 |
| 2025-11-01 | 3 | AdaElasticNet | 285973.7 |
| 2025-11-01 | 3 | AdaLasso | 286547.4 |
| 2025-11-01 | 3 | BaggingTree | 269264.1 |
| 2025-11-01 | 3 | ElasticNet | 298200.9 |
| 2025-11-01 | 3 | Lasso | 532923.4 |
| 2025-11-01 | 3 | PCR | 352531.3 |
| 2025-11-01 | 3 | RandomForest | 451568.7 |
| 2025-11-01 | 3 | RandomWalk | 153710.0 |
| 2025-11-01 | 3 | Ridge | 279938.9 |
| 2025-12-01 | 4 | AR | 276272.7 |
| 2025-12-01 | 4 | ARMA | 276167.5 |
| 2025-12-01 | 4 | AdaElasticNet | 202988.6 |
| 2025-12-01 | 4 | AdaLasso | 196656.5 |
| 2025-12-01 | 4 | BaggingTree | 241008.0 |
| 2025-12-01 | 4 | ElasticNet | 247032.7 |
| 2025-12-01 | 4 | Lasso | 210526.3 |
| 2025-12-01 | 4 | PCR | 274638.1 |
| 2025-12-01 | 4 | RandomForest | 392035.1 |
| 2025-12-01 | 4 | RandomWalk | 153710.0 |
| 2025-12-01 | 4 | Ridge | 307100.9 |
| 2026-01-01 | 5 | AR | 276466.9 |
| 2026-01-01 | 5 | ARMA | 276167.5 |
| 2026-01-01 | 5 | AdaElasticNet | 301334.0 |
| 2026-01-01 | 5 | AdaLasso | 313969.5 |
| 2026-01-01 | 5 | BaggingTree | 276801.8 |
| 2026-01-01 | 5 | ElasticNet | 290853.1 |
| 2026-01-01 | 5 | Lasso | 302399.8 |
| 2026-01-01 | 5 | PCR | 428350.5 |
| 2026-01-01 | 5 | RandomForest | 427399.0 |
| 2026-01-01 | 5 | RandomWalk | 153710.0 |
| 2026-01-01 | 5 | Ridge | 347761.3 |
| 2026-02-01 | 6 | AR | 276508.1 |
| 2026-02-01 | 6 | ARMA | 276167.5 |
| 2026-02-01 | 6 | AdaElasticNet | 282090.0 |
| 2026-02-01 | 6 | AdaLasso | 277481.9 |
| 2026-02-01 | 6 | BaggingTree | 196130.4 |
| 2026-02-01 | 6 | ElasticNet | 264355.1 |
| 2026-02-01 | 6 | Lasso | 260875.7 |
| 2026-02-01 | 6 | PCR | 108029.7 |
| 2026-02-01 | 6 | RandomForest | 419991.0 |
| 2026-02-01 | 6 | RandomWalk | 153710.0 |
| 2026-02-01 | 6 | Ridge | 313566.3 |
| 2026-03-01 | 7 | AR | 276516.8 |
| 2026-03-01 | 7 | ARMA | 276167.5 |
| 2026-03-01 | 7 | AdaElasticNet | 273071.8 |
| 2026-03-01 | 7 | AdaLasso | 274514.1 |
| 2026-03-01 | 7 | BaggingTree | 312384.7 |
| 2026-03-01 | 7 | ElasticNet | 282349.9 |
| 2026-03-01 | 7 | Lasso | 278831.2 |
| 2026-03-01 | 7 | PCR | 273644.6 |
| 2026-03-01 | 7 | RandomForest | 474400.1 |
| 2026-03-01 | 7 | RandomWalk | 153710.0 |
| 2026-03-01 | 7 | Ridge | 289220.5 |
| 2026-04-01 | 8 | AR | 276518.6 |
| 2026-04-01 | 8 | ARMA | 276167.5 |
| 2026-04-01 | 8 | AdaElasticNet | 292580.3 |
| 2026-04-01 | 8 | AdaLasso | 292999.4 |
| 2026-04-01 | 8 | BaggingTree | 210334.9 |
| 2026-04-01 | 8 | ElasticNet | 271526.0 |
| 2026-04-01 | 8 | Lasso | 268351.1 |
| 2026-04-01 | 8 | PCR | 232379.0 |
| 2026-04-01 | 8 | RandomForest | 486750.9 |
| 2026-04-01 | 8 | RandomWalk | 153710.0 |
| 2026-04-01 | 8 | Ridge | 324073.2 |
| 2026-05-01 | 9 | AR | 276519.0 |
| 2026-05-01 | 9 | ARMA | 276167.5 |
| 2026-05-01 | 9 | AdaElasticNet | 304438.2 |
| 2026-05-01 | 9 | AdaLasso | 310879.3 |
| 2026-05-01 | 9 | BaggingTree | 541433.6 |
| 2026-05-01 | 9 | ElasticNet | 292211.1 |
| 2026-05-01 | 9 | Lasso | 306885.2 |
| 2026-05-01 | 9 | PCR | 432775.2 |
| 2026-05-01 | 9 | RandomForest | 514878.1 |
| 2026-05-01 | 9 | RandomWalk | 153710.0 |
| 2026-05-01 | 9 | Ridge | 292211.1 |
| 2026-06-01 | 10 | AR | 276519.1 |
| 2026-06-01 | 10 | ARMA | 276167.5 |
| 2026-06-01 | 10 | AdaElasticNet | 291191.8 |
| 2026-06-01 | 10 | AdaLasso | 306402.3 |
| 2026-06-01 | 10 | BaggingTree | 562914.5 |
| 2026-06-01 | 10 | ElasticNet | 301013.6 |
| 2026-06-01 | 10 | Lasso | 313066.3 |
| 2026-06-01 | 10 | PCR | 402690.1 |
| 2026-06-01 | 10 | RandomForest | 558002.9 |
| 2026-06-01 | 10 | RandomWalk | 153710.0 |
| 2026-06-01 | 10 | Ridge | 291191.8 |
| 2026-07-01 | 11 | AR | 276519.1 |
| 2026-07-01 | 11 | ARMA | 276167.5 |
| 2026-07-01 | 11 | AdaElasticNet | 264785.4 |
| 2026-07-01 | 11 | AdaLasso | 263517.8 |
| 2026-07-01 | 11 | BaggingTree | 437415.6 |
| 2026-07-01 | 11 | ElasticNet | 302566.7 |
| 2026-07-01 | 11 | Lasso | 307687.9 |
| 2026-07-01 | 11 | PCR | 387910.1 |
| 2026-07-01 | 11 | RandomForest | 486322.0 |
| 2026-07-01 | 11 | RandomWalk | 153710.0 |
| 2026-07-01 | 11 | Ridge | 376305.4 |
| 2026-08-01 | 12 | AR | 276519.1 |
| 2026-08-01 | 12 | ARMA | 276167.5 |
| 2026-08-01 | 12 | AdaElasticNet | 217100.2 |
| 2026-08-01 | 12 | AdaLasso | 100240.1 |
| 2026-08-01 | 12 | BaggingTree | 218407.2 |
| 2026-08-01 | 12 | ElasticNet | 234837.2 |
| 2026-08-01 | 12 | Lasso | 101141.8 |
| 2026-08-01 | 12 | PCR | 112041.4 |
| 2026-08-01 | 12 | RandomForest | 435466.8 |
| 2026-08-01 | 12 | RandomWalk | 153710.0 |
| 2026-08-01 | 12 | Ridge | 256530.1 |
# ── 重要性參數(可調) ─────────────────────────────────────────────
h <- 1 # 這一輪先做 h=1(預測下一期)
k_top <- 8 # 取前幾個重要變數
var_exp_cut <- 0.95 # PCR 累積解釋比例
elastic_alpha <- 0.5 # Elastic Net 的 alpha
adapt_gamma <- 1.0 # Adaptive 權重 1/|beta|^gamma
agg_method <- "sum" # 彙總到原始變數時的方式:"sum" 或 "max"
# 名稱對照(meat_tab.xlsx)設定
map_path <- "/Users/traviscua/Library/CloudStorage/OneDrive-個人/Documents/1. R/Research/Oct_2025/scratch_3/existing_excel_files/meta_tab.xlsx"
map_sheet <- 1
short_max_chars <- 28
short_use_dataset_prefix <- TRUE
dataset_prefix_nchar <- 6
# ── glmnet 共用設定(提升收斂穩定) ───────────────────────────────
glmnet_common <- list(
standardize = TRUE,
maxit = 1e6,
nlambda = 80,
lambda.min.ratio = 1e-3
)
# ── 輔助:取前 k 名(依重要性排序的名稱) ─────────────────────────
topk_names <- function(imp_vec, k = k_top){
imp_vec <- imp_vec[is.finite(imp_vec)]
if(length(imp_vec)==0) return(rep(NA_character_, k))
ord <- order(imp_vec, decreasing=TRUE)
picks <- names(imp_vec)[ord][seq_len(min(k, length(ord)))]
out <- rep(NA_character_, k)
out[seq_along(picks)] <- picks
out
}
# 取得最後一個 rolling 視窗
end_idx <- nrow(dat)
start_idx <- max(1, end_idx - train_len + 1)
final_tr <- dat[start_idx:end_idx]
# 目標與特徵(lag 特徵)
y_all <- final_tr[[target_col]]
X_all <- as.matrix(final_tr[, ..lag_vars])
# 去掉含 NA 的欄、近零變異欄
good_na <- colSums(is.na(X_all)) == 0
X_all <- X_all[, good_na, drop=FALSE]
if(ncol(X_all) == 0) stop("此視窗內沒有可用的 lag 特徵。")
sd_ok <- apply(X_all, 2, sd, na.rm=TRUE) > 1e-8
X_all <- X_all[, sd_ok, drop=FALSE]
# direct strategy:用 (X_t, y_{t+h})
y_lead <- data.table::shift(y_all, type="lead", n=h)
keep <- !is.na(y_lead)
X_tr2 <- X_all[keep,, drop=FALSE]
y_tr2 <- y_lead[keep]
# Lasso
fit_lasso <- tryCatch(do.call(glmnet::cv.glmnet, c(list(x=X_tr2, y=y_tr2, alpha=1), glmnet_common)), error=function(e) NULL)
imp_lasso <- if(!is.null(fit_lasso)) abs(as.numeric(coef(fit_lasso, s="lambda.1se"))[-1]) else setNames(numeric(0), character(0))
names(imp_lasso) <- colnames(X_tr2)
# Ridge
fit_ridge <- tryCatch(do.call(glmnet::cv.glmnet, c(list(x=X_tr2, y=y_tr2, alpha=0), glmnet_common)), error=function(e) NULL)
imp_ridge <- if(!is.null(fit_ridge)) abs(as.numeric(coef(fit_ridge, s="lambda.1se"))[-1]) else setNames(numeric(0), character(0))
names(imp_ridge) <- colnames(X_tr2)
# Elastic Net
fit_en <- tryCatch(do.call(glmnet::cv.glmnet, c(list(x=X_tr2, y=y_tr2, alpha=elastic_alpha), glmnet_common)), error=function(e) NULL)
imp_en <- if(!is.null(fit_en)) abs(as.numeric(coef(fit_en, s="lambda.1se"))[-1]) else setNames(numeric(0), character(0))
names(imp_en) <- colnames(X_tr2)
# Adaptive 權重(Ridge 初估 + 上限裁切)
w_al <- tryCatch({
fit0 <- do.call(glmnet::cv.glmnet, c(list(x=X_tr2, y=y_tr2, alpha=0), glmnet_common))
b0 <- as.numeric(coef(fit0, s="lambda.1se"))[-1]
pmin(1/(abs(b0)^adapt_gamma + 1e-6), 1e4)
}, error=function(e) rep(1, ncol(X_tr2)))
fit_ada_lasso <- tryCatch(do.call(glmnet::cv.glmnet, c(list(x=X_tr2, y=y_tr2, alpha=1, penalty.factor=w_al), glmnet_common)), error=function(e) NULL)
imp_ada_lasso <- if(!is.null(fit_ada_lasso)) abs(as.numeric(coef(fit_ada_lasso, s="lambda.1se"))[-1]) else setNames(numeric(0), character(0))
names(imp_ada_lasso) <- colnames(X_tr2)
fit_ada_en <- tryCatch(do.call(glmnet::cv.glmnet, c(list(x=X_tr2, y=y_tr2, alpha=elastic_alpha, penalty.factor=w_al), glmnet_common)), error=function(e) NULL)
imp_ada_en <- if(!is.null(fit_ada_en)) abs(as.numeric(coef(fit_ada_en, s="lambda.1se"))[-1]) else setNames(numeric(0), character(0))
names(imp_ada_en) <- colnames(X_tr2)
imp_pcr <- setNames(numeric(0), character(0))
pca <- tryCatch(prcomp(X_tr2, center=TRUE, scale.=TRUE), error=function(e) NULL)
if(!is.null(pca)){
var_exp <- cumsum(pca$sdev^2)/sum(pca$sdev^2)
k_pc <- which(var_exp >= var_exp_cut)[1]
Z_tr <- pca$x[,1:k_pc, drop=FALSE]
fit_pcr <- tryCatch(lm(y_tr2 ~ Z_tr), error=function(e) NULL)
if(!is.null(fit_pcr)){
beta <- coef(fit_pcr)[-1]
rot <- pca$rotation[,1:k_pc, drop=FALSE] # loading: var × PC
sc <- as.numeric(abs(rot %*% abs(beta))) # 變數層級加權
imp_pcr <- setNames(sc, rownames(rot))
}
}
library(ranger)
p <- ncol(X_tr2)
# Bagging 的 proxy:ranger + mtry = p (不抽樣特徵) + permutation importance
fit_bag_imp <- tryCatch(
ranger(y = y_tr2, x = data.frame(X_tr2),
num.trees = 500, mtry = p, min.node.size = 5,
importance = "permutation", seed = 123),
error=function(e) NULL
)
imp_bag <- if(!is.null(fit_bag_imp)) fit_bag_imp$variable.importance else setNames(numeric(0), character(0))
# Random Forest:mtry = sqrt(p)
fit_rf_imp <- tryCatch(
ranger(y = y_tr2, x = data.frame(X_tr2),
num.trees = 500, mtry = max(1, floor(sqrt(p))), min.node.size = 5,
importance = "permutation", seed = 123),
error=function(e) NULL
)
imp_rf <- if(!is.null(fit_rf_imp)) fit_rf_imp$variable.importance else setNames(numeric(0), character(0))
library(data.table)
imp_list_lag <- list(
"LASSO" = imp_lasso,
"EN" = imp_en,
"Adaptive LASSO" = imp_ada_lasso,
"Adaptive EN" = imp_ada_en,
"Ridge" = imp_ridge,
"PCR" = imp_pcr,
"Bagging (proxy)" = imp_bag,
"Random Forest" = imp_rf
)
rank_vec <- as.character(seq_len(k_top))
tbl_lag_code <- data.table(排序 = rank_vec)
for (nm in names(imp_list_lag)){
tbl_lag_code[[nm]] <- topk_names(imp_list_lag[[nm]], k_top)
}
out_root <- file.path("generated_excel_files/P9_重要變數/9e")
out_dir <- file.path(out_root, format(Sys.Date(), "%Y-%m-%d"))
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
writexl::write_xlsx(
tbl_lag_code,
path = file.path(out_dir, sprintf("variable_importance_LAG_top%d_h%d_CODE.xlsx", k_top, h))
)
meta_tab.xlsx 並把 Lag
層級表映射成短/長名# 讀對照表
map_raw <- as.data.table(readxl::read_excel(map_path, sheet = map_sheet))
setnames(map_raw, old=names(map_raw), new=make.names(names(map_raw)))
stopifnot(all(c("var_label","dataset_title","series_name") %in% names(map_raw)))
setorder(map_raw, var_label)
if (any(duplicated(map_raw$var_label))){
warning("var_label 在對照表中有重複,將取第一筆。")
map_raw <- map_raw[!duplicated(var_label)]
}
# 剝 lag 後綴
split_lag <- function(x){
lag <- ifelse(grepl("_L\\d+$", x), sub(".*_L(\\d+)$","L\\1", x), NA_character_)
base <- ifelse(is.na(lag), x, sub("_L\\d+$","", x))
list(base=base, lag=lag)
}
# 產生長/短名
make_display_names <- function(base, lag, mapDT,
short_max=short_max_chars,
use_prefix=short_use_dataset_prefix,
prefix_n=dataset_prefix_nchar){
m <- mapDT[.(base), on=.(var_label)]
# 長名
long <- ifelse(!is.na(m$dataset_title) & !is.na(m$series_name),
paste0(m$dataset_title, "|", m$series_name),
base)
long <- ifelse(!is.na(lag), paste0(long, " (", lag, ")"), long)
# 短名
short_core <- ifelse(!is.na(m$series_name), m$series_name, base)
if (use_prefix){
pref <- ifelse(!is.na(m$dataset_title), substr(m$dataset_title, 1L, prefix_n), "")
short_core <- ifelse(pref!="", paste0(pref, "|", short_core), short_core)
}
short_core <- ifelse(!is.na(lag), paste0(short_core, " (", lag, ")"), short_core)
short <- ifelse(nchar(short_core) > short_max,
paste0(substr(short_core, 1L, short_max-1L), "…"),
short_core)
list(long=long, short=short)
}
map_var_labels <- function(v, mapDT){
sp <- split_lag(v)
nm <- make_display_names(sp$base, sp$lag, mapDT)
data.table(var_label=v, base=sp$base, lag=sp$lag,
long_name=nm$long, short_name=nm$short)
}
# 套用到 lag 層級表
model_cols <- setdiff(names(tbl_lag_code), "排序")
all_vars <- unique(unlist(lapply(tbl_lag_code[, ..model_cols], unlist), use.names=FALSE))
all_vars <- all_vars[!is.na(all_vars) & all_vars!=""]
dict_lag <- if(length(all_vars)) map_var_labels(all_vars, map_raw) else data.table()
tbl_lag_short <- copy(tbl_lag_code)
tbl_lag_long <- copy(tbl_lag_code)
for (mc in model_cols){
m <- merge(data.table(var_label = tbl_lag_code[[mc]]), dict_lag, by="var_label", all.x=TRUE)
tbl_lag_short[[mc]] <- ifelse(is.na(m$short_name), paste0(m$var_label, " (未對應)"), m$short_name)
tbl_lag_long[[mc]] <- ifelse(is.na(m$long_name), paste0(m$var_label, " (未對應)"), m$long_name)
}
out_root <- file.path("generated_excel_files", "P9_重要變數", "9f")
out_dir <- file.path(out_root, format(Sys.Date(), "%Y-%m-%d"))
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
# 三個檔各自輸出
writexl::write_xlsx(
dict_lag[, .(var_label, base, lag, short_name, long_name)],
path = file.path(out_dir, sprintf("var_name_dictionary_LAG_h%d.xlsx", h))
)
writexl::write_xlsx(
tbl_lag_short,
path = file.path(out_dir, sprintf("variable_importance_LAG_top%d_h%d_SHORT.xlsx", k_top, h))
)
writexl::write_xlsx(
tbl_lag_long,
path = file.path(out_dir, sprintf("variable_importance_LAG_top%d_h%d_LONG.xlsx", k_top, h))
)
writexl::write_xlsx(
list(
var_name_dictionary_LAG = dict_lag[, .(var_label, base, lag, short_name, long_name)],
LAG_top_SHORT = tbl_lag_short,
LAG_top_LONG = tbl_lag_long
),
path = file.path(out_dir, sprintf("P9_重要變數_9f_top%d_h%d.xlsx", k_top, h))
)
tbl_lag_short
## Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
## Use 'xfun::attr2()' instead.
## See help("Deprecated")
| 排序 | LASSO | EN | Adaptive LASSO | Adaptive EN | Ridge | PCR | Bagging (proxy) | Random Forest |
|---|---|---|---|---|---|---|---|---|
| 1 | 消費者物價基|總指數 (L1) | 消費者物價基|總指數 (L1) | 消費者物價基|總指數 (L1) | 消費者物價基|總指數 (L1) | 人力資源主要|勞動力參與率(%) | 合計 (L1) | 人力資源主要|勞動力參與率(%) | 合計 (L1) | 每人每月總薪|工業及服務業 | 合計 (L4) | 每人每月總薪|工業及服務業 | 合計 (L4) |
| 2 | 消費者物價基|總指數 (L2) | 消費者物價基|總指數 (L2) | 消費者物價基|總指數 (L2) | 消費者物價基|總指數 (L2) | 人力資源主要|勞動力參與率(%) | 合計 (L6) | 人力資源主要|勞動力參與率(%) | 合計 (L2) | 受僱員工每人|工業及服務業 | 合計 (L4) | 受僱員工每人|工業及服務業 | 合計 (L3) |
| 3 | 消費者物價基|總指數 (L3) | 每人每月總薪|工業及服務業 | 合計 (L4) | 消費者物價基|總指數 (L3) | 消費者物價基|總指數 (L3) | 人力資源主要|失業率(%) | 合計 (L4) | 人力資源主要|勞動力參與率(%) | 合計 (L6) | 受僱員工進退|進入率 | 工業及服務業 (L1) | 受僱員工進退|進入率 | 工業及服務業 (L4) |
| 4 | 每人每月總薪|工業及服務業 | 合計 (L4) | 受僱員工每人|工業及服務業 | 合計 (L4) | 消費者物價基|總指數 (L4) | 消費者物價基|總指數 (L4) | 人力資源主要|失業率(%) | 合計 (L5) | 每人每月總薪|工業及服務業 | 合計 (L6) | 受僱員工進退|進入率 | 工業及服務業 (L4) | 工業生產指數|總指數 (L3) |
| 5 | 受僱員工進退|進入率 | 工業及服務業 (L4) | 受僱員工進退|進入率 | 工業及服務業 (L4) | 消費者物價基|總指數 (L5) | 消費者物價基|總指數 (L5) | 人力資源主要|失業率(%) | 合計 (L6) | 受僱員工每人|工業及服務業 | 合計 (L5) | 批發零售及餐|零售業 (L5) | 批發零售及餐|零售業 (L4) |
| 6 | 受僱員工進退|進入率 | 工業及服務業 (L6) | 受僱員工進退|進入率 | 工業及服務業 (L6) | 消費者物價基|總指數 (L6) | 消費者物價基|總指數 (L6) | 受僱員工進退|進入率 | 工業及服務業 (L4) | 批發零售及餐|零售業 (L6) | 利率統計-五|金額(新臺幣百萬元) | 購屋貸款 (L… | 批發零售及餐|零售業 (L5) |
| 7 | 批發零售及餐|零售業 (L5) | 批發零售及餐|零售業 (L5) | 受僱員工進退|進入率 | 工業及服務業 (L4) | 受僱員工進退|進入率 | 工業及服務業 (L4) | 受僱員工進退|進入率 | 工業及服務業 (L5) | 全體銀行放款|貼現 (L1) | 全國賦稅實徵|總計 (L4) | 外匯統計-我|新台幣 (NTD/USD) (L6) |
| 8 | 全體銀行放款|貼現 (L2) | 全體銀行放款|貼現 (L2) | 受僱員工進退|進入率 | 工業及服務業 (L6) | 受僱員工進退|進入率 | 工業及服務業 (L6) | 受僱員工進退|進入率 | 工業及服務業 (L6) | 全國賦稅實徵|總計 (L5) | Factor4 (L4) | 全國賦稅實徵|總計 (L3) |
tbl_lag_long
## Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
## Use 'xfun::attr2()' instead.
## See help("Deprecated")
| 排序 | LASSO | EN | Adaptive LASSO | Adaptive EN | Ridge | PCR | Bagging (proxy) | Random Forest |
|---|---|---|---|---|---|---|---|---|
| 1 | 消費者物價基本分類指數|總指數 (L1) | 消費者物價基本分類指數|總指數 (L1) | 消費者物價基本分類指數|總指數 (L1) | 消費者物價基本分類指數|總指數 (L1) | 人力資源主要指標|勞動力參與率(%) | 合計 (L1) | 人力資源主要指標|勞動力參與率(%) | 合計 (L1) | 每人每月總薪資|工業及服務業 | 合計 (L4) | 每人每月總薪資|工業及服務業 | 合計 (L4) |
| 2 | 消費者物價基本分類指數|總指數 (L2) | 消費者物價基本分類指數|總指數 (L2) | 消費者物價基本分類指數|總指數 (L2) | 消費者物價基本分類指數|總指數 (L2) | 人力資源主要指標|勞動力參與率(%) | 合計 (L6) | 人力資源主要指標|勞動力參與率(%) | 合計 (L2) | 受僱員工每人每月工時|工業及服務業 | 合計 (L4) | 受僱員工每人每月工時|工業及服務業 | 合計 (L3) |
| 3 | 消費者物價基本分類指數|總指數 (L3) | 每人每月總薪資|工業及服務業 | 合計 (L4) | 消費者物價基本分類指數|總指數 (L3) | 消費者物價基本分類指數|總指數 (L3) | 人力資源主要指標|失業率(%) | 合計 (L4) | 人力資源主要指標|勞動力參與率(%) | 合計 (L6) | 受僱員工進退狀況|進入率 | 工業及服務業 (L1) | 受僱員工進退狀況|進入率 | 工業及服務業 (L4) |
| 4 | 每人每月總薪資|工業及服務業 | 合計 (L4) | 受僱員工每人每月工時|工業及服務業 | 合計 (L4) | 消費者物價基本分類指數|總指數 (L4) | 消費者物價基本分類指數|總指數 (L4) | 人力資源主要指標|失業率(%) | 合計 (L5) | 每人每月總薪資|工業及服務業 | 合計 (L6) | 受僱員工進退狀況|進入率 | 工業及服務業 (L4) | 工業生產指數|總指數 (L3) |
| 5 | 受僱員工進退狀況|進入率 | 工業及服務業 (L4) | 受僱員工進退狀況|進入率 | 工業及服務業 (L4) | 消費者物價基本分類指數|總指數 (L5) | 消費者物價基本分類指數|總指數 (L5) | 人力資源主要指標|失業率(%) | 合計 (L6) | 受僱員工每人每月工時|工業及服務業 | 合計 (L5) | 批發零售及餐飲業營業額|零售業 (L5) | 批發零售及餐飲業營業額|零售業 (L4) |
| 6 | 受僱員工進退狀況|進入率 | 工業及服務業 (L6) | 受僱員工進退狀況|進入率 | 工業及服務業 (L6) | 消費者物價基本分類指數|總指數 (L6) | 消費者物價基本分類指數|總指數 (L6) | 受僱員工進退狀況|進入率 | 工業及服務業 (L4) | 批發零售及餐飲業營業額|零售業 (L6) | 利率統計-五大銀行新承做放款金額與利率-月|金額(新臺幣百萬元) | 購屋貸款 (L4) | 批發零售及餐飲業營業額|零售業 (L5) |
| 7 | 批發零售及餐飲業營業額|零售業 (L5) | 批發零售及餐飲業營業額|零售業 (L5) | 受僱員工進退狀況|進入率 | 工業及服務業 (L4) | 受僱員工進退狀況|進入率 | 工業及服務業 (L4) | 受僱員工進退狀況|進入率 | 工業及服務業 (L5) | 全體銀行放款餘額統計-科目別|貼現 (L1) | 全國賦稅實徵淨額-按稅目別分|總計 (L4) | 外匯統計-我國與主要貿易對手通貨對美元之匯率|新台幣 (NTD/USD) (L6) |
| 8 | 全體銀行放款餘額統計-科目別|貼現 (L2) | 全體銀行放款餘額統計-科目別|貼現 (L2) | 受僱員工進退狀況|進入率 | 工業及服務業 (L6) | 受僱員工進退狀況|進入率 | 工業及服務業 (L6) | 受僱員工進退狀況|進入率 | 工業及服務業 (L6) | 全國賦稅實徵淨額-按稅目別分|總計 (L5) | Factor4 (L4) | 全國賦稅實徵淨額-按稅目別分|總計 (L3) |
# 彙總函數:把 "A_B_C_L3" → "A_B_C" 為 key,對 importance 做 sum 或 max
aggregate_importance <- function(imp_vec, method = c("sum","max")){
method <- match.arg(method)
if(length(imp_vec)==0) return(imp_vec)
nm <- names(imp_vec)
base <- sub("_L\\d+$","", nm)
DT <- data.table(base=base, imp=as.numeric(imp_vec))
agg <- DT[, .(imp = if (method=="sum") sum(imp, na.rm=TRUE) else max(imp, na.rm=TRUE)), by=base]
setNames(agg$imp, agg$base)
}
imp_list_base <- lapply(imp_list_lag, aggregate_importance, method = agg_method)
# 組表(代碼名)
tbl_base_code <- data.table(排序 = rank_vec)
for (nm in names(imp_list_base)){
tbl_base_code[[nm]] <- topk_names(imp_list_base[[nm]], k_top)
}
# 目錄:generated_excel_files/P9_重要變數/9g/當日/
out_root <- file.path("generated_excel_files", "P9_重要變數", "9g")
out_dir <- file.path(out_root, format(Sys.Date(), "%Y-%m-%d"))
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
# 輸出 xlsx(沿用你的檔名規則)
writexl::write_xlsx(
tbl_base_code,
path = file.path(out_dir, sprintf("variable_importance_BASE_top%d_h%d_CODE_%s.xlsx",
k_top, h, toupper(agg_method)))
)
# 建 base 對照字典(不帶 lag)
all_bases <- unique(unlist(lapply(tbl_base_code[, ..model_cols], unlist), use.names=FALSE))
all_bases <- all_bases[!is.na(all_bases) & all_bases!=""]
dict_base <- if(length(all_bases)){
# 這裡直接把 base 當 var_label 去 map(沒有 lag)
m <- map_raw[.(all_bases), on=.(var_label)]
data.table(var_label = all_bases,
short_name = {
core <- ifelse(!is.na(m$series_name), m$series_name, all_bases)
if (short_use_dataset_prefix){
pref <- ifelse(!is.na(m$dataset_title), substr(m$dataset_title, 1L, dataset_prefix_nchar), "")
core <- ifelse(pref!="", paste0(pref, "|", core), core)
}
ifelse(nchar(core) > short_max_chars,
paste0(substr(core, 1L, short_max_chars-1L), "…"),
core)
},
long_name = ifelse(!is.na(m$dataset_title) & !is.na(m$series_name),
paste0(m$dataset_title, "|", m$series_name),
all_bases))
} else data.table()
tbl_base_short <- copy(tbl_base_code)
tbl_base_long <- copy(tbl_base_code)
for (mc in model_cols){
m <- merge(data.table(var_label = tbl_base_code[[mc]]), dict_base, by="var_label", all.x=TRUE)
tbl_base_short[[mc]] <- ifelse(is.na(m$short_name), paste0(m$var_label, " (未對應)"), m$short_name)
tbl_base_long[[mc]] <- ifelse(is.na(m$long_name), paste0(m$var_label, " (未對應)"), m$long_name)
}
# 目錄:generated_excel_files/P9_重要變數/9h/當日/
out_root <- file.path("generated_excel_files", "P9_重要變數", "9h")
out_dir <- file.path(out_root, format(Sys.Date(), "%Y-%m-%d"))
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
# 三個表分別輸出為 xlsx
writexl::write_xlsx(
dict_base[, .(var_label, short_name, long_name)],
path = file.path(out_dir, sprintf("var_name_dictionary_BASE_h%d.xlsx", h))
)
writexl::write_xlsx(
tbl_base_short,
path = file.path(out_dir, sprintf("variable_importance_BASE_top%d_h%d_SHORT_%s.xlsx",
k_top, h, toupper(agg_method)))
)
writexl::write_xlsx(
tbl_base_long,
path = file.path(out_dir, sprintf("variable_importance_BASE_top%d_h%d_LONG_%s.xlsx",
k_top, h, toupper(agg_method)))
)
writexl::write_xlsx(
list(
dictionary_BASE = dict_base[, .(var_label, short_name, long_name)],
BASE_top_SHORT = tbl_base_short,
BASE_top_LONG = tbl_base_long
),
path = file.path(out_dir, sprintf("P9_重要變數_9h_top%d_h%d_%s.xlsx",
k_top, h, toupper(agg_method)))
)
tbl_base_short
## Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
## Use 'xfun::attr2()' instead.
## See help("Deprecated")
| 排序 | LASSO | EN | Adaptive LASSO | Adaptive EN | Ridge | PCR | Bagging (proxy) | Random Forest |
|---|---|---|---|---|---|---|---|---|
| 1 | 消費者物價基|總指數 | 消費者物價基|總指數 | 消費者物價基|總指數 | 消費者物價基|總指數 | 人力資源主要|勞動力參與率(%) | 合計 | 人力資源主要|勞動力參與率(%) | 合計 | 每人每月總薪|工業及服務業 | 合計 | 每人每月總薪|工業及服務業 | 合計 |
| 2 | 消費者物價商|核心商品 | 消費者物價商|核心商品 | 消費者物價商|核心商品 | 消費者物價商|核心商品 | 人力資源主要|失業率(%) | 合計 | 人力資源主要|失業率(%) | 合計 | 受僱員工每人|工業及服務業 | 合計 | 受僱員工每人|工業及服務業 | 合計 |
| 3 | 進口物價基本|總指數 | 進口物價基本|總指數 | 進口物價基本|總指數 | 進口物價基本|總指數 | 受僱員工進退|進入率 | 工業及服務業 | 每人每月總薪|工業及服務業 | 合計 | 受僱員工進退|進入率 | 工業及服務業 | 受僱員工進退|進入率 | 工業及服務業 |
| 4 | 進口物價基本|總指數 | 每人每月總薪|工業及服務業 | 合計 | 進口物價基本|總指數 | 進口物價基本|總指數 | 利率統計-五|利率(年息百分比率) | 購屋貸款 | 受僱員工進退|進入率 | 工業及服務業 | 批發零售及餐|零售業 | 工業生產指數|總指數 |
| 5 | 每人每月總薪|工業及服務業 | 合計 | 受僱員工每人|工業及服務業 | 合計 | 人力資源主要|勞動力參與率(%) | 合計 | 人力資源主要|勞動力參與率(%) | 合計 | 利率統計-貨|金融業拆款 | 批發零售及餐|零售業 | 全體銀行放款|貼現 | 批發零售及餐|零售業 |
| 6 | 受僱員工進退|進入率 | 工業及服務業 | 受僱員工進退|進入率 | 工業及服務業 | 人力資源主要|失業率(%) | 合計 | 人力資源主要|失業率(%) | 合計 | 外匯統計-我|人民幣 (CNY/USD) | 全體銀行放款|貼現 | 利率統計-五|金額(新臺幣百萬元) | 購屋貸款 | 全體銀行放款|貼現 |
| 7 | 批發零售及餐|零售業 | 批發零售及餐|零售業 | 每人每月總薪|工業及服務業 | 合計 | 每人每月總薪|工業及服務業 | 合計 | Factor3 | 外匯統計-外|外匯存底(期底數) | 全國賦稅實徵|總計 | 利率統計-五|金額(新臺幣百萬元) | 購屋貸款 |
| 8 | 全體銀行放款|貼現 | 全體銀行放款|貼現 | 受僱員工進退|進入率 | 工業及服務業 | 受僱員工進退|進入率 | 工業及服務業 | Factor4 | 全國賦稅實徵|總計 | Factor4 | 全國賦稅實徵|總計 |
tbl_base_long
## Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
## Use 'xfun::attr2()' instead.
## See help("Deprecated")
| 排序 | LASSO | EN | Adaptive LASSO | Adaptive EN | Ridge | PCR | Bagging (proxy) | Random Forest |
|---|---|---|---|---|---|---|---|---|
| 1 | 消費者物價基本分類指數|總指數 | 消費者物價基本分類指數|總指數 | 消費者物價基本分類指數|總指數 | 消費者物價基本分類指數|總指數 | 人力資源主要指標|勞動力參與率(%) | 合計 | 人力資源主要指標|勞動力參與率(%) | 合計 | 每人每月總薪資|工業及服務業 | 合計 | 每人每月總薪資|工業及服務業 | 合計 |
| 2 | 消費者物價商品性質分類指數|核心商品 | 消費者物價商品性質分類指數|核心商品 | 消費者物價商品性質分類指數|核心商品 | 消費者物價商品性質分類指數|核心商品 | 人力資源主要指標|失業率(%) | 合計 | 人力資源主要指標|失業率(%) | 合計 | 受僱員工每人每月工時|工業及服務業 | 合計 | 受僱員工每人每月工時|工業及服務業 | 合計 |
| 3 | 進口物價基本分類指數(按HS)(美元計價)|總指數 | 進口物價基本分類指數(按HS)(美元計價)|總指數 | 進口物價基本分類指數(按HS)(美元計價)|總指數 | 進口物價基本分類指數(按HS)(美元計價)|總指數 | 受僱員工進退狀況|進入率 | 工業及服務業 | 每人每月總薪資|工業及服務業 | 合計 | 受僱員工進退狀況|進入率 | 工業及服務業 | 受僱員工進退狀況|進入率 | 工業及服務業 |
| 4 | 進口物價基本分類指數(按HS)(新臺幣計價)|總指數 | 每人每月總薪資|工業及服務業 | 合計 | 進口物價基本分類指數(按HS)(新臺幣計價)|總指數 | 進口物價基本分類指數(按HS)(新臺幣計價)|總指數 | 利率統計-五大銀行新承做放款金額與利率-月|利率(年息百分比率) | 購屋貸款 | 受僱員工進退狀況|進入率 | 工業及服務業 | 批發零售及餐飲業營業額|零售業 | 工業生產指數|總指數 |
| 5 | 每人每月總薪資|工業及服務業 | 合計 | 受僱員工每人每月工時|工業及服務業 | 合計 | 人力資源主要指標|勞動力參與率(%) | 合計 | 人力資源主要指標|勞動力參與率(%) | 合計 | 利率統計-貨幣市場利率|金融業拆款 | 批發零售及餐飲業營業額|零售業 | 全體銀行放款餘額統計-科目別|貼現 | 批發零售及餐飲業營業額|零售業 |
| 6 | 受僱員工進退狀況|進入率 | 工業及服務業 | 受僱員工進退狀況|進入率 | 工業及服務業 | 人力資源主要指標|失業率(%) | 合計 | 人力資源主要指標|失業率(%) | 合計 | 外匯統計-我國與主要貿易對手通貨對美元之匯率|人民幣 (CNY/USD) | 全體銀行放款餘額統計-科目別|貼現 | 利率統計-五大銀行新承做放款金額與利率-月|金額(新臺幣百萬元) | 購屋貸款 | 全體銀行放款餘額統計-科目別|貼現 |
| 7 | 批發零售及餐飲業營業額|零售業 | 批發零售及餐飲業營業額|零售業 | 每人每月總薪資|工業及服務業 | 合計 | 每人每月總薪資|工業及服務業 | 合計 | Factor3 | 外匯統計-外匯交易量、淨部位及外匯存底-月|外匯存底(期底數) | 全國賦稅實徵淨額-按稅目別分|總計 | 利率統計-五大銀行新承做放款金額與利率-月|金額(新臺幣百萬元) | 購屋貸款 |
| 8 | 全體銀行放款餘額統計-科目別|貼現 | 全體銀行放款餘額統計-科目別|貼現 | 受僱員工進退狀況|進入率 | 工業及服務業 | 受僱員工進退狀況|進入率 | 工業及服務業 | Factor4 | 全國賦稅實徵淨額-按稅目別分|總計 | Factor4 | 全國賦稅實徵淨額-按稅目別分|總計 |