根据学号后四位(4-0-0-6)选取股票: 学号后四位4、0、0、6,0按规则取第20支,最终选取第4支、第20支、第20支、第6支股票


环境准备:检查并安装依赖包

pkgs <- c("PerformanceAnalytics", "xts", "ggplot2", "readxl", "dplyr", "scales")

# 清华镜像安装
options(repos = c(CRAN="https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
new_pkgs <- pkgs[!pkgs %in% installed.packages()[, "Package"]]
if (length(new_pkgs) > 0) install.packages(new_pkgs, force = TRUE)

# 加载包
library(PerformanceAnalytics)
library(xts)
library(ggplot2)
library(readxl)
library(dplyr)
library(scales)

数据读取与预处理

# 1. 先读取第1行,获取股票代码作为列名
col_names <- read_excel("data.xls", n_max = 1) %>% colnames()

# 2. 读取纯数据:跳过前2行(第1行列名、第2行表头),只读取第3行起的纯数据
price_raw <- read_excel("data.xls", skip = 2, col_names = col_names)

# 3. 日期转换:适配Excel序列号格式(44565这种)
date_vec <- as.Date(as.numeric(price_raw[[1]]), origin = "1899-12-30")

# 4. 过滤日期转换失败的NA行,确保时间索引有效
valid_date_rows <- !is.na(date_vec)
date_vec_clean <- date_vec[valid_date_rows]
price_data_raw <- price_raw[valid_date_rows, -1] # 去掉日期列,只剩20支股票数据

# 5. 强制转换所有价格列为数值型
price_data_numeric <- as.data.frame(lapply(price_data_raw, function(x) as.numeric(x)))

# 6. 过滤掉有缺失值的行,确保数据100%有效
valid_price_rows <- complete.cases(price_data_numeric)
price_data_clean <- price_data_numeric[valid_price_rows, ]
date_vec_final <- date_vec_clean[valid_price_rows]


# 规则:去掉日期列后,列1=第1支股票,列4=第4支,列6=第6支,列20=第20支
stock_cols <- c(4, 20, 20, 6)
price_data <- price_data_clean[, stock_cols]

# 7. 构建xts时间序列(100%数值型,无NA)
price <- xts(price_data, order.by = date_vec_final)
names(price) <- c("S1", "S2", "S3", "S4")
tick <- c("S1", "S2", "S3", "S4")

# 数据校验,确认读取100%成功
cat("✅ 数据读取完成!\n")
## ✅ 数据读取完成!
cat("📅 有效日期范围:", min(date_vec_final), " 到 ", max(date_vec_final), "\n")
## 📅 有效日期范围: 18996  到  19461
cat("📈 有效数据行数:", nrow(price), "\n")
## 📈 有效数据行数: 310
cat("📊 数据类型校验:", all(sapply(price, is.numeric)), "(TRUE=全数值,无报错)\n")
## 📊 数据类型校验: TRUE (TRUE=全数值,无报错)
cat("选中的股票列名:", colnames(price), "\n")
## 选中的股票列名: S1 S2 S3 S4
cat("前6行价格数据:\n")
## 前6行价格数据:
print(head(price))
##              S1  S2  S3    S4
## 2022-01-04 9.70 9.5 9.5 15.85
## 2022-01-05 9.77 9.5 9.5 15.66
## 2022-01-06 9.66 9.5 9.5 15.52
## 2022-01-07 9.53 9.5 9.5 15.56
## 2022-01-10 9.50 9.5 9.5 16.00
## 2022-01-11 9.44 9.5 9.5 15.91
# 计算对数日收益率,去除缺失值
rt_daily <- na.omit(Return.calculate(price, method = "log"))

# 计算年化均值收益率与协方差矩阵
mean_ret <- colMeans(rt_daily)
cov_mat  <- cov(rt_daily) * 252

cat("\n年化均值收益率:\n")
## 
## 年化均值收益率:
print(round(mean_ret * 252, 4))
##      S1      S2      S3      S4 
## -0.5979 -0.3640 -0.3640 -0.3281
cat("\n年化协方差矩阵:\n")
## 
## 年化协方差矩阵:
print(round(cov_mat, 4))
##        S1     S2     S3     S4
## S1 0.0667 0.0017 0.0017 0.0250
## S2 0.0017 0.1710 0.1710 0.0013
## S3 0.0017 0.1710 0.1710 0.0013
## S4 0.0250 0.0013 0.0013 0.0920

蒙特卡罗模拟(10,000 个随机组合)

set.seed(42) # 固定随机种子,结果可复现
num_port <- 10000

# 初始化存储矩阵
all_wts      <- matrix(nrow = num_port, ncol = length(tick))
port_returns <- vector("numeric", length = num_port)
port_risk    <- vector("numeric", length = num_port)
sharpe_ratio <- vector("numeric", length = num_port)

# 循环生成随机组合
for (i in seq_len(num_port)) {
  # 生成随机权重并归一化(和为1)
  wts <- runif(length(tick))
  wts <- wts / sum(wts)
  all_wts[i, ] <- wts
  
  # 计算年化收益率
  port_ret <- ((sum(wts * mean_ret) + 1)^252) - 1
  port_returns[i] <- port_ret
  
  # 计算年化风险(标准差)
  port_sd <- sqrt(t(wts) %*% (cov_mat %*% wts))
  port_risk[i] <- port_sd
  
  # 计算夏普比率(无风险利率设为0)
  sharpe_ratio[i] <- port_ret / port_sd
}

# 合并为数据框
portfolio_values <- data.frame(
  all_wts,
  Return = port_returns,
  Risk = port_risk,
  SharpeRatio = sharpe_ratio
)
colnames(portfolio_values)[1:4] <- tick
# 提取三个核心组合
min_var <- portfolio_values[which.min(portfolio_values$Risk), ]  # 最小风险
max_ret <- portfolio_values[which.max(portfolio_values$Return), ]# 最大收益
max_sr  <- portfolio_values[which.max(portfolio_values$SharpeRatio), ] # 最大夏普

第1题 – 最小风险组合

knitr::kable(
  data.frame(
    股票 = tick,
    权重 = round(as.numeric(min_var[, tick]), 4)
  ),
  caption = "最小风险组合 – 权重明细"
)
最小风险组合 – 权重明细
股票 权重
S1 0.4803
S2 0.1062
S3 0.1163
S4 0.2973
cat(sprintf("组合指标:风险 %.2f%%  |  收益 %.2f%%  |  夏普比率 %.4f",
            min_var$Risk * 100, min_var$Return * 100, min_var$SharpeRatio))
## 组合指标:风险 19.91%  |  收益 -37.25%  |  夏普比率 -1.8711
# 整理权重数据
min_var_wt <- data.frame(
  股票 = tick,
  权重 = as.numeric(min_var[, tick])
)

# 绘制权重柱状图
ggplot(min_var_wt, aes(x = 股票, y = 权重, fill = 股票)) +
  geom_col(show.legend = FALSE) +
  theme_minimal() +
  labs(x = "股票", y = "权重", title = "最小风险组合权重分布") +
  scale_y_continuous(labels = percent_format())


第2题 – 最大收益组合

knitr::kable(
  data.frame(
    股票 = tick,
    权重 = round(as.numeric(max_ret[, tick]), 4)
  ),
  caption = "最大收益组合 – 权重明细"
)
最大收益组合 – 权重明细
股票 权重
S1 0.0104
S2 0.0196
S3 0.1432
S4 0.8268
cat(sprintf("组合指标:风险 %.2f%%  |  收益 %.2f%%  |  夏普比率 %.4f",
            max_ret$Risk * 100, max_ret$Return * 100, max_ret$SharpeRatio))
## 组合指标:风险 26.12%  |  收益 -28.61%  |  夏普比率 -1.0954
max_ret_wt <- data.frame(
  股票 = tick,
  权重 = as.numeric(max_ret[, tick])
)

ggplot(max_ret_wt, aes(x = 股票, y = 权重, fill = 股票)) +
  geom_col(show.legend = FALSE) +
  theme_minimal() +
  labs(x = "股票", y = "权重", title = "最大收益组合权重分布") +
  scale_y_continuous(labels = percent_format())


第3题 – 最大夏普比率组合

knitr::kable(
  data.frame(
    股票 = tick,
    权重 = round(as.numeric(max_sr[, tick]), 4)
  ),
  caption = "最大夏普比率组合 – 权重明细"
)
最大夏普比率组合 – 权重明细
股票 权重
S1 0.0006
S2 0.6298
S3 0.3646
S4 0.0050
cat(sprintf("组合指标:风险 %.2f%%  |  收益 %.2f%%  |  夏普比率 %.4f",
            max_sr$Risk * 100, max_sr$Return * 100, max_sr$SharpeRatio))
## 组合指标:风险 41.12%  |  收益 -30.52%  |  夏普比率 -0.7423
max_sr_wt <- data.frame(
  股票 = tick,
  权重 = as.numeric(max_sr[, tick])
)

ggplot(max_sr_wt, aes(x = 股票, y = 权重, fill = 股票)) +
  geom_col(show.legend = FALSE) +
  theme_minimal() +
  labs(x = "股票", y = "权重", title = "最大夏普比率组合权重分布") +
  scale_y_continuous(labels = percent_format())


第4题 – 投资组合优化与有效前沿

ggplot(portfolio_values, aes(x = Risk, y = Return, color = SharpeRatio)) +
  geom_point(alpha = 0.4, size = 0.8) +
  theme_classic() +
  scale_y_continuous(labels = percent_format()) +
  scale_x_continuous(labels = percent_format()) +
  scale_color_gradient(low = "steelblue", high = "orange") +
  labs(
    x     = "年化风险(标准差)",
    y     = "年化收益率",
    title = "投资组合有效前沿",
    color = "夏普\n比率"
  ) +
  # 标注三个核心点
  geom_point(data = min_var, aes(x = Risk, y = Return),
             color = "red", size = 4, shape = 17, inherit.aes = FALSE) +
  geom_point(data = max_ret, aes(x = Risk, y = Return),
             color = "green4", size = 4, shape = 15, inherit.aes = FALSE) +
  geom_point(data = max_sr, aes(x = Risk, y = Return),
             color = "purple", size = 4, shape = 16, inherit.aes = FALSE) +
  # 添加点标注
  annotate("text", x = min_var$Risk, y = min_var$Return,
           label = "最小风险", hjust = -0.15, color = "red", size = 3.5) +
  annotate("text", x = max_ret$Risk, y = max_ret$Return,
           label = "最大收益", hjust = -0.15, color = "green4", size = 3.5) +
  annotate("text", x = max_sr$Risk, y = max_sr$Return,
           label = "最大夏普", hjust = -0.15, color = "purple", size = 3.5)