作业1 – 投资组合优化

根据学号后四位（4-0-0-6）选取股票： 学号后四位4、0、0、6，0按规则取第20支，最终选取第4支、第20支、第20支、第6支股票

环境准备：检查并安装依赖包

pkgs <- c("PerformanceAnalytics", "xts", "ggplot2", "readxl", "dplyr", "scales")

# 清华镜像安装
options(repos = c(CRAN="https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
new_pkgs <- pkgs[!pkgs %in% installed.packages()[, "Package"]]
if (length(new_pkgs) > 0) install.packages(new_pkgs, force = TRUE)

# 加载包
library(PerformanceAnalytics)
library(xts)
library(ggplot2)
library(readxl)
library(dplyr)
library(scales)

数据读取与预处理

# 1. 先读取第1行，获取股票代码作为列名
col_names <- read_excel("data.xls", n_max = 1) %>% colnames()

# 2. 读取纯数据：跳过前2行（第1行列名、第2行表头），只读取第3行起的纯数据
price_raw <- read_excel("data.xls", skip = 2, col_names = col_names)

# 3. 日期转换：适配Excel序列号格式（44565这种）
date_vec <- as.Date(as.numeric(price_raw[[1]]), origin = "1899-12-30")

# 4. 过滤日期转换失败的NA行，确保时间索引有效
valid_date_rows <- !is.na(date_vec)
date_vec_clean <- date_vec[valid_date_rows]
price_data_raw <- price_raw[valid_date_rows, -1] # 去掉日期列，只剩20支股票数据

# 5. 强制转换所有价格列为数值型
price_data_numeric <- as.data.frame(lapply(price_data_raw, function(x) as.numeric(x)))

# 6. 过滤掉有缺失值的行，确保数据100%有效
valid_price_rows <- complete.cases(price_data_numeric)
price_data_clean <- price_data_numeric[valid_price_rows, ]
date_vec_final <- date_vec_clean[valid_price_rows]


# 规则：去掉日期列后，列1=第1支股票，列4=第4支，列6=第6支，列20=第20支
stock_cols <- c(4, 20, 20, 6)
price_data <- price_data_clean[, stock_cols]

# 7. 构建xts时间序列（100%数值型，无NA）
price <- xts(price_data, order.by = date_vec_final)
names(price) <- c("S1", "S2", "S3", "S4")
tick <- c("S1", "S2", "S3", "S4")

# 数据校验，确认读取100%成功
cat("✅ 数据读取完成！\n")

## ✅ 数据读取完成！

cat("📅 有效日期范围：", min(date_vec_final), " 到 ", max(date_vec_final), "\n")

## 📅 有效日期范围： 18996  到  19461

cat("📈 有效数据行数：", nrow(price), "\n")

## 📈 有效数据行数： 310

cat("📊 数据类型校验：", all(sapply(price, is.numeric)), "（TRUE=全数值，无报错）\n")

## 📊 数据类型校验： TRUE （TRUE=全数值，无报错）

cat("选中的股票列名：", colnames(price), "\n")

## 选中的股票列名： S1 S2 S3 S4

cat("前6行价格数据：\n")

## 前6行价格数据：

print(head(price))

##              S1  S2  S3    S4
## 2022-01-04 9.70 9.5 9.5 15.85
## 2022-01-05 9.77 9.5 9.5 15.66
## 2022-01-06 9.66 9.5 9.5 15.52
## 2022-01-07 9.53 9.5 9.5 15.56
## 2022-01-10 9.50 9.5 9.5 16.00
## 2022-01-11 9.44 9.5 9.5 15.91

# 计算对数日收益率，去除缺失值
rt_daily <- na.omit(Return.calculate(price, method = "log"))

# 计算年化均值收益率与协方差矩阵
mean_ret <- colMeans(rt_daily)
cov_mat  <- cov(rt_daily) * 252

cat("\n年化均值收益率：\n")

## 
## 年化均值收益率：

print(round(mean_ret * 252, 4))

##      S1      S2      S3      S4 
## -0.5979 -0.3640 -0.3640 -0.3281

cat("\n年化协方差矩阵：\n")

## 
## 年化协方差矩阵：

print(round(cov_mat, 4))

##        S1     S2     S3     S4
## S1 0.0667 0.0017 0.0017 0.0250
## S2 0.0017 0.1710 0.1710 0.0013
## S3 0.0017 0.1710 0.1710 0.0013
## S4 0.0250 0.0013 0.0013 0.0920

蒙特卡罗模拟（10,000 个随机组合）

set.seed(42) # 固定随机种子，结果可复现
num_port <- 10000

# 初始化存储矩阵
all_wts      <- matrix(nrow = num_port, ncol = length(tick))
port_returns <- vector("numeric", length = num_port)
port_risk    <- vector("numeric", length = num_port)
sharpe_ratio <- vector("numeric", length = num_port)

# 循环生成随机组合
for (i in seq_len(num_port)) {
  # 生成随机权重并归一化（和为1）
  wts <- runif(length(tick))
  wts <- wts / sum(wts)
  all_wts[i, ] <- wts
  
  # 计算年化收益率
  port_ret <- ((sum(wts * mean_ret) + 1)^252) - 1
  port_returns[i] <- port_ret
  
  # 计算年化风险（标准差）
  port_sd <- sqrt(t(wts) %*% (cov_mat %*% wts))
  port_risk[i] <- port_sd
  
  # 计算夏普比率（无风险利率设为0）
  sharpe_ratio[i] <- port_ret / port_sd
}

# 合并为数据框
portfolio_values <- data.frame(
  all_wts,
  Return = port_returns,
  Risk = port_risk,
  SharpeRatio = sharpe_ratio
)
colnames(portfolio_values)[1:4] <- tick

# 提取三个核心组合
min_var <- portfolio_values[which.min(portfolio_values$Risk), ]  # 最小风险
max_ret <- portfolio_values[which.max(portfolio_values$Return), ]# 最大收益
max_sr  <- portfolio_values[which.max(portfolio_values$SharpeRatio), ] # 最大夏普

第1题 – 最小风险组合

knitr::kable(
  data.frame(
    股票 = tick,
    权重 = round(as.numeric(min_var[, tick]), 4)
  ),
  caption = "最小风险组合 – 权重明细"
)

最小风险组合 – 权重明细
股票	权重
S1	0.4803
S2	0.1062
S3	0.1163
S4	0.2973

cat(sprintf("组合指标：风险 %.2f%%  |  收益 %.2f%%  |  夏普比率 %.4f",
            min_var$Risk * 100, min_var$Return * 100, min_var$SharpeRatio))

## 组合指标：风险 19.91%  |  收益 -37.25%  |  夏普比率 -1.8711

# 整理权重数据
min_var_wt <- data.frame(
  股票 = tick,
  权重 = as.numeric(min_var[, tick])
)

# 绘制权重柱状图
ggplot(min_var_wt, aes(x = 股票, y = 权重, fill = 股票)) +
  geom_col(show.legend = FALSE) +
  theme_minimal() +
  labs(x = "股票", y = "权重", title = "最小风险组合权重分布") +
  scale_y_continuous(labels = percent_format())

第2题 – 最大收益组合

knitr::kable(
  data.frame(
    股票 = tick,
    权重 = round(as.numeric(max_ret[, tick]), 4)
  ),
  caption = "最大收益组合 – 权重明细"
)

最大收益组合 – 权重明细
股票	权重
S1	0.0104
S2	0.0196
S3	0.1432
S4	0.8268

cat(sprintf("组合指标：风险 %.2f%%  |  收益 %.2f%%  |  夏普比率 %.4f",
            max_ret$Risk * 100, max_ret$Return * 100, max_ret$SharpeRatio))

## 组合指标：风险 26.12%  |  收益 -28.61%  |  夏普比率 -1.0954

max_ret_wt <- data.frame(
  股票 = tick,
  权重 = as.numeric(max_ret[, tick])
)

ggplot(max_ret_wt, aes(x = 股票, y = 权重, fill = 股票)) +
  geom_col(show.legend = FALSE) +
  theme_minimal() +
  labs(x = "股票", y = "权重", title = "最大收益组合权重分布") +
  scale_y_continuous(labels = percent_format())

第3题 – 最大夏普比率组合

knitr::kable(
  data.frame(
    股票 = tick,
    权重 = round(as.numeric(max_sr[, tick]), 4)
  ),
  caption = "最大夏普比率组合 – 权重明细"
)

最大夏普比率组合 – 权重明细
股票	权重
S1	0.0006
S2	0.6298
S3	0.3646
S4	0.0050

cat(sprintf("组合指标：风险 %.2f%%  |  收益 %.2f%%  |  夏普比率 %.4f",
            max_sr$Risk * 100, max_sr$Return * 100, max_sr$SharpeRatio))

## 组合指标：风险 41.12%  |  收益 -30.52%  |  夏普比率 -0.7423

max_sr_wt <- data.frame(
  股票 = tick,
  权重 = as.numeric(max_sr[, tick])
)

ggplot(max_sr_wt, aes(x = 股票, y = 权重, fill = 股票)) +
  geom_col(show.legend = FALSE) +
  theme_minimal() +
  labs(x = "股票", y = "权重", title = "最大夏普比率组合权重分布") +
  scale_y_continuous(labels = percent_format())

第4题 – 投资组合优化与有效前沿

ggplot(portfolio_values, aes(x = Risk, y = Return, color = SharpeRatio)) +
  geom_point(alpha = 0.4, size = 0.8) +
  theme_classic() +
  scale_y_continuous(labels = percent_format()) +
  scale_x_continuous(labels = percent_format()) +
  scale_color_gradient(low = "steelblue", high = "orange") +
  labs(
    x     = "年化风险（标准差）",
    y     = "年化收益率",
    title = "投资组合有效前沿",
    color = "夏普\n比率"
  ) +
  # 标注三个核心点
  geom_point(data = min_var, aes(x = Risk, y = Return),
             color = "red", size = 4, shape = 17, inherit.aes = FALSE) +
  geom_point(data = max_ret, aes(x = Risk, y = Return),
             color = "green4", size = 4, shape = 15, inherit.aes = FALSE) +
  geom_point(data = max_sr, aes(x = Risk, y = Return),
             color = "purple", size = 4, shape = 16, inherit.aes = FALSE) +
  # 添加点标注
  annotate("text", x = min_var$Risk, y = min_var$Return,
           label = "最小风险", hjust = -0.15, color = "red", size = 3.5) +
  annotate("text", x = max_ret$Risk, y = max_ret$Return,
           label = "最大收益", hjust = -0.15, color = "green4", size = 3.5) +
  annotate("text", x = max_sr$Risk, y = max_sr$Return,
           label = "最大夏普", hjust = -0.15, color = "purple", size = 3.5)

作业1 – 投资组合优化

姓名：刘楚言学号：2220234006

2026-04-02

环境准备：检查并安装依赖包

数据读取与预处理

蒙特卡罗模拟（10,000 个随机组合）

第1题 – 最小风险组合

第2题 – 最大收益组合

第3题 – 最大夏普比率组合

第4题 – 投资组合优化与有效前沿

作业1 – 投资组合优化

姓名：刘楚言 学号：2220234006

2026-04-02

环境准备：检查并安装依赖包

数据读取与预处理

蒙特卡罗模拟（10,000 个随机组合）

第1题 – 最小风险组合

第2题 – 最大收益组合

第3题 – 最大夏普比率组合

第4题 – 投资组合优化与有效前沿

姓名：刘楚言学号：2220234006