根据学号后四位(4-0-0-6)选取股票:
学号后四位4、0、0、6,0按规则取第20支,最终选取第4支、第20支、第20支、第6支股票
环境准备:检查并安装依赖包
pkgs <- c("PerformanceAnalytics", "xts", "ggplot2", "readxl", "dplyr", "scales")
# 清华镜像安装
options(repos = c(CRAN="https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
new_pkgs <- pkgs[!pkgs %in% installed.packages()[, "Package"]]
if (length(new_pkgs) > 0) install.packages(new_pkgs, force = TRUE)
# 加载包
library(PerformanceAnalytics)
library(xts)
library(ggplot2)
library(readxl)
library(dplyr)
library(scales)
数据读取与预处理
# 1. 先读取第1行,获取股票代码作为列名
col_names <- read_excel("data.xls", n_max = 1) %>% colnames()
# 2. 读取纯数据:跳过前2行(第1行列名、第2行表头),只读取第3行起的纯数据
price_raw <- read_excel("data.xls", skip = 2, col_names = col_names)
# 3. 日期转换:适配Excel序列号格式(44565这种)
date_vec <- as.Date(as.numeric(price_raw[[1]]), origin = "1899-12-30")
# 4. 过滤日期转换失败的NA行,确保时间索引有效
valid_date_rows <- !is.na(date_vec)
date_vec_clean <- date_vec[valid_date_rows]
price_data_raw <- price_raw[valid_date_rows, -1] # 去掉日期列,只剩20支股票数据
# 5. 强制转换所有价格列为数值型
price_data_numeric <- as.data.frame(lapply(price_data_raw, function(x) as.numeric(x)))
# 6. 过滤掉有缺失值的行,确保数据100%有效
valid_price_rows <- complete.cases(price_data_numeric)
price_data_clean <- price_data_numeric[valid_price_rows, ]
date_vec_final <- date_vec_clean[valid_price_rows]
# 规则:去掉日期列后,列1=第1支股票,列4=第4支,列6=第6支,列20=第20支
stock_cols <- c(4, 20, 20, 6)
price_data <- price_data_clean[, stock_cols]
# 7. 构建xts时间序列(100%数值型,无NA)
price <- xts(price_data, order.by = date_vec_final)
names(price) <- c("S1", "S2", "S3", "S4")
tick <- c("S1", "S2", "S3", "S4")
# 数据校验,确认读取100%成功
cat("✅ 数据读取完成!\n")
## ✅ 数据读取完成!
cat("📅 有效日期范围:", min(date_vec_final), " 到 ", max(date_vec_final), "\n")
## 📅 有效日期范围: 18996 到 19461
cat("📈 有效数据行数:", nrow(price), "\n")
## 📈 有效数据行数: 310
cat("📊 数据类型校验:", all(sapply(price, is.numeric)), "(TRUE=全数值,无报错)\n")
## 📊 数据类型校验: TRUE (TRUE=全数值,无报错)
cat("选中的股票列名:", colnames(price), "\n")
## 选中的股票列名: S1 S2 S3 S4
cat("前6行价格数据:\n")
## 前6行价格数据:
print(head(price))
## S1 S2 S3 S4
## 2022-01-04 9.70 9.5 9.5 15.85
## 2022-01-05 9.77 9.5 9.5 15.66
## 2022-01-06 9.66 9.5 9.5 15.52
## 2022-01-07 9.53 9.5 9.5 15.56
## 2022-01-10 9.50 9.5 9.5 16.00
## 2022-01-11 9.44 9.5 9.5 15.91
# 计算对数日收益率,去除缺失值
rt_daily <- na.omit(Return.calculate(price, method = "log"))
# 计算年化均值收益率与协方差矩阵
mean_ret <- colMeans(rt_daily)
cov_mat <- cov(rt_daily) * 252
cat("\n年化均值收益率:\n")
##
## 年化均值收益率:
print(round(mean_ret * 252, 4))
## S1 S2 S3 S4
## -0.5979 -0.3640 -0.3640 -0.3281
cat("\n年化协方差矩阵:\n")
##
## 年化协方差矩阵:
print(round(cov_mat, 4))
## S1 S2 S3 S4
## S1 0.0667 0.0017 0.0017 0.0250
## S2 0.0017 0.1710 0.1710 0.0013
## S3 0.0017 0.1710 0.1710 0.0013
## S4 0.0250 0.0013 0.0013 0.0920
蒙特卡罗模拟(10,000 个随机组合)
set.seed(42) # 固定随机种子,结果可复现
num_port <- 10000
# 初始化存储矩阵
all_wts <- matrix(nrow = num_port, ncol = length(tick))
port_returns <- vector("numeric", length = num_port)
port_risk <- vector("numeric", length = num_port)
sharpe_ratio <- vector("numeric", length = num_port)
# 循环生成随机组合
for (i in seq_len(num_port)) {
# 生成随机权重并归一化(和为1)
wts <- runif(length(tick))
wts <- wts / sum(wts)
all_wts[i, ] <- wts
# 计算年化收益率
port_ret <- ((sum(wts * mean_ret) + 1)^252) - 1
port_returns[i] <- port_ret
# 计算年化风险(标准差)
port_sd <- sqrt(t(wts) %*% (cov_mat %*% wts))
port_risk[i] <- port_sd
# 计算夏普比率(无风险利率设为0)
sharpe_ratio[i] <- port_ret / port_sd
}
# 合并为数据框
portfolio_values <- data.frame(
all_wts,
Return = port_returns,
Risk = port_risk,
SharpeRatio = sharpe_ratio
)
colnames(portfolio_values)[1:4] <- tick
# 提取三个核心组合
min_var <- portfolio_values[which.min(portfolio_values$Risk), ] # 最小风险
max_ret <- portfolio_values[which.max(portfolio_values$Return), ]# 最大收益
max_sr <- portfolio_values[which.max(portfolio_values$SharpeRatio), ] # 最大夏普
第1题 – 最小风险组合
knitr::kable(
data.frame(
股票 = tick,
权重 = round(as.numeric(min_var[, tick]), 4)
),
caption = "最小风险组合 – 权重明细"
)
最小风险组合 – 权重明细
| S1 |
0.4803 |
| S2 |
0.1062 |
| S3 |
0.1163 |
| S4 |
0.2973 |
cat(sprintf("组合指标:风险 %.2f%% | 收益 %.2f%% | 夏普比率 %.4f",
min_var$Risk * 100, min_var$Return * 100, min_var$SharpeRatio))
## 组合指标:风险 19.91% | 收益 -37.25% | 夏普比率 -1.8711
# 整理权重数据
min_var_wt <- data.frame(
股票 = tick,
权重 = as.numeric(min_var[, tick])
)
# 绘制权重柱状图
ggplot(min_var_wt, aes(x = 股票, y = 权重, fill = 股票)) +
geom_col(show.legend = FALSE) +
theme_minimal() +
labs(x = "股票", y = "权重", title = "最小风险组合权重分布") +
scale_y_continuous(labels = percent_format())

第2题 – 最大收益组合
knitr::kable(
data.frame(
股票 = tick,
权重 = round(as.numeric(max_ret[, tick]), 4)
),
caption = "最大收益组合 – 权重明细"
)
最大收益组合 – 权重明细
| S1 |
0.0104 |
| S2 |
0.0196 |
| S3 |
0.1432 |
| S4 |
0.8268 |
cat(sprintf("组合指标:风险 %.2f%% | 收益 %.2f%% | 夏普比率 %.4f",
max_ret$Risk * 100, max_ret$Return * 100, max_ret$SharpeRatio))
## 组合指标:风险 26.12% | 收益 -28.61% | 夏普比率 -1.0954
max_ret_wt <- data.frame(
股票 = tick,
权重 = as.numeric(max_ret[, tick])
)
ggplot(max_ret_wt, aes(x = 股票, y = 权重, fill = 股票)) +
geom_col(show.legend = FALSE) +
theme_minimal() +
labs(x = "股票", y = "权重", title = "最大收益组合权重分布") +
scale_y_continuous(labels = percent_format())

第3题 – 最大夏普比率组合
knitr::kable(
data.frame(
股票 = tick,
权重 = round(as.numeric(max_sr[, tick]), 4)
),
caption = "最大夏普比率组合 – 权重明细"
)
最大夏普比率组合 – 权重明细
| S1 |
0.0006 |
| S2 |
0.6298 |
| S3 |
0.3646 |
| S4 |
0.0050 |
cat(sprintf("组合指标:风险 %.2f%% | 收益 %.2f%% | 夏普比率 %.4f",
max_sr$Risk * 100, max_sr$Return * 100, max_sr$SharpeRatio))
## 组合指标:风险 41.12% | 收益 -30.52% | 夏普比率 -0.7423
max_sr_wt <- data.frame(
股票 = tick,
权重 = as.numeric(max_sr[, tick])
)
ggplot(max_sr_wt, aes(x = 股票, y = 权重, fill = 股票)) +
geom_col(show.legend = FALSE) +
theme_minimal() +
labs(x = "股票", y = "权重", title = "最大夏普比率组合权重分布") +
scale_y_continuous(labels = percent_format())

第4题 – 投资组合优化与有效前沿
ggplot(portfolio_values, aes(x = Risk, y = Return, color = SharpeRatio)) +
geom_point(alpha = 0.4, size = 0.8) +
theme_classic() +
scale_y_continuous(labels = percent_format()) +
scale_x_continuous(labels = percent_format()) +
scale_color_gradient(low = "steelblue", high = "orange") +
labs(
x = "年化风险(标准差)",
y = "年化收益率",
title = "投资组合有效前沿",
color = "夏普\n比率"
) +
# 标注三个核心点
geom_point(data = min_var, aes(x = Risk, y = Return),
color = "red", size = 4, shape = 17, inherit.aes = FALSE) +
geom_point(data = max_ret, aes(x = Risk, y = Return),
color = "green4", size = 4, shape = 15, inherit.aes = FALSE) +
geom_point(data = max_sr, aes(x = Risk, y = Return),
color = "purple", size = 4, shape = 16, inherit.aes = FALSE) +
# 添加点标注
annotate("text", x = min_var$Risk, y = min_var$Return,
label = "最小风险", hjust = -0.15, color = "red", size = 3.5) +
annotate("text", x = max_ret$Risk, y = max_ret$Return,
label = "最大收益", hjust = -0.15, color = "green4", size = 3.5) +
annotate("text", x = max_sr$Risk, y = max_sr$Return,
label = "最大夏普", hjust = -0.15, color = "purple", size = 3.5)
