#姓名:刘雅茹
#学号:2220233131

# 加载所需包
library(readxl)      # 读取 Excel
library(dplyr)       # 数据处理
library(tidyr)       # 数据整理
library(quadprog)    # 二次规划求解
library(ggplot2)     # 绘图
library(lubridate)   # 日期处理

# ----------------------------- 1. 读取数据 -----------------------------
# 读取整个 Excel,不自动设置列名
raw_data <- read_excel("D:\\R\\data.xls", col_names = FALSE)

# 提取股票代码(第2行,第2列至第21列)
stock_codes <- as.character(raw_data[2, 2:21])

# 提取日期(第4行至末尾,第1列)
dates <- raw_data[4:nrow(raw_data), 1] %>%
  pull() %>%
  as.Date(format = "%Y/%m/%d")

# 提取价格数据(第4行至末尾,第2列至第21列)
price_data <- raw_data[4:nrow(raw_data), 2:21] %>%
  mutate_all(as.numeric) %>%
  as.matrix()

# 添加列名(股票代码)
colnames(price_data) <- stock_codes

# 构建数据框,包含日期和价格
df_prices <- data.frame(Date = dates, price_data)

# ----------------------------- 2. 根据学号后四位选择股票 -----------------
# 学号后四位: 3, 1, 3, 1
# 规则:遇到重复数字或0则加1,直至得到四个不重复且在1-20内的索引
digits <- c(3, 1, 3, 1)
selected <- c()
for (d in digits) {
  while (d %in% selected || d == 0) {
    d <- d + 1
    if (d > 20) d <- 1   # 循环处理(实际不会超过)
  }
  selected <- c(selected, d)
}
# 最终选中的列索引(对应价格矩阵的第几列)
selected_cols <- selected  # 结果: 3,1,4,2 排序后为1,2,3,4
selected_cols <- sort(selected_cols)  # 排序便于阅读

# 提取所选股票的价格数据
prices_selected <- df_prices[, c(1, selected_cols + 1)]  # +1 因为第1列是日期
colnames(prices_selected) <- c("Date", stock_codes[selected_cols])

# 查看股票名称(可选)
print("选中的股票:")
## [1] "选中的股票:"
print(colnames(prices_selected)[-1])
## [1] "诺思兰德" "同辉信息" "华岭股份" "微创光电"
# ----------------------------- 3. 计算日收益率 -------------------------
# 计算简单收益率 (P_t / P_{t-1} - 1)
returns <- prices_selected %>%
  select(-Date) %>%
  mutate_all(~ . / lag(.) - 1) %>%
  na.omit()   # 删除第一行 NA

# 转为矩阵,用于后续计算
R <- as.matrix(returns)
# 期望收益率(日频)
mu <- colMeans(R)
# 协方差矩阵(日频)
Sigma <- cov(R)
# ----------------------------- 4. 定义优化函数(权重非负且和为1)------
# 二次规划求解器:求解 min w' Sigma w  s.t.  Aeq' w = beq,  w >= 0
# 对于给定目标收益率的目标,添加约束 mu' w = target
solve_min_var <- function(target_return = NULL) {
  n <- ncol(R)
  Dmat <- Sigma
  dvec <- rep(0, n)
  # 约束:权重和 = 1
  Aeq <- matrix(1, nrow = 1, ncol = n)
  beq <- 1
  # 不等式约束:w >= 0  ->  -w <= 0
  Amat <- cbind(Aeq, diag(n))
  bvec <- c(beq, rep(0, n))
  meq <- 1  # 第一个约束为等式
  
  if (!is.null(target_return)) {
    # 增加收益约束: mu' w = target_return
    Aeq2 <- matrix(mu, nrow = 1, ncol = n)
    beq2 <- target_return
    Amat <- cbind(Aeq, Aeq2, diag(n))
    bvec <- c(beq, beq2, rep(0, n))
    meq <- 2
  }
  
  # 使用 quadprog 求解
  sol <- solve.QP(Dmat, dvec, Amat, bvec, meq = meq)
  return(list(weights = sol$solution, risk = sqrt(sol$value), 
              ret = if(!is.null(target_return)) target_return else sum(sol$solution * mu)))
}
# ----------------------------- 5. 任务1:最小风险组合 --------------------
# 最小风险组合求解函数
min_risk_port <- function(returns) {
  n <- ncol(returns)
  Dmat <- 2 * cov(returns)
  dvec <- rep(0, n)
  
  Aeq <- matrix(1, 1, n)
  Aineq <- diag(n)
  Amat <- t(rbind(Aeq, Aineq))
  bvec <- c(1, rep(0, n))
  meq <- 1
  
  sol <- solve.QP(Dmat, dvec, Amat, bvec, meq)
  w <- sol$solution
  names(w) <- colnames(returns)
  return(w)
}
w_min <- min_risk_port(R)

# 画直方图
data.frame(Asset = names(w_min), Weight = w_min) %>%
  ggplot(aes(Asset, Weight, fill = Asset)) +
  geom_col() +
  labs(title = "最小风险组合权重") +
  scale_y_continuous(labels = scales::percent) +
  theme_minimal()

# ----------------------------- 6. 任务2:最大收益组合 --------------------
# 在权重非负且和为1下,最大收益即满仓期望收益率最高的股票
max_ret_stock <- which.max(mu)
w_maxret <- rep(0, length(mu))
w_maxret[max_ret_stock] <- 1
names(w_maxret) <- colnames(returns)

# 绘制最大收益组合权重直方图
df_maxret <- data.frame(Stock = names(w_maxret), Weight = w_maxret)
p2 <- ggplot(df_maxret, aes(x = Stock, y = Weight, fill = Stock)) +
  geom_col() +
  labs(title = "Maximum Return Portfolio Weights", y = "Weight", x = "Stock") +
  theme_minimal() +
  theme(legend.position = "none")
print(p2)

# ----------------------------- 7. 任务3 & 4:最大夏普比率与有效前沿 ----------
# 无风险利率设为0(题目未给出,通常取0)
rf <- 0

# 生成目标收益率网格(从最小期望收益到最大期望收益)
target_returns <- seq(min(mu), max(mu), length.out = 100)
ef_frontier <- data.frame(ret = numeric(), risk = numeric(), weights = I(list()))

for (i in seq_along(target_returns)) {
  tr <- target_returns[i]
  # 当目标收益率超过最大单资产收益时,若无做空无法实现,跳过
  if (tr > max(mu)) next
  port <- tryCatch(solve_min_var(target_return = tr), error = function(e) NULL)
  if (!is.null(port)) {
    ef_frontier <- rbind(ef_frontier, 
                         data.frame(ret = port$ret, risk = port$risk, 
                                    weights = I(list(port$weights))))
  }
}

# 检查有效前沿是否为空
if (nrow(ef_frontier) == 0) {
  # 如果网格法失败,则直接使用数值优化寻找最大夏普比率
  library(optimx)
  # 目标函数:负夏普比率(最小化)
  neg_sharpe <- function(w) {
    if (any(w < 0) || abs(sum(w) - 1) > 1e-6) return(1e10)
    port_ret <- sum(w * mu)
    port_risk <- sqrt(t(w) %*% Sigma %*% w)
    if (port_risk == 0) return(1e10)
    sharpe <- (port_ret - rf) / port_risk
    return(-sharpe)
  }
  n <- length(mu)
  init_w <- rep(1/n, n)
  opt <- optim(par = init_w, fn = neg_sharpe, method = "L-BFGS-B",
               lower = rep(0, n), upper = rep(1, n))
  w_maxsharpe <- opt$par
  # 计算对应的收益和风险
  maxsharpe_ret <- sum(w_maxsharpe * mu)
  maxsharpe_risk <- sqrt(t(w_maxsharpe) %*% Sigma %*% w_maxsharpe)
  max_sharpe_val <- (maxsharpe_ret - rf) / maxsharpe_risk
  # 构建一个单行的 data.frame 用于后续绘图
  ef_frontier <- rbind(ef_frontier,
                       data.frame(ret = maxsharpe_ret, risk = maxsharpe_risk,
                                  weights = I(list(w_maxsharpe))))
  idx_max_sharpe <- nrow(ef_frontier)
} else {
  # 计算有效前沿上各组合的夏普比率
  ef_frontier$sharpe <- (ef_frontier$ret - rf) / ef_frontier$risk
  idx_max_sharpe <- which.max(ef_frontier$sharpe)
  max_sharpe_port <- ef_frontier[idx_max_sharpe, ]
  w_maxsharpe <- max_sharpe_port$weights[[1]]
  maxsharpe_ret <- max_sharpe_port$ret
  maxsharpe_risk <- max_sharpe_port$risk
}

# 确保 w_maxsharpe 是数值向量,并赋予名称
if (is.null(names(w_maxsharpe))) {
  names(w_maxsharpe) <- colnames(returns)
}

# 绘制最大夏普比率组合权重直方图
df_maxsharpe <- data.frame(Stock = names(w_maxsharpe), Weight = w_maxsharpe)
p3 <- ggplot(df_maxsharpe, aes(x = Stock, y = Weight, fill = Stock)) +
  geom_col() +
  labs(title = "Maximum Sharpe Ratio Portfolio Weights", y = "Weight", x = "Stock") +
  theme_minimal() +
  theme(legend.position = "none")
print(p3)

# 1. 计算最小风险组合的收益与风险(基于任务一中的权重 w_min)
min_risk_ret <- sum(w_min * mu)
min_risk_risk <- sqrt(t(w_min) %*% Sigma %*% w_min)

# 2. 计算最大收益组合的收益与风险(基于任务二中的权重 w_maxret)
max_ret_ret <- sum(w_maxret * mu)
max_ret_risk <- sqrt(t(w_maxret) %*% Sigma %*% w_maxret)

# 3. 有效前沿数据已存在于 ef_frontier 中(包含列 ret 和 risk)
#    最大夏普比率组合的收益和风险已由任务三给出:maxsharpe_ret, maxsharpe_risk

# 4. 绘制有效前沿(X轴:风险,Y轴:收益)
p4 <- ggplot(ef_frontier, aes(x = risk, y = ret)) +
  geom_line(color = "blue", size = 1) +                    # 有效前沿曲线
  geom_point(aes(x = min_risk_risk, y = min_risk_ret),
             color = "green", size = 4, shape = 18) +      # 最小风险点
  geom_point(aes(x = max_ret_risk, y = max_ret_ret),
             color = "red", size = 4, shape = 18) +        # 最大收益点
  geom_point(aes(x = maxsharpe_risk, y = maxsharpe_ret),
             color = "orange", size = 4, shape = 18) +     # 最大夏普点
  annotate("text", x = min_risk_risk, y = min_risk_ret,
           label = "Min Risk", vjust = -0.8, hjust = 0.5) +
  annotate("text", x = max_ret_risk, y = max_ret_ret,
           label = "Max Return", vjust = -0.8, hjust = 0.5) +
  annotate("text", x = maxsharpe_risk, y = maxsharpe_ret,
           label = "Max Sharpe", vjust = -0.8, hjust = 0.5) +
  labs(title = "Efficient Frontier",
       x = "Portfolio Risk (Standard Deviation)",
       y = "Portfolio Return (Daily)") +
  theme_minimal()

# 显示图形
print(p4)