library(quantmod)
## Warning: package 'quantmod' was built under R version 4.3.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 4.3.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(PerformanceAnalytics)
## Warning: package 'PerformanceAnalytics' was built under R version 4.3.3
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first()  masks xts::first()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::last()   masks xts::last()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(PortfolioAnalytics)
## Warning: package 'PortfolioAnalytics' was built under R version 4.3.3
## Loading required package: foreach
## Warning: package 'foreach' was built under R version 4.3.3
## 
## Attaching package: 'foreach'
## 
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
## 
## Registered S3 method overwritten by 'PortfolioAnalytics':
##   method           from
##   print.constraint ROI
library(knitr)
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.3
library(ROI)
## Warning: package 'ROI' was built under R version 4.3.3
## ROI: R Optimization Infrastructure
## Registered solver plugins: nlminb, symphony, quadprog.
## Default solver: auto.
## 
## Attaching package: 'ROI'
## 
## The following objects are masked from 'package:PortfolioAnalytics':
## 
##     is.constraint, objective
library(ROI.plugin.quadprog)
## Warning: package 'ROI.plugin.quadprog' was built under R version 4.3.3
library(tseries)
## Warning: package 'tseries' was built under R version 4.3.3
library(Matrix)   # nearPD
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
# Đường dẫn file của bạn (sửa lại nếu cần)
file_path <- "C:/Users/ASUS/Downloads/data 2.xlsx"

# Đọc dữ liệu
raw_df <- read_excel(file_path, sheet = 1)

# Đổi tên cột về định dạng nhất quán (phòng khi tên cột viết hoa/thường khác nhau)
names(raw_df) <- toupper(names(raw_df))

# Kiểm tra bắt buộc các cột cần có
required_cols <- c("DATE","HPG","MWG","FPT","VCB","VNM","VN30")
stopifnot(all(required_cols %in% names(raw_df)))

# Chuẩn hoá cột DATE
if (!inherits(raw_df$DATE, "Date")) {
  # Nếu cần, bạn có thể chỉ định định dạng ngày cụ thể, ví dụ: "%Y-%m-%d" hay "%d/%m/%Y"
  raw_df$DATE <- as.Date(raw_df$DATE)
}

# Hàm chuyển các cột ký tự có dấu phẩy trong số sang numeric
to_numeric_clean <- function(x) {
  if (is.character(x)) {
    as.numeric(gsub(",", "", x))
  } else {
    as.numeric(x)
  }
}

# Chuẩn hoá toàn bộ các cột giá (trừ DATE)
price_df <- raw_df %>%
  mutate(across(all_of(setdiff(required_cols, "DATE")), to_numeric_clean)) %>%
  arrange(DATE)

# Chuyển sang xts
prices <- xts(price_df[, setdiff(required_cols, "DATE")], order.by = price_df$DATE)

# Xem nhanh
kable(head(prices), caption = "Dữ liệu giá sau khi chuẩn hoá")
Dữ liệu giá sau khi chuẩn hoá
HPG MWG FPT VCB VNM VN30
3459.4 9.957 7437.9 16.822 66667 599.69
3492.0 9.957 7484.6 17.929 66667 606.37
3459.4 9.773 7531.3 17.718 68056 607.65
3459.4 9.865 7453.5 18.245 68403 606.85
3524.7 10.049 7515.7 19.458 71528 618.41
3524.7 10.142 7562.4 19.616 69444 615.73

1 Thiết lập tham số & tính log-returns

# Tham số
market_index <- "VN30"
tickers <- setdiff(colnames(prices), market_index)

# Lãi suất phi rủi ro (có thể chỉnh)
rf_annual <- 0.045
rf_daily  <- (1 + rf_annual)^(1/252) - 1

# Log-returns (bỏ hàng NA đầu)
library(PerformanceAnalytics)
ret_all <- Return.calculate(prices, method = "log")
ret_all <- ret_all[-1, ]

stock_returns  <- ret_all[, tickers, drop = FALSE]
market_returns <- ret_all[, market_index, drop = FALSE]

# Kiểm tra nhanh
kable(head(stock_returns), caption = "Log-returns cổ phiếu (sample)")
Log-returns cổ phiếu (sample)
HPG MWG FPT VCB VNM
0.009379474 0.000000000 0.006259025 0.063731960 0.000000000
-0.009379474 -0.018652340 0.006220093 -0.011838442 0.020620818
0.000000000 0.009369658 -0.010383949 0.029309998 0.005085788
0.018700163 0.018479988 0.008310445 0.064367227 0.044672298
0.000000000 0.009212090 0.006194434 0.008087263 -0.029568309
0.000000000 0.044451763 0.002060712 -0.005367151 0.019804321
kable(head(market_returns), caption = "Log-returns thị trường (VN30)")
Log-returns thị trường (VN30)
VN30
0.011077506
0.002108698
-0.001317415
0.018870024
-0.004343112
0.005926567

2 Beta theo CAPM

# Excess returns
stock_excess_returns  <- sweep(stock_returns, 1, rf_daily)   # r_i - rf
market_excess_returns <- market_returns - rf_daily           # r_m - rf

# Hàm tính beta
calculate_beta <- function(stock_excess_ret, market_excess_ret) {
  fit <- lm(as.numeric(stock_excess_ret) ~ as.numeric(market_excess_ret))
  unname(coef(fit)[2])
}

betas <- apply(stock_excess_returns, 2, calculate_beta, market_excess_ret = market_excess_returns)

beta_df <- data.frame(Ticker = names(betas), Beta = as.numeric(betas), row.names = NULL)
kable(beta_df, caption = "Hệ số Beta ước tính (CAPM)")
Hệ số Beta ước tính (CAPM)
Ticker Beta
HPG 1.2041160
MWG 1.1065076
FPT 0.9289243
VCB 0.9520191
VNM 0.6656178

3 TSSL kỳ vọng theo CAPM

mean_mkt_daily   <- mean(as.numeric(market_returns), na.rm = TRUE)
mean_mkt_annual  <- mean_mkt_daily * 252

expected_returns_annual <- rf_annual + betas * (mean_mkt_annual - rf_annual)

er_df <- data.frame(
  Ticker = names(expected_returns_annual),
  `Expected Return (Annual)` = as.numeric(expected_returns_annual)
)
kable(er_df, caption = "TSSL kỳ vọng theo CAPM (annualized)")
TSSL kỳ vọng theo CAPM (annualized)
Ticker Expected.Return..Annual.
HPG 0.1046185
MWG 0.0997856
FPT 0.0909931
VCB 0.0921366
VNM 0.0779562

4 Ma trận hiệp phương sai (annualized)

cov_matrix <- cov(stock_returns, use = "pairwise.complete.obs") * 252
preview_idx <- 1:min(5, ncol(cov_matrix))
kable(round(cov_matrix[preview_idx, preview_idx], 6),
      caption = "Một phần ma trận hiệp phương sai (annualized)")
Một phần ma trận hiệp phương sai (annualized)
HPG MWG FPT VCB VNM
HPG 0.107866 0.048645 0.039525 0.039440 0.023099
MWG 0.048645 0.112933 0.042372 0.032838 0.025200
FPT 0.039525 0.042372 0.064369 0.029260 0.023392
VCB 0.039440 0.032838 0.029260 0.097065 0.023125
VNM 0.023099 0.025200 0.023392 0.023125 0.068536

5 Tóm tắt rủi ro & lợi nhuận

std_annual <- sqrt(diag(cov_matrix))  # độ lệch chuẩn năm

summary_df <- data.frame(
  Ticker = names(betas),
  `Expected Return (Annual)` = as.numeric(expected_returns_annual[names(betas)]),
  `Risk (Stdev Annual)`      = as.numeric(std_annual[names(betas)]),
  `Beta`                     = as.numeric(betas),
  row.names = NULL,
  check.names = FALSE
)
kable(summary_df, digits = 4, caption = "Tóm tắt rủi ro & lợi nhuận theo mã")
Tóm tắt rủi ro & lợi nhuận theo mã
Ticker Expected Return (Annual) Risk (Stdev Annual) Beta
HPG 0.1046 0.3284 1.2041
MWG 0.0998 0.3361 1.1065
FPT 0.0910 0.2537 0.9289
VCB 0.0921 0.3116 0.9520
VNM 0.0780 0.2618 0.6656

6 Tối ưu hoá đường biên hiệu quả (ROI + quadprog, w ≥ 0, ∑w=1)

library(Matrix)
library(ROI)
library(ROI.plugin.quadprog)

# Đầu vào tối ưu hoá (dạng daily)
mu_daily <- as.numeric(expected_returns_annual) / 252
names(mu_daily) <- names(expected_returns_annual)

Sigma_daily <- cov(stock_returns, use = "pairwise.complete.obs")
Sigma_pd    <- as.matrix(nearPD(Sigma_daily, corr = FALSE)$mat)  # làm dương xác định

# Lưới TSSL mục tiêu
min_ret <- min(mu_daily) + 1e-6
max_ret <- max(mu_daily) - 1e-6
target_returns <- seq(min_ret, max_ret, length.out = 60)

solve_markowitz <- function(target_return, mu, Sigma) {
  n <- length(mu)
  Q <- 2 * Sigma
  L <- rep(0, n)
  A <- rbind(rep(1, n), mu)  # ∑w=1; mu'w = target_return
  dir <- c("==", "==")
  rhs <- c(1, target_return)

  op <- OP(
    objective   = Q_objective(Q = Q, L = L),
    constraints = L_constraint(L = A, dir = dir, rhs = rhs),
    bounds      = V_bound(li = 1:n, lb = rep(0, n)) # w >= 0
  )
  sol <- ROI_solve(op, solver = "quadprog")
  w <- sol$solution
  names(w) <- names(mu)
  risk <- sqrt(sum(w * (Sigma %*% w)))
  c(Return = target_return, Risk = risk, w)
}

ports <- lapply(target_returns, function(tr) {
  tryCatch(solve_markowitz(tr, mu_daily, Sigma_pd), error = function(e) NULL)
})
frontier_df <- do.call(rbind, ports) %>% as.data.frame()
frontier_df <- frontier_df[is.finite(frontier_df$Return) & is.finite(frontier_df$Risk), , drop = FALSE]

# Annualize & Sharpe
frontier_df$Return_annual <- frontier_df$Return * 252
frontier_df$Risk_annual   <- frontier_df$Risk   * sqrt(252)
frontier_df$Sharpe_Ratio  <- (frontier_df$Return_annual - rf_annual) / pmax(frontier_df$Risk_annual, 1e-12)

# MVP & Max Sharpe
mvp_row        <- frontier_df[which.min(frontier_df$Risk_annual), , drop = FALSE]
max_sharpe_row <- frontier_df[which.max(frontier_df$Sharpe_Ratio), , drop = FALSE]

format_portfolio <- function(port_row, caption_text) {
  meta <- c("Return","Risk","Return_annual","Risk_annual","Sharpe_Ratio")
  tick_cols <- setdiff(colnames(port_row), meta)
  w <- as.numeric(port_row[, tick_cols])
  names(w) <- tick_cols
  w <- w[w > 1e-4]
  weights_tbl <- data.frame(`Cổ phiếu` = names(w), `Tỷ trọng` = paste0(round(100*w, 2), "%"))
  stats_tbl <- data.frame(
    `Thuộc tính` = c("TSSL (năm)", "Rủi ro - ĐLC (năm)", "Sharpe"),
    `Giá trị`    = c(paste0(round(100*port_row$Return_annual, 2), "%"),
                     paste0(round(100*port_row$Risk_annual, 2), "%"),
                     round(port_row$Sharpe_Ratio, 2))
  )
  print(kable(weights_tbl, caption = caption_text, row.names = FALSE))
  print(kable(stats_tbl, caption = "Thông số danh mục", row.names = FALSE))
}

format_portfolio(mvp_row,        "Danh mục rủi ro tối thiểu (MVP) - Tỷ trọng")
## 
## 
## Table: Danh mục rủi ro tối thiểu (MVP) - Tỷ trọng
## 
## |Cổ.phiếu |Tỷ.trọng |
## |:--------|:--------|
## |HPG      |8.91%    |
## |MWG      |7.18%    |
## |FPT      |31.07%   |
## |VCB      |17.31%   |
## |VNM      |35.52%   |
## 
## 
## Table: Thông số danh mục
## 
## |Thuộc.tính         |Giá.trị |
## |:------------------|:-------|
## |TSSL (năm)         |8.84%   |
## |Rủi ro - ĐLC (năm) |19.96%  |
## |Sharpe             |0.22    |
format_portfolio(max_sharpe_row, "Danh mục Sharpe tối đa - Tỷ trọng")
## 
## 
## Table: Danh mục Sharpe tối đa - Tỷ trọng
## 
## |Cổ.phiếu |Tỷ.trọng |
## |:--------|:--------|
## |HPG      |23.79%   |
## |MWG      |15.29%   |
## |FPT      |27.28%   |
## |VCB      |17.88%   |
## |VNM      |15.78%   |
## 
## 
## Table: Thông số danh mục
## 
## |Thuộc.tính         |Giá.trị |
## |:------------------|:-------|
## |TSSL (năm)         |9.37%   |
## |Rủi ro - ĐLC (năm) |21.17%  |
## |Sharpe             |0.23    |