Downloading data

First lets load our packages

library(PerformanceAnalytics)
## 载入需要的程序包:xts
## 载入需要的程序包:zoo
## 
## 载入程序包:'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 载入程序包:'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
library(quantmod)
## 载入需要的程序包:TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(xts) 
library(dygraphs)#画图
library(tibble)
library(tidyr) 
library(ggplot2) #画图
library(highcharter) 
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
library(tidyquant)
## 载入需要的程序包:lubridate
## 
## 载入程序包:'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(plotly) 
## 
## 载入程序包:'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(timetk) # To manipulate the data series
library(forcats)

Next lets select a few stocks to build our portfolios.

We will choose the following 4 stocks WBYY(威博液压) JGJM(吉冈精密) SXYY(森萱医药) GSSP(盖世食品) KLT(克莱特)

Read the price data.

##选取投资组合对象
#WBYY(威博液压) JGJM(吉冈精密)  SXYY(森萱医药)  GSSP(盖世食品)  KLT(克莱特)

##读取数据
price <-read.csv("E:/portfolio/ Final Report/data1.csv")
#修改数据格式--转化成时间序列
# 假设 price[,1] 包含的日期时间数据如 "2021/1/4" price[,1] = as.POSIXct(price[,1], format = "%Y/%m/%d")
price[,1] = as.POSIXct(price[,1])


#price[,1] = as.POSIXct(price[,1]): 
#这行代码将数据框(data frame)中的第一列转换为POSIXct类型,
#即日期/时间类型。POSIXct是R语言中用于表示日期和时间的一种数据类型
#它以整数形式存储日期和时间,并使用以秒为单位的时间戳来表示。
price = xts(price[,-1], order.by=price[,1])
#order.by=price[,1] 指定了时间序列对象的时间索引,
#也就是根据数据框中的第一列的时间信息来排序时间序列数据
##绘制走势图
price = price[,1:5]
names(price)<-c("WBYY","JGJM","SXYY","GSSP","KLT")
dygraph(price) #动态图 plot太low了不要用 dy可以实时显示数字 

Next we will calculate the daily returns for these stocks. We will use the logarithmic returns.

#plot(price)
##获取月汇报
#过滤数据
#pr_monthly <- to.monthly(price, indexAt = "lastof", OHLC = FALSE) 
#head(pr_monthly)
#计算回报
#rt_monthly <- na.omit(Return.calculate(pr_monthly, method = "log")) 
head(price)#打印前五行
##            WBYY  JGJM SXYY GSSP  KLT
## 2021-01-04 5.01 10.70 4.93 6.88 4.97
## 2021-01-05 5.01 10.70 4.97 6.88 4.97
## 2021-01-06 5.01 10.70 4.93 6.88 4.97
## 2021-01-07 5.01 10.70 4.75 6.88 4.97
## 2021-01-08 5.01  6.51 4.75 6.88 4.97
## 2021-01-11 5.01  6.51 4.62 6.88 4.97
pr_daily <- to.daily(price, indexAt = "lastof", OHLC = FALSE) 
head(pr_daily)
##            WBYY  JGJM SXYY GSSP  KLT
## 2021-01-31 5.01 10.70 4.93 6.88 4.97
## 2021-01-31 5.01 10.70 4.97 6.88 4.97
## 2021-01-31 5.01 10.70 4.93 6.88 4.97
## 2021-01-31 5.01 10.70 4.75 6.88 4.97
## 2021-01-31 5.01  6.51 4.75 6.88 4.97
## 2021-01-31 5.01  6.51 4.62 6.88 4.97
#计算回报
rt_daily <- na.omit(Return.calculate(pr_daily, method = "log")) #去掉空白值 na就是空值
#这里面计算回报率就是后一天检前一天 除以前一天,用对数收益率的方法
head(rt_daily)
##            WBYY       JGJM         SXYY      GSSP KLT
## 2021-01-31    0  0.0000000  0.008080852  0.000000   0
## 2021-01-31    0  0.0000000 -0.008080852  0.000000   0
## 2021-01-31    0  0.0000000 -0.037194370  0.000000   0
## 2021-01-31    0 -0.4969043  0.000000000  0.000000   0
## 2021-01-31    0  0.0000000 -0.027749913  0.000000   0
## 2021-01-31    0  0.0000000 -0.028542003 -0.444744   0
dygraph(rt_daily)

Next let’s calculate the mean daily returns for each asset. Calculate the covariance matrix for all these stocks. We will annualize it by multiplying by 252.

mean_ret <- colMeans(rt_daily)#计算回报率的均值
print(round(mean_ret, 5))#收益能力
##    WBYY    JGJM    SXYY    GSSP     KLT 
## 0.00304 0.00085 0.00228 0.00092 0.00202
cov_mat <- cov(rt_daily) * 252 #计算协方差 【为什么成252??全年吗?】
print(round(cov_mat,5)) #风险情况
##          WBYY     JGJM     SXYY     GSSP     KLT
## WBYY  1.42693  0.00169 -0.07218 -0.03966 0.01153
## JGJM  0.00169  3.18262  0.01016 -0.05360 0.01732
## SXYY -0.07218  0.01016  0.95445  0.22082 0.02684
## GSSP -0.03966 -0.05360  0.22082  0.80799 0.03302
## KLT   0.01153  0.01732  0.02684  0.03302 0.41387

To calculate the portfolio returns and risk (standard deviation) we will us need

#随机抽样,给四个股票每人一个比值,让他们和是1
#simulation和computation 模拟和计算
#↓模拟法 随机生成一千个,算整体收益和风险,一千个中收益最高 就是现实收益最高,
#一千个中风险最小就是接近现实中风险越小
tick <- c("WBYY","JGJM","SXYY","GSSP","KLT")
wts <- runif(n = length(tick))#tick是四个数 给四个数赋予权重
#wts <- runif(n = length(tick))这行代码生成的是一个随机数向量,
#每个数字都是介于0和1之间的一个值,
#代表了投资组合中每个资产的相对权重。
wts <- wts/sum(wts)
print(wts)
## [1] 0.31615870 0.32867719 0.29704259 0.03042392 0.02769760
port_returns <- (sum(wts * mean_ret) + 1)^252 - 1#四个整体收益


port_risk <- sqrt(t(wts) %*% (cov_mat %*% wts))#转置 再相乘
sharpe_ratio <- port_returns/port_risk

We have everything we need to perform our optimization. All we need now is to run this code on 10000 random portfolios. For that we will use a for loop.

Before we do that, we need to create empty vectors and matrix for storing our values.

# Creating a matrix to store the weights
num_port <- 1000

# Creating a matrix to store the weights

all_wts <- matrix(nrow = num_port,
                  ncol = length(tick))

# Creating an empty vector to store
# Portfolio returns

port_returns <- vector('numeric', length = num_port)

# Creating an empty vector to store
# Portfolio Standard deviation

port_risk <- vector('numeric', length = num_port)

# Creating an empty vector to store
# Portfolio Sharpe Ratio

sharpe_ratio <- vector('numeric', length = num_port)

Next lets run the for loop 5000 times.

for (i in seq_along(port_returns)) {
  
  wts <- runif(length(tick))
  wts <- wts/sum(wts)
  
  # Storing weight in the matrix
  all_wts[i,] <- wts
  
  # Portfolio returns
  
  port_ret <- sum(wts * mean_ret)
  port_ret <- ((port_ret + 1)^252) - 1
  
  # Storing Portfolio Returns values
  port_returns[i] <- port_ret
  
  
  # Creating and storing portfolio risk
  port_sd <- sqrt(t(wts) %*% (cov_mat  %*% wts))
  port_risk[i] <- port_sd
  
  # Creating and storing Portfolio Sharpe Ratios
  # Assuming 0% Risk free rate
  
  sr <- port_ret/port_sd
  sharpe_ratio[i] <- sr
}

All the heavy lifting has been done and now we can create a data table to store all the values together.

# Storing the values in the table
portfolio_values <- tibble(Return = port_returns,
                           Risk = port_risk,
                           SharpeRatio = sharpe_ratio)


# Converting matrix to a tibble and changing column names
all_wts <- tk_tbl(all_wts)
## Warning in tk_tbl.data.frame(as.data.frame(data), preserve_index, rename_index,
## : Warning: No index to preserve. Object otherwise converted to tibble
## successfully.
colnames(all_wts) <- colnames(pr_daily)
# Combing all the values together
portfolio_values <- tk_tbl(cbind(all_wts, portfolio_values))
## Warning in tk_tbl.data.frame(cbind(all_wts, portfolio_values)): Warning: No
## index to preserve. Object otherwise converted to tibble successfully.

We have the weights in each asset with the risk and returns along with the Sharpe ratio of each portfolio.

Next lets look at the portfolios that matter the most.

min_var <- portfolio_values[which.min(portfolio_values$Risk),]
max_sr <- portfolio_values[which.max(portfolio_values$SharpeRatio),]

Plot the weights of each portfolio. First with the minimum variance portfolio.

p <- min_var %>%
  gather(WBYY:KLT, key = Asset,
         value = Weights) %>%
  mutate(Asset = as.factor(Asset)) %>%
  ggplot(aes(x = fct_reorder(Asset,Weights), y = Weights, fill = Asset)) +
  geom_bar(stat = 'identity') +
  theme_minimal() +
  labs(x = 'Assets', y = 'Weights', title = "Minimum Variance Portfolio Weights") +
  scale_y_continuous(labels = scales::percent) 


ggplotly(p)

Next lets look at the tangency portfolio or the the portfolio with the highest sharpe ratio.

p <- max_sr %>%
  gather(WBYY:KLT, key = Asset,
         value = Weights) %>%
  mutate(Asset = as.factor(Asset)) %>%
  ggplot(aes(x = fct_reorder(Asset,Weights), y = Weights, fill = Asset)) +
  geom_bar(stat = 'identity') +
  theme_minimal() +
  labs(x = 'Assets', y = 'Weights', title = "Tangency Portfolio Weights") +
  scale_y_continuous(labels = scales::percent) 


ggplotly(p)

Finally lets plot all the random portfolios and visualize the efficient frontier.

p <- portfolio_values %>%
  ggplot(aes(x = Risk, y = Return, color = SharpeRatio)) +
  geom_point() +
  theme_classic() +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(labels = scales::percent) +
  labs(x = 'Annualized Risk',
       y = 'Annualized Returns',
       title = "Portfolio Optimization & Efficient Frontier") +
  geom_point(aes(x = Risk,
                 y = Return), data = min_var, color = 'orange') +
  geom_point(aes(x = Risk,
                 y = Return), data = max_sr, color = 'orange') +
  scale_color_gradient(low = "skyblue", high = "pink") #+
#annotate('text', x = 3.2, y = 550, label = "Tangency Portfolio") +
#annotate('text', x = 1.3, y = 40, label = "Min Var portfolio") #+
#annotate(geom = 'segment', x = 0.2, xend = 0.185,  y = 0.03, 
#         yend = 0.11, color = 'red', arrow = arrow(type = "open")) +
#annotate(geom = 'segment', x = 0.285, xend = 0.26,  y = 0.34, 
#         yend = 0.31, color = 'red', arrow = arrow(type = "open"))

ggplotly(p)