library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.3
## Warning: package 'tibble' was built under R version 4.1.3
## Warning: package 'tidyr' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.3
## Warning: package 'purrr' was built under R version 4.1.3
## Warning: package 'dplyr' was built under R version 4.1.3
## Warning: package 'stringr' was built under R version 4.1.3
## Warning: package 'forcats' was built under R version 4.1.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
setwd('D:/S1 (UNAIR-ASIA)/SMT 6/Investment Portfolio Analysis')

retdata = read_csv('m-barra-9003.csv')
## Rows: 168 Columns: 10
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## dbl (10): AGE, C, MWD, MER, DELL, HPQ, IBM, AA, CAT, PG
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(retdata)
## Rows: 168
## Columns: 10
## $ AGE  <dbl> -12.17, 4.95, 13.08, -11.06, 19.70, -1.44, -6.52, -8.77, -27.47, ~
## $ C    <dbl> -8.69, 2.23, 1.67, 3.90, 13.11, 3.95, -5.84, -14.19, -27.07, -11.~
## $ MWD  <dbl> -8.37, 2.31, 0.16, 2.20, 12.28, 1.82, -16.96, -7.44, -12.81, -1.8~
## $ MER  <dbl> -13.97, -0.64, -3.99, -4.10, 13.21, -5.41, 5.48, -13.85, -9.76, -~
## $ DELL <dbl> -16.55, 34.49, 21.34, 10.83, 28.77, 14.13, -7.57, -0.62, -26.15, ~
## $ HPQ  <dbl> -6.19, -4.01, 5.67, -5.29, 8.81, -1.47, -9.37, -19.75, -4.26, -22~
## $ IBM  <dbl> 4.14, 5.90, 1.51, 2.06, 10.56, -2.73, -5.74, -8.17, 3.80, -1.54, ~
## $ AA   <dbl> -16.40, 4.04, 0.12, -4.28, 5.81, -4.05, 9.09, -7.99, -3.33, -15.1~
## $ CAT  <dbl> -4.44, 8.84, 0.17, 0.25, 8.52, -22.10, -7.19, -12.64, -2.36, -3.4~
## $ PG   <dbl> -8.89, -0.84, 5.41, 4.26, 16.35, 4.80, -0.55, -11.86, -5.97, 7.98~
#Mean
round(apply(retdata,2,mean),2)
##  AGE    C  MWD  MER DELL  HPQ  IBM   AA  CAT   PG 
## 1.36 2.08 1.87 2.08 4.82 1.37 1.06 1.09 1.23 1.08
#Standard Deviation
round(apply(retdata,2,sd),2)
##   AGE     C   MWD   MER  DELL   HPQ   IBM    AA   CAT    PG 
## 10.18  9.60 11.17 10.39 16.44 11.78  9.47  9.49  8.71  6.75
rmean=apply(retdata,2,mean)

#mean correction
R.rm=retdata-rmean 
fin=c(rep(1,4),rep(0,6))
tech=c(rep(0,4),rep(1,3),rep(0,3))
oth=c(rep(0,7),rep(1,3))
#beta
ind.dum=cbind(fin,tech,oth) 
ind.dum
##       fin tech oth
##  [1,]   1    0   0
##  [2,]   1    0   0
##  [3,]   1    0   0
##  [4,]   1    0   0
##  [5,]   0    1   0
##  [6,]   0    1   0
##  [7,]   0    1   0
##  [8,]   0    0   1
##  [9,]   0    0   1
## [10,]   0    0   1
cov.R=var(R.rm)
sd.R=sqrt(diag(cov.R))
corr.R=cov.R/outer(sd.R,sd.R)
print(corr.R)
##            AGE         C       MWD       MER      DELL       HPQ        IBM
## AGE  1.0000000 0.6182390 0.6208611 0.6451494 0.2723693 0.2991778  0.2695563
## C    0.6182390 1.0000000 0.7103460 0.6606422 0.2419522 0.3653988  0.4029239
## MWD  0.6208611 0.7103460 1.0000000 0.7945580 0.2552549 0.4640199  0.3846602
## MER  0.6451494 0.6606422 0.7945580 1.0000000 0.2293966 0.4827828  0.3043901
## DELL 0.2723693 0.2419522 0.2552549 0.2293966 1.0000000 0.4380318  0.3428415
## HPQ  0.2991778 0.3653988 0.4640199 0.4827828 0.4380318 1.0000000  0.4563985
## IBM  0.2695563 0.4029239 0.3846602 0.3043901 0.3428415 0.4563985  1.0000000
## AA   0.2937591 0.3728564 0.4039369 0.3806761 0.3214074 0.5013769  0.4113859
## CAT  0.2852101 0.3667355 0.2807931 0.2968603 0.1264958 0.2376912  0.3263783
## PG   0.1466319 0.2721835 0.2358181 0.2257318 0.1024763 0.0632573 -0.0339072
##             AA       CAT         PG
## AGE  0.2937591 0.2852101  0.1466319
## C    0.3728564 0.3667355  0.2721835
## MWD  0.4039369 0.2807931  0.2358181
## MER  0.3806761 0.2968603  0.2257318
## DELL 0.3214074 0.1264958  0.1024763
## HPQ  0.5013769 0.2376912  0.0632573
## IBM  0.4113859 0.3263783 -0.0339072
## AA   1.0000000 0.6122479  0.0359865
## CAT  0.6122479 1.0000000  0.1150381
## PG   0.0359865 0.1150381  1.0000000
F.hat.o=solve(crossprod(ind.dum))%*%(t(ind.dum)%*%t(R.rm))
E.hat.o=R.rm-ind.dum%*%F.hat.o
diagD.hat.o=diag(apply(E.hat.o,2,var))

Dinv.hat=solve(diagD.hat.o)

H1=t(ind.dum)%*%Dinv.hat%*%ind.dum
Hmtx=solve(H1)%*%t(ind.dum)%*%Dinv.hat
F.hat.g=Hmtx%*%t(R.rm)
par(mfrow=c(3,1))
plot(F.hat.g[1,],type="l")
plot(F.hat.g[2,],type="l")
plot(F.hat.g[3,],type="l")

F.hat.gt=t(F.hat.g)
E.hat.g=R.rm-ind.dum%*%F.hat.g
diagD.hat.g=apply(E.hat.g,2,var)
t(Hmtx)
##             fin      tech       oth
##  [1,] 0.2175398 0.0000000 0.0000000
##  [2,] 0.2984352 0.0000000 0.0000000
##  [3,] 0.2157723 0.0000000 0.0000000
##  [4,] 0.2682527 0.0000000 0.0000000
##  [5,] 0.0000000 0.2569000 0.0000000
##  [6,] 0.0000000 0.3688383 0.0000000
##  [7,] 0.0000000 0.3742617 0.0000000
##  [8,] 0.0000000 0.0000000 0.1985438
##  [9,] 0.0000000 0.0000000 0.3159870
## [10,] 0.0000000 0.0000000 0.4854691
cov.ind=ind.dum%*%var(F.hat.gt)%*%t(ind.dum)+diag(diagD.hat.g)
sd.ind=sqrt(diag(cov.ind))
corr.ind=cov.ind/outer(sd.ind,sd.ind)
print(corr.ind)
##             [,1]      [,2]       [,3]      [,4]       [,5]       [,6]
##  [1,] 1.00000000 0.3485291 0.31152914 0.3355360 0.13061681 0.15041073
##  [2,] 0.34852911 1.0000000 0.34672390 0.3734429 0.14537315 0.16740327
##  [3,] 0.31152914 0.3467239 1.00000000 0.3337980 0.12994028 0.14963168
##  [4,] 0.33553596 0.3734429 0.33379804 1.0000000 0.13995364 0.16116248
##  [5,] 0.13061681 0.1453731 0.12994028 0.1399536 1.00000000 0.26290444
##  [6,] 0.15041073 0.1674033 0.14963168 0.1611625 0.26290444 1.00000000
##  [7,] 0.15242249 0.1696423 0.15163301 0.1633180 0.26642080 0.30679471
##  [8,] 0.09193178 0.1023177 0.09145561 0.0985033 0.05925829 0.06823840
##  [9,] 0.11156333 0.1241671 0.11098549 0.1195382 0.07191259 0.08281036
## [10,] 0.13270135 0.1476932 0.13201402 0.1421872 0.08553794 0.09850052
##             [,7]       [,8]       [,9]      [,10]
##  [1,] 0.15242249 0.09193178 0.11156333 0.13270135
##  [2,] 0.16964230 0.10231770 0.12416711 0.14769318
##  [3,] 0.15163301 0.09145561 0.11098549 0.13201402
##  [4,] 0.16331804 0.09850330 0.11953817 0.14218718
##  [5,] 0.26642080 0.05925829 0.07191259 0.08553794
##  [6,] 0.30679471 0.06823840 0.08281036 0.09850052
##  [7,] 1.00000000 0.06915109 0.08391795 0.09981797
##  [8,] 0.06915109 1.00000000 0.14819746 0.17627657
##  [9,] 0.08391795 0.14819746 1.00000000 0.21391953
## [10,] 0.09981797 0.17627657 0.21391953 1.00000000
glm(t(R.rm)[,1]~ind.dum[,1]+ind.dum[,2]+ind.dum[,3])
## 
## Call:  glm(formula = t(R.rm)[, 1] ~ ind.dum[, 1] + ind.dum[, 2] + ind.dum[, 
##     3])
## 
## Coefficients:
##  (Intercept)  ind.dum[, 1]  ind.dum[, 2]  ind.dum[, 3]  
##     -12.4908       -0.4257        4.8045            NA  
## 
## Degrees of Freedom: 9 Total (i.e. Null);  7 Residual
## Null Deviance:       375.3 
## Residual Deviance: 321.5     AIC: 71.08
#-----------------------------------------------------
# Traditional portfolio optimization
#-----------------------------------------------------
library(quadprogXT) # solveQPXT
library(MASS)       # mvrnorm
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
retdata = read_csv('m-barra-9003.csv')
## Rows: 168 Columns: 10
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## dbl (10): AGE, C, MWD, MER, DELL, HPQ, IBM, AA, CAT, PG
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(retdata)
## Rows: 168
## Columns: 10
## $ AGE  <dbl> -12.17, 4.95, 13.08, -11.06, 19.70, -1.44, -6.52, -8.77, -27.47, ~
## $ C    <dbl> -8.69, 2.23, 1.67, 3.90, 13.11, 3.95, -5.84, -14.19, -27.07, -11.~
## $ MWD  <dbl> -8.37, 2.31, 0.16, 2.20, 12.28, 1.82, -16.96, -7.44, -12.81, -1.8~
## $ MER  <dbl> -13.97, -0.64, -3.99, -4.10, 13.21, -5.41, 5.48, -13.85, -9.76, -~
## $ DELL <dbl> -16.55, 34.49, 21.34, 10.83, 28.77, 14.13, -7.57, -0.62, -26.15, ~
## $ HPQ  <dbl> -6.19, -4.01, 5.67, -5.29, 8.81, -1.47, -9.37, -19.75, -4.26, -22~
## $ IBM  <dbl> 4.14, 5.90, 1.51, 2.06, 10.56, -2.73, -5.74, -8.17, 3.80, -1.54, ~
## $ AA   <dbl> -16.40, 4.04, 0.12, -4.28, 5.81, -4.05, 9.09, -7.99, -3.33, -15.1~
## $ CAT  <dbl> -4.44, 8.84, 0.17, 0.25, 8.52, -22.10, -7.19, -12.64, -2.36, -3.4~
## $ PG   <dbl> -8.89, -0.84, 5.41, 4.26, 16.35, 4.80, -0.55, -11.86, -5.97, 7.98~
mu = c()
sd = c()

for (data in retdata){
  mu = c(mu,round(mean(data),3))
  sd = c(sd,round(sd(data),3))
}

corr <- cor(retdata)
round(corr,2)
##       AGE    C  MWD  MER DELL  HPQ   IBM   AA  CAT    PG
## AGE  1.00 0.63 0.62 0.64 0.29 0.31  0.27 0.30 0.28  0.18
## C    0.63 1.00 0.71 0.68 0.25 0.37  0.39 0.38 0.39  0.30
## MWD  0.62 0.71 1.00 0.80 0.26 0.46  0.37 0.40 0.27  0.27
## MER  0.64 0.68 0.80 1.00 0.23 0.47  0.31 0.37 0.28  0.27
## DELL 0.29 0.25 0.26 0.23 1.00 0.45  0.36 0.33 0.11  0.10
## HPQ  0.31 0.37 0.46 0.47 0.45 1.00  0.45 0.51 0.23  0.08
## IBM  0.27 0.39 0.37 0.31 0.36 0.45  1.00 0.41 0.34 -0.01
## AA   0.30 0.38 0.40 0.37 0.33 0.51  0.41 1.00 0.60  0.06
## CAT  0.28 0.39 0.27 0.28 0.11 0.23  0.34 0.60 1.00  0.13
## PG   0.18 0.30 0.27 0.27 0.10 0.08 -0.01 0.06 0.13  1.00
# number of asset
nvar <- length(mu)
var.name <- colnames(retdata)

# convert correlation to covariance matrix
cov <- diag(sd)%*%corr%*%diag(sd)

n.er <- 100  # number of EF points
rset <- seq(min(mu),max(mu),length=n.er+2)
rset <- rset[2:n.er+1]

port1.ret <- rset
port1.std <- rset*0
port1.wgt <- matrix(0,n.er,nvar)

# calculate efficient frontier
for (i in 1:c(n.er-1)) {
  Dmat <- 2*cov
  dvec <- rep(0,nvar) #c(0,0)
  Amat <- t(rbind(t(rep(1,nvar)),t(mu),diag(nvar)))
  bvec <- c(1,rset[i],rep(0,nvar))
  
  # mean-variance optimization
  m<-solveQPXT(Dmat,dvec,Amat,bvec,meq=2,factorized=FALSE)
  
  # output
  port1.std[i]  <- sqrt(m$value)
  port1.wgt[i,] <- t(m$solution)
}

# draw efficient fronter and allocation profile
x11(width=6); par(mfrow = c(2,1), mar = c(3,2,2,3), xpd=TRUE)

# individual asset
plot(sqrt(diag(cov)), mu,
     xlim=c(0.8*min(port1.std),1.2*max(port1.std)), 
     ylim=c(0.8*min(mu),1.2*max(mu)),
     col = rainbow(nvar), lwd = 10)
text(sqrt(diag(cov)), mu, 
     labels=var.name, cex= 1)

# efficient frontier
lines(port1.std,port1.ret,col = "green", lwd = 6)

# weight
barplot(t(port1.wgt),col=rainbow(nvar))
legend("topright", legend = var.name,
       fill = rainbow(nvar), ncol = 1,
       inset=c(-0.07,0), cex = 0.6)