library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.3
## Warning: package 'tibble' was built under R version 4.1.3
## Warning: package 'tidyr' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.3
## Warning: package 'purrr' was built under R version 4.1.3
## Warning: package 'dplyr' was built under R version 4.1.3
## Warning: package 'stringr' was built under R version 4.1.3
## Warning: package 'forcats' was built under R version 4.1.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
setwd('D:/S1 (UNAIR-ASIA)/SMT 6/Investment Portfolio Analysis')
retdata = read_csv('m-barra-9003.csv')
## Rows: 168 Columns: 10
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## dbl (10): AGE, C, MWD, MER, DELL, HPQ, IBM, AA, CAT, PG
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(retdata)
## Rows: 168
## Columns: 10
## $ AGE <dbl> -12.17, 4.95, 13.08, -11.06, 19.70, -1.44, -6.52, -8.77, -27.47, ~
## $ C <dbl> -8.69, 2.23, 1.67, 3.90, 13.11, 3.95, -5.84, -14.19, -27.07, -11.~
## $ MWD <dbl> -8.37, 2.31, 0.16, 2.20, 12.28, 1.82, -16.96, -7.44, -12.81, -1.8~
## $ MER <dbl> -13.97, -0.64, -3.99, -4.10, 13.21, -5.41, 5.48, -13.85, -9.76, -~
## $ DELL <dbl> -16.55, 34.49, 21.34, 10.83, 28.77, 14.13, -7.57, -0.62, -26.15, ~
## $ HPQ <dbl> -6.19, -4.01, 5.67, -5.29, 8.81, -1.47, -9.37, -19.75, -4.26, -22~
## $ IBM <dbl> 4.14, 5.90, 1.51, 2.06, 10.56, -2.73, -5.74, -8.17, 3.80, -1.54, ~
## $ AA <dbl> -16.40, 4.04, 0.12, -4.28, 5.81, -4.05, 9.09, -7.99, -3.33, -15.1~
## $ CAT <dbl> -4.44, 8.84, 0.17, 0.25, 8.52, -22.10, -7.19, -12.64, -2.36, -3.4~
## $ PG <dbl> -8.89, -0.84, 5.41, 4.26, 16.35, 4.80, -0.55, -11.86, -5.97, 7.98~
#Mean
round(apply(retdata,2,mean),2)
## AGE C MWD MER DELL HPQ IBM AA CAT PG
## 1.36 2.08 1.87 2.08 4.82 1.37 1.06 1.09 1.23 1.08
#Standard Deviation
round(apply(retdata,2,sd),2)
## AGE C MWD MER DELL HPQ IBM AA CAT PG
## 10.18 9.60 11.17 10.39 16.44 11.78 9.47 9.49 8.71 6.75
rmean=apply(retdata,2,mean)
#mean correction
R.rm=retdata-rmean
fin=c(rep(1,4),rep(0,6))
tech=c(rep(0,4),rep(1,3),rep(0,3))
oth=c(rep(0,7),rep(1,3))
#beta
ind.dum=cbind(fin,tech,oth)
ind.dum
## fin tech oth
## [1,] 1 0 0
## [2,] 1 0 0
## [3,] 1 0 0
## [4,] 1 0 0
## [5,] 0 1 0
## [6,] 0 1 0
## [7,] 0 1 0
## [8,] 0 0 1
## [9,] 0 0 1
## [10,] 0 0 1
cov.R=var(R.rm)
sd.R=sqrt(diag(cov.R))
corr.R=cov.R/outer(sd.R,sd.R)
print(corr.R)
## AGE C MWD MER DELL HPQ IBM
## AGE 1.0000000 0.6182390 0.6208611 0.6451494 0.2723693 0.2991778 0.2695563
## C 0.6182390 1.0000000 0.7103460 0.6606422 0.2419522 0.3653988 0.4029239
## MWD 0.6208611 0.7103460 1.0000000 0.7945580 0.2552549 0.4640199 0.3846602
## MER 0.6451494 0.6606422 0.7945580 1.0000000 0.2293966 0.4827828 0.3043901
## DELL 0.2723693 0.2419522 0.2552549 0.2293966 1.0000000 0.4380318 0.3428415
## HPQ 0.2991778 0.3653988 0.4640199 0.4827828 0.4380318 1.0000000 0.4563985
## IBM 0.2695563 0.4029239 0.3846602 0.3043901 0.3428415 0.4563985 1.0000000
## AA 0.2937591 0.3728564 0.4039369 0.3806761 0.3214074 0.5013769 0.4113859
## CAT 0.2852101 0.3667355 0.2807931 0.2968603 0.1264958 0.2376912 0.3263783
## PG 0.1466319 0.2721835 0.2358181 0.2257318 0.1024763 0.0632573 -0.0339072
## AA CAT PG
## AGE 0.2937591 0.2852101 0.1466319
## C 0.3728564 0.3667355 0.2721835
## MWD 0.4039369 0.2807931 0.2358181
## MER 0.3806761 0.2968603 0.2257318
## DELL 0.3214074 0.1264958 0.1024763
## HPQ 0.5013769 0.2376912 0.0632573
## IBM 0.4113859 0.3263783 -0.0339072
## AA 1.0000000 0.6122479 0.0359865
## CAT 0.6122479 1.0000000 0.1150381
## PG 0.0359865 0.1150381 1.0000000
F.hat.o=solve(crossprod(ind.dum))%*%(t(ind.dum)%*%t(R.rm))
E.hat.o=R.rm-ind.dum%*%F.hat.o
diagD.hat.o=diag(apply(E.hat.o,2,var))
Dinv.hat=solve(diagD.hat.o)
H1=t(ind.dum)%*%Dinv.hat%*%ind.dum
Hmtx=solve(H1)%*%t(ind.dum)%*%Dinv.hat
F.hat.g=Hmtx%*%t(R.rm)
par(mfrow=c(3,1))
plot(F.hat.g[1,],type="l")
plot(F.hat.g[2,],type="l")
plot(F.hat.g[3,],type="l")

F.hat.gt=t(F.hat.g)
E.hat.g=R.rm-ind.dum%*%F.hat.g
diagD.hat.g=apply(E.hat.g,2,var)
t(Hmtx)
## fin tech oth
## [1,] 0.2175398 0.0000000 0.0000000
## [2,] 0.2984352 0.0000000 0.0000000
## [3,] 0.2157723 0.0000000 0.0000000
## [4,] 0.2682527 0.0000000 0.0000000
## [5,] 0.0000000 0.2569000 0.0000000
## [6,] 0.0000000 0.3688383 0.0000000
## [7,] 0.0000000 0.3742617 0.0000000
## [8,] 0.0000000 0.0000000 0.1985438
## [9,] 0.0000000 0.0000000 0.3159870
## [10,] 0.0000000 0.0000000 0.4854691
cov.ind=ind.dum%*%var(F.hat.gt)%*%t(ind.dum)+diag(diagD.hat.g)
sd.ind=sqrt(diag(cov.ind))
corr.ind=cov.ind/outer(sd.ind,sd.ind)
print(corr.ind)
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1.00000000 0.3485291 0.31152914 0.3355360 0.13061681 0.15041073
## [2,] 0.34852911 1.0000000 0.34672390 0.3734429 0.14537315 0.16740327
## [3,] 0.31152914 0.3467239 1.00000000 0.3337980 0.12994028 0.14963168
## [4,] 0.33553596 0.3734429 0.33379804 1.0000000 0.13995364 0.16116248
## [5,] 0.13061681 0.1453731 0.12994028 0.1399536 1.00000000 0.26290444
## [6,] 0.15041073 0.1674033 0.14963168 0.1611625 0.26290444 1.00000000
## [7,] 0.15242249 0.1696423 0.15163301 0.1633180 0.26642080 0.30679471
## [8,] 0.09193178 0.1023177 0.09145561 0.0985033 0.05925829 0.06823840
## [9,] 0.11156333 0.1241671 0.11098549 0.1195382 0.07191259 0.08281036
## [10,] 0.13270135 0.1476932 0.13201402 0.1421872 0.08553794 0.09850052
## [,7] [,8] [,9] [,10]
## [1,] 0.15242249 0.09193178 0.11156333 0.13270135
## [2,] 0.16964230 0.10231770 0.12416711 0.14769318
## [3,] 0.15163301 0.09145561 0.11098549 0.13201402
## [4,] 0.16331804 0.09850330 0.11953817 0.14218718
## [5,] 0.26642080 0.05925829 0.07191259 0.08553794
## [6,] 0.30679471 0.06823840 0.08281036 0.09850052
## [7,] 1.00000000 0.06915109 0.08391795 0.09981797
## [8,] 0.06915109 1.00000000 0.14819746 0.17627657
## [9,] 0.08391795 0.14819746 1.00000000 0.21391953
## [10,] 0.09981797 0.17627657 0.21391953 1.00000000
glm(t(R.rm)[,1]~ind.dum[,1]+ind.dum[,2]+ind.dum[,3])
##
## Call: glm(formula = t(R.rm)[, 1] ~ ind.dum[, 1] + ind.dum[, 2] + ind.dum[,
## 3])
##
## Coefficients:
## (Intercept) ind.dum[, 1] ind.dum[, 2] ind.dum[, 3]
## -12.4908 -0.4257 4.8045 NA
##
## Degrees of Freedom: 9 Total (i.e. Null); 7 Residual
## Null Deviance: 375.3
## Residual Deviance: 321.5 AIC: 71.08
#-----------------------------------------------------
# Traditional portfolio optimization
#-----------------------------------------------------
library(quadprogXT) # solveQPXT
library(MASS) # mvrnorm
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
retdata = read_csv('m-barra-9003.csv')
## Rows: 168 Columns: 10
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## dbl (10): AGE, C, MWD, MER, DELL, HPQ, IBM, AA, CAT, PG
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(retdata)
## Rows: 168
## Columns: 10
## $ AGE <dbl> -12.17, 4.95, 13.08, -11.06, 19.70, -1.44, -6.52, -8.77, -27.47, ~
## $ C <dbl> -8.69, 2.23, 1.67, 3.90, 13.11, 3.95, -5.84, -14.19, -27.07, -11.~
## $ MWD <dbl> -8.37, 2.31, 0.16, 2.20, 12.28, 1.82, -16.96, -7.44, -12.81, -1.8~
## $ MER <dbl> -13.97, -0.64, -3.99, -4.10, 13.21, -5.41, 5.48, -13.85, -9.76, -~
## $ DELL <dbl> -16.55, 34.49, 21.34, 10.83, 28.77, 14.13, -7.57, -0.62, -26.15, ~
## $ HPQ <dbl> -6.19, -4.01, 5.67, -5.29, 8.81, -1.47, -9.37, -19.75, -4.26, -22~
## $ IBM <dbl> 4.14, 5.90, 1.51, 2.06, 10.56, -2.73, -5.74, -8.17, 3.80, -1.54, ~
## $ AA <dbl> -16.40, 4.04, 0.12, -4.28, 5.81, -4.05, 9.09, -7.99, -3.33, -15.1~
## $ CAT <dbl> -4.44, 8.84, 0.17, 0.25, 8.52, -22.10, -7.19, -12.64, -2.36, -3.4~
## $ PG <dbl> -8.89, -0.84, 5.41, 4.26, 16.35, 4.80, -0.55, -11.86, -5.97, 7.98~
mu = c()
sd = c()
for (data in retdata){
mu = c(mu,round(mean(data),3))
sd = c(sd,round(sd(data),3))
}
corr <- cor(retdata)
round(corr,2)
## AGE C MWD MER DELL HPQ IBM AA CAT PG
## AGE 1.00 0.63 0.62 0.64 0.29 0.31 0.27 0.30 0.28 0.18
## C 0.63 1.00 0.71 0.68 0.25 0.37 0.39 0.38 0.39 0.30
## MWD 0.62 0.71 1.00 0.80 0.26 0.46 0.37 0.40 0.27 0.27
## MER 0.64 0.68 0.80 1.00 0.23 0.47 0.31 0.37 0.28 0.27
## DELL 0.29 0.25 0.26 0.23 1.00 0.45 0.36 0.33 0.11 0.10
## HPQ 0.31 0.37 0.46 0.47 0.45 1.00 0.45 0.51 0.23 0.08
## IBM 0.27 0.39 0.37 0.31 0.36 0.45 1.00 0.41 0.34 -0.01
## AA 0.30 0.38 0.40 0.37 0.33 0.51 0.41 1.00 0.60 0.06
## CAT 0.28 0.39 0.27 0.28 0.11 0.23 0.34 0.60 1.00 0.13
## PG 0.18 0.30 0.27 0.27 0.10 0.08 -0.01 0.06 0.13 1.00
# number of asset
nvar <- length(mu)
var.name <- colnames(retdata)
# convert correlation to covariance matrix
cov <- diag(sd)%*%corr%*%diag(sd)
n.er <- 100 # number of EF points
rset <- seq(min(mu),max(mu),length=n.er+2)
rset <- rset[2:n.er+1]
port1.ret <- rset
port1.std <- rset*0
port1.wgt <- matrix(0,n.er,nvar)
# calculate efficient frontier
for (i in 1:c(n.er-1)) {
Dmat <- 2*cov
dvec <- rep(0,nvar) #c(0,0)
Amat <- t(rbind(t(rep(1,nvar)),t(mu),diag(nvar)))
bvec <- c(1,rset[i],rep(0,nvar))
# mean-variance optimization
m<-solveQPXT(Dmat,dvec,Amat,bvec,meq=2,factorized=FALSE)
# output
port1.std[i] <- sqrt(m$value)
port1.wgt[i,] <- t(m$solution)
}
# draw efficient fronter and allocation profile
x11(width=6); par(mfrow = c(2,1), mar = c(3,2,2,3), xpd=TRUE)
# individual asset
plot(sqrt(diag(cov)), mu,
xlim=c(0.8*min(port1.std),1.2*max(port1.std)),
ylim=c(0.8*min(mu),1.2*max(mu)),
col = rainbow(nvar), lwd = 10)
text(sqrt(diag(cov)), mu,
labels=var.name, cex= 1)
# efficient frontier
lines(port1.std,port1.ret,col = "green", lwd = 6)
# weight
barplot(t(port1.wgt),col=rainbow(nvar))
legend("topright", legend = var.name,
fill = rainbow(nvar), ncol = 1,
inset=c(-0.07,0), cex = 0.6)
