library(readr)
retdata = read.csv('m-fac9003.csv')
t = dim(retdata)[1]
market = retdata[,14]
riskfree = retdata[,14]
retdata1 = retdata[,c(-14, -15)]
retdata1 = as.matrix(retdata1)
head(retdata1)
##          AA    AGE    CAT     F    FDX    GM   HPQ    KMB    MEL    NYT    PG
## [1,] -16.40 -12.17  -4.44 -0.06  -2.28 -2.12 -6.19 -11.01 -10.77  -6.30 -8.89
## [2,]   4.04   4.95   8.84  6.02  10.47  8.97 -4.01  -5.20   0.34  -4.62 -0.84
## [3,]   0.12  13.08   0.17  2.06  10.84  1.57  5.67   3.21  -0.17  -0.66  5.41
## [4,]  -4.28 -11.06   0.25 -5.67  -2.44 -4.19 -5.29  -0.65  -2.20 -10.60  4.26
## [5,]   5.81  19.70   8.52  3.89 -16.17 10.94  8.81   8.83  11.85  11.59 16.35
## [6,]  -4.05  -1.44 -22.10 -5.79  -2.81 -2.70 -1.47   1.55  -7.76  -0.12  4.80
##         TRB   TXN
## [1,] -13.04 -7.61
## [2,]  -0.37  4.97
## [3,]   2.36  2.69
## [4,]  -7.98 -6.85
## [5,]   8.82 22.88
## [6,]  -0.64 -5.87
n = dim(retdata1)[2]
ones = rep(1,t)
X = cbind(ones,market)
b_hat = solve(t(X)%*%X)%*%t(X)%*%retdata1
E_hat = retdata1 - X%*%b_hat
diagD_hat = diag(t(E_hat)%*%E_hat)/(t-2)

#R-square

retvar = apply(retdata1,2,var) 
R_2 = 1 - diag(t(E_hat)%*%E_hat)/((t-1)*retvar)
res_std = sqrt(diagD_hat)
cov_factor = var(market)*t(b_hat)%*%b_hat + diag(diagD_hat); 
sd = sqrt(diag(cov_factor));
cor_factor = cov_factor/(sd%*%t(sd));

sample variance and correlation matrix

cov_sample = cov(retdata1);
cor_sample = cor(retdata1);

use factor covariance matrix to compute global minimum variance portfolio

one.vec = rep(1,13)
a = solve(cov_factor)%*%one.vec
b = t(one.vec)%*%a
mvp.w =a / as.numeric(b)
mvp.w
##            [,1]
## AA   0.03501615
## AGE -0.03373670
## CAT  0.05314427
## F    0.05576422
## FDX  0.06451069
## GM   0.12369835
## HPQ -0.03290554
## KMB  0.28022778
## MEL  0.01901912
## NYT  0.19373120
## PG   0.19019732
## TRB  0.14314200
## TXN -0.09180887

Beta

returns.mat = as.matrix(retdata[, c(-0,-14, -15)])
market.mat = as.matrix(retdata[,14, drop=F])
n.obs = nrow(returns.mat)
X.mat = cbind(rep(1,n.obs),market.mat)
colnames(X.mat)[1] = "intercept"
XX.mat = crossprod(X.mat)
G.hat = solve(XX.mat)%*%crossprod(X.mat,returns.mat)
beta.hat = G.hat[2,]
E.hat = returns.mat - X.mat%*%G.hat
diagD.hat = diag(crossprod(E.hat)/(n.obs-2))
sumSquares = apply(returns.mat, 2, function(x) {sum( (x - mean(x))^2 )})
R.square = 1 - (n.obs-2)*diagD.hat/sumSquares
R.square
##         AA        AGE        CAT          F        FDX         GM        HPQ 
## 0.34699704 0.41493755 0.21854203 0.29217512 0.13489415 0.23777933 0.35787960 
##        KMB        MEL        NYT         PG        TRB        TXN 
## 0.13397602 0.38829659 0.20499217 0.09036596 0.15730899 0.31611097
cbind(beta.hat, diagD.hat, R.square)
##      beta.hat diagD.hat   R.square
## AA  1.2915911  59.19846 0.34699704
## AGE 1.5141359  60.95651 0.41493755
## CAT 0.9406928  59.66741 0.21854203
## F   1.2192453  67.91031 0.29217512
## FDX 0.8051166  78.39072 0.13489415
## GM  1.0457019  66.09876 0.23777933
## HPQ 1.6279512  89.66712 0.35787960
## KMB 0.5498052  36.84610 0.13397602
## MEL 1.1228708  37.45483 0.38829659
## NYT 0.7706495  43.43290 0.20499217
## PG  0.4688034  41.71711 0.09036596
## TRB 0.7178808  52.05835 0.15730899
## TXN 1.7964117 131.65239 0.31611097
w.gmin.si = solve(cov_factor)%*%rep(1,nrow(cov_factor))
w.gmin.si = w.gmin.si/sum(w.gmin.si)
colnames(w.gmin.si) = "single.index"
w.gmin.sample = solve(var(returns.mat))%*%rep(1,nrow(cov_factor))
w.gmin.sample = w.gmin.sample/sum(w.gmin.sample)
colnames(w.gmin.sample) = "sample"
cbind(w.gmin.si, sample = w.gmin.sample)         
##     single.index       sample
## AA    0.03501615 -0.007267634
## AGE  -0.03373670 -0.008485981
## CAT   0.05314427  0.086625516
## F     0.05576422 -0.023245172
## FDX   0.06451069  0.094321402
## GM    0.12369835  0.091558142
## HPQ  -0.03290554  0.034374185
## KMB   0.28022778  0.229633761
## MEL   0.01901912  0.049541470
## NYT   0.19373120  0.179030053
## PG    0.19019732  0.265100370
## TRB   0.14314200  0.016766625
## TXN  -0.09180887 -0.007952736
par(mfrow=c(2,1))

#Compare the mvp weights of two approaches

#1st plot (Weight of MVP for Single Factor Model)

barplot(t(mvp.w), horiz=F, main="Weight of MVP for Single Factor Model", col="dark blue", cex.names = 0.70, las=2)

#second plot (Beta for Single Factor Model)

barplot(t(beta.hat), horiz=F, main="βi for Single Factor Model", col="dark blue", cex.names = 0.70, las=2)