library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.5     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'purrr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## Warning: package 'stringr' was built under R version 4.0.5
## Warning: package 'forcats' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
#Download m-fac9003.csv containing 13 stocks and S&P500 market index monthly returns from 1990/1-2003/12

fac <- read_csv("C:/Users/CTY REDSTAR/Downloads/fac.csv")
## Rows: 168 Columns: 14
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## dbl (14): AA, AGE, CAT, F, FDX, GM, HPQ, KMB, MEL, NYT, PG, TRB, TXN, SP500
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(fac)
## # A tibble: 6 x 14
##       AA    AGE    CAT     F    FDX    GM   HPQ    KMB    MEL    NYT    PG
##    <dbl>  <dbl>  <dbl> <dbl>  <dbl> <dbl> <dbl>  <dbl>  <dbl>  <dbl> <dbl>
## 1 -16.4  -12.2   -4.44 -0.06  -2.28 -2.12 -6.19 -11.0  -10.8   -6.3  -8.89
## 2   4.04   4.95   8.84  6.02  10.5   8.97 -4.01  -5.2    0.34  -4.62 -0.84
## 3   0.12  13.1    0.17  2.06  10.8   1.57  5.67   3.21  -0.17  -0.66  5.41
## 4  -4.28 -11.1    0.25 -5.67  -2.44 -4.19 -5.29  -0.65  -2.2  -10.6   4.26
## 5   5.81  19.7    8.52  3.89 -16.2  10.9   8.81   8.83  11.8   11.6  16.4 
## 6  -4.05  -1.44 -22.1  -5.79  -2.81 -2.7  -1.47   1.55  -7.76  -0.12  4.8 
## # ... with 3 more variables: TRB <dbl>, TXN <dbl>, SP500 <dbl>
#The monthly series of 3-month Treasury bill rates of the secondary market is used as the proxy for risk-
#free rate and has been deducted from the monthly returns to obtain the excess returns for stocks and market index. 
#Compute GMV portfolio weights and return by using single index model.
t = dim(fac)[1]
t
## [1] 168
market = fac[,14]
market
## # A tibble: 168 x 1
##     SP500
##     <dbl>
##  1  -7.52
##  2   0.21
##  3   1.77
##  4  -3.34
##  5   8.55
##  6  -1.53
##  7  -1.16
##  8 -10.0 
##  9  -5.73
## 10  -1.27
## # ... with 158 more rows
fac1 = fac[,c(-14)]
head(fac1)
## # A tibble: 6 x 13
##       AA    AGE    CAT     F    FDX    GM   HPQ    KMB    MEL    NYT    PG
##    <dbl>  <dbl>  <dbl> <dbl>  <dbl> <dbl> <dbl>  <dbl>  <dbl>  <dbl> <dbl>
## 1 -16.4  -12.2   -4.44 -0.06  -2.28 -2.12 -6.19 -11.0  -10.8   -6.3  -8.89
## 2   4.04   4.95   8.84  6.02  10.5   8.97 -4.01  -5.2    0.34  -4.62 -0.84
## 3   0.12  13.1    0.17  2.06  10.8   1.57  5.67   3.21  -0.17  -0.66  5.41
## 4  -4.28 -11.1    0.25 -5.67  -2.44 -4.19 -5.29  -0.65  -2.2  -10.6   4.26
## 5   5.81  19.7    8.52  3.89 -16.2  10.9   8.81   8.83  11.8   11.6  16.4 
## 6  -4.05  -1.44 -22.1  -5.79  -2.81 -2.7  -1.47   1.55  -7.76  -0.12  4.8 
## # ... with 2 more variables: TRB <dbl>, TXN <dbl>
fac1 = as.matrix(fac1)
head(fac1)
##          AA    AGE    CAT     F    FDX    GM   HPQ    KMB    MEL    NYT    PG
## [1,] -16.40 -12.17  -4.44 -0.06  -2.28 -2.12 -6.19 -11.01 -10.77  -6.30 -8.89
## [2,]   4.04   4.95   8.84  6.02  10.47  8.97 -4.01  -5.20   0.34  -4.62 -0.84
## [3,]   0.12  13.08   0.17  2.06  10.84  1.57  5.67   3.21  -0.17  -0.66  5.41
## [4,]  -4.28 -11.06   0.25 -5.67  -2.44 -4.19 -5.29  -0.65  -2.20 -10.60  4.26
## [5,]   5.81  19.70   8.52  3.89 -16.17 10.94  8.81   8.83  11.85  11.59 16.35
## [6,]  -4.05  -1.44 -22.10 -5.79  -2.81 -2.70 -1.47   1.55  -7.76  -0.12  4.80
##         TRB   TXN
## [1,] -13.04 -7.61
## [2,]  -0.37  4.97
## [3,]   2.36  2.69
## [4,]  -7.98 -6.85
## [5,]   8.82 22.88
## [6,]  -0.64 -5.87
num = rep(1,t)
num
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [149] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
xmat = cbind(num, market)
head(xmat)
##   num SP500
## 1   1 -7.52
## 2   1  0.21
## 3   1  1.77
## 4   1 -3.34
## 5   1  8.55
## 6   1 -1.53
xmat <- as.matrix(xmat)
betta_hat = solve(t(xmat)%*%
                       xmat)%*%
     t(xmat)%*%
     fac1
betta_hat
##             AA       AGE       CAT         F       FDX        GM       HPQ
## num   0.549124 0.7218061 0.8393521 0.4543643 0.7995790 0.1982025 0.6835681
## SP500 1.291591 1.5141359 0.9406928 1.2192453 0.8051166 1.0457019 1.6279512
##             KMB       MEL       NYT        PG       TRB      TXN
## num   0.5463020 0.8849263 0.4904120 0.8880914 0.6512465 1.438887
## SP500 0.5498052 1.1228708 0.7706495 0.4688034 0.7178808 1.796412
E_hat = fac1 - xmat%*%
     betta_hat
head(E_hat)
##              AA        AGE         CAT          F         FDX         GM
## [1,] -7.2363588 -1.5055042   1.7946575  8.6543606   2.9748981  5.5454755
## [2,]  3.2196419  3.9102254   7.8031024  5.3095942   9.5013465  8.5522001
## [3,] -2.7152402  9.6781734  -2.3343784 -0.5524285   8.6153645 -0.4790948
## [4,] -0.5152096 -6.7245922   2.5525617 -2.0520849  -0.5504894 -0.8955583
## [5,] -5.7822280  6.0323321  -0.3622754 -6.9889119 -23.8533263  1.8010466
## [6,] -2.6229896  0.1548218 -21.5000922 -4.3789190  -2.3777506 -1.2982786
##             HPQ        KMB        MEL        NYT        PG        TRB       TXN
## [1,]  5.3686247 -7.4217666 -3.2109382 -0.9951281 -6.252690 -8.2927829  4.460129
## [2,] -5.0354379 -5.8617611 -0.7807291 -5.2722484 -1.826540 -1.1720014  3.153867
## [3,]  2.1049583  1.6905428 -3.0424075 -2.5144615  3.692127  0.4381045 -1.928535
## [4,] -0.5362112  0.6400475  0.6654621 -8.5164428  4.937712 -6.2335246 -2.288872
## [5,] -5.7925506  3.5828633  1.3645288  4.5105352 11.453640  2.0308727  6.081793
## [6,]  0.3371972  1.8449000 -6.9269340  0.5686817  4.629178 -0.1928888 -4.560377
diagD_hat <- diag(t(E_hat)%*%E_hat)/(t-2)
ret <- apply(fac1, 2, var)
r2 <- 1- diag(t(E_hat)%*%E_hat)/((t-1)*ret)
rsq <- sqrt(diagD_hat)
cov_factor <- as.vector(var(market))*t(betta_hat)%*%betta_hat + diag(diagD_hat)
one.vec <- rep(1,13)
a <- solve(cov_factor)%*%one.vec
b <- t(one.vec)%*%a
gmv <- a / as.numeric(b)
gmv
##            [,1]
## AA   0.03501615
## AGE -0.03373670
## CAT  0.05314427
## F    0.05576422
## FDX  0.06451069
## GM   0.12369835
## HPQ -0.03290554
## KMB  0.28022778
## MEL  0.01901912
## NYT  0.19373120
## PG   0.19019732
## TRB  0.14314200
## TXN -0.09180887
barplot(as.vector(gmv), ylim = c(-0.1, 0.3), names.arg = rownames(gmv), cex.names = 0.5)