options(width = 70, digits=4)
# load required packages
#install.packages("ellipse")
library(ellipse)
## Warning: package 'ellipse' was built under R version 3.5.3
##
## Attaching package: 'ellipse'
## The following object is masked from 'package:graphics':
##
## pairs
library(PerformanceAnalytics) # performance and risk analysis functions
## Warning: package 'PerformanceAnalytics' was built under R version
## 3.5.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 3.5.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.5.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
library(zoo)
#install.packages("readxl")
library(readxl)
## Warning: package 'readxl' was built under R version 3.5.3
#install.packages("writexl")
library(writexl)
## Warning: package 'writexl' was built under R version 3.5.3
?read_excel
## starting httpd help server ...
## done
retdata = read_excel("berndt.xlsx")
retdata
## # A tibble: 120 x 18
## date CITCRP CONED CONTIL DATGEN DEC DELTA
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1978-01-31 00:00:00 -0.115 -0.079 -0.129 -0.084 -0.1 -0.028
## 2 1978-02-28 00:00:00 -0.019 -0.003 0.037 -0.097 -0.063 -0.033
## 3 1978-03-31 00:00:00 0.059 0.022 0.003 0.063 0.01 0.07
## 4 1978-04-30 00:00:00 0.127 -0.005 0.18 0.179 0.165 0.15
## 5 1978-05-31 00:00:00 0.005 -0.014 0.061 0.052 0.038 -0.031
## 6 1978-06-30 00:00:00 0.007 0.034 -0.059 -0.023 -0.021 0.023
## 7 1978-07-31 00:00:00 0.032 0.011 0.066 0.143 0.107 0.185
## 8 1978-08-31 00:00:00 0.088 0.024 0.033 0.026 -0.017 -0.021
## 9 1978-09-30 00:00:00 0.011 0.048 -0.013 -0.031 -0.037 -0.081
## 10 1978-10-31 00:00:00 -0.071 -0.067 -0.123 -0.085 -0.077 -0.153
## # ... with 110 more rows, and 11 more variables: GENMIL <dbl>,
## # GERBER <dbl>, IBM <dbl>, MARKET <dbl>, MOBIL <dbl>, PANAM <dbl>,
## # PSNH <dbl>, TANDY <dbl>, TEXACO <dbl>, WEYER <dbl>, RKFREE <dbl>
str(retdata)
## Classes 'tbl_df', 'tbl' and 'data.frame': 120 obs. of 18 variables:
## $ date : POSIXct, format: "1978-01-31" ...
## $ CITCRP: num -0.115 -0.019 0.059 0.127 0.005 0.007 0.032 0.088 0.011 -0.071 ...
## $ CONED : num -0.079 -0.003 0.022 -0.005 -0.014 0.034 0.011 0.024 0.048 -0.067 ...
## $ CONTIL: num -0.129 0.037 0.003 0.18 0.061 -0.059 0.066 0.033 -0.013 -0.123 ...
## $ DATGEN: num -0.084 -0.097 0.063 0.179 0.052 -0.023 0.143 0.026 -0.031 -0.085 ...
## $ DEC : num -0.1 -0.063 0.01 0.165 0.038 -0.021 0.107 -0.017 -0.037 -0.077 ...
## $ DELTA : num -0.028 -0.033 0.07 0.15 -0.031 0.023 0.185 -0.021 -0.081 -0.153 ...
## $ GENMIL: num -0.099 0.018 -0.023 0.046 0.063 0.008 0.075 -0.051 -0.012 -0.032 ...
## $ GERBER: num -0.048 0.16 -0.036 0.004 0.046 0.028 -0.012 -0.079 0.104 -0.138 ...
## $ IBM : num -0.029 -0.043 -0.063 0.13 -0.018 -0.004 0.092 0.049 -0.051 -0.046 ...
## $ MARKET: num -0.045 0.01 0.05 0.063 0.067 0.007 0.071 0.079 0.002 -0.189 ...
## $ MOBIL : num -0.046 -0.017 0.049 0.077 -0.011 -0.043 0.028 0.056 0.064 -0.069 ...
## $ PANAM : num 0.025 -0.073 0.184 0.089 0.082 0.019 0.204 0.031 0.075 -0.25 ...
## $ PSNH : num -0.008 -0.025 0.026 -0.008 0.019 0.032 -0.014 0.045 0.061 -0.033 ...
## $ TANDY : num -0.075 -0.004 0.124 0.055 0.176 -0.014 0.194 0.222 -0.1 -0.206 ...
## $ TEXACO: num -0.054 -0.01 0.015 0 -0.029 -0.025 0.042 0 0.01 -0.066 ...
## $ WEYER : num -0.116 -0.135 0.084 0.144 -0.031 0.005 0.164 0.039 -0.021 -0.09 ...
## $ RKFREE: num 0.00487 0.00494 0.00526 0.00491 0.00513 ...
returns.mat = as.matrix(retdata[, c(-1,-11, -18)])
market.mat = as.matrix(retdata[,11, drop=F])
n.obs = nrow(returns.mat)
X.mat = cbind(rep(1,n.obs),market.mat)
colnames(X.mat)[1] = "intercept"
XX.mat = crossprod(X.mat)
# multivariate least squares
G.hat = solve(XX.mat)%*%crossprod(X.mat,returns.mat)
# can also use solve(qr(X.mat), returns.mat)
beta.hat = G.hat[2,]
E.hat = returns.mat - X.mat%*%G.hat
diagD.hat = diag(crossprod(E.hat)/(n.obs-2))
# compute R2 values from multivariate regression
sumSquares = apply(returns.mat, 2, function(x) {sum( (x - mean(x))^2 )})
R.square = 1 - (n.obs-2)*diagD.hat/sumSquares
R.square
## CITCRP CONED CONTIL DATGEN DEC DELTA GENMIL GERBER
## 0.31777 0.01532 0.11216 0.30363 0.33783 0.12163 0.07919 0.23694
## IBM MOBIL PANAM PSNH TANDY TEXACO WEYER
## 0.27523 0.36882 0.14337 0.01763 0.31986 0.27661 0.43083
# print and plot results
cbind(beta.hat, diagD.hat, R.square)
## beta.hat diagD.hat R.square
## CITCRP 0.66778 0.004511 0.31777
## CONED 0.09102 0.002510 0.01532
## CONTIL 0.73836 0.020334 0.11216
## DATGEN 1.02816 0.011423 0.30363
## DEC 0.84305 0.006564 0.33783
## DELTA 0.48946 0.008152 0.12163
## GENMIL 0.26776 0.003928 0.07919
## GERBER 0.62481 0.005924 0.23694
## IBM 0.45302 0.002546 0.27523
## MOBIL 0.71352 0.004105 0.36882
## PANAM 0.73014 0.015008 0.14337
## PSNH 0.21263 0.011872 0.01763
## TANDY 1.05549 0.011162 0.31986
## TEXACO 0.61328 0.004634 0.27661
## WEYER 0.81687 0.004154 0.43083
par(mfrow=c(1,2))
barplot(beta.hat, horiz=T, main="Beta values", col="blue", cex.names = 0.75, las=1)
barplot(R.square, horiz=T, main="R-square values", col="blue", cex.names = 0.75, las=1)

par(mfrow=c(1,1))
# compute single index model covariance/correlation matrices
cov.si = as.numeric(var(market.mat))*beta.hat%*%t(beta.hat) + diag(diagD.hat)
cor.si = cov2cor(cov.si)
# plot correlations using plotcorr() from ellipse package
rownames(cor.si) = colnames(cor.si)
ord <- order(cor.si[1,])
ordered.cor.si <- cor.si[ord, ord]
plotcorr(ordered.cor.si, col=cm.colors(11)[5*ordered.cor.si + 6])

# compute global min variance portfolio
# use single index covariance
w.gmin.si = solve(cov.si)%*%rep(1,nrow(cov.si))
w.gmin.si = w.gmin.si/sum(w.gmin.si)
colnames(w.gmin.si) = "single.index"
# use sample covariance
w.gmin.sample = solve(var(returns.mat))%*%rep(1,nrow(cov.si))
w.gmin.sample = w.gmin.sample/sum(w.gmin.sample)
colnames(w.gmin.sample) = "sample"
cbind(w.gmin.si, sample = w.gmin.sample)
## single.index sample
## CITCRP 0.043792 -0.060353
## CONED 0.375702 0.376287
## CONTIL 0.005229 -0.002152
## DATGEN -0.023476 -0.065582
## DEC -0.004413 0.036255
## DELTA 0.052499 0.031554
## GENMIL 0.181880 0.197734
## GERBER 0.042721 -0.029664
## IBM 0.186566 0.284569
## MOBIL 0.033722 0.022567
## PANAM 0.007792 0.010707
## PSNH 0.066180 0.075171
## TANDY -0.027191 -0.018677
## TEXACO 0.057823 0.199624
## WEYER 0.001173 -0.058040
par(mfrow=c(2,1))
barplot(t(w.gmin.si), horiz=F, main="Single Index Weights", col="blue", cex.names = 0.75, las=2)
barplot(t(w.gmin.sample), horiz=F, main="Sample Weights", col="blue", cex.names = 0.75, las=2)

par(mfrow=c(1,1))
# compare means and sd values on global min variance portfolios
mu.vals = colMeans(returns.mat)
mu.gmin.si = as.numeric(crossprod(w.gmin.si, mu.vals))
sd.gmin.si = as.numeric(sqrt(t(w.gmin.si)%*%cov.si%*%w.gmin.si))
mu.gmin.sample = as.numeric(crossprod(w.gmin.sample, mu.vals))
sd.gmin.sample = as.numeric(sqrt(t(w.gmin.sample)%*%var(returns.mat)%*%w.gmin.sample))
cbind(mu.gmin.si,mu.gmin.sample, sd.gmin.si, sd.gmin.sample)
## mu.gmin.si mu.gmin.sample sd.gmin.si sd.gmin.sample
## [1,] 0.01365 0.01382 0.03257 0.03264
##
## use lm function to compute single index model regressions for each asset
##
asset.names = colnames(returns.mat)
asset.names
## [1] "CITCRP" "CONED" "CONTIL" "DATGEN" "DEC" "DELTA" "GENMIL"
## [8] "GERBER" "IBM" "MOBIL" "PANAM" "PSNH" "TANDY" "TEXACO"
## [15] "WEYER"
# initialize list object to hold regression objects
reg.list = list()
# loop over all assets and estimate time series regression
i = "CITCRP"
for (i in asset.names) {
reg.df = retdata[, c(i, "MARKET")]
si.formula = as.formula(paste(i,"~", "MARKET", sep=" "))
reg.list[[i]] = lm(si.formula, data=reg.df)
}
# examine the elements of reg.list - they are lm objects!
names(reg.list)
## [1] "CITCRP" "CONED" "CONTIL" "DATGEN" "DEC" "DELTA" "GENMIL"
## [8] "GERBER" "IBM" "MOBIL" "PANAM" "PSNH" "TANDY" "TEXACO"
## [15] "WEYER"
class(reg.list$CITCRP)
## [1] "lm"
reg.list$CITCRP
##
## Call:
## lm(formula = si.formula, data = reg.df)
##
## Coefficients:
## (Intercept) MARKET
## 0.00252 0.66778
summary(reg.list$CITCRP)
##
## Call:
## lm(formula = si.formula, data = reg.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.16432 -0.05012 0.00226 0.04351 0.22467
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.00252 0.00626 0.40 0.69
## MARKET 0.66778 0.09007 7.41 2e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0672 on 118 degrees of freedom
## Multiple R-squared: 0.318, Adjusted R-squared: 0.312
## F-statistic: 55 on 1 and 118 DF, p-value: 2.03e-11
# plot actual vs. fitted over time
# use chart.TimeSeries() function from PerformanceAnalytics package
dataToPlot = cbind(fitted(reg.list$CITCRP), retdata$CITCRP)
colnames(dataToPlot) = c("Fitted","Actual")
dataToPlot.xts<-as.xts(dataToPlot, order.by = retdata$date)
chart.TimeSeries(dataToPlot.xts, main="Single Index Model for CITCRP",
colorset=c("black","blue"), legend.loc="bottomleft")

# scatterplot of the single index model regression
plot(retdata$MARKET, retdata$CITCRP, main="SI model for CITCRP",
type="p", pch=16, col="blue",
xlab="MARKET", ylab="CITCRP")
abline(h=0, v=0)
abline(reg.list$CITCRP, lwd=2, col="red")

## extract beta values, residual sd's and R2's from list of regression objects
## brute force loop
reg.vals = matrix(0, length(asset.names), 3)
rownames(reg.vals) = asset.names
colnames(reg.vals) = c("beta", "residual.sd", "r.square")
for (i in names(reg.list)) {
tmp.fit = reg.list[[i]]
tmp.summary = summary(tmp.fit)
reg.vals[i, "beta"] = coef(tmp.fit)[2]
reg.vals[i, "residual.sd"] = tmp.summary$sigma
reg.vals[i, "r.square"] = tmp.summary$r.squared
}
reg.vals
## beta residual.sd r.square
## CITCRP 0.66778 0.06716 0.31777
## CONED 0.09102 0.05010 0.01532
## CONTIL 0.73836 0.14260 0.11216
## DATGEN 1.02816 0.10688 0.30363
## DEC 0.84305 0.08102 0.33783
## DELTA 0.48946 0.09029 0.12163
## GENMIL 0.26776 0.06268 0.07919
## GERBER 0.62481 0.07697 0.23694
## IBM 0.45302 0.05046 0.27523
## MOBIL 0.71352 0.06407 0.36882
## PANAM 0.73014 0.12251 0.14337
## PSNH 0.21263 0.10896 0.01763
## TANDY 1.05549 0.10565 0.31986
## TEXACO 0.61328 0.06808 0.27661
## WEYER 0.81687 0.06445 0.43083
# alternatively use R apply function for list objects - lapply or sapply
extractRegVals = function(x) {
# x is an lm object
beta.val = coef(x)[2]
residual.sd.val = summary(x)$sigma
r2.val = summary(x)$r.squared
ret.vals = c(beta.val, residual.sd.val, r2.val)
names(ret.vals) = c("beta", "residual.sd", "r.square")
return(ret.vals)
}
reg.vals = sapply(reg.list, FUN=extractRegVals)
t(reg.vals)
## beta residual.sd r.square
## CITCRP 0.66778 0.06716 0.31777
## CONED 0.09102 0.05010 0.01532
## CONTIL 0.73836 0.14260 0.11216
## DATGEN 1.02816 0.10688 0.30363
## DEC 0.84305 0.08102 0.33783
## DELTA 0.48946 0.09029 0.12163
## GENMIL 0.26776 0.06268 0.07919
## GERBER 0.62481 0.07697 0.23694
## IBM 0.45302 0.05046 0.27523
## MOBIL 0.71352 0.06407 0.36882
## PANAM 0.73014 0.12251 0.14337
## PSNH 0.21263 0.10896 0.01763
## TANDY 1.05549 0.10565 0.31986
## TEXACO 0.61328 0.06808 0.27661
## WEYER 0.81687 0.06445 0.43083