options(width = 70, digits=4)

# load required packages
#install.packages("ellipse")
library(ellipse)
## Warning: package 'ellipse' was built under R version 3.5.3
## 
## Attaching package: 'ellipse'
## The following object is masked from 'package:graphics':
## 
##     pairs
library(PerformanceAnalytics)   # performance and risk analysis functions
## Warning: package 'PerformanceAnalytics' was built under R version
## 3.5.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 3.5.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.5.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
library(zoo)
#install.packages("readxl")
library(readxl)
## Warning: package 'readxl' was built under R version 3.5.3
#install.packages("writexl")
library(writexl)
## Warning: package 'writexl' was built under R version 3.5.3
?read_excel
## starting httpd help server ...
##  done
retdata = read_excel("berndt.xlsx")
retdata
## # A tibble: 120 x 18
##    date                CITCRP  CONED CONTIL DATGEN    DEC  DELTA
##    <dttm>               <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1 1978-01-31 00:00:00 -0.115 -0.079 -0.129 -0.084 -0.1   -0.028
##  2 1978-02-28 00:00:00 -0.019 -0.003  0.037 -0.097 -0.063 -0.033
##  3 1978-03-31 00:00:00  0.059  0.022  0.003  0.063  0.01   0.07 
##  4 1978-04-30 00:00:00  0.127 -0.005  0.18   0.179  0.165  0.15 
##  5 1978-05-31 00:00:00  0.005 -0.014  0.061  0.052  0.038 -0.031
##  6 1978-06-30 00:00:00  0.007  0.034 -0.059 -0.023 -0.021  0.023
##  7 1978-07-31 00:00:00  0.032  0.011  0.066  0.143  0.107  0.185
##  8 1978-08-31 00:00:00  0.088  0.024  0.033  0.026 -0.017 -0.021
##  9 1978-09-30 00:00:00  0.011  0.048 -0.013 -0.031 -0.037 -0.081
## 10 1978-10-31 00:00:00 -0.071 -0.067 -0.123 -0.085 -0.077 -0.153
## # ... with 110 more rows, and 11 more variables: GENMIL <dbl>,
## #   GERBER <dbl>, IBM <dbl>, MARKET <dbl>, MOBIL <dbl>, PANAM <dbl>,
## #   PSNH <dbl>, TANDY <dbl>, TEXACO <dbl>, WEYER <dbl>, RKFREE <dbl>
str(retdata)
## Classes 'tbl_df', 'tbl' and 'data.frame':    120 obs. of  18 variables:
##  $ date  : POSIXct, format: "1978-01-31" ...
##  $ CITCRP: num  -0.115 -0.019 0.059 0.127 0.005 0.007 0.032 0.088 0.011 -0.071 ...
##  $ CONED : num  -0.079 -0.003 0.022 -0.005 -0.014 0.034 0.011 0.024 0.048 -0.067 ...
##  $ CONTIL: num  -0.129 0.037 0.003 0.18 0.061 -0.059 0.066 0.033 -0.013 -0.123 ...
##  $ DATGEN: num  -0.084 -0.097 0.063 0.179 0.052 -0.023 0.143 0.026 -0.031 -0.085 ...
##  $ DEC   : num  -0.1 -0.063 0.01 0.165 0.038 -0.021 0.107 -0.017 -0.037 -0.077 ...
##  $ DELTA : num  -0.028 -0.033 0.07 0.15 -0.031 0.023 0.185 -0.021 -0.081 -0.153 ...
##  $ GENMIL: num  -0.099 0.018 -0.023 0.046 0.063 0.008 0.075 -0.051 -0.012 -0.032 ...
##  $ GERBER: num  -0.048 0.16 -0.036 0.004 0.046 0.028 -0.012 -0.079 0.104 -0.138 ...
##  $ IBM   : num  -0.029 -0.043 -0.063 0.13 -0.018 -0.004 0.092 0.049 -0.051 -0.046 ...
##  $ MARKET: num  -0.045 0.01 0.05 0.063 0.067 0.007 0.071 0.079 0.002 -0.189 ...
##  $ MOBIL : num  -0.046 -0.017 0.049 0.077 -0.011 -0.043 0.028 0.056 0.064 -0.069 ...
##  $ PANAM : num  0.025 -0.073 0.184 0.089 0.082 0.019 0.204 0.031 0.075 -0.25 ...
##  $ PSNH  : num  -0.008 -0.025 0.026 -0.008 0.019 0.032 -0.014 0.045 0.061 -0.033 ...
##  $ TANDY : num  -0.075 -0.004 0.124 0.055 0.176 -0.014 0.194 0.222 -0.1 -0.206 ...
##  $ TEXACO: num  -0.054 -0.01 0.015 0 -0.029 -0.025 0.042 0 0.01 -0.066 ...
##  $ WEYER : num  -0.116 -0.135 0.084 0.144 -0.031 0.005 0.164 0.039 -0.021 -0.09 ...
##  $ RKFREE: num  0.00487 0.00494 0.00526 0.00491 0.00513 ...
returns.mat = as.matrix(retdata[, c(-1,-11, -18)])
market.mat = as.matrix(retdata[,11, drop=F])
n.obs = nrow(returns.mat)
X.mat = cbind(rep(1,n.obs),market.mat)
colnames(X.mat)[1] = "intercept"
XX.mat = crossprod(X.mat)
# multivariate least squares
G.hat = solve(XX.mat)%*%crossprod(X.mat,returns.mat)
# can also use solve(qr(X.mat), returns.mat)
beta.hat = G.hat[2,]
E.hat = returns.mat - X.mat%*%G.hat
diagD.hat = diag(crossprod(E.hat)/(n.obs-2))
# compute R2 values from multivariate regression
sumSquares = apply(returns.mat, 2, function(x) {sum( (x - mean(x))^2 )})
R.square = 1 - (n.obs-2)*diagD.hat/sumSquares
R.square
##  CITCRP   CONED  CONTIL  DATGEN     DEC   DELTA  GENMIL  GERBER 
## 0.31777 0.01532 0.11216 0.30363 0.33783 0.12163 0.07919 0.23694 
##     IBM   MOBIL   PANAM    PSNH   TANDY  TEXACO   WEYER 
## 0.27523 0.36882 0.14337 0.01763 0.31986 0.27661 0.43083
# print and plot results
cbind(beta.hat, diagD.hat, R.square)
##        beta.hat diagD.hat R.square
## CITCRP  0.66778  0.004511  0.31777
## CONED   0.09102  0.002510  0.01532
## CONTIL  0.73836  0.020334  0.11216
## DATGEN  1.02816  0.011423  0.30363
## DEC     0.84305  0.006564  0.33783
## DELTA   0.48946  0.008152  0.12163
## GENMIL  0.26776  0.003928  0.07919
## GERBER  0.62481  0.005924  0.23694
## IBM     0.45302  0.002546  0.27523
## MOBIL   0.71352  0.004105  0.36882
## PANAM   0.73014  0.015008  0.14337
## PSNH    0.21263  0.011872  0.01763
## TANDY   1.05549  0.011162  0.31986
## TEXACO  0.61328  0.004634  0.27661
## WEYER   0.81687  0.004154  0.43083
par(mfrow=c(1,2))
barplot(beta.hat, horiz=T, main="Beta values", col="blue", cex.names = 0.75, las=1)
barplot(R.square, horiz=T, main="R-square values", col="blue", cex.names = 0.75, las=1)

par(mfrow=c(1,1))

# compute single index model covariance/correlation matrices
cov.si = as.numeric(var(market.mat))*beta.hat%*%t(beta.hat) + diag(diagD.hat)
cor.si = cov2cor(cov.si)
# plot correlations using plotcorr() from ellipse package
rownames(cor.si) = colnames(cor.si)
ord <- order(cor.si[1,])
ordered.cor.si <- cor.si[ord, ord]
plotcorr(ordered.cor.si, col=cm.colors(11)[5*ordered.cor.si + 6])

# compute global min variance portfolio
# use single index covariance
w.gmin.si = solve(cov.si)%*%rep(1,nrow(cov.si))
w.gmin.si = w.gmin.si/sum(w.gmin.si)
colnames(w.gmin.si) = "single.index"
# use sample covariance
w.gmin.sample = solve(var(returns.mat))%*%rep(1,nrow(cov.si))
w.gmin.sample = w.gmin.sample/sum(w.gmin.sample)
colnames(w.gmin.sample) = "sample"
cbind(w.gmin.si, sample = w.gmin.sample)
##        single.index    sample
## CITCRP     0.043792 -0.060353
## CONED      0.375702  0.376287
## CONTIL     0.005229 -0.002152
## DATGEN    -0.023476 -0.065582
## DEC       -0.004413  0.036255
## DELTA      0.052499  0.031554
## GENMIL     0.181880  0.197734
## GERBER     0.042721 -0.029664
## IBM        0.186566  0.284569
## MOBIL      0.033722  0.022567
## PANAM      0.007792  0.010707
## PSNH       0.066180  0.075171
## TANDY     -0.027191 -0.018677
## TEXACO     0.057823  0.199624
## WEYER      0.001173 -0.058040
par(mfrow=c(2,1))
barplot(t(w.gmin.si), horiz=F, main="Single Index Weights", col="blue", cex.names = 0.75, las=2)
barplot(t(w.gmin.sample), horiz=F, main="Sample Weights", col="blue", cex.names = 0.75, las=2)

par(mfrow=c(1,1))

# compare means and sd values on global min variance portfolios
mu.vals = colMeans(returns.mat)
mu.gmin.si = as.numeric(crossprod(w.gmin.si, mu.vals))
sd.gmin.si = as.numeric(sqrt(t(w.gmin.si)%*%cov.si%*%w.gmin.si))
mu.gmin.sample = as.numeric(crossprod(w.gmin.sample, mu.vals))
sd.gmin.sample = as.numeric(sqrt(t(w.gmin.sample)%*%var(returns.mat)%*%w.gmin.sample))
cbind(mu.gmin.si,mu.gmin.sample, sd.gmin.si, sd.gmin.sample)
##      mu.gmin.si mu.gmin.sample sd.gmin.si sd.gmin.sample
## [1,]    0.01365        0.01382    0.03257        0.03264
##
## use lm function to compute single index model regressions for each asset
##

asset.names = colnames(returns.mat)
asset.names
##  [1] "CITCRP" "CONED"  "CONTIL" "DATGEN" "DEC"    "DELTA"  "GENMIL"
##  [8] "GERBER" "IBM"    "MOBIL"  "PANAM"  "PSNH"   "TANDY"  "TEXACO"
## [15] "WEYER"
# initialize list object to hold regression objects
reg.list = list()

# loop over all assets and estimate time series regression
i = "CITCRP"
for (i in asset.names) {
  reg.df = retdata[, c(i, "MARKET")]
  si.formula = as.formula(paste(i,"~", "MARKET", sep=" "))
  reg.list[[i]] = lm(si.formula, data=reg.df)
}

# examine the elements of reg.list  - they are lm objects!
names(reg.list)
##  [1] "CITCRP" "CONED"  "CONTIL" "DATGEN" "DEC"    "DELTA"  "GENMIL"
##  [8] "GERBER" "IBM"    "MOBIL"  "PANAM"  "PSNH"   "TANDY"  "TEXACO"
## [15] "WEYER"
class(reg.list$CITCRP)
## [1] "lm"
reg.list$CITCRP
## 
## Call:
## lm(formula = si.formula, data = reg.df)
## 
## Coefficients:
## (Intercept)       MARKET  
##     0.00252      0.66778
summary(reg.list$CITCRP)
## 
## Call:
## lm(formula = si.formula, data = reg.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.16432 -0.05012  0.00226  0.04351  0.22467 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.00252    0.00626    0.40     0.69    
## MARKET       0.66778    0.09007    7.41    2e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0672 on 118 degrees of freedom
## Multiple R-squared:  0.318,  Adjusted R-squared:  0.312 
## F-statistic:   55 on 1 and 118 DF,  p-value: 2.03e-11
# plot actual vs. fitted over time
# use chart.TimeSeries() function from PerformanceAnalytics package
dataToPlot = cbind(fitted(reg.list$CITCRP), retdata$CITCRP)
colnames(dataToPlot) = c("Fitted","Actual")
dataToPlot.xts<-as.xts(dataToPlot, order.by = retdata$date)
chart.TimeSeries(dataToPlot.xts, main="Single Index Model for CITCRP",
                 colorset=c("black","blue"), legend.loc="bottomleft")

# scatterplot of the single index model regression
plot(retdata$MARKET, retdata$CITCRP, main="SI model for CITCRP",
     type="p", pch=16, col="blue",
     xlab="MARKET", ylab="CITCRP")
abline(h=0, v=0)
abline(reg.list$CITCRP, lwd=2, col="red")

## extract beta values, residual sd's and R2's from list of regression objects
## brute force loop
reg.vals = matrix(0, length(asset.names), 3)
rownames(reg.vals) = asset.names
colnames(reg.vals) = c("beta", "residual.sd", "r.square")
for (i in names(reg.list)) {
  tmp.fit = reg.list[[i]]
  tmp.summary = summary(tmp.fit)
  reg.vals[i, "beta"] = coef(tmp.fit)[2]
  reg.vals[i, "residual.sd"] = tmp.summary$sigma
  reg.vals[i, "r.square"] = tmp.summary$r.squared
}
reg.vals
##           beta residual.sd r.square
## CITCRP 0.66778     0.06716  0.31777
## CONED  0.09102     0.05010  0.01532
## CONTIL 0.73836     0.14260  0.11216
## DATGEN 1.02816     0.10688  0.30363
## DEC    0.84305     0.08102  0.33783
## DELTA  0.48946     0.09029  0.12163
## GENMIL 0.26776     0.06268  0.07919
## GERBER 0.62481     0.07697  0.23694
## IBM    0.45302     0.05046  0.27523
## MOBIL  0.71352     0.06407  0.36882
## PANAM  0.73014     0.12251  0.14337
## PSNH   0.21263     0.10896  0.01763
## TANDY  1.05549     0.10565  0.31986
## TEXACO 0.61328     0.06808  0.27661
## WEYER  0.81687     0.06445  0.43083
# alternatively use R apply function for list objects - lapply or sapply
extractRegVals = function(x) {
  # x is an lm object
  beta.val = coef(x)[2]
  residual.sd.val = summary(x)$sigma
  r2.val = summary(x)$r.squared
  ret.vals = c(beta.val, residual.sd.val, r2.val)
  names(ret.vals) = c("beta", "residual.sd", "r.square")
  return(ret.vals)
}
reg.vals = sapply(reg.list, FUN=extractRegVals)
t(reg.vals)
##           beta residual.sd r.square
## CITCRP 0.66778     0.06716  0.31777
## CONED  0.09102     0.05010  0.01532
## CONTIL 0.73836     0.14260  0.11216
## DATGEN 1.02816     0.10688  0.30363
## DEC    0.84305     0.08102  0.33783
## DELTA  0.48946     0.09029  0.12163
## GENMIL 0.26776     0.06268  0.07919
## GERBER 0.62481     0.07697  0.23694
## IBM    0.45302     0.05046  0.27523
## MOBIL  0.71352     0.06407  0.36882
## PANAM  0.73014     0.12251  0.14337
## PSNH   0.21263     0.10896  0.01763
## TANDY  1.05549     0.10565  0.31986
## TEXACO 0.61328     0.06808  0.27661
## WEYER  0.81687     0.06445  0.43083