This problem set is designed to review material on the multiple regression model and time series. Include both your R code and output in your answers.
Using a sequence of simple regressions computed in R, show how to obtain the multiple regression coefficient on \(P_2\) in the multi dataset from the DataAnalytics package.
library(DataAnalytics)
data(multi)
out=with(multi,
lm(Sales~p1+p2)
)
lmSumm(out)
## Multiple Regression Analysis:
## 3 regressors(including intercept) and 100 observations
##
## lm(formula = Sales ~ p1 + p2)
##
## Coefficients:
## Estimate Std Error t value p value
## (Intercept) 115.70 8.548 13.54 0
## p1 -97.66 2.669 -36.60 0
## p2 108.80 1.409 77.20 0
## ---
## Standard Error of the Regression: 28.42
## Multiple R-squared: 0.987 Adjusted R-squared: 0.987
## Overall F stat: 3717.29 on 2 and 97 DF, pvalue= 0
out_purge=with(multi,
lm(p2~p1)
)
err_2.1 = out_purge$residuals
out_multi_single =with(multi,
lm(Sales~err_2.1)
)
lmSumm(out_multi_single)
## Multiple Regression Analysis:
## 2 regressors(including intercept) and 100 observations
##
## lm(formula = Sales ~ err_2.1)
##
## Coefficients:
## Estimate Std Error t value p value
## (Intercept) 517.1 11.380 45.43 0
## err_2.1 108.8 5.645 19.27 0
## ---
## Standard Error of the Regression: 113.8
## Multiple R-squared: 0.791 Adjusted R-squared: 0.789
## Overall F stat: 371.44 on 1 and 98 DF, pvalue= 0
We could observe from above analysis that the result of regressing p2 on p1 and regressing the errors from this purge to the Sales(Dependent variable), We obtain the multiple regression coefficient on \(P_2\),i.e., 108.8
Use matrix formulas and R code – i.e., use %*% not lm – to reproduce the least squares coefficients and standard errors shown on slide 17 of Chapter II. The countryret dataset is in the DataAnalytics package.
data(countryret)
out = lm(usa~canada +uk + australia + france + germany + japan, data=countryret)
#lmSumm(out)
countryret_f = na.omit(countryret[,c("usa","canada","uk","australia","france","germany","japan")])
Y=countryret_f$usa
X= cbind(rep(1,length(Y)),countryret_f$canada, countryret_f$uk,countryret_f$australia,countryret_f$france,countryret_f$germany,countryret_f$japan)
#least squares coefficients
b = chol2inv(chol(crossprod(X)))%*%crossprod(X,Y)
slope =as.vector(t(b))
names(slope) = c("Intercept", "canada","uk","australia","france","germany","japan")
print(slope)
## Intercept canada uk australia france
## 0.006135614 0.444362109 0.225690196 -0.056688434 0.166742081
## germany japan
## -0.064792831 -0.051027942
#standard errors
e = Y - X%*%b
ssq = sum(e*e)/(length(Y) - ncol(X))
Var_b= ssq*chol2inv(chol(crossprod(X)))
stderr = sqrt(diag(Var_b))
names(stderr) = c("Intercept", "canada","uk","australia","france","germany","japan")
print(stderr)
## Intercept canada uk australia france germany
## 0.00230897 0.06958673 0.06491489 0.05036627 0.06133779 0.05723881
## japan
## 0.03461495
Run the regression of VWNFX on vwretd.
VWNFX when vwretd = 0.05 using the formulas in the class notes.library(DataAnalytics)
library(reshape2)
data(Vanguard)
data(marketRf)
Van=Vanguard[,c(1,2,5)]
V_reshaped=dcast(Van,date~ticker,value.var="mret")
Van_mkt=merge(V_reshaped,marketRf,by="date")
df=na.omit(Van_mkt[,c("date","VWNFX","vwretd")])
N = nrow(df)
out=lm(VWNFX~vwretd,data=Van_mkt)
std_err = sqrt(sum(out$residuals^2)/(N - 2))
mean_x = mean(df$vwretd)
s_x = sd(df$vwretd)
vwretd_f = 0.05
t_stat = qt(0.95, df=N-2)
s_pred = std_err*((((0.05 - mean_x)^2)/((N - 1)*(s_x)^2) + (1/N) + 1)^0.5)
fit = out$coef[1] + out$coef[2]*vwretd_f
lwr = out$coef[1] + out$coef[2]*vwretd_f - t_stat*s_pred
upr = out$coef[1] + out$coef[2]*vwretd_f + t_stat*s_pred
predict_int = as.vector(c(fit,lwr,upr))
names(predict_int) = c("fit","lower_int", "upper_int")
print(predict_int)
## fit lower_int upper_int
## 0.04553055 0.01744646 0.07361465
predict command.predict(out, new=data.frame(vwretd=0.05), int="prediction", level = 0.90)
## fit lwr upr
## 1 0.04553055 0.01744646 0.07361465
Define the mean return vector and the symmetric variance-covariance matrix for 3 assets as follows:
\[ \mu = \begin{bmatrix} 0.010 \\ 0.015 \\ 0.025 \end{bmatrix} \hspace{3em} \Sigma = \begin{bmatrix} 0.0016 & 0.0010 & 0.0015 \\ & 0.0020 & 0.0019 \\ & & 0.0042 \end{bmatrix} \]
diag(), X*Y, or X%*%Y) in your answer. You may not use a loop and you may not use the R function cov2cor.mu = matrix(rbind(0.010, 0.015, 0.025),nrow = 3, ncol = 1)
sigma_covar = matrix(rbind(cbind(0.0016, 0.0010, 0.0015), cbind(0.0010, 0.0020, 0.0019), cbind(0.0015, 0.0019, 0.0042)), nrow = 3, ncol = 3)
D_mat = diag(1/(sqrt(diag(sigma_covar))))
cor_matrix = D_mat%*%sigma_covar%*%D_mat
cor_matrix
## [,1] [,2] [,3]
## [1,] 1.0000000 0.5590170 0.5786376
## [2,] 0.5590170 1.0000000 0.6555623
## [3,] 0.5786376 0.6555623 1.0000000
wt = matrix(rbind(0.3,0.4,0.3), nrow = 3, ncol = 1)
mu_portfolio = t(wt)%*%mu
mu_p = as.vector(mu_portfolio)
names(mu_p) = c("Mean of portfolio")
stdev_p = as.vector(sqrt(t(wt)%*%sigma_covar%*%wt))
names(stdev_p) = c("Standard Dev of portfolio")
mu_p
## Mean of portfolio
## 0.0165
stdev_p
## Standard Dev of portfolio
## 0.04252058
Using the same data as in Question 3 above and following the lecture slides (Chapter 3, section g), test the general linear hypothesis that \(\beta_{up} = \beta_{down}\) in the following regression. Note that if you account for the NA values properly, you should get a slightly different result than what is presented in the lecture slides.
\[ VWNFX_t = \alpha + \beta_{up}*vwretd_t^{+} + \beta_{down}*vwretd_t^{-} + \varepsilon_t \]
V_reshaped = dcast(Van, date~ticker, value.var = "mret")
Van_mkt=merge(V_reshaped,marketRf,by="date")
Van_mkt = na.omit(Van_mkt[,c("date","VWNFX","vwretd")])
mkt_up=ifelse(Van_mkt$vwretd>0,1,0)
Van_mkt$upvw = mkt_up*Van_mkt$vwretd
Van_mkt$dwnvw = (1-mkt_up)*Van_mkt$vwretd
mkt_timing=lm(VWNFX~upvw+dwnvw,data=Van_mkt)
lmSumm(mkt_timing)
## Multiple Regression Analysis:
## 3 regressors(including intercept) and 336 observations
##
## lm(formula = VWNFX ~ upvw + dwnvw, data = Van_mkt)
##
## Coefficients:
## Estimate Std Error t value p value
## (Intercept) 0.001526 0.001486 1.03 0.305
## upvw 0.876200 0.038740 22.62 0.000
## dwnvw 0.901600 0.037830 23.83 0.000
## ---
## Standard Error of the Regression: 0.017
## Multiple R-squared: 0.846 Adjusted R-squared: 0.846
## Overall F stat: 917.85 on 2 and 333 DF, pvalue= 0
R = matrix(c(0,1,-1),byrow=TRUE, nrow=1)
r=c(0)
X = cbind(c(rep(1,nrow(Van_mkt))),Van_mkt$upvw, Van_mkt$dwnvw)
b=as.vector(mkt_timing$coef)
QFmat = chol2inv(chol(crossprod(X)))
QFmat=R%*%QFmat%*%t(R)
Violation = R%*%b-matrix(r,ncol=1)
fnum= t(Violation)%*%chol2inv(chol(QFmat))%*%Violation
n_minus_k=nrow(Van_mkt)-length(b)
fdenom= nrow(R)*sum(mkt_timing$resid**2)/n_minus_k
f = fnum/fdenom
f
## [,1]
## [1,] 0.1558804
pvalue=1-pf(f,df1=nrow(R),df2=n_minus_k)
pvalue
## [,1]
## [1,] 0.6932308
Retrieve the Apple stock price series using the quantmod package (as done in the notes). Plot the autocorrelations of the difference in log prices.
library(quantmod)
options("getSymbols.warning4.0"=FALSE)
getSymbols("AAPL")
## [1] "AAPL"
lnAclose=log(AAPL[,4])
acf(diff(lnAclose),na.action=na.omit)
par(mfrow=c(2,1))
plot(lnAclose)
plot(diff(lnAclose))