knitr::opts_chunk$set(echo = TRUE)
chooseCRANmirror(graphics=FALSE, ind=1)
#Problem 1
# Clear all variables and prior sessions
rm(list=ls(all=TRUE))
# Load Libraries
library(DAAG)
library(lattice)
library(foreign)
library(MASS)
library(car)
require(stats)
require(stats4)
library(KernSmooth)
library(fastICA)
library(cluster)
library(leaps)
library(mgcv)
library(rpart)
library(pan)
library(mgcv)
library(DAAG)
library("TTR")
library(tis)
require("datasets")
require(graphics)
library("forecast")
install.packages("astsa")
##
## The downloaded binary packages are in
## /var/folders/vc/t_bbs3715hjc0pf0v6j56mb80000gn/T//RtmpnkkZq9/downloaded_packages
require(astsa)
library(xtable)
library(stats)
library(corrplot) # We'll use corrplot later on in this example too.
library(visreg) # This library will allow us to show multivariate graphs.
library("rmarkdown")
data(nsw74psid1)
attach(nsw74psid1)
quartz()
par(mfrow=c(2,2))
truehist(trt,col='steelblue3',main="Treatment Group",xlab="trt", ylab="Fraction")
lines(density(trt),lwd=2)
truehist(age,col='steelblue3',main="Age",xlab="Age(years)", ylab="Fraction")
lines(density(age),lwd=2)
truehist(educ,col='steelblue3',main="Education",xlab="Education(years)", ylab="Fraction")
lines(density(educ),lwd=2)
truehist(black,col='steelblue3',main="Black",xlab="Black", ylab="Fraction")
lines(density(black),lwd=2)
quartz()
par(mfrow=c(2,2))
truehist(hisp,col='steelblue3',main="Hispanic",xlab="Hisp", ylab="Fraction")
lines(density(hisp),lwd=2)
truehist(marr,col='steelblue3',main="Married",xlab="Married", ylab="Fraction")
lines(density(marr),lwd=2)
truehist(nodeg,col='steelblue3',main="No Degree",xlab="No Degree", ylab="Fraction")
lines(density(nodeg),lwd=2)
truehist(re74,col='steelblue3',main="Real Earnings in 1974",xlab="Real Earnings in 1974", ylab="Fraction")
lines(density(re74),lwd=2)
quartz()
par(mfrow=c(2,2))
truehist(re75,col='steelblue3',main="Real Earnings in 1975",xlab="Real Earnings in 1975", ylab="Fraction")
lines(density(re75),lwd=2)
truehist(re78,col='steelblue3',main="Real Earnings in 1978",xlab="Real Earnings in 1978", ylab="Fraction")
lines(density(re78),lwd=2)
y1 <- lm(re78~ trt+ (age + educ + re74 + re75) +
(black + hisp + marr + nodeg), data = nsw74psid1)
summary(y1)
##
## Call:
## lm(formula = re78 ~ trt + (age + educ + re74 + re75) + (black +
## hisp + marr + nodeg), data = nsw74psid1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64870 -4302 -435 3786 110412
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -129.74276 1688.51706 -0.077 0.9388
## trt 751.94643 915.25723 0.822 0.4114
## age -83.56559 20.81380 -4.015 6.11e-05 ***
## educ 592.61020 103.30278 5.737 1.07e-08 ***
## re74 0.27812 0.02792 9.960 < 2e-16 ***
## re75 0.56809 0.02756 20.613 < 2e-16 ***
## black -570.92797 495.17772 -1.153 0.2490
## hisp 2163.28118 1092.29036 1.981 0.0478 *
## marr 1240.51952 586.25391 2.116 0.0344 *
## nodeg 590.46695 646.78417 0.913 0.3614
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10070 on 2665 degrees of freedom
## Multiple R-squared: 0.5864, Adjusted R-squared: 0.585
## F-statistic: 419.8 on 9 and 2665 DF, p-value: < 2.2e-16
quartz()
ss=regsubsets(re78~ trt+ (age + educ + re74 + re75) +
(black + hisp + marr + nodeg),method=c("exhaustive"),nbest=3,data=nsw74psid1)
subsets(ss,statistic="cp",legend=F,main="Mallows CP",col="steelblue4")
## Abbreviation
## trt t
## age a
## educ e
## re74 r74
## re75 r75
## black b
## hisp h
## marr m
## nodeg n
legend(8,1500,bty="n",legend=c('r78=Real earnings in 1975','t=treatment','a=age','e=years of education','r74=Real earnings in 1974','r75=Real earnings in 1975','r74=Real earnings in 1974','b=black','h=hispanic','m=married','n=no degree'),col="steelblue4",cex=5)
y2<- lm(re78~ re74 + re75)
quartz()
plot(y2$fit,y2$res, pch=20,col="skyblue4",lwd=10)
abline(h=0,lwd=2,col = "red3")
vif(y2)
## re74 re75
## 3.738247 3.738247
quartz()
newdatacor = cor(nsw74psid1)
corrplot(newdatacor, method = "number")
#g)Plot the Cook’s Distance values. Are there any outliers? If so, discuss what you would do with them. #Yes, there are outliers, as some points have significantly larger Cook’s distance statistics. To deal with them, I would create multiple models with and without the outliers, or put less weight on the outliers and compare the results to make sure that the outliers do not have too large or an effect on my model.
quartz()
y2_cook=cooks.distance(y2)
plot(y2_cook,ylab="Cook's distance",type='o',main="Cook's Distance Plot",col="skyblue4", pch=20,lwd=.25)
#h)Plot a histogram of the residuals and discuss the results.
quartz()
truehist(y2$res,15,col="skyblue3",xlab="Residuals",ylab="Fraction",main="Histogram of Residuals")
#i) Plot the QQ Normal Plot and discuss the results. #The QQ Normal Plot reveals that the residuals are not normally distributed, as it is very off-line with the expected 45 degree line produced from a normal distribution.
quartz()
qqnorm(y2$res,col="skyblue4", pch=20,lwd=1,main="QQ Normal Plot")
quartz()
plot(y2$fit,re78,pch=20,col="skyblue4",cex=1,xlab="Predicted Response",ylab="Observed Response",main="Observed vs. Predicted Response \nNew Model",cex.axis=0.8,cex.main=1.0)
lines(lowess(y2$fit,re78),lwd=2)
abline(0,1,col="red",lwd=2,lty=2)
text(100000,20000,expression(R^2==0.826))
legend(80000,16000, c(expression(y[obs]==y[pred]), "Lowess Smoother"), fill =c("red", "black"),cex=1,bty="y")
summary(GRGDP)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.7075 0.3273 0.7679 0.7958 1.3107 3.9343
summary(RETURN)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -26.937 -0.915 2.023 1.962 5.753 20.117
quartz()
par(mfrow=c(2,2))
truehist(GRGDP,col='steelblue3',main="US GDP Qarterly Growth Rates",xlab="GRGDP", ylab="Fraction")
lines(density(GRGDP),lwd=2)
truehist(RETURN,col='steelblue3',main="SP 500 quarterly returns",xlab="RETURN (%)", ylab="Fraction")
lines(density(RETURN),lwd=2)
# Read in the data into a data file and attach names:
names(z)= c("date","GRGDP","RETURN")
attach(z)
# Convert data to time series format:
GRGDP_ts<-ts(GRGDP,start=c(1950,2),freq=4)
t1<-seq(1950.50, 2012.25,length=length(GRGDP_ts))
RETURN_ts<-ts(RETURN,start=c(1950,2),freq=4)
t2<-seq(1950.50, 2012.25,length=length(RETURN_ts))
#Plot time series
quartz()
par(mfrow=c(2,1))
plot.ts(GRGDP_ts)
nberShade()
plot.ts(RETURN_ts)
nberShade()
# Read in the data into a data file and attach names:
RPCE_data<-read.table("3.1_exercises_data.csv",header=T, sep=",")
names(RPCE_data)= c("date","rpce","rdpi")
attach(RPCE_data)
# Convert data to time series format:
RPCE_ts<-ts(rpce,start=c(1959,1),freq=12)
RDPI_ts<-ts(rdpi,start=c(1959,1),freq=12)
# Growth rates of Real consumption and Real disposable personal income
#calculate changes in natural log
change_log_RPCE<- log(RPCE_ts)- log(lag(RPCE_ts))
change_log_RDPI<- log(RDPI_ts) - log(lag(RDPI_ts))
t10<-seq(1959, 2012.33,length=length(change_log_RDPI))
r1<-lm(change_log_RPCE ~ t10)
quartz()
par(mfrow=c(2,1))
plot(change_log_RPCE,ylab="Real Consumption Growth", xlab="Time", lwd=2, col='skyblue3', xlim=c(1959,2013))
r2<-lm(change_log_RDPI ~ t10)
plot(change_log_RDPI,ylab="Real Disposable Personal Income Growth", xlab="Time", lwd=2, col='skyblue3', xlim=c(1959,2013))
r3<-lm(change_log_RDPI ~ change_log_RPCE)
summary(r3)
##
## Call:
## lm(formula = change_log_RDPI ~ change_log_RPCE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.049532 -0.002385 0.000158 0.002529 0.043006
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0018274 0.0003103 -5.890 6.26e-09 ***
## change_log_RPCE 0.3042768 0.0509016 5.978 3.77e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.007021 on 637 degrees of freedom
## Multiple R-squared: 0.05312, Adjusted R-squared: 0.05163
## F-statistic: 35.73 on 1 and 637 DF, p-value: 3.768e-09
lag_growth<- change_log_RDPI
r4<-lm(change_log_RDPI ~ change_log_RPCE)
summary(r4)
##
## Call:
## lm(formula = change_log_RDPI ~ change_log_RPCE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.049532 -0.002385 0.000158 0.002529 0.043006
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0018274 0.0003103 -5.890 6.26e-09 ***
## change_log_RPCE 0.3042768 0.0509016 5.978 3.77e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.007021 on 637 degrees of freedom
## Multiple R-squared: 0.05312, Adjusted R-squared: 0.05163
## F-statistic: 35.73 on 1 and 637 DF, p-value: 3.768e-09
# Read in the data into a data file and attach names:
USGDP_data<-read.table("3.3a_exercises_data.csv",header=T, sep=",")
names(USGDP_data)= c("date","rgdp")
attach(USGDP_data)
# Convert data to time series format:
RGDP_ts<-ts(rgdp,start=c(1947,1),freq=4)
t4<-seq(1947, 2012,length=length(RGDP_ts))
# Read in the data into a data file and attach names:
JPY_USD_data<-read.table("3.3b_exercises_data.csv",header=T, sep=",")
names(USGDP_data)= c("DATE","jpy_usd")
attach(JPY_USD_data)
# Convert data to time series format:
JPY_USD_ts<-ts(rgdp,start=c(1971,4),freq=365)
t5<-seq(1971, 2012.5,length=length(JPY_USD_ts))
# Read in the data into a data file and attach names:
TreasuryYield_data<-read.table("3.3c_exercises_data.csv",header=T, sep=",")
names(TreasuryYield_data)= c("DATE","CMRate10Yr")
attach(TreasuryYield_data)
# Convert data to time series format:
Yield_ts<-ts(rgdp,start=c(1962,2),freq=365)
t6<-seq(1962, 2012.5,length=length(Yield_ts))
# Read in the data into a data file and attach names:
unemployment_data<-read.table("3.3d_exercises_data.csv",header=T, sep=",")
names(unemployment_data)= c("DATE","unemrate")
attach(unemployment_data)
# Convert data to time series format:
unemployment_ts<-ts(unemrate,start=c(1948,1),freq=12)
t7<-seq(1948, 2012.4167,length=length(unemployment_ts))
#Plot time series for part a-d
quartz()
par(mfrow=c(2,2))
plot.ts(RGDP_ts)
nberShade()
plot.ts(JPY_USD_ts)
nberShade()
plot.ts(Yield_ts)
nberShade()
plot.ts(RETURN_ts)
nberShade(unemployment_ts)
# Read in the data into a data file and attach names:
Housing_data<-read.table("Chapter4.3_exercises_data.csv",header=T, sep=",")
names(Housing_data)= c("OBS","P","R")
attach(Housing_data)
# Convert data to time series format:
Housing_ts<-ts(P,start=c(1980,2),freq=4)
t8<-seq(1980.25, 2011.75,length=length(Housing_ts))
Pgrowth = log(Housing_ts)-log(lag(Housing_ts))
Housinggrowth_ts<-ts(Pgrowth,start=c(1980,2),freq=4)
Int_rate_ts<-ts(R,start=c(1980,2),freq=4)
Rchange = log(Int_rate_ts)-log(lag(Int_rate_ts))
Int_rate_change_ts<-ts(Rchange,start=c(1980,2),freq=4)
#Quarterly House Prices
quartz()
par(mfrow=c(2,1))
#ACF Functions
acf(Housing_ts,main="ACF of Quarterly House Prices")
#PACF Functions
pacf(Housing_ts,main="PACF of the Quarterly House Prices")
#**************************
#House Price Growth
quartz()
par(mfrow=c(2,1))
#ACF Functions
acf(Housinggrowth_ts,main="ACF of House Price Growth")
#PACF Functions
pacf(Housinggrowth_ts,main="PACF of House Price Growth")
#**************************
#Interest Rates
quartz()
par(mfrow=c(2,1))
#ACF Functions
acf(Int_rate_ts,main="ACF of Interest Ratee")
#PACF Functions
pacf(Int_rate_ts,main="PACF of the Interest Rates")
#**************************
#Interest rates changes
quartz()
par(mfrow=c(2,1))
#ACF Functions
acf(Int_rate_change_ts,main="ACF of the Interest Rates Changes")
#PACF Functions
pacf(Int_rate_change_ts,main="PACF of the Interest Rates Changes")