#1 Check your working directory
getwd()
## [1] "/Users/zosiajiang/Desktop/Harrisburg Application - Zhuoxin Jiang/ANLY 565"
#2 Set your working directory to “ANLY 580/RScript”.
setwd("~/Desktop/Harrisburg Application - Zhuoxin Jiang/ANLY 565")
#3 Download “ffrategdp.xls” data file and set the “observation_date” variable to the date format and the “FEDFUNDS” and “GDPC1” variables to the numeric format. # The “FEDFUNDS” variable represents the effective federal funds rate, which imdicates the interest rate at which depository institutions trade federal funds (balances held at Federal Reserve Banks) with each other overnight. The “GDPC1” variable represents real gross domestic product.
library(readxl)
df <- read_xls("ffrategdp.xls")
str(df)
## tibble [260 × 3] (S3: tbl_df/tbl/data.frame)
## $ observation_date: POSIXct[1:260], format: "1954-07-01" "1954-10-01" ...
## $ FEDFUNDS : num [1:260] 1.027 0.987 1.343 1.5 1.94 ...
## $ GDPC1 : num [1:260] 2683 2735 2813 2859 2898 ...
# format variables
df$observation_date <- as.Date(df$observation_date)
df$FEDFUNDS <- as.numeric(df$FEDFUNDS)
df$GDPC1 <- as.numeric(df$GDPC1)
range(df$observation_date)
## [1] "1954-07-01" "2019-04-01"
head(df)
## # A tibble: 6 x 3
## observation_date FEDFUNDS GDPC1
## <date> <dbl> <dbl>
## 1 1954-07-01 1.03 2683.
## 2 1954-10-01 0.987 2735.
## 3 1955-01-01 1.34 2813.
## 4 1955-04-01 1.5 2859.
## 5 1955-07-01 1.94 2898.
## 6 1955-10-01 2.36 2915.
#4 By using ts() function create a time series object that contains two variables: “FEDFUNDS” and “GDPC1”. Label it as “ffrategdpts”.
ffrategdpts <- ts(df[-1],start = c(1954,3), end = c(2019,2), frequency = 4 )
str(ffrategdpts)
## Time-Series [1:260, 1:2] from 1954 to 2019: 1.027 0.987 1.343 1.5 1.94 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:2] "FEDFUNDS" "GDPC1"
#5 Create two stand alone variables “fedrate” and “gdp” that take on values of the “FEDFUNDS” and “GDPC1” variables from the “ffrategdpts” data set.
head(ffrategdpts)
## FEDFUNDS GDPC1
## [1,] 1.0266667 2682.601
## [2,] 0.9866667 2735.091
## [3,] 1.3433333 2813.212
## [4,] 1.5000000 2858.988
## [5,] 1.9400000 2897.598
## [6,] 2.3566667 2914.993
str(ffrategdpts)
## Time-Series [1:260, 1:2] from 1954 to 2019: 1.027 0.987 1.343 1.5 1.94 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:2] "FEDFUNDS" "GDPC1"
fedrate <- ffrategdpts[,1]
gdp <- ffrategdpts[,2]
#6 When the federal funds rate goes down, the comercial loan interest rates go down too. This means that people can borrow cheaply and invest in their businesses, which will result in higher gross domestic product. Therefore, you suspect that the federal funds rate has a negative correlation with GDP. To test this hypothesis you decide to use lm() function to estimate the coeficients of a linear regression model in which “gdp” is a dependent variable and “fedrate” is an idependent variable. Save the estimated model as gdpfr.lm. Based on the results of this model can you make any conclusions about the nature of the relationship between the gdp and the federal funds rate?
Answer: based on the result, fedrate is a significant variable in predicting gpd. The coefficient -526.10 indicates that gdp is negatively related with fedrate. Worth mentioning that the Adjusted R-squared is only 0.1519, indicating am inaccurate model.
gdpfr.lm <- lm(gdp ~ fedrate)
summary(gdpfr.lm)
##
## Call:
## lm(formula = gdp ~ fedrate)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8680.0 -3843.8 526.3 3522.0 8398.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11915.26 467.51 25.487 < 2e-16 ***
## fedrate -538.30 78.18 -6.885 4.38e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4504 on 258 degrees of freedom
## Multiple R-squared: 0.1552, Adjusted R-squared: 0.1519
## F-statistic: 47.4 on 1 and 258 DF, p-value: 4.382e-11
#7 You have suspected that the “gdp” variable may contain a unit root.By using Augmented Dickey Fuller method test “gdp” variable for the the presence of unit root. Does “gdp” variable contain a unit root? Is “gdp” variable stationary?
Answer: since p value is large than 0.05 on Augmented Dickey-Fuller Test , the conclusion of the adf.test is to retain the null hypothesis that the series have unit roots. Also combining the acf plot, gdp variable is non-stationary.
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
adf.test(gdp)
##
## Augmented Dickey-Fuller Test
##
## data: gdp
## Dickey-Fuller = -1.5023, Lag order = 6, p-value = 0.7855
## alternative hypothesis: stationary
acf(gdp)
#8 By using Augmented Dickey Fuller method test “fedrate” variable for the the presence of unit root. Does “fedrate” variable contain a unit root? Is “fedrate” variable stationary?
Answer: since p value is large than 0.05 on Augmented Dickey-Fuller Test , the conclusion of the adf.test is to retain the null hypothesis that the series have unit roots. Also combining the acf plot, fedrate variable is non-stationary.
adf.test(fedrate)
##
## Augmented Dickey-Fuller Test
##
## data: fedrate
## Dickey-Fuller = -2.783, Lag order = 6, p-value = 0.2462
## alternative hypothesis: stationary
acf(fedrate)
#9 The Phillips-Ouliaris test shows whether there is evidence that the series are cointegrated, which justifies the use of a regression model. Are “gdp” and “fedrate” variables cointegrated? Is “gdpfr.lm” a suitable model to explore the relationship between “gdp” and “fedrate”?
Answer: The Phillips-Ouliaris test shows there is evidence that the series are not cointegrated (p-value large than 0.05, cannot reject null hypothesis). gdpfr.lm is a suitable model.
po.test(cbind(gdp, fedrate))
## Warning in po.test(cbind(gdp, fedrate)): p-value greater than printed p-value
##
## Phillips-Ouliaris Cointegration Test
##
## data: cbind(gdp, fedrate)
## Phillips-Ouliaris demeaned = -2.2286, Truncation lag parameter = 2,
## p-value = 0.15
#10 Create the following 2 new variables: # “gdpgrowth” - that represents quarterly percentage chagne in GDP # “fedratediff” - that represents quarterly difference in the federal funds rate (simple difference) # To each of the varibles add “NA” as the first observation. This will ensure that the new variables are of the same length as the existing variables.
gdpgrowth <- c(NA,diff(gdp)/lag(gdp,-1)*100 )
head(gdpgrowth)
## [1] NA 1.9566831 2.8562487 1.6271792 1.3504779 0.6003248
fedratediff <- c(NA, diff(fedrate))
head(fedratediff)
## [1] NA -0.0400000 0.3566667 0.1566667 0.4400000 0.4166667
#11 By using ts() and cbind() functions add “gdpgrowth” and “fedratediff” variables to the “ffrategdpts” data set.
df2 <- ts(cbind(gdpgrowth,fedratediff), start =c(1954,3), end = c(2019,2), frequency = 4 )
head(df2)
## gdpgrowth fedratediff
## [1,] NA NA
## [2,] 1.9566831 -0.0400000
## [3,] 2.8562487 0.3566667
## [4,] 1.6271792 0.1566667
## [5,] 1.3504779 0.4400000
## [6,] 0.6003248 0.4166667
ffrategdpts <- cbind(df2,ffrategdpts)
str(ffrategdpts)
## Time-Series [1:260, 1:4] from 1954 to 2019: NA 1.96 2.86 1.63 1.35 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:4] "df2.gdpgrowth" "df2.fedratediff" "ffrategdpts.FEDFUNDS" "ffrategdpts.GDPC1"
# rename column names
colnames(ffrategdpts) <- c("gdpgrowth", "fedratediff", "FEDFUNDS", "GDPC1")
#12 Use na.omit() function to get rid of the missing values in the “ffrategdpts” data set. Save the new data set as “ffrategdptscc”.
ffrategdptscc <- na.omit(ffrategdpts)
head(ffrategdptscc)
## gdpgrowth fedratediff FEDFUNDS GDPC1
## [1,] 1.9566831 -0.0400000 0.9866667 2735.091
## [2,] 2.8562487 0.3566667 1.3433333 2813.212
## [3,] 1.6271792 0.1566667 1.5000000 2858.988
## [4,] 1.3504779 0.4400000 1.9400000 2897.598
## [5,] 0.6003248 0.4166667 2.3566667 2914.993
## [6,] -0.3884057 0.1266667 2.4833333 2903.671
#13 Create 2 new variables: # “ggdp” - takes on values of the “gdpgrowth” from the “ffrategdptscc” # “dfrate” - takes on values of the “fedratediff” from the “ffrategdptscc”
ggdp <- ffrategdptscc[,1]
dfrate <- ffrategdptscc[,2]
#14 Use to Augmented Dickey-Fuller test to determine whether “ggdp” and “dfrate” are statiomary or not. Does “ggdp” contain a unit root? Is “ggdp” stationary? Does “dfrate” contain a unit root? Is “dfrate” stationary?
Answer: since p value is less than 0.05 on Augmented Dickey-Fuller Test , the conclusion of the adf.test is to reject the null hypothesis that ggdp and dfrate series have unit roots, which means ggdp and dfrate is stationary.
adf.test(ggdp)
## Warning in adf.test(ggdp): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: ggdp
## Dickey-Fuller = -6.0671, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(dfrate)
## Warning in adf.test(dfrate): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: dfrate
## Dickey-Fuller = -7.2109, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
#15 Use lm() function to estimate the coeficients of a linear regression model in which “ggdp” is a dependent variable and “dfrate” is as an idependent variable. Lable these estimates as “ggdp.dfrate.lm”. Based on the findings of the linear regression model what is the nature of the relationship between the growth rate of real gdp and difference in federal funds rate?
Answer: difference in federal funds rate is an significant variable in predicting the growth rate of real gdp. Also, difference in federal funds rate has an positive relationship with it. However, Adjusted R-squared is 0.1018, which indicates poor model accuracy.
ggdp.dfrate.lm <- lm(ggdp ~ dfrate)
summary(ggdp.dfrate.lm)
##
## Call:
## lm(formula = ggdp ~ dfrate)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.90290 -0.40464 -0.00131 0.42111 2.93096
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.76121 0.05114 14.88 < 2e-16 ***
## dfrate 0.32673 0.05940 5.50 9.14e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.823 on 257 degrees of freedom
## Multiple R-squared: 0.1053, Adjusted R-squared: 0.1018
## F-statistic: 30.25 on 1 and 257 DF, p-value: 9.144e-08
#16 Create a variable called “ggdp.dfrate.lm.resid” that represents the residual series obtained from the “ggdp.dfrate.lm” regression.
ggdp.dfrate.lm.resid <- ggdp.dfrate.lm$residuals
#17 Constract acf and pacf functions for “ggdp.dfrate.lm.resid”. What can you say about the goodness of the fit of the model?
Answer:the fit of the model is not perfect as we can see the autocorrelation from acf and pacf plots (significant lags).
acf(ggdp.dfrate.lm.resid)
pacf(ggdp.dfrate.lm.resid)
#18 Maybe vector autoregression model would prove a better fit. Upload “vars” library that contains VAR() function
library(vars)
## Loading required package: MASS
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: urca
## Loading required package: lmtest
#19 Estimate a VAR model for the “ggdp” and “dfrate” variables. In this modelincludes 3 lags of each variable. Save the estimates of the var model as “ggdp.dfrate.var”
ggdp.dfrate.var <- VAR(cbind(ggdp,dfrate), p = 3, type = "trend")
coef(ggdp.dfrate.var)
## $ggdp
## Estimate Std. Error t value Pr(>|t|)
## ggdp.l1 0.324346779 0.0638048645 5.08341772 7.288016e-07
## dfrate.l1 -0.020511529 0.0630905367 -0.32511261 7.453688e-01
## ggdp.l2 0.295653241 0.0630572221 4.68864995 4.534155e-06
## dfrate.l2 -0.352935097 0.0637934116 -5.53246939 7.980323e-08
## ggdp.l3 0.093258635 0.0633473231 1.47217957 1.422352e-01
## dfrate.l3 0.006540845 0.0656761751 0.09959235 9.207481e-01
## trend 0.001063776 0.0004282042 2.48427286 1.363989e-02
##
## $dfrate
## Estimate Std. Error t value Pr(>|t|)
## ggdp.l1 0.243789828 0.0634633835 3.8414250 1.552692e-04
## dfrate.l1 0.243621515 0.0627528787 3.8822365 1.326314e-04
## ggdp.l2 0.052200154 0.0627197424 0.8322763 4.060505e-01
## dfrate.l2 -0.255109914 0.0634519919 -4.0205186 7.701112e-05
## ggdp.l3 -0.052778955 0.0630082909 -0.8376510 4.030301e-01
## dfrate.l3 0.211064262 0.0653246789 3.2310034 1.399820e-03
## trend -0.001031574 0.0004259125 -2.4220330 1.614880e-02
#20 Use plot() and irf() functions to obtain and plot impulse response functions for each variable. IRF illustrates the behavior of a variable in response to one standard deviation shock in its own value and in the value of the other variable. Based on the these graph what conclusions can you draw about the nature of the relationship between the growth rate of gdp and the differene in federal funds rate? Any potential explanations?
Answer: Most of the time, differene in federal funds rate negatively affect the growth rate of gdp. Lag 3 is the turning point. It can result from the potential underlying factors, such as the develpment rate of economy.
plot(irf(ggdp.dfrate.var, response="ggdp", boot=T, nsteps = 4))
plot(irf(ggdp.dfrate.var, response="dfrate", boot=T, nsteps = 4))
#21 Use resid() function to obtain the residuals from the ggdp equation of the “ggdp.dfrate.var” model. Save this residual series as “var.ggdp.resid”.
var.ggdp.resid <- resid(ggdp.dfrate.var)[,1]
#22 Use resid() function to obtain the residuals from the dfrate equation of the “ggdp.dfrate.var” model.Save this residual series as “var.dfrate.resid”.
var.dfrate.resid <- resid(ggdp.dfrate.var)[,2]
#24 Plot acf and pacf functions for the ’var.ggdp.resid“. Does”ggdp.dfrate.var" model provide a good fit to explain growth rate of gdp?
Answer: It fits better since there is no autocorrelation in the residuals.
acf(var.ggdp.resid)
pacf(var.ggdp.resid)
#25 Plot acf and pacf functions for the ’var.dfrate.resid“. Does”ggdp.dfrate.var" model provide a good fit to explain the difference in federal funds rate?
Answer: there is still some autocorrelation in the model residuals. It’s not a good model fit.
acf(var.dfrate.resid)
pacf(var.dfrate.resid)
#26 Use “ggdp.dfrate.var” model and predict() function to forecast growth rate of gdp and change in federal funds rate over the upcoming year. Save the predicted values as “VAR.pred”
VAR.pred <- predict(ggdp.dfrate.var, n.ahead = 4)
VAR.pred
## $ggdp
## fcst lower upper CI
## [1,] 0.6309280 -0.9292571 2.191113 1.560185
## [2,] 0.7101314 -0.9288717 2.349134 1.639003
## [3,] 0.7821502 -1.0291380 2.593438 1.811288
## [4,] 0.8437790 -1.0187770 2.706335 1.862556
##
## $dfrate
## fcst lower upper CI
## [1,] -0.10401903 -1.655854 1.447816 1.551835
## [2,] -0.11413444 -1.769390 1.541121 1.655255
## [3,] -0.09381149 -1.795873 1.608250 1.702061
## [4,] -0.09254682 -1.796220 1.611126 1.703673
#27 Use ts() function and VAR.pred forecast to create a new variable “ggdp.pred”. It should contain the forcasted values of the growth rate of gdp over the next 4 quarters.
ggdp.pred <- ts(VAR.pred$fcst$ggdp[,1], st = c(2019,3), fr = 4)
ggdp.pred
## Qtr1 Qtr2 Qtr3 Qtr4
## 2019 0.6309280 0.7101314
## 2020 0.7821502 0.8437790
#28 Use ts() function and VAR.pred forecast to create a new variable “dfrate.pred”. It should contain the prediction of the change in the federal funds rate over the next 4 quarters.
dfrate.pred <- ts(VAR.pred$fcst$dfrate[,1], st = c(2019,3), fr = 4)
dfrate.pred
## Qtr1 Qtr2 Qtr3 Qtr4
## 2019 -0.10401903 -0.11413444
## 2020 -0.09381149 -0.09254682
#29 Plot the times series graph of the past growth rates of gdp alongside its future forecasted values. Do you expect the gdp to grow over the next 4 quarters? Is a new recession likely to happen in the coming year?
Answer: Based on the plot (red line at the right), we can expect the gdp to grow over the next 4 quarters. A recession is less likely to happen.
ts.plot(cbind(ggdp, ggdp.pred), lty = 1:2,col=c("blue","red"))
#30 Plot the times series graph of the past changes of the federal funds rate alongside its future forecasted values. Do you expect the federal funds rate to increase over the next 4 quarters? Should one take out a loan now?
Answer:The federal funds rate expect to decrease over the next 4 quarters based on the plot (red part). It’s a good timing to take out a loan.
ts.plot(cbind(dfrate, dfrate.pred), lty = 1:2,col=c("blue","red"))