Preparing & Cleaning Dataset
#CLEANING THE DATA
#1. Renaming the Column Names (For Simplicity, to avoid periods)
# Short, period-free labels applied to the columns of `econ` by position
# (assumes `econ` has exactly these ten columns in this order -- TODO confirm).
columnnames <- c(
  "Year", "Month", "Day",
  "InterestTargetRate", "InterestUpperTargetRate", "InterestLowerTargetRate",
  "InterestRate", "RealGDPChange", "UnemploymentRate", "InflationRate"
)
names(econ) <- columnnames
#2. Combining Year, Month and Day into a single Date column (base R as.Date)
# The three parts are pasted as "Y-M-D" text and parsed with the default format.
econ$Date <- with(econ, as.Date(paste(Year, Month, Day, sep = "-")))
head(econ$Date) # first few dates, printed as YYYY-MM-DD
## [1] "1954-07-01" "1954-08-01" "1954-09-01" "1954-10-01" "1954-11-01"
## [6] "1954-12-01"
#The purpose is to make it easier to analyse the variables across different years, months or days
#3. Dealing with Missing Values & Creating New Subsets
#Our response variable (inflation) starts in 1958, while the explanatory variables (unemployment, interest rate, GDP) start in 1954
#Meanwhile the FED used a single interest rate target from 1982, before switching to upper and lower interest rate targets in 2008
#Therefore we create a new dataset from econ that removes all observations where the response variable (inflation) is missing; our dataset for the main analysis consists only of inflation rate, interest rate and unemployment rate (1958 - 2017), built using dplyr
# Main analysis dataset: drop every observation whose inflation rate is
# missing, then keep only the date and the three series used in the models.
macroecon <- econ %>%
  filter(!is.na(InflationRate)) %>%
  select(Date, InflationRate, InterestRate, UnemploymentRate)
## Warning: package 'bindrcpp' was built under R version 3.3.2
#macroecon now has no missing values in InflationRate
Main Analysis: Analyzing Inflation Determinants
#We have created a new dataset which consists of our response variable and independent variables
head(macroecon)
## Date InflationRate InterestRate UnemploymentRate
## 1 1958-01-01 3.2 2.72 5.8
## 2 1958-02-01 3.2 1.67 6.4
## 3 1958-03-01 2.8 1.20 6.7
## 4 1958-04-01 2.4 1.26 7.4
## 5 1958-05-01 2.4 0.63 7.4
## 6 1958-06-01 2.1 0.93 7.3
#Examining the relationships between inflation and unemployment & interest rate
#Bivariate Plot of Inflation with Interest Rate
# Scatterplot of inflation against the interest rate
pairs(InflationRate ~ InterestRate, data = macroecon)

# Pearson correlation between the two series
cor1 <- with(macroecon, cor(InflationRate, InterestRate))
cor1 # strong positive correlation (about 0.78)
## [1] 0.7843761
#this meets a priori expectations because high interest rates are a response to high inflation
#in order to push consumers to save instead of spend
#Bivariate Plot of Inflation with Unemployment Rate
# Scatterplot of inflation against the unemployment rate
pairs(InflationRate ~ UnemploymentRate, data = macroecon)

# Pearson correlation between the two series
cor2 <- with(macroecon, cor(InflationRate, UnemploymentRate))
cor2 # much weaker positive correlation (about 0.21)
## [1] 0.2065216
#given that high inflation is associated with business uncertainty
#this can result in a small uptick in unemployment
#To narrow down, we need to create two segments to differentiate between times of boom and depression
#ECONOMIC BOOM: Subset to examine inflation determinants during the 1990's Economic Boom
# Observations strictly after 1991-03-01 and strictly before 2001-03-01.
# The bounds are written as Date values rather than bare strings.
econboom <- macroecon %>%
  filter(
    Date > as.Date("1991-03-01"),
    Date < as.Date("2001-03-01")
  )
#see descriptive stats for 1990's boom
# Yearly mean and standard deviation of each series during the 1990s boom.
# In the printed table, average inflation falls from about 4.7% in 1991 to
# roughly 2-3% for the rest of the decade, while the average interest rate
# stays mostly around 5% (lower in 1992-1994).
# The unemployment columns are truncated in the printed output.
econboom %>%
  group_by(year(Date)) %>%
  summarise(
    avginflation = mean(InflationRate),
    sdinflation = sd(InflationRate),
    avginterest = mean(InterestRate),
    sdinterest = sd(InterestRate),
    avgunemployment = mean(UnemploymentRate),
    sdunemployment = sd(UnemploymentRate)
  )
## # A tibble: 11 x 7
## `year(Date)` avginflation sdinflation avginterest sdinterest
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1991 4.711111 0.29344695 5.441111 0.52570059
## 2 1992 3.650000 0.23548789 3.521667 0.41424265
## 3 1993 3.308333 0.17816404 3.022500 0.04048007
## 4 1994 2.850000 0.10000000 4.201667 0.78822970
## 5 1995 2.991667 0.06685579 5.836667 0.16455634
## 6 1996 2.716667 0.12673045 5.298333 0.09777835
## 7 1997 2.391667 0.15642793 5.460000 0.12030265
## 8 1998 2.275000 0.13568011 5.353333 0.31075665
## 9 1999 2.083333 0.13371158 4.970000 0.26498714
## 10 2000 2.433333 0.18748737 6.235833 0.37862090
## 11 2001 2.650000 0.07071068 5.735000 0.34648232
## # ... with 2 more variables: avgunemployment <dbl>, sdunemployment <dbl>
#ECONOMIC RECESSION: Subset to examine inflation determinants from the onset of the 2008 Economic Crisis until today
# Observations strictly after 2007-01-01 and strictly before 2017-03-16.
# The bounds are written as Date values rather than bare strings.
econcrisis <- macroecon %>%
  filter(
    Date > as.Date("2007-01-01"),
    Date < as.Date("2017-03-16")
  )
#see descriptive stats for 2008 recession
# Yearly mean and standard deviation of each series around the 2008 crisis.
# In the printed table, average inflation stays at roughly 1-2%, while the
# average interest rate drops from about 5% in 2007 to close to zero from 2009.
# The original analysis notes unemployment rising to about 9% at its 2010 peak;
# those columns are truncated in the printed output.
econcrisis %>%
  group_by(year(Date)) %>%
  summarise(
    avginflation = mean(InflationRate),
    sdinflation = sd(InflationRate),
    avginterest = mean(InterestRate),
    sdinterest = sd(InterestRate),
    avgunemployment = mean(UnemploymentRate),
    sdunemployment = sd(UnemploymentRate)
  )
## # A tibble: 11 x 7
## `year(Date)` avginflation sdinflation avginterest sdinterest
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2007 2.2909091 0.18140863 4.99818182 0.359104948
## 2 2008 2.3083333 0.21933094 1.92750000 1.052556412
## 3 2009 1.6916667 0.15050420 0.16000000 0.033028913
## 4 2010 0.9583333 0.26443192 0.17500000 0.028123106
## 5 2011 1.6666667 0.44175957 0.10166667 0.035376760
## 6 2012 2.1166667 0.16966991 0.14000000 0.026628761
## 7 2013 1.7583333 0.11645002 0.10750000 0.028959219
## 8 2014 1.7500000 0.13142575 0.08916667 0.013113722
## 9 2015 1.8250000 0.13568011 0.13250000 0.035451632
## 10 2016 2.1916667 0.06685579 0.39500000 0.049817850
## 11 2017 2.2500000 0.07071068 0.65500000 0.007071068
## # ... with 2 more variables: avgunemployment <dbl>, sdunemployment <dbl>
#1. Linear Regression Analysis & Results
# Simple linear regression on the full sample:
# InflationRate (response) on InterestRate (single explanatory variable)
mod1 <- lm(formula = InflationRate ~ InterestRate, data = macroecon)
coef(mod1)
## (Intercept) InterestRate
## 0.9361124 0.5523570
summary(mod1)
##
## Call:
## lm(formula = InflationRate ~ InterestRate, data = macroecon)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5359 -1.1943 -0.2150 0.7744 7.4331
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.93611 0.10250 9.133 <2e-16 ***
## InterestRate 0.55236 0.01642 33.647 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.598 on 708 degrees of freedom
## Multiple R-squared: 0.6152, Adjusted R-squared: 0.6147
## F-statistic: 1132 on 1 and 708 DF, p-value: < 2.2e-16
rsquared(mod1)
## [1] 0.6152459
#Interest rate explains 0.61 variability for inflation rate
plotModel(mod1, system = "ggplot2")

# Multiple linear regression on the full sample:
# InflationRate (response) on two explanatory variables,
# InterestRate and UnemploymentRate
mod2 <- lm(
  formula = InflationRate ~ InterestRate + UnemploymentRate,
  data = macroecon
)
coef(mod2)
## (Intercept) InterestRate UnemploymentRate
## -1.1438569 0.5525966 0.3425274
summary(mod2)
##
## Call:
## lm(formula = InflationRate ~ InterestRate + UnemploymentRate,
## data = macroecon)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.3930 -1.0343 -0.2910 0.6892 6.9076
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.14386 0.24029 -4.760 2.35e-06 ***
## InterestRate 0.55260 0.01548 35.701 < 2e-16 ***
## UnemploymentRate 0.34253 0.03623 9.454 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.506 on 707 degrees of freedom
## Multiple R-squared: 0.6584, Adjusted R-squared: 0.6575
## F-statistic: 681.4 on 2 and 707 DF, p-value: < 2.2e-16
rsquared(mod2)
## [1] 0.6584294
#Together the two variables explain about 0.66 of the variability (up from about 0.62 with interest rate alone), which indicates that adding unemployment improves the model only modestly
#Both InterestRate and UnemploymentRate are statistically significant
#The F test is statistically significant (p-value below the significance level), which suggests that
#the model provides a better fit than the intercept-only model
#Economic Boom
# Multiple linear regression on the 1990s boom subset:
# InflationRate (response) on two explanatory variables,
# InterestRate and UnemploymentRate
mod3 <- lm(
  formula = InflationRate ~ InterestRate + UnemploymentRate,
  data = econboom
)
coef(mod3)
## (Intercept) InterestRate UnemploymentRate
## -3.1615519 0.3938410 0.7406414
summary(mod3)
##
## Call:
## lm(formula = InflationRate ~ InterestRate + UnemploymentRate,
## data = econboom)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.45841 -0.20497 -0.06805 0.17962 0.97165
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.16155 0.33126 -9.544 2.78e-16 ***
## InterestRate 0.39384 0.03627 10.859 < 2e-16 ***
## UnemploymentRate 0.74064 0.03217 23.022 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2928 on 116 degrees of freedom
## Multiple R-squared: 0.8353, Adjusted R-squared: 0.8324
## F-statistic: 294.1 on 2 and 116 DF, p-value: < 2.2e-16
rsquared(mod3)
## [1] 0.8352619
#indicates that interest rate and unemployment rate explains 0.83 variability of inflation around its mean
#higher than our initial rsquared from model1
#F Statistic is significant
#Economic Crisis
# Multiple linear regression on the economic-crisis subset:
# InflationRate (response) on two explanatory variables,
# InterestRate and UnemploymentRate
mod4 <- lm(
  formula = InflationRate ~ InterestRate + UnemploymentRate,
  data = econcrisis
)
coef(mod4)
## (Intercept) InterestRate UnemploymentRate
## 2.72512251 0.05290796 -0.13011330
summary(mod4)
##
## Call:
## lm(formula = InflationRate ~ InterestRate + UnemploymentRate,
## data = econcrisis)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.91211 -0.22318 0.01275 0.20037 0.65059
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.72512 0.14412 18.909 < 2e-16 ***
## InterestRate 0.05291 0.02347 2.255 0.026 *
## UnemploymentRate -0.13011 0.01873 -6.946 2.2e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3269 on 118 degrees of freedom
## Multiple R-squared: 0.4428, Adjusted R-squared: 0.4333
## F-statistic: 46.88 on 2 and 118 DF, p-value: 1.039e-15
rsquared(mod4)
## [1] 0.4427576
#indicates that interest rate and unemployment rate explain about 0.44 of the variability of inflation around its mean
#lower than the rsquared of the full-sample model (mod2)
#Interest Rate is only weakly significant in recession years (p = 0.026) and its coefficient is close to zero
#Monetary Policy might be ineffective at increasing inflation during an economic recession
#Conclusion: interest rate and unemployment rate play a stronger role in inflation during booms than during recessions
#2. Verifying the assumptions for the multivariate linear model
#linearity of the model
#normality of residuals
#homoscedasticity of errors
# Diagnostic plot 1 for mod2 (residuals versus fitted values), used to judge
# linearity; the outer parentheses store the plot and print it in one step.
(linearity <- mplot(mod2, which = 1, system = "ggplot2"))
## [[1]]
## `geom_smooth()` using method = 'loess'

# scatterplot of residuals versus fitted: linearity of model;
# some linearity can be observed
# Diagnostic plot 2 for mod2 (normal Q-Q plot of the residuals), used to judge
# normality; the outer parentheses store the plot and print it in one step.
(norm_residuals <- mplot(mod2, which = 2, system = "ggplot2"))
## [[1]]

# normality of residuals:
# we can see that there is deviation observed at tail end
# Homoscedasticity of errors: diagnostic plot 3 (scale-location) is the check
# for constant error variance. The original code requested plot 4, which is
# the Cook's distance plot and says nothing about the spread of the errors.
homoscedasticity <- mplot(mod2, which = 3, system = "ggplot2")
homoscedasticity
# Cook's distance (diagnostic plot 4) is kept under its own name, because the
# outlier discussion that follows refers to it.
cooks_distance <- mplot(mod2, which = 4, system = "ggplot2")
cooks_distance
## [[1]]

#model diagnostics: observing outliers through Cook's distance method.
#Some observations from 200 to 300 have outliers with a cook's distance greater than 0.01.
#identifying extreme values within the data
library(broom)
## Warning: package 'broom' was built under R version 3.3.2
# Per-observation diagnostics for mod2, keeping only the rows whose
# Cook's distance exceeds 0.01.
outliers <- broom::augment(mod2) %>%
  dplyr::filter(.cooksd > 0.01)
## Warning: Deprecated: please use `purrr::possibly()` instead
## Warning: Deprecated: please use `purrr::possibly()` instead
## Warning: Deprecated: please use `purrr::possibly()` instead
## Warning: Deprecated: please use `purrr::possibly()` instead
## Warning: Deprecated: please use `purrr::possibly()` instead
head(outliers) #high inflation rates refer to the 1970's oil inflation prices
## InflationRate InterestRate UnemploymentRate .fitted .se.fit .resid
## 1 10.6 10.06 6.0 6.470430 0.09581499 4.129570
## 2 11.2 9.45 6.6 6.338862 0.09043460 4.861138
## 3 11.1 8.53 7.2 6.035990 0.08809014 5.064010
## 4 11.5 7.13 8.1 5.570629 0.09818724 5.929371
## 5 11.7 6.24 8.1 5.078818 0.09458452 6.621182
## 6 11.4 5.54 8.6 4.863264 0.10798749 6.536736
## .hat .sigma .cooksd .std.resid
## 1 0.004046364 1.499263 0.01022053 2.747162
## 2 0.003604685 1.496145 0.01260540 3.233115
## 3 0.003420209 1.495191 0.01297462 3.367732
## 4 0.004249210 1.490649 0.02213609 3.944866
## 5 0.003943104 1.486506 0.02559869 4.404458
## 6 0.005139785 1.487013 0.03260014 4.350898
Secondary Analysis: FED’s Interest Rate Targets
#Has the FED's monetary policy been successful in keeping to its interest rate targets in the years leading up to the 2008 Crisis?
#Creating a subset of the dataframe with interest rate and interest rate target using dplyr
#2000 - 2008
# Interest rate versus the single target rate: rows dated strictly between
# 2000-12-01 and 2008-12-01 that have a non-missing interest rate.
interestratetarget <- econ %>%
  filter(
    Date > as.Date("2000-12-01"),
    Date < as.Date("2008-12-01"),
    !is.na(InterestRate)
  ) %>%
  select(Date, InterestTargetRate, InterestRate) %>%
  # gap between the realised rate and the target
  mutate(diff = InterestRate - InterestTargetRate)
#Creating a subset of the dataframe to show interest rate targets with upper and lower bounds
# Interest rate versus the upper and lower target bounds: rows dated strictly
# between 2008-12-16 and 2017-03-16 that have a non-missing interest rate.
interestrateboundaries <- econ %>%
  filter(
    Date > as.Date("2008-12-16"),
    Date < as.Date("2017-03-16"),
    !is.na(InterestRate)
  ) %>%
  select(Date, InterestUpperTargetRate, InterestLowerTargetRate, InterestRate)
Data Visualization 1: Scatterplots
#Scatterplots to Understand Relationships of Inflation with Explanatory Variables
#Inflation Rate vs Interest Rate (1958 - 2017)
# Inflation rate against interest rate for the full sample: points are shaded
# by calendar year and one least-squares line is fitted to all observations.
# No group_by() is used, because ggplot2 ignores dplyr groups. Colour is mapped
# only in geom_point(): a continuous colour inside geom_smooth() is dropped by
# the stat and triggers a warning in recent ggplot2 releases.
macroecon %>%
  ggplot(aes(InterestRate, InflationRate)) +
  geom_point(aes(colour = year(Date))) +
  # linear regression method ("lm")
  geom_smooth(method = "lm") +
  # axis labels, title and a readable legend title
  labs(
    x = "Interest Rate (%)", y = "Inflation Rate (%)",
    title = "Interest Rate vs Inflation Rate (1958-2017)",
    colour = "Year"
  )

#we can see a strong positive correlation between inflation and interest rates
#as inflation increases, FED increases interest rates to curtail spending
#Inflation Rate vs Unemployment Rate (1958 - 2017)
# Inflation rate against unemployment rate for the full sample: points are
# shaded by calendar year and one least-squares line is fitted to all points.
# No group_by() is used, because ggplot2 ignores dplyr groups. Colour is mapped
# only in geom_point(): a continuous colour inside geom_smooth() is dropped by
# the stat and triggers a warning in recent ggplot2 releases.
macroecon %>%
  ggplot(aes(UnemploymentRate, InflationRate)) +
  geom_point(aes(colour = year(Date))) +
  geom_smooth(method = "lm") +
  # axis labels, title and a readable legend title
  labs(
    x = "Unemployment Rate (%)", y = "Inflation Rate (%)",
    title = "Unemployment Rate vs Inflation Rate (1958-2017)",
    colour = "Year"
  )

#we can see a relatively weak correlation between the two variables
#this can be seen from the weak correlation coefficient in the earlier section
Data Visualization 2: Plotting Inflation, Interest Rate, Unemployment Rate Over Time
#Plotting Inflation Rate, Interest Rate, Unemployment Rate Over Time
#Inflation Time Series
# Inflation rate over time as a single line.
# The former group_by(Date) made one group per row and had no effect on the
# plot, so it is omitted.
macroecon %>%
  ggplot(aes(Date, InflationRate)) +
  geom_line(colour = "#0072B2") +
  labs(
    x = "Date", y = "Inflation Rate (%)",
    title = "FED Inflation Rate Year-Wise (1958 - 2017)"
  ) +
  theme(plot.title = element_text(face = "bold"))

#over time we can see the late 1970's saw a spike in inflation rate due to the rise of oil prices
#in recent years the FED have been able to curtail inflation targets
#Interest Rate Time Series
# Interest rate over time as a single line.
# The former group_by(Date) made one group per row and had no effect on the
# plot, so it is omitted.
macroecon %>%
  ggplot(aes(Date, InterestRate)) +
  geom_line(colour = "#D55E00") +
  labs(
    x = "Date", y = "Interest Rate (%)",
    title = "FED Interest Rate Year-Wise (1958 - 2017)"
  ) +
  theme(plot.title = element_text(face = "bold"))

#high interest rates in the early 1980's followed soon after inflation
#ever since the 2008 economic crisis, interest rates have been close to zero
#Unemployment Time Series
# Unemployment rate over time as a single line.
# The former group_by(Date) made one group per row and had no effect on the
# plot, so it is omitted.
macroecon %>%
  ggplot(aes(Date, UnemploymentRate)) +
  geom_line(colour = "#009E73") +
  labs(
    x = "Date", y = "Unemployment Rate (%)",
    title = "FED Unemployment Rate Year-Wise (1958 - 2017)"
  ) +
  theme(plot.title = element_text(face = "bold"))

#unemployment reached its peak during the early 1980's and again in the aftermath of the 2008 financial crisis (the Great Recession)
#in recent years it has been decreasing to 4.6%
Data Visualization 3: Comparing Economic Boom during 1990’s vs Economic Recession 2008
#Comparing Economic Boom vs Economic Recession
#Inflation, Interest Rates, Unemployment Rate During Economic Boom
# The three series over the 1990s boom, one line each; the constant colour
# labels inside aes() create the legend entries.
# The former group_by(Date) had no effect on the plot, so it is omitted.
econboom %>%
  ggplot(aes(Date)) +
  geom_line(aes(y = InflationRate, colour = "InflationRate")) +
  geom_line(aes(y = InterestRate, colour = "InterestRate")) +
  geom_line(aes(y = UnemploymentRate, colour = "UnemploymentRate")) +
  labs(
    x = "Date", y = "Percent (%)",
    title = "1990's Boom: Inflation, Interest Rate & Unemployment Rate"
  ) +
  theme(plot.title = element_text(face = "bold"))

#during the economic boom of 1990's we saw a steady decline in unemployment rate
#inflation rate decreasing since 1992 is a consequence of FED increasing interest rates
#Inflation, Interest Rates, Unemployment Rate During Economic Crisis
# The three series over the economic-crisis subset, one line each; the
# constant colour labels inside aes() create the legend entries.
# The former group_by(Date) had no effect on the plot, so it is omitted.
econcrisis %>%
  ggplot(aes(Date)) +
  geom_line(aes(y = InflationRate, colour = "InflationRate")) +
  geom_line(aes(y = InterestRate, colour = "InterestRate")) +
  geom_line(aes(y = UnemploymentRate, colour = "UnemploymentRate")) +
  labs(
    x = "Date", y = "Percent (%)",
    title = "2008 Economic Crisis: Inflation, Interest Rate & Unemployment Rate"
  ) +
  theme(plot.title = element_text(face = "bold"))

#during the economic crisis, we can see a staggering increase in unemployment rate before it decreases
#at the same time interest rates were decreased close to zero to force consumers to spend
#however, as we can see inflation remains low hovering below 2.5%
Data Visualization 4: Meeting Interest Rate Targets
#Meeting Interest Rate Targets (2000 - 2008)
# Realised interest rate against the single target rate, one line each.
# The former group_by(Date) had no effect on the plot, so it is omitted.
interestratetarget %>%
  ggplot(aes(Date)) +
  geom_line(aes(y = InterestTargetRate, colour = "InterestTargetRate")) +
  geom_line(aes(y = InterestRate, colour = "InterestRate")) +
  labs(
    x = "Date", y = "Interest Rate(%)",
    title = "Effectiveness of Monetary Policy (2000 - 2008)"
  ) +
  theme(plot.title = element_text(face = "bold"))

#the FED has been successful in keeping interest rates in line with its target
#Upper and Lower Bounds
# Realised interest rate against the upper and lower target bounds, one line
# each.
# The former group_by(Date) had no effect on the plot, so it is omitted.
interestrateboundaries %>%
  ggplot(aes(Date)) +
  geom_line(aes(y = InterestUpperTargetRate, colour = "InterestUpperTargetRate")) +
  geom_line(aes(y = InterestLowerTargetRate, colour = "InterestLowerTargetRate")) +
  geom_line(aes(y = InterestRate, colour = "InterestRate")) +
  labs(
    x = "Date", y = "Interest Rate(%)",
    title = "Interest Rate Upper and Lower Bound (2008 - 2017)"
  ) +
  theme(plot.title = element_text(face = "bold"))

#The upper target remains at 0.25 until the end of 2016
#the lower target remains at 0.0 until the end of 2016
#the FED has been successful in keeping interest rates between these bounds