# Load libraries
library(dplyr)
library(janitor)
library(lubridate)
library(readr)
library(zoo)
library(ggplot2)
library(tidyr)
library(forecast)
library(WDI)
# Download GDP per capita (World Bank indicator NY.GDP.PCAP.KD) for 1960-2020.
# The named indicator vector makes WDI() store the series in a column called
# `gdpPercap`; extra = TRUE asks WDI() to attach its additional country
# metadata columns. The result is converted to a tibble for the dplyr
# pipelines that follow.
wdi_data <- WDI(
  indicator = c("gdpPercap" = "NY.GDP.PCAP.KD"),
  start = 1960,
  end = 2020,
  extra = TRUE
) %>%
  as_tibble()
For part 1, load the WDI data and calculate yearly GDP growth (%) for the United Kingdom, France, and India ((current GDP - lag GDP) / lag GDP). Plot the data for all three countries on one chart, colored by country. Which country has the highest average growth during this time period? Do the data appear to be stationary?
# Keep only the rows for the three countries under study, then count the
# number of observations retained for each of them.
countries_of_interest <- c("United Kingdom", "France", "India")
country_data <- dplyr::filter(wdi_data, country %in% countries_of_interest)
table(country_data[["country"]])
##
## France India United Kingdom
## 61 61 61
Here we’ve filtered down to only include the data from the UK, France, and India.
# Year-over-year growth in GDP per capita for every country in the download:
# (current - previous) / previous, computed within each country after sorting
# by year. Rows with any missing value (including each country's first year,
# which has no previous observation) are dropped.
gdp_growth_data <- wdi_data %>%
  arrange(country, year) %>%
  group_by(country) %>%
  mutate(
    gdp_prev = dplyr::lag(gdpPercap),
    gdp_growth = (gdpPercap - gdp_prev) / gdp_prev
  ) %>%
  ungroup() %>%
  dplyr::select(country, year, gdp_growth) %>%
  drop_na()

# First and last year for which a growth rate is available.
min(gdp_growth_data[["year"]])
## [1] 1961
max(gdp_growth_data[["year"]])
## [1] 2020
# Growth in GDP per capita for the selected countries only, using the same
# (current - previous) / previous definition within each country.
country_growth_data <- country_data %>%
  arrange(country, year) %>%
  group_by(country) %>%
  mutate(
    gdp_prev = dplyr::lag(gdpPercap),
    gdp_growth = (gdpPercap - gdp_prev) / gdp_prev
  ) %>%
  ungroup() %>%
  dplyr::select(country, year, gdp_growth) %>%
  drop_na()

# One line per country on a shared set of axes, distinguished by colour.
ggplot(
  country_growth_data,
  aes(x = year, y = gdp_growth, colour = country)
) +
  geom_line() +
  labs(
    x = "Year",
    y = "GDP Per Capita Growth",
    title = "GDP Per Capita Growth Rate, 1961-2020"
  ) +
  theme_bw()
Now create a separate plot for each of the UK, France, and India.
# Growth in GDP per capita for the selected countries:
# (current - previous) / previous within each country, sorted by year.
# Note that this reassigns `gdp_growth_data`.
gdp_growth_data <- country_data %>%
  arrange(country, year) %>%
  group_by(country) %>%
  mutate(gdp_growth = (gdpPercap - lag(gdpPercap)) / lag(gdpPercap)) %>%
  ungroup() %>%
  dplyr::select(country, year, gdp_growth) %>%
  drop_na()

# Draw one panel per country. Without the facet (or a group aesthetic),
# geom_line() joins every row into a single path, producing one line that
# zig-zags between the countries at each year.
ggplot(gdp_growth_data) +
  geom_line(aes(year, gdp_growth)) +
  facet_wrap(~ country, ncol = 1) +
  theme_bw() +
  xlab("Year") +
  ylab("GDP Per Capita Growth") +
  labs(title = "GDP Per Capita Growth Rate, 1961-2020")
Next, plot the ACF and PACF associated with GDP growth. What trends do you observe in these visualizations?
# ACF and PACF of GDP-per-capita growth, computed separately for each country.
# country_growth_data holds the countries' series stacked one after another,
# so running acf()/pacf() on the whole column would pair observations across
# country boundaries. Missing values were already removed by drop_na() when
# country_growth_data was built.
growth_by_country <- split(
  country_growth_data$gdp_growth,
  country_growth_data$country
)

# Two panels per page (ACF on the left, PACF on the right); each country
# starts a new page. The previous graphics settings are restored afterwards.
old_par <- par(mfrow = c(1, 2))
for (country_name in names(growth_by_country)) {
  acf(growth_by_country[[country_name]], main = paste("ACF:", country_name))
  pacf(growth_by_country[[country_name]], main = paste("PACF:", country_name))
}
par(old_par)

# Number of growth observations available per country.
table(country_growth_data$country)
##
## France India United Kingdom
## 60 60 60
Reassess the ACF and PACF plots. Do they indicate that a certain type of ARMA model may be most appropriate? Next, fit an ARMA model to the data, comparing several unique models and selecting the best one based on the AIC or BIC. Which “order” shows the best performance? What does the selected order tell you about the data-generating process of the time series?
# Let auto.arima() choose an ARIMA order for the growth series and print the
# selected model.
# NOTE(review): gdp_growth here is the France, India and United Kingdom series
# stacked end to end (country_growth_data is sorted by country, then year), so
# a single model is fitted across the country boundaries -- confirm that this
# is intended rather than one fit per country.
auto.arima(country_growth_data$gdp_growth)
## Series: country_growth_data$gdp_growth
## ARIMA(1,0,1) with zero mean
##
## Coefficients:
## ar1 ma1
## 0.9898 -0.8509
## s.e. 0.0106 0.0431
##
## sigma^2 estimated as 0.0007163: log likelihood=396.58
## AIC=-787.16 AICc=-787.02 BIC=-777.58