This report explores countries' gross domestic product (GDP) from 1960 to 2020. Exploratory data analysis is conducted first and is then used to build a logistic regression model that predicts recession years.
# Read the GDP table from a local CSV file and preview its first rows.
# name_repair = "minimal" keeps the column names as they appear in the file.
library(readr)
GDP_DATA <- read_csv(
  file = "/Users/kennyg/Downloads/Countries GDP 1960-2020.csv",
  name_repair = "minimal"
)
head(GDP_DATA)
## # A tibble: 6 × 63
## `Country Name` `Country Code` `1960` `1961` `1962` `1963` `1964` `1965`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Africa Eastern… AFE 1.93e10 1.97e10 2.15e10 2.57e10 2.35e10 2.68e10
## 2 Africa Western… AFW 1.04e10 1.11e10 1.19e10 1.27e10 1.38e10 1.49e10
## 3 Australia AUS 1.86e10 1.97e10 1.99e10 2.15e10 2.38e10 2.60e10
## 4 Austria AUT 6.59e 9 7.31e 9 7.76e 9 8.37e 9 9.17e 9 9.99e 9
## 5 Burundi BDI 1.96e 8 2.03e 8 2.14e 8 2.33e 8 2.61e 8 1.59e 8
## 6 Belgium BEL 1.17e10 1.24e10 1.33e10 1.43e10 1.60e10 1.74e10
## # ℹ 55 more variables: `1966` <dbl>, `1967` <dbl>, `1968` <dbl>, `1969` <dbl>,
## # `1970` <dbl>, `1971` <dbl>, `1972` <dbl>, `1973` <dbl>, `1974` <dbl>,
## # `1975` <dbl>, `1976` <dbl>, `1977` <dbl>, `1978` <dbl>, `1979` <dbl>,
## # `1980` <dbl>, `1981` <dbl>, `1982` <dbl>, `1983` <dbl>, `1984` <dbl>,
## # `1985` <dbl>, `1986` <dbl>, `1987` <dbl>, `1988` <dbl>, `1989` <dbl>,
## # `1990` <dbl>, `1991` <dbl>, `1992` <dbl>, `1993` <dbl>, `1994` <dbl>,
## # `1995` <dbl>, `1996` <dbl>, `1997` <dbl>, `1998` <dbl>, `1999` <dbl>, …
# Standardise the column names (the year columns become x1960, x1961, ...)
GDP_DATA <- clean_names(GDP_DATA)
# check for any loading issues: TRUE means at least one cell is missing
any(is.na(GDP_DATA))
## [1] TRUE
# Per-column summary: five-number summary, mean and NA count for the numeric
# year columns; length/class/mode for the two character columns
summary(GDP_DATA)
## country_name country_code x1960 x1961
## Length:120 Length:120 Min. :1.201e+07 Min. :1.159e+07
## Class :character Class :character 1st Qu.:4.363e+08 1st Qu.:4.747e+08
## Mode :character Mode :character Median :2.724e+09 Median :2.667e+09
## Mean :7.738e+10 Mean :7.919e+10
## 3rd Qu.:2.926e+10 3rd Qu.:3.041e+10
## Max. :1.390e+12 Max. :1.440e+12
## NA's :1
## x1962 x1963 x1964
## Min. :1.254e+07 Min. :1.283e+07 Min. :1.342e+07
## 1st Qu.:4.716e+08 1st Qu.:5.082e+08 1st Qu.:5.418e+08
## Median :3.051e+09 Median :3.571e+09 Median :3.184e+09
## Mean :8.418e+10 Mean :9.092e+10 Mean :9.976e+10
## 3rd Qu.:3.295e+10 3rd Qu.:3.775e+10 3rd Qu.:3.687e+10
## Max. :1.550e+12 Max. :1.670e+12 Max. :1.820e+12
##
## x1965 x1966 x1967
## Min. :1.359e+07 Min. :1.447e+07 Min. :1.584e+07
## 1st Qu.:6.561e+08 1st Qu.:6.940e+08 1st Qu.:7.417e+08
## Median :3.590e+09 Median :4.231e+09 Median :4.194e+09
## Mean :1.092e+11 Mean :1.178e+11 Mean :1.245e+11
## 3rd Qu.:4.106e+10 3rd Qu.:4.430e+10 3rd Qu.:4.380e+10
## Max. :1.990e+12 Max. :2.160e+12 Max. :2.290e+12
##
## x1968 x1969 x1970
## Min. :1.460e+07 Min. :1.585e+07 Min. :1.630e+07
## 1st Qu.:7.689e+08 1st Qu.:7.863e+08 1st Qu.:8.549e+08
## Median :4.571e+09 Median :5.727e+09 Median :6.061e+09
## Mean :1.339e+11 Mean :1.481e+11 Mean :1.621e+11
## 3rd Qu.:4.683e+10 3rd Qu.:5.355e+10 3rd Qu.:6.300e+10
## Max. :2.480e+12 Max. :2.730e+12 Max. :2.990e+12
##
## x1971 x1972 x1973
## Min. :1.962e+07 Min. :2.294e+07 Min. :2.420e+07
## 1st Qu.:8.933e+08 1st Qu.:1.069e+09 1st Qu.:1.290e+09
## Median :6.511e+09 Median :6.548e+09 Median :8.058e+09
## Mean :1.780e+11 Mean :2.042e+11 Mean :2.501e+11
## 3rd Qu.:6.409e+10 3rd Qu.:7.239e+10 3rd Qu.:9.262e+10
## Max. :3.300e+12 Max. :3.800e+12 Max. :4.640e+12
##
## x1974 x1975 x1976
## Min. :3.151e+07 Min. :3.324e+07 Min. :3.010e+07
## 1st Qu.:1.636e+09 1st Qu.:2.108e+09 1st Qu.:2.355e+09
## Median :1.098e+10 Median :1.073e+10 Median :1.196e+10
## Mean :2.907e+11 Mean :3.232e+11 Mean :3.499e+11
## 3rd Qu.:1.210e+11 3rd Qu.:1.330e+11 3rd Qu.:1.330e+11
## Max. :5.350e+12 Max. :5.960e+12 Max. :6.480e+12
##
## x1977 x1978 x1979
## Min. :4.450e+07 Min. :4.943e+07 Min. :5.884e+07
## 1st Qu.:2.452e+09 1st Qu.:2.611e+09 1st Qu.:2.986e+09
## Median :1.429e+10 Median :1.617e+10 Median :2.045e+10
## Mean :3.952e+11 Mean :4.597e+11 Mean :5.341e+11
## 3rd Qu.:1.540e+11 3rd Qu.:1.645e+11 3rd Qu.:1.960e+11
## Max. :7.330e+12 Max. :8.630e+12 Max. :1.000e+13
##
## x1980 x1981 x1982
## Min. :6.846e+07 Min. :8.089e+07 Min. :8.602e+07
## 1st Qu.:3.717e+09 1st Qu.:3.365e+09 1st Qu.:3.272e+09
## Median :2.407e+10 Median :2.655e+10 Median :2.607e+10
## Mean :6.052e+11 Mean :6.383e+11 Mean :6.310e+11
## 3rd Qu.:2.338e+11 3rd Qu.:2.535e+11 3rd Qu.:2.590e+11
## Max. :1.130e+13 Max. :1.170e+13 Max. :1.160e+13
##
## x1983 x1984 x1985
## Min. :8.687e+07 Min. :9.860e+07 Min. :1.110e+08
## 1st Qu.:3.199e+09 1st Qu.:3.390e+09 1st Qu.:3.251e+09
## Median :2.400e+10 Median :2.266e+10 Median :2.470e+10
## Mean :6.379e+11 Mean :6.598e+11 Mean :6.962e+11
## 3rd Qu.:2.425e+11 3rd Qu.:2.600e+11 3rd Qu.:2.470e+11
## Max. :1.180e+13 Max. :1.220e+13 Max. :1.290e+13
##
## x1986 x1987 x1988
## Min. :1.307e+08 Min. :1.477e+08 Min. :1.727e+08
## 1st Qu.:3.793e+09 1st Qu.:3.710e+09 1st Qu.:3.833e+09
## Median :2.822e+10 Median :3.277e+10 Median :3.652e+10
## Mean :8.049e+11 Mean :9.059e+11 Mean :1.008e+12
## 3rd Qu.:2.550e+11 3rd Qu.:2.900e+11 3rd Qu.:2.980e+11
## Max. :1.520e+13 Max. :1.730e+13 Max. :1.930e+13
##
## x1989 x1990 x1991
## Min. :1.925e+08 Min. :2.173e+08 Min. :2.205e+08
## 1st Qu.:4.289e+09 1st Qu.:4.520e+09 1st Qu.:4.065e+09
## Median :3.904e+10 Median :4.202e+10 Median :4.567e+10
## Mean :1.049e+12 Mean :1.181e+12 Mean :1.229e+12
## 3rd Qu.:2.970e+11 3rd Qu.:3.375e+11 3rd Qu.:3.380e+11
## Max. :2.010e+13 Max. :2.270e+13 Max. :2.370e+13
##
## x1992 x1993 x1994
## Min. :2.421e+08 Min. :2.638e+08 Min. :2.894e+08
## 1st Qu.:4.320e+09 1st Qu.:4.597e+09 1st Qu.:4.547e+09
## Median :4.844e+10 Median :5.088e+10 Median :5.465e+10
## Mean :1.316e+12 Mean :1.347e+12 Mean :1.459e+12
## 3rd Qu.:3.578e+11 3rd Qu.:3.608e+11 3rd Qu.:3.922e+11
## Max. :2.540e+13 Max. :2.580e+13 Max. :2.790e+13
##
## x1995 x1996 x1997
## Min. :3.135e+08 Min. :3.315e+08 Min. :3.478e+08
## 1st Qu.:5.237e+09 1st Qu.:5.574e+09 1st Qu.:5.738e+09
## Median :6.489e+10 Median :6.956e+10 Median :7.264e+10
## Mean :1.624e+12 Mean :1.672e+12 Mean :1.676e+12
## 3rd Qu.:4.062e+11 3rd Qu.:4.425e+11 3rd Qu.:4.685e+11
## Max. :3.100e+13 Max. :3.170e+13 Max. :3.160e+13
##
## x1998 x1999 x2000
## Min. :3.736e+08 Min. :3.907e+08 Min. :3.963e+08
## 1st Qu.:6.173e+09 1st Qu.:5.870e+09 1st Qu.:6.148e+09
## Median :6.718e+10 Median :6.907e+10 Median :7.994e+10
## Mean :1.668e+12 Mean :1.729e+12 Mean :1.800e+12
## 3rd Qu.:4.452e+11 3rd Qu.:4.860e+11 3rd Qu.:5.460e+11
## Max. :3.150e+13 Max. :3.270e+13 Max. :3.380e+13
##
## x2001 x2002 x2003
## Min. :4.300e+08 Min. :4.619e+08 Min. :4.699e+08
## 1st Qu.:6.251e+09 1st Qu.:6.176e+09 1st Qu.:7.285e+09
## Median :7.648e+10 Median :8.211e+10 Median :8.940e+10
## Mean :1.793e+12 Mean :1.862e+12 Mean :2.088e+12
## 3rd Qu.:5.412e+11 3rd Qu.:5.902e+11 3rd Qu.:6.588e+11
## Max. :3.360e+13 Max. :3.490e+13 Max. :3.910e+13
##
## x2004 x2005 x2006
## Min. :5.069e+08 Min. :5.472e+08 Min. :6.109e+08
## 1st Qu.:8.564e+09 1st Qu.:9.574e+09 1st Qu.:1.029e+10
## Median :1.036e+11 Median :1.215e+11 Median :1.430e+11
## Mean :2.360e+12 Mean :2.580e+12 Mean :2.825e+12
## 3rd Qu.:7.640e+11 3rd Qu.:8.792e+11 3rd Qu.:9.938e+11
## Max. :4.410e+13 Max. :4.780e+13 Max. :5.180e+13
##
## x2007 x2008 x2009
## Min. :6.844e+08 Min. :6.954e+08 Min. :6.749e+08
## 1st Qu.:1.218e+10 1st Qu.:1.377e+10 1st Qu.:1.267e+10
## Median :1.650e+11 Median :1.810e+11 Median :1.740e+11
## Mean :3.215e+12 Mean :3.567e+12 Mean :3.426e+12
## 3rd Qu.:1.182e+12 3rd Qu.:1.270e+12 3rd Qu.:1.250e+12
## Max. :5.830e+13 Max. :6.400e+13 Max. :6.070e+13
##
## x2010 x2011 x2012
## Min. :6.812e+08 Min. :6.761e+08 Min. :6.929e+08
## 1st Qu.:1.434e+10 1st Qu.:1.776e+10 1st Qu.:1.769e+10
## Median :2.135e+11 Median :2.365e+11 Median :2.250e+11
## Mean :3.811e+12 Mean :4.266e+12 Mean :4.404e+12
## 3rd Qu.:1.440e+12 3rd Qu.:1.610e+12 3rd Qu.:1.680e+12
## Max. :6.650e+13 Max. :7.370e+13 Max. :7.530e+13
##
## x2013 x2014 x2015
## Min. :7.212e+08 Min. :7.277e+08 Min. :7.554e+08
## 1st Qu.:1.878e+10 1st Qu.:1.959e+10 1st Qu.:1.942e+10
## Median :2.345e+11 Median :2.395e+11 Median :2.165e+11
## Mean :4.565e+12 Mean :4.712e+12 Mean :4.499e+12
## 3rd Qu.:1.790e+12 3rd Qu.:1.850e+12 3rd Qu.:1.680e+12
## Max. :7.740e+13 Max. :7.960e+13 Max. :7.510e+13
##
## x2016 x2017 x2018
## Min. :7.744e+08 Min. :7.922e+08 Min. :8.113e+08
## 1st Qu.:2.071e+10 1st Qu.:2.204e+10 1st Qu.:2.354e+10
## Median :2.310e+11 Median :2.525e+11 Median :2.750e+11
## Mean :4.565e+12 Mean :4.886e+12 Mean :5.207e+12
## 3rd Qu.:1.560e+12 3rd Qu.:1.670e+12 3rd Qu.:1.750e+12
## Max. :7.630e+13 Max. :8.120e+13 Max. :8.630e+13
##
## x2019 x2020
## Min. :8.250e+08 Min. :8.075e+08
## 1st Qu.:2.328e+10 1st Qu.:2.052e+10
## Median :2.740e+11 Median :2.580e+11
## Mean :5.303e+12 Mean :5.147e+12
## 3rd Qu.:1.800e+12 3rd Qu.:1.702e+12
## Max. :8.760e+13 Max. :8.470e+13
##
# create a heatmap of the numeric (year) columns; scale() centres and scales
# each year column across rows before plotting
library(pheatmap)
numeric_cols <- GDP_DATA[, vapply(GDP_DATA, is.numeric, logical(1))]
scaled_data <- scale(numeric_cols)
pheatmap(
  scaled_data,
  color = colorRampPalette(c("blue", "white", "red"))(50),
  clustering_method = "complete",
  show_rownames = FALSE,
  main = "GDP per Country from 1960-2020"
)
### 3.2 Distribution of Data
# Preview the year columns in long format (one row per year/value pair);
# the result is only printed, it is not stored
pivot_longer(
  numeric_cols,
  cols = everything(),
  names_to = "variable",
  values_to = "value"
)
## # A tibble: 7,320 × 2
## variable value
## <chr> <dbl>
## 1 x1960 19313106302
## 2 x1961 19723488057
## 3 x1962 21493920015
## 4 x1963 25733212134
## 5 x1964 23527443251
## 6 x1965 26810567154
## 7 x1966 29152157362
## 8 x1967 30173172663
## 9 x1968 32877055829
## 10 x1969 37744346869
## # ℹ 7,310 more rows
# Reshape to one row per country and year. The first two columns (country name
# and code) are kept as identifiers; the year is parsed from the column name by
# stripping every non-digit character (e.g. "x1960" -> 1960).
gdp_long3 <- GDP_DATA %>%
  pivot_longer(
    cols = -(1:2),
    names_to = "Year_raw",
    values_to = "GDP"
  ) %>%
  mutate(
    Year = as.integer(gsub("[^0-9]", "", Year_raw)),
    Year_raw = NULL
  ) %>%
  filter(!is.na(Year))
# Histograms of GDP on a log10 x-axis, one panel per decade start (1960-2020)
gdp_long3 %>%
  filter(Year %in% seq(1960, 2020, by = 10)) %>%
  ggplot(mapping = aes(x = GDP)) +
  geom_histogram(colour = "black", fill = "steelblue", bins = 30) +
  facet_wrap(~ Year, scales = "free_y") +
  scale_x_log10() +
  labs(title = "Histogram of GDP for selected years")
# Yearly median and quartiles of GDP across all rows, ignoring missing values
gdp_summary <- summarise(
  group_by(gdp_long3, Year),
  median = median(GDP, na.rm = TRUE),
  q25 = quantile(GDP, probs = 0.25, na.rm = TRUE),
  q75 = quantile(GDP, probs = 0.75, na.rm = TRUE)
)
# Median line with the interquartile range as a shaded band, log10 y-axis
gdp_summary %>%
  ggplot(aes(x = Year)) +
  geom_ribbon(aes(ymin = q25, ymax = q75), alpha = 0.5, fill = "grey70") +
  geom_line(aes(y = median), linewidth = 1, color = "steelblue") +
  labs(title = "GDP percentile over time", y = "GDP") +
  scale_y_log10()
The median GDP has increased over time, and the interquartile range has
risen with it, which indicates that most countries have experienced
significant growth over the last 60 years.
# Year-over-year GDP growth (%) for each country code.
# Rows are sorted by Year so that lag() returns the previous year's GDP inside
# each country group; a country's first year has no predecessor and gets NA.
# ungroup() is called at the end so the stored tibble is not left grouped and
# later steps do not silently operate per country.
gdp_long3 <- gdp_long3 %>%
  group_by(country_code) %>%
  arrange(Year) %>%
  mutate(growth = (GDP - lag(GDP)) / lag(GDP) * 100) %>%
  ungroup()
# plot growth rates for a fixed set of country codes, from 1970 onwards
top_countries <- c("USA", "CHN", "IND", "GBR", "BRA", "JPN", "DEU", "ZAF")
gdp_long3 %>%
  filter(country_code %in% top_countries, Year >= 1970) %>%
  ggplot(aes(x = Year, y = growth, colour = country_name)) +
  geom_line() +
  # zero-growth reference line; the ggplot2 parameter is `linetype`
  # (`linestyle` is not recognised and would leave the line solid)
  geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.5) +
  labs(y = "Annual GDP Growth (%)", title = "GDP Growth rates over time") +
  theme_minimal()
Among the selected countries, South Africa recorded both the highest
annual GDP growth since 1970 (above 50% between 2000 and 2010) and the
lowest (around -20% in 1985). The United States and China had the most
consistent annual GDP growth over the same period.
# Recompute annual growth per country and flag negative-growth years:
# recession is 1 when growth < 0, 0 otherwise, and NA when growth is NA
gdp_long3 <- gdp_long3 %>%
  arrange(Year) %>%
  group_by(country_code) %>%
  mutate(
    growth = (GDP - lag(GDP)) / lag(GDP) * 100,
    recession = as.numeric(growth < 0)
  ) %>%
  ungroup()
# Add each country's growth from one and two years earlier as predictors
gdp_long3 <- gdp_long3 %>%
  arrange(Year) %>%
  group_by(country_code) %>%
  mutate(
    lag1_growth = lag(growth, n = 1),
    lag2_growth = lag(growth, n = 2)
  ) %>%
  ungroup()
# Keep only rows where the outcome and both lagged predictors are present,
# then split by time: years up to 2000 for fitting, later years for evaluation
model_data <- filter(
  gdp_long3,
  !(is.na(recession) | is.na(lag1_growth) | is.na(lag2_growth))
)
train_data <- filter(model_data, Year <= 2000)
test_data <- filter(model_data, Year > 2000)
# Build the model formula: the two lagged growth terms are always included;
# a country factor and a linear Year term are added only when the training
# data contains more than one distinct value of each.
# NOTE(review): the coefficient table lists codes such as WLD and HIC, which
# look like aggregates rather than countries - confirm whether they should be
# filtered out before fitting.
# NOTE(review): NAC, PRI and USA have estimates near -15 with standard errors
# near 379 in the summary, which suggests no recession years for those codes in
# the training data - verify.
rhs_terms <- c("lag1_growth", "lag2_growth")
if (length(unique(train_data$country_code)) > 1) {
  rhs_terms <- c(rhs_terms, "factor(country_code)")
}
if (length(unique(train_data$Year)) > 1) {
  rhs_terms <- c(rhs_terms, "Year")
}
formula <- reformulate(rhs_terms, response = "recession")
# Logistic regression of the recession flag on the assembled terms
model <- glm(formula, data = train_data, family = binomial)
summary(model)
##
## Call:
## glm(formula = formula, family = binomial, data = train_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -91.542842 7.671072 -11.934 < 2e-16 ***
## lag1_growth -0.021151 0.003021 -7.002 2.52e-12 ***
## lag2_growth 0.006005 0.002782 2.158 0.0309 *
## factor(country_code)AFW 0.275455 0.542163 0.508 0.6114
## factor(country_code)AUS -0.511187 0.600891 -0.851 0.3949
## factor(country_code)AUT -0.301649 0.582216 -0.518 0.6044
## factor(country_code)BDI 0.237057 0.540548 0.439 0.6610
## factor(country_code)BEL 0.020886 0.556099 0.038 0.9700
## factor(country_code)BEN 0.180012 0.547903 0.329 0.7425
## factor(country_code)BFA 0.152774 0.547177 0.279 0.7801
## factor(country_code)BGD -0.171160 0.567989 -0.301 0.7632
## factor(country_code)BHS -0.693355 0.627318 -1.105 0.2690
## factor(country_code)BLZ -0.711286 0.626325 -1.136 0.2561
## factor(country_code)BMU -2.441366 1.090357 -2.239 0.0252 *
## factor(country_code)BOL -0.744518 0.627414 -1.187 0.2354
## factor(country_code)BWA -1.235267 0.725813 -1.702 0.0888 .
## factor(country_code)CAF 0.271633 0.541380 0.502 0.6158
## factor(country_code)CAN -1.319567 0.724032 -1.823 0.0684 .
## factor(country_code)CHL 0.011025 0.556942 0.020 0.9842
## factor(country_code)CHN -0.483063 0.599270 -0.806 0.4202
## factor(country_code)CIV 0.326881 0.541679 0.603 0.5462
## factor(country_code)CMR -0.329848 0.585557 -0.563 0.5732
## factor(country_code)COD 0.702670 0.528163 1.330 0.1834
## factor(country_code)COG 0.157918 0.548960 0.288 0.7736
## factor(country_code)COL -0.138570 0.566685 -0.245 0.8068
## factor(country_code)CRI -1.737725 0.831768 -2.089 0.0367 *
## factor(country_code)CSS -0.509608 0.599573 -0.850 0.3954
## factor(country_code)DOM -0.981172 0.668663 -1.467 0.1423
## factor(country_code)DZA -0.529219 0.602755 -0.878 0.3799
## factor(country_code)EAP -0.705383 0.625992 -1.127 0.2598
## factor(country_code)EAR -0.984070 0.664396 -1.481 0.1386
## factor(country_code)EAS -0.950331 0.665481 -1.428 0.1533
## factor(country_code)ECU 0.276497 0.541022 0.511 0.6093
## factor(country_code)EMU -0.131945 0.567526 -0.232 0.8162
## factor(country_code)ESP -0.288155 0.582786 -0.494 0.6210
## factor(country_code)FIN 0.026326 0.556413 0.047 0.9623
## factor(country_code)FJI -0.145666 0.567206 -0.257 0.7973
## factor(country_code)FRA -0.140697 0.567511 -0.248 0.8042
## factor(country_code)GAB 0.567812 0.533235 1.065 0.2869
## factor(country_code)GBR -0.126958 0.566302 -0.224 0.8226
## factor(country_code)GHA 0.520897 0.528556 0.986 0.3244
## factor(country_code)GRC -0.126654 0.566915 -0.223 0.8232
## factor(country_code)GTM -0.992424 0.664955 -1.492 0.1356
## factor(country_code)GUY 0.375438 0.533588 0.704 0.4817
## factor(country_code)HIC -1.741972 0.830688 -2.097 0.0360 *
## factor(country_code)HKG -1.240285 0.725042 -1.711 0.0871 .
## factor(country_code)HND -0.330081 0.581820 -0.567 0.5705
## factor(country_code)HPC -0.173582 0.566577 -0.306 0.7593
## factor(country_code)HTI -0.509612 0.601631 -0.847 0.3970
## factor(country_code)IBD -0.729962 0.626568 -1.165 0.2440
## factor(country_code)IBT -0.731718 0.626544 -1.168 0.2429
## factor(country_code)IDA -0.002298 0.555436 -0.004 0.9967
## factor(country_code)IDB -0.177133 0.568492 -0.312 0.7554
## factor(country_code)IDX -0.160147 0.565938 -0.283 0.7772
## factor(country_code)IND -0.521473 0.599880 -0.869 0.3847
## factor(country_code)IRL -0.461634 0.600312 -0.769 0.4419
## factor(country_code)ISL -0.122631 0.567716 -0.216 0.8290
## factor(country_code)ITA -0.130014 0.567823 -0.229 0.8189
## factor(country_code)JAM -0.335070 0.582427 -0.575 0.5651
## factor(country_code)JPN -0.683287 0.628691 -1.087 0.2771
## factor(country_code)KEN 0.163346 0.547230 0.298 0.7653
## factor(country_code)KNA -1.719361 0.830378 -2.071 0.0384 *
## factor(country_code)KOR -0.634105 0.630551 -1.006 0.3146
## factor(country_code)LKA -0.523959 0.599816 -0.874 0.3824
## factor(country_code)LMC -0.516734 0.600290 -0.861 0.3893
## factor(country_code)LMY -0.732413 0.626586 -1.169 0.2424
## factor(country_code)LSO 0.023751 0.556812 0.043 0.9660
## factor(country_code)LTE -1.317456 0.724275 -1.819 0.0689 .
## factor(country_code)LUX -0.288690 0.581320 -0.497 0.6195
## factor(country_code)MAR 0.020909 0.556018 0.038 0.9700
## factor(country_code)MDG 0.393568 0.535621 0.735 0.4625
## factor(country_code)MEX -0.705974 0.630320 -1.120 0.2627
## factor(country_code)MIC -0.731960 0.626608 -1.168 0.2428
## factor(country_code)MWI -0.002944 0.559580 -0.005 0.9958
## factor(country_code)MYS -0.700600 0.628459 -1.115 0.2649
## factor(country_code)NAC -15.383956 379.211807 -0.041 0.9676
## factor(country_code)NER 0.887400 0.522491 1.698 0.0894 .
## factor(country_code)NGA 0.654483 0.533772 1.226 0.2201
## factor(country_code)NIC -0.355338 0.588767 -0.604 0.5462
## factor(country_code)NLD 0.036702 0.556263 0.066 0.9474
## factor(country_code)NOR -0.493629 0.600810 -0.822 0.4113
## factor(country_code)NPL -0.009894 0.554754 -0.018 0.9858
## factor(country_code)OED -1.741954 0.830648 -2.097 0.0360 *
## factor(country_code)PAK -0.326082 0.581124 -0.561 0.5747
## factor(country_code)PAN -2.476344 1.090340 -2.271 0.0231 *
## factor(country_code)PER 0.298132 0.541501 0.551 0.5819
## factor(country_code)PHL -0.154367 0.566471 -0.273 0.7852
## factor(country_code)PNG 0.287266 0.541078 0.531 0.5955
## factor(country_code)PRE 0.430197 0.537454 0.800 0.4235
## factor(country_code)PRI -15.359785 379.011165 -0.041 0.9677
## factor(country_code)PRT -0.285145 0.581896 -0.490 0.6241
## factor(country_code)PST -1.743445 0.830702 -2.099 0.0358 *
## factor(country_code)RWA -0.771709 0.634235 -1.217 0.2237
## factor(country_code)SAS -0.740222 0.626041 -1.182 0.2371
## factor(country_code)SDN 0.408690 0.538484 0.759 0.4479
## factor(country_code)SEN 0.528256 0.529747 0.997 0.3187
## factor(country_code)SGP -0.912671 0.665964 -1.370 0.1705
## factor(country_code)SLE 0.974653 0.522540 1.865 0.0622 .
## factor(country_code)SSA 0.147490 0.546955 0.270 0.7874
## factor(country_code)SSF 0.147616 0.546944 0.270 0.7872
## factor(country_code)SUR -0.561001 0.605044 -0.927 0.3538
## factor(country_code)SWE -0.521545 0.601499 -0.867 0.3859
## factor(country_code)SWZ -0.130296 0.568937 -0.229 0.8189
## factor(country_code)SYC -0.681465 0.627108 -1.087 0.2772
## factor(country_code)TCD 0.248856 0.540016 0.461 0.6449
## factor(country_code)TEA -0.705863 0.626007 -1.128 0.2595
## factor(country_code)TGO 0.287339 0.541081 0.531 0.5954
## factor(country_code)THA -0.958761 0.666432 -1.439 0.1503
## factor(country_code)TLA -0.970217 0.665558 -1.458 0.1449
## factor(country_code)TSA -0.740222 0.626041 -1.182 0.2371
## factor(country_code)TSS 0.147616 0.546944 0.270 0.7872
## factor(country_code)TTO 0.146034 0.547387 0.267 0.7896
## factor(country_code)TUR 0.041404 0.558338 0.074 0.9409
## factor(country_code)UGA -0.018115 0.559572 -0.032 0.9742
## factor(country_code)UMC -0.730303 0.626709 -1.165 0.2439
## factor(country_code)URY 0.555964 0.530039 1.049 0.2942
## factor(country_code)USA -15.383198 379.294798 -0.041 0.9676
## factor(country_code)VCT -0.963931 0.664277 -1.451 0.1468
## factor(country_code)WLD -1.302634 0.723940 -1.799 0.0720 .
## factor(country_code)ZAF -0.324349 0.582347 -0.557 0.5775
## factor(country_code)ZMB 0.509055 0.531981 0.957 0.3386
## factor(country_code)ZWE 0.397220 0.534594 0.743 0.4575
## Year 0.045621 0.003858 11.825 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4464.5 on 4558 degrees of freedom
## Residual deviance: 3959.4 on 4436 degrees of freedom
## AIC: 4205.4
##
## Number of Fisher Scoring iterations: 15
# Predicted recession probabilities for the held-out years (after 2000)
test_data[["pred_prob"]] <- predict(
  model,
  newdata = test_data,
  type = "response"
)
# ROC curve of the predictions against the observed recession flag,
# with the AUC (rounded to 3 decimals) shown in the plot title
library(pROC)
roc_obj <- roc(test_data[["recession"]], test_data[["pred_prob"]])
plot(
  roc_obj,
  main = paste("ROC Curve (AUC =", round(auc(roc_obj), 3), ")")
)
### 4.2 Logistic curve
# Grid of lagged growth ZAF: vary the previous year's growth from -15% to 15%
# in 100 steps while holding lag2_growth at the training median, for country
# code ZAF and Year 1985
new_data <- data.frame(
  lag1_growth = seq(-15, 15, length.out = 100),
  lag2_growth = median(train_data$lag2_growth, na.rm = TRUE),
  country_code = "ZAF",
  Year = 1985
)
# Probability Predictions ZAF
new_data$pred_prob <- predict(model, newdata = new_data, type = "response")
# Plot Curve ZAF
ggplot(new_data, aes(x = lag1_growth, y = pred_prob)) +
  # linewidth (not the deprecated size) sets line thickness, consistent with
  # the geom_line() call in the GDP percentile plot
  geom_line(linewidth = 1.2, color = "steelblue") +
  # the x-axis is lag1_growth, i.e. growth in the single previous year
  labs(x = "GDP Growth in Previous Year (%)",
       y = "Predicted Probability of Recession",
       title = "Logistic Regression of Lagged GDP Growth in South Africa (1985)") +
  theme_minimal()
# Grid of lagged growth JPN: vary the previous year's growth from -15% to 15%
# in 100 steps while holding lag2_growth at the training median, for country
# code JPN and Year 1986
new_data <- data.frame(
  lag1_growth = seq(-15, 15, length.out = 100),
  lag2_growth = median(train_data$lag2_growth, na.rm = TRUE),
  country_code = "JPN",
  Year = 1986
)
# Probability Predictions JPN
new_data$pred_prob <- predict(model, newdata = new_data, type = "response")
# Plot Curve JPN
ggplot(new_data, aes(x = lag1_growth, y = pred_prob)) +
  # linewidth (not the deprecated size) sets line thickness, consistent with
  # the geom_line() call in the GDP percentile plot
  geom_line(linewidth = 1.2, color = "steelblue") +
  # the x-axis is lag1_growth, i.e. growth in the single previous year;
  # the y label previously misspelled "Probability"
  labs(x = "GDP Growth in Previous Year (%)",
       y = "Predicted Probability of Recession",
       title = "Logistic Regression of Lagged GDP Growth in Japan (1986)") +
  theme_minimal()
## 5. Conclusion

The area under the ROC curve (AUC) is 0.618. This is better than random
guessing (AUC > 0.5), but it is below 0.7, so the model has weak
predictive power. In other words, if one recession year and one
non-recession year were selected at random, there is a 61.8% probability
that the model would assign the higher predicted probability of recession
to the recession year. When the fitted model was applied to Japan in 1986
and South Africa in 1985, the predicted probability of recession fell as
GDP growth in the previous year rose, as shown by the downward-sloping
curves. Although past GDP growth is a significant predictor of recession,
it is not strong enough on its own; it needs to be combined with other
macroeconomic factors such as interest rates, inflation and unemployment.