1. Introduction

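This report explores countries' gross domestic product (GDP) from 1960 to 2020. Exploratory data analysis is conducted first, followed by a logistic regression model that predicts whether GDP falls in a given year (a "recession") from GDP growth in the two preceding years.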

2. Loading the data

# Load the raw GDP table from a CSV export on the local machine.
# NOTE(review): the path is absolute and user-specific, so this only runs on
# the original author's machine -- consider a project-relative path.
library(readr)
gdp_csv_path <- "/Users/kennyg/Downloads/Countries GDP 1960-2020.csv"
# name_repair = "minimal" leaves the column headers exactly as they appear in
# the file (e.g. `Country Name`, `1960`).
GDP_DATA <- read_csv(file = gdp_csv_path, name_repair = "minimal")
# Preview the first rows to confirm the file parsed as expected.
head(GDP_DATA)
## # A tibble: 6 × 63
##   `Country Name`  `Country Code`  `1960`  `1961`  `1962`  `1963`  `1964`  `1965`
##   <chr>           <chr>            <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 Africa Eastern… AFE            1.93e10 1.97e10 2.15e10 2.57e10 2.35e10 2.68e10
## 2 Africa Western… AFW            1.04e10 1.11e10 1.19e10 1.27e10 1.38e10 1.49e10
## 3 Australia       AUS            1.86e10 1.97e10 1.99e10 2.15e10 2.38e10 2.60e10
## 4 Austria         AUT            6.59e 9 7.31e 9 7.76e 9 8.37e 9 9.17e 9 9.99e 9
## 5 Burundi         BDI            1.96e 8 2.03e 8 2.14e 8 2.33e 8 2.61e 8 1.59e 8
## 6 Belgium         BEL            1.17e10 1.24e10 1.33e10 1.43e10 1.60e10 1.74e10
## # ℹ 55 more variables: `1966` <dbl>, `1967` <dbl>, `1968` <dbl>, `1969` <dbl>,
## #   `1970` <dbl>, `1971` <dbl>, `1972` <dbl>, `1973` <dbl>, `1974` <dbl>,
## #   `1975` <dbl>, `1976` <dbl>, `1977` <dbl>, `1978` <dbl>, `1979` <dbl>,
## #   `1980` <dbl>, `1981` <dbl>, `1982` <dbl>, `1983` <dbl>, `1984` <dbl>,
## #   `1985` <dbl>, `1986` <dbl>, `1987` <dbl>, `1988` <dbl>, `1989` <dbl>,
## #   `1990` <dbl>, `1991` <dbl>, `1992` <dbl>, `1993` <dbl>, `1994` <dbl>,
## #   `1995` <dbl>, `1996` <dbl>, `1997` <dbl>, `1998` <dbl>, `1999` <dbl>, …
# Standardise the column names to snake_case; the year columns gain an "x"
# prefix (e.g. `1960` becomes `x1960`).
# NOTE(review): clean_names() is not provided by readr, the only package
# attached in the visible code above -- presumably janitor is loaded in a
# hidden setup chunk; confirm.
GDP_DATA <- clean_names(GDP_DATA)
# Report whether the table contains any missing values.
any(is.na(GDP_DATA))
## [1] TRUE

3. Exploratory Data Analysis

3.1 Summary of Statistics

# Per-column summary: length/class/mode for the two character columns, and the
# min, quartiles, mean, max and NA count for each yearly GDP column.
summary(GDP_DATA)
##  country_name       country_code           x1960               x1961          
##  Length:120         Length:120         Min.   :1.201e+07   Min.   :1.159e+07  
##  Class :character   Class :character   1st Qu.:4.363e+08   1st Qu.:4.747e+08  
##  Mode  :character   Mode  :character   Median :2.724e+09   Median :2.667e+09  
##                                        Mean   :7.738e+10   Mean   :7.919e+10  
##                                        3rd Qu.:2.926e+10   3rd Qu.:3.041e+10  
##                                        Max.   :1.390e+12   Max.   :1.440e+12  
##                                        NA's   :1                              
##      x1962               x1963               x1964          
##  Min.   :1.254e+07   Min.   :1.283e+07   Min.   :1.342e+07  
##  1st Qu.:4.716e+08   1st Qu.:5.082e+08   1st Qu.:5.418e+08  
##  Median :3.051e+09   Median :3.571e+09   Median :3.184e+09  
##  Mean   :8.418e+10   Mean   :9.092e+10   Mean   :9.976e+10  
##  3rd Qu.:3.295e+10   3rd Qu.:3.775e+10   3rd Qu.:3.687e+10  
##  Max.   :1.550e+12   Max.   :1.670e+12   Max.   :1.820e+12  
##                                                             
##      x1965               x1966               x1967          
##  Min.   :1.359e+07   Min.   :1.447e+07   Min.   :1.584e+07  
##  1st Qu.:6.561e+08   1st Qu.:6.940e+08   1st Qu.:7.417e+08  
##  Median :3.590e+09   Median :4.231e+09   Median :4.194e+09  
##  Mean   :1.092e+11   Mean   :1.178e+11   Mean   :1.245e+11  
##  3rd Qu.:4.106e+10   3rd Qu.:4.430e+10   3rd Qu.:4.380e+10  
##  Max.   :1.990e+12   Max.   :2.160e+12   Max.   :2.290e+12  
##                                                             
##      x1968               x1969               x1970          
##  Min.   :1.460e+07   Min.   :1.585e+07   Min.   :1.630e+07  
##  1st Qu.:7.689e+08   1st Qu.:7.863e+08   1st Qu.:8.549e+08  
##  Median :4.571e+09   Median :5.727e+09   Median :6.061e+09  
##  Mean   :1.339e+11   Mean   :1.481e+11   Mean   :1.621e+11  
##  3rd Qu.:4.683e+10   3rd Qu.:5.355e+10   3rd Qu.:6.300e+10  
##  Max.   :2.480e+12   Max.   :2.730e+12   Max.   :2.990e+12  
##                                                             
##      x1971               x1972               x1973          
##  Min.   :1.962e+07   Min.   :2.294e+07   Min.   :2.420e+07  
##  1st Qu.:8.933e+08   1st Qu.:1.069e+09   1st Qu.:1.290e+09  
##  Median :6.511e+09   Median :6.548e+09   Median :8.058e+09  
##  Mean   :1.780e+11   Mean   :2.042e+11   Mean   :2.501e+11  
##  3rd Qu.:6.409e+10   3rd Qu.:7.239e+10   3rd Qu.:9.262e+10  
##  Max.   :3.300e+12   Max.   :3.800e+12   Max.   :4.640e+12  
##                                                             
##      x1974               x1975               x1976          
##  Min.   :3.151e+07   Min.   :3.324e+07   Min.   :3.010e+07  
##  1st Qu.:1.636e+09   1st Qu.:2.108e+09   1st Qu.:2.355e+09  
##  Median :1.098e+10   Median :1.073e+10   Median :1.196e+10  
##  Mean   :2.907e+11   Mean   :3.232e+11   Mean   :3.499e+11  
##  3rd Qu.:1.210e+11   3rd Qu.:1.330e+11   3rd Qu.:1.330e+11  
##  Max.   :5.350e+12   Max.   :5.960e+12   Max.   :6.480e+12  
##                                                             
##      x1977               x1978               x1979          
##  Min.   :4.450e+07   Min.   :4.943e+07   Min.   :5.884e+07  
##  1st Qu.:2.452e+09   1st Qu.:2.611e+09   1st Qu.:2.986e+09  
##  Median :1.429e+10   Median :1.617e+10   Median :2.045e+10  
##  Mean   :3.952e+11   Mean   :4.597e+11   Mean   :5.341e+11  
##  3rd Qu.:1.540e+11   3rd Qu.:1.645e+11   3rd Qu.:1.960e+11  
##  Max.   :7.330e+12   Max.   :8.630e+12   Max.   :1.000e+13  
##                                                             
##      x1980               x1981               x1982          
##  Min.   :6.846e+07   Min.   :8.089e+07   Min.   :8.602e+07  
##  1st Qu.:3.717e+09   1st Qu.:3.365e+09   1st Qu.:3.272e+09  
##  Median :2.407e+10   Median :2.655e+10   Median :2.607e+10  
##  Mean   :6.052e+11   Mean   :6.383e+11   Mean   :6.310e+11  
##  3rd Qu.:2.338e+11   3rd Qu.:2.535e+11   3rd Qu.:2.590e+11  
##  Max.   :1.130e+13   Max.   :1.170e+13   Max.   :1.160e+13  
##                                                             
##      x1983               x1984               x1985          
##  Min.   :8.687e+07   Min.   :9.860e+07   Min.   :1.110e+08  
##  1st Qu.:3.199e+09   1st Qu.:3.390e+09   1st Qu.:3.251e+09  
##  Median :2.400e+10   Median :2.266e+10   Median :2.470e+10  
##  Mean   :6.379e+11   Mean   :6.598e+11   Mean   :6.962e+11  
##  3rd Qu.:2.425e+11   3rd Qu.:2.600e+11   3rd Qu.:2.470e+11  
##  Max.   :1.180e+13   Max.   :1.220e+13   Max.   :1.290e+13  
##                                                             
##      x1986               x1987               x1988          
##  Min.   :1.307e+08   Min.   :1.477e+08   Min.   :1.727e+08  
##  1st Qu.:3.793e+09   1st Qu.:3.710e+09   1st Qu.:3.833e+09  
##  Median :2.822e+10   Median :3.277e+10   Median :3.652e+10  
##  Mean   :8.049e+11   Mean   :9.059e+11   Mean   :1.008e+12  
##  3rd Qu.:2.550e+11   3rd Qu.:2.900e+11   3rd Qu.:2.980e+11  
##  Max.   :1.520e+13   Max.   :1.730e+13   Max.   :1.930e+13  
##                                                             
##      x1989               x1990               x1991          
##  Min.   :1.925e+08   Min.   :2.173e+08   Min.   :2.205e+08  
##  1st Qu.:4.289e+09   1st Qu.:4.520e+09   1st Qu.:4.065e+09  
##  Median :3.904e+10   Median :4.202e+10   Median :4.567e+10  
##  Mean   :1.049e+12   Mean   :1.181e+12   Mean   :1.229e+12  
##  3rd Qu.:2.970e+11   3rd Qu.:3.375e+11   3rd Qu.:3.380e+11  
##  Max.   :2.010e+13   Max.   :2.270e+13   Max.   :2.370e+13  
##                                                             
##      x1992               x1993               x1994          
##  Min.   :2.421e+08   Min.   :2.638e+08   Min.   :2.894e+08  
##  1st Qu.:4.320e+09   1st Qu.:4.597e+09   1st Qu.:4.547e+09  
##  Median :4.844e+10   Median :5.088e+10   Median :5.465e+10  
##  Mean   :1.316e+12   Mean   :1.347e+12   Mean   :1.459e+12  
##  3rd Qu.:3.578e+11   3rd Qu.:3.608e+11   3rd Qu.:3.922e+11  
##  Max.   :2.540e+13   Max.   :2.580e+13   Max.   :2.790e+13  
##                                                             
##      x1995               x1996               x1997          
##  Min.   :3.135e+08   Min.   :3.315e+08   Min.   :3.478e+08  
##  1st Qu.:5.237e+09   1st Qu.:5.574e+09   1st Qu.:5.738e+09  
##  Median :6.489e+10   Median :6.956e+10   Median :7.264e+10  
##  Mean   :1.624e+12   Mean   :1.672e+12   Mean   :1.676e+12  
##  3rd Qu.:4.062e+11   3rd Qu.:4.425e+11   3rd Qu.:4.685e+11  
##  Max.   :3.100e+13   Max.   :3.170e+13   Max.   :3.160e+13  
##                                                             
##      x1998               x1999               x2000          
##  Min.   :3.736e+08   Min.   :3.907e+08   Min.   :3.963e+08  
##  1st Qu.:6.173e+09   1st Qu.:5.870e+09   1st Qu.:6.148e+09  
##  Median :6.718e+10   Median :6.907e+10   Median :7.994e+10  
##  Mean   :1.668e+12   Mean   :1.729e+12   Mean   :1.800e+12  
##  3rd Qu.:4.452e+11   3rd Qu.:4.860e+11   3rd Qu.:5.460e+11  
##  Max.   :3.150e+13   Max.   :3.270e+13   Max.   :3.380e+13  
##                                                             
##      x2001               x2002               x2003          
##  Min.   :4.300e+08   Min.   :4.619e+08   Min.   :4.699e+08  
##  1st Qu.:6.251e+09   1st Qu.:6.176e+09   1st Qu.:7.285e+09  
##  Median :7.648e+10   Median :8.211e+10   Median :8.940e+10  
##  Mean   :1.793e+12   Mean   :1.862e+12   Mean   :2.088e+12  
##  3rd Qu.:5.412e+11   3rd Qu.:5.902e+11   3rd Qu.:6.588e+11  
##  Max.   :3.360e+13   Max.   :3.490e+13   Max.   :3.910e+13  
##                                                             
##      x2004               x2005               x2006          
##  Min.   :5.069e+08   Min.   :5.472e+08   Min.   :6.109e+08  
##  1st Qu.:8.564e+09   1st Qu.:9.574e+09   1st Qu.:1.029e+10  
##  Median :1.036e+11   Median :1.215e+11   Median :1.430e+11  
##  Mean   :2.360e+12   Mean   :2.580e+12   Mean   :2.825e+12  
##  3rd Qu.:7.640e+11   3rd Qu.:8.792e+11   3rd Qu.:9.938e+11  
##  Max.   :4.410e+13   Max.   :4.780e+13   Max.   :5.180e+13  
##                                                             
##      x2007               x2008               x2009          
##  Min.   :6.844e+08   Min.   :6.954e+08   Min.   :6.749e+08  
##  1st Qu.:1.218e+10   1st Qu.:1.377e+10   1st Qu.:1.267e+10  
##  Median :1.650e+11   Median :1.810e+11   Median :1.740e+11  
##  Mean   :3.215e+12   Mean   :3.567e+12   Mean   :3.426e+12  
##  3rd Qu.:1.182e+12   3rd Qu.:1.270e+12   3rd Qu.:1.250e+12  
##  Max.   :5.830e+13   Max.   :6.400e+13   Max.   :6.070e+13  
##                                                             
##      x2010               x2011               x2012          
##  Min.   :6.812e+08   Min.   :6.761e+08   Min.   :6.929e+08  
##  1st Qu.:1.434e+10   1st Qu.:1.776e+10   1st Qu.:1.769e+10  
##  Median :2.135e+11   Median :2.365e+11   Median :2.250e+11  
##  Mean   :3.811e+12   Mean   :4.266e+12   Mean   :4.404e+12  
##  3rd Qu.:1.440e+12   3rd Qu.:1.610e+12   3rd Qu.:1.680e+12  
##  Max.   :6.650e+13   Max.   :7.370e+13   Max.   :7.530e+13  
##                                                             
##      x2013               x2014               x2015          
##  Min.   :7.212e+08   Min.   :7.277e+08   Min.   :7.554e+08  
##  1st Qu.:1.878e+10   1st Qu.:1.959e+10   1st Qu.:1.942e+10  
##  Median :2.345e+11   Median :2.395e+11   Median :2.165e+11  
##  Mean   :4.565e+12   Mean   :4.712e+12   Mean   :4.499e+12  
##  3rd Qu.:1.790e+12   3rd Qu.:1.850e+12   3rd Qu.:1.680e+12  
##  Max.   :7.740e+13   Max.   :7.960e+13   Max.   :7.510e+13  
##                                                             
##      x2016               x2017               x2018          
##  Min.   :7.744e+08   Min.   :7.922e+08   Min.   :8.113e+08  
##  1st Qu.:2.071e+10   1st Qu.:2.204e+10   1st Qu.:2.354e+10  
##  Median :2.310e+11   Median :2.525e+11   Median :2.750e+11  
##  Mean   :4.565e+12   Mean   :4.886e+12   Mean   :5.207e+12  
##  3rd Qu.:1.560e+12   3rd Qu.:1.670e+12   3rd Qu.:1.750e+12  
##  Max.   :7.630e+13   Max.   :8.120e+13   Max.   :8.630e+13  
##                                                             
##      x2019               x2020          
##  Min.   :8.250e+08   Min.   :8.075e+08  
##  1st Qu.:2.328e+10   1st Qu.:2.052e+10  
##  Median :2.740e+11   Median :2.580e+11  
##  Mean   :5.303e+12   Mean   :5.147e+12  
##  3rd Qu.:1.800e+12   3rd Qu.:1.702e+12  
##  Max.   :8.760e+13   Max.   :8.470e+13  
## 
# Heatmap of standardised GDP values: one row per entity, one column per year.
library(pheatmap)
# Keep only the numeric (yearly GDP) columns. vapply() is used instead of
# sapply() so the column mask is guaranteed to be a logical vector, whatever
# the input looks like.
numeric_cols <- GDP_DATA[, vapply(GDP_DATA, is.numeric, logical(1))]
# scale() centres and scales each column (year) separately, so the colours
# compare entities within a year rather than across years.
scaled_data <- scale(numeric_cols)

pheatmap(
  scaled_data,
  main = "GDP per Country from 1960-2020",
  show_rownames = FALSE,
  clustering_method = "complete",
  color = colorRampPalette(c("blue", "white", "red"))(50)
)

### 3.2 Distribution of Data

# Preview the yearly GDP columns in long format (one row per column/value
# pair). The result is only printed, not stored.
pivot_longer(
  numeric_cols,
  cols = everything(),
  names_to = "variable",
  values_to = "value"
)
## # A tibble: 7,320 × 2
##    variable       value
##    <chr>          <dbl>
##  1 x1960    19313106302
##  2 x1961    19723488057
##  3 x1962    21493920015
##  4 x1963    25733212134
##  5 x1964    23527443251
##  6 x1965    26810567154
##  7 x1966    29152157362
##  8 x1967    30173172663
##  9 x1968    32877055829
## 10 x1969    37744346869
## # ℹ 7,310 more rows
# Reshape the wide table (one column per year) into long format with one row
# per country-year, keeping the first two identifier columns as they are.
gdp_long3 <- pivot_longer(
  GDP_DATA,
  cols = -c(1, 2),
  names_to = "Year_raw",
  values_to = "GDP"
)
# Strip every non-digit character from the column name (e.g. "x1960" -> 1960).
gdp_long3 <- mutate(
  gdp_long3,
  Year = as.integer(gsub("[^0-9]", "", Year_raw))
)
gdp_long3 <- select(gdp_long3, -Year_raw)
# Discard rows whose column name did not yield a year.
gdp_long3 <- filter(gdp_long3, !is.na(Year))

# Distribution of GDP at the start of each decade, one facet per year. The
# x-axis is on a log10 scale and each facet gets its own y-axis range.
ggplot(
  filter(gdp_long3, Year %in% seq(1960, 2020, by = 10)),
  aes(x = GDP)
) +
  geom_histogram(bins = 30, fill = "steelblue", colour = "black") +
  facet_wrap(~ Year, scales = "free_y") +
  scale_x_log10() +
  labs(title = "Histogram of GDP for selected years")

# Median and 25th/75th percentiles of GDP for every year, ignoring missing
# values.
gdp_summary <- summarise(
  group_by(gdp_long3, Year),
  median = median(GDP, na.rm = TRUE),
  q25 = quantile(GDP, 0.25, na.rm = TRUE),
  q75 = quantile(GDP, 0.75, na.rm = TRUE)
)

# Median line drawn over a shaded 25th-75th percentile band, on a log10 y-axis.
ggplot(gdp_summary, aes(x = Year)) +
  geom_ribbon(aes(ymin = q25, ymax = q75), fill = "grey70", alpha = 0.5) +
  geom_line(aes(y = median), color = "steelblue", linewidth = 1) +
  labs(y = "GDP", title = "GDP percentile over time") +
  scale_y_log10()

The median GDP, together with the 25th and 75th percentiles, has increased over time, indicating that the typical country has experienced significant growth over the last 60 years.

# Year-over-year GDP growth (%) for each country.
gdp_long3 <- gdp_long3 %>%
  group_by(country_code) %>%
  # Rows must be in year order so that lag() picks up the previous year.
  arrange(Year) %>%
  mutate(growth = (GDP - lag(GDP)) / lag(GDP) * 100) %>%
  # Drop the grouping so later steps do not silently operate per country.
  ungroup()

# Plot annual growth rates from 1970 onwards for a hand-picked set of
# countries, identified by their three-letter codes.
top_countries <- c("USA", "CHN", "IND", "GBR", "BRA", "JPN", "DEU", "ZAF")

gdp_long3 %>%
  filter(country_code %in% top_countries, Year >= 1970) %>%
  ggplot(aes(x = Year, y = growth, colour = country_name)) +
  geom_line() +
  # Dashed zero line separates growth from contraction. The parameter is
  # `linetype`; `linestyle` is not a ggplot2 parameter and is ignored.
  geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.5) +
  labs(y = "Annual GDP Growth (%)", title = "GDP Growth rates over time") +
  theme_minimal()

The data show that, among the selected countries, South Africa experienced the highest annual GDP growth (above 50% between 2000 and 2010); however, it also experienced the lowest annual GDP growth (around -20% in 1985). The United States and China had the most consistent annual GDP growth over the period shown (1970-2020).

4. Build a Logistic Regression Model

# Build the modelling columns in a single per-country pass:
#   growth      - year-over-year GDP change in percent
#   recession   - 1 when growth is negative, 0 otherwise (NA when growth is NA)
#   lag1_growth - growth one year earlier
#   lag2_growth - growth two years earlier
gdp_long3 <- gdp_long3 %>%
  group_by(country_code) %>%
  arrange(Year) %>%
  mutate(
    growth = (GDP - lag(GDP)) / lag(GDP) * 100,
    recession = as.numeric(growth < 0),
    lag1_growth = lag(growth),
    lag2_growth = lag(growth, 2)
  ) %>%
  ungroup()

# Keep only rows where the outcome and both predictors are available.
model_data <- filter(
  gdp_long3,
  !is.na(recession),
  !is.na(lag1_growth),
  !is.na(lag2_growth)
)

4.1 Split the data

# Temporal split: fit on years up to and including 2000, evaluate on later
# years.
train_data <- filter(model_data, Year <= 2000)
test_data <- filter(model_data, Year > 2000)

# Base specification: recession flag explained by the two lagged growth rates.
formula <- recession ~ lag1_growth + lag2_growth

# Add a per-country effect and a linear year trend, but only when the training
# data holds more than one distinct value of each.
# NOTE(review): some codes (e.g. AFE "Africa Eastern...", WLD) look like
# regional or income-group aggregates rather than single countries -- confirm
# whether they should be modelled alongside individual countries.
if (length(unique(train_data$country_code)) > 1) {
  formula <- update(formula, ~ . + factor(country_code))
}
if (length(unique(train_data$Year)) > 1) {
  formula <- update(formula, ~ . + Year)
}

# Logistic regression on the training years.
# NOTE(review): a few country coefficients come out near -15 with standard
# errors near 379 (NAC, PRI, USA), which suggests those codes have no
# recession years in the training data -- confirm.
model <- glm(formula, data = train_data, family = binomial)
summary(model)
## 
## Call:
## glm(formula = formula, family = binomial, data = train_data)
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             -91.542842   7.671072 -11.934  < 2e-16 ***
## lag1_growth              -0.021151   0.003021  -7.002 2.52e-12 ***
## lag2_growth               0.006005   0.002782   2.158   0.0309 *  
## factor(country_code)AFW   0.275455   0.542163   0.508   0.6114    
## factor(country_code)AUS  -0.511187   0.600891  -0.851   0.3949    
## factor(country_code)AUT  -0.301649   0.582216  -0.518   0.6044    
## factor(country_code)BDI   0.237057   0.540548   0.439   0.6610    
## factor(country_code)BEL   0.020886   0.556099   0.038   0.9700    
## factor(country_code)BEN   0.180012   0.547903   0.329   0.7425    
## factor(country_code)BFA   0.152774   0.547177   0.279   0.7801    
## factor(country_code)BGD  -0.171160   0.567989  -0.301   0.7632    
## factor(country_code)BHS  -0.693355   0.627318  -1.105   0.2690    
## factor(country_code)BLZ  -0.711286   0.626325  -1.136   0.2561    
## factor(country_code)BMU  -2.441366   1.090357  -2.239   0.0252 *  
## factor(country_code)BOL  -0.744518   0.627414  -1.187   0.2354    
## factor(country_code)BWA  -1.235267   0.725813  -1.702   0.0888 .  
## factor(country_code)CAF   0.271633   0.541380   0.502   0.6158    
## factor(country_code)CAN  -1.319567   0.724032  -1.823   0.0684 .  
## factor(country_code)CHL   0.011025   0.556942   0.020   0.9842    
## factor(country_code)CHN  -0.483063   0.599270  -0.806   0.4202    
## factor(country_code)CIV   0.326881   0.541679   0.603   0.5462    
## factor(country_code)CMR  -0.329848   0.585557  -0.563   0.5732    
## factor(country_code)COD   0.702670   0.528163   1.330   0.1834    
## factor(country_code)COG   0.157918   0.548960   0.288   0.7736    
## factor(country_code)COL  -0.138570   0.566685  -0.245   0.8068    
## factor(country_code)CRI  -1.737725   0.831768  -2.089   0.0367 *  
## factor(country_code)CSS  -0.509608   0.599573  -0.850   0.3954    
## factor(country_code)DOM  -0.981172   0.668663  -1.467   0.1423    
## factor(country_code)DZA  -0.529219   0.602755  -0.878   0.3799    
## factor(country_code)EAP  -0.705383   0.625992  -1.127   0.2598    
## factor(country_code)EAR  -0.984070   0.664396  -1.481   0.1386    
## factor(country_code)EAS  -0.950331   0.665481  -1.428   0.1533    
## factor(country_code)ECU   0.276497   0.541022   0.511   0.6093    
## factor(country_code)EMU  -0.131945   0.567526  -0.232   0.8162    
## factor(country_code)ESP  -0.288155   0.582786  -0.494   0.6210    
## factor(country_code)FIN   0.026326   0.556413   0.047   0.9623    
## factor(country_code)FJI  -0.145666   0.567206  -0.257   0.7973    
## factor(country_code)FRA  -0.140697   0.567511  -0.248   0.8042    
## factor(country_code)GAB   0.567812   0.533235   1.065   0.2869    
## factor(country_code)GBR  -0.126958   0.566302  -0.224   0.8226    
## factor(country_code)GHA   0.520897   0.528556   0.986   0.3244    
## factor(country_code)GRC  -0.126654   0.566915  -0.223   0.8232    
## factor(country_code)GTM  -0.992424   0.664955  -1.492   0.1356    
## factor(country_code)GUY   0.375438   0.533588   0.704   0.4817    
## factor(country_code)HIC  -1.741972   0.830688  -2.097   0.0360 *  
## factor(country_code)HKG  -1.240285   0.725042  -1.711   0.0871 .  
## factor(country_code)HND  -0.330081   0.581820  -0.567   0.5705    
## factor(country_code)HPC  -0.173582   0.566577  -0.306   0.7593    
## factor(country_code)HTI  -0.509612   0.601631  -0.847   0.3970    
## factor(country_code)IBD  -0.729962   0.626568  -1.165   0.2440    
## factor(country_code)IBT  -0.731718   0.626544  -1.168   0.2429    
## factor(country_code)IDA  -0.002298   0.555436  -0.004   0.9967    
## factor(country_code)IDB  -0.177133   0.568492  -0.312   0.7554    
## factor(country_code)IDX  -0.160147   0.565938  -0.283   0.7772    
## factor(country_code)IND  -0.521473   0.599880  -0.869   0.3847    
## factor(country_code)IRL  -0.461634   0.600312  -0.769   0.4419    
## factor(country_code)ISL  -0.122631   0.567716  -0.216   0.8290    
## factor(country_code)ITA  -0.130014   0.567823  -0.229   0.8189    
## factor(country_code)JAM  -0.335070   0.582427  -0.575   0.5651    
## factor(country_code)JPN  -0.683287   0.628691  -1.087   0.2771    
## factor(country_code)KEN   0.163346   0.547230   0.298   0.7653    
## factor(country_code)KNA  -1.719361   0.830378  -2.071   0.0384 *  
## factor(country_code)KOR  -0.634105   0.630551  -1.006   0.3146    
## factor(country_code)LKA  -0.523959   0.599816  -0.874   0.3824    
## factor(country_code)LMC  -0.516734   0.600290  -0.861   0.3893    
## factor(country_code)LMY  -0.732413   0.626586  -1.169   0.2424    
## factor(country_code)LSO   0.023751   0.556812   0.043   0.9660    
## factor(country_code)LTE  -1.317456   0.724275  -1.819   0.0689 .  
## factor(country_code)LUX  -0.288690   0.581320  -0.497   0.6195    
## factor(country_code)MAR   0.020909   0.556018   0.038   0.9700    
## factor(country_code)MDG   0.393568   0.535621   0.735   0.4625    
## factor(country_code)MEX  -0.705974   0.630320  -1.120   0.2627    
## factor(country_code)MIC  -0.731960   0.626608  -1.168   0.2428    
## factor(country_code)MWI  -0.002944   0.559580  -0.005   0.9958    
## factor(country_code)MYS  -0.700600   0.628459  -1.115   0.2649    
## factor(country_code)NAC -15.383956 379.211807  -0.041   0.9676    
## factor(country_code)NER   0.887400   0.522491   1.698   0.0894 .  
## factor(country_code)NGA   0.654483   0.533772   1.226   0.2201    
## factor(country_code)NIC  -0.355338   0.588767  -0.604   0.5462    
## factor(country_code)NLD   0.036702   0.556263   0.066   0.9474    
## factor(country_code)NOR  -0.493629   0.600810  -0.822   0.4113    
## factor(country_code)NPL  -0.009894   0.554754  -0.018   0.9858    
## factor(country_code)OED  -1.741954   0.830648  -2.097   0.0360 *  
## factor(country_code)PAK  -0.326082   0.581124  -0.561   0.5747    
## factor(country_code)PAN  -2.476344   1.090340  -2.271   0.0231 *  
## factor(country_code)PER   0.298132   0.541501   0.551   0.5819    
## factor(country_code)PHL  -0.154367   0.566471  -0.273   0.7852    
## factor(country_code)PNG   0.287266   0.541078   0.531   0.5955    
## factor(country_code)PRE   0.430197   0.537454   0.800   0.4235    
## factor(country_code)PRI -15.359785 379.011165  -0.041   0.9677    
## factor(country_code)PRT  -0.285145   0.581896  -0.490   0.6241    
## factor(country_code)PST  -1.743445   0.830702  -2.099   0.0358 *  
## factor(country_code)RWA  -0.771709   0.634235  -1.217   0.2237    
## factor(country_code)SAS  -0.740222   0.626041  -1.182   0.2371    
## factor(country_code)SDN   0.408690   0.538484   0.759   0.4479    
## factor(country_code)SEN   0.528256   0.529747   0.997   0.3187    
## factor(country_code)SGP  -0.912671   0.665964  -1.370   0.1705    
## factor(country_code)SLE   0.974653   0.522540   1.865   0.0622 .  
## factor(country_code)SSA   0.147490   0.546955   0.270   0.7874    
## factor(country_code)SSF   0.147616   0.546944   0.270   0.7872    
## factor(country_code)SUR  -0.561001   0.605044  -0.927   0.3538    
## factor(country_code)SWE  -0.521545   0.601499  -0.867   0.3859    
## factor(country_code)SWZ  -0.130296   0.568937  -0.229   0.8189    
## factor(country_code)SYC  -0.681465   0.627108  -1.087   0.2772    
## factor(country_code)TCD   0.248856   0.540016   0.461   0.6449    
## factor(country_code)TEA  -0.705863   0.626007  -1.128   0.2595    
## factor(country_code)TGO   0.287339   0.541081   0.531   0.5954    
## factor(country_code)THA  -0.958761   0.666432  -1.439   0.1503    
## factor(country_code)TLA  -0.970217   0.665558  -1.458   0.1449    
## factor(country_code)TSA  -0.740222   0.626041  -1.182   0.2371    
## factor(country_code)TSS   0.147616   0.546944   0.270   0.7872    
## factor(country_code)TTO   0.146034   0.547387   0.267   0.7896    
## factor(country_code)TUR   0.041404   0.558338   0.074   0.9409    
## factor(country_code)UGA  -0.018115   0.559572  -0.032   0.9742    
## factor(country_code)UMC  -0.730303   0.626709  -1.165   0.2439    
## factor(country_code)URY   0.555964   0.530039   1.049   0.2942    
## factor(country_code)USA -15.383198 379.294798  -0.041   0.9676    
## factor(country_code)VCT  -0.963931   0.664277  -1.451   0.1468    
## factor(country_code)WLD  -1.302634   0.723940  -1.799   0.0720 .  
## factor(country_code)ZAF  -0.324349   0.582347  -0.557   0.5775    
## factor(country_code)ZMB   0.509055   0.531981   0.957   0.3386    
## factor(country_code)ZWE   0.397220   0.534594   0.743   0.4575    
## Year                      0.045621   0.003858  11.825  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4464.5  on 4558  degrees of freedom
## Residual deviance: 3959.4  on 4436  degrees of freedom
## AIC: 4205.4
## 
## Number of Fisher Scoring iterations: 15
# Predicted recession probabilities for the held-out years.
test_data$pred_prob <- predict(model, newdata = test_data, type = "response")

# ROC curve and AUC on the test set.
library(pROC)
# State the class levels and the comparison direction explicitly. By default
# pROC chooses the direction from the group medians, which tends to push the
# AUC above 0.5 even for an uninformative model.
roc_obj <- roc(
  response = test_data$recession,
  predictor = test_data$pred_prob,
  levels = c(0, 1),  # control = no recession (0), case = recession (1)
  direction = "<"    # a higher predicted probability should indicate a case
)
# sprintf() avoids the stray space that paste() put before the closing
# parenthesis in the title.
plot(
  roc_obj,
  main = sprintf("ROC Curve (AUC = %.3f)", as.numeric(auc(roc_obj)))
)

### 4.2 Logistic curve

# Prediction grid for South Africa (ZAF) in 1985: the previous year's growth
# varies from -15% to +15% while growth two years back is held at the
# training-set median.
new_data <- data.frame(
  lag1_growth = seq(-15, 15, length.out = 100),
  lag2_growth = median(train_data$lag2_growth, na.rm = TRUE),
  country_code = "ZAF",
  Year = 1985
)

# Predicted probability of recession at each grid point.
new_data$pred_prob <- predict(model, newdata = new_data, type = "response")

# Fitted logistic curve. `linewidth` replaces the deprecated `size` argument
# for line thickness, matching the earlier percentile plot.
ggplot(new_data, aes(x = lag1_growth, y = pred_prob)) +
  geom_line(linewidth = 1.2, color = "steelblue") +
  labs(
    # The x variable is the one-year lag, so the label is singular.
    x = "GDP Growth in Previous Year (%)",
    y = "Predicted Probability of Recession",
    title = "Logistic Regression of Lagged GDP Growth in South Africa (1985)"
  ) +
  theme_minimal()

# Prediction grid for Japan (JPN) in 1986: the previous year's growth varies
# from -15% to +15% while growth two years back is held at the training-set
# median.
new_data <- data.frame(
  lag1_growth = seq(-15, 15, length.out = 100),
  lag2_growth = median(train_data$lag2_growth, na.rm = TRUE),
  country_code = "JPN",
  Year = 1986
)

# Predicted probability of recession at each grid point.
new_data$pred_prob <- predict(model, newdata = new_data, type = "response")

# Fitted logistic curve. `linewidth` replaces the deprecated `size` argument
# for line thickness, matching the earlier percentile plot.
ggplot(new_data, aes(x = lag1_growth, y = pred_prob)) +
  geom_line(linewidth = 1.2, color = "steelblue") +
  labs(
    # The x variable is the one-year lag, so the label is singular.
    x = "GDP Growth in Previous Year (%)",
    # Spelling of "Probability" corrected.
    y = "Predicted Probability of Recession",
    title = "Logistic Regression of Lagged GDP Growth in Japan (1986)"
  ) +
  theme_minimal()

5. Conclusion

The area under the ROC curve (AUC) is 0.618. This is better than random guessing (AUC > 0.5), but it falls short of the commonly used threshold of 0.7, so the model has weak predictive power. In other words, if one recession year and one non-recession year were selected at random, there is a 61.8% probability that the model would assign the higher predicted probability of recession to the recession year. When the fitted model was applied to Japan in 1986 and South Africa in 1985, the predicted probability of recession fell as GDP growth in the previous year rose, as shown by the downward-sloping curves. Although past GDP growth is a statistically significant predictor of a recession, it is not strong enough on its own; it should be combined with other macroeconomic factors such as interest rates, inflation and unemployment.