This is a working paper for my final graduation thesis.

1 Preparation

# Packages
# setup library
Packages <- c("tidyverse", "countrycode", "WDI", "plotly", "patchwork", "modelr", "robustbase")
lapply(Packages, library, character.only = TRUE)

# set time span
start_year <- 1970
end_year <- 2020
caption <- "Source: Arthor's creation based on the data extracted from World Bank"

# set graphic functions
errorbar_lm <- function(data, title = "title", subtitle = "subtitle") {
  data$model <- data$model %>% set_names(c(df$country %>% unique())) 
  data$summary <- lapply(data$model, summary)
  
  # create data frame for plotting error bars
  coefficient <- c()
  p_value <- c()
  sd_error <- c()
  for (i in 1:length(data$country)) {
    coefficient[i] <- data$summary[i][[1]]$coefficients[2, 1]
    p_value[i] <- data$summary[i][[1]]$coefficients[2, 4]
    sd_error[i] <- data$summary[i][[1]]$coefficients[2, 2]
  }
  
  df_graphics <- tibble(
    country = data$country,
    coefficient = coefficient,
    p_value = p_value,
    sd_error = sd_error,
    ci_lower = coefficient - 1.96*sd_error,
    ci_upper = coefficient + 1.96*sd_error,
    p_logical = ifelse(p_value > 0.05, "p>0.05", "p<=0.05") 
  )
  
  # plot with ggplot2
  ggplot(df_graphics, 
         aes(country, 
             coefficient, 
             colour = p_logical)) +
    geom_hline(yintercept = 0, alpha = 0.25, linetype = "dashed") +
    geom_point() +
    geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper)) +
    scale_color_manual(values = c( "#33ccff", "#ff9980")) +
    labs(title = title,
         subtitle = str_wrap(subtitle, 80),
         x = "Country",
         y = "Coefficient") +
    facet_wrap(~p_logical, scales = "free_x") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0),
          legend.position = "none")
}

errorbar_rob <- function(data, title = "title", subtitle = "subtitle") {
  data$model <- data$model %>% set_names(c(df$country %>% unique())) 
  data$summary <- lapply(data$model, summary)
  
  # create data frame for plotting error bars
  coefficient <- c()
  p_value <- c()
  sd_error <- c()
  for (i in 1:length(data$country)) {
    coefficient[i] <- data$summary[i][[1]]$coefficients[2, 1]
    p_value[i] <- data$summary[i][[1]]$coefficients[2, 4]
    sd_error[i] <- data$summary[i][[1]]$sigma
  }
  
  df_graphics <- tibble(
    country = data$country,
    coefficient = coefficient,
    p_value = p_value,
    sd_error = sd_error,
    ci_lower = coefficient - 1.96*sd_error,
    ci_upper = coefficient + 1.96*sd_error,
    p_logical = ifelse(p_value > 0.05, "p>0.05", "p<=0.05") 
  )
  
  # plot with ggplot2
  ggplot(df_graphics, 
         aes(country, 
             coefficient, 
             colour = p_logical)) +
    geom_hline(yintercept = 0, alpha = 0.25, linetype = "dashed") +
    geom_point() +
    geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper)) +
    scale_color_manual(values = c( "#33ccff", "#ff9980")) +
    labs(title = title,
         subtitle = str_wrap(subtitle, 80),
         x = "Country",
         y = "Coefficient",
         caption = caption) +
    facet_wrap(~p_logical, scales = "free_x") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0),
          legend.position = "none")
}

2 Introduction

Okun’s Law defines the relations of economic growth rate (%) and unemployment rate (%) as follows;

\[ Y_t = \hat{\alpha} + \hat{\beta_1}U_t \]

\(Y_t = GDP\) at a \(t\) year $U_t = $ Unemployment at a \(t\) year

3 Data and model Specifications

In this essay, I test the robustness of Okun’s law by using database from the World Bank among OECD countries. The reason I narrow down to OCED coutntries are 1) world-wide regression is too vague as an analysis and 2) that the economy systems are different between the developed and the developing countries. The time frame is from 1970 and 2020.

  • The Difference Model

\[ \Delta u_t = \beta_0 - \beta_1 \Delta Y_t + \epsilon_t, \qquad t = 1, \, ...,\, T, \qquad (1)\\ \] \[ Y: Real\,GDP\,Growth\,Rate\\ u: Unemployment\,Rate\\ \]

  • The Gap Model

\[ y_t - y_t* = -\beta(u_t - u_t*) + \epsilon_t, \qquad (2) \\ \] \[ y : Real\,GDP\,Growth\,Rate\\ y*: Natural\,GDP\,Growth\,Rate\\ u : Unemployment\,Rate\\ u*: Natural\,Uemployment\,Rate\\ \]

4 Importing Data

# Importing data
# get the OECD coutnries name
url <- "https://raw.githubusercontent.com/OxfordEconomics/CountryLists/master/countryList-OECD.csv"
oecd_country <- read_csv(url) %>% 
  rename(country = `OecdMembers2019-07-31`)

# get world countires' name for continent data
world_country <- countrycode::codelist[,c(3,6)] %>% 
  setNames(c("continent", "country"))

# create data frame with OECD countries and a continent's column
df_oecd <-
  oecd_country %>%
  add_row(country = "South Korea") %>% 
  add_row(country = "Slovakia") %>% 
  # add_row(country = "Czechoslovakia") %>% 
  left_join(world_country) %>% 
  na.omit()

# convert countries'name to iso3c data structure
iso3c_names <- countrycode::countrycode(
  sourcevar = df_oecd$country,
  origin = "country.name",
  destination = "iso3c"
) 

# unemployment, gdp data, gdp growth rate, and total population
input <- c("SL.UEM.TOTL.ZS", "NY.GDP.MKTP.CD", "NY.GDP.MKTP.KD.ZG", "SP.POP.TOTL")

# import data from world bank database by using `WDI` package
df_worldbank <- input %>%
  purrr::map(~WDI(country = iso3c_names,
                  indicator = ., 
                  start = start_year,
                  end = end_year)) %>%
  reduce(merge) %>% 
  setNames(c("iso2c", "country", "year", "unemploy_rate", "gdp", "gdp_growth", "total_population")) %>% 
  as_tibble()
  

# tidy df_worldbank
df <- df_worldbank %>% 
  left_join(df_oecd, by = "country") %>%
  mutate(continent = ifelse(country == "Korea, Rep.", "Asia", continent),
         country = as.factor(country),
         continent = as.factor(continent),
         unemploy_num = unemploy_rate * total_population) %>% 
  select(country, year, continent, gdp, gdp_growth, unemploy_rate, unemploy_num) %>% 
  na.omit()

5 Checking data

\[ \frac {\Delta ln(Yt) - \Delta ln(Y_{t-1})}{\Delta ln(Y)} \]

DT::datatable(df %>% 
                mutate(gdp = gdp / 1e6) %>% 
                mutate_if(is.numeric, ~round(., digits = 2)), 
              rownames = FALSE, 
              colnames = c("Country", "Year", "Continet", "GDP($mil)", "GDP Growth", "Unemploy (%)", "Unemploy (total)"),
              extensions = 'Buttons',
              options = list(autoWidth = TRUE,
                             pageLength = 10,
                             dom = 'Bfrtip',
                             buttons = list('copy', 'print', 
                                            list(extend = 'collection',
                                                 buttons = c('csv', 'excel', 'pdf'),
                                                 text = 'Download')),
                             scrollX = TRUE,
                             scrollCollapse = TRUE),
              class = 'cell-border stripe',
              caption = "The data is frame extracted from WB. You can explore the data by using 'search bar' on the top right.")

6 Visualization

Since I got data from the above code, now I run a linear regression model by using “unemployment” on X axis and “gdp_grow” on Y axis. Accoding to Okun, Okun’s Law discribes their negative relations, so when unemployment goes up, it states, the gdp growth rate goes down.

# visualization of Okun's law by country
ggplot(df, aes(unemploy_rate, gdp_growth, colour = continent)) +
  geom_point(alpha = .25) +
  geom_smooth(method = "lm", size = 0.5, alpha = .25, colour = "black", se = FALSE) +
  scale_x_continuous(breaks = seq(0, 25, by = 5)) +
  facet_wrap(~country, ncol = 1, drop = TRUE, scales = "free") +
  labs(title = "Empirical research on Okun's Law over OECD countries",
       x = "Unemployment rate (%)",
       y = "GDP growth rate (%)",
       caption = caption) +
  scale_x_continuous(expand = c(0, 0)) +
  theme_bw()

7 Modeling

7.1 The Difference Model

7.1.1 The Difference Model

\[ \Delta u_t = \beta_0 + \beta_1 \Delta Y_t + \epsilon_t, \qquad t = 1, \, ...,\, T, \qquad (1)\\ \]

\(\qquad u:\,\)Percentage of the unemployed

\(\qquad Y:\,\)Real GDP growth

# modeling of the difference model
df_lm <- df %>% 
  group_by(country) %>% 
  nest() %>% 
  mutate(model = map(data, ~lm(unemploy_rate ~ gdp_growth, data = .)))

# plot error bars
errorbar_lm(df_lm,
         title = "Beta1 Accross OECD countries with the difference model",
         subtitle = "")

7.1.2 The Difference Model (log)

\[ ln(u_t) = \beta_0 + \beta_1 ln(Y_t) + \epsilon_t, \qquad t = 1, \, ...,\, T, \qquad (1)\\ \]

\(\qquad u:\,\)Number of the unemployed

\(\qquad Y:\,\)Real GDP in USD

# modeling of the difference model (log)
df_lm_log <- df %>% 
  group_by(country) %>% 
  nest() %>% 
  mutate(model = map(data, ~lm(log(unemploy_num) ~ log(gdp), data = .)))

# plot error bars
errorbar_lm(df_lm_log, 
         title = "Beta Accross OECD coutnries with the difference model (log)",
         subtitle = "")

7.1.3 The Difference Model (log and no trend)

\[ ln(\Delta u_t) = \beta_0 + \beta_1 ln(\Delta Y) + \beta_2y_{t-1} + \epsilon_t, \qquad t = 1, \, ...,\, T, \qquad (1)\\ \]

\(\qquad u:\,\)Number of the unemployed

\(\qquad Y:\,\)Real GDP in USD

# modeling of the difference model (log)
df_lm_log2 <- df %>% 
  mutate(year_lag = year - 1) %>%
  group_by(country) %>% 
  nest() %>% 
  mutate(model = map(data, ~lmrob(log(unemploy_num) ~ log(gdp) + year_lag, data = .)))

# plot error bars
errorbar_rob(df_lm_log2, 
         title = "Beta Accross OECD coutnries with the difference model (log)",
         subtitle = "")

7.1.4 The Difference Model (log and auto correlation)

\[ \Delta u_t = \beta_0 + \beta_1 \Delta lnY + \epsilon_t, \qquad t = 1, \, ...,\, T, \qquad (1)\\ \] \(\qquad u:\,\)The unemployment rate

\(\qquad Y:\,\)Real GDP in USD

# modeling of the difference model (log)
df_lm_log3 <-  df %>% 
  group_by(country) %>%
  nest() %>%
  unnest(data) %>% 
  mutate(gdp_log_delta = log(gdp) - lag(log(gdp), 1L),
         unemploy_rate_dif = unemploy_rate - lag(unemploy_rate, n = 1L)) %>% 
  nest() %>%
  mutate(model = map(data, ~lm(unemploy_rate_dif ~ gdp_log_delta, data = .)))
library(sandwich)
library(lmtest)
library(gtsummary)
library(gt)

# for one country with `summary` function
df_lm_log3$model[[1]] %>% 
  tbl_regression() %>% 
  add_global_p() %>%
  bold_p(t = 0.05) %>% 
  modify_caption(paste(df_lm_log3$country[1]))

# for one country with `robust standard error`
m <- df_lm_log3$model[[1]]
result <- coeftest(m, vcov = vcovHC(m, type="HC2"))
set_names(list(result), "Austria")

# for OECD countries
m <- NULL
result <- NULL
rob_st <- list()
for (i in 1:length(df_lm_log3$country)) {
  m <- df_lm_log3$model[[i]]
  result <- coeftest(m, vcov = vcovHC(m, type="HC1"))
  # result <- set_names(list(result), df_lm_log3$country[i])
  rob_st[[c(df_lm_log3$country[i])]] <- result
}
setNames(rob_st, df_lm_log3$country)


summary(df_lm_log3$model) %>% setNames(df_lm_log3$country)

lapply(df_lm_log3$model, summary)[[1]] 
# plot error bars
errorbar_lm(df_lm_log3, 
         title = "Beta Accross OECD coutnries with the difference model (log)",
         subtitle = "Most of the countries do not have p value less than 0.05, meaning
         the null hypothesis can not be rejected.")

7.1.4.1 Visualization

library(ggiraphExtra)
ggPredict(df_lm_log3$model[[1]], se = TRUE, interactive = FALSE) +
  labs(title = df_lm_log3$country[[1]])

for(i in 1:length(df_lm_log3$country)) {
  ggPredict(df_lm_log3$model[[i]], se = TRUE, interactive = FALSE) +
  labs(title = df_lm_log3$country[[i]])
}

7.2 The Gap Model

  • The Gap Model

\[ y_t - y_t* = -\beta(u_t - u_t*) + \epsilon_t, \qquad (2) \\ \] \[ y : Real\,GDP\,Growth\,Rate\\ y*: Natural\,GDP\,Growth\,Rate\\ u : Unemployment\,Rate\\ u*: Natural\,Uemployment\,Rate\\ \]

# set the natural gdp growth rate
y_natural <- 2

# set the natural unemployment rate
u_natural <- 3

# modeling of the gap model
df_gap <- df %>%
  mutate(gdp_growth = gdp_growth - y_natural,
         unemployment = unemploy_rate - u_natural) %>% 
  group_by(country) %>% 
  nest() %>% 
  mutate(model = map(data, ~lm(gdp_growth ~ unemployment, data = .)))

# plot error bars
errorbar_lm(df_gap, 
         title = "Beta Accross OCED countriest the gap model",
         subtitle = "Most of the countries do not have p value less than 0.05, meaning the null hypothesis can not be rejected.")

  • Gap Model (log)

\[ ln(y_t) - ln(y_t*) = -\beta(u_t - u_t*) + \epsilon_t, \qquad (2) \\ \]

\[ y : Real\,GDP\,Growth\,Rate\\ y*: Natural\,GDP\,Growth\,Rate\\ u : Unemployment\,Rate\\ u*: Natural\,Uemployment\,Rate\\ \]

# modeling of the gap model (log)
df_gap_log <- df %>%
  mutate(gdp = log(gdp) - y_natural,
         unemployment = log(unemploy_num) - u_natural) %>% 
  group_by(country) %>% 
  nest() %>% 
  mutate(model = map(data, ~lm(gdp ~ unemployment, data = .)))

# plot error bars
errorbar_lm(df_gap_log,
         title = "Beta Accross OCED countriest the gap model (log)",
         subtitle = "Most of the countries do not have p value less than 0.05, meaning the null hypothesis can not be rejected.")

8 References

  • Lee, J. I. M. 2000. “The Robustness of Okun ’ s Law : Evidence from OECD Countries.” 22(2).
  • Zanin, Luca. 2014. “On Okun’s Law in OECD Countries: An Analysis by Age Cohorts.” Economics Letters 125(2):243–48. doi: 10.1016/j.econlet.2014.08.030.