This is a working paper for my final graduation thesis.
# Packages
# setup library
Packages <- c("tidyverse", "countrycode", "WDI", "plotly", "patchwork", "modelr", "robustbase")
lapply(Packages, library, character.only = TRUE)
# set time span
start_year <- 1970
end_year <- 2020
caption <- "Source: Arthor's creation based on the data extracted from World Bank"
# set graphic functions
errorbar_lm <- function(data, title = "title", subtitle = "subtitle") {
data$model <- data$model %>% set_names(c(df$country %>% unique()))
data$summary <- lapply(data$model, summary)
# create data frame for plotting error bars
coefficient <- c()
p_value <- c()
sd_error <- c()
for (i in 1:length(data$country)) {
coefficient[i] <- data$summary[i][[1]]$coefficients[2, 1]
p_value[i] <- data$summary[i][[1]]$coefficients[2, 4]
sd_error[i] <- data$summary[i][[1]]$coefficients[2, 2]
}
df_graphics <- tibble(
country = data$country,
coefficient = coefficient,
p_value = p_value,
sd_error = sd_error,
ci_lower = coefficient - 1.96*sd_error,
ci_upper = coefficient + 1.96*sd_error,
p_logical = ifelse(p_value > 0.05, "p>0.05", "p<=0.05")
)
# plot with ggplot2
ggplot(df_graphics,
aes(country,
coefficient,
colour = p_logical)) +
geom_hline(yintercept = 0, alpha = 0.25, linetype = "dashed") +
geom_point() +
geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper)) +
scale_color_manual(values = c( "#33ccff", "#ff9980")) +
labs(title = title,
subtitle = str_wrap(subtitle, 80),
x = "Country",
y = "Coefficient") +
facet_wrap(~p_logical, scales = "free_x") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0),
legend.position = "none")
}
errorbar_rob <- function(data, title = "title", subtitle = "subtitle") {
data$model <- data$model %>% set_names(c(df$country %>% unique()))
data$summary <- lapply(data$model, summary)
# create data frame for plotting error bars
coefficient <- c()
p_value <- c()
sd_error <- c()
for (i in 1:length(data$country)) {
coefficient[i] <- data$summary[i][[1]]$coefficients[2, 1]
p_value[i] <- data$summary[i][[1]]$coefficients[2, 4]
sd_error[i] <- data$summary[i][[1]]$sigma
}
df_graphics <- tibble(
country = data$country,
coefficient = coefficient,
p_value = p_value,
sd_error = sd_error,
ci_lower = coefficient - 1.96*sd_error,
ci_upper = coefficient + 1.96*sd_error,
p_logical = ifelse(p_value > 0.05, "p>0.05", "p<=0.05")
)
# plot with ggplot2
ggplot(df_graphics,
aes(country,
coefficient,
colour = p_logical)) +
geom_hline(yintercept = 0, alpha = 0.25, linetype = "dashed") +
geom_point() +
geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper)) +
scale_color_manual(values = c( "#33ccff", "#ff9980")) +
labs(title = title,
subtitle = str_wrap(subtitle, 80),
x = "Country",
y = "Coefficient",
caption = caption) +
facet_wrap(~p_logical, scales = "free_x") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0),
legend.position = "none")
}Okun’s Law defines the relations of economic growth rate (%) and unemployment rate (%) as follows;
\[ Y_t = \hat{\alpha} + \hat{\beta_1}U_t \]
\(Y_t = GDP\) at a \(t\) year $U_t = $ Unemployment at a \(t\) year
In this essay, I test the robustness of Okun’s law by using database from the World Bank among OECD countries. The reason I narrow down to OCED coutntries are 1) world-wide regression is too vague as an analysis and 2) that the economy systems are different between the developed and the developing countries. The time frame is from 1970 and 2020.
\[ \Delta u_t = \beta_0 - \beta_1 \Delta Y_t + \epsilon_t, \qquad t = 1, \, ...,\, T, \qquad (1)\\ \] \[ Y: Real\,GDP\,Growth\,Rate\\ u: Unemployment\,Rate\\ \]
\[ y_t - y_t* = -\beta(u_t - u_t*) + \epsilon_t, \qquad (2) \\ \] \[ y : Real\,GDP\,Growth\,Rate\\ y*: Natural\,GDP\,Growth\,Rate\\ u : Unemployment\,Rate\\ u*: Natural\,Uemployment\,Rate\\ \]
# Importing data
# get the OECD coutnries name
url <- "https://raw.githubusercontent.com/OxfordEconomics/CountryLists/master/countryList-OECD.csv"
oecd_country <- read_csv(url) %>%
rename(country = `OecdMembers2019-07-31`)
# get world countires' name for continent data
world_country <- countrycode::codelist[,c(3,6)] %>%
setNames(c("continent", "country"))
# create data frame with OECD countries and a continent's column
df_oecd <-
oecd_country %>%
add_row(country = "South Korea") %>%
add_row(country = "Slovakia") %>%
# add_row(country = "Czechoslovakia") %>%
left_join(world_country) %>%
na.omit()
# convert countries'name to iso3c data structure
iso3c_names <- countrycode::countrycode(
sourcevar = df_oecd$country,
origin = "country.name",
destination = "iso3c"
)
# unemployment, gdp data, gdp growth rate, and total population
input <- c("SL.UEM.TOTL.ZS", "NY.GDP.MKTP.CD", "NY.GDP.MKTP.KD.ZG", "SP.POP.TOTL")
# import data from world bank database by using `WDI` package
df_worldbank <- input %>%
purrr::map(~WDI(country = iso3c_names,
indicator = .,
start = start_year,
end = end_year)) %>%
reduce(merge) %>%
setNames(c("iso2c", "country", "year", "unemploy_rate", "gdp", "gdp_growth", "total_population")) %>%
as_tibble()
# tidy df_worldbank
df <- df_worldbank %>%
left_join(df_oecd, by = "country") %>%
mutate(continent = ifelse(country == "Korea, Rep.", "Asia", continent),
country = as.factor(country),
continent = as.factor(continent),
unemploy_num = unemploy_rate * total_population) %>%
select(country, year, continent, gdp, gdp_growth, unemploy_rate, unemploy_num) %>%
na.omit()\[ \frac {\Delta ln(Yt) - \Delta ln(Y_{t-1})}{\Delta ln(Y)} \]
DT::datatable(df %>%
mutate(gdp = gdp / 1e6) %>%
mutate_if(is.numeric, ~round(., digits = 2)),
rownames = FALSE,
colnames = c("Country", "Year", "Continet", "GDP($mil)", "GDP Growth", "Unemploy (%)", "Unemploy (total)"),
extensions = 'Buttons',
options = list(autoWidth = TRUE,
pageLength = 10,
dom = 'Bfrtip',
buttons = list('copy', 'print',
list(extend = 'collection',
buttons = c('csv', 'excel', 'pdf'),
text = 'Download')),
scrollX = TRUE,
scrollCollapse = TRUE),
class = 'cell-border stripe',
caption = "The data is frame extracted from WB. You can explore the data by using 'search bar' on the top right.")Since I got data from the above code, now I run a linear regression model by using “unemployment” on X axis and “gdp_grow” on Y axis. Accoding to Okun, Okun’s Law discribes their negative relations, so when unemployment goes up, it states, the gdp growth rate goes down.
# visualization of Okun's law by country
ggplot(df, aes(unemploy_rate, gdp_growth, colour = continent)) +
geom_point(alpha = .25) +
geom_smooth(method = "lm", size = 0.5, alpha = .25, colour = "black", se = FALSE) +
scale_x_continuous(breaks = seq(0, 25, by = 5)) +
facet_wrap(~country, ncol = 1, drop = TRUE, scales = "free") +
labs(title = "Empirical research on Okun's Law over OECD countries",
x = "Unemployment rate (%)",
y = "GDP growth rate (%)",
caption = caption) +
scale_x_continuous(expand = c(0, 0)) +
theme_bw()\[ \Delta u_t = \beta_0 + \beta_1 \Delta Y_t + \epsilon_t, \qquad t = 1, \, ...,\, T, \qquad (1)\\ \]
\(\qquad u:\,\)Percentage of the unemployed
\(\qquad Y:\,\)Real GDP growth
# modeling of the difference model
df_lm <- df %>%
group_by(country) %>%
nest() %>%
mutate(model = map(data, ~lm(unemploy_rate ~ gdp_growth, data = .)))
# plot error bars
errorbar_lm(df_lm,
title = "Beta1 Accross OECD countries with the difference model",
subtitle = "")\[ ln(u_t) = \beta_0 + \beta_1 ln(Y_t) + \epsilon_t, \qquad t = 1, \, ...,\, T, \qquad (1)\\ \]
\(\qquad u:\,\)Number of the unemployed
\(\qquad Y:\,\)Real GDP in USD
# modeling of the difference model (log)
df_lm_log <- df %>%
group_by(country) %>%
nest() %>%
mutate(model = map(data, ~lm(log(unemploy_num) ~ log(gdp), data = .)))
# plot error bars
errorbar_lm(df_lm_log,
title = "Beta Accross OECD coutnries with the difference model (log)",
subtitle = "")\[ ln(\Delta u_t) = \beta_0 + \beta_1 ln(\Delta Y) + \beta_2y_{t-1} + \epsilon_t, \qquad t = 1, \, ...,\, T, \qquad (1)\\ \]
\(\qquad u:\,\)Number of the unemployed
\(\qquad Y:\,\)Real GDP in USD
# modeling of the difference model (log)
df_lm_log2 <- df %>%
mutate(year_lag = year - 1) %>%
group_by(country) %>%
nest() %>%
mutate(model = map(data, ~lmrob(log(unemploy_num) ~ log(gdp) + year_lag, data = .)))
# plot error bars
errorbar_rob(df_lm_log2,
title = "Beta Accross OECD coutnries with the difference model (log)",
subtitle = "")\[ \Delta u_t = \beta_0 + \beta_1 \Delta lnY + \epsilon_t, \qquad t = 1, \, ...,\, T, \qquad (1)\\ \] \(\qquad u:\,\)The unemployment rate
\(\qquad Y:\,\)Real GDP in USD
# modeling of the difference model (log)
df_lm_log3 <- df %>%
group_by(country) %>%
nest() %>%
unnest(data) %>%
mutate(gdp_log_delta = log(gdp) - lag(log(gdp), 1L),
unemploy_rate_dif = unemploy_rate - lag(unemploy_rate, n = 1L)) %>%
nest() %>%
mutate(model = map(data, ~lm(unemploy_rate_dif ~ gdp_log_delta, data = .)))library(sandwich)
library(lmtest)
library(gtsummary)
library(gt)
# for one country with `summary` function
df_lm_log3$model[[1]] %>%
tbl_regression() %>%
add_global_p() %>%
bold_p(t = 0.05) %>%
modify_caption(paste(df_lm_log3$country[1]))
# for one country with `robust standard error`
m <- df_lm_log3$model[[1]]
result <- coeftest(m, vcov = vcovHC(m, type="HC2"))
set_names(list(result), "Austria")
# for OECD countries
m <- NULL
result <- NULL
rob_st <- list()
for (i in 1:length(df_lm_log3$country)) {
m <- df_lm_log3$model[[i]]
result <- coeftest(m, vcov = vcovHC(m, type="HC1"))
# result <- set_names(list(result), df_lm_log3$country[i])
rob_st[[c(df_lm_log3$country[i])]] <- result
}
setNames(rob_st, df_lm_log3$country)
summary(df_lm_log3$model) %>% setNames(df_lm_log3$country)
lapply(df_lm_log3$model, summary)[[1]] # plot error bars
errorbar_lm(df_lm_log3,
title = "Beta Accross OECD coutnries with the difference model (log)",
subtitle = "Most of the countries do not have p value less than 0.05, meaning
the null hypothesis can not be rejected.")\[ y_t - y_t* = -\beta(u_t - u_t*) + \epsilon_t, \qquad (2) \\ \] \[ y : Real\,GDP\,Growth\,Rate\\ y*: Natural\,GDP\,Growth\,Rate\\ u : Unemployment\,Rate\\ u*: Natural\,Uemployment\,Rate\\ \]
# set the natural gdp growth rate
y_natural <- 2
# set the natural unemployment rate
u_natural <- 3
# modeling of the gap model
df_gap <- df %>%
mutate(gdp_growth = gdp_growth - y_natural,
unemployment = unemploy_rate - u_natural) %>%
group_by(country) %>%
nest() %>%
mutate(model = map(data, ~lm(gdp_growth ~ unemployment, data = .)))
# plot error bars
errorbar_lm(df_gap,
title = "Beta Accross OCED countriest the gap model",
subtitle = "Most of the countries do not have p value less than 0.05, meaning the null hypothesis can not be rejected.")\[ ln(y_t) - ln(y_t*) = -\beta(u_t - u_t*) + \epsilon_t, \qquad (2) \\ \]
\[ y : Real\,GDP\,Growth\,Rate\\ y*: Natural\,GDP\,Growth\,Rate\\ u : Unemployment\,Rate\\ u*: Natural\,Uemployment\,Rate\\ \]
# modeling of the gap model (log)
df_gap_log <- df %>%
mutate(gdp = log(gdp) - y_natural,
unemployment = log(unemploy_num) - u_natural) %>%
group_by(country) %>%
nest() %>%
mutate(model = map(data, ~lm(gdp ~ unemployment, data = .)))
# plot error bars
errorbar_lm(df_gap_log,
title = "Beta Accross OCED countriest the gap model (log)",
subtitle = "Most of the countries do not have p value less than 0.05, meaning the null hypothesis can not be rejected.")