# Start from a clean state by running this script in a fresh R session
# (e.g. with Rscript, or after "Restart R"). rm(list = ls()) is deliberately
# not called here: it would delete the caller's global objects whenever this
# file is source()d, while still leaving attached packages and options in place.
# Load some R packages (the data file HO1_remove_some_columns.dta used below can be downloaded from https://www.mediafire.com/file/ojayuymg68eraei/HO1_remove_some_columns.dta/file):
library(dplyr)
library(stringr)
library(haven)
#==========================================
# Data set: HO1_remove_some_columns.dta
#==========================================
# Read the Stata (.dta) file into ho1:
ho1 <- read_dta("F:/HO1_remove_some_columns.dta")
#------------------------
# Create Household ID
#------------------------
# Function adds leading zeros:
# Converts codes to text and left-pads them with "0" until each one is at
# least `width` characters long. Codes that are already that long are returned
# unchanged, and missing codes stay NA.
#   x:     vector of non-negative integer codes.
#   width: minimum number of characters in the result (default 2: 7 -> "07").
# Returns a character vector with the same length as x.
add_zero <- function(x, width = 2) {
  code <- as.character(x)
  # Number of zeros each code still needs; never negative.
  n_pad <- pmax(width - nchar(code), 0L)
  n_pad[is.na(n_pad)] <- 0L
  padded <- paste0(strrep("0", n_pad), code)
  # paste0() would turn a missing code into the text "NA"; keep it missing.
  padded[is.na(code)] <- NA_character_
  padded
}
# Columns that together identify one household:
code_components <- c("tinh", "huyen", "xa", "diaban", "hoso")
# Zero-padded text version of every component (suffix _n), concatenated into
# the household code h_code:
ho1 <- ho1 %>%
  mutate(
    tinh_n = add_zero(tinh),
    huyen_n = add_zero(huyen),
    xa_n = add_zero(xa),
    diaban_n = add_zero(diaban),
    hoso_n = add_zero(hoso),
    h_code = str_c(tinh_n, huyen_n, xa_n, diaban_n, hoso_n)
  )
# Remove duplications, keeping the first row of every household.
# The raw components are compared instead of h_code because the padding is
# only two characters wide, so different households can share one h_code:
# (huyen = 101, xa = 10) and (huyen = 10, xa = 110) both concatenate to "10110".
ho1_nonDup <- ho1 %>%
  distinct(!!!syms(code_components), .keep_all = TRUE)
#--------------------------
# Extract province names
#--------------------------
# Build a province lookup table from the value labels stored on tinh.
# The label values become province_code; the label names end up as row names.
library(stringi)
df_province <- data.frame(attributes(ho1_nonDup$tinh)$labels)
names(df_province) <- "province_code"
# Add the original label text (province_vie), an ASCII version without the
# "Tinh " / "Thanh pho " prefix (province_eng), and zero-pad the code so that
# it has the same form as tinh_n:
df_province <- df_province %>%
  mutate(
    province_vie = row.names(df_province),
    province_eng = province_vie %>%
      stri_trans_general("Latin-ASCII") %>%
      str_replace_all("Tinh |Thanh pho ", ""),
    province_code = add_zero(province_code)
  )
# Attach the English province name to every household row.
# A left join (rather than a full join) keeps exactly the household rows: a
# full join would also append an all-NA row for every labelled province code
# that has no household in the data, and those rows would then flow into the
# group means computed further down.
ho1_nonDup <- ho1_nonDup %>%
  left_join(
    df_province %>% select(province_code, province_eng),
    by = c("tinh_n" = "province_code")
  )
# Recode ttnt as text: 1 becomes "Urban"; every other value (NA included,
# because of the catch-all branch) becomes "Rural":
ho1_nonDup <- mutate(
  ho1_nonDup,
  ttnt = case_when(
    ttnt == 1 ~ "Urban",
    TRUE ~ "Rural"
  )
)
# Mean of m3c15 (financial aid for health care) for every combination of
# area type (ttnt) and province, sorted by province name:
medical_aid <- ho1_nonDup %>%
  group_by(ttnt, province_eng) %>%
  summarise(avg_medical_aid = mean(m3c15, na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(province_eng)
# Same table for m3ct (expenditure for health care):
medical_exp <- ho1_nonDup %>%
  group_by(ttnt, province_eng) %>%
  summarise(avg_medical_exp = mean(m3ct, na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(province_eng)
# Calculate overall means per province (urban and rural households together).
# Financial aid (m3c15):
medical_overall_aid <- ho1_nonDup %>%
  group_by(province_eng) %>%
  summarise(Overall = mean(m3c15, na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(province_eng)
# Expenditure (m3ct):
medical_overall_exp <- ho1_nonDup %>%
  group_by(province_eng) %>%
  summarise(Overall = mean(m3ct, na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(province_eng)
# Convert to wide form (one row per province, one column per ttnt value) and
# add the overall mean. The join key is stated explicitly so the join cannot
# silently pick up any other column the two tables happen to share.
library(tidyr)
medical_aid_wide <- medical_aid %>%
  spread(key = "ttnt", value = "avg_medical_aid") %>%
  full_join(medical_overall_aid, by = "province_eng")
medical_exp_wide <- medical_exp %>%
  spread(key = "ttnt", value = "avg_medical_exp") %>%
  full_join(medical_overall_exp, by = "province_eng")
# Sort provinces by their overall mean (ascending) and turn province_eng into
# a factor whose levels follow that sorted order:
medical_aid_wide <- medical_aid_wide %>%
  arrange(Overall) %>%
  mutate(province_eng = factor(province_eng, levels = province_eng))
medical_exp_wide <- medical_exp_wide %>%
  arrange(Overall) %>%
  mutate(province_eng = factor(province_eng, levels = province_eng))
#=======================
# Data Visualization
#=======================
# Load some R packages:
library(ggeconodist) # install.packages("ggeconodist", repos = "https://cinc.rud.is")
library(ggplot2)
library(showtext)
# Select Ubuntu Condensed font.
# showtext_auto() is the current name of the deprecated showtext.auto().
showtext_auto()
# Register the Google font under the family name "ubu" (fetched online, so
# this step needs an internet connection):
font_add_google(name = "Ubuntu Condensed", family = "ubu")
my_font <- "ubu"
# Figure 1: for every province, a bar spanning the rural and urban means of
# financial aid for health care, with the overall mean marked as a point.
g1 <- medical_aid_wide %>%
  ggplot(aes(x = province_eng)) +
  geom_econodist(
    aes(ymin = Rural, median = Overall, ymax = Urban),
    stat = "identity",
    median_col = "firebrick",
    median_point_size = 1.5,
    show.legend = TRUE
  ) +
  coord_flip() +
  theme_econodist() +
  # Value axis fixed to 0-6000 and drawn on the opposite side of the panel.
  scale_y_continuous(expand = c(0, 0), limits = c(0, 6000), position = "right") +
  labs(
    title = "Figure 1: The urban-rural gap in financial aid for health care by household, 2018",
    caption = "Data Source: VHLSS 2018, GSO"
  ) +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_text(family = my_font, size = 10),
    axis.text.x = element_text(family = my_font),
    plot.caption = element_text(family = my_font, size = 8, face = "italic"),
    plot.title = element_text(family = my_font, size = 16)
  )
# Draw on a fresh page: left-align title and caption, then place the legend
# (its three markers relabelled Rural / Overall / Urban) below the title.
grid.newpage()
g1 %>%
  left_align(c("title", "caption")) %>%
  add_econodist_legend(
    econodist_legend_grob(
      tenth_lab = "Rural",
      ninetieth_lab = "Urban",
      med_lab = "Overall",
      med_col = "firebrick",
      family = my_font,
      label_size = 12
    ),
    below = "title"
  ) %>%
  grid.draw()
# Figure 2: for every province, a bar spanning the rural and urban means of
# expenditure for health care, with the overall mean marked as a point.
g2 <- medical_exp_wide %>%
  ggplot(aes(x = province_eng)) +
  geom_econodist(
    aes(ymin = Rural, median = Overall, ymax = Urban),
    stat = "identity",
    median_col = "firebrick",
    median_point_size = 1.5,
    show.legend = TRUE
  ) +
  coord_flip() +
  theme_econodist() +
  # Value axis fixed to 0-12000 and drawn on the opposite side of the panel.
  scale_y_continuous(expand = c(0, 0), limits = c(0, 12000), position = "right") +
  labs(
    title = "Figure 2: The urban-rural gap in expenditure for health care by household, 2018",
    caption = "Data Source: VHLSS 2018, GSO"
  ) +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_text(family = my_font, size = 10),
    axis.text.x = element_text(family = my_font),
    plot.caption = element_text(family = my_font, size = 8, face = "italic"),
    plot.title = element_text(family = my_font, size = 16)
  )
# Draw on a fresh page: left-align title and caption, then place the legend
# (its three markers relabelled Rural / Overall / Urban) below the title.
grid.newpage()
g2 %>%
  left_align(c("title", "caption")) %>%
  add_econodist_legend(
    econodist_legend_grob(
      tenth_lab = "Rural",
      ninetieth_lab = "Urban",
      med_lab = "Overall",
      med_col = "firebrick",
      family = my_font,
      label_size = 12
    ),
    below = "title"
  ) %>%
  grid.draw()