Using data from VHLSS2022, this chart shows how household income changed, compared to the same month last year, for 48,204 households in Vietnam.
The chart above indicates that over 85% of households reported a household income that was stable or higher than in the corresponding period of the previous year, with 44% noting an increase and 41% reporting no change. This trend was most pronounced in the Northern and Central regions.
The Mekong Delta stands out as the region with the most significant decline in income compared to the same period last year, with 29% of households reporting a decrease; households impacted by COVID-19 were particularly affected.
# Setup ----
# `rm(list = ls())` was removed: it only clears the global environment (loaded
# packages and options survive), so restart R for a genuinely clean run.
library(haven)
library(tidyverse)
library(summarytools)

# Machine-specific working directory. It is kept because the figures saved
# further down are written with relative file names.
setwd("C:/Users/PC/Desktop/VHLSS2022_Muc8/Practice R")

# Import data
muc8 <- read_dta("C:/Users/PC/Desktop/VHLSS2022_Muc8/SL_TTC_M8.dta")

# View data (the spreadsheet viewer is only opened in interactive sessions)
if (interactive()) {
  View(muc8)
}
glimpse(muc8)

# Frequencies ----
# Columns are reached through `with(muc8, ...)` instead of `attach(muc8)`, so
# nothing is left on the search path. The statements are deliberately not
# wrapped in `{ }`: inside braces only the last value is auto-printed.
with(muc8, table(vung))

# vung x m8_c707 cross-tabulation with labelled dimensions
vung_noassis <- with(
  muc8,
  table(Khu_vuc = vung, Assistance_Status = m8_c707)
)
margin.table(vung_noassis, 1) # row totals (summed over columns)
margin.table(vung_noassis, 2) # column totals (summed over rows)
prop.table(vung_noassis)      # share of the grand total

row_share <- prop.table(vung_noassis, 1)
col_share <- prop.table(vung_noassis, 2)
row_share       # row proportions
col_share       # column proportions
row_share * 100 # row percentages
col_share * 100 # column percentages

with(muc8, table(m8_c3, m8_c702, vung))
print(vung_noassis)

# Summary tools ----
# install.packages("summarytools")
freq(muc8$vung, order = "freq")
freq(muc8$vung)
with(muc8, by(vung, m8_c707, freq))
with(muc8, by(m8_c707, vung, freq))
# Question 3: compared to this month last year, how has the household income
# changed?
names(muc8)
library(stringi)

# ASCII-transliterated region name -> English region name
region_labels_en <- c(
  "Bac Trung Bo" = "North Central Coast",
  "Dong Bac bo" = "Northeast",
  "Dong bang song Hong" = "Red River Delta",
  "Nam Trung Bo" = "South Central Coast",
  "Tay Bac Bo" = "Northwest",
  "Tay Nam Bo" = "Mekong River Delta",
  "Tay Nguyen" = "Central Highlands",
  "Vung Dong Nam Bo" = "Southeast"
)

muc8_c3 <- muc8 %>%
  select(vung, m8_c3) %>%
  mutate(
    # Transliterate the region names to plain ASCII before matching
    vung_eng = stri_trans_general(vung, "Latin-ASCII"),
    # Unmatched (or missing) names fall back to the transliterated value
    vung_eng1 = coalesce(unname(region_labels_en[vung_eng]), vung_eng),
    # Answer codes 1-6 become text; any other code is kept as a string
    phrase = case_when(
      m8_c3 == 1 ~ "Increase significantly",
      m8_c3 == 2 ~ "Increase slightly",
      m8_c3 == 3 ~ "No change",
      m8_c3 == 4 ~ "Decrease slightly",
      m8_c3 == 5 ~ "Decrease significantly",
      m8_c3 == 6 ~ "Don't know",
      TRUE ~ as.character(m8_c3)
    )
  )
# Sanity-check the recoded answer and region columns
freq(muc8_c3$phrase)
freq(muc8_c3$vung_eng1)

# Number of households per region x answer, plus each answer's share within
# its region. The grouping is stated explicitly (rather than relying on
# summarise() silently keeping the first grouping level) and removed again so
# the result is a plain, ungrouped tibble.
muc8_c3_c <- muc8_c3 %>%
  count(vung_eng1, phrase, name = "count") %>%
  group_by(vung_eng1) %>%
  mutate(percentage = count / sum(count) * 100) %>%
  ungroup()
# Long to wide: one row per answer, one column of household counts per region
muc8_c3_cwide <- pivot_wider(
  select(muc8_c3_c, vung_eng1, count, phrase),
  names_from = vung_eng1,
  values_from = count
)

# Visualization
# Fix the order of the answers (this drives stacking and legend order)
answer_order <- c(
  "Don't know",
  "Decrease significantly",
  "Decrease slightly",
  "No change",
  "Increase slightly",
  "Increase significantly"
)
muc8_c3_c$phrase <- factor(muc8_c3_c$phrase, levels = answer_order)
freq(muc8_c3_c$phrase)

# Fix the order in which the regions are drawn along the axis
region_order <- c(
  "Central Highlands",
  "Northwest",
  "North Central Coast",
  "South Central Coast",
  "Southeast",
  "Northeast",
  "Mekong River Delta",
  "Red River Delta"
)
muc8_c3_c$vung_eng1 <- factor(muc8_c3_c$vung_eng1, levels = region_order)
freq(muc8_c3_c$vung_eng1)
library(showtext)
# Register the Google font first, then switch on showtext rendering.
my_font <- "Roboto Condensed"
font_add_google(name = my_font, family = my_font)
# `showtext_auto()` replaces the deprecated `showtext.auto()` spelling.
showtext_auto()
# NOTE(review): no plot in the visible part of this script sets
# `family = my_font`, so the registered font is never applied - confirm
# whether that is intended.

# Number of household
# Build the stacked horizontal bar chart of income-change answers by region.
#
# Arguments:
#   data      Summary table with columns `vung_eng1`, `phrase` and the column
#             named by `value_col`.
#   value_col Name (string) of the column mapped to the bar length.
#   subtitle  Subtitle text shown under the title.
# Returns: a ggplot object.
plot_income_change <- function(data, value_col, subtitle) {
  # Colours are named by answer so each answer keeps its colour even when a
  # level is absent from the data (unnamed values are assigned by position).
  answer_colours <- c(
    "Don't know" = "#FFF6E9",
    "Decrease significantly" = "#FF9843",
    "Decrease slightly" = "#FFDD95",
    "No change" = "#B4B4B8",
    "Increase slightly" = "#9BCF53",
    "Increase significantly" = "#416D19"
  )

  ggplot(data, aes(x = vung_eng1, y = .data[[value_col]], fill = phrase)) +
    geom_col() +
    coord_flip() +
    scale_fill_manual(name = "", values = answer_colours) +
    scale_y_continuous(position = "right") +
    labs(
      title = "The household income changed compared to this month last year",
      subtitle = subtitle,
      caption = "Sources: VHLSS2022"
    ) +
    guides(
      fill = guide_legend(
        nrow = 1,
        label.position = "bottom",
        reverse = TRUE
      )
    ) +
    theme_minimal() +
    theme(
      # Axes
      axis.title = element_blank(),
      axis.text = element_text(size = 25, color = "grey50", face = "bold"),
      # Background and grid
      plot.background = element_rect(fill = "grey90", color = "grey90"),
      panel.background = element_rect(fill = "grey90", color = "grey90"),
      panel.grid.major.y = element_blank(),
      panel.grid.minor = element_blank(),
      panel.grid.major.x = element_line(color = "grey80", linewidth = 0.1),
      # Title, subtitle, caption
      plot.title.position = "plot",
      plot.title = element_text(size = 40, color = "#416D19", face = "bold"),
      plot.subtitle = element_text(
        size = 20, color = "grey40", face = "italic"
      ),
      plot.caption = element_text(
        size = 20, color = "grey40", face = "italic"
      ),
      # Legend
      legend.position = "bottom",
      legend.key.height = unit(0.2, "cm"),
      legend.title = element_text(size = 25, color = "grey40", face = "bold"),
      legend.text = element_text(size = 20, color = "grey40"),
      legend.spacing.y = unit(0.2, "cm"),
      legend.spacing.x = unit(0.2, "cm"),
      # Plot margin
      plot.margin = unit(c(0.3, 0.5, 0.1, 0.5), "cm")
    )
}

# Chart 1: household counts. The plot is passed to ggsave() explicitly rather
# than relying on the implicit last_plot().
g1 <- plot_income_change(muc8_c3_c, "count", "Number of household")
ggsave(
  "muc8c3.png",
  plot = g1, width = 6, height = 4, dpi = 300, units = "in"
)

# Chart 2: share of households within each region
g2 <- plot_income_change(muc8_c3_c, "percentage", "Percentage of household")
ggsave(
  "muc8c3_per.png",
  plot = g2, width = 6, height = 4, dpi = 300, units = "in"
)