Motivation

Using data from the 2022 Vietnam Household Living Standards Survey (VHLSS 2022), this chart shows how 48,204 households in Vietnam reported that their income had changed compared with the same month of the previous year.

The chart above indicates that over 85% of households reported an income that was higher than, or unchanged from, the same period of the previous year: 44% reported an increase and 41% reported no change. This pattern was most pronounced in the Northern and Central regions.

The Mekong River Delta stands out as the region with the most significant decline in income compared with the same period last year: 29% of its households reported a decrease, with households affected by COVID-19 hit particularly hard.

Data Processing

  # Packages ----
  library(haven)        # read_dta()
  library(tidyverse)    # dplyr / tidyr / ggplot2 verbs used by the script
  library(summarytools) # freq()

  # NOTE: the former `rm(list = ls())` was dropped on purpose. It only removes
  # objects from the global environment (attached packages and options stay),
  # so it never produced a clean session, yet it wiped the caller's workspace.
  # Restart R when a clean run is needed.

  # Working directory ----
  # Relative output paths used by the ggsave() calls resolve against this
  # folder.
  # NOTE(review): machine-specific absolute path -- consider an RStudio project
  # (or similar project-relative paths) so the script runs on other machines.
  setwd("C:/Users/PC/Desktop/VHLSS2022_Muc8/Practice R")

  # Import data ----
  muc8 <- read_dta("C:/Users/PC/Desktop/VHLSS2022_Muc8/SL_TTC_M8.dta")

  # Inspect data ----
  # View() opens a data viewer, so only call it from an interactive session.
  if (interactive()) {
    View(muc8)
  }
  glimpse(muc8)
    
    # Frequencies ----
    # Columns are evaluated with with(muc8, ...) instead of attach(muc8):
    # attach() places a copy of the columns on the search path, where they can
    # mask (or be masked by) other objects and go stale when muc8 changes.
    # Every table is print()ed explicitly: as bare expressions inside a `{ }`
    # block only the value of the last expression would be displayed.

    # One-way counts of vung
    print(with(muc8, table(vung)))

    # Two-way counts: vung (rows) x m8_c707 (columns)
    vung_noassis <- with(muc8, table(vung, m8_c707))
    print(margin.table(vung_noassis, 1)) # row totals (summed over m8_c707)
    print(margin.table(vung_noassis, 2)) # column totals (summed over vung)

    print(prop.table(vung_noassis))    # cell proportions of the grand total
    print(prop.table(vung_noassis, 1)) # row proportions
    print(prop.table(vung_noassis, 2)) # column proportions

    print(prop.table(vung_noassis, 1) * 100) # row percentages
    print(prop.table(vung_noassis, 2) * 100) # column percentages

    # Three-way counts: m8_c3 x m8_c702, one slice per vung
    print(with(muc8, table(m8_c3, m8_c702, vung)))

    # Same two-way table, with descriptive dimension names
    vung_noassis <- with(
      muc8,
      table(Khu_vuc = vung, Assistance_Status = m8_c707)
    )
    print(vung_noassis)

    # Summary tools ----
    # install.packages("summarytools")
    library("summarytools")
    print(summarytools::freq(muc8$vung, order = "freq"))
    print(freq(muc8$vung))
    # freq() of one variable within each level of the other
    print(with(muc8, by(vung, m8_c707, freq)))
    print(with(muc8, by(m8_c707, vung, freq)))
    
  # Question 3 (m8_c3): compared to this month last year, how has the
  # household income changed?
    library(stringi)

    names(muc8)

    # ASCII-transliterated value of vung -> English region name
    region_lookup <- c(
      "Bac Trung Bo"        = "North Central Coast",
      "Dong Bac bo"         = "Northeast",
      "Dong bang song Hong" = "Red River Delta",
      "Nam Trung Bo"        = "South Central Coast",
      "Tay Bac Bo"          = "Northwest",
      "Tay Nam Bo"          = "Mekong River Delta",
      "Tay Nguyen"          = "Central Highlands",
      "Vung Dong Nam Bo"    = "Southeast"
    )

    muc8_c3 <- muc8 %>%
      select(vung, m8_c3) %>%
      mutate(
        # Strip diacritics so the names can be matched as plain ASCII
        vung_eng = stri_trans_general(vung, "Latin-ASCII"),
        # Translate known names; anything not in the lookup keeps its
        # transliterated value
        vung_eng1 = coalesce(unname(region_lookup[vung_eng]), vung_eng),
        # Answer code -> English label; unknown codes are kept as text
        phrase = case_when(
          m8_c3 == 1 ~ "Increase significantly",
          m8_c3 == 2 ~ "Increase slightly",
          m8_c3 == 3 ~ "No change",
          m8_c3 == 4 ~ "Decrease slightly",
          m8_c3 == 5 ~ "Decrease significantly",
          m8_c3 == 6 ~ "Don't know",
          TRUE ~ as.character(m8_c3)
        )
      )
    
    # Sanity-check the recoded labels
    freq(muc8_c3$phrase)
    freq(muc8_c3$vung_eng1)

    # Number of households per region x answer, plus each answer's share
    # within its region.
    # `.groups = "drop_last"` states explicitly that the result of summarize()
    # stays grouped by vung_eng1 (so sum(count) is the regional total), and the
    # final ungroup() stops later steps from silently inheriting that grouping.
    muc8_c3_c <- muc8_c3 %>%
      group_by(vung_eng1, phrase) %>%
      summarize(count = n(), .groups = "drop_last") %>%
      mutate(percentage = count / sum(count) * 100) %>%
      ungroup()
    
    # Long to wide: the values of vung_eng1 become columns holding `count`;
    # the remaining column (phrase) identifies the rows
    muc8_c3_cwide <- pivot_wider(
      select(muc8_c3_c, vung_eng1, count, phrase),
      names_from = vung_eng1,
      values_from = count
    )

Visualization

# Visualization

    # Fix the order of the answers: the factor levels drive the order in which
    # the categories are stacked and matched to the manual fill colours.
    muc8_c3_c$phrase <- factor(
      muc8_c3_c$phrase,
      levels = c(
        "Don't know",
        "Decrease significantly",
        "Decrease slightly",
        "No change",
        "Increase slightly",
        "Increase significantly"
      )
    )
    freq(muc8_c3_c$phrase)

    # Fix the order of the regions along the axis
    muc8_c3_c$vung_eng1 <- factor(
      muc8_c3_c$vung_eng1,
      levels = c(
        "Central Highlands",
        "Northwest",
        "North Central Coast",
        "South Central Coast",
        "Southeast",
        "Northeast",
        "Mekong River Delta",
        "Red River Delta"
      )
    )
    freq(muc8_c3_c$vung_eng1)

    library(showtext)

    # Register the Google font under its own name as the family name
    my_font <- "Roboto Condensed"
    font_add_google(name = my_font, family = my_font)

    # Let showtext draw text on graphics devices from here on.
    # showtext_auto() replaces the deprecated showtext.auto() spelling.
    showtext_auto()

Number of households

# Number of households

    # Horizontal stacked bars: one bar per region (vung_eng1), split by answer
    # (phrase); bar length is the number of households (count).
    g1 <- ggplot(muc8_c3_c, aes(x = vung_eng1, y = count, fill = phrase)) +
      geom_col() +
      coord_flip() +
      # Fill colours are assigned in the order of the levels of `phrase`
      scale_fill_manual(
        name = "",
        values = c(
          "#FFF6E9", "#FF9843", "#FFDD95", "#B4B4B8", "#9BCF53", "#416D19"
        )
      ) +
      scale_y_continuous(position = "right") +
      labs(
        title = "The household income changed compared to this month last year",
        subtitle = "Number of household",
        caption = "Sources: VHLSS2022"
      ) +
      guides(
        fill = guide_legend(nrow = 1, label.position = "bottom", reverse = TRUE)
      ) +
      # base_family applies the font registered as `my_font`; previously the
      # font was loaded but never used by the plot.
      theme_minimal(base_family = my_font) +
      theme(
        axis.title = element_blank(),
        # Background and grid
        plot.background = element_rect(fill = "grey90", color = "grey90"),
        panel.background = element_rect(fill = "grey90", color = "grey90"),
        panel.grid.major.y = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_line(color = "grey80", linewidth = 0.1),
        # Title, subtitle, caption and axis text
        plot.title.position = "plot",
        plot.title = element_text(size = 40, color = "#416D19", face = "bold"),
        plot.subtitle = element_text(
          size = 20, color = "grey40", face = "italic"
        ),
        plot.caption = element_text(
          size = 20, color = "grey40", face = "italic"
        ),
        axis.text = element_text(size = 25, color = "grey50", face = "bold"),
        # Legend
        legend.position = "bottom",
        legend.key.height = unit(0.2, "cm"),
        legend.title = element_text(size = 25, color = "grey40", face = "bold"),
        legend.text = element_text(size = 20, color = "grey40"),
        legend.spacing.y = unit(0.2, "cm"),
        legend.spacing.x = unit(0.2, "cm"),
        # Plot margin
        plot.margin = unit(c(0.3, 0.5, 0.1, 0.5), "cm")
      )

    # Save this plot explicitly instead of whatever last_plot() returns
    ggsave(
      "muc8c3.png",
      plot = g1, width = 6, height = 4, dpi = 300, units = "in"
    )

Percentage of households

# Percentage of households

    # Horizontal stacked bars: one bar per region (vung_eng1), split by answer
    # (phrase); bar length is the `percentage` column of muc8_c3_c.
    g2 <- ggplot(
      muc8_c3_c,
      aes(x = vung_eng1, y = percentage, fill = phrase)
    ) +
      geom_col() +
      coord_flip() +
      # Fill colours are assigned in the order of the levels of `phrase`
      scale_fill_manual(
        name = "",
        values = c(
          "#FFF6E9", "#FF9843", "#FFDD95", "#B4B4B8", "#9BCF53", "#416D19"
        )
      ) +
      scale_y_continuous(position = "right") +
      labs(
        title = "The household income changed compared to this month last year",
        subtitle = "Percentage of household",
        caption = "Sources: VHLSS2022"
      ) +
      guides(
        fill = guide_legend(nrow = 1, label.position = "bottom", reverse = TRUE)
      ) +
      # base_family applies the font registered as `my_font`; previously the
      # font was loaded but never used by the plot.
      theme_minimal(base_family = my_font) +
      theme(
        axis.title = element_blank(),
        # Background and grid
        plot.background = element_rect(fill = "grey90", color = "grey90"),
        panel.background = element_rect(fill = "grey90", color = "grey90"),
        panel.grid.major.y = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_line(color = "grey80", linewidth = 0.1),
        # Title, subtitle, caption and axis text
        plot.title.position = "plot",
        plot.title = element_text(size = 40, color = "#416D19", face = "bold"),
        plot.subtitle = element_text(
          size = 20, color = "grey40", face = "italic"
        ),
        plot.caption = element_text(
          size = 20, color = "grey40", face = "italic"
        ),
        axis.text = element_text(size = 25, color = "grey50", face = "bold"),
        # Legend
        legend.position = "bottom",
        legend.key.height = unit(0.2, "cm"),
        legend.title = element_text(size = 25, color = "grey40", face = "bold"),
        legend.text = element_text(size = 20, color = "grey40"),
        legend.spacing.y = unit(0.2, "cm"),
        legend.spacing.x = unit(0.2, "cm"),
        # Plot margin
        plot.margin = unit(c(0.3, 0.5, 0.1, 0.5), "cm")
      )

    # Save this plot explicitly instead of whatever last_plot() returns
    ggsave(
      "muc8c3_per.png",
      plot = g2, width = 6, height = 4, dpi = 300, units = "in"
    )

Combined chart

library(patchwork)

    # Stack g1 above g2 (patchwork's `/` operator) and keep the result in a
    # variable so that exactly this object is the one written to disk.
    g_combined <- g1 / g2
    g_combined

    # Pass the combined plot explicitly rather than relying on last_plot()
    ggsave(
      "muc8c3_combine.png",
      plot = g_combined, width = 6, height = 8, dpi = 300, units = "in"
    )