Preparation

Load packages

pacman::p_load(haven, tibble, tidyverse, naniar, knitr, magrittr, kableExtra, scales)

Load data

midline_group <- read_dta("00-data/midline-data/Midline_IE_1_SC_G.dta")
midline_individual <- read_dta("00-data/midline-data/Midline_IE_2_L.dta")

Data dictionary

# Function to create the data dictionary
svy_dict <- function(x, ...) {
  dict <- tibble(
    var_names = colnames(x),
    var_labs = sjlabelled::get_label(x),
    val_labs = sjlabelled::get_labels(x, values = "n"),
    ...
  )
  return(dict)
}


# Create the data dictionary for the single survey
midline_group_dict <- svy_dict(midline_group)

midline_ind_dict <- svy_dict(midline_individual)

head(midline_ind_dict)
## # A tibble: 6 × 3
##   var_names    var_labs              val_labs     
##   <chr>        <chr>                 <named list> 
## 1 cim          "cim"                 <chr [2,815]>
## 2 nis_d        ""                    <chr [2,815]>
## 3 sexemembre   "SEXE DU MEMBRE"      <chr [2]>    
## 4 hhsize       ""                    <NULL>       
## 5 adresse      ""                    <chr [1,386]>
## 6 statut_wrong "STATUT DE RESIDENCE" <chr [4]>

The data dictionary allows us to look for keywords within questions more easily.

Summary Statistics

Age

summary_age <- midline_individual %>%
  summarise(
    Variable = "Age",
    Missing_Values = sum(is.na(q56)),
    Non_Missing_Values = sum(!is.na(q56)),
    Mean = mean(q56, na.rm = TRUE),
    Median = median(q56, na.rm = TRUE),
    SD = sd(q56, na.rm = TRUE),
    Min = min(q56, na.rm = TRUE),
    Max = max(q56, na.rm = TRUE)
  )

# Create and style the table
summary_age %>%
  kable(caption = "Summary Statistics for Age (q56)", align = 'c') %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                full_width = F, 
                position = "center")
Summary Statistics for Age (q56)
Variable Missing_Values Non_Missing_Values Mean Median SD Min Max
Age 205 12416 42.90214 41 13.47602 12 121

Gender

summary_gender <- midline_individual %>%
  summarise(
    Man = sum(sexemembre == 1, na.rm = TRUE),
    Woman = sum(sexemembre == 2, na.rm = TRUE),
    Missing = sum(is.na(sexemembre))
  ) %>%
  mutate(Total = Man + Woman)

# Create and style the table for gender
summary_gender %>%
  kable(caption = "Gender Distribution", align = 'c') %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                full_width = F, 
                position = "center")
Gender Distribution
Man Woman Missing Total
7355 5061 205 12416

Household size

# Summary statistics for hhsize (household size)
summary_hhsize <- midline_individual %>%
  summarise(
    Variable = "Household Size",
    Missing_Values = sum(is.na(hhsize)),
    Mean = mean(hhsize, na.rm = TRUE),
    Median = median(hhsize, na.rm = TRUE),
    SD = sd(hhsize, na.rm = TRUE),
    Min = min(hhsize, na.rm = TRUE),
    Max = max(hhsize, na.rm = TRUE)
  )

# Create and style the table
summary_hhsize %>%
  kable(caption = "Summary Statistics for Household Size (hhsize)", align = 'c') %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                full_width = F, 
                position = "center")
Summary Statistics for Household Size (hhsize)
Variable Missing_Values Mean Median SD Min Max
Household Size 205 4.89441 5 2.584201 1 16

Nationality

CD136 [NOM] est de quelle nationalité ? 1=Congolaise 2=RDC 3=Camerounaise 4=RCA 5=Rwandaise 6=Malienne 7=Sénégalaise 8=Béninoise 9=angolaise 10=chinoide 10=autrte afrique 12=Francaise 13=Autre Europe 14=Autre

cd136_labels <- c(
  `1` = "Congolese",
  `2` = "DR Congo",
  `3` = "Cameroonian",
  `4` = "Central African",
  `5` = "Rwandan",
  `6` = "Malian",
  `7` = "Senegalese",
  `8` = "Beninese",
  `9` = "Angolan",
  `10` = "Chinese",
  `11` = "Other African",
  `12` = "French",
  `13` = "Other European",
  `14` = "Other Nationality"
)

# Summary statistics for CD136 (What is [NAME]'s nationality?)
summary_cd136 <- midline_individual %>%
  mutate(
    CD136_label = factor(CD136, levels = names(cd136_labels), labels = cd136_labels)
  ) %>%
  group_by(CD136_label) %>%
  summarise(
    Count = n(),
    Percentage = round((n() / nrow(midline_individual)) * 100, 2)
  )

summary_cd136 %>%
  kable(caption = "What is [NAME]'s nationality? (CD136)", align = 'c') %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                full_width = F, 
                position = "center")
What is [NAME]’s nationality? (CD136)
CD136_label Count Percentage
Congolese 9526 75.48
DR Congo 1411 11.18
Cameroonian 11 0.09
Central African 1655 13.11
Rwandan 9 0.07
Malian 1 0.01
Angolan 1 0.01
Chinese 1 0.01
Other African 2 0.02
Other Nationality 4 0.03

CD137 Is [NAME] a refugee or asylum seeker in the ROC? 1 refugee 2 asylum seeker 3 Other

cd137_labels <- c(
  `1` = "Refugee",
  `2` = "Asylum Seeker",
  `3` = "Other"
)

# Summary statistics for CD137 (Is [NAME] a refugee or asylum seeker in the ROC?)
summary_cd137 <- midline_individual %>%
  mutate(
    CD137_label = factor(CD137, levels = names(cd137_labels), labels = cd137_labels)
  ) %>%
  group_by(CD137_label) %>%
  summarise(
    Count = n(),
    Percentage = round((n() / nrow(midline_individual)) * 100, 2)
  )

summary_cd137 %>%
  kable(caption = "Is [NAME] a refugee or asylum seeker in the ROC? (CD137)", 
        align = 'c') %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                full_width = F, 
                position = "center")
Is [NAME] a refugee or asylum seeker in the ROC? (CD137)
CD137_label Count Percentage
Refugee 2548 20.19
Asylum Seeker 48 0.38
Other 499 3.95
NA 9526 75.48

The number of NAs is consistent with the amount of Congolese nationals

Education

CE101. Has [NAME] ever been to school during her life? “1=‘Yes’ 2=‘No’

summary_ce101 <- midline_individual %>%
  mutate(
    CE101_label = factor(CE101, levels = c(1, 2), labels = c("Yes", "No"))
  ) %>%
  group_by(CE101_label) %>%
  summarise(
    Count = n(),
    Percentage = (n() / nrow(midline_individual)) * 100
  )

summary_ce101 %>%
  kable(caption = "Has [NAME] ever been to school during her life? (CE101)", 
        align = 'c') %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                full_width = F, 
                position = "center")
Has [NAME] ever been to school during her life? (CE101)
CE101_label Count Percentage
Yes 7416 58.75921
No 2790 22.10601
NA 2415 19.13478

Figure out why CE101 has so many NAs

# Define age groups
midline_individual <- midline_individual %>%
  mutate(
    Age_Group = cut(q56, breaks = c(0, 5, 10, 15, 20, 25, 30, 35, 40, 
                                    45, 50, 55, 60, Inf), right = FALSE, 
                    labels = c("0-4", "5-9", "10-14", "15-19", "20-24", 
                               "25-29", "30-34", "35-39", "40-44", "45-49", 
                               "50-54", "55-59", "60+")),
    CE101_label = factor(CE101, levels = c(1, 2), labels = c("Yes", "No")))

# Cross-tabulation of CE101 with age groups, including NAs
cross_table <- midline_individual %>%
  group_by(Age_Group, CE101_label) %>%
  summarise(Count = n(), .groups = 'drop') %>%
  pivot_wider(names_from = CE101_label, values_from = Count, 
              values_fill = list(Count = 0)) %>%
  mutate(Total = Yes + No + `NA`) %>%
  select(Age_Group, Yes, No, `NA`, Total)

cross_table %>%
  kable(caption = "Table of Ever been to School (CE101) by Age Group", align = 'c') %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                full_width = F, 
                position = "center")
Table of Ever been to School (CE101) by Age Group
Age_Group Yes No NA Total
10-14 15 3 3 21
15-19 47 23 24 94
20-24 327 154 219 700
25-29 527 228 312 1067
30-34 829 345 430 1604
35-39 1210 401 458 2069
40-44 1135 332 313 1780
45-49 1093 263 227 1583
50-54 805 279 153 1237
55-59 547 203 107 857
60+ 766 510 128 1404
NA 115 49 41 205

Doesn’t seem like there’s necessarily a tendency of NAs for specific age groups…

Relevant Outcomes

Consumption (quantity - volume/amount, and quality - type)

Cereals:

  • bg5q501a
    • French: SUR LES 7 DERNIERS JOURS, COMBIEN DE JOURS AVEZ-VOUS CONSOMMÉ DES CÉRÉALES, DES
    • English: In the last 7 days, how many days did you consume cereals?
  • bg5q501b
    • French: QUELLE EST LA PRINCIPALE SOURCE DES ALIMENTS CONSOMMES ?
    • English: What is the main source of the consumed food?

Beans:

  • bg5q502a
    • French: SUR LES 7 DERNIERS JOURS, COMBIEN DE JOURS AVEZ-VOUS CONSOMMÉ DES HARICOTS, DES
    • English: In the last 7 days, how many days did you consume beans?
  • bg5q502b
    • French: QUELLE EST LA PRINCIPALE SOURCE DES ALIMENTS CONSOMMES ?
    • English: What is the main source of the consumed food?

Milk and Dairy:

  • bg5q503a
    • French: SUR LES 7 DERNIERS JOURS, COMBIEN DE JOURS AVEZ-VOUS CONSOMMÉ DU LAIT OU UN AUTR
    • English: In the last 7 days, how many days did you consume milk or other dairy products?
  • bg5q503b
    • French: QUELLE EST LA PRINCIPALE SOURCE DES ALIMENTS CONSOMMES ?
    • English: What is the main source of the consumed food?

Meat, Poultry, Fish:

  • bg5q504a
    • French: SUR LES 7 DERNIERS JOURS, COMBIEN DE JOURS AVEZ-VOUS CONSOMMÉ DE LA VIANDE, DU P
    • English: In the last 7 days, how many days did you consume meat, poultry, or fish?

Muscle Meat:

  • bg5q504aa
    • French: SUR LES 7 DERNIERS JOURS, COMBIEN DE JOURS AVEZ-VOUS CONSOMMÉ DE LA VIANDE (MUSC
    • English: In the last 7 days, how many days did you consume muscle meat?
  • bg5q504ba
    • French: QUELLE EST LA PRINCIPALE SOURCE DES ALIMENTS CONSOMMES ?
    • English: What is the main source of the consumed food?

Offal:

  • bg5q504ab
    • French: SUR LES 7 DERNIERS JOURS, COMBIEN DE JOURS AVEZ-VOUS CONSOMMÉ DES ABATS: FOIE, R
    • English: In the last 7 days, how many days did you consume offal: liver, kidneys, etc.?
  • bg5q504bb
    • French: QUELLE EST LA PRINCIPALE SOURCE DES ALIMENTS CONSOMMES ?
    • English: What is the main source of the consumed food?

Fish:

  • bg5q504ac
    • French: SUR LES 7 DERNIERS JOURS, COMBIEN DE JOURS AVEZ-VOUS CONSOMMÉ DU POISSON: AUTRE
    • English: In the last 7 days, how many days did you consume fish?
  • bg5q504bc
    • French: QUELLE EST LA PRINCIPALE SOURCE DES ALIMENTS CONSOMMES ?
    • English: What is the main source of the consumed food?

Eggs:

  • bg5q504ad
    • French: SUR LES 7 DERNIERS JOURS, COMBIEN DE JOURS AVEZ-VOUS CONSOMMÉ DES ŒUFS ?
    • English: In the last 7 days, how many days did you consume eggs?
  • bg5q504bd _ French: QUELLE EST LA PRINCIPALE SOURCE DES ALIMENTS CONSOMMES ?
    • English: What is the main source of the consumed food?

Vegetables:

  • bg5q505a
    • French: SUR LES 7 DERNIERS JOURS, COMBIEN DE JOURS AVEZ-VOUS CONSOMMÉ DES LÉGUMES ?
    • English: In the last 7 days, how many days did you consume vegetables?
  • bg5q505b
    • French: QUELLE EST LA PRINCIPALE SOURCE DES ALIMENTS CONSOMMES ?
    • English: What is the main source of the consumed food?

Detailed Vegetable Consumption:

  • bg5q505aa
    • French: SUR LES 7 DERNIERS JOURS, COMBIEN DE JOURS AVEZ-VOUS CONSOMMÉ DES LÉGUMES DE COU
    • English: In the last 7 days, how many days did you consume root vegetables?
  • bg5q505ab
    • French: SUR LES 7 DERNIERS JOURS, COMBIEN DE JOURS AVEZ-VOUS CONSOMMÉ DES LÉGUMES-FEUILL
    • English: In the last 7 days, how many days did you consume leafy vegetables?

Main Source of Detailed Vegetable Consumption:

  • bg5q505ba
    • French: QUELLE EST LA PRINCIPALE SOURCE DES ALIMENTS CONSOMMES ?
    • English: What is the main source of the consumed food?
  • bg5q505bb
    • French: QUELLE EST LA PRINCIPALE SOURCE DES ALIMENTS CONSOMMES ?
    • English: What is the main source of the consumed food?

Fruits:

  • bg5q506a
    • French: SUR LES 7 DERNIERS JOURS, COMBIEN DE JOURS AVEZ-VOUS CONSOMMÉ DES FRUITS : BANAN
    • English: In the last 7 days, how many days did you consume fruits?
  • bg5q506b
    • French: QUELLE EST LA PRINCIPALE SOURCE DES ALIMENTS CONSOMMES ?
    • English: What is the main source of the consumed food?

Oil, Fat, Butter:

  • bg5q507a
    • French: SUR LES 7 DERNIERS JOURS, COMBIEN DE JOURS AVEZ-VOUS CONSOMMÉ DE L’HUILE/GRAS/BE
    • English: In the last 7 days, how many days did you consume oil/fat/butter?
  • bg5q507b
    • French: QUELLE EST LA PRINCIPALE SOURCE DES ALIMENTS CONSOMMES ?
    • English: What is the main source of the consumed food?

Sugar and Sugary Products:

  • bg5q508a
    • French: SUR LES 7 DERNIERS JOURS, COMBIEN DE JOURS AVEZ-VOUS CONSOMMÉ DU SUCRE OU PRODUI
    • English: In the last 7 days, how many days did you consume sugar or sugary products?
  • bg5q508b
    • French: QUELLE EST LA PRINCIPALE SOURCE DES ALIMENTS CONSOMMES ?
    • English: What is the main source of the consumed food?

Spices and Condiments:

  • bg5q509a
    • French: SUR LES 7 DERNIERS JOURS, COMBIEN DE JOURS AVEZ-VOUS CONSOMMÉ DES EPICES/CONDIM
    • English: In the last 7 days, how many days did you consume spices/condiments?
  • bg5q509b
    • French: QUELLE EST LA PRINCIPALE SOURCE DES ALIMENTS CONSOMMES ?
    • English: What is the main source of the consumed food?
long_data <- midline_individual %>%
  pivot_longer(
    cols = starts_with("bg5q"),
    names_to = "variable",
    values_to = "value"
  )

long_data <- long_data %>%
  mutate(
    food_type = case_when(
      str_detect(variable, "501a$") ~ "cereals",
      str_detect(variable, "502a$") ~ "beans",
      str_detect(variable, "503a$") ~ "milk_dairy",
      str_detect(variable, "504aa$") ~ "muscle_meat",
      str_detect(variable, "504ab$") ~ "offal",
      str_detect(variable, "504ac$") ~ "fish",
      str_detect(variable, "504ad$") ~ "eggs",
      str_detect(variable, "505aa$") ~ "root_vegetables",
      str_detect(variable, "505ab$") ~ "leafy_vegetables",
      str_detect(variable, "506a$") ~ "fruits",
      str_detect(variable, "507a$") ~ "oil_fat_butter",
      str_detect(variable, "508a$") ~ "sugar_products",
      str_detect(variable, "509a$") ~ "spices_condiments",
      TRUE ~ NA_character_
    ),
    question_type = case_when(
      str_detect(variable, "a$") ~ "consumption_days",
      str_detect(variable, "b$") ~ "source"
    )
  ) %>%
  filter(!is.na(food_type) & !is.na(question_type))

# Calculate the avg. number of consumption days for each food type at the individual level
consumption_summary <- long_data %>%
  filter(question_type == "consumption_days") %>%
  group_by(iid, food_type) %>%
  summarize(
    total_days = sum(as.numeric(value), na.rm = TRUE),
    avg_days = mean(as.numeric(value), na.rm = TRUE)
  ) %>%
  group_by(food_type) %>%
  summarize(
    avg_days = mean(avg_days, na.rm = TRUE),
    median_days = median(total_days, na.rm = TRUE),
    min_days = min(total_days, na.rm = TRUE),
    max_days = max(total_days, na.rm = TRUE))
## `summarise()` has grouped output by 'iid'. You can override using the `.groups`
## argument.
consumption_distribution <- ggplot(consumption_summary, 
                                   aes(x = food_type, y = avg_days)) +
  geom_col() +
  labs(
    title = "Average Number of Consumption Days for Each Food Type",
    x = "Food Type",
    y = "Average Number of Days"
  ) +
  ylim(0, max(consumption_summary$avg_days, na.rm = TRUE) + 1) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))


consumption_distribution

Individual level

# Calculate the total and average number of consumption days for each 
# food type at the individual level
consumption_summary_ind <- long_data %>%
  filter(question_type == "consumption_days") %>%
  group_by(iid, food_type) %>%
  summarize(
    total_days = sum(as.numeric(value), na.rm = TRUE),
    .groups = 'drop' # Ensure ungrouped result
  ) %>%
  pivot_wider(names_from = food_type, values_from = total_days, 
              values_fill = list(total_days = 0))
# Convert the data to a long format for heatmap visualization
heatmap_data <- consumption_summary_ind %>%
  pivot_longer(cols = -iid, names_to = "food_type", values_to = "total_days")

ggplot(heatmap_data, aes(x = food_type, y = as.factor(iid), fill = total_days)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "blue", limits = c(0, 7), breaks = 0:7) +
  labs(
    title = "Individual Consumption Heatmap",
    x = "Food Type",
    y = "Individuals",
    fill = "Total Days"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    axis.text.y = element_blank(), # Hide y-axis text
    axis.ticks.y = element_blank()  # Hide y-axis ticks
  )

Identifying individuals with zero consumption

By gender

# Define food consumption variables
food_consumption_vars <- c("bg5q501a", "bg5q502a", "bg5q503a", "bg5q504a", "bg5q504aa", 
                           "bg5q504ab", "bg5q504ac", "bg5q504ad", "bg5q505a", "bg5q505aa", 
                           "bg5q505ab", "bg5q506a", "bg5q507a", "bg5q508a", "bg5q509a")

# Transform the data to long format
long_data <- midline_individual %>%
  select(iid, sexemembre, all_of(food_consumption_vars)) %>%
  pivot_longer(cols = -c(iid, sexemembre), names_to = "food_type", 
               values_to = "consumption_days")

# Identify individuals with zero consumption for all food types
zero_consumption_individuals <- long_data %>%
  group_by(iid, sexemembre) %>%
  summarize(total_consumption = sum(as.numeric(consumption_days), na.rm = TRUE)) %>%
  filter(total_consumption == 0)
## `summarise()` has grouped output by 'iid'. You can override using the `.groups`
## argument.
# Summarize the gender distribution
gender_distribution_zero_consumption_summary <- zero_consumption_individuals %>%
  ungroup() %>%
  summarize(
    Men = sum(sexemembre == 1, na.rm = TRUE),
    Women = sum(sexemembre == 2, na.rm = TRUE),
    Missing = sum(is.na(sexemembre))
  ) %>%
  mutate(Total = Men + Women + Missing)

gender_distribution_table <- gender_distribution_zero_consumption_summary %>%
  kable("html", col.names = c("Men", "Women", "Missing", "Total"),
        caption = "Gender Distribution of Individuals with Zero Food Consumption") %>%
  kable_styling(full_width = F, bootstrap_options = c("striped", "hover", "condensed"))

gender_distribution_table
Gender Distribution of Individuals with Zero Food Consumption
Men Women Missing Total
389 375 205 969

Age

# Update age groups with more detailed breakdowns and assign to Age_Group2
midline_individual <- midline_individual %>%
  mutate(
    Age_Group2 = cut(q56, breaks = c(0, 5, 10, 15, 20, 25, 30, 35, 
                                     40, 45, 50, 55, 60, Inf), right = FALSE, 
                     labels = c("0-4", "5-9", "10-14", "15-19", "20-24", 
                                "25-29", "30-34", "35-39", "40-44", "45-49", 
                                "50-54", "55-59", "60+")))

# Transform the data to long format
long_data_age <- midline_individual %>%
  select(iid, Age_Group2, all_of(food_consumption_vars)) %>%
  pivot_longer(cols = -c(iid, Age_Group2), names_to = "food_type", 
               values_to = "consumption_days")

# Identify individuals with zero consumption for all food types
zero_consumption_individuals_age <- long_data_age %>%
  group_by(iid, Age_Group2) %>%
  summarize(total_consumption = sum(as.numeric(consumption_days), na.rm = TRUE)) %>%
  filter(total_consumption == 0)
## `summarise()` has grouped output by 'iid'. You can override using the `.groups`
## argument.
# Summarize the age distribution
age_distribution_zero_consumption_summary <- zero_consumption_individuals_age %>%
  ungroup() %>%
  count(Age_Group2) %>%
  rename(Count = n)

age_distribution_table <- age_distribution_zero_consumption_summary %>%
  kable("html", col.names = c("Age Group", "Count"),
        caption = "Age Distribution of Individuals with Zero Food Consumption") %>%
  kable_styling(full_width = F, bootstrap_options = c("striped", "hover", "condensed"))

age_distribution_table
Age Distribution of Individuals with Zero Food Consumption
Age Group Count
15-19 17
20-24 40
25-29 50
30-34 87
35-39 109
40-44 110
45-49 90
50-54 88
55-59 31
60+ 142
NA 205

Zero consumption based on nationality

# Transform data to long format
long_data_nationality <- midline_individual %>%
  select(iid, CD136, all_of(food_consumption_vars)) %>%
  pivot_longer(cols = -c(iid, CD136), names_to = "food_type", 
               values_to = "consumption_days")

# Identify individuals with zero consumption for all food types
zero_consumption_ind_nationality <- long_data_nationality %>%
  group_by(iid, CD136) %>%
  summarize(total_consumption = sum(as.numeric(consumption_days), na.rm = TRUE)) %>%
  filter(total_consumption == 0)
## `summarise()` has grouped output by 'iid'. You can override using the `.groups`
## argument.
# Summarize nationality distribution
nationality_distribution_zero_consumption_summary <- zero_consumption_ind_nationality %>%
  ungroup() %>%
  count(CD136) %>%
  mutate(Nationality = recode(CD136, !!!cd136_labels)) %>%
  select(Nationality, n) %>%
  rename(Count = n)

# Summary table
nationality_distribution_zero_consumption_summary %>%
  kable("html", col.names = c("Nationality", "Count"),
        caption = "Nationality Distribution of Individuals with Zero Food Consumption") %>%
  kable_styling(full_width = F, bootstrap_options = c("striped", "hover", "condensed"))
Nationality Distribution of Individuals with Zero Food Consumption
Nationality Count
Congolese 799
DR Congo 84
Central African 86

Income and (self) employment status

A701 “During the last 7 days, has [NAME] worked at least one hour, with or without pay, in a field or garden belonging to him or another member of the household? Or has [NAME] Did he raise animals, fish or hunt?

A702 “During the last 7 days, has [NAME] worked at least one hour, with or without pay, in a business, a processing activity, provided a service on his own account or on behalf of a other member of the household? For example as a craftsman, trader or lawyer, doctor or other self-employed person?

A703 “During the last 7 days, has [NAME] worked at least one hour, for a company, for the State, for a boss or any other person who is not a member of your household? (even part-time or occasionally)

A704 During the last 7 days, has [NAME] worked at least one hour as an apprentice with or without pay?

1 Yes 2 No -8 Don't know-9 Refused to answer”

# Summarize distribution of responses for each question
work_activity_distribution <- midline_individual %>%
  select(iid, A701, A702, A703, A704) %>%
  pivot_longer(cols = starts_with("A7"), names_to = "question", 
               values_to = "response") %>%
  group_by(question, response) %>%
  summarize(count = n(), .groups = 'drop')

# Rename questions
work_activity_distribution <- work_activity_distribution %>%
  mutate(question = recode(question,
                           "A701" = "Agriculture",
                           "A702" = "Business_Service",
                           "A703" = "Other_Employee",
                           "A704" = "Apprentice"))

# Calculate percentages within each question group
work_activity_distribution <- work_activity_distribution %>%
  group_by(question) %>%
  mutate(percentage = (count / sum(count)) * 100)

# Pivot data to wide format
summary_work_activity <- work_activity_distribution %>%
  pivot_wider(names_from = question, values_from = c(count, percentage), 
              names_sep = "_") %>%
  arrange(response)

# Ensure columns are numeric numeric and round them to two decimal places
summary_work_activity <- summary_work_activity %>%
  mutate(across(starts_with("percentage"), ~ round(as.numeric(.), 2)))

summary_work_activity %>%
  kable("html", col.names = c("Response", "Agriculture N", "Business/Service N",
                              "Other Employee N", "Apprentice N", "Agriculture %",
                              "Business Service %", "Other Employee %", "Apprentice %"),
        caption = "Summary of Work Activities in the Last 7 Days") %>%
  kable_styling(full_width = F, bootstrap_options = c("striped", "hover", "condensed"))
Summary of Work Activities in the Last 7 Days
Response Agriculture N Business/Service N Other Employee N Apprentice N Agriculture % Business Service % Other Employee % Apprentice %
0 4747 6764 7246 7199 37.61 53.59 57.41 57.04
1 2766 747 153 143 21.92 5.92 1.21 1.13
NA 5108 5110 5222 5279 40.47 40.49 41.38 41.83

A713 Average number of hours of work per day

A714 Average number of working days per week

A715 Average number of months of work per year

# Distribution of working time questions
working_time_summary <- midline_individual %>%
  select(iid, A713, A714, A715) %>%
  summarize(
    avg_hours_per_day = round(mean(A713, na.rm = TRUE), 2),
    sd_hours_per_day = round(sd(A713, na.rm = TRUE), 2),
    min_hours_per_day = round(min(A713, na.rm = TRUE), 2),
    max_hours_per_day = round(max(A713, na.rm = TRUE), 2),
    median_hours_per_day = round(median(A713, na.rm = TRUE), 2),
    na_hours_per_day = sum(is.na(A713)),
    avg_days_per_week = round(mean(A714, na.rm = TRUE), 2),
    sd_days_per_week = round(sd(A714, na.rm = TRUE), 2),
    min_days_per_week = round(min(A714, na.rm = TRUE), 2),
    max_days_per_week = round(max(A714, na.rm = TRUE), 2),
    median_days_per_week = round(median(A714, na.rm = TRUE), 2),
    na_days_per_week = sum(is.na(A714)),
    avg_months_per_year = round(mean(A715, na.rm = TRUE), 2),
    sd_months_per_year = round(sd(A715, na.rm = TRUE), 2),
    min_months_per_year = round(min(A715, na.rm = TRUE), 2),
    max_months_per_year = round(max(A715, na.rm = TRUE), 2),
    median_months_per_year = round(median(A715, na.rm = TRUE), 2),
    na_months_per_year = sum(is.na(A715))
  )

# Summary table
working_time_distribution <- tibble(
  Statistic = c("Mean", "Standard Deviation", "Minimum", "Maximum", "Median", "NA Count"),
  `Average number of hours of work per day` = c(
    working_time_summary$avg_hours_per_day, 
    working_time_summary$sd_hours_per_day, 
    working_time_summary$min_hours_per_day, 
    working_time_summary$max_hours_per_day, 
    working_time_summary$median_hours_per_day, 
    working_time_summary$na_hours_per_day
  ),
  `Average number of working days per week` = c(
    working_time_summary$avg_days_per_week, 
    working_time_summary$sd_days_per_week, 
    working_time_summary$min_days_per_week, 
    working_time_summary$max_days_per_week, 
    working_time_summary$median_days_per_week, 
    working_time_summary$na_days_per_week
  ),
  `Average number of months of work per year` = c(
    working_time_summary$avg_months_per_year, 
    working_time_summary$sd_months_per_year, 
    working_time_summary$min_months_per_year, 
    working_time_summary$max_months_per_year, 
    working_time_summary$median_months_per_year, 
    working_time_summary$na_months_per_year
  )
)

working_time_distribution %>%
  kable("html", col.names = c("Statistic", "Hours per Day", "Days per Week", "Months per Year"),
        caption = "Summary of Working Time Distribution") %>%
  kable_styling(full_width = F, bootstrap_options = c("striped", "hover", "condensed"))
Summary of Working Time Distribution
Statistic Hours per Day Days per Week Months per Year
Mean 5.85 4.98 9.37
Standard Deviation 2.36 1.46 3.06
Minimum 0.00 0.00 0.00
Maximum 16.00 7.00 12.00
Median 6.00 5.00 11.00
NA Count 9474.00 9474.00 9474.00

A716 Form of payment for main employment 1 Fixed salary (month, fortnight, week) 2 On the working day or hour 3 On task 4 Committee 5 Benefits 6 In kind (products, food, etc.) 7 Is not paid -8 Don't know-9 Refused to answer

A717 Last month’s income for this job

Schooling of Children, school enrolment

Health

Social cohesion

Female empowerement (HH decision making)