Background

In this report, we explored SUSENAS (Socio-economic National Survey) data. The data exploration in this report aims to picture the linked mental health issue with socio-economic as well as demographic aspects in Indonesia in 2023.

##1. Load library used in this report

library (tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(foreign)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loading required package: survival
## 
## Attaching package: 'survey'
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.4.1
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## 
## The following object is masked from 'package:gridExtra':
## 
##     combine
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
## 
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(ROSE)
## Loaded ROSE 0.0-4

##2. Read the SUSENAS data

susenas_rt <- read.dbf("data/kor23_rt.dbf")
susenas_ind_1 <- read.dbf("data/kor23_ind_1.dbf")
susenas_ind_2 <- read.dbf("data/kor23_ind_2.dbf")

##3. prepare the data
a) Prepare SUSENAS individuals data

#join all SUSENAS individuals data
susenas_ind <- cbind(susenas_ind_1,susenas_ind_2[, setdiff(names(susenas_ind_2), names(susenas_ind_1))])
rm(susenas_ind_1,susenas_ind_2)

#select variables from SUSENAS individuals data used for the analysis
susenas_ind_selected_var <- susenas_ind %>%
  select(URUT,PSU,SSU,WI1,WI2,R101,R102,R105,R401,R403,R404,R405,R407,R612,R614,R701,R703_A,R703_B,R703_C,R703_D,R703_X,R706,R707,R708,R811_D,R905,R1002, R1003, R1004, R1005, R1006, R1007, R1008, R1009, R1010,R1101_A,R1101_B,R1101_C,R1101_D,R1101_E,R1101_X,FWT)
susenas_ind_selected_var <- susenas_ind_selected_var %>%
  rename(id_prov = R101,
         id_kab = R102,
         klas_desakota = R105,
         member_num = R401,
         link_w_headhh = R403,
         marital_status = R404,
         sex = R405,
         age = R407,
         education = R612,
         certificate = R614,
         bank_acc = R701,
         work = R703_A,
         school = R703_B,
         takecare_hh = R703_C,
         other_activity = R703_D,
         no_activity = R703_X,
         main_sector = R706,
         work_status = R707,
         working_hrs = R708,
         criminal_victim = R905,
         disability_1 = R1002,
         disability_2 = R1003,
         disability_3 = R1004,
         disability_4 = R1005,
         disability_5 = R1006,
         disability_6 = R1008,
         disability_7 = R1009,
         emotional_issue = R1007,
         ATENSI = R1010,
         BPJS_PBI = R1101_A,
         BPJS_nonPBLI= R1101_B,
         jamkesda = R1101_C,
         private_insurance = R1101_D,
         office_insurance = R1101_E,
         no_insurance = R1101_X,
         inet_socmed = R811_D,
         ind_weight = FWT
         )

susenas_ind_selected_var <- susenas_ind_selected_var %>%
  mutate(no_activity =as.numeric(case_when(as.character(no_activity)  == "X" ~ 1,
                   TRUE ~ 0)),
         work =as.numeric(case_when(as.character(work)  == "A" ~ 1,
                   TRUE ~ 0)),
         school =as.numeric(case_when(as.character(school)  == "B" ~ 1,
                   TRUE ~ 0)),
         takecare_hh =as.numeric(case_when(as.character(takecare_hh)  == "C" ~ 1,
                   TRUE ~ 0)),
         other_activity =as.numeric(case_when(as.character(other_activity)  == "D" ~ 1,
                   TRUE ~ 0))
                   )

susenas_ind_selected_var <- susenas_ind_selected_var %>%
  mutate(bank_acc= case_when(bank_acc == 1 ~ 1,
                             TRUE ~ 0),
         criminal_victim = case_when(criminal_victim == 1 ~ 1,
                                     TRUE~0),
         emotional_issue = case_when(emotional_issue == 8 ~ 0,
                                     TRUE ~ 1),
         BPJS_PBI =as.numeric(case_when(as.character(BPJS_PBI)  == "A" ~ 1,
                   TRUE ~ 0)),
         BPJS_nonPBLI =as.numeric(case_when(as.character(BPJS_nonPBLI)  == "B" ~ 1,
                   TRUE ~ 0)),
         jamkesda = as.numeric(case_when(as.character(jamkesda) == "C" ~ 1,
                                         TRUE ~ 0)),
         private_insurance = as.numeric(case_when(as.character(private_insurance) == "D" ~ 1,
                                                  TRUE ~ 0)),
         office_insurance = as.numeric(case_when(as.character(office_insurance) == "E" ~1,
                                                 TRUE ~ 0)),
         no_insurance = as.numeric(case_when(as.character(no_insurance) == "X" ~ 1,
                                             TRUE ~ 0)),
         inet_socmed = as.numeric(case_when(as.character(inet_socmed) == "D" ~ 1,
                                             TRUE ~ 0))
         )
#create new dataset contain only people with emotional issue

df_emotional <- susenas_ind_selected_var %>%
  filter(age >= 15 & no_activity == 0 & emotional_issue == 1)


hh_emotional <- df_emotional %>%
  group_by(URUT, PSU,SSU, WI1,WI2) %>%
  summarise(
    sum_emotional_member = n(),
    member_list = paste(member_num, collapse = ", "),
    link_w_headhh_list = paste(link_w_headhh, collapse = ", "),
  )
## `summarise()` has grouped output by 'URUT', 'PSU', 'SSU', 'WI1'. You can
## override using the `.groups` argument.


b) prepare SUSENAS households data

#select variables from SUSENAS households data used for the analysis
all_hh_data <- left_join(susenas_rt, hh_emotional, by = c("URUT", "PSU", "SSU", "WI1", "WI2"))
susenas_hh_selected_var <- all_hh_data %>%
  dplyr::select(URUT,PSU,SSU,WI1,WI2,R1901A,R1901B,R1901C,R1901D,R1901E,R1901F,R1901G,R1901H,R1901I,R1901J,R1701,R1702,R1703,R1704,R1705,R1706, R1707,R1708, R2101B, sum_emotional_member,member_list,link_w_headhh_list,FWT)
susenas_hh_selected_var <- susenas_hh_selected_var %>%
  rename(KUR = R1901A,
         bank_umum = R1901B,
         BPR  = R1901C,
         koperasi = R1901D,
         perorangan = R1901E,
         pegadaian = R1901F,
         leasing = R1901G,
         bumdes = R1901H,
         online = R1901I,
         other_loan = R1901J,
         food_1 = R1701,
         food_2 = R1702,
         food_3 = R1703,
         food_4 = R1704,
         food_5 = R1705,
         food_6 = R1706,
         food_7 = R1707,
         food_8 = R1708,
         breadwinner = R2101B
         )
susenas_hh_selected_var <- susenas_hh_selected_var %>%
  mutate(food_1 = case_when(food_1 == 1 ~ 1,
                            TRUE ~0),
         food_2 = case_when(food_2 == 1 ~ 1,
                            TRUE ~ 0),
         food_3 = case_when(food_3 == 1 ~ 1,
                            TRUE ~0),
         food_4 = case_when(food_4 == 1 ~ 1,
                            TRUE ~0),
         food_5 = case_when(food_5 == 1 ~ 1,
                            TRUE ~0),
         food_6 = case_when(food_6 == 1 ~ 1,
                            TRUE ~0),
         food_7 = case_when(food_7 == 1 ~ 1,
                            TRUE ~0),
         food_8 = case_when(food_8 == 1 ~ 1,
                            TRUE ~0)
         )
susenas_hh_selected_var <- susenas_hh_selected_var %>%
  mutate(KUR = as.factor(KUR),
         bank_umum = as.factor(bank_umum),
         BPR = as.factor(BPR),
         koperasi = as.factor(koperasi),
         perorangan = as.factor(perorangan),
         pegadaian = as.factor(pegadaian),
         leasing = as.factor(leasing),
         bumdes = as.factor(bumdes),
         online = as.factor(online),
         other_loan = as.factor(other_loan),
         food_1 = as.factor(food_1),
         food_2 = as.factor(food_2),
         food_3 = as.factor(food_3),
         food_4 = as.factor(food_4),
         food_5 = as.factor(food_5),
         food_6 = as.factor(food_6),
         food_7 = as.factor(food_7),
         food_8 = as.factor(food_8),
         breadwinner = as.factor(breadwinner)
         )

#join hh data to individuals data

# Step 1: create consistent ID for both households and individuals data
susenas_hh_selected_var <- susenas_hh_selected_var %>%
  mutate(hhid = paste(URUT, PSU, SSU, WI1,WI2, sep = "_"))

susenas_ind_selected_var <- susenas_ind_selected_var %>%
  mutate(hhid = paste(URUT, PSU, SSU, WI1,WI2, sep = "_"))

# Step 2: count HH members (ART) per households from individuals data
art_count <- susenas_ind_selected_var %>%
  count(hhid, name = "n_art") %>%
  arrange(hhid)

# Step 3: duplicate row in households data
susenas_hh_selected_var_expanded <- susenas_hh_selected_var %>%
  left_join(art_count, by = "hhid") %>%
  slice(rep(1:n(), n_art)) %>%
  group_by(hhid) %>%
  mutate(art_index = row_number()) %>%
  ungroup()
susenas_hh_selected_var_expanded <- susenas_hh_selected_var_expanded %>%
  select(!c(URUT,PSU,SSU,WI1,WI2))

# Step 4: join individuals and expanded households data
susenas_ind_selected_var <- susenas_ind_selected_var %>%
  group_by(hhid) %>%
  mutate(art_index = row_number()) %>%
  ungroup()

all_susenas_dataset <- susenas_ind_selected_var %>%
  left_join(susenas_hh_selected_var_expanded, by = c("hhid", "art_index"))

# Step 5: 
all_susenas_dataset <- all_susenas_dataset %>%
  mutate(breadwinner = as.numeric(breadwinner))
all_susenas_dataset <- all_susenas_dataset %>%
  mutate(is_breadwinner =  case_when(member_num == breadwinner ~ 1,
                                    TRUE ~ 0))
all_susenas_dataset <- all_susenas_dataset %>%
  mutate(who_breadwinner = case_when(is_breadwinner == 1 & link_w_headhh == 1 ~ "breadwinner is head of hh",
                                     is_breadwinner == 1 & link_w_headhh != 1 ~ "breadwinner is member of hh",
                                     TRUE ~ "other"))

all_susenas_dataset <- all_susenas_dataset %>%
  mutate(loan = as.factor(case_when(KUR == "1" | 
                                      bank_umum == "1" | 
                                      BPR == "1" | 
                                      koperasi == "1" | 
                                      perorangan == "1" | 
                                      pegadaian == "1" |
                                      leasing == "1" |
                                      bumdes == "1" |
                                      online == "1" |
                                      other_loan == "1" ~ 1,
                                    TRUE ~ 0)),
         food_access = as.factor(case_when(food_1 == "1" |
                                             food_2 == "1" |
                                             food_3 == "1" |
                                             food_4 == "1" |
                                             food_5 == "1" |
                                             food_6 == "1" |
                                             food_7 == "1" |
                                             food_8 == "1" ~ 1,
                                           TRUE ~ 0)))
write.csv(all_susenas_dataset, "data/all susenas mental health data.csv")
all_susenas_dataset %>% 
  count(is_breadwinner)
## # A tibble: 2 × 2
##   is_breadwinner      n
##            <dbl>  <int>
## 1              0 908852
## 2              1 314525
  1. Calculate direct estimate for people with emotional issue
#filter individuals who ages > 15, has activities and  emotional issue
df_filtered <- all_susenas_dataset %>%
  filter(age >= 15 & no_activity == 0 & other_activity == 0 & !certificate %in% c(2,7,12)) %>%
  mutate(
    indicator = case_when(emotional_issue == 1 & disability_6 == 4 & disability_7 == 8 ~ 1,
                                           TRUE ~ 0)
  )
  1. by province
# create survey design
design <- svydesign(
  ids = ~PSU,
  weights = ~ind_weight,
  data = df_filtered,
  nest = TRUE
)

# calculate proportion of individuals with emotional issues at province level
estimate <- svyby(
  ~indicator,
  ~id_prov,
  design,
  svymean,
  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
  na.rm = TRUE
)

# Add percentage of people with emotional issue and RSE
estimate <- estimate %>%
  mutate(
    emotional_ppl_percent = indicator * 100,
    RSE = cv * 100
  ) %>%
  select(id_prov, emotional_ppl_percent, RSE) %>%
  arrange(desc(emotional_ppl_percent))

#show table
print(estimate)
##    id_prov emotional_ppl_percent      RSE
## 82      82             0.8638813 21.56892
## 65      65             0.8410333 44.47454
## 53      53             0.7597209 20.37287
## 51      51             0.6898938 48.50477
## 64      64             0.6226809 47.64223
## 75      75             0.6119824 26.09597
## 11      11             0.4936171 24.38398
## 52      52             0.4593889 24.07165
## 34      34             0.4373724 30.03918
## 12      12             0.4361417 28.79201
## 61      61             0.4298728 22.29671
## 81      81             0.4028206 29.04274
## 91      91             0.3714537 42.37433
## 74      74             0.3366578 27.70516
## 14      14             0.3130789 26.28735
## 73      73             0.2987801 21.59724
## 71      71             0.2978563 28.17824
## 32      32             0.2915908 20.24354
## 33      33             0.2701253 14.16946
## 21      21             0.2595682 55.47039
## 62      62             0.2517158 28.94652
## 94      94             0.2427528 24.96461
## 36      36             0.2375810 30.64821
## 35      35             0.2214743 19.03589
## 13      13             0.2195950 25.19782
## 19      19             0.2077838 36.60753
## 72      72             0.1999424 29.00416
## 63      63             0.1911884 27.90022
## 76      76             0.1796457 33.19232
## 31      31             0.1794899 31.38319
## 17      17             0.1771807 29.64295
## 16      16             0.1699196 36.97627
## 15      15             0.1202024 35.87600
## 18      18             0.1194867 35.50428
estimate %>%
  count(RSE>25)
##   RSE > 25  n
## 1    FALSE 10
## 2     TRUE 24
  1. data visualization
gg_RSE_prov <- ggplot(estimate, aes(y = RSE)) +
  geom_boxplot(fill = "skyblue", color = "darkblue") +
  labs(
    title = " RSE of people with emotional issue at province level",
    y = "RSE (Coefficient of Variation, %)",
    x = ""
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(
      face = "bold",     
      size = 8,         
      hjust = 0.5        
    )
  )

gg_est_prov <- ggplot(estimate, aes(y = emotional_ppl_percent)) +
  geom_boxplot(fill = "salmon", color = "darkred") +
  labs(
    title = "People with emotional issue at province level",
    y = "percentage (%)",
    x = ""
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(
      face = "bold",     
      size = 8,         
      hjust = 0.5        
    )
  )

grid.arrange(gg_est_prov,gg_RSE_prov,ncol=2, nrow =1 )

#rank of people with emotional issues by province
gg_bar <- ggplot(estimate, aes(x = reorder(id_prov, emotional_ppl_percent), y = emotional_ppl_percent)) +  
geom_bar(stat = "identity", fill = "steelblue") +
  coord_flip() +
  labs(
    title = "People with emotional issue at province level (age 15 or above)",
    x = "Province",
    y = "Persentage (%)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    axis.text.y = element_text(size = 10)
  )

# Tampilkan chart
print(gg_bar)


5. people with emotional issue by demographic status
a) education level
Education level in Indonesia is classified into 3 categories:
- primary education : elementary - jh school
- secondary education : high school
- higher education : diploma and above

#reclassified education level
df_filtered <- df_filtered %>%
  mutate(edu_level = case_when(education <11 ~ "primary",
                               education < 18 ~ "secondary",
                               TRUE ~ "higher"))
# create survey design
design <- svydesign(
  ids = ~PSU,
  weights = ~ind_weight,
  data = df_filtered,
  nest = TRUE
)

# calculate proportion of individuals with emotional issues by education level
estimate_edu <- svyby(
  ~indicator,
  ~edu_level,
  design,
  svymean,
  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
  na.rm = TRUE
)

# Add percentage of people with emotional issue and RSE
estimate_edu <- estimate_edu %>%
  mutate(
    emotional_ppl_percent = indicator * 100,
    RSE = cv * 100
  ) %>%
  select(edu_level, emotional_ppl_percent, RSE) %>%
  arrange(desc(emotional_ppl_percent))

#show table
print(estimate_edu)
##           edu_level emotional_ppl_percent       RSE
## primary     primary             0.4078795  5.998741
## secondary secondary             0.1940649 11.091296
## higher       higher             0.1498156 16.390285
  1. sex

code 1 = male
code 2 = female

df_filtered <- df_filtered %>%
  mutate(sex = case_when(sex == 1 ~ "male",
                         TRUE ~ "female"))

# create survey design
design <- svydesign(
  ids = ~PSU,
  weights = ~ind_weight,
  data = df_filtered,
  nest = TRUE
)

# calculate proportion of individuals with emotional issues by sex
estimate_sex <- svyby(
  ~indicator,
  ~sex,
  design,
  svymean,
  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
  na.rm = TRUE
)

# Add percentage of people with emotional issue and RSE
estimate_sex <- estimate_sex %>%
  mutate(
    emotional_ppl_percent = indicator * 100,
    RSE = cv * 100
  ) %>%
  select(sex, emotional_ppl_percent, RSE) %>%
  arrange(desc(emotional_ppl_percent))

#show table
print(estimate_sex)
##           sex emotional_ppl_percent      RSE
## female female             0.3068408 6.654773
## male     male             0.2901522 7.271349
  1. living area

code 1 = urban
code 2 = rural

df_filtered <- df_filtered %>%
  mutate(klas_desakota = case_when(klas_desakota == 1 ~ "urban",
                         TRUE ~ "rural"))

# create survey design
design <- svydesign(
  ids = ~PSU,
  weights = ~ind_weight,
  data = df_filtered,
  nest = TRUE
)



# calculate proportion of individuals with emotional issues by living area
estimate_living <- svyby(
  ~indicator,
  ~klas_desakota,
  design,
  svymean,
  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
  na.rm = TRUE
)

# Add percentage of people with emotional issue and RSE
estimate_living <- estimate_living %>%
  mutate(
    emotional_ppl_percent = indicator * 100,
    RSE = cv * 100
  ) %>%
  select(klas_desakota, emotional_ppl_percent, RSE) %>%
  arrange(desc(emotional_ppl_percent))

#show table
print(estimate_living)
##       klas_desakota emotional_ppl_percent      RSE
## rural         rural             0.3401562 6.859786
## urban         urban             0.2681041 9.506759
#table(all_susenas_dataset$klas_desakota)
  1. marital status

1 = not marrried
2 = married
3 = divorced
4 = widowed

#table(all_susenas_dataset$marital_status)
df_filtered <- df_filtered %>%
  mutate(marital_status = case_when(marital_status == 1 ~ "not married",
                                    marital_status == 2 ~ "married",
                                    marital_status == 3 ~ "divorced",
                                    marital_status == 4 ~ "widowed",
                         TRUE ~ "other"))


# create survey design
design <- svydesign(
  ids = ~PSU,
  weights = ~ind_weight,
  data = df_filtered,
  nest = TRUE
)

# calculate proportion of individuals with emotional issues by marital status
estimate_marital <- svyby(
  ~indicator,
  ~marital_status,
  design,
  svymean,
  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
  na.rm = TRUE
)

# Add percentage of people with emotional issue and RSE
estimate_marital <- estimate_marital %>%
  mutate(
    emotional_ppl_percent = indicator * 100,
    RSE = cv * 100
  ) %>%
  select(marital_status, emotional_ppl_percent, RSE) %>%
  arrange(desc(emotional_ppl_percent))

#show table
print(estimate_marital)
##             marital_status emotional_ppl_percent       RSE
## divorced          divorced             0.8408921 19.854345
## widowed            widowed             0.7835037 10.048911
## married            married             0.2899289  7.396978
## not married    not married             0.1726188 10.322551
  1. business sector
    in this analysis, we categorize business sectors into 3 classes:
    1 = agriculture and mining
    2 = manufactures
    3 = services
#reclassified business sector class
#table(all_susenas_dataset)
df_filtered <- df_filtered %>%
  mutate(business_sector_class =as.factor(case_when(main_sector < 8 ~ "agriculture and mining",
                               main_sector < 11 ~ "manufactures",
                               TRUE ~ "services")) )


# create survey design
design <- svydesign(
  ids = ~PSU,
  weights = ~ind_weight,
  data = df_filtered,
  nest = TRUE
)

# calculate proportion of individuals with emotional issues by education level
estimate_business <- svyby(
  ~indicator,
  ~business_sector_class,
  design,
  svymean,
  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
  na.rm = TRUE
)

# Add percentage of people with emotional issue and RSE
estimate_business <- estimate_business %>%
  mutate(
    emotional_ppl_percent = indicator * 100,
    RSE = cv * 100
  ) %>%
  select(business_sector_class, emotional_ppl_percent, RSE) %>%
  arrange(desc(emotional_ppl_percent))

#show table
print(estimate_business)
##                         business_sector_class emotional_ppl_percent       RSE
## agriculture and mining agriculture and mining             0.3607187  5.962015
## manufactures                     manufactures             0.2255705 17.472294
## services                             services             0.2222222 11.236870
#all_susenas_dataset <- all_susenas_dataset %>%
#  mutate(age_category = as.factor(case_when(age < 25 ~"15-24",
#                                            age < 35 ~ "25-34",
#                                            age < 45 ~ "35-44",
#                                            age < 55 ~ "45-54",
#                                            age < 65 ~ "55-64",
#                                            TRUE ~ ">65")))

#df_filtered <- all_susenas_dataset %>%
#  filter(age >= 15 & no_activity == 0) %>%
#  mutate(
#    indicator = ifelse(emotional_issue == 1, 1, 0)
#  )


 # create survey design
#design <- svydesign(
#  ids = ~PSU,
#  weights = ~ind_weight,
#  data = df_filtered,
#  nest = TRUE
#) 


#estimate_hhstatus <- svyby(
#  ~indicator,
#  ~interaction(link_w_headhh, age_category),
#  design,
#  svymean,
#  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
#  na.rm = TRUE
#)
#estimate_hhstatus <- estimate_hhstatus %>%
#  rename(hh_status_age = "interaction(link_w_headhh, age_category)")



# Add percentage of people with emotional issue and RSE
#estimate_hhstatus <- estimate_hhstatus %>%
#  mutate(
#    emotional_ppl_per100000population = indicator * 100000,
#    RSE = cv * 100
#  ) %>%
#  select(hh_status_age, emotional_ppl_per100000population, RSE) %>%
#  na.omit()

#show table
#print(estimate_hhstatus)

1 = head of HH
2 = spouse
3 = children
4 = foster children
5 = son/daughter in-law
6 = grand children
7 = parents/ mothe/father in-law
8 = maid
9 = other

#table(all_susenas_dataset$link_w_headhh)


df_filtered <- df_filtered %>%
  mutate(link_w_headhh_cat = as.factor(case_when(link_w_headhh == 1 ~"head of HH",
                                             link_w_headhh == 2 ~"spouse",
                                             link_w_headhh == 3 ~"children",
                                             link_w_headhh == 4 ~"foster children",
                                             link_w_headhh == 5 ~"son/daughter in-law",
                                             link_w_headhh == 6 ~"grand children",
                                             link_w_headhh == 7 ~"parents/ mother/father in-law",
                                             link_w_headhh == 8 ~"maid",
                                            TRUE ~ "other")))
df_filtered <- df_filtered %>%
  mutate(link_w_headhh_cat = case_when(link_w_headhh_cat == "foster children" ~"children",
                                            TRUE ~ link_w_headhh_cat))

df_filtered_hh <- df_filtered %>%
  filter(!link_w_headhh_cat %in% c("other","parents/ mother/father in-law","maid","grand children","son/daughter in-law"))




 # create survey design
design <- svydesign(
  ids = ~PSU,
  weights = ~ind_weight,
  data = df_filtered_hh,
  nest = TRUE
) 

estimate_hhstatus <- svyby(
  ~indicator,
  ~link_w_headhh_cat,
  design,
  svymean,
  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
  na.rm = TRUE
)




# Add percentage of people with emotional issue and RSE
estimate_hhstatus <- estimate_hhstatus %>%
  mutate(
    emotional_ppl_percent = indicator * 100,
    RSE = cv * 100
  ) %>%
  select(link_w_headhh_cat, emotional_ppl_percent, RSE) %>%
  na.omit() %>%
  arrange(desc(emotional_ppl_percent))

#show table
print(estimate_hhstatus)
##            link_w_headhh_cat emotional_ppl_percent       RSE
## head of HH        head of HH             0.3851177  7.133556
## spouse                spouse             0.2854501  8.956151
## children            children             0.1800704 10.774414
  1. age
#estimate_age <- svyby(
#  ~indicator,
#  ~age_category,
#  design,
#  svymean,
#  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
#  na.rm = TRUE
#)




# Add percentage of people with emotional issue and RSE
#estimate_age <- estimate_age %>%
#  mutate(
#    emotional_ppl_per100000population = indicator * 100000,
#    RSE = cv * 100
#  ) %>%
#  select(age_category, emotional_ppl_per100000population, RSE) %>%
#  na.omit()

#show table
#print(estimate_age)


h) working hours
we categorize working hours as 3 classes:
1 = short working hrs (less than 40 hrs/week)
2 = normal working hrs (40-42 hrs/week)
3 = long working hours (more than 42 hrs/week)

df_filtered <- df_filtered %>%
  mutate(workinghrs_cat = case_when(working_hrs < 40 ~ "short working hrs (less than 40 hrs/week)",
                                    working_hrs < 43 ~ "normal working hrs (40-42 hrs/week)",
                                    TRUE ~ "long working hours (more than 42 hrs/week)"))

# create survey design
design <- svydesign(
  ids = ~PSU,
  weights = ~ind_weight,
  data = df_filtered,
  nest = TRUE
)

# calculate proportion of individuals with emotional issues at province level
estimate <- svyby(
  ~indicator,
  ~workinghrs_cat,
  design,
  svymean,
  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
  na.rm = TRUE
)

# Add percentage of people with emotional issue and RSE
estimate <- estimate %>%
  mutate(
    emotional_ppl_percent = indicator * 100,
    RSE = cv * 100
  ) %>%
  select(workinghrs_cat, emotional_ppl_percent, RSE) %>%
  arrange(desc(emotional_ppl_percent))

#show table
print(estimate)
##                                                                        workinghrs_cat
## short working hrs (less than 40 hrs/week)   short working hrs (less than 40 hrs/week)
## long working hours (more than 42 hrs/week) long working hours (more than 42 hrs/week)
## normal working hrs (40-42 hrs/week)               normal working hrs (40-42 hrs/week)
##                                            emotional_ppl_percent       RSE
## short working hrs (less than 40 hrs/week)              0.3783420  6.097524
## long working hours (more than 42 hrs/week)             0.1904020 11.884896
## normal working hrs (40-42 hrs/week)                    0.1492351 14.389105


working status:
1. employer 2. employee 3. freelance

df_filtered <- df_filtered %>%
  mutate(work_status_cat = case_when(work_status < 4 ~ "employer",
                                    work_status < 5 ~ "employee",
                                    work_status <6 ~ "freelance",
                                    TRUE ~ "unpaid"))




# create survey design
design <- svydesign(
  ids = ~PSU,
  weights = ~ind_weight,
  data = df_filtered,
  nest = TRUE
)

# calculate proportion of individuals with emotional issues at province level
estimate <- svyby(
  ~indicator,
  ~work_status_cat,
  design,
  svymean,
  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
  na.rm = TRUE
)

# Add percentage of people with emotional issue and RSE
estimate <- estimate %>%
  mutate(
    emotional_ppl_percent = indicator * 100,
    RSE = cv * 100
  ) %>%
  select(work_status_cat, emotional_ppl_percent, RSE) %>%
  arrange(desc(emotional_ppl_percent))

#show table
print(estimate)
##           work_status_cat emotional_ppl_percent       RSE
## employer         employer             0.3599199  6.297785
## freelance       freelance             0.3595997 16.970515
## unpaid             unpaid             0.3317508 13.552831
## employee         employee             0.1524755 12.860303


h) bank account possession
1 = has bank account
0 = doesn’t have bank account

#table(all_susenas_dataset$bank_acc)
df_filtered <- df_filtered %>%
  mutate(bank_acc = case_when(bank_acc == 1 ~ "has bank account",
                                    TRUE ~ "doesn't have bank account"))



# create survey design
design <- svydesign(
  ids = ~PSU,
  weights = ~ind_weight,
  data = df_filtered,
  nest = TRUE
)

estimate_bank <- svyby(
  ~indicator,
  ~bank_acc,
  design,
  svymean,
  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
  na.rm = TRUE
)

# Add percentage of people with emotional issue and RSE
estimate_bank <- estimate_bank %>%
  mutate(
    emotional_ppl_percent = indicator * 100,
    RSE = cv * 100
  ) %>%
  select(bank_acc, emotional_ppl_percent, RSE) %>%
  arrange(desc(emotional_ppl_percent))

#show table
print(estimate_bank)
##                                            bank_acc emotional_ppl_percent
## doesn't have bank account doesn't have bank account             0.3546933
## has bank account                   has bank account             0.2354734
##                                RSE
## doesn't have bank account 6.212884
## has bank account          8.917173


i) internet usage
1 = use internet for social media
0 = doesn’t use internet for social media

df_filtered <- df_filtered %>%
  mutate(inet_socmed = case_when(inet_socmed == 1 ~ "use internet for social media",
                                    TRUE ~ "doesn't use internet for social media"))


# create survey design
design <- svydesign(
  ids = ~PSU,
  weights = ~ind_weight,
  data = df_filtered,
  nest = TRUE
)

estimate_inet <- svyby(
  ~indicator,
  ~inet_socmed,
  design,
  svymean,
  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
  na.rm = TRUE
)

# Add percentage of people with emotional issue and RSE
estimate_inet <- estimate_inet %>%
  mutate(
    emotional_ppl_percent = indicator * 100,
    RSE = cv * 100
  ) %>%
  select(inet_socmed, emotional_ppl_percent, RSE) %>%
  arrange(desc(emotional_ppl_percent))

#show table
print(estimate_inet)
##                                                                 inet_socmed
## doesn't use internet for social media doesn't use internet for social media
## use internet for social media                 use internet for social media
##                                       emotional_ppl_percent       RSE
## doesn't use internet for social media             0.5014692  6.378691
## use internet for social media                     0.1399722 10.627068


i) criminal victim
1 = victim
0 = not victim

df_filtered <- df_filtered %>%
  mutate(criminal_victim = case_when(criminal_victim == 1 ~ "victim",
                                    TRUE ~ "not victim"))


# create survey design
design <- svydesign(
  ids = ~PSU,
  weights = ~ind_weight,
  data = df_filtered,
  nest = TRUE
)

estimate_victim <- svyby(
  ~indicator,
  ~criminal_victim,
  design,
  svymean,
  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
  na.rm = TRUE
)

# Add percentage of people with emotional issue and RSE
estimate_victim <- estimate_victim %>%
  mutate(
    emotional_ppl_percent = indicator * 100,
    RSE = cv * 100
  ) %>%
  select(criminal_victim, emotional_ppl_percent, RSE) %>%
  arrange(desc(emotional_ppl_percent))

#show table
print(estimate_victim)
##            criminal_victim emotional_ppl_percent       RSE
## not victim      not victim             0.2988008  5.937708
## victim              victim             0.2986253 30.294089


ATENSI
1 : yes
5 : no

df_filtered <- df_filtered %>%
  mutate(ATENSI = case_when(ATENSI == 1 ~ "get ATENSI",
                                    TRUE ~ "doesn't get ATENSI"))

df_filtered_ATENSI <- df_filtered %>%
  mutate(indicator = case_when(emotional_issue == 1 & disability_1 == 4 & disability_2 == 8 & disability_3 == 4 & disability_4 == 8 & disability_5 == 4 & disability_6 == 4 & disability_7 == 8 ~ 1,
                                           TRUE ~ 0) )

# create survey design
design <- svydesign(
  ids = ~PSU,
  weights = ~ind_weight,
  data = df_filtered_ATENSI,
  nest = TRUE
)


estimate_atensi <- svyby(
  ~indicator,
  ~ATENSI,
  design,
  svymean,
  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
  na.rm = TRUE
)

# Add percentage of people with emotional issue and RSE
estimate_atensi <- estimate_atensi %>%
  mutate(
    emotional_ppl_percent = indicator * 100,
    RSE = cv * 100
  ) %>%
  select(ATENSI, emotional_ppl_percent, RSE) %>%
  arrange(desc(emotional_ppl_percent))

#show table
print(estimate_atensi)
##                                ATENSI emotional_ppl_percent      RSE
## get ATENSI                 get ATENSI             1.2985903 58.02271
## doesn't get ATENSI doesn't get ATENSI             0.1013668  8.88175


office insurance
1 : yes
0 : no

df_filtered <- df_filtered %>%
  mutate(office_insurance = case_when(office_insurance == 1 ~ "has office insurance",
                                    TRUE ~ "doesn't has insurance"))


# create survey design
design <- svydesign(
  ids = ~PSU,
  weights = ~ind_weight,
  data = df_filtered,
  nest = TRUE
)

# calculate proportion of individuals with emotional issues at province level
estimate <- svyby(
  ~indicator,
  ~office_insurance,
  design,
  svymean,
  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
  na.rm = TRUE
)

# Add percentage of people with emotional issue and RSE
estimate <- estimate %>%
  mutate(
    emotional_ppl_percent = indicator * 100,
    RSE = cv * 100
  ) %>%
  select(office_insurance, emotional_ppl_percent, RSE) %>%
  arrange(desc(emotional_ppl_percent))

#show table
print(estimate)
##                            office_insurance emotional_ppl_percent       RSE
## doesn't has insurance doesn't has insurance             0.3033545  5.872343
## has office insurance   has office insurance             0.1431933 37.633211


breadwinner

# create survey design
design <- svydesign(
  ids = ~PSU,
  weights = ~ind_weight,
  data = df_filtered,
  nest = TRUE
)

estimate <- svyby(
  ~indicator,
  ~who_breadwinner,
  design,
  svymean,
  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
  na.rm = TRUE
)

# Add percentage of people with emotional issue and RSE
estimate <- estimate %>%
  mutate(
    emotional_ppl_percent = indicator * 100,
    RSE = cv * 100
  ) %>%
  select(who_breadwinner, emotional_ppl_percent, RSE) %>%
  arrange(desc(emotional_ppl_percent))

#show table
print(estimate)
##                                         who_breadwinner emotional_ppl_percent
## breadwinner is head of hh     breadwinner is head of hh             1.1672682
## other                                             other             0.3014135
## breadwinner is member of hh breadwinner is member of hh             0.2410437
##                                   RSE
## breadwinner is head of hh   13.531968
## other                        6.506013
## breadwinner is member of hh  8.536162


lack of food access
0 : have food access 1 : lack of food access

df_filtered <- df_filtered %>%
  mutate(food_access = case_when(food_access == 0 ~ "have food access",
                                    TRUE ~ "lack of food access"))

# create survey design
design <- svydesign(
  ids = ~PSU,
  weights = ~ind_weight,
  data = df_filtered,
  nest = TRUE
)


estimate <- svyby(
  ~indicator,
  ~food_access,
  design,
  svymean,
  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
  na.rm = TRUE
)

# Add percentage of people with emotional issue and RSE
estimate <- estimate %>%
  mutate(
    emotional_ppl_percent = indicator * 100,
    RSE = cv * 100
  ) %>%
  select(food_access, emotional_ppl_percent, RSE) %>%
  arrange(desc(emotional_ppl_percent))

#show table
print(estimate)
##                             food_access emotional_ppl_percent      RSE
## lack of food access lack of food access             0.6251217 9.208469
## have food access       have food access             0.2285330 7.017349


worries for food
0 : no worries 1 : worries

df_filtered <- df_filtered %>%
  mutate(food_1 = case_when(food_1 == 0 ~ "not worry about the food",
                                    TRUE ~ "worry about the food"))


# create survey design
design <- svydesign(
  ids = ~PSU,
  weights = ~ind_weight,
  data = df_filtered,
  nest = TRUE
)

estimate <- svyby(
  ~indicator,
  ~food_1,
  design,
  svymean,
  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
  na.rm = TRUE
)

# Add percentage of people with emotional issue and RSE
estimate <- estimate %>%
  mutate(
    emotional_ppl_percent = indicator * 100,
    RSE = cv * 100
  ) %>%
  select(food_1, emotional_ppl_percent, RSE) %>%
  arrange(desc(emotional_ppl_percent))

#show table
print(estimate)
##                                            food_1 emotional_ppl_percent
## worry about the food         worry about the food             0.5895922
## not worry about the food not worry about the food             0.2442054
##                               RSE
## worry about the food     9.662126
## not worry about the food 6.705720


loan
0 = dont have loan 1 = have loan

df_filtered <- df_filtered %>%
  mutate(loan = case_when(loan == 0 ~ "doesn't have loan",
                                    TRUE ~ "has loan"))


# create survey design
design <- svydesign(
  ids = ~PSU,
  weights = ~ind_weight,
  data = df_filtered,
  nest = TRUE
)

estimate <- svyby(
  ~indicator,
  ~loan,
  design,
  svymean,
  vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
  na.rm = TRUE
)

# Add percentage of people with emotional issue and RSE
estimate <- estimate %>%
  mutate(
    emotional_ppl_percent = indicator * 100,
    RSE = cv * 100
  ) %>%
  select(loan, emotional_ppl_percent, RSE) %>%
  arrange(desc(emotional_ppl_percent))

#show table
print(estimate)
##                                loan emotional_ppl_percent       RSE
## has loan                   has loan             0.3224556 12.539471
## doesn't have loan doesn't have loan             0.2914100  6.263758
write.csv(all_susenas_dataset, "all susenas dataset.csv")
write.csv(df_filtered, "data for supervised learning.csv")