In this report, we explored SUSENAS (Socio-economic National Survey) data. The data exploration in this report aims to picture the linked mental health issue with socio-economic as well as demographic aspects in Indonesia in 2023.
##1. Load library used in this report
library (tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(foreign)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loading required package: survival
##
## Attaching package: 'survey'
##
## The following object is masked from 'package:graphics':
##
## dotchart
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.4.1
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
##
## The following object is masked from 'package:gridExtra':
##
## combine
##
## The following object is masked from 'package:dplyr':
##
## combine
##
## The following object is masked from 'package:ggplot2':
##
## margin
library(ROSE)
## Loaded ROSE 0.0-4
##2. Read the SUSENAS data
susenas_rt <- read.dbf("data/kor23_rt.dbf")
susenas_ind_1 <- read.dbf("data/kor23_ind_1.dbf")
susenas_ind_2 <- read.dbf("data/kor23_ind_2.dbf")
##3. prepare the data
a) Prepare SUSENAS individuals data
#join all SUSENAS individuals data
susenas_ind <- cbind(susenas_ind_1,susenas_ind_2[, setdiff(names(susenas_ind_2), names(susenas_ind_1))])
rm(susenas_ind_1,susenas_ind_2)
#select variables from SUSENAS individuals data used for the analysis
susenas_ind_selected_var <- susenas_ind %>%
select(URUT,PSU,SSU,WI1,WI2,R101,R102,R105,R401,R403,R404,R405,R407,R612,R614,R701,R703_A,R703_B,R703_C,R703_D,R703_X,R706,R707,R708,R811_D,R905,R1002, R1003, R1004, R1005, R1006, R1007, R1008, R1009, R1010,R1101_A,R1101_B,R1101_C,R1101_D,R1101_E,R1101_X,FWT)
susenas_ind_selected_var <- susenas_ind_selected_var %>%
rename(id_prov = R101,
id_kab = R102,
klas_desakota = R105,
member_num = R401,
link_w_headhh = R403,
marital_status = R404,
sex = R405,
age = R407,
education = R612,
certificate = R614,
bank_acc = R701,
work = R703_A,
school = R703_B,
takecare_hh = R703_C,
other_activity = R703_D,
no_activity = R703_X,
main_sector = R706,
work_status = R707,
working_hrs = R708,
criminal_victim = R905,
disability_1 = R1002,
disability_2 = R1003,
disability_3 = R1004,
disability_4 = R1005,
disability_5 = R1006,
disability_6 = R1008,
disability_7 = R1009,
emotional_issue = R1007,
ATENSI = R1010,
BPJS_PBI = R1101_A,
BPJS_nonPBLI= R1101_B,
jamkesda = R1101_C,
private_insurance = R1101_D,
office_insurance = R1101_E,
no_insurance = R1101_X,
inet_socmed = R811_D,
ind_weight = FWT
)
susenas_ind_selected_var <- susenas_ind_selected_var %>%
mutate(no_activity =as.numeric(case_when(as.character(no_activity) == "X" ~ 1,
TRUE ~ 0)),
work =as.numeric(case_when(as.character(work) == "A" ~ 1,
TRUE ~ 0)),
school =as.numeric(case_when(as.character(school) == "B" ~ 1,
TRUE ~ 0)),
takecare_hh =as.numeric(case_when(as.character(takecare_hh) == "C" ~ 1,
TRUE ~ 0)),
other_activity =as.numeric(case_when(as.character(other_activity) == "D" ~ 1,
TRUE ~ 0))
)
susenas_ind_selected_var <- susenas_ind_selected_var %>%
mutate(bank_acc= case_when(bank_acc == 1 ~ 1,
TRUE ~ 0),
criminal_victim = case_when(criminal_victim == 1 ~ 1,
TRUE~0),
emotional_issue = case_when(emotional_issue == 8 ~ 0,
TRUE ~ 1),
BPJS_PBI =as.numeric(case_when(as.character(BPJS_PBI) == "A" ~ 1,
TRUE ~ 0)),
BPJS_nonPBLI =as.numeric(case_when(as.character(BPJS_nonPBLI) == "B" ~ 1,
TRUE ~ 0)),
jamkesda = as.numeric(case_when(as.character(jamkesda) == "C" ~ 1,
TRUE ~ 0)),
private_insurance = as.numeric(case_when(as.character(private_insurance) == "D" ~ 1,
TRUE ~ 0)),
office_insurance = as.numeric(case_when(as.character(office_insurance) == "E" ~1,
TRUE ~ 0)),
no_insurance = as.numeric(case_when(as.character(no_insurance) == "X" ~ 1,
TRUE ~ 0)),
inet_socmed = as.numeric(case_when(as.character(inet_socmed) == "D" ~ 1,
TRUE ~ 0))
)
#create new dataset contain only people with emotional issue
df_emotional <- susenas_ind_selected_var %>%
filter(age >= 15 & no_activity == 0 & emotional_issue == 1)
hh_emotional <- df_emotional %>%
group_by(URUT, PSU,SSU, WI1,WI2) %>%
summarise(
sum_emotional_member = n(),
member_list = paste(member_num, collapse = ", "),
link_w_headhh_list = paste(link_w_headhh, collapse = ", "),
)
## `summarise()` has grouped output by 'URUT', 'PSU', 'SSU', 'WI1'. You can
## override using the `.groups` argument.
b) prepare SUSENAS households data
#select variables from SUSENAS households data used for the analysis
all_hh_data <- left_join(susenas_rt, hh_emotional, by = c("URUT", "PSU", "SSU", "WI1", "WI2"))
susenas_hh_selected_var <- all_hh_data %>%
dplyr::select(URUT,PSU,SSU,WI1,WI2,R1901A,R1901B,R1901C,R1901D,R1901E,R1901F,R1901G,R1901H,R1901I,R1901J,R1701,R1702,R1703,R1704,R1705,R1706, R1707,R1708, R2101B, sum_emotional_member,member_list,link_w_headhh_list,FWT)
susenas_hh_selected_var <- susenas_hh_selected_var %>%
rename(KUR = R1901A,
bank_umum = R1901B,
BPR = R1901C,
koperasi = R1901D,
perorangan = R1901E,
pegadaian = R1901F,
leasing = R1901G,
bumdes = R1901H,
online = R1901I,
other_loan = R1901J,
food_1 = R1701,
food_2 = R1702,
food_3 = R1703,
food_4 = R1704,
food_5 = R1705,
food_6 = R1706,
food_7 = R1707,
food_8 = R1708,
breadwinner = R2101B
)
susenas_hh_selected_var <- susenas_hh_selected_var %>%
mutate(food_1 = case_when(food_1 == 1 ~ 1,
TRUE ~0),
food_2 = case_when(food_2 == 1 ~ 1,
TRUE ~ 0),
food_3 = case_when(food_3 == 1 ~ 1,
TRUE ~0),
food_4 = case_when(food_4 == 1 ~ 1,
TRUE ~0),
food_5 = case_when(food_5 == 1 ~ 1,
TRUE ~0),
food_6 = case_when(food_6 == 1 ~ 1,
TRUE ~0),
food_7 = case_when(food_7 == 1 ~ 1,
TRUE ~0),
food_8 = case_when(food_8 == 1 ~ 1,
TRUE ~0)
)
susenas_hh_selected_var <- susenas_hh_selected_var %>%
mutate(KUR = as.factor(KUR),
bank_umum = as.factor(bank_umum),
BPR = as.factor(BPR),
koperasi = as.factor(koperasi),
perorangan = as.factor(perorangan),
pegadaian = as.factor(pegadaian),
leasing = as.factor(leasing),
bumdes = as.factor(bumdes),
online = as.factor(online),
other_loan = as.factor(other_loan),
food_1 = as.factor(food_1),
food_2 = as.factor(food_2),
food_3 = as.factor(food_3),
food_4 = as.factor(food_4),
food_5 = as.factor(food_5),
food_6 = as.factor(food_6),
food_7 = as.factor(food_7),
food_8 = as.factor(food_8),
breadwinner = as.factor(breadwinner)
)
#join hh data to individuals data
# Step 1: create consistent ID for both households and individuals data
susenas_hh_selected_var <- susenas_hh_selected_var %>%
mutate(hhid = paste(URUT, PSU, SSU, WI1,WI2, sep = "_"))
susenas_ind_selected_var <- susenas_ind_selected_var %>%
mutate(hhid = paste(URUT, PSU, SSU, WI1,WI2, sep = "_"))
# Step 2: count HH members (ART) per households from individuals data
art_count <- susenas_ind_selected_var %>%
count(hhid, name = "n_art") %>%
arrange(hhid)
# Step 3: duplicate row in households data
susenas_hh_selected_var_expanded <- susenas_hh_selected_var %>%
left_join(art_count, by = "hhid") %>%
slice(rep(1:n(), n_art)) %>%
group_by(hhid) %>%
mutate(art_index = row_number()) %>%
ungroup()
susenas_hh_selected_var_expanded <- susenas_hh_selected_var_expanded %>%
select(!c(URUT,PSU,SSU,WI1,WI2))
# Step 4: join individuals and expanded households data
susenas_ind_selected_var <- susenas_ind_selected_var %>%
group_by(hhid) %>%
mutate(art_index = row_number()) %>%
ungroup()
all_susenas_dataset <- susenas_ind_selected_var %>%
left_join(susenas_hh_selected_var_expanded, by = c("hhid", "art_index"))
# Step 5:
all_susenas_dataset <- all_susenas_dataset %>%
mutate(breadwinner = as.numeric(breadwinner))
all_susenas_dataset <- all_susenas_dataset %>%
mutate(is_breadwinner = case_when(member_num == breadwinner ~ 1,
TRUE ~ 0))
all_susenas_dataset <- all_susenas_dataset %>%
mutate(who_breadwinner = case_when(is_breadwinner == 1 & link_w_headhh == 1 ~ "breadwinner is head of hh",
is_breadwinner == 1 & link_w_headhh != 1 ~ "breadwinner is member of hh",
TRUE ~ "other"))
all_susenas_dataset <- all_susenas_dataset %>%
mutate(loan = as.factor(case_when(KUR == "1" |
bank_umum == "1" |
BPR == "1" |
koperasi == "1" |
perorangan == "1" |
pegadaian == "1" |
leasing == "1" |
bumdes == "1" |
online == "1" |
other_loan == "1" ~ 1,
TRUE ~ 0)),
food_access = as.factor(case_when(food_1 == "1" |
food_2 == "1" |
food_3 == "1" |
food_4 == "1" |
food_5 == "1" |
food_6 == "1" |
food_7 == "1" |
food_8 == "1" ~ 1,
TRUE ~ 0)))
write.csv(all_susenas_dataset, "data/all susenas mental health data.csv")
all_susenas_dataset %>%
count(is_breadwinner)
## # A tibble: 2 × 2
## is_breadwinner n
## <dbl> <int>
## 1 0 908852
## 2 1 314525
#filter individuals who ages > 15, has activities and emotional issue
df_filtered <- all_susenas_dataset %>%
filter(age >= 15 & no_activity == 0 & other_activity == 0 & !certificate %in% c(2,7,12)) %>%
mutate(
indicator = case_when(emotional_issue == 1 & disability_6 == 4 & disability_7 == 8 ~ 1,
TRUE ~ 0)
)
# create survey design
design <- svydesign(
ids = ~PSU,
weights = ~ind_weight,
data = df_filtered,
nest = TRUE
)
# calculate proportion of individuals with emotional issues at province level
estimate <- svyby(
~indicator,
~id_prov,
design,
svymean,
vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
na.rm = TRUE
)
# Add percentage of people with emotional issue and RSE
estimate <- estimate %>%
mutate(
emotional_ppl_percent = indicator * 100,
RSE = cv * 100
) %>%
select(id_prov, emotional_ppl_percent, RSE) %>%
arrange(desc(emotional_ppl_percent))
#show table
print(estimate)
## id_prov emotional_ppl_percent RSE
## 82 82 0.8638813 21.56892
## 65 65 0.8410333 44.47454
## 53 53 0.7597209 20.37287
## 51 51 0.6898938 48.50477
## 64 64 0.6226809 47.64223
## 75 75 0.6119824 26.09597
## 11 11 0.4936171 24.38398
## 52 52 0.4593889 24.07165
## 34 34 0.4373724 30.03918
## 12 12 0.4361417 28.79201
## 61 61 0.4298728 22.29671
## 81 81 0.4028206 29.04274
## 91 91 0.3714537 42.37433
## 74 74 0.3366578 27.70516
## 14 14 0.3130789 26.28735
## 73 73 0.2987801 21.59724
## 71 71 0.2978563 28.17824
## 32 32 0.2915908 20.24354
## 33 33 0.2701253 14.16946
## 21 21 0.2595682 55.47039
## 62 62 0.2517158 28.94652
## 94 94 0.2427528 24.96461
## 36 36 0.2375810 30.64821
## 35 35 0.2214743 19.03589
## 13 13 0.2195950 25.19782
## 19 19 0.2077838 36.60753
## 72 72 0.1999424 29.00416
## 63 63 0.1911884 27.90022
## 76 76 0.1796457 33.19232
## 31 31 0.1794899 31.38319
## 17 17 0.1771807 29.64295
## 16 16 0.1699196 36.97627
## 15 15 0.1202024 35.87600
## 18 18 0.1194867 35.50428
estimate %>%
count(RSE>25)
## RSE > 25 n
## 1 FALSE 10
## 2 TRUE 24
gg_RSE_prov <- ggplot(estimate, aes(y = RSE)) +
geom_boxplot(fill = "skyblue", color = "darkblue") +
labs(
title = " RSE of people with emotional issue at province level",
y = "RSE (Coefficient of Variation, %)",
x = ""
) +
theme_minimal() +
theme(
plot.title = element_text(
face = "bold",
size = 8,
hjust = 0.5
)
)
gg_est_prov <- ggplot(estimate, aes(y = emotional_ppl_percent)) +
geom_boxplot(fill = "salmon", color = "darkred") +
labs(
title = "People with emotional issue at province level",
y = "percentage (%)",
x = ""
) +
theme_minimal() +
theme(
plot.title = element_text(
face = "bold",
size = 8,
hjust = 0.5
)
)
grid.arrange(gg_est_prov,gg_RSE_prov,ncol=2, nrow =1 )
#rank of people with emotional issues by province
gg_bar <- ggplot(estimate, aes(x = reorder(id_prov, emotional_ppl_percent), y = emotional_ppl_percent)) +
geom_bar(stat = "identity", fill = "steelblue") +
coord_flip() +
labs(
title = "People with emotional issue at province level (age 15 or above)",
x = "Province",
y = "Persentage (%)"
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
axis.text.y = element_text(size = 10)
)
# Tampilkan chart
print(gg_bar)
5. people with emotional issue by demographic status
a)
education level
Education level in Indonesia is classified into 3
categories:
- primary education : elementary - jh school
-
secondary education : high school
- higher education : diploma and
above
#reclassified education level
df_filtered <- df_filtered %>%
mutate(edu_level = case_when(education <11 ~ "primary",
education < 18 ~ "secondary",
TRUE ~ "higher"))
# create survey design
design <- svydesign(
ids = ~PSU,
weights = ~ind_weight,
data = df_filtered,
nest = TRUE
)
# calculate proportion of individuals with emotional issues by education level
estimate_edu <- svyby(
~indicator,
~edu_level,
design,
svymean,
vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
na.rm = TRUE
)
# Add percentage of people with emotional issue and RSE
estimate_edu <- estimate_edu %>%
mutate(
emotional_ppl_percent = indicator * 100,
RSE = cv * 100
) %>%
select(edu_level, emotional_ppl_percent, RSE) %>%
arrange(desc(emotional_ppl_percent))
#show table
print(estimate_edu)
## edu_level emotional_ppl_percent RSE
## primary primary 0.4078795 5.998741
## secondary secondary 0.1940649 11.091296
## higher higher 0.1498156 16.390285
code 1 = male
code 2 = female
df_filtered <- df_filtered %>%
mutate(sex = case_when(sex == 1 ~ "male",
TRUE ~ "female"))
# create survey design
design <- svydesign(
ids = ~PSU,
weights = ~ind_weight,
data = df_filtered,
nest = TRUE
)
# calculate proportion of individuals with emotional issues by sex
estimate_sex <- svyby(
~indicator,
~sex,
design,
svymean,
vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
na.rm = TRUE
)
# Add percentage of people with emotional issue and RSE
estimate_sex <- estimate_sex %>%
mutate(
emotional_ppl_percent = indicator * 100,
RSE = cv * 100
) %>%
select(sex, emotional_ppl_percent, RSE) %>%
arrange(desc(emotional_ppl_percent))
#show table
print(estimate_sex)
## sex emotional_ppl_percent RSE
## female female 0.3068408 6.654773
## male male 0.2901522 7.271349
code 1 = urban
code 2 = rural
df_filtered <- df_filtered %>%
mutate(klas_desakota = case_when(klas_desakota == 1 ~ "urban",
TRUE ~ "rural"))
# create survey design
design <- svydesign(
ids = ~PSU,
weights = ~ind_weight,
data = df_filtered,
nest = TRUE
)
# calculate proportion of individuals with emotional issues by living area
estimate_living <- svyby(
~indicator,
~klas_desakota,
design,
svymean,
vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
na.rm = TRUE
)
# Add percentage of people with emotional issue and RSE
estimate_living <- estimate_living %>%
mutate(
emotional_ppl_percent = indicator * 100,
RSE = cv * 100
) %>%
select(klas_desakota, emotional_ppl_percent, RSE) %>%
arrange(desc(emotional_ppl_percent))
#show table
print(estimate_living)
## klas_desakota emotional_ppl_percent RSE
## rural rural 0.3401562 6.859786
## urban urban 0.2681041 9.506759
#table(all_susenas_dataset$klas_desakota)
1 = not marrried
2 = married
3 = divorced
4 = widowed
#table(all_susenas_dataset$marital_status)
df_filtered <- df_filtered %>%
mutate(marital_status = case_when(marital_status == 1 ~ "not married",
marital_status == 2 ~ "married",
marital_status == 3 ~ "divorced",
marital_status == 4 ~ "widowed",
TRUE ~ "other"))
# create survey design
design <- svydesign(
ids = ~PSU,
weights = ~ind_weight,
data = df_filtered,
nest = TRUE
)
# calculate proportion of individuals with emotional issues by marital status
estimate_marital <- svyby(
~indicator,
~marital_status,
design,
svymean,
vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
na.rm = TRUE
)
# Add percentage of people with emotional issue and RSE
estimate_marital <- estimate_marital %>%
mutate(
emotional_ppl_percent = indicator * 100,
RSE = cv * 100
) %>%
select(marital_status, emotional_ppl_percent, RSE) %>%
arrange(desc(emotional_ppl_percent))
#show table
print(estimate_marital)
## marital_status emotional_ppl_percent RSE
## divorced divorced 0.8408921 19.854345
## widowed widowed 0.7835037 10.048911
## married married 0.2899289 7.396978
## not married not married 0.1726188 10.322551
#reclassified business sector class
#table(all_susenas_dataset)
df_filtered <- df_filtered %>%
mutate(business_sector_class =as.factor(case_when(main_sector < 8 ~ "agriculture and mining",
main_sector < 11 ~ "manufactures",
TRUE ~ "services")) )
# create survey design
design <- svydesign(
ids = ~PSU,
weights = ~ind_weight,
data = df_filtered,
nest = TRUE
)
# calculate proportion of individuals with emotional issues by education level
estimate_business <- svyby(
~indicator,
~business_sector_class,
design,
svymean,
vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
na.rm = TRUE
)
# Add percentage of people with emotional issue and RSE
estimate_business <- estimate_business %>%
mutate(
emotional_ppl_percent = indicator * 100,
RSE = cv * 100
) %>%
select(business_sector_class, emotional_ppl_percent, RSE) %>%
arrange(desc(emotional_ppl_percent))
#show table
print(estimate_business)
## business_sector_class emotional_ppl_percent RSE
## agriculture and mining agriculture and mining 0.3607187 5.962015
## manufactures manufactures 0.2255705 17.472294
## services services 0.2222222 11.236870
#all_susenas_dataset <- all_susenas_dataset %>%
# mutate(age_category = as.factor(case_when(age < 25 ~"15-24",
# age < 35 ~ "25-34",
# age < 45 ~ "35-44",
# age < 55 ~ "45-54",
# age < 65 ~ "55-64",
# TRUE ~ ">65")))
#df_filtered <- all_susenas_dataset %>%
# filter(age >= 15 & no_activity == 0) %>%
# mutate(
# indicator = ifelse(emotional_issue == 1, 1, 0)
# )
# create survey design
#design <- svydesign(
# ids = ~PSU,
# weights = ~ind_weight,
# data = df_filtered,
# nest = TRUE
#)
#estimate_hhstatus <- svyby(
# ~indicator,
# ~interaction(link_w_headhh, age_category),
# design,
# svymean,
# vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
# na.rm = TRUE
#)
#estimate_hhstatus <- estimate_hhstatus %>%
# rename(hh_status_age = "interaction(link_w_headhh, age_category)")
# Add percentage of people with emotional issue and RSE
#estimate_hhstatus <- estimate_hhstatus %>%
# mutate(
# emotional_ppl_per100000population = indicator * 100000,
# RSE = cv * 100
# ) %>%
# select(hh_status_age, emotional_ppl_per100000population, RSE) %>%
# na.omit()
#show table
#print(estimate_hhstatus)
1 = head of HH
2 = spouse
3 = children
4 = foster
children
5 = son/daughter in-law
6 = grand children
7 =
parents/ mothe/father in-law
8 = maid
9 = other
#table(all_susenas_dataset$link_w_headhh)
df_filtered <- df_filtered %>%
mutate(link_w_headhh_cat = as.factor(case_when(link_w_headhh == 1 ~"head of HH",
link_w_headhh == 2 ~"spouse",
link_w_headhh == 3 ~"children",
link_w_headhh == 4 ~"foster children",
link_w_headhh == 5 ~"son/daughter in-law",
link_w_headhh == 6 ~"grand children",
link_w_headhh == 7 ~"parents/ mother/father in-law",
link_w_headhh == 8 ~"maid",
TRUE ~ "other")))
df_filtered <- df_filtered %>%
mutate(link_w_headhh_cat = case_when(link_w_headhh_cat == "foster children" ~"children",
TRUE ~ link_w_headhh_cat))
df_filtered_hh <- df_filtered %>%
filter(!link_w_headhh_cat %in% c("other","parents/ mother/father in-law","maid","grand children","son/daughter in-law"))
# create survey design
design <- svydesign(
ids = ~PSU,
weights = ~ind_weight,
data = df_filtered_hh,
nest = TRUE
)
estimate_hhstatus <- svyby(
~indicator,
~link_w_headhh_cat,
design,
svymean,
vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
na.rm = TRUE
)
# Add percentage of people with emotional issue and RSE
estimate_hhstatus <- estimate_hhstatus %>%
mutate(
emotional_ppl_percent = indicator * 100,
RSE = cv * 100
) %>%
select(link_w_headhh_cat, emotional_ppl_percent, RSE) %>%
na.omit() %>%
arrange(desc(emotional_ppl_percent))
#show table
print(estimate_hhstatus)
## link_w_headhh_cat emotional_ppl_percent RSE
## head of HH head of HH 0.3851177 7.133556
## spouse spouse 0.2854501 8.956151
## children children 0.1800704 10.774414
#estimate_age <- svyby(
# ~indicator,
# ~age_category,
# design,
# svymean,
# vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
# na.rm = TRUE
#)
# Add percentage of people with emotional issue and RSE
#estimate_age <- estimate_age %>%
# mutate(
# emotional_ppl_per100000population = indicator * 100000,
# RSE = cv * 100
# ) %>%
# select(age_category, emotional_ppl_per100000population, RSE) %>%
# na.omit()
#show table
#print(estimate_age)
h) working hours
we categorize working hours as 3 classes:
1 = short working hrs (less than 40 hrs/week)
2 = normal
working hrs (40-42 hrs/week)
3 = long working hours (more than 42
hrs/week)
df_filtered <- df_filtered %>%
mutate(workinghrs_cat = case_when(working_hrs < 40 ~ "short working hrs (less than 40 hrs/week)",
working_hrs < 43 ~ "normal working hrs (40-42 hrs/week)",
TRUE ~ "long working hours (more than 42 hrs/week)"))
# create survey design
design <- svydesign(
ids = ~PSU,
weights = ~ind_weight,
data = df_filtered,
nest = TRUE
)
# calculate proportion of individuals with emotional issues at province level
estimate <- svyby(
~indicator,
~workinghrs_cat,
design,
svymean,
vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
na.rm = TRUE
)
# Add percentage of people with emotional issue and RSE
estimate <- estimate %>%
mutate(
emotional_ppl_percent = indicator * 100,
RSE = cv * 100
) %>%
select(workinghrs_cat, emotional_ppl_percent, RSE) %>%
arrange(desc(emotional_ppl_percent))
#show table
print(estimate)
## workinghrs_cat
## short working hrs (less than 40 hrs/week) short working hrs (less than 40 hrs/week)
## long working hours (more than 42 hrs/week) long working hours (more than 42 hrs/week)
## normal working hrs (40-42 hrs/week) normal working hrs (40-42 hrs/week)
## emotional_ppl_percent RSE
## short working hrs (less than 40 hrs/week) 0.3783420 6.097524
## long working hours (more than 42 hrs/week) 0.1904020 11.884896
## normal working hrs (40-42 hrs/week) 0.1492351 14.389105
working status:
1. employer 2. employee 3. freelance
df_filtered <- df_filtered %>%
mutate(work_status_cat = case_when(work_status < 4 ~ "employer",
work_status < 5 ~ "employee",
work_status <6 ~ "freelance",
TRUE ~ "unpaid"))
# create survey design
design <- svydesign(
ids = ~PSU,
weights = ~ind_weight,
data = df_filtered,
nest = TRUE
)
# calculate proportion of individuals with emotional issues at province level
estimate <- svyby(
~indicator,
~work_status_cat,
design,
svymean,
vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
na.rm = TRUE
)
# Add percentage of people with emotional issue and RSE
estimate <- estimate %>%
mutate(
emotional_ppl_percent = indicator * 100,
RSE = cv * 100
) %>%
select(work_status_cat, emotional_ppl_percent, RSE) %>%
arrange(desc(emotional_ppl_percent))
#show table
print(estimate)
## work_status_cat emotional_ppl_percent RSE
## employer employer 0.3599199 6.297785
## freelance freelance 0.3595997 16.970515
## unpaid unpaid 0.3317508 13.552831
## employee employee 0.1524755 12.860303
h) bank account possession
1 = has bank account
0 =
doesn’t have bank account
#table(all_susenas_dataset$bank_acc)
df_filtered <- df_filtered %>%
mutate(bank_acc = case_when(bank_acc == 1 ~ "has bank account",
TRUE ~ "doesn't have bank account"))
# create survey design
design <- svydesign(
ids = ~PSU,
weights = ~ind_weight,
data = df_filtered,
nest = TRUE
)
estimate_bank <- svyby(
~indicator,
~bank_acc,
design,
svymean,
vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
na.rm = TRUE
)
# Add percentage of people with emotional issue and RSE
estimate_bank <- estimate_bank %>%
mutate(
emotional_ppl_percent = indicator * 100,
RSE = cv * 100
) %>%
select(bank_acc, emotional_ppl_percent, RSE) %>%
arrange(desc(emotional_ppl_percent))
#show table
print(estimate_bank)
## bank_acc emotional_ppl_percent
## doesn't have bank account doesn't have bank account 0.3546933
## has bank account has bank account 0.2354734
## RSE
## doesn't have bank account 6.212884
## has bank account 8.917173
i) internet usage
1 = use internet for social media
0
= doesn’t use internet for social media
df_filtered <- df_filtered %>%
mutate(inet_socmed = case_when(inet_socmed == 1 ~ "use internet for social media",
TRUE ~ "doesn't use internet for social media"))
# create survey design
design <- svydesign(
ids = ~PSU,
weights = ~ind_weight,
data = df_filtered,
nest = TRUE
)
estimate_inet <- svyby(
~indicator,
~inet_socmed,
design,
svymean,
vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
na.rm = TRUE
)
# Add percentage of people with emotional issue and RSE
estimate_inet <- estimate_inet %>%
mutate(
emotional_ppl_percent = indicator * 100,
RSE = cv * 100
) %>%
select(inet_socmed, emotional_ppl_percent, RSE) %>%
arrange(desc(emotional_ppl_percent))
#show table
print(estimate_inet)
## inet_socmed
## doesn't use internet for social media doesn't use internet for social media
## use internet for social media use internet for social media
## emotional_ppl_percent RSE
## doesn't use internet for social media 0.5014692 6.378691
## use internet for social media 0.1399722 10.627068
i) criminal victim
1 = victim
0 = not victim
df_filtered <- df_filtered %>%
mutate(criminal_victim = case_when(criminal_victim == 1 ~ "victim",
TRUE ~ "not victim"))
# create survey design
design <- svydesign(
ids = ~PSU,
weights = ~ind_weight,
data = df_filtered,
nest = TRUE
)
estimate_victim <- svyby(
~indicator,
~criminal_victim,
design,
svymean,
vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
na.rm = TRUE
)
# Add percentage of people with emotional issue and RSE
estimate_victim <- estimate_victim %>%
mutate(
emotional_ppl_percent = indicator * 100,
RSE = cv * 100
) %>%
select(criminal_victim, emotional_ppl_percent, RSE) %>%
arrange(desc(emotional_ppl_percent))
#show table
print(estimate_victim)
## criminal_victim emotional_ppl_percent RSE
## not victim not victim 0.2988008 5.937708
## victim victim 0.2986253 30.294089
ATENSI
1 : yes
5 : no
df_filtered <- df_filtered %>%
mutate(ATENSI = case_when(ATENSI == 1 ~ "get ATENSI",
TRUE ~ "doesn't get ATENSI"))
df_filtered_ATENSI <- df_filtered %>%
mutate(indicator = case_when(emotional_issue == 1 & disability_1 == 4 & disability_2 == 8 & disability_3 == 4 & disability_4 == 8 & disability_5 == 4 & disability_6 == 4 & disability_7 == 8 ~ 1,
TRUE ~ 0) )
# create survey design
design <- svydesign(
ids = ~PSU,
weights = ~ind_weight,
data = df_filtered_ATENSI,
nest = TRUE
)
estimate_atensi <- svyby(
~indicator,
~ATENSI,
design,
svymean,
vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
na.rm = TRUE
)
# Add percentage of people with emotional issue and RSE
estimate_atensi <- estimate_atensi %>%
mutate(
emotional_ppl_percent = indicator * 100,
RSE = cv * 100
) %>%
select(ATENSI, emotional_ppl_percent, RSE) %>%
arrange(desc(emotional_ppl_percent))
#show table
print(estimate_atensi)
## ATENSI emotional_ppl_percent RSE
## get ATENSI get ATENSI 1.2985903 58.02271
## doesn't get ATENSI doesn't get ATENSI 0.1013668 8.88175
office insurance
1 : yes
0 : no
df_filtered <- df_filtered %>%
mutate(office_insurance = case_when(office_insurance == 1 ~ "has office insurance",
TRUE ~ "doesn't has insurance"))
# create survey design
design <- svydesign(
ids = ~PSU,
weights = ~ind_weight,
data = df_filtered,
nest = TRUE
)
# calculate proportion of individuals with emotional issues at province level
estimate <- svyby(
~indicator,
~office_insurance,
design,
svymean,
vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
na.rm = TRUE
)
# Add percentage of people with emotional issue and RSE
estimate <- estimate %>%
mutate(
emotional_ppl_percent = indicator * 100,
RSE = cv * 100
) %>%
select(office_insurance, emotional_ppl_percent, RSE) %>%
arrange(desc(emotional_ppl_percent))
#show table
print(estimate)
## office_insurance emotional_ppl_percent RSE
## doesn't has insurance doesn't has insurance 0.3033545 5.872343
## has office insurance has office insurance 0.1431933 37.633211
breadwinner
# create survey design
design <- svydesign(
ids = ~PSU,
weights = ~ind_weight,
data = df_filtered,
nest = TRUE
)
estimate <- svyby(
~indicator,
~who_breadwinner,
design,
svymean,
vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
na.rm = TRUE
)
# Add percentage of people with emotional issue and RSE
estimate <- estimate %>%
mutate(
emotional_ppl_percent = indicator * 100,
RSE = cv * 100
) %>%
select(who_breadwinner, emotional_ppl_percent, RSE) %>%
arrange(desc(emotional_ppl_percent))
#show table
print(estimate)
## who_breadwinner emotional_ppl_percent
## breadwinner is head of hh breadwinner is head of hh 1.1672682
## other other 0.3014135
## breadwinner is member of hh breadwinner is member of hh 0.2410437
## RSE
## breadwinner is head of hh 13.531968
## other 6.506013
## breadwinner is member of hh 8.536162
lack of food access
0 : have food access 1 : lack of food
access
df_filtered <- df_filtered %>%
mutate(food_access = case_when(food_access == 0 ~ "have food access",
TRUE ~ "lack of food access"))
# create survey design
design <- svydesign(
ids = ~PSU,
weights = ~ind_weight,
data = df_filtered,
nest = TRUE
)
estimate <- svyby(
~indicator,
~food_access,
design,
svymean,
vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
na.rm = TRUE
)
# Add percentage of people with emotional issue and RSE
estimate <- estimate %>%
mutate(
emotional_ppl_percent = indicator * 100,
RSE = cv * 100
) %>%
select(food_access, emotional_ppl_percent, RSE) %>%
arrange(desc(emotional_ppl_percent))
#show table
print(estimate)
## food_access emotional_ppl_percent RSE
## lack of food access lack of food access 0.6251217 9.208469
## have food access have food access 0.2285330 7.017349
worries for food
0 : no worries 1 : worries
df_filtered <- df_filtered %>%
mutate(food_1 = case_when(food_1 == 0 ~ "not worry about the food",
TRUE ~ "worry about the food"))
# create survey design
design <- svydesign(
ids = ~PSU,
weights = ~ind_weight,
data = df_filtered,
nest = TRUE
)
estimate <- svyby(
~indicator,
~food_1,
design,
svymean,
vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
na.rm = TRUE
)
# Add percentage of people with emotional issue and RSE
estimate <- estimate %>%
mutate(
emotional_ppl_percent = indicator * 100,
RSE = cv * 100
) %>%
select(food_1, emotional_ppl_percent, RSE) %>%
arrange(desc(emotional_ppl_percent))
#show table
print(estimate)
## food_1 emotional_ppl_percent
## worry about the food worry about the food 0.5895922
## not worry about the food not worry about the food 0.2442054
## RSE
## worry about the food 9.662126
## not worry about the food 6.705720
loan
0 = dont have loan 1 = have loan
df_filtered <- df_filtered %>%
mutate(loan = case_when(loan == 0 ~ "doesn't have loan",
TRUE ~ "has loan"))
# create survey design
design <- svydesign(
ids = ~PSU,
weights = ~ind_weight,
data = df_filtered,
nest = TRUE
)
estimate <- svyby(
~indicator,
~loan,
design,
svymean,
vartype = c("se", "cv"), # cv = coefficient of variation (for RSE)
na.rm = TRUE
)
# Add percentage of people with emotional issue and RSE
estimate <- estimate %>%
mutate(
emotional_ppl_percent = indicator * 100,
RSE = cv * 100
) %>%
select(loan, emotional_ppl_percent, RSE) %>%
arrange(desc(emotional_ppl_percent))
#show table
print(estimate)
## loan emotional_ppl_percent RSE
## has loan has loan 0.3224556 12.539471
## doesn't have loan doesn't have loan 0.2914100 6.263758
write.csv(all_susenas_dataset, "all susenas dataset.csv")
write.csv(df_filtered, "data for supervised learning.csv")