Load Library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(foreign)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loading required package: survival
##
## Attaching package: 'survey'
##
## The following object is masked from 'package:graphics':
##
## dotchart
library(haven)
## Warning: package 'haven' was built under R version 4.4.2
library(readxl)
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:survival':
##
## cluster
##
## The following object is masked from 'package:purrr':
##
## lift
library(dplyr)
library(tidyr)
library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(corrplot)
## corrplot 0.92 loaded
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
##
## The following object is masked from 'package:dplyr':
##
## combine
##
## The following object is masked from 'package:ggplot2':
##
## margin
library(rpart)
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.4.3
library(pROC)
## Warning: package 'pROC' was built under R version 4.4.3
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
##
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(srvyr)
## Warning: package 'srvyr' was built under R version 4.4.3
##
## Attaching package: 'srvyr'
##
## The following object is masked from 'package:stats':
##
## filter
============================================================================
1. READ DATA
============================================================================
data_susenas_kor <- read.dbf("data/kor24ind_1_lps.dbf")
data_susenas_pengeluaran <- read.spss("data/BLOK43.sav", to.data.frame=TRUE)
data_gk <- read_excel("data/gk24.xlsx")
data_pedidikan <- read.spss("data/Modul Konsumsi 2024/BLOK42.sav", to.data.frame=TRUE)
df1 <- read.spss("data/KOR24_var Urut dan Kec desa baru.sav", to.data.frame = TRUE)
## re-encoding from CP1252
df2 <- read.spss("data/podes2024_UNTUK MERGING DGN DATA SUSENAS 2024_unik dengan desa Susenas.sav", to.data.frame = TRUE)
## re-encoding from CP1252
============================================================================
2. DATA WRANGLING
============================================================================
1. select kolom
suskor <- data_susenas_kor %>%
mutate(R101 = as.character(R101),
R102 = as.character(R102)) %>%
group_by(URUT) %>%
mutate(size = n()) %>%
ungroup() %>%
rename(art = R403,
sex = R405,
married = R404,
school = R611, #jenjang sekolah sekarang
edu = R614, #pendidikan tertinggi yang ditamatkan
urban = R105,
schol_level = R612,
age = R407) %>%
mutate(art = case_when(art == 1 ~ "krt",
art %in% c(3, 4) ~ "anak"),
sex = case_when(sex == 1 ~ "laki",
sex == 2 ~ "perempuan"),
married = case_when(married == 2 ~ "kawin",
married == 1 ~ "single",
married == 3 ~ "single",
married == 4 ~ "single"),
edu = case_when(edu == 25 ~ "Tidak pernah sekolah",
edu >= 1 & edu < 6 ~ "SD",
edu >= 6 & edu < 11 ~ "SMP",
edu >= 11 & edu < 18 ~ "SMA",
edu >= 18 & edu < 25 ~ "Pendidikan Tinggi",
is.na(edu) ~ "Tidak pernah sekolah"),
schol_level = case_when(schol_level == 25 ~ "Tidak pernah sekolah",
schol_level >= 1 & schol_level < 6 ~ "SD",
schol_level >= 6 & schol_level < 11 ~ "SMP",
schol_level >= 11 & schol_level < 18 ~ "SMA",
schol_level >= 18 & schol_level < 25 ~ "Pendidikan Tinggi",
is.na(schol_level) ~ "Tidak pernah sekolah"),
school = case_when(school == 1 ~ "negeri",
school == 2 ~ "swasta"),
urban = case_when(urban == 1 ~ "kota",
urban == 2 ~ "desa"),
age = case_when(age >=0 & age < 5 ~ "0-4",
age >=5 & age <10 ~ "5-10",
age >=10 & age <15 ~ "10-14",
age >=15 & age <20 ~ "15-20",
age >=20 & age <25 ~ "20-24",
age >=25 & age <30 ~ "25-29",
age >=30 & age <35 ~ "30-34",
age >=35 & age <40 ~ "35-39",
age >=40 & age <45 ~ "40-44",
age >=45 & age <50 ~ "45-49",
age >=50 & age <55 ~ "50-54",
age >=55 & age <60 ~ "55-60",
age >=60 ~ ">=60"),
size = case_when(size <=2 ~ "<=2",
size == 3 ~ "3",
size == 4 ~ "4",
size == 5 ~ "5",
size == 6 ~ "6",
size == 7 ~ "7",
size >= 8 ~ ">=8")) %>%
mutate(art = as.factor(art),
sex = as.factor(sex),
married = as.factor(married),
edu = as.factor(edu),
schol_level = as.factor(schol_level),
school = as.factor(school),
urban = as.factor(urban),
age = as.factor(age),
size = as.factor(size)) %>%
select(URUT, PSU, STRATA, WI1, WI2, FWT, R101, R102,
art,
sex,
married,
edu,
schol_level,
school,
urban,
age,
size)
data_kapita <- data_susenas_pengeluaran %>%
select("URUT", "KAPITA", "WERT", "WEIND")
gk <- data_gk %>%
mutate(provinsi = as.character(provinsi)) %>%
pivot_longer(cols = c("Perkotaan", "Perdesaan"), names_to = "tipe", values_to = "gk") %>%
mutate(urban = ifelse(tipe == "Perkotaan", "kota", "desa"),
R101 = provinsi,
urban = as.factor(urban)) %>%
select(R101, urban, gk)
uang_sekolah <- data_pedidikan %>%
rename(URUT = urut)%>%
mutate(biayasekolah = b42k5.265+b42k5.266+b42k5.267+b42k5.268+b42k5.269+b42k5.270) %>%
select("URUT", "biayasekolah")
datapodes_baru <- df2 %>%
select(r101, r102, r103_baru, r104_baru, r701dk2,r701ek2, r701fk2, r701gk2,
r701hk2, r701ik2, r701jk2, r701dk4, r701ek4,r701fk4, r701gk4,
r701hk4, r701ik4, r701jk4) %>%
mutate(jmlh_sekolah = rowSums(subset(df2, select = c(r701dk2, r701ek2, r701fk2, r701gk2, r701hk2, r701ik2, r701jk2)), na.rm = TRUE),
jarak_sekolah = pmin(r701dk4, r701ek4, r701fk4, r701gk4, r701hk4, r701ik4, r701jk4, na.rm = TRUE),
r101_baru = as.character(r101),
r102_baru = as.character(r102),
r103_baru = as.character(r103_baru),
r104_baru = as.character(r104_baru)
) %>%
select(r101_baru, r102_baru, r103_baru, r104_baru,
jmlh_sekolah,
jarak_sekolah)
data_untuk_merge <- df1 %>%
mutate(r101_baru = as.character(R101),
r102_baru = as.character(R102),
r103_baru = as.character(r103_baru),
r104_baru = as.character(r104_baru)) %>%
select(URUT, r101_baru, r102_baru, r103_baru, r104_baru)
2. join data
data_susenas1 <- left_join(suskor,
data_kapita,
by = "URUT")
data_susenas2 <-left_join(data_susenas1,
uang_sekolah,
by = "URUT")
data_susenas3 <- data_susenas2 %>%
left_join(gk, by = c("R101", "urban")) %>%
mutate(b1 = 17*gk,
b2 = 3.5*gk,
b3 = 1.5*gk) %>%
mutate(kelas_ekonomi = case_when(KAPITA >= b1 ~ "Kelas atas",
KAPITA >= b2 & KAPITA < b1 ~ "Kelas menengah",
KAPITA >= b3 & KAPITA < b2 ~ "Menuju kelas menengah",
KAPITA >= gk & KAPITA < b3 ~ "Rentan miskin",
KAPITA < gk ~ "Miskin")) %>%
mutate(kelas_ekonomi = factor(kelas_ekonomi, levels = c("Miskin",
"Rentan miskin",
"Menuju kelas menengah",
"Kelas menengah",
"Kelas atas")))
susenas_podes1 <- left_join(data_susenas3,
data_untuk_merge,
by = "URUT")
susenas_podes2 <- left_join( susenas_podes1,
datapodes_baru,
by = c("r101_baru", "r102_baru", "r103_baru", "r104_baru"))
3. pasangan data KRT dan anak
#HANYA AKAN GUNAKAN DATA ART YANG MERUPAKAN KRT DAN ANAK
filter_data <- susenas_podes2 %>%
filter(!is.na(art))
##CHECK JUMLAH KOMPOSISI KRT-ANAK
household_summary <- filter_data %>%
group_by(URUT, art) %>%
summarise(count = n(), .groups = "drop") %>%
pivot_wider(names_from = art, values_from = count, values_fill = 0)
##MEMBUAT DATA PASANGAN KRT DAN ANAK
datayuk <- filter_data %>%
select(URUT, PSU, STRATA, WI1, WI2, FWT,
art, sex, age, size, married, edu, school, schol_level,
urban, kelas_ekonomi, biayasekolah,
jmlh_sekolah, jarak_sekolah)
create_father_child_pairs <- function(data) {
# Separate fathers and children
fathers <- data %>% filter(art == "krt")
children <- data %>% filter(art == "anak")
# Create all possible father-child combinations within same household
pairs <- children %>%
left_join(fathers, by = c("URUT", "PSU", "STRATA", "FWT"), suffix = c("_anak", "_krt")) %>%
filter(!is.na(sex_krt)) %>% # Only keep pairs where father exists
mutate(
pair_id = row_number(), # Unique identifier for each pair
ART_krt = "krt",
ART_anak = "anak"
) %>%
select(
URUT, PSU, STRATA, FWT, pair_id,
ART_krt, ART_anak,
school_krt, school_anak,
schol_level_krt, schol_level_anak,
sex_krt, sex_anak,
age_krt, age_anak,
size_krt, size_anak,
married_krt, married_anak,
edu_krt, edu_anak,
urban_krt, urban_anak,
kelas_ekonomi_krt, kelas_ekonomi_anak,
biayasekolah_krt, biayasekolah_anak,
jmlh_sekolah_krt, jmlh_sekolah_anak,
jarak_sekolah_krt, jarak_sekolah_anak
)
return(pairs)
}
pairs_data <- create_father_child_pairs(datayuk)
##FILTER Pasangan KRT DAN ANAK YANG SEKOLAH SD, SMP DAN SMA
filter_pair <- pairs_data %>%
filter(!is.na(school_anak) & schol_level_anak %in% c("SD", "SMP", "SMA"))
#MASTER DATA LENGKAP
datalengkap <- pairs_data %>%
mutate(bersekolah = school_anak) %>%
select(-c(pair_id, school_krt, married_anak, schol_level_krt, edu_anak, urban_anak, kelas_ekonomi_anak, biayasekolah_krt,
jmlh_sekolah_krt))
write_sav(filter_pair, "filter_pair_final.sav")
4. survey design
susenas_design <- svydesign(
ids = ~PSU, # Primary sampling unit
strata = ~STRATA, # Stratification variable
weights = ~FWT, # Final weight (atau WI1 untuk individual analysis)
data = filter_pair, # Your data
nest = TRUE # PSU nested within strata
)
susenas_svy <- filter_pair %>%
as_survey_design(
ids = PSU,
strata = STRATA,
weights = FWT,
nest = TRUE
)
filter_pair_svy <- as_survey_design(
.data = filter_pair,
ids = PSU,
strata = STRATA,
weights = FWT,
nest = TRUE
)
============================================================================
3. EKSPLORATORI DATA ANALISIS
============================================================================
# Distribution of target variable
print("Distribution of School Choice:")
## [1] "Distribution of School Choice:"
svytable(~school_anak, susenas_design)
## school_anak
## negeri swasta
## 36437689 11571696
prop.table(svytable(~school_anak, susenas_design))
## school_anak
## negeri swasta
## 0.7589701 0.2410299
print("Distribution of School Choice:")
## [1] "Distribution of School Choice:"
svytable(~school_anak, susenas_svy)
## school_anak
## negeri swasta
## 36437689 11571696
prop.table(svytable(~school_anak, susenas_svy))
## school_anak
## negeri swasta
## 0.7589701 0.2410299
filter_pair_svy %>%
group_by(kelas_ekonomi_krt, school_anak) %>%
summarise(mean_biayasekolah_anak = survey_mean(biayasekolah_anak, na.rm = TRUE),
min_biayasekolah_anak = min(biayasekolah_anak, na.rm = TRUE),
max_biayasekolah_anak = max(biayasekolah_anak, na.rm = TRUE)) %>%
ungroup()
## # A tibble: 10 × 6
## kelas_ekonomi_krt school_anak mean_biayasekolah_anak mean_biayasekolah_an…¹
## <fct> <fct> <dbl> <dbl>
## 1 Miskin negeri 1858003. 208381.
## 2 Miskin swasta 2299426. 366635.
## 3 Rentan miskin negeri 2226168. 112999.
## 4 Rentan miskin swasta 2592242. 221533.
## 5 Menuju kelas menen… negeri 4129292. 179396.
## 6 Menuju kelas menen… swasta 5302982. 347213.
## 7 Kelas menengah negeri 10055662. 722147.
## 8 Kelas menengah swasta 23085535. 1735855.
## 9 Kelas atas negeri 20665563. 5958187.
## 10 Kelas atas swasta 90900616. 19417288.
## # ℹ abbreviated name: ¹mean_biayasekolah_anak_se
## # ℹ 2 more variables: min_biayasekolah_anak <dbl>, max_biayasekolah_anak <dbl>
filter_pair_svy %>%
group_by(schol_level_anak, school_anak) %>%
summarise(
mean_biayasekolah_anak = survey_mean(biayasekolah_anak, na.rm = TRUE),
min_biayasekolah_anak = min(biayasekolah_anak, na.rm = TRUE),
max_biayasekolah_anak = max(biayasekolah_anak, na.rm = TRUE)
) %>%
ungroup()
## # A tibble: 6 × 6
## schol_level_anak school_anak mean_biayasekolah_anak mean_biayasekolah_anak_se
## <fct> <fct> <dbl> <dbl>
## 1 SD negeri 3896266. 157176.
## 2 SD swasta 11486422. 938072.
## 3 SMA negeri 7593171. 627438.
## 4 SMA swasta 9646244. 1197816.
## 5 SMP negeri 4681132. 299653.
## 6 SMP swasta 9855713. 1205958.
## # ℹ 2 more variables: min_biayasekolah_anak <dbl>, max_biayasekolah_anak <dbl>
============================================================================
4. MODELING DENGAN SURVEY WEIGHTS ~ REGRESI LOGISTIK
============================================================================
# LOGISTIC REGRESSION dengan survey weights
model_svy <- svyglm(
school_anak ~ sex_krt + married_krt + age_krt + size_krt + schol_level_krt +
urban_krt + kelas_ekonomi_krt+ biayasekolah_krt + jmlh_sekolah_krt + jarak_sekolah_krt,
design = susenas_design,
family = quasibinomial() # gunakan quasi untuk complex survey
)
summary(model_svy)
##
## Call:
## svyglm(formula = school_anak ~ sex_krt + married_krt + age_krt +
## size_krt + schol_level_krt + urban_krt + kelas_ekonomi_krt +
## biayasekolah_krt + jmlh_sekolah_krt + jarak_sekolah_krt,
## design = susenas_design, family = quasibinomial())
##
## Survey design:
## svydesign(ids = ~PSU, strata = ~STRATA, weights = ~FWT, data = filter_pair,
## nest = TRUE)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.835e-01 5.774e-01 -1.184 0.23670
## sex_krtperempuan -8.661e-02 3.325e-01 -0.260 0.79453
## married_krtsingle 2.483e-02 3.442e-01 0.072 0.94251
## age_krt20-24 -5.139e-01 1.371e+00 -0.375 0.70792
## age_krt25-29 -1.893e+00 8.536e-01 -2.218 0.02672
## age_krt30-34 2.845e-01 3.506e-01 0.811 0.41725
## age_krt35-39 3.516e-01 3.210e-01 1.095 0.27357
## age_krt40-44 9.909e-02 3.061e-01 0.324 0.74618
## age_krt45-49 8.873e-02 3.002e-01 0.296 0.76758
## age_krt50-54 3.116e-02 3.270e-01 0.095 0.92409
## age_krt55-60 4.037e-01 3.652e-01 1.106 0.26907
## size_krt>=8 -8.686e-01 6.004e-01 -1.447 0.14818
## size_krt3 -2.873e-01 4.178e-01 -0.688 0.49183
## size_krt4 -2.638e-01 4.156e-01 -0.635 0.52559
## size_krt5 -6.040e-01 4.238e-01 -1.425 0.15432
## size_krt6 -1.189e-01 4.498e-01 -0.264 0.79162
## size_krt7 -5.376e-01 4.989e-01 -1.078 0.28140
## schol_level_krtSD -7.385e-02 1.779e-01 -0.415 0.67808
## schol_level_krtSMA -3.788e-02 1.607e-01 -0.236 0.81371
## schol_level_krtSMP 5.414e-02 1.780e-01 0.304 0.76105
## schol_level_krtTidak pernah sekolah 1.687e+00 6.318e-01 2.671 0.00765
## urban_krtkota 1.986e-01 1.196e-01 1.661 0.09699
## kelas_ekonomi_krtRentan miskin -3.046e-02 2.253e-01 -0.135 0.89247
## kelas_ekonomi_krtMenuju kelas menengah -1.843e-01 2.291e-01 -0.804 0.42125
## kelas_ekonomi_krtKelas menengah -2.661e-01 2.632e-01 -1.011 0.31204
## kelas_ekonomi_krtKelas atas -6.842e-01 4.926e-01 -1.389 0.16502
## biayasekolah_krt 6.380e-08 9.660e-09 6.604 5.46e-11
## jmlh_sekolah_krt -1.179e-02 1.721e-02 -0.685 0.49352
## jarak_sekolah_krt -4.469e-01 3.163e-01 -1.413 0.15791
##
## (Intercept)
## sex_krtperempuan
## married_krtsingle
## age_krt20-24
## age_krt25-29 *
## age_krt30-34
## age_krt35-39
## age_krt40-44
## age_krt45-49
## age_krt50-54
## age_krt55-60
## size_krt>=8
## size_krt3
## size_krt4
## size_krt5
## size_krt6
## size_krt7
## schol_level_krtSD
## schol_level_krtSMA
## schol_level_krtSMP
## schol_level_krtTidak pernah sekolah **
## urban_krtkota .
## kelas_ekonomi_krtRentan miskin
## kelas_ekonomi_krtMenuju kelas menengah
## kelas_ekonomi_krtKelas menengah
## kelas_ekonomi_krtKelas atas
## biayasekolah_krt ***
## jmlh_sekolah_krt
## jarak_sekolah_krt
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasibinomial family taken to be 1.413235)
##
## Number of Fisher Scoring iterations: 5
# Model evaluation
# Calculate odds ratios
odds_ratios <- exp(coef(model_svy))
conf_intervals <- exp(confint(model_svy))
print("Odds Ratios with 95% Confidence Intervals:")
## [1] "Odds Ratios with 95% Confidence Intervals:"
cbind(OddsRatio = odds_ratios, conf_intervals)
## OddsRatio 2.5 % 97.5 %
## (Intercept) 0.5048343 0.16264988 1.5669097
## sex_krtperempuan 0.9170349 0.47767712 1.7605049
## married_krtsingle 1.0251387 0.52184664 2.0138279
## age_krt20-24 0.5981881 0.04061398 8.8104868
## age_krt25-29 0.1506411 0.02823821 0.8036181
## age_krt30-34 1.3291233 0.66814096 2.6440060
## age_krt35-39 1.4213450 0.75723762 2.6678831
## age_krt40-44 1.1041616 0.60577121 2.0125962
## age_krt45-49 1.0927820 0.60650697 1.9689345
## age_krt50-54 1.0316540 0.54322326 1.9592495
## age_krt55-60 1.4974285 0.73157111 3.0650365
## size_krt>=8 0.4195192 0.12920132 1.3621871
## size_krt3 0.7503065 0.33061387 1.7027714
## size_krt4 0.7680989 0.33995582 1.7354488
## size_krt5 0.5466363 0.23805396 1.2552249
## size_krt6 0.8879403 0.36749454 2.1454411
## size_krt7 0.5841406 0.21953845 1.5542617
## schol_level_krtSD 0.9288075 0.65522429 1.3166230
## schol_level_krtSMA 0.9628265 0.70247301 1.3196733
## schol_level_krtSMP 1.0556340 0.74451979 1.4967541
## schol_level_krtTidak pernah sekolah 5.4046567 1.56532156 18.6609033
## urban_krtkota 1.2196452 0.96466785 1.5420172
## kelas_ekonomi_krtRentan miskin 0.9699949 0.62347689 1.5091017
## kelas_ekonomi_krtMenuju kelas menengah 0.8317058 0.53068772 1.3034682
## kelas_ekonomi_krtKelas menengah 0.7663410 0.45734606 1.2841009
## kelas_ekonomi_krtKelas atas 0.5044809 0.19196711 1.3257528
## biayasekolah_krt 1.0000001 1.00000004 1.0000001
## jmlh_sekolah_krt 0.9882841 0.95548444 1.0222098
## jarak_sekolah_krt 0.6396343 0.34394874 1.1895147