Load Library

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(foreign)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loading required package: survival
## 
## Attaching package: 'survey'
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(haven)
## Warning: package 'haven' was built under R version 4.4.2
library(readxl)
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:survival':
## 
##     cluster
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(dplyr)
library(tidyr)
library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(corrplot)
## corrplot 0.92 loaded
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
## 
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(rpart)
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.4.3
library(pROC)
## Warning: package 'pROC' was built under R version 4.4.3
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## 
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(srvyr)
## Warning: package 'srvyr' was built under R version 4.4.3
## 
## Attaching package: 'srvyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter

============================================================================

1. READ DATA

============================================================================

data_susenas_kor <- read.dbf("data/kor24ind_1_lps.dbf")
data_susenas_pengeluaran <- read.spss("data/BLOK43.sav", to.data.frame=TRUE)
data_gk <- read_excel("data/gk24.xlsx")
data_pedidikan <- read.spss("data/Modul Konsumsi 2024/BLOK42.sav", to.data.frame=TRUE)

df1 <- read.spss("data/KOR24_var Urut dan Kec desa baru.sav", to.data.frame = TRUE)
## re-encoding from CP1252
df2 <- read.spss("data/podes2024_UNTUK MERGING DGN DATA SUSENAS 2024_unik dengan desa Susenas.sav", to.data.frame = TRUE)
## re-encoding from CP1252

============================================================================

2. DATA WRANGLING

============================================================================

1. select kolom

suskor <- data_susenas_kor %>%
  mutate(R101 = as.character(R101),
         R102 = as.character(R102)) %>%
  group_by(URUT) %>% 
  mutate(size = n()) %>%
  ungroup() %>%
  rename(art = R403,
         sex = R405,
         married = R404,
         school = R611, #jenjang sekolah sekarang
         edu = R614, #pendidikan tertinggi yang ditamatkan
         urban = R105,
         schol_level = R612,
         age = R407) %>%
  mutate(art = case_when(art == 1 ~ "krt",
                         art %in% c(3, 4) ~ "anak"),
         sex = case_when(sex == 1 ~ "laki",
                         sex == 2 ~ "perempuan"),
         married = case_when(married == 2 ~ "kawin",
                             married == 1 ~ "single",
                             married == 3 ~ "single",
                             married == 4 ~ "single"),
         edu = case_when(edu == 25 ~ "Tidak pernah sekolah",
                         edu >= 1 & edu < 6 ~ "SD",
                         edu >= 6 & edu < 11 ~ "SMP",
                         edu >= 11 & edu < 18 ~ "SMA",
                         edu >= 18 & edu < 25 ~ "Pendidikan Tinggi",
                         is.na(edu) ~ "Tidak pernah sekolah"),
         schol_level = case_when(schol_level == 25 ~ "Tidak pernah sekolah",
                                 schol_level >= 1 & schol_level < 6 ~ "SD",
                                 schol_level >= 6 & schol_level < 11 ~ "SMP",
                                 schol_level >= 11 & schol_level < 18 ~ "SMA",
                                 schol_level >= 18 & schol_level < 25 ~ "Pendidikan Tinggi",
                                 is.na(schol_level) ~ "Tidak pernah sekolah"),
         school = case_when(school == 1 ~ "negeri",
                            school == 2 ~ "swasta"),
         urban = case_when(urban == 1 ~ "kota",
                           urban == 2 ~ "desa"),
         age = case_when(age >=0 & age < 5 ~ "0-4",
                         age >=5 & age <10 ~ "5-10",
                         age >=10 & age <15 ~ "10-14",
                         age >=15 & age <20 ~ "15-20",
                         age >=20 & age <25 ~ "20-24",
                         age >=25 & age <30 ~ "25-29",
                         age >=30 & age <35 ~ "30-34",
                         age >=35 & age <40 ~ "35-39",
                         age >=40 & age <45 ~ "40-44",
                         age >=45 & age <50 ~ "45-49",
                         age >=50 & age <55 ~ "50-54",
                         age >=55 & age <60 ~ "55-60",
                         age >=60 ~ ">=60"),
         size = case_when(size <=2 ~ "<=2",
                          size == 3 ~ "3",
                          size == 4 ~ "4",
                          size == 5 ~ "5",
                          size == 6 ~ "6",
                          size == 7 ~ "7",
                          size >= 8 ~ ">=8")) %>%
  mutate(art = as.factor(art),
         sex = as.factor(sex),
         married = as.factor(married),
         edu = as.factor(edu),
         schol_level = as.factor(schol_level),
         school = as.factor(school),
         urban = as.factor(urban),
         age = as.factor(age),
         size = as.factor(size)) %>%
  select(URUT, PSU, STRATA, WI1, WI2, FWT, R101, R102,
         art,
         sex,
         married,
         edu,
         schol_level,
         school,
         urban,
         age,
         size)

data_kapita <- data_susenas_pengeluaran %>%
  select("URUT", "KAPITA", "WERT", "WEIND")

gk <- data_gk %>%
  mutate(provinsi = as.character(provinsi)) %>%
  pivot_longer(cols = c("Perkotaan", "Perdesaan"), names_to = "tipe", values_to = "gk") %>%
  mutate(urban = ifelse(tipe == "Perkotaan", "kota", "desa"),
         R101 = provinsi,
         urban = as.factor(urban)) %>%
  select(R101, urban, gk)

uang_sekolah <- data_pedidikan %>%
  rename(URUT = urut)%>%
  mutate(biayasekolah = b42k5.265+b42k5.266+b42k5.267+b42k5.268+b42k5.269+b42k5.270) %>%
  select("URUT", "biayasekolah")

datapodes_baru <- df2 %>%
  select(r101, r102, r103_baru, r104_baru, r701dk2,r701ek2, r701fk2, r701gk2, 
         r701hk2, r701ik2, r701jk2, r701dk4, r701ek4,r701fk4, r701gk4,
         r701hk4, r701ik4, r701jk4) %>%
  mutate(jmlh_sekolah = rowSums(subset(df2, select = c(r701dk2, r701ek2, r701fk2, r701gk2, r701hk2, r701ik2, r701jk2)), na.rm = TRUE),
         jarak_sekolah = pmin(r701dk4, r701ek4, r701fk4, r701gk4, r701hk4, r701ik4, r701jk4, na.rm = TRUE),
         r101_baru = as.character(r101),
         r102_baru = as.character(r102),
         r103_baru = as.character(r103_baru),
         r104_baru = as.character(r104_baru)
  ) %>%
  select(r101_baru, r102_baru, r103_baru, r104_baru,
         jmlh_sekolah,
         jarak_sekolah)

data_untuk_merge <- df1 %>%
  mutate(r101_baru = as.character(R101),
         r102_baru = as.character(R102),
         r103_baru = as.character(r103_baru),
         r104_baru = as.character(r104_baru)) %>%
  select(URUT, r101_baru, r102_baru, r103_baru, r104_baru)

2. join data

data_susenas1 <- left_join(suskor, 
                       data_kapita, 
                       by = "URUT")

data_susenas2 <-left_join(data_susenas1,
                      uang_sekolah,
                      by = "URUT")

data_susenas3 <- data_susenas2 %>%
  left_join(gk, by = c("R101", "urban")) %>%
  mutate(b1 = 17*gk,
         b2 = 3.5*gk,
         b3 = 1.5*gk) %>%
  mutate(kelas_ekonomi = case_when(KAPITA >= b1 ~ "Kelas atas",
                                   KAPITA >= b2 & KAPITA < b1 ~ "Kelas menengah",
                                   KAPITA >= b3 & KAPITA < b2 ~ "Menuju kelas menengah",
                                   KAPITA >= gk & KAPITA <  b3 ~ "Rentan miskin",
                                   KAPITA < gk ~ "Miskin")) %>%
  mutate(kelas_ekonomi = factor(kelas_ekonomi, levels = c("Miskin",
                                                          "Rentan miskin",
                                                          "Menuju kelas menengah",
                                                          "Kelas menengah",
                                                          "Kelas atas")))

susenas_podes1 <- left_join(data_susenas3, 
                  data_untuk_merge, 
                  by = "URUT")

susenas_podes2 <- left_join( susenas_podes1,
                   datapodes_baru,
                   by = c("r101_baru", "r102_baru", "r103_baru", "r104_baru"))

3. pasangan data KRT dan anak

#HANYA AKAN GUNAKAN DATA ART YANG MERUPAKAN KRT DAN ANAK
filter_data <- susenas_podes2 %>%
  filter(!is.na(art))

##CHECK JUMLAH KOMPOSISI KRT-ANAK
household_summary <- filter_data %>%
  group_by(URUT, art) %>%
  summarise(count = n(), .groups = "drop") %>%
  pivot_wider(names_from = art, values_from = count, values_fill = 0)

##MEMBUAT DATA PASANGAN KRT DAN ANAK
datayuk <- filter_data %>%
  select(URUT, PSU, STRATA, WI1, WI2, FWT, 
         art, sex, age, size, married, edu, school, schol_level, 
         urban, kelas_ekonomi, biayasekolah, 
         jmlh_sekolah, jarak_sekolah)

create_father_child_pairs <- function(data) {
  # Separate fathers and children
  fathers <- data %>% filter(art == "krt")
  children <- data %>% filter(art == "anak")
  
  # Create all possible father-child combinations within same household
  pairs <- children %>%
    left_join(fathers, by = c("URUT", "PSU", "STRATA", "FWT"), suffix = c("_anak", "_krt")) %>%
    filter(!is.na(sex_krt)) %>%  # Only keep pairs where father exists
    mutate(
      pair_id = row_number(),  # Unique identifier for each pair
      ART_krt = "krt",
      ART_anak = "anak"
    ) %>%
    select(
      URUT, PSU, STRATA, FWT, pair_id,
      ART_krt, ART_anak,
      school_krt, school_anak,
      schol_level_krt, schol_level_anak,
      sex_krt, sex_anak,
      age_krt, age_anak,
      size_krt, size_anak,
      married_krt, married_anak,
      edu_krt, edu_anak,
      urban_krt, urban_anak,
      kelas_ekonomi_krt, kelas_ekonomi_anak,
      biayasekolah_krt, biayasekolah_anak,
      jmlh_sekolah_krt, jmlh_sekolah_anak,
      jarak_sekolah_krt, jarak_sekolah_anak
    )
  
  return(pairs)
}
pairs_data <- create_father_child_pairs(datayuk)


##FILTER Pasangan KRT DAN ANAK YANG SEKOLAH SD, SMP DAN SMA
filter_pair <- pairs_data %>%
  filter(!is.na(school_anak) & schol_level_anak %in% c("SD", "SMP", "SMA"))

#MASTER DATA LENGKAP
datalengkap <- pairs_data %>%
  mutate(bersekolah = school_anak) %>%
  select(-c(pair_id, school_krt, married_anak, schol_level_krt, edu_anak, urban_anak, kelas_ekonomi_anak, biayasekolah_krt,
            jmlh_sekolah_krt))
write_sav(filter_pair, "filter_pair_final.sav")

4. survey design

susenas_design <- svydesign(
  ids = ~PSU,           # Primary sampling unit
  strata = ~STRATA,     # Stratification variable
  weights = ~FWT,       # Final weight (atau WI1 untuk individual analysis)
  data = filter_pair,   # Your data
  nest = TRUE          # PSU nested within strata
)


susenas_svy <- filter_pair %>%
  as_survey_design(
    ids = PSU,
    strata = STRATA,
    weights = FWT,
    nest = TRUE
  )

filter_pair_svy <- as_survey_design(
  .data = filter_pair,
  ids = PSU,
  strata = STRATA,
  weights = FWT,
  nest = TRUE
)

============================================================================

3. EKSPLORATORI DATA ANALISIS

============================================================================

# Distribution of target variable
print("Distribution of School Choice:")
## [1] "Distribution of School Choice:"
svytable(~school_anak, susenas_design)
## school_anak
##   negeri   swasta 
## 36437689 11571696
prop.table(svytable(~school_anak, susenas_design))
## school_anak
##    negeri    swasta 
## 0.7589701 0.2410299
print("Distribution of School Choice:")
## [1] "Distribution of School Choice:"
svytable(~school_anak, susenas_svy)
## school_anak
##   negeri   swasta 
## 36437689 11571696
prop.table(svytable(~school_anak, susenas_svy))
## school_anak
##    negeri    swasta 
## 0.7589701 0.2410299
filter_pair_svy %>%
  group_by(kelas_ekonomi_krt, school_anak) %>%
  summarise(mean_biayasekolah_anak = survey_mean(biayasekolah_anak, na.rm = TRUE),
            min_biayasekolah_anak = min(biayasekolah_anak, na.rm = TRUE),
            max_biayasekolah_anak = max(biayasekolah_anak, na.rm = TRUE)) %>%
  ungroup()
## # A tibble: 10 × 6
##    kelas_ekonomi_krt   school_anak mean_biayasekolah_anak mean_biayasekolah_an…¹
##    <fct>               <fct>                        <dbl>                  <dbl>
##  1 Miskin              negeri                    1858003.                208381.
##  2 Miskin              swasta                    2299426.                366635.
##  3 Rentan miskin       negeri                    2226168.                112999.
##  4 Rentan miskin       swasta                    2592242.                221533.
##  5 Menuju kelas menen… negeri                    4129292.                179396.
##  6 Menuju kelas menen… swasta                    5302982.                347213.
##  7 Kelas menengah      negeri                   10055662.                722147.
##  8 Kelas menengah      swasta                   23085535.               1735855.
##  9 Kelas atas          negeri                   20665563.               5958187.
## 10 Kelas atas          swasta                   90900616.              19417288.
## # ℹ abbreviated name: ¹​mean_biayasekolah_anak_se
## # ℹ 2 more variables: min_biayasekolah_anak <dbl>, max_biayasekolah_anak <dbl>
filter_pair_svy %>%
  group_by(schol_level_anak, school_anak) %>%
  summarise(
    mean_biayasekolah_anak = survey_mean(biayasekolah_anak, na.rm = TRUE),
    min_biayasekolah_anak = min(biayasekolah_anak, na.rm = TRUE),
    max_biayasekolah_anak = max(biayasekolah_anak, na.rm = TRUE)
  ) %>%
  ungroup()
## # A tibble: 6 × 6
##   schol_level_anak school_anak mean_biayasekolah_anak mean_biayasekolah_anak_se
##   <fct>            <fct>                        <dbl>                     <dbl>
## 1 SD               negeri                    3896266.                   157176.
## 2 SD               swasta                   11486422.                   938072.
## 3 SMA              negeri                    7593171.                   627438.
## 4 SMA              swasta                    9646244.                  1197816.
## 5 SMP              negeri                    4681132.                   299653.
## 6 SMP              swasta                    9855713.                  1205958.
## # ℹ 2 more variables: min_biayasekolah_anak <dbl>, max_biayasekolah_anak <dbl>

============================================================================

4. MODELING DENGAN SURVEY WEIGHTS ~ REGRESI LOGISTIK

============================================================================

# LOGISTIC REGRESSION dengan survey weights
model_svy <- svyglm(
  school_anak ~ sex_krt + married_krt + age_krt + size_krt + schol_level_krt + 
    urban_krt + kelas_ekonomi_krt+ biayasekolah_krt + jmlh_sekolah_krt + jarak_sekolah_krt,
  design = susenas_design,
  family = quasibinomial()  # gunakan quasi untuk complex survey
)

summary(model_svy)
## 
## Call:
## svyglm(formula = school_anak ~ sex_krt + married_krt + age_krt + 
##     size_krt + schol_level_krt + urban_krt + kelas_ekonomi_krt + 
##     biayasekolah_krt + jmlh_sekolah_krt + jarak_sekolah_krt, 
##     design = susenas_design, family = quasibinomial())
## 
## Survey design:
## svydesign(ids = ~PSU, strata = ~STRATA, weights = ~FWT, data = filter_pair, 
##     nest = TRUE)
## 
## Coefficients:
##                                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)                            -6.835e-01  5.774e-01  -1.184  0.23670
## sex_krtperempuan                       -8.661e-02  3.325e-01  -0.260  0.79453
## married_krtsingle                       2.483e-02  3.442e-01   0.072  0.94251
## age_krt20-24                           -5.139e-01  1.371e+00  -0.375  0.70792
## age_krt25-29                           -1.893e+00  8.536e-01  -2.218  0.02672
## age_krt30-34                            2.845e-01  3.506e-01   0.811  0.41725
## age_krt35-39                            3.516e-01  3.210e-01   1.095  0.27357
## age_krt40-44                            9.909e-02  3.061e-01   0.324  0.74618
## age_krt45-49                            8.873e-02  3.002e-01   0.296  0.76758
## age_krt50-54                            3.116e-02  3.270e-01   0.095  0.92409
## age_krt55-60                            4.037e-01  3.652e-01   1.106  0.26907
## size_krt>=8                            -8.686e-01  6.004e-01  -1.447  0.14818
## size_krt3                              -2.873e-01  4.178e-01  -0.688  0.49183
## size_krt4                              -2.638e-01  4.156e-01  -0.635  0.52559
## size_krt5                              -6.040e-01  4.238e-01  -1.425  0.15432
## size_krt6                              -1.189e-01  4.498e-01  -0.264  0.79162
## size_krt7                              -5.376e-01  4.989e-01  -1.078  0.28140
## schol_level_krtSD                      -7.385e-02  1.779e-01  -0.415  0.67808
## schol_level_krtSMA                     -3.788e-02  1.607e-01  -0.236  0.81371
## schol_level_krtSMP                      5.414e-02  1.780e-01   0.304  0.76105
## schol_level_krtTidak pernah sekolah     1.687e+00  6.318e-01   2.671  0.00765
## urban_krtkota                           1.986e-01  1.196e-01   1.661  0.09699
## kelas_ekonomi_krtRentan miskin         -3.046e-02  2.253e-01  -0.135  0.89247
## kelas_ekonomi_krtMenuju kelas menengah -1.843e-01  2.291e-01  -0.804  0.42125
## kelas_ekonomi_krtKelas menengah        -2.661e-01  2.632e-01  -1.011  0.31204
## kelas_ekonomi_krtKelas atas            -6.842e-01  4.926e-01  -1.389  0.16502
## biayasekolah_krt                        6.380e-08  9.660e-09   6.604 5.46e-11
## jmlh_sekolah_krt                       -1.179e-02  1.721e-02  -0.685  0.49352
## jarak_sekolah_krt                      -4.469e-01  3.163e-01  -1.413  0.15791
##                                           
## (Intercept)                               
## sex_krtperempuan                          
## married_krtsingle                         
## age_krt20-24                              
## age_krt25-29                           *  
## age_krt30-34                              
## age_krt35-39                              
## age_krt40-44                              
## age_krt45-49                              
## age_krt50-54                              
## age_krt55-60                              
## size_krt>=8                               
## size_krt3                                 
## size_krt4                                 
## size_krt5                                 
## size_krt6                                 
## size_krt7                                 
## schol_level_krtSD                         
## schol_level_krtSMA                        
## schol_level_krtSMP                        
## schol_level_krtTidak pernah sekolah    ** 
## urban_krtkota                          .  
## kelas_ekonomi_krtRentan miskin            
## kelas_ekonomi_krtMenuju kelas menengah    
## kelas_ekonomi_krtKelas menengah           
## kelas_ekonomi_krtKelas atas               
## biayasekolah_krt                       ***
## jmlh_sekolah_krt                          
## jarak_sekolah_krt                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 1.413235)
## 
## Number of Fisher Scoring iterations: 5
# Model evaluation
# Calculate odds ratios
odds_ratios <- exp(coef(model_svy))
conf_intervals <- exp(confint(model_svy))
print("Odds Ratios with 95% Confidence Intervals:")
## [1] "Odds Ratios with 95% Confidence Intervals:"
cbind(OddsRatio = odds_ratios, conf_intervals)
##                                        OddsRatio      2.5 %     97.5 %
## (Intercept)                            0.5048343 0.16264988  1.5669097
## sex_krtperempuan                       0.9170349 0.47767712  1.7605049
## married_krtsingle                      1.0251387 0.52184664  2.0138279
## age_krt20-24                           0.5981881 0.04061398  8.8104868
## age_krt25-29                           0.1506411 0.02823821  0.8036181
## age_krt30-34                           1.3291233 0.66814096  2.6440060
## age_krt35-39                           1.4213450 0.75723762  2.6678831
## age_krt40-44                           1.1041616 0.60577121  2.0125962
## age_krt45-49                           1.0927820 0.60650697  1.9689345
## age_krt50-54                           1.0316540 0.54322326  1.9592495
## age_krt55-60                           1.4974285 0.73157111  3.0650365
## size_krt>=8                            0.4195192 0.12920132  1.3621871
## size_krt3                              0.7503065 0.33061387  1.7027714
## size_krt4                              0.7680989 0.33995582  1.7354488
## size_krt5                              0.5466363 0.23805396  1.2552249
## size_krt6                              0.8879403 0.36749454  2.1454411
## size_krt7                              0.5841406 0.21953845  1.5542617
## schol_level_krtSD                      0.9288075 0.65522429  1.3166230
## schol_level_krtSMA                     0.9628265 0.70247301  1.3196733
## schol_level_krtSMP                     1.0556340 0.74451979  1.4967541
## schol_level_krtTidak pernah sekolah    5.4046567 1.56532156 18.6609033
## urban_krtkota                          1.2196452 0.96466785  1.5420172
## kelas_ekonomi_krtRentan miskin         0.9699949 0.62347689  1.5091017
## kelas_ekonomi_krtMenuju kelas menengah 0.8317058 0.53068772  1.3034682
## kelas_ekonomi_krtKelas menengah        0.7663410 0.45734606  1.2841009
## kelas_ekonomi_krtKelas atas            0.5044809 0.19196711  1.3257528
## biayasekolah_krt                       1.0000001 1.00000004  1.0000001
## jmlh_sekolah_krt                       0.9882841 0.95548444  1.0222098
## jarak_sekolah_krt                      0.6396343 0.34394874  1.1895147