KNCAST3.R

# ============================================================
# 변수 처리 및 회귀식 정리
# ============================================================

# ============================================================
# [ 12개월 / 24개월 ]
# ============================================================
#
# 데이터 처리:
#   NCATS, HOME                              → mean()        (쌍둥이 평균)
#   AGE_CARE, EDU_GRUP_Bin, SEINCOME_5CAT,
#   EMP_CARG, IETESTBHX, BBDEVWK            → first()       (시점 내 첫번째)
#   BBSEX                                   → mean()→round()→factor()
#                                              (쌍둥이 성별 다를 수 있음)
#   쌍둥이                                  → 가족당 1행 평균 (독립성 확보)
#   양육자 변경                             → 포함
#
# 회귀식:
#   [BBSEX 제외 예측변수] 쌍둥이 포함
#     Y_i = β0 + β1(RAN_i) + β2(predictor_i) + ε_i
#
#   [BBSEX] 쌍둥이 제외 (singleton only)
#     Y_i = β0 + β1(RAN_i) + β2(BBSEX_i) + ε_i
#
# ============================================================
# [ Pooled ]
# ============================================================
#
# 데이터 처리:
#   NCATS, HOME                              → mean()        (쌍둥이 평균)
#   AGE_CARE, EDU_GRUP_Bin,
#   IETESTBHX, BBDEVWK       [Level-2]      → first()       (가족수준, 불변)
#   RAN                      [Level-2]      → first()       (무작위배정, 불변)
#   SEINCOME_5CAT, EMP_CARG  [Level-1]      → 시점별 first() (시점간 변동)
#   TIME                     [Level-1]      → 0=12개월, 1=24개월
#   BBSEX                    [Level-2]      → mean()→round()→factor()
#                                              (쌍둥이 성별 다를 수 있음)
#   쌍둥이                                  → 시점별 가족당 1행 평균
#   양육자 변경 (QSNCCARE)                  → 제외 (동일 양육자 보장)
#
# 회귀식:
#   [BBSEX 제외 예측변수] 쌍둥이 포함
#     Level-1: Y_ti = β0i + β1(TIME_ti) + β2(predictor_ti) + ε_ti
#     Level-2: β0i  = γ00 + γ01(RAN_i)  + u0i
#     통합:
#     Y_ti = γ00 + γ01(RAN_i) + β1(TIME_ti) + β2(predictor_ti) + u0i + ε_ti
#
#   [BBSEX] 쌍둥이 제외 (singleton only)
#     Y_ti = γ00 + γ01(RAN_i) + β1(TIME_ti) + β2(BBSEX_i) + u0i + ε_ti
#
# ============================================================
# [ 세 분석 비교 ]
# ============================================================
#
#   12m:  Y_i  = β0 + β1·RAN + β2·X                    + ε
#   24m:  Y_i  = β0 + β1·RAN + β2·X                    + ε
#   Pool: Y_ti = γ00 + γ01·RAN + β1·TIME + β2·X + u0i  + ε_ti
#                                    ↑               ↑
#                              반복측정 통제     엄마 클러스터링
#
#         분석    모델   랜덤구조      TIME    RAN    양육자변경
#         12m     lm     없음          X      통제    포함
#         24m     lm     없음          X      통제    포함
#         Pooled  lmer   (1|SUBJNO)    O      통제    제외
# ============================================================


# ============================================================
# 패키지
# ============================================================
library(haven)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(purrr)
library(tidyr)
library(lme4)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

library(lmerTest)

## 
## Attaching package: 'lmerTest'

## The following object is masked from 'package:lme4':
## 
##     lmer

## The following object is masked from 'package:stats':
## 
##     step

library(broom.mixed)

# ============================================================
# 변수 목록
# ============================================================
ncats_vars <- c("QSNCIA", "QSNCIIA", "QSNCIIIA", "QSNCIVA", "QSNCVA",
                "QSNCVIA", "QSNCMTB", "QSNCBTB", "QSNCMTA",
                "QSNCBTA", "QSNCTTA")

home_vars <- c("QSHOMETOTAL", "QSHOMEREP", "QSHOMEACP", "QSHOMEORG",
               "QSHOMELMT",   "QSHOMEIVM", "QSHOMESML")

outcomes        <- ncats_vars
predictors_main <- c("AGE_CARE", "EDU_GRUP_Bin", "SEINCOME_5CAT",
                     "EMP_CARG", "IETESTBHX", "BBDEVWK")
# BBSEX: 쌍둥이 제외, 별도 처리

base_path <- "E:/Dropbox/STATISTICS/R DATA/"

# ============================================================
# Step 1 — 원시 데이터 로드 + 기본 전처리
# ============================================================
data_all <- read_sav(paste0(base_path, "NCAST_HOME_short.sav"))

data_all_pp <- data_all %>%
  mutate(
    AGE_CARE      = as.numeric(AGE_CARE),
    SEINCOME_5CAT = as.numeric(SEINCOME_5CAT),
    BBDEVWK       = as.numeric(BBDEVWK),
    BBSEX         = as.numeric(BBSEX),
    EDU_GRUP_Bin  = as.numeric(EDU_GRUP_Bin),
    EMP_CARG      = as.numeric(EMP_CARG),
    IETESTBHX     = as.numeric(IETESTBHX),
    TWIN          = as.numeric(TWIN),
    RAN           = as.numeric(RAN),
    TIME          = as.numeric(TIME),
    QSNCCARE      = as.numeric(QSNCCARE)
  )

cat("QSNCCARE 분포:\n")

## QSNCCARE 분포:

print(table(data_all_pp$QSNCCARE, useNA = "always"))

## 
##    1    2    3 <NA> 
## 1503   39    4    0

# ============================================================
# Step 2 — 쌍둥이 평균 함수 (12m / 24m / Pooled 공통)
# 모든 예측변수: first()
# BBSEX만: mean() → round() → factor()
# ============================================================
make_twin_mean_full <- function(data, time_val) {
  data %>%
    filter(TIME == time_val) %>%
    group_by(SUBJNO) %>%
    summarise(
      across(all_of(c(ncats_vars, home_vars)),
             ~ mean(.x, na.rm = TRUE)),
      AGE_CARE      = first(AGE_CARE),
      EDU_GRUP_Bin  = first(EDU_GRUP_Bin),
      SEINCOME_5CAT = first(SEINCOME_5CAT),
      EMP_CARG      = first(EMP_CARG),
      IETESTBHX     = first(IETESTBHX),
      BBDEVWK       = first(BBDEVWK),
      BBSEX         = mean(BBSEX, na.rm = TRUE),
      is_twin       = any(TWIN == 1),
      RAN           = first(RAN),
      TIME          = time_val,
      .groups       = "drop"
    ) %>%
    mutate(
      EDU_GRUP_Bin = factor(EDU_GRUP_Bin),
      EMP_CARG     = factor(EMP_CARG),
      IETESTBHX    = factor(IETESTBHX),
      BBSEX        = factor(round(BBSEX)),
      RAN          = factor(RAN)
    )
}

# ============================================================
# Step 3 — 12m / 24m 데이터
# 양육자 변경 포함, 쌍둥이 평균
# ============================================================
data_12_full <- make_twin_mean_full(data_all_pp, time_val = 0)
data_24_full <- make_twin_mean_full(data_all_pp, time_val = 1)

# ============================================================
# Step 4 — Pooled 데이터
# 양육자 변경(QSNCCARE) 제외 + 쌍둥이 평균 + 1시점 참여자 포함
# ============================================================
carg_pattern <- data_all_pp %>%
  group_by(SUBJNO) %>%
  summarise(
    n_carg      = n_distinct(QSNCCARE, na.rm = TRUE),
    carg_values = paste(sort(unique(QSNCCARE)), collapse = "->"),
    .groups     = "drop"
  )

cat("\n양육자 변경 패턴:\n")

## 
## 양육자 변경 패턴:

carg_pattern %>% filter(n_carg > 1) %>% count(carg_values) %>% print()

## # A tibble: 2 × 2
##   carg_values     n
##   <chr>       <int>
## 1 1->2           27
## 2 1->3            4

carg_change <- carg_pattern %>% filter(n_carg > 1) %>% pull(SUBJNO)
cat("양육자 변경 제외 가족:", length(carg_change), "개\n")

## 양육자 변경 제외 가족: 31 개

data_all_pool <- data_all_pp %>% filter(!SUBJNO %in% carg_change)

data_12_pool <- make_twin_mean_full(data_all_pool, time_val = 0)
data_24_pool <- make_twin_mean_full(data_all_pool, time_val = 1)

data_pool_long <- bind_rows(data_12_pool, data_24_pool) %>%
  mutate(TIME = factor(TIME))  # TIME: Level-1 고정 공변량 (반복측정)

# ============================================================
# Step 5 — 샘플 크기 확인
# ============================================================
pool_info <- data_pool_long %>%
  group_by(SUBJNO) %>%
  summarise(
    n_time  = n_distinct(TIME),
    is_twin = first(is_twin),
    .groups = "drop"
  )

cat("\n============================================================\n")

## 
## ============================================================

cat("샘플 구성 확인\n")

## 샘플 구성 확인

cat("============================================================\n")

## ============================================================

cat("\n[ 12개월 ]\n")

## 
## [ 12개월 ]

cat("전체 가족    :", nrow(data_12_full), "\n")

## 전체 가족    : 748

cat("Singleton    :", sum(!data_12_full$is_twin), "\n")

## Singleton    : 707

cat("Twin         :", sum( data_12_full$is_twin), "\n")

## Twin         : 41

cat("BBSEX 분석 n :", sum(!data_12_full$is_twin), "(singleton only)\n")

## BBSEX 분석 n : 707 (singleton only)

cat("\n[ 24개월 ]\n")

## 
## [ 24개월 ]

cat("전체 가족    :", nrow(data_24_full), "\n")

## 전체 가족    : 719

cat("Singleton    :", sum(!data_24_full$is_twin), "\n")

## Singleton    : 681

cat("Twin         :", sum( data_24_full$is_twin), "\n")

## Twin         : 38

cat("BBSEX 분석 n :", sum(!data_24_full$is_twin), "(singleton only)\n")

## BBSEX 분석 n : 681 (singleton only)

cat("\n[ Pooled ]\n")

## 
## [ Pooled ]

cat("총 관측치    :", nrow(data_pool_long), "\n")

## 총 관측치    : 1405

cat("총 가족      :", nrow(pool_info), "\n")

## 총 가족      : 732

cat("2시점 참여   :", sum(pool_info$n_time == 2), "\n")

## 2시점 참여   : 673

cat("1시점 참여   :", sum(pool_info$n_time == 1), "\n")

## 1시점 참여   : 59

cat("Singleton    :", sum(!pool_info$is_twin), "\n")

## Singleton    : 694

cat("Twin         :", sum( pool_info$is_twin), "\n")

## Twin         : 38

cat("BBSEX 분석 n :", sum(!data_pool_long$is_twin), "(singleton 관측치)\n")

## BBSEX 분석 n : 1332 (singleton 관측치)

cat("\n[ Pooled 시점 × 쌍둥이 교차표 ]\n")

## 
## [ Pooled 시점 × 쌍둥이 교차표 ]

cross <- pool_info %>%
  mutate(
    time_label = ifelse(n_time == 2, "2 timepoints", "1 timepoint"),
    twin_label = ifelse(is_twin, "Twin", "Singleton")
  ) %>%
  count(time_label, twin_label) %>%
  pivot_wider(names_from  = twin_label,
              values_from = n,
              values_fill = 0) %>%
  mutate(Subtotal = Singleton + Twin) %>%
  arrange(time_label)

print(as.data.frame(cross))

##     time_label Singleton Twin Subtotal
## 1  1 timepoint        56    3       59
## 2 2 timepoints       638   35      673

cat("\n소계\n")

## 
## 소계

cat("1시점 - Singleton:", cross$Singleton[cross$time_label == "1 timepoint"],
    "| Twin:", cross$Twin[cross$time_label == "1 timepoint"], "\n")

## 1시점 - Singleton: 56 | Twin: 3

cat("2시점 - Singleton:", cross$Singleton[cross$time_label == "2 timepoints"],
    "| Twin:", cross$Twin[cross$time_label == "2 timepoints"], "\n")

## 2시점 - Singleton: 638 | Twin: 35

# ============================================================
# Step 6 — 분석 함수: 12m / 24m
# lm(outcome ~ RAN + predictor)
# 표준화계수: Beta = B × (SD_x / SD_y)
# BBSEX: 쌍둥이 제외
# ============================================================
run_lm_cross <- function(df, wave_label) {
  
  df_all       <- df
  df_singleton <- filter(df, !is_twin)
  
  map_dfr(outcomes, function(outcome) {
    
    # ── 주요 예측변수: 쌍둥이 포함 ─────────────────────
    res_main <- map_dfr(predictors_main, function(predictor) {
      
      fit <- tryCatch(
        lm(as.formula(paste0(outcome, " ~ RAN + ", predictor)),
           data = df_all),
        error = function(e) NULL
      )
      if (is.null(fit)) return(NULL)
      
      ci   <- confint(fit)
      cf   <- summary(fit)$coefficients
      rows <- rownames(cf)[startsWith(rownames(cf), predictor)]
      if (length(rows) == 0) return(NULL)
      
      sd_y <- sd(df_all[[outcome]],                  na.rm = TRUE)
      sd_x <- sd(as.numeric(df_all[[predictor]]),    na.rm = TRUE)
      
      map_dfr(rows, function(r) {
        b_val <- cf[r, "Estimate"]
        data.frame(
          wave      = wave_label,
          outcome   = outcome,
          predictor = predictor,
          term      = r,
          n         = nrow(df_all),
          twin_excl = FALSE,
          B         = round(b_val, 3),
          Beta      = round(b_val * (sd_x / sd_y), 3),
          SE        = round(cf[r, "Std. Error"], 3),
          CI        = paste0("[", round(ci[r, 1], 3), ", ",
                             round(ci[r, 2], 3), "]"),
          p         = ifelse(cf[r, "Pr(>|t|)"] < .001, "< .001",
                             as.character(round(cf[r, "Pr(>|t|)"], 3)))
        )
      })
    })
    
    # ── BBSEX: 쌍둥이 제외 ─────────────────────────────
    fit_sex <- tryCatch(
      lm(as.formula(paste0(outcome, " ~ RAN + BBSEX")),
         data = df_singleton),
      error = function(e) NULL
    )
    
    res_sex <- NULL
    if (!is.null(fit_sex)) {
      ci    <- confint(fit_sex)
      cf    <- summary(fit_sex)$coefficients
      r     <- "BBSEX1"
      if (r %in% rownames(cf)) {
        sd_y  <- sd(df_singleton[[outcome]],              na.rm = TRUE)
        sd_x  <- sd(as.numeric(df_singleton[["BBSEX"]]),  na.rm = TRUE)
        b_val <- cf[r, "Estimate"]
        res_sex <- data.frame(
          wave      = wave_label,
          outcome   = outcome,
          predictor = "BBSEX",
          term      = r,
          n         = nrow(df_singleton),
          twin_excl = TRUE,
          B         = round(b_val, 3),
          Beta      = round(b_val * (sd_x / sd_y), 3),
          SE        = round(cf[r, "Std. Error"], 3),
          CI        = paste0("[", round(ci[r, 1], 3), ", ",
                             round(ci[r, 2], 3), "]"),
          p         = ifelse(cf[r, "Pr(>|t|)"] < .001, "< .001",
                             as.character(round(cf[r, "Pr(>|t|)"], 3)))
        )
      }
    }
    
    bind_rows(res_main, res_sex)
  })
}

# ============================================================
# Step 7 — 분석 함수: Pooled
# lmer(outcome ~ RAN + TIME + predictor + (1|SUBJNO))
# TIME: Level-1 고정 공변량 (반복측정 통제)
# RAN:  Level-2 고정 공변량
# 표준화계수: Beta = B × (SD_x / SD_y)
# BBSEX: 쌍둥이 제외
# ============================================================
run_lmer_pooled <- function(df_long, wave_label = "Pooled") {
  
  df_all       <- df_long
  df_singleton <- filter(df_long, !is_twin)
  
  map_dfr(outcomes, function(outcome) {
    
    # ── 주요 예측변수: 쌍둥이 포함 ─────────────────────
    res_main <- map_dfr(predictors_main, function(predictor) {
      
      # TIME: Level-1 (반복측정), RAN: Level-2 — 모두 고정 통제
      fit <- tryCatch(
        suppressWarnings(
          lmer(as.formula(paste0(
            outcome, " ~ RAN + TIME + ", predictor, " + (1 | SUBJNO)")),
            data    = df_all,
            REML    = FALSE,
            control = lmerControl(optimizer = "bobyqa"))
        ),
        error = function(e) NULL
      )
      if (is.null(fit)) return(NULL)
      
      sd_y <- sd(df_all[[outcome]],                na.rm = TRUE)
      sd_x <- sd(as.numeric(df_all[[predictor]]),  na.rm = TRUE)
      
      tidy(fit, effects = "fixed", conf.int = TRUE) %>%
        filter(startsWith(term, predictor)) %>%
        mutate(
          wave      = wave_label,
          outcome   = outcome,
          predictor = predictor,
          n         = nrow(df_all),
          twin_excl = FALSE,
          B         = round(estimate,  3),
          Beta      = round(estimate * (sd_x / sd_y), 3),
          SE        = round(std.error, 3),
          CI        = paste0("[", round(conf.low,  3), ", ",
                             round(conf.high, 3), "]"),
          p         = ifelse(p.value < .001, "< .001",
                             as.character(round(p.value, 3)))
        ) %>%
        dplyr::select(wave, outcome, predictor, term,
                      n, twin_excl, B, Beta, SE, CI, p)
    })
    
    # ── BBSEX: 쌍둥이 제외 ─────────────────────────────
    fit_sex <- tryCatch(
      suppressWarnings(
        lmer(as.formula(paste0(
          outcome, " ~ RAN + TIME + BBSEX + (1 | SUBJNO)")),
          data    = df_singleton,
          REML    = FALSE,
          control = lmerControl(optimizer = "bobyqa"))
      ),
      error = function(e) NULL
    )
    
    res_sex <- NULL
    if (!is.null(fit_sex)) {
      sd_y <- sd(df_singleton[[outcome]],              na.rm = TRUE)
      sd_x <- sd(as.numeric(df_singleton[["BBSEX"]]),  na.rm = TRUE)
      res_sex <- tidy(fit_sex, effects = "fixed", conf.int = TRUE) %>%
        filter(startsWith(term, "BBSEX")) %>%
        mutate(
          wave      = wave_label,
          outcome   = outcome,
          predictor = "BBSEX",
          n         = nrow(df_singleton),
          twin_excl = TRUE,
          B         = round(estimate,  3),
          Beta      = round(estimate * (sd_x / sd_y), 3),
          SE        = round(std.error, 3),
          CI        = paste0("[", round(conf.low,  3), ", ",
                             round(conf.high, 3), "]"),
          p         = ifelse(p.value < .001, "< .001",
                             as.character(round(p.value, 3)))
        ) %>%
        dplyr::select(wave, outcome, predictor, term,
                      n, twin_excl, B, Beta, SE, CI, p)
    }
    
    bind_rows(res_main, res_sex)
  })
}

# ============================================================
# Step 8 — 실행
# ============================================================
cat("\n12개월 분석 중...\n")

## 
## 12개월 분석 중...

results_12m <- run_lm_cross(data_12_full, "12M")

cat("24개월 분석 중...\n")

## 24개월 분석 중...

results_24m <- run_lm_cross(data_24_full, "24M")

cat("Pooled 분석 중...\n")

## Pooled 분석 중...

results_pooled <- run_lmer_pooled(data_pool_long, "Pooled")

# ============================================================
# Step 9 — 결과 확인
# ============================================================
cat("\n=== 12개월 결과 (앞 10행) ===\n")

## 
## === 12개월 결과 (앞 10행) ===

print(as.data.frame(head(results_12m, 10)))

##    wave outcome     predictor          term   n twin_excl      B   Beta    SE
## 1   12M  QSNCIA      AGE_CARE      AGE_CARE 748     FALSE  0.004  0.015 0.010
## 2   12M  QSNCIA  EDU_GRUP_Bin EDU_GRUP_Bin1 748     FALSE  0.174  0.044 0.144
## 3   12M  QSNCIA SEINCOME_5CAT SEINCOME_5CAT 748     FALSE  0.019  0.012 0.058
## 4   12M  QSNCIA      EMP_CARG     EMP_CARG1 748     FALSE  0.020  0.009 0.084
## 5   12M  QSNCIA     IETESTBHX    IETESTBHX1 748     FALSE -0.146 -0.058 0.093
## 6   12M  QSNCIA       BBDEVWK       BBDEVWK 748     FALSE  0.060  0.083 0.026
## 7   12M  QSNCIA         BBSEX        BBSEX1 707      TRUE -0.188 -0.082 0.086
## 8   12M QSNCIIA      AGE_CARE      AGE_CARE 748     FALSE -0.004 -0.019 0.007
## 9   12M QSNCIIA  EDU_GRUP_Bin EDU_GRUP_Bin1 748     FALSE  0.049  0.016 0.114
## 10  12M QSNCIIA SEINCOME_5CAT SEINCOME_5CAT 748     FALSE  0.058  0.046 0.046
##                 CI     p
## 1  [-0.015, 0.023] 0.678
## 2  [-0.109, 0.456] 0.228
## 3  [-0.096, 0.133]  0.75
## 4  [-0.145, 0.184] 0.815
## 5  [-0.328, 0.036] 0.115
## 6   [0.008, 0.111] 0.023
## 7  [-0.356, -0.02] 0.029
## 8  [-0.019, 0.011] 0.605
## 9  [-0.174, 0.272] 0.667
## 10 [-0.032, 0.148] 0.208

cat("\n=== 24개월 결과 (앞 10행) ===\n")

## 
## === 24개월 결과 (앞 10행) ===

print(as.data.frame(head(results_24m, 10)))

##    wave outcome     predictor          term   n twin_excl      B   Beta    SE
## 1   24M  QSNCIA      AGE_CARE      AGE_CARE 719     FALSE  0.013  0.053 0.009
## 2   24M  QSNCIA  EDU_GRUP_Bin EDU_GRUP_Bin1 719     FALSE  0.153  0.038 0.148
## 3   24M  QSNCIA SEINCOME_5CAT SEINCOME_5CAT 719     FALSE  0.015  0.011 0.051
## 4   24M  QSNCIA      EMP_CARG     EMP_CARG1 719     FALSE  0.149  0.070 0.080
## 5   24M  QSNCIA     IETESTBHX    IETESTBHX1 719     FALSE  0.129  0.054 0.089
## 6   24M  QSNCIA       BBDEVWK       BBDEVWK 719     FALSE  0.032  0.045 0.026
## 7   24M  QSNCIA         BBSEX        BBSEX1 681      TRUE -0.164 -0.076 0.082
## 8   24M QSNCIIA      AGE_CARE      AGE_CARE 719     FALSE -0.006 -0.023 0.010
## 9   24M QSNCIIA  EDU_GRUP_Bin EDU_GRUP_Bin1 719     FALSE  0.113  0.027 0.157
## 10  24M QSNCIIA SEINCOME_5CAT SEINCOME_5CAT 719     FALSE  0.075  0.052 0.054
##                  CI     p
## 1   [-0.005, 0.031] 0.156
## 2   [-0.139, 0.444] 0.304
## 3   [-0.086, 0.116] 0.766
## 4   [-0.007, 0.306] 0.062
## 5   [-0.046, 0.303] 0.148
## 6    [-0.02, 0.084] 0.234
## 7  [-0.326, -0.002] 0.047
## 8   [-0.025, 0.013] 0.547
## 9   [-0.195, 0.421] 0.471
## 10  [-0.031, 0.181] 0.167

cat("\n=== Pooled 결과 (앞 10행) ===\n")

## 
## === Pooled 결과 (앞 10행) ===

print(as.data.frame(head(results_pooled, 10)))

##      wave outcome     predictor          term    n twin_excl      B   Beta
## 1  Pooled  QSNCIA      AGE_CARE      AGE_CARE 1405     FALSE  0.009  0.031
## 2  Pooled  QSNCIA  EDU_GRUP_Bin EDU_GRUP_Bin1 1405     FALSE  0.130  0.032
## 3  Pooled  QSNCIA SEINCOME_5CAT SEINCOME_5CAT 1405     FALSE  0.005  0.003
## 4  Pooled  QSNCIA      EMP_CARG     EMP_CARG1 1405     FALSE  0.081  0.036
## 5  Pooled  QSNCIA     IETESTBHX    IETESTBHX1 1405     FALSE -0.010 -0.004
## 6  Pooled  QSNCIA       BBDEVWK       BBDEVWK 1405     FALSE  0.050  0.068
## 7  Pooled  QSNCIA         BBSEX        BBSEX1 1332      TRUE -0.154 -0.067
## 8  Pooled QSNCIIA      AGE_CARE      AGE_CARE 1405     FALSE -0.007 -0.028
## 9  Pooled QSNCIIA  EDU_GRUP_Bin EDU_GRUP_Bin1 1405     FALSE  0.108  0.030
## 10 Pooled QSNCIIA SEINCOME_5CAT SEINCOME_5CAT 1405     FALSE  0.079  0.058
##       SE               CI     p
## 1  0.008  [-0.007, 0.024] 0.263
## 2  0.109  [-0.083, 0.344]  0.23
## 3  0.041  [-0.076, 0.085] 0.912
## 4  0.062   [-0.04, 0.202]  0.19
## 5  0.068  [-0.145, 0.124] 0.878
## 6  0.020   [0.011, 0.089] 0.012
## 7  0.063 [-0.278, -0.029] 0.016
## 8  0.007  [-0.021, 0.006] 0.299
## 9  0.098    [-0.083, 0.3] 0.268
## 10 0.037   [0.006, 0.151] 0.033

# ============================================================
# Step 10 — 저장
# ============================================================
write.csv(results_12m,
          paste0(base_path, "Discrim_12M.csv"),    row.names = FALSE)
write.csv(results_24m,
          paste0(base_path, "Discrim_24M.csv"),    row.names = FALSE)
write.csv(results_pooled,
          paste0(base_path, "Discrim_Pooled.csv"), row.names = FALSE)

results_all <- bind_rows(results_12m, results_24m, results_pooled)
write.csv(results_all,
          paste0(base_path, "Discrim_ALL.csv"),    row.names = FALSE)

cat("\n============================================================\n")

## 
## ============================================================

cat("저장 완료\n")

## 저장 완료

cat("12M  모델 수 :", nrow(results_12m), "\n")

## 12M  모델 수 : 77

cat("24M  모델 수 :", nrow(results_24m), "\n")

## 24M  모델 수 : 77

cat("Pool 모델 수 :", nrow(results_pooled), "\n")

## Pool 모델 수 : 77

cat("BBSEX 쌍둥이 제외 행:",
    sum(results_all$twin_excl, na.rm = TRUE), "\n")

## BBSEX 쌍둥이 제외 행: 33

# ============================================================
# Step 7b — TIME 효과 함수: Pooled
# lmer(outcome ~ RAN + TIME + (1|SUBJNO))
# RAN 고정 통제, TIME 계수만 추출
# ============================================================
run_lmer_time <- function(df_long, wave_label = "Pooled") {
  
  df_all <- df_long
  
  map_dfr(outcomes, function(outcome) {
    
    fit <- tryCatch(
      suppressWarnings(
        lmer(as.formula(paste0(
          outcome, " ~ RAN + TIME + (1 | SUBJNO)")),
          data    = df_all,
          REML    = FALSE,
          control = lmerControl(optimizer = "bobyqa"))
      ),
      error = function(e) NULL
    )
    if (is.null(fit)) return(NULL)
    
    sd_y <- sd(df_all[[outcome]], na.rm = TRUE)
    sd_x <- sd(as.numeric(df_all$TIME), na.rm = TRUE)
    
    tidy(fit, effects = "fixed", conf.int = TRUE) %>%
      filter(startsWith(term, "TIME")) %>%
      mutate(
        wave      = wave_label,
        outcome   = outcome,
        predictor = "TIME",
        n         = nrow(df_all),
        B         = round(estimate,  3),
        Beta      = round(estimate * (sd_x / sd_y), 3),
        SE        = round(std.error, 3),
        CI        = paste0("[", round(conf.low,  3), ", ",
                           round(conf.high, 3), "]"),
        p         = ifelse(p.value < .001, "< .001",
                           as.character(round(p.value, 3)))
      ) %>%
      dplyr::select(wave, outcome, predictor, term,
                    n, B, Beta, SE, CI, p)
  })
}

# ============================================================
# Step 8 — 실행 (기존 + TIME 효과 추가)
# ============================================================
cat("\n12개월 분석 중...\n")

## 
## 12개월 분석 중...

results_12m <- run_lm_cross(data_12_full, "12M")

cat("24개월 분석 중...\n")

## 24개월 분석 중...

results_24m <- run_lm_cross(data_24_full, "24M")

cat("Pooled 분석 중...\n")

## Pooled 분석 중...

results_pooled <- run_lmer_pooled(data_pool_long, "Pooled")

cat("Pooled TIME 효과 분석 중...\n")

## Pooled TIME 효과 분석 중...

results_time <- run_lmer_time(data_pool_long, "Pooled")

# ============================================================
# Step 9 — 결과 확인
# ============================================================
cat("\n=== TIME 효과 결과 ===\n")

## 
## === TIME 효과 결과 ===

print(as.data.frame(results_time))

##      wave  outcome predictor  term    n      B   Beta    SE               CI
## 1  Pooled   QSNCIA      TIME TIME1 1405  0.474  0.209 0.057   [0.363, 0.586]
## 2  Pooled  QSNCIIA      TIME TIME1 1405 -0.045 -0.022 0.053  [-0.149, 0.059]
## 3  Pooled QSNCIIIA      TIME TIME1 1405  0.320  0.135 0.058   [0.206, 0.433]
## 4  Pooled  QSNCIVA      TIME TIME1 1405  1.165  0.280 0.094    [0.979, 1.35]
## 5  Pooled   QSNCVA      TIME TIME1 1405 -0.213 -0.112 0.049  [-0.31, -0.116]
## 6  Pooled  QSNCVIA      TIME TIME1 1405 -0.701 -0.179 0.099 [-0.896, -0.506]
## 7  Pooled  QSNCMTB      TIME TIME1 1405  1.289  0.255 0.123    [1.047, 1.53]
## 8  Pooled  QSNCBTB      TIME TIME1 1405 -0.621 -0.180 0.088 [-0.793, -0.449]
## 9  Pooled  QSNCMTA      TIME TIME1 1405  1.915  0.266 0.164   [1.593, 2.238]
## 10 Pooled  QSNCBTA      TIME TIME1 1405 -0.914 -0.176 0.132 [-1.173, -0.655]
## 11 Pooled  QSNCTTA      TIME TIME1 1405  1.002  0.109 0.213    [0.583, 1.42]
##         p
## 1  < .001
## 2   0.399
## 3  < .001
## 4  < .001
## 5  < .001
## 6  < .001
## 7  < .001
## 8  < .001
## 9  < .001
## 10 < .001
## 11 < .001

# ============================================================
# Step 10 — 저장
# ============================================================
write.csv(results_12m,
          paste0(base_path, "Discrim_12M.csv"),    row.names = FALSE)
write.csv(results_24m,
          paste0(base_path, "Discrim_24M.csv"),    row.names = FALSE)
write.csv(results_pooled,
          paste0(base_path, "Discrim_Pooled.csv"), row.names = FALSE)
write.csv(results_time,
          paste0(base_path, "Discrim_TIME.csv"),   row.names = FALSE)

results_all <- bind_rows(results_12m, results_24m, results_pooled)
write.csv(results_all,
          paste0(base_path, "Discrim_ALL.csv"),    row.names = FALSE)

cat("\n저장 완료\n")

## 
## 저장 완료

cat("TIME 효과 모델 수:", nrow(results_time), "\n")

## TIME 효과 모델 수: 11

KNCAST3.R

joohk

2026-04-21