library(haven)
data <- read_dta("C:/Users/User/Documents/data_Deming_2008_0217.dta")

# Load required packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(purrr)

# Read the data

# Replace negative values with NA (Stata's missing value coding)
data <- data %>%
  mutate(across(ChildID:hhID, ~ ifelse(. < 0, NA, .)))

# Create age variables with alternative calculations
years <- c("86", "88", "90", "92", "94", "96", "98", "100", "102", "104")

# Create Age2_Mo variables using PPVT age as backup
for(y in years) {
  age_var <- paste0("Age_Mo", y)
  ppvt_var <- paste0("PPVTAge", y)
  age2_var <- paste0("Age2_Mo", y)
  
  data <- data %>%
    mutate(!!age2_var := ifelse(!is.na(.data[[age_var]]), .data[[age_var]], 
                                ifelse(!is.na(.data[[ppvt_var]]), .data[[ppvt_var]], NA)))
}

# Fill in missing age values using adjacent survey years
age_pairs <- list(
  c("86", "88"), c("88", "86"), c("88", "90"), c("90", "88"),
  c("90", "92"), c("92", "90"), c("92", "94"), c("94", "92"),
  c("94", "96"), c("96", "94"), c("96", "98"), c("98", "96"),
  c("98", "100"), c("100", "98"), c("100", "102"), c("102", "100"),
  c("102", "104"), c("104", "102")
)

for(pair in age_pairs) {
  y1 <- pair[1]
  y2 <- pair[2]
  age2_var1 <- paste0("Age2_Mo", y1)
  age2_var2 <- paste0("Age2_Mo", y2)
  
  if(grepl("86|88|90|92|94|96|98|100", y2)) { # y2 is the later year
    data <- data %>%
      mutate(!!age2_var1 := ifelse(is.na(.data[[age2_var1]]) & 
                                     !is.na(.data[[age2_var2]]) & 
                                     .data[[age2_var2]] >= 25,
                                   .data[[age2_var2]] - 24,
                                   .data[[age2_var1]]))
  } else { # y2 is the earlier year
    data <- data %>%
      mutate(!!age2_var1 := ifelse(is.na(.data[[age2_var1]]) & 
                                     !is.na(.data[[age2_var2]]),
                                   .data[[age2_var2]] + 24,
                                   .data[[age2_var1]]))
  }
}

# Create age in years variables
for(x in years) {
  age_mo_var <- paste0("Age_Mo", x)
  age2_mo_var <- paste0("Age2_Mo", x)
  age_yr_var <- paste0("Age_Yr", x)
  age2_yr_var <- paste0("Age2_Yr", x)
  
  data <- data %>%
    mutate(
      !!age_yr_var := ifelse(.data[[age_mo_var]] < 12, 0, NA),
      !!age2_yr_var := ifelse(.data[[age2_mo_var]] < 12, 0, NA)
    )
  
  for(y in 1:37) {
    data <- data %>%
      mutate(
        !!age_yr_var := ifelse(.data[[age_mo_var]] >= (12*y) & 
                                 .data[[age_mo_var]] < (12*y + 12), 
                               y, .data[[age_yr_var]]),
        !!age2_yr_var := ifelse(.data[[age2_mo_var]] >= (12*y) & 
                                  .data[[age2_mo_var]] < (12*y + 12), 
                                y, .data[[age2_yr_var]])
      )
  }
}

# Create sample eligibility variables
for(y in c("86", "88", "90")) {
  age_var <- paste0("Age_Mo", y)
  age2_var <- paste0("Age2_Mo", y)
  elig1_var <- paste0("Elig1_", y)
  elig2_var <- paste0("Elig2_", y)
  
  data <- data %>%
    group_by(MotherID) %>%
    mutate(
      temp_count1 = sum(!is.na(.data[[age_var]]) & .data[[age_var]] > 47, na.rm = TRUE),
      temp_count2 = sum(!is.na(.data[[age2_var]]) & .data[[age2_var]] > 47, na.rm = TRUE),
      !!elig1_var := ifelse(temp_count1 > 1, 1, 0),
      !!elig2_var := ifelse(temp_count2 > 1, 1, 0)
    ) %>%
    ungroup() %>%
    select(-temp_count1, -temp_count2)
}

# Handle deceased respondents
for(x in c("86", "88", "90")) {
  res_var <- paste0("Res", x)
  dead_var <- paste0("Dead", x)
  elig1_var <- paste0("Elig1_", x)
  elig2_var <- paste0("Elig2_", x)
  
  data <- data %>%
    mutate(
      !!dead_var := ifelse(.data[[res_var]] == 8, 1, 0),
      !!elig1_var := ifelse(.data[[dead_var]] == 1, NA, .data[[elig1_var]]),
      !!elig2_var := ifelse(.data[[dead_var]] == 1, NA, .data[[elig2_var]])
    )
}

data <- data %>%
  mutate(Deceased = ifelse(Res104 == 8, 1, 0)) %>%
  select(-matches("^Dead"))

# Create Head Start and Preschool participation variables
for(y in c("1", "2")) {
  hs90_var <- paste0("HS", y, "_90")
  pre90_var <- paste0("Pre", y, "_90")
  none90_var <- paste0("None", y, "_90")
  elig_var <- paste0("Elig", y, "_90")
  
  data <- data %>%
    mutate(
      !!hs90_var := pmax(Ever_HS88, Ever_HS90, na.rm = TRUE),
      !!pre90_var := pmax(Ever_Preschool88, Ever_Preschool90, na.rm = TRUE),
      !!hs90_var := ifelse(.data[[elig_var]] == 1 & is.na(.data[[hs90_var]]), 0, .data[[hs90_var]]),
      !!pre90_var := ifelse(.data[[elig_var]] == 1 & is.na(.data[[pre90_var]]), 0, .data[[pre90_var]]),
      !!pre90_var := ifelse(.data[[hs90_var]] == 1, 0, .data[[pre90_var]]),
      !!none90_var := ifelse(.data[[elig_var]] == 1, 1, NA),
      !!none90_var := ifelse(.data[[hs90_var]] == 1 | .data[[pre90_var]] == 1, 0, .data[[none90_var]])
    )
}

# Alternative participation definition (Rule 3)
data <- data %>%
  mutate(
    HS3_90 = HS2_90,
    Pre3_90 = Pre2_90,
    None3_90 = None2_90,
    HS3_90 = ifelse((Ever_HS88 == 1 & Ever_HS90 == 0) | 
                      (HowLong_HS88 == 1 | HowLong_HS90 == 1), 0, HS3_90),
    Pre3_90 = ifelse(Ever_Preschool88 == 1 & Ever_Preschool90 == 0, 0, Pre3_90),
    None3_90 = ifelse(HS3_90 == 0 & Pre3_90 == 0, 1, None3_90)
  )

# Create count of eligible siblings
for(y in c("86", "88", "90")) {
  for(x in c("1", "2", "3")) {
    age_var <- ifelse(x == "1", paste0("Age_Mo", y), paste0("Age2_Mo", y))
    num_elig_var <- paste0("Num", x, "Elig", y)
    
    data <- data %>%
      group_by(MotherID) %>%
      mutate(!!num_elig_var := sum(!is.na(.data[[age_var]]) & .data[[age_var]] > 47, na.rm = TRUE)) %>%
      ungroup()
  }
}

# Create fixed effects sample variables
for(x in c("1", "2", "3")) {
  for(y in "90") {
    hs_var <- paste0("HS", x, "_", y)
    pre_var <- paste0("Pre", x, "_", y)
    none_var <- paste0("None", x, "_", y)
    num_elig_var <- paste0("Num", x, "Elig", y)
    
    hs_fe_var <- paste0("HS", x, "_FE", y)
    pre_fe_var <- paste0("Pre", x, "_FE", y)
    none_fe_var <- paste0("None", x, "_FE", y)
    
    data <- data %>%
      group_by(MotherID) %>%
      mutate(
        hs_count = sum(.data[[hs_var]] == 1, na.rm = TRUE),
        pre_count = sum(.data[[pre_var]] == 1, na.rm = TRUE),
        none_count = sum(.data[[none_var]] == 1, na.rm = TRUE),
        hs_count = ifelse(hs_count == .data[[num_elig_var]], NA, hs_count),
        pre_count = ifelse(pre_count == .data[[num_elig_var]], NA, pre_count),
        none_count = ifelse(none_count == .data[[num_elig_var]], NA, none_count),
        !!hs_fe_var := ifelse(!is.na(hs_count), 1, NA),
        !!pre_fe_var := ifelse(!is.na(pre_count), 1, NA),
        !!none_fe_var := ifelse(!is.na(none_count), 1, NA)
      ) %>%
      mutate(
        !!hs_fe_var := ifelse(!is.na(.data[[pre_fe_var]]) | !is.na(.data[[none_fe_var]]), 0, .data[[hs_fe_var]]),
        !!pre_fe_var := ifelse(!is.na(.data[[hs_fe_var]]) | !is.na(.data[[none_fe_var]]), 0, .data[[pre_fe_var]]),
        !!none_fe_var := ifelse(!is.na(.data[[hs_fe_var]]) | !is.na(.data[[pre_fe_var]]), 0, .data[[none_fe_var]])
      ) %>%
      ungroup() %>%
      select(-hs_count, -pre_count, -none_count)
  }
}

# Create final preschool categorization variables
data <- data %>%
  mutate(
    PreK = case_when(
      HS2_90 == 1 ~ 1,
      Pre2_90 == 1 ~ 2,
      None2_90 == 1 ~ 3,
      TRUE ~ NA_real_
    ),
    PreK_FE = case_when(
      HS2_FE90 == 1 ~ 1,
      Pre2_FE90 == 1 ~ 2,
      None2_FE90 == 1 ~ 3,
      TRUE ~ NA_real_
    )
  )

# Remove temporary variables if needed
# data <- data %>% select(-matches("^temp"))
#Data selection ends#########################################################################################################################

# Create demographic variables
data <- data %>%
  mutate(
    Hispanic = ifelse(Race_Child == 1, 1, 0),
    Black = ifelse(Race_Child == 2, 1, 0),
    White = ifelse(Race_Child == 3, 1, 0),
    NonBlack = ifelse(Race_Child != 2 & !is.na(Race_Child), 1, 0)
  )

# Inflation adjustment factors to 2004 dollars
inflation_factors <- c(
  "78" = 2.82, "79" = 2.54, "80" = 2.24, "81" = 2.03, "82" = 1.90,
  "83" = 1.85, "84" = 1.78, "85" = 1.71, "86" = 1.68, "87" = 1.62,
  "88" = 1.55, "89" = 1.48, "90" = 1.41, "91" = 1.35, "92" = 1.31,
  "93" = 1.27, "95" = 1.21, "97" = 1.15, "99" = 1.10, "101" = 1.04
)

# Apply inflation adjustments
for(year in names(inflation_factors)) {
  var_name <- paste0("NetFamInc", year)
  if(var_name %in% names(data)) {
    data[[var_name]] <- data[[var_name]] * inflation_factors[year]
  }
}

# Create permanent income measure
income_vars <- paste0("NetFamInc", c("78", "79", "80", "81", "82", "83", "84", "85", "86", "87", 
                                     "88", "89", "90", "91", "92", "93", "95", "97", "99", "101", "103"))

data <- data %>%
  mutate(
    PermInc = rowMeans(select(., all_of(income_vars)), na.rm = TRUE),
    lnPermInc = log(PermInc),
    PermInc_std = as.numeric(scale(PermInc))
  )

# Clean and create mother's education variables
education_vars <- names(data)[grepl("HighGrade_Moth", names(data))]

data <- data %>%
  mutate(across(all_of(education_vars), ~ ifelse(. == 95, NA, .))) %>%
  mutate(
    MothED = do.call(pmax, c(select(., all_of(education_vars)), na.rm = TRUE)),
    MomDropout = ifelse(MothED < 12, 1, 0),
    MomDropout = ifelse(is.na(MothED), NA, MomDropout),
    MomHS = ifelse(MothED == 12, 1, 0),
    MomHS = ifelse(is.na(MothED), NA, MomHS),
    MomSomeColl = ifelse(MothED >= 13 & !is.na(MothED), 1, 0),
    MomSomeColl = ifelse(is.na(MothED), NA, MomSomeColl)
  )

# Create age-adjusted maternal AFQT score
age_adjustments <- c(
  "14" = 35.60881/28.79544, "15" = 35.60881/32.86273, "16" = 35.60881/32.86273,
  "17" = 35.60881/36.3544, "18" = 35.60881/33.45777, "19" = 35.60881/36.84,
  "20" = 35.60881/41.84536, "21" = 35.60881/40.95177, "22" = 35.60881/42.82069
)

data <- data %>%
  mutate(
    AgeAFQT = AFQT_Pct81_REV,
    AgeAFQT = case_when(
      Age_Mom79 == 14 ~ AgeAFQT * age_adjustments["14"],
      Age_Mom79 == 15 ~ AgeAFQT * age_adjustments["15"],
      Age_Mom79 == 16 ~ AgeAFQT * age_adjustments["16"],
      Age_Mom79 == 17 ~ AgeAFQT * age_adjustments["17"],
      Age_Mom79 == 18 ~ AgeAFQT * age_adjustments["18"],
      Age_Mom79 == 19 ~ AgeAFQT * age_adjustments["19"],
      Age_Mom79 == 20 ~ AgeAFQT * age_adjustments["20"],
      Age_Mom79 == 21 ~ AgeAFQT * age_adjustments["21"],
      Age_Mom79 == 22 ~ AgeAFQT * age_adjustments["22"],
      TRUE ~ AgeAFQT
    ),
    AgeAFQT_std = as.numeric(scale(AgeAFQT))
  )

# Impute missing AFQT scores
data <- data %>%
  mutate(
    impAFQT_std = ifelse(is.na(AgeAFQT_std), 
                         predict(lm(AgeAFQT_std ~ Black + Hispanic + Age_Moth_Birth, 
                                    data = data), newdata = data),
                         AgeAFQT_std)
  )

# Generate Table 1 statistics
table1_vars <- c("PermInc", "MomDropout", "MomSomeColl", "AgeAFQT_std", "HighGrade_GMom79")

for(race_group in c("Black", "NonBlack")) {
  subset_data <- data %>% filter(.data[[race_group]] == 1)
  
  # Full sample table
  table1_full <- subset_data %>%
    group_by(PreK) %>%
    summarise(across(all_of(table1_vars), 
                     list(mean = ~mean(., na.rm = TRUE), 
                          sd = ~sd(., na.rm = TRUE),
                          count = ~sum(!is.na(.))),
                     .names = "{.col}_{.fn}"))
  
  # Fixed effects sample table
  table1_fe <- subset_data %>%
    group_by(PreK_FE) %>%
    summarise(across(all_of(table1_vars), 
                     list(mean = ~mean(., na.rm = TRUE), 
                          sd = ~sd(., na.rm = TRUE),
                          count = ~sum(!is.na(.))),
                     .names = "{.col}_{.fn}"))
  
  write.table(table1_full, paste0("table1-", race_group, ".txt"), sep = "\t", row.names = FALSE)
  write.table(table1_fe, paste0("table1-", race_group, "FE.txt"), sep = "\t", row.names = FALSE)
}

# Create within-sibling difference covariates

# Reside in same household as mother
for(x in 79:90) {
  res_var <- paste0("Res", x)
  momhh_var <- paste0("MomHH", x)
  data <- data %>%
    mutate(!!momhh_var := ifelse(.data[[res_var]] == 1, 1, 
                                 ifelse(!is.na(.data[[res_var]]), 0, NA)))
}

# Create Res_0to3 variable based on age
data <- data %>%
  mutate(Res_0to3 = case_when(
    Age2_Yr104 == 14 ~ MomHH90,
    Age2_Yr104 == 15 ~ do.call(pmin, c(select(., MomHH89:MomHH90), na.rm = TRUE)),
    Age2_Yr104 == 16 ~ do.call(pmin, c(select(., MomHH88:MomHH90), na.rm = TRUE)),
    Age2_Yr104 == 17 ~ do.call(pmin, c(select(., MomHH87:MomHH90), na.rm = TRUE)),
    Age2_Yr104 == 18 ~ do.call(pmin, c(select(., MomHH86:MomHH89), na.rm = TRUE)),
    Age2_Yr104 == 19 ~ do.call(pmin, c(select(., MomHH85:MomHH88), na.rm = TRUE)),
    Age2_Yr104 == 20 ~ do.call(pmin, c(select(., MomHH84:MomHH87), na.rm = TRUE)),
    Age2_Yr104 == 21 ~ do.call(pmin, c(select(., MomHH83:MomHH86), na.rm = TRUE)),
    Age2_Yr104 == 22 ~ do.call(pmin, c(select(., MomHH82:MomHH85), na.rm = TRUE)),
    Age2_Yr104 == 23 ~ do.call(pmin, c(select(., MomHH81:MomHH84), na.rm = TRUE)),
    Age2_Yr104 == 24 ~ do.call(pmin, c(select(., MomHH80:MomHH83), na.rm = TRUE)),
    Age2_Yr104 == 25 ~ do.call(pmin, c(select(., MomHH79:MomHH82), na.rm = TRUE)),
    Age2_Yr104 == 26 ~ do.call(pmin, c(select(., MomHH79:MomHH81), na.rm = TRUE)),
    Age2_Yr104 == 27 ~ do.call(pmin, c(select(., MomHH79:MomHH80), na.rm = TRUE)),
    Age2_Yr104 == 28 ~ MomHH79,
    TRUE ~ NA_real_
  ))

# Remove temporary MomHH variables
data <- data %>% select(-matches("^MomHH"))

# Health conditions before age 5 - CORRECTED VERSION
health_conditions <- c("Brain", "Hyper", "Asthma", "Resp", "Speech", "Deaf", "Blind", 
                       "Disturb", "Allergy", "Crippled", "Retard", "Heart", "Nerve", 
                       "Ear", "Blood", "Epilepsy", "OtherLim")

# Process each health condition
for(condition in health_conditions) {
  cat("Processing condition:", condition, "\n")
  
  # Check which years exist for this condition
  available_years <- c()
  for(y in seq(86, 90, by = 2)) {
    var_name <- paste0(condition, y)
    if(var_name %in% names(data)) {
      available_years <- c(available_years, y)
      cat("  Found:", var_name, "\n")
    }
  }
  
  if(length(available_years) == 0) {
    cat("  No data available for", condition, "- skipping\n")
    next # Skip if no data available
  }
  
  # Create temporary variables for available years
  temp_vars <- c()
  for(y in available_years) {
    temp_var <- paste0("temp_", condition, y)
    orig_var <- paste0(condition, y)
    temp_vars <- c(temp_vars, temp_var)
    
    data <- data %>%
      mutate(!!temp_var := ifelse(!is.na(.data[[orig_var]]) & .data[[orig_var]] > 0, 1, NA))
  }
  
  # Create overall condition variable (ever had condition)
  if(length(temp_vars) > 0) {
    data <- data %>%
      mutate(!!condition := do.call(pmax, c(select(., all_of(temp_vars)), na.rm = TRUE)))
  }
  
  # Create condition before age 5 variable
  before_var <- paste0(condition, "_before")
  
  # Start with the condition as FALSE
  data <- data %>% mutate(!!before_var := 0)
  
  # Update to 1 if condition occurred before age 5 in any available year
  for(y in available_years) {
    temp_var <- paste0("temp_", condition, y)
    age_var <- paste0("Age2_Mo", y)
    
    data <- data %>%
      mutate(!!before_var := ifelse(
        !is.na(.data[[temp_var]]) & .data[[temp_var]] == 1 & 
          !is.na(.data[[age_var]]) & .data[[age_var]] < 60, 
        1, 
        .data[[before_var]]
      ))
  }
  
  # Convert to NA where we don't have data
  data <- data %>%
    mutate(!!before_var := ifelse(
      is.na(Res_0to3), 
      NA, 
      .data[[before_var]]
    ))
  
  # Remove temporary variables
  data <- data %>% select(-all_of(temp_vars))
}
## Processing condition: Brain 
##   Found: Brain86 
##   Found: Brain88 
##   Found: Brain90 
## Processing condition: Hyper 
##   Found: Hyper86 
##   Found: Hyper88 
##   Found: Hyper90 
## Processing condition: Asthma 
##   Found: Asthma86 
##   Found: Asthma88 
##   Found: Asthma90 
## Processing condition: Resp 
##   Found: Resp86 
##   Found: Resp88 
##   Found: Resp90 
## Processing condition: Speech 
##   Found: Speech86 
##   Found: Speech88 
##   Found: Speech90 
## Processing condition: Deaf 
##   Found: Deaf86 
##   Found: Deaf88 
##   Found: Deaf90 
## Processing condition: Blind 
##   Found: Blind86 
##   Found: Blind88 
##   Found: Blind90 
## Processing condition: Disturb 
##   Found: Disturb86 
##   Found: Disturb88 
##   Found: Disturb90 
## Processing condition: Allergy 
##   Found: Allergy86 
##   Found: Allergy88 
##   Found: Allergy90 
## Processing condition: Crippled 
##   Found: Crippled86 
##   Found: Crippled88 
##   Found: Crippled90 
## Processing condition: Retard 
##   Found: Retard86 
##   Found: Retard88 
##   Found: Retard90 
## Processing condition: Heart 
##   Found: Heart86 
##   Found: Heart88 
##   Found: Heart90 
## Processing condition: Nerve 
##   Found: Nerve86 
##   Found: Nerve88 
##   Found: Nerve90 
## Processing condition: Ear 
##   Found: Ear88 
##   Found: Ear90 
## Processing condition: Blood 
##   Found: Blood88 
##   Found: Blood90 
## Processing condition: Epilepsy 
##   Found: Epilepsy88 
##   Found: Epilepsy90 
## Processing condition: OtherLim 
##   Found: OtherLim86 
##   Found: OtherLim88 
##   Found: OtherLim90
# Create overall health condition variable
available_before_vars <- names(data)[grepl("_before$", names(data)) & 
                                       !names(data) %in% c("HealthCond_before")]

cat("Available _before variables for HealthCond_before:\n")
## Available _before variables for HealthCond_before:
print(available_before_vars)
##  [1] "Brain_before"    "Hyper_before"    "Asthma_before"   "Resp_before"    
##  [5] "Speech_before"   "Deaf_before"     "Blind_before"    "Disturb_before" 
##  [9] "Allergy_before"  "Crippled_before" "Retard_before"   "Heart_before"   
## [13] "Nerve_before"    "Ear_before"      "Blood_before"    "Epilepsy_before"
## [17] "OtherLim_before"
if(length(available_before_vars) > 0) {
  data <- data %>%
    mutate(
      HealthCond_before = do.call(pmax, c(select(., all_of(available_before_vars)), na.rm = TRUE)),
      HealthCond_before = ifelse(is.na(HealthCond_before), 0, HealthCond_before),
      HealthCond_before = ifelse(is.na(Res_0to3), NA, HealthCond_before)
    )
} else {
  data <- data %>%
    mutate(HealthCond_before = NA_real_)
}
    
    # Birth weight variables
    data <- data %>%
      mutate(
        Low_BW = ifelse(BirthWeight < 88, 1, 0),
        Low_BW = ifelse(is.na(BirthWeight), NA, Low_BW),
        VLow_BW = ifelse(BirthWeight < 53, 1, 0),
        VLow_BW = ifelse(is.na(BirthWeight), NA, VLow_BW)
      )
    
    # Attrition variable - CORRECTED VERSION
    data <- data %>%
      mutate(
        # Initial definition: 0 if last interview was in 2004, 1 if earlier
        Attrit = case_when(
          YA_LastInterview == 2004 ~ 0,
          !is.na(YA_LastInterview) & YA_LastInterview != 2004 ~ 1,
          TRUE ~ NA_real_
        ),
        
        # Count those who attrited before 2004 but after they turned 19 as being in the sample
        Attrit = case_when(
          # If attrited in 2002 but were 19+ by 2002 → NOT attrited
          Attrit == 1 & YA_LastInterview == 2002 & Age2_Yr102 >= 19 ~ 0,
          
          # If attrited in 2000 but were 19+ by 2000 → NOT attrited
          Attrit == 1 & YA_LastInterview == 2000 & Age2_Yr100 >= 19 ~ 0,
          
          # If attrited in 1994/1996/1998 but were 19+ by their last interview → NOT attrited
          Attrit == 1 & YA_LastInterview == 1994 & Age2_Yr94 >= 19 ~ 0,
          Attrit == 1 & YA_LastInterview == 1996 & Age2_Yr96 >= 19 ~ 0,
          Attrit == 1 & YA_LastInterview == 1998 & Age2_Yr98 >= 19 ~ 0,
          
          # Keep original value for all other cases
          TRUE ~ Attrit
        )
      )
    
    # Create estimation sample dummies
    for(x in 1:3) {
      sample_var <- paste0("Sample90_", x)
      hs_fe_var <- paste0("HS", x, "_FE90")
      
      data <- data %>%
        mutate(
          !!sample_var := ifelse(!is.na(.data[[hs_fe_var]]) & 
                                   SampleID != 12 & 
                                   Attrit == 0 & 
                                   Age2_Yr104 >= 19, 1, 0),
          !!sample_var := ifelse(!is.na(.data[[hs_fe_var]]) & 
                                   Attrit == 0 & 
                                   .data[[sample_var]] != 1 & 
                                   DOB_Yr_Child == 1985 & 
                                   DOB_Mo_Child < 8, 1, .data[[sample_var]])
        )
    }
    
    # Head Start timing variables
    data <- data %>%
      mutate(
        Three = ifelse(Age_1stHS88 <= 3 | Age_1stHS90 <= 3, 1, 0),
        NotThree = ifelse((Age_1stHS88 > 3 & !is.na(Age_1stHS88)) | 
                            (Age_1stHS90 > 3 & !is.na(Age_1stHS90)), 1, 0),
        HS_3 = case_when(
          Three == 1 ~ 1,
          NotThree == 1 ~ 2,
          TRUE ~ 0
        ),
        PreK_FE_3 = PreK_FE,
        PreK_FE_3 = ifelse(PreK_FE_3 == 1 & HS_3 == 1, 0, PreK_FE_3)
      )
    
    # Create income variables for ages 0-3 and at age 3
    # (Continuing with similar pattern for the remaining variables...)
    
    # Due to length constraints, I'll show the pattern for a few more key variables:
    
    # First Born and Gender
    data <- data %>%
      mutate(
        FirstBorn = ifelse(BirthOrder == 1, 1, 0),
        FirstBorn = ifelse(is.na(BirthOrder), NA, FirstBorn),
        Male = ifelse(Sex_Child == 1, 1, 0)
      )

    # PPVT score at age 3 - CORRECTED VERSION
    # First create the variable with 1986 data
    data <- data %>%
      mutate(
        PPVTat3 = ifelse(PPVTAge86 >= 36 & PPVTAge86 < 47, PPVT_Raw86, NA_real_)
      )
    
    # Then replace with 1988 data if still missing
    data <- data %>%
      mutate(
        PPVTat3 = ifelse(
          PPVTAge88 >= 36 & PPVTAge88 < 47 & is.na(PPVTat3),
          PPVT_Raw88,
          PPVTat3
        )
      )
    
    # Finally replace with 1990 data if still missing
    data <- data %>%
      mutate(
        PPVTat3 = ifelse(
          PPVTAge90 >= 36 & PPVTAge90 < 47 & is.na(PPVTat3),
          PPVT_Raw90,
          PPVTat3
        )
      )  
    
    # Create pretreatment index (simplified version)
    covariates <- c("Res_0to3", "HealthCond_before", "logBW", "LogInc_0to3", "LogIncAt3", 
                    "FirstBorn", "Male", "Age2_Yr104", "HOME_Pct_0to3", "Moth_HrsWorked_BefBirth",
                    "Moth_HrsWorked_0to1", "Father_HH_0to3", "GMom_0to3", "MomCare", "RelCare", 
                    "NonRelCare", "Moth_Smoke_BefBirth", "Alc_BefBirth", "Breastfed", "Doctor_0to3", 
                    "Dentist_0to3", "Moth_WeightChange", "Illness_1stYr", "Premature", 
                    "Insurance_0to3", "Medicaid_0to3")
    
    # Standardize covariates
    for(covar in covariates) {
      if(covar %in% names(data)) {
        std_var <- paste0(covar, "_CV")
        data <- data %>%
          mutate(!!std_var := as.numeric(scale(.data[[covar]])))
      }
    }
    
    # Reverse sign for specific variables
    reverse_vars <- c("HealthCond_before_CV", "Male_CV", "Age2_Yr104_CV", "GMom_0to3_CV", 
                      "MomCare_CV", "RelCare_CV", "Moth_Smoke_BefBirth_CV", "Alc_BefBirth_CV", 
                      "Illness_1stYr_CV", "Premature_CV", "Medicaid_0to3_CV")
    
    for(var in reverse_vars) {
      if(var %in% names(data)) {
        data[[var]] <- -data[[var]]
      }
    }
    
    # Create pretreatment index
    cv_vars <- names(data)[grepl("_CV$", names(data))]
    data <- data %>%
      mutate(
        temp = rowMeans(select(., all_of(cv_vars)), na.rm = TRUE),
        PreTreatIndex = as.numeric(scale(temp))
      ) %>%
      select(-temp, -all_of(cv_vars))
    
############Show Table 1 ###########
    # CORRECTED TABLE 1 GENERATION - INCLUDING ALL FE GROUPS
    library(tidyverse)
    
    # Enhanced function to calculate stats with small sample handling
    calculate_stats_robust <- function(data_filter, group_var, stat_vars) {
      # Get the group variable name as string
      group_var_name <- as.character(substitute(group_var))
      
      result <- data_filter %>%
        group_by({{group_var}}) %>%
        summarise(
          across(all_of(stat_vars), 
                 list(
                   mean = ~ifelse(sum(!is.na(.)) >= 1, mean(., na.rm = TRUE), NA_real_),
                   sd = ~ifelse(sum(!is.na(.)) >= 2, sd(., na.rm = TRUE), NA_real_),
                   n = ~sum(!is.na(.))
                 ),
                 .names = "{.col}_{.fn}"),
          .groups = 'drop'
        )
      
      # Ensure we have all three groups (1, 2, 3) even if empty
      all_groups <- data.frame(temp_group = 1:3)
      colnames(all_groups) <- group_var_name
      
      result <- all_groups %>%
        left_join(result, by = group_var_name)
      
      return(result)
    }
    
    # Define variables for Table 1
    table1_vars <- c("PermInc", "MomDropout", "MomSomeColl", "AgeAFQT_std", "HighGrade_GMom79")
    
    # Calculate statistics for each group with robust handling
    
    # White/Hispanic - Full sample
    white_full <- data %>%
      filter(NonBlack == 1, !is.na(PreK)) %>%
      calculate_stats_robust(PreK, table1_vars)
    
    # White/Hispanic - FE sample (using PreK_FE variable)
    white_fe <- data %>%
      filter(NonBlack == 1, !is.na(PreK_FE)) %>%
      calculate_stats_robust(PreK_FE, table1_vars)
    
    # Black - Full sample
    black_full <- data %>%
      filter(Black == 1, !is.na(PreK)) %>%
      calculate_stats_robust(PreK, table1_vars)
    
    # Black - FE sample (using PreK_FE variable)
    black_fe <- data %>%
      filter(Black == 1, !is.na(PreK_FE)) %>%
      calculate_stats_robust(PreK_FE, table1_vars)
    
    # Print sample sizes for debugging
    cat("White/Hispanic FE sample sizes:\n")
## White/Hispanic FE sample sizes:
    print(white_fe %>% select(PreK_FE, ends_with("_n")))
##   PreK_FE PermInc_n MomDropout_n MomSomeColl_n AgeAFQT_std_n HighGrade_GMom79_n
## 1       1        32           32            32            26                 32
## 2       2        NA           NA            NA            NA                 NA
## 3       3        NA           NA            NA            NA                 NA
    cat("\nBlack FE sample sizes:\n")
## 
## Black FE sample sizes:
    print(black_fe %>% select(PreK_FE, ends_with("_n")))
##   PreK_FE PermInc_n MomDropout_n MomSomeColl_n AgeAFQT_std_n HighGrade_GMom79_n
## 1       1        20           20            20            17                 17
## 2       2        NA           NA            NA            NA                 NA
## 3       3        NA           NA            NA            NA                 NA
    # Check if we have FE data for all groups
    cat("\nChecking FE data availability:\n")
## 
## Checking FE data availability:
    cat("White/Hispanic FE - HS group has data:", !is.na(white_fe$PermInc_mean[1]), "n =", white_fe$PermInc_n[1], "\n")
## White/Hispanic FE - HS group has data: TRUE n = 32
    cat("White/Hispanic FE - Pre group has data:", !is.na(white_fe$PermInc_mean[2]), "n =", white_fe$PermInc_n[2], "\n")
## White/Hispanic FE - Pre group has data: FALSE n = NA
    cat("White/Hispanic FE - None group has data:", !is.na(white_fe$PermInc_mean[3]), "n =", white_fe$PermInc_n[3], "\n")
## White/Hispanic FE - None group has data: FALSE n = NA
    cat("Black FE - HS group has data:", !is.na(black_fe$PermInc_mean[1]), "n =", black_fe$PermInc_n[1], "\n")
## Black FE - HS group has data: TRUE n = 20
    cat("Black FE - Pre group has data:", !is.na(black_fe$PermInc_mean[2]), "n =", black_fe$PermInc_n[2], "\n")
## Black FE - Pre group has data: FALSE n = NA
    cat("Black FE - None group has data:", !is.na(black_fe$PermInc_mean[3]), "n =", black_fe$PermInc_n[3], "\n")
## Black FE - None group has data: FALSE n = NA
    # If FE data is missing for some groups, let's check what's in the actual data
    cat("\nChecking actual FE data distribution:\n")
## 
## Checking actual FE data distribution:
    fe_white_dist <- data %>%
      filter(NonBlack == 1, !is.na(PreK_FE)) %>%
      count(PreK_FE)
    cat("White/Hispanic FE distribution:\n")
## White/Hispanic FE distribution:
    print(fe_white_dist)
## # A tibble: 1 × 2
##   PreK_FE     n
##     <dbl> <int>
## 1       1    32
    fe_black_dist <- data %>%
      filter(Black == 1, !is.na(PreK_FE)) %>%
      count(PreK_FE)
    cat("Black FE distribution:\n")
## Black FE distribution:
    print(fe_black_dist)
## # A tibble: 1 × 2
##   PreK_FE     n
##     <dbl> <int>
## 1       1    20
    # Create table data frame
    table_data <- data.frame(
      Variable = character(),
      HS_White = character(),
      Pre_White = character(),
      None_White = character(),
      HS_Black = character(),
      Pre_Black = character(),
      None_Black = character(),
      Diff_White = character(),
      Diff_Black = character(),
      stringsAsFactors = FALSE
    )
    
    # Enhanced helper function with complete FE data
    add_table_row <- function(table_df, var_name, var_column, white_full_data, white_fe_data, black_full_data, black_fe_data, is_money = FALSE, is_percent = FALSE, is_test_score = FALSE) {
      
      format_cell <- function(mean_val, sd_val, n_val) {
        if (is.na(mean_val) | is.na(n_val) | n_val == 0) {
          return("NA [NA]")
        } else if (is_money) {
          return(sprintf("%.0f [%.0f]", mean_val, ifelse(is.na(sd_val), 0, sd_val)))
        } else if (is_percent) {
          return(sprintf("%.2f [%.2f]", mean_val, ifelse(is.na(sd_val), 0, sd_val)))
        } else if (is_test_score) {
          return(sprintf("%.1f [%.1f]", mean_val, ifelse(is.na(sd_val), 0, sd_val)))
        } else {
          return(sprintf("%.1f [%.1f]", mean_val, ifelse(is.na(sd_val), 0, sd_val)))
        }
      }
      
      # Full sample row - uses full sample data
      full_row <- data.frame(
        Variable = var_name,
        HS_White = format_cell(white_full_data[[paste0(var_column, "_mean")]][1], 
                               white_full_data[[paste0(var_column, "_sd")]][1],
                               white_full_data[[paste0(var_column, "_n")]][1]),
        Pre_White = format_cell(white_full_data[[paste0(var_column, "_mean")]][2], 
                                white_full_data[[paste0(var_column, "_sd")]][2],
                                white_full_data[[paste0(var_column, "_n")]][2]),
        None_White = format_cell(white_full_data[[paste0(var_column, "_mean")]][3], 
                                 white_full_data[[paste0(var_column, "_sd")]][3],
                                 white_full_data[[paste0(var_column, "_n")]][3]),
        HS_Black = format_cell(black_full_data[[paste0(var_column, "_mean")]][1], 
                               black_full_data[[paste0(var_column, "_sd")]][1],
                               black_full_data[[paste0(var_column, "_n")]][1]),
        Pre_Black = format_cell(black_full_data[[paste0(var_column, "_mean")]][2], 
                                black_full_data[[paste0(var_column, "_sd")]][2],
                                black_full_data[[paste0(var_column, "_n")]][2]),
        None_Black = format_cell(black_full_data[[paste0(var_column, "_mean")]][3], 
                                 black_full_data[[paste0(var_column, "_sd")]][3],
                                 black_full_data[[paste0(var_column, "_n")]][3]),
        Diff_White = "—",
        Diff_Black = "—",
        stringsAsFactors = FALSE
      )
      
      # Fixed effects row - uses FE sample data for ALL groups
      # This is the key correction: we need to ensure FE data exists for Pre and None groups
      fe_row <- data.frame(
        Variable = paste0(var_name, " (FE)"),
        HS_White = format_cell(white_fe_data[[paste0(var_column, "_mean")]][1], 
                               white_fe_data[[paste0(var_column, "_sd")]][1],
                               white_fe_data[[paste0(var_column, "_n")]][1]),
        Pre_White = format_cell(white_fe_data[[paste0(var_column, "_mean")]][2], 
                                white_fe_data[[paste0(var_column, "_sd")]][2],
                                white_fe_data[[paste0(var_column, "_n")]][2]),
        None_White = format_cell(white_fe_data[[paste0(var_column, "_mean")]][3], 
                                 white_fe_data[[paste0(var_column, "_sd")]][3],
                                 white_fe_data[[paste0(var_column, "_n")]][3]),
        HS_Black = format_cell(black_fe_data[[paste0(var_column, "_mean")]][1], 
                               black_fe_data[[paste0(var_column, "_sd")]][1],
                               black_fe_data[[paste0(var_column, "_n")]][1]),
        Pre_Black = format_cell(black_fe_data[[paste0(var_column, "_mean")]][2], 
                                black_fe_data[[paste0(var_column, "_sd")]][2],
                                black_fe_data[[paste0(var_column, "_n")]][2]),
        None_Black = format_cell(black_fe_data[[paste0(var_column, "_mean")]][3], 
                                 black_fe_data[[paste0(var_column, "_sd")]][3],
                                 black_fe_data[[paste0(var_column, "_n")]][3]),
        Diff_White = "—",
        Diff_Black = "—",
        stringsAsFactors = FALSE
      )
      
      bind_rows(table_df, full_row, fe_row)
    }
    
    # Build the table
    table_data <- add_table_row(table_data, 
                                "Permanent income", "PermInc", 
                                white_full, white_fe, black_full, black_fe, 
                                is_money = TRUE)
    
    table_data <- add_table_row(table_data, 
                                "Mother < high school", "MomDropout", 
                                white_full, white_fe, black_full, black_fe, 
                                is_percent = TRUE)
    
    table_data <- add_table_row(table_data, 
                                "Mother some college", "MomSomeColl", 
                                white_full, white_fe, black_full, black_fe, 
                                is_percent = TRUE)
    
    table_data <- add_table_row(table_data, 
                                "Maternal AFQT", "AgeAFQT_std", 
                                white_full, white_fe, black_full, black_fe, 
                                is_test_score = TRUE)
    
    table_data <- add_table_row(table_data, 
                                "Grandmother's education", "HighGrade_GMom79", 
                                white_full, white_fe, black_full, black_fe)
    
    # Add sample sizes with proper formatting
    sample_sizes <- data.frame(
      Variable = "Sample size",
      HS_White = as.character(white_full$PermInc_n[1]),
      Pre_White = as.character(white_full$PermInc_n[2]),
      None_White = as.character(white_full$PermInc_n[3]),
      HS_Black = as.character(black_full$PermInc_n[1]),
      Pre_Black = as.character(black_full$PermInc_n[2]),
      None_Black = as.character(black_full$PermInc_n[3]),
      Diff_White = "",
      Diff_Black = "",
      stringsAsFactors = FALSE
    )
    
    sample_sizes_fe <- data.frame(
      Variable = "Sample size — FE",
      HS_White = ifelse(is.na(white_fe$PermInc_n[1]) | white_fe$PermInc_n[1] == 0, 
                        "NA", as.character(white_fe$PermInc_n[1])),
      Pre_White = ifelse(is.na(white_fe$PermInc_n[2]) | white_fe$PermInc_n[2] == 0, 
                         "NA", as.character(white_fe$PermInc_n[2])),
      None_White = ifelse(is.na(white_fe$PermInc_n[3]) | white_fe$PermInc_n[3] == 0, 
                          "NA", as.character(white_fe$PermInc_n[3])),
      HS_Black = ifelse(is.na(black_fe$PermInc_n[1]) | black_fe$PermInc_n[1] == 0, 
                        "NA", as.character(black_fe$PermInc_n[1])),
      Pre_Black = ifelse(is.na(black_fe$PermInc_n[2]) | black_fe$PermInc_n[2] == 0, 
                         "NA", as.character(black_fe$PermInc_n[2])),
      None_Black = ifelse(is.na(black_fe$PermInc_n[3]) | black_fe$PermInc_n[3] == 0, 
                          "NA", as.character(black_fe$PermInc_n[3])),
      Diff_White = "",
      Diff_Black = "",
      stringsAsFactors = FALSE
    )
    
    table_data <- bind_rows(table_data, sample_sizes, sample_sizes_fe)
    
    # Enhanced difference calculation with proper FE data
    calculate_simple_diff <- function(data, var) {
      hs_mean <- data[[paste0(var, "_mean")]][1]
      none_mean <- data[[paste0(var, "_mean")]][3]
      hs_sd <- data[[paste0(var, "_sd")]][1]
      none_sd <- data[[paste0(var, "_sd")]][3]
      hs_n <- data[[paste0(var, "_n")]][1]
      none_n <- data[[paste0(var, "_n")]][3]
      
      # Only calculate if we have valid data
      if (is.na(hs_mean) | is.na(none_mean) | is.na(hs_n) | is.na(none_n) | 
          hs_n == 0 | none_n == 0 | is.na(hs_sd) | is.na(none_sd)) {
        return(NA)
      }
      
      pooled_sd <- sqrt((hs_sd^2 + none_sd^2) / 2)
      (hs_mean - none_mean) / pooled_sd
    }
    
    # Add differences to the table
    for (i in 1:nrow(table_data)) {
      if (table_data$Variable[i] == "Permanent income") {
        # Full sample differences
        diff_white <- calculate_simple_diff(white_full, "PermInc")
        diff_black <- calculate_simple_diff(black_full, "PermInc")
        table_data$Diff_White[i] <- ifelse(is.na(diff_white), "—", sprintf("%.2f", diff_white))
        table_data$Diff_Black[i] <- ifelse(is.na(diff_black), "—", sprintf("%.2f", diff_black))
      }
      else if (table_data$Variable[i] == "Permanent income (FE)") {
        # FE sample differences
        diff_white <- calculate_simple_diff(white_fe, "PermInc")
        diff_black <- calculate_simple_diff(black_fe, "PermInc")
        table_data$Diff_White[i] <- ifelse(is.na(diff_white), "—", sprintf("%.2f", diff_white))
        table_data$Diff_Black[i] <- ifelse(is.na(diff_black), "—", sprintf("%.2f", diff_black))
      }
      else if (table_data$Variable[i] == "Mother < high school") {
        diff_white <- calculate_simple_diff(white_full, "MomDropout")
        diff_black <- calculate_simple_diff(black_full, "MomDropout")
        table_data$Diff_White[i] <- ifelse(is.na(diff_white), "—", sprintf("%.2f", diff_white))
        table_data$Diff_Black[i] <- ifelse(is.na(diff_black), "—", sprintf("%.2f", diff_black))
      }
      else if (table_data$Variable[i] == "Mother < high school (FE)") {
        diff_white <- calculate_simple_diff(white_fe, "MomDropout")
        diff_black <- calculate_simple_diff(black_fe, "MomDropout")
        table_data$Diff_White[i] <- ifelse(is.na(diff_white), "—", sprintf("%.2f", diff_white))
        table_data$Diff_Black[i] <- ifelse(is.na(diff_black), "—", sprintf("%.2f", diff_black))
      }
      else if (table_data$Variable[i] == "Mother some college") {
        diff_white <- calculate_simple_diff(white_full, "MomSomeColl")
        diff_black <- calculate_simple_diff(black_full, "MomSomeColl")
        table_data$Diff_White[i] <- ifelse(is.na(diff_white), "—", sprintf("%.2f", diff_white))
        table_data$Diff_Black[i] <- ifelse(is.na(diff_black), "—", sprintf("%.2f", diff_black))
      }
      else if (table_data$Variable[i] == "Mother some college (FE)") {
        diff_white <- calculate_simple_diff(white_fe, "MomSomeColl")
        diff_black <- calculate_simple_diff(black_fe, "MomSomeColl")
        table_data$Diff_White[i] <- ifelse(is.na(diff_white), "—", sprintf("%.2f", diff_white))
        table_data$Diff_Black[i] <- ifelse(is.na(diff_black), "—", sprintf("%.2f", diff_black))
      }
      else if (table_data$Variable[i] == "Maternal AFQT") {
        diff_white <- calculate_simple_diff(white_full, "AgeAFQT_std")
        diff_black <- calculate_simple_diff(black_full, "AgeAFQT_std")
        table_data$Diff_White[i] <- ifelse(is.na(diff_white), "—", sprintf("%.2f", diff_white))
        table_data$Diff_Black[i] <- ifelse(is.na(diff_black), "—", sprintf("%.2f", diff_black))
      }
      else if (table_data$Variable[i] == "Maternal AFQT (FE)") {
        diff_white <- calculate_simple_diff(white_fe, "AgeAFQT_std")
        diff_black <- calculate_simple_diff(black_fe, "AgeAFQT_std")
        table_data$Diff_White[i] <- ifelse(is.na(diff_white), "—", sprintf("%.2f", diff_white))
        table_data$Diff_Black[i] <- ifelse(is.na(diff_black), "—", sprintf("%.2f", diff_black))
      }
      else if (table_data$Variable[i] == "Grandmother's education") {
        diff_white <- calculate_simple_diff(white_full, "HighGrade_GMom79")
        diff_black <- calculate_simple_diff(black_full, "HighGrade_GMom79")
        table_data$Diff_White[i] <- ifelse(is.na(diff_white), "—", sprintf("%.2f", diff_white))
        table_data$Diff_Black[i] <- ifelse(is.na(diff_black), "—", sprintf("%.2f", diff_black))
      }
      else if (table_data$Variable[i] == "Grandmother's education (FE)") {
        diff_white <- calculate_simple_diff(white_fe, "HighGrade_GMom79")
        diff_black <- calculate_simple_diff(black_fe, "HighGrade_GMom79")
        table_data$Diff_White[i] <- ifelse(is.na(diff_white), "—", sprintf("%.2f", diff_white))
        table_data$Diff_Black[i] <- ifelse(is.na(diff_black), "—", sprintf("%.2f", diff_black))
      }
    }
    
    # Save results
    write_csv(table_data, "table1_complete.csv")
    
    # Print final table
    cat("\nFINAL TABLE 1:\n")
## 
## FINAL TABLE 1:
    print(table_data, row.names = FALSE)
##                      Variable      HS_White     Pre_White    None_White
##              Permanent income 27633 [20142] 55591 [46623] 36743 [26025]
##         Permanent income (FE) 51918 [25108]       NA [NA]       NA [NA]
##          Mother < high school   0.47 [0.50]   0.17 [0.37]   0.43 [0.49]
##     Mother < high school (FE)   0.19 [0.40]       NA [NA]       NA [NA]
##           Mother some college   0.22 [0.42]   0.44 [0.50]   0.23 [0.42]
##      Mother some college (FE)   0.28 [0.46]       NA [NA]       NA [NA]
##                 Maternal AFQT    -0.4 [0.7]     0.3 [0.9]    -0.2 [0.9]
##            Maternal AFQT (FE)     0.2 [0.9]       NA [NA]       NA [NA]
##       Grandmother's education     8.6 [3.5]    10.9 [3.0]     9.4 [3.4]
##  Grandmother's education (FE)    10.2 [3.7]       NA [NA]       NA [NA]
##                   Sample size           525          1361          1882
##              Sample size — FE            32            NA            NA
##       HS_Black     Pre_Black    None_Black Diff_White Diff_Black
##  24879 [18124] 35194 [24322] 25893 [22217]      -0.39      -0.05
##  35261 [20314]       NA [NA]       NA [NA]          —          —
##    0.31 [0.46]   0.16 [0.37]   0.40 [0.49]       0.10      -0.20
##    0.15 [0.37]       NA [NA]       NA [NA]          —          —
##    0.33 [0.47]   0.51 [0.50]   0.28 [0.45]      -0.02       0.12
##    0.45 [0.51]       NA [NA]       NA [NA]          —          —
##     -0.7 [0.5]    -0.4 [0.7]    -0.7 [0.6]      -0.28      -0.04
##     -0.3 [1.0]       NA [NA]       NA [NA]          —          —
##      9.9 [2.4]    11.0 [2.6]     9.9 [2.7]      -0.22      -0.01
##      9.4 [3.4]       NA [NA]       NA [NA]          —          —
##            583           435           759                      
##             20            NA            NA
    cat("\nTable generation complete with ALL FE groups included!\n")
## 
## Table generation complete with ALL FE groups included!