1 MS ASQSE pre and post pandemic

2 Load df

load("C:/Users/luisf/Dropbox/Puc-Rio/Projeto - ASQ ASQSE ESQ PSI/R Base - ASQ ESQ PSI.RData")

2.1 Packages

pacman::p_load(tidyverse, janitor, stringr, psych, corrr, openxlsx, arsenal, 
               correlation, ggplot2, corrplot, summarytools, openxlsx, lavaan)

2.2 Table 1

First table (currently at page 3) Results match! (Lucky me - On Feb 28 2025)

df_full %>% 
  filter(!is.na(asqse_quest)) %>%
  filter(year_completed_cat %in% c("19","20","21","22","23") )%>%
  mutate(asqse_quest = as.factor(asqse_quest)) %>%
  select(asqse_quest, year_completed_cat, momage, gender, momed, momage, income, race_wbho) %>% 
  tableby(year_completed_cat ~ ., data =.) %>%
  summary(digits = 2, text = T) %>% as.data.frame()
df_full %>% filter(!is.na(asqse_quest)) %>% filter(year_completed == 20) %>% count(income)

2.3 Duplicates

df_full %>%
  add_count(id) %>%
  filter(n > 1)

2.4 Number of respondents per questionnaire

df_full %>% filter(!is.na(asqse_quest)) %>% group_by(year_completed_cat) %>% count(asqse_quest) %>% 
  pivot_wider(id_cols = asqse_quest, names_from = year_completed_cat , values_from = n) %>% adorn_totals()

2.5 Table with means

df_full %>%
  filter(!is.na(asqse_quest)) %>%
  select(asqse_quest,year_completed, quest, csum, gmsum, fmsum, cgsum, ps_sum,asqse_total,esq_total) %>%
  tableby(year_completed ~ asqse_total, strata = asqse_quest, 
          control=tableby.control(total=TRUE, cat.simplify=TRUE,
                  numeric.stats = c("meansd", "N","Nmiss")),
          data = .) %>% 
  summary(text = T, digits=2) %>% as.data.frame()
df_full %>%
  filter(!is.na(asqse_quest)) %>%
  select(asqse_quest,year_completed_cat, quest, csum, gmsum, fmsum, cgsum, ps_sum,asqse_total,esq_total) %>%
  tableby(year_completed_cat ~ asqse_total, data = .) %>% 
  summary(text = T, digits=2) %>% as.data.frame()

2.6 Plot with means

df_full %>%
  filter(!is.na(year_completed_cat), !is.na(asqse_quest)) %>%
  select(asqse_quest, year_completed_cat, asqse_total ) %>%
  #filter(asqse_quest == 2) %>%
  # get statistics  
  summarise(mean = mean(asqse_total),
            count = n(),
            sd = sd(asqse_total),
            se = sd/sqrt(n()), .by = year_completed_cat) %>% mutate_if(is.numeric,round,1) %>%
  # plot 
  ggplot(., aes(x = year_completed_cat, y = mean, group = 1)) + 
  
  stat_summary(geom = "pointrange", fun = mean, size=.5) +
  stat_summary(geom = "line", fun = mean, size=1, linetype=3) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), position = position_dodge2(padding = 0.5), width=0.2 ) +
  
  # add text
  #geom_col(alpha = .2, aes(fill = year_completed_cat)) +
  geom_label(aes(label = paste0( mean,"±",sd, "\n(N=",count, ")") ) , fill = "white", vjust = 0.1) +
  
  labs(y = "ASQ:SE-2", x = "Year the questionnaire was completed") +
  theme_bw() 
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_segment()`).

  #scale_fill_brewer(breaks=c("ctrl", "trt1"), type="qual", palette="Set1")
  #scale_fill_manual(values = c("#1f78b4", "#33a02c", "#e31a1c", "#ff7f00", "#6a3d9a", "#a6cee3", "#fdbf6f")) 

2.7 Plot overall trend among years

for(i in 1:(unique(df_full$asqse_quest))) {
  p = df_full %>% #get data
    # filter(!is.na(year_completed_cat), !is.na(asqse_quest)) %>% #keep complete obs
  
    # Plot different graphs
     filter(asqse_quest == asqse_quest[[i]]) %>% 
     
     # get statistics  
     summarise(mean = mean(asqse_total),
               quest_name = paste(unique(asqse_quest), collapse = ","),
               count = n(),
               sd = sd(asqse_total),
               se = sd/sqrt(n()), .by = year_completed_cat) %>% mutate_if(is.numeric,round,1) %>%
     
     # plot 
     ggplot(., aes(x = year_completed_cat, y = mean, shape = quest_name,
                   group = 1)) + #color here is a trick to have titles
     
    annotate("rect", xmin = 3.5, xmax = 5.5, ymin = 0, ymax = Inf , fill= "orange", 
            alpha = 0.2) +
     geom_col(alpha = .2) +
     stat_summary(geom = "pointrange", fun = mean, size=.5) +
     stat_summary(geom = "line", fun = mean, size=1, linetype=3) +
     geom_errorbar(aes(ymin = mean - se, ymax = mean + se), position = position_dodge2(padding = 0.5), width=0.2 ) +
     
     # add text
     geom_label(aes(label = paste0( mean,"±",sd, "\n(N=",count, ")") ) , fill = "white", vjust = 0.2, size = 3.5) +
     labs(y = "ASQ:SE-2", x = "Year the questionnaire was completed", shape = "Age group") +
    
     # add colors
     theme_minimal() + theme(legend.position = "top", 
        legend.justification='left') 

    #print(p)
    ggsave(paste0("myPlot",i,".png"), bg = 'white',plot = p)
}

unique(df_full$asqse_quest)

2.8 Deltas

2.8.1 Tables

df_full %>% 
   filter(!is.na(asqse_quest)) %>%
  select(year_completed_cat, asqse_total, asqse_quest) %>%

  pivot_wider(id_cols = asqse_quest, names_from = year_completed_cat, values_from = asqse_total,
              values_fn = {mean}) %>%
  as.data.frame() %>% 
  setNames(., c("quest","2018","2019",
                                        "2020", "2021", "2022",
                                        "2023", "2024")) %>%
  #compute deltas
  mutate(delta_1918 = cur_data()[["2019"]] - cur_data()[[ "2018" ]]  ) %>%
  mutate(delta_2019 = cur_data()[["2020"]] - cur_data()[[ "2019" ]]  ) %>%
  mutate(delta_2120 = cur_data()[["2021"]] - cur_data()[[ "2020" ]]  ) %>%
  mutate(delta_2221 = cur_data()[["2022"]] - cur_data()[[ "2021" ]]  ) %>%
  mutate(delta_2322 = cur_data()[["2023"]] - cur_data()[[ "2022" ]]  ) %>%
  mutate(delta_2423 = cur_data()[["2024"]] - cur_data()[[ "2023" ]]  ) %>%
  .[gtools::mixedorder(.$quest), ] %>%
  
  adorn_totals(name = "mean") %>% 
    mutate(across(where(is.numeric), 
          ~ replace(., n(), .[n()]/(n()-1)))) %>%
      mutate(across(where(is.numeric), 
          ~ round(., 2)))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `delta_1918 = cur_data()[["2019"]] - cur_data()[["2018"]]`.
## Caused by warning:
## ! `cur_data()` was deprecated in dplyr 1.1.0.
## ℹ Please use `pick()` instead.
df_full %>% 
   filter(!is.na(asqse_quest)) %>%
  select(year_completed_cat, asqse_total, asqse_quest) %>%
  pivot_wider(id_cols = asqse_quest, names_from = year_completed_cat, values_from = asqse_total,
              values_fn = {mean})%>%
  as.data.frame() %>% 
  setNames(., c("quest","2018","2019",
                                        "2020", "2021", "2022",
                                        "2023", "2024")) %>%
  #compute deltas
  mutate(delta_1918 = cur_data()[["2019"]] - cur_data()[[ "2018" ]]  ) %>%
  mutate(delta_2019 = cur_data()[["2020"]] - cur_data()[[ "2019" ]]  ) %>%
  mutate(delta_2120 = cur_data()[["2021"]] - cur_data()[[ "2020" ]]  ) %>%
  mutate(delta_2221 = cur_data()[["2022"]] - cur_data()[[ "2021" ]]  ) %>%
  mutate(delta_2322 = cur_data()[["2023"]] - cur_data()[[ "2022" ]]  ) %>%
  mutate(delta_2423 = cur_data()[["2024"]] - cur_data()[[ "2023" ]]  ) %>%
  .[gtools::mixedorder(.$quest), ] %>%
  
  
 # plot
    select(quest, delta_1918:delta_2423) %>%
    pivot_longer(-quest) %>%
  
    ggplot(., aes(x = name, y = value, group=1)) + 
     stat_summary(geom = "pointrange", fun = mean, size=.5) +
     stat_summary(geom = "line", fun = mean, size=1, linetype=3) +
  stat_summary(geom = "errorbar", width = 0.3) +
  geom_hline(yintercept = 0) +
  theme_bw() 
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_segment()`).

2.8.2 Plot all age groups

df_full %>%
  filter(!is.na(year_completed_cat), !is.na(asqse_quest)) %>%
  mutate(Quest = as.factor(asqse_quest)) %>%

  ggplot(., aes(x = year_completed_cat, y = asqse_total, 
                group = Quest, color = Quest )) +
  stat_summary(geom = "line", fun = mean, size=1) +
  #stat_summary(geom = "errorbar", width = 0.1) +
  gghighlight:: gghighlight(year_completed_cat %in% c("21", "22", "23", "24")  ) +
  labs(y = "ASQ:SE-2", x = "Year the questionnaire was completed") +
  theme_bw() + theme(legend.position = "bottom")
## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...

3 Risk

3.1 Tables

3.2 Table w. totals

df_full %>% 
  filter(!is.na(asqse_quest)) %>%
  filter(year_completed_cat %in% c("19","20","21","22","23")) %>%
  select(year_completed_cat, asqse_quest, asqse_risk) %>%
  filter(asqse_risk == "1") %>%
  pivot_wider(id_cols = asqse_quest, names_from = year_completed_cat, values_from = asqse_risk, 
              values_fn = {function(x) sum(!is.na(x))}, values_fill = 0) %>%
  adorn_totals(where = c("row", "col")) %>%
     .[gtools::mixedorder(.$asqse_quest), ]            

3.3 Table w. percentages

df_full %>% 
   filter(!is.na(asqse_quest)) %>%
  filter(year_completed_cat %in% c("19","20","21","22","23") )%>%
  select(year_completed_cat, asqse_quest, asqse_risk) %>%
  pivot_wider(id_cols = asqse_quest, names_from = year_completed_cat, values_from = asqse_risk,
              values_fn = {mean}) %>%
   .[gtools::mixedorder(.$asqse_quest), ] %>%
  #percentages
  adorn_totals(name = "mean") %>% 
    mutate(across(where(is.numeric), 
          ~ replace(., n(), .[n()]/(n()-1)))) %>%
      mutate(across(where(is.numeric), ~ round ( .*100 ,2) ))

3.4 Plots

df_full %>% 
   filter(!is.na(asqse_quest)) %>%
   select(year_completed_cat, asqse_quest, asqse_risk) %>%
  ggplot(., aes(x = year_completed_cat, y = asqse_risk, group=1)) + #color here is a trick to have titles
  annotate("rect", xmin = 4, xmax = 5, ymin = 0, ymax = Inf , fill= "orange", 
           alpha = 0.2) +
  stat_summary(geom = "pointrange", fun = mean, size=.5) +
  stat_summary(geom = "line", fun = mean, size=1, linetype=3) +
  labs(x = "Year", y = "Risk") +
  scale_y_continuous(labels = scales::percent)+
  theme_bw() 
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_segment()`).

3.5 Plot all age groups

df_full %>% 
   filter(!is.na(asqse_quest)) %>%
  filter(asqse_quest < 60) %>%
   select(year_completed_cat, asqse_quest, asqse_risk) %>%
  ggplot(., aes(x = year_completed_cat, y = asqse_risk, group=1)) + #color here is a trick to have titles
  annotate("rect", xmin = 4, xmax = 5, ymin = 0, ymax = Inf , fill= "orange", 
           alpha = 0.2) +
  stat_summary(geom = "pointrange", fun = mean, size=.5) +
  stat_summary(geom = "line", fun = mean, size=1, linetype=3) +
  labs(x = "Year", y = "Risk") +
  scale_y_continuous(labels = scales::percent)+
  theme_bw() +
  facet_wrap(~asqse_quest)
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Removed 7 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Removed 7 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Removed 7 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Removed 7 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Removed 7 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Removed 7 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Removed 7 rows containing missing values or values outside the scale range
## (`geom_segment()`).

3.6 Table 2

Same results (on Sep 24, 2024)

3.7 Chi square Compare all years

suppressWarnings(
  for (i in c("19","20","21","22","23")) {
    cat("#### tab", i,'\n')
    j = df_full %>% 
      filter(!is.na(asqse_quest)) %>%
      filter(!is.na(momed)) %>% # Here is where we find differences across results.  
      filter(year_completed_cat == i) %>% 
      mutate(sex_male = as.factor(sex_male)) %>%
      mutate(asqse_quest = as.factor(asqse_quest)) %>%
      compareGroups::compareGroups(asqse_risk ~ asqse_quest + race_wbho + sex_male + 
                                     momage + momed + income, data = .,
                                   chisq.test.perm = TRUE, chisq.test.seed = 123) %>% 
      compareGroups::createTable(.)
    
    print(j)
    
  }
)
## #### tab 19 
## 
## --------Summary descriptives table by 'asqse_risk'---------
## 
## ________________________________________________ 
##                   0            1       p.overall 
##                 N=4677       N=1886              
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## asqse_quest:                            <0.001   
##     2        316 (6.76%)  115 (6.10%)            
##     6        763 (16.3%)  163 (8.64%)            
##     12       473 (10.1%)  215 (11.4%)            
##     18       522 (11.2%)  204 (10.8%)            
##     24       455 (9.73%)  248 (13.1%)            
##     30       308 (6.59%)  181 (9.60%)            
##     36       488 (10.4%)  171 (9.07%)            
##     48       622 (13.3%)  272 (14.4%)            
##     60       730 (15.6%)  317 (16.8%)            
## race_wbho:                               0.719   
##     1W       2913 (62.3%) 1148 (60.9%)           
##     2B       503 (10.8%)  217 (11.5%)            
##     3H       499 (10.7%)  206 (10.9%)            
##     Other    762 (16.3%)  315 (16.7%)            
## sex_male:                               <0.001   
##     0        2230 (47.7%) 634 (33.7%)            
##     1        2443 (52.3%) 1250 (66.3%)           
## momage       29.7 (5.53)  27.7 (5.65)   <0.001   
## momed:                                  <0.001   
##     1         82 (1.75%)   76 (4.03%)            
##     2        1288 (27.5%) 916 (48.6%)            
##     3        636 (13.6%)  310 (16.4%)            
##     4        2671 (57.1%) 584 (31.0%)            
## income:                                 <0.001   
##     1        366 (8.24%)  294 (16.7%)            
##     2        360 (8.10%)  274 (15.6%)            
##     3        652 (14.7%)  378 (21.5%)            
##     4        2466 (55.5%) 563 (32.0%)            
##     5        599 (13.5%)  249 (14.2%)            
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## #### tab 20 
## 
## --------Summary descriptives table by 'asqse_risk'---------
## 
## ________________________________________________ 
##                   0            1       p.overall 
##                 N=5353       N=1976              
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## asqse_quest:                            <0.001   
##     2        394 (7.36%)  142 (7.19%)            
##     6        998 (18.6%)  247 (12.5%)            
##     12       764 (14.3%)  329 (16.6%)            
##     18       714 (13.3%)  279 (14.1%)            
##     24       598 (11.2%)  254 (12.9%)            
##     30       410 (7.66%)  156 (7.89%)            
##     36       518 (9.68%)  174 (8.81%)            
##     48       456 (8.52%)  188 (9.51%)            
##     60       501 (9.36%)  207 (10.5%)            
## race_wbho:                               0.055   
##     1W       3412 (63.7%) 1226 (62.0%)           
##     2B       531 (9.92%)  213 (10.8%)            
##     3H       556 (10.4%)  181 (9.16%)            
##     Other    854 (16.0%)  356 (18.0%)            
## sex_male:                               <0.001   
##     0        2585 (48.3%) 753 (38.1%)            
##     1        2766 (51.7%) 1223 (61.9%)           
## momage       29.4 (5.57)  28.1 (5.74)   <0.001   
## momed:                                  <0.001   
##     1        145 (2.71%)  102 (5.16%)            
##     2        1661 (31.0%) 907 (45.9%)            
##     3        853 (15.9%)  298 (15.1%)            
##     4        2694 (50.3%) 669 (33.9%)            
## income:                                 <0.001   
##     1        475 (9.41%)  319 (17.3%)            
##     2        440 (8.72%)  243 (13.2%)            
##     3        889 (17.6%)  403 (21.9%)            
##     4        2501 (49.6%) 616 (33.5%)            
##     5        742 (14.7%)  259 (14.1%)            
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## #### tab 21 
## 
## --------Summary descriptives table by 'asqse_risk'---------
## 
## ______________________________________________ 
##                   0           1      p.overall 
##                N=1586       N=520              
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## asqse_quest:                           0.001   
##     2        94 (5.93%)  36 (6.92%)            
##     6        227 (14.3%) 46 (8.85%)            
##     12       212 (13.4%) 102 (19.6%)           
##     18       255 (16.1%) 83 (16.0%)            
##     24       190 (12.0%) 63 (12.1%)            
##     30       144 (9.08%) 38 (7.31%)            
##     36       174 (11.0%) 42 (8.08%)            
##     48       151 (9.52%) 50 (9.62%)            
##     60       139 (8.76%) 60 (11.5%)            
## race_wbho:                             0.620   
##     1W       980 (61.8%) 332 (63.8%)           
##     2B       191 (12.0%) 59 (11.3%)            
##     3H       176 (11.1%) 48 (9.23%)            
##     Other    239 (15.1%) 81 (15.6%)            
## sex_male:                             <0.001   
##     0        789 (49.7%) 191 (36.7%)           
##     1        797 (50.3%) 329 (63.3%)           
## momage       29.6 (5.64) 28.4 (5.76)  <0.001   
## momed:                                <0.001   
##     1        43 (2.71%)  14 (2.69%)            
##     2        518 (32.7%) 255 (49.0%)           
##     3        252 (15.9%) 71 (13.7%)            
##     4        773 (48.7%) 180 (34.6%)           
## income:                               <0.001   
##     1        159 (10.6%) 71 (14.8%)            
##     2        146 (9.75%) 49 (10.2%)            
##     3        236 (15.8%) 107 (22.2%)           
##     4        751 (50.2%) 186 (38.7%)           
##     5        205 (13.7%) 68 (14.1%)            
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## #### tab 22 
## 
## --------Summary descriptives table by 'asqse_risk'---------
## 
## ______________________________________________ 
##                   0           1      p.overall 
##                N=1170       N=736              
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## asqse_quest:                          <0.001   
##     2        69 (5.90%)  34 (4.62%)            
##     6        150 (12.8%) 43 (5.84%)            
##     12       156 (13.3%) 90 (12.2%)            
##     18       165 (14.1%) 132 (17.9%)           
##     24       152 (13.0%) 107 (14.5%)           
##     30       100 (8.55%) 67 (9.10%)            
##     36       121 (10.3%) 81 (11.0%)            
##     48       131 (11.2%) 102 (13.9%)           
##     60       126 (10.8%) 80 (10.9%)            
## race_wbho:                             0.019   
##     1W       655 (56.0%) 456 (62.0%)           
##     2B       217 (18.5%) 101 (13.7%)           
##     3H       136 (11.6%) 75 (10.2%)            
##     Other    162 (13.8%) 104 (14.1%)           
## sex_male:                             <0.001   
##     0        549 (46.9%) 265 (36.1%)           
##     1        621 (53.1%) 470 (63.9%)           
## momage       29.6 (5.71) 28.0 (5.68)  <0.001   
## momed:                                <0.001   
##     1        43 (3.68%)  36 (4.89%)            
##     2        408 (34.9%) 367 (49.9%)           
##     3        198 (16.9%) 116 (15.8%)           
##     4        521 (44.5%) 217 (29.5%)           
## income:                               <0.001   
##     1        134 (12.2%) 113 (16.2%)           
##     2        102 (9.26%) 59 (8.45%)            
##     3        169 (15.3%) 157 (22.5%)           
##     4        77 (6.99%)  17 (2.44%)            
##     5        279 (25.3%) 203 (29.1%)           
##     8        341 (30.9%) 149 (21.3%)           
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## #### tab 23 
## 
## --------Summary descriptives table by 'asqse_risk'---------
## 
## ________________________________________________ 
##                   0            1       p.overall 
##                 N=4110       N=2480              
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## asqse_quest:                            <0.001   
##     2        202 (4.91%)   93 (3.75%)            
##     6        521 (12.7%)  173 (6.98%)            
##     12       615 (15.0%)  444 (17.9%)            
##     18       659 (16.0%)  474 (19.1%)            
##     24       587 (14.3%)  346 (14.0%)            
##     30       355 (8.64%)  211 (8.51%)            
##     36       465 (11.3%)  262 (10.6%)            
##     48       391 (9.51%)  272 (11.0%)            
##     60       315 (7.66%)  205 (8.27%)            
## race_wbho:                               0.145   
##     1W       2724 (66.3%) 1595 (64.3%)           
##     2B       427 (10.4%)  258 (10.4%)            
##     3H       347 (8.44%)  249 (10.0%)            
##     Other    612 (14.9%)  378 (15.2%)            
## sex_male:                               <0.001   
##     0        1851 (45.1%) 928 (37.4%)            
##     1        2257 (54.9%) 1552 (62.6%)           
## momage       29.5 (5.44)  28.1 (5.72)   <0.001   
## momed:                                  <0.001   
##     1         83 (2.02%)  129 (5.20%)            
##     2        1362 (33.1%) 1258 (50.7%)           
##     3        662 (16.1%)  371 (15.0%)            
##     4        2003 (48.7%) 722 (29.1%)            
## income:                                 <0.001   
##     1        294 (7.50%)  310 (13.3%)            
##     2        240 (6.12%)  259 (11.1%)            
##     3        522 (13.3%)  464 (19.9%)            
##     5        1158 (29.5%) 756 (32.4%)            
##     8        1707 (43.5%) 545 (23.4%)            
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ

– Comparegroups runs a t test grouping the predictors

df_full %>% 
  filter(!is.na(asqse_quest)) %>%
  filter(!is.na(momed)) %>%
  filter(year_completed == "22") %>%
  #t.test(momage ~ asqse_risk, data =.) %>%
  {gmodels::CrossTable(.$race_wbho, .$asqse_risk, prop.r = T, expected = T, prop.chisq = F, prop.c = F, prop.t = F)}  
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |              Expected N |
## |           N / Row Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1906 
## 
##  
##              | .$asqse_risk 
##  .$race_wbho |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##           1W |       655 |       456 |      1111 | 
##              |   681.988 |   429.012 |           | 
##              |     0.590 |     0.410 |     0.583 | 
## -------------|-----------|-----------|-----------|
##           2B |       217 |       101 |       318 | 
##              |   195.205 |   122.795 |           | 
##              |     0.682 |     0.318 |     0.167 | 
## -------------|-----------|-----------|-----------|
##           3H |       136 |        75 |       211 | 
##              |   129.523 |    81.477 |           | 
##              |     0.645 |     0.355 |     0.111 | 
## -------------|-----------|-----------|-----------|
##        Other |       162 |       104 |       266 | 
##              |   163.284 |   102.716 |           | 
##              |     0.609 |     0.391 |     0.140 | 
## -------------|-----------|-----------|-----------|
## Column Total |      1170 |       736 |      1906 | 
## -------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  9.932958     d.f. =  3     p =  0.0191447 
## 
## 
## 

And its run a chi-square for categorical variables

df_full %>% 
  filter(!is.na(momed)) %>% # 5 is I don't know
  filter(year_completed == "22") %>%
  {chisq.test(.$asqse_risk, .$race_wbo)}  
## 
##  Pearson's Chi-squared test
## 
## data:  .$asqse_risk and .$race_wbo
## X-squared = 9.933, df = 3, p-value = 0.01914

3.8 Chi square all Compare years

suppressWarnings(
  for (i in c(2,6,12,18,24,30,36,48,60)) {
    cat("#### tab", i,'\n')
    j = df_full %>% 
      filter(!is.na(momed)) %>% # 5 is I don't know
      filter(asqse_quest == i) %>% 
      mutate(sex_male = as.factor(sex_male)) %>%
      compareGroups::compareGroups(asqse_risk ~ year_completed_cat + sex_male + 
                                     race + momage + momed + income, data = .,
                                   chisq.test.perm = TRUE, chisq.test.seed = 123) %>% 
      compareGroups::createTable(.)
    
    print(j)
    
  }
)
## #### tab 2 
## 
## --------Summary descriptives table by 'asqse_risk'---------
## 
## _____________________________________________________ 
##                          0           1      p.overall 
##                       N=1078       N=423              
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## year_completed_cat:                           0.304   
##     18               1 (0.09%)   0 (0.00%)            
##     19              316 (29.3%) 115 (27.2%)           
##     20              394 (36.5%) 142 (33.6%)           
##     21              94 (8.72%)  36 (8.51%)            
##     22              69 (6.40%)  34 (8.04%)            
##     23              202 (18.7%) 93 (22.0%)            
##     24               2 (0.19%)   3 (0.71%)            
## sex_male:                                     0.348   
##     0               531 (49.3%) 196 (46.4%)           
##     1               546 (50.7%) 226 (53.6%)           
## race:                                         0.276   
##     Black           134 (14.3%) 39 (11.2%)            
##     Hispanic_Latino 97 (10.3%)  42 (12.0%)            
##     White           707 (75.4%) 268 (76.8%)           
## momage              29.2 (5.74) 29.0 (5.89)   0.656   
## momed:                                       <0.001   
##     1               25 (2.32%)  19 (4.49%)            
##     2               384 (35.6%) 195 (46.1%)           
##     3               173 (16.0%) 69 (16.3%)            
##     4               496 (46.0%) 140 (33.1%)           
## income:                                      <0.001   
##     1               112 (10.7%) 74 (18.7%)            
##     2               97 (9.31%)  55 (13.9%)            
##     3               168 (16.1%) 75 (18.9%)            
##     4               358 (34.4%) 95 (24.0%)            
##     5               210 (20.2%) 67 (16.9%)            
##     8               97 (9.31%)  30 (7.58%)            
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## #### tab 6 
## 
## --------Summary descriptives table by 'asqse_risk'---------
## 
## ______________________________________________________ 
##                          0            1      p.overall 
##                        N=2670       N=674              
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## year_completed_cat:                            0.013   
##     18               5 (0.19%)    1 (0.15%)            
##     19              763 (28.6%)  163 (24.2%)           
##     20              998 (37.4%)  247 (36.6%)           
##     21              227 (8.50%)  46 (6.82%)            
##     22              150 (5.62%)  43 (6.38%)            
##     23              521 (19.5%)  173 (25.7%)           
##     24               6 (0.22%)    1 (0.15%)            
## sex_male:                                      0.297   
##     0               1294 (48.5%) 311 (46.1%)           
##     1               1375 (51.5%) 363 (53.9%)           
## race:                                          0.599   
##     Black           241 (10.7%)  52 (9.29%)            
##     Hispanic_Latino 259 (11.5%)  65 (11.6%)            
##     White           1743 (77.7%) 443 (79.1%)           
## momage              29.5 (5.43)  28.2 (5.72)  <0.001   
## momed:                                        <0.001   
##     1                55 (2.06%)  29 (4.30%)            
##     2               882 (33.0%)  314 (46.6%)           
##     3               398 (14.9%)  97 (14.4%)            
##     4               1335 (50.0%) 234 (34.7%)           
## income:                                       <0.001   
##     1               239 (9.43%)  92 (14.8%)            
##     2               207 (8.17%)  74 (11.9%)            
##     3               414 (16.3%)  114 (18.4%)           
##     4               992 (39.1%)  186 (30.0%)           
##     5               417 (16.4%)  107 (17.3%)           
##     8               266 (10.5%)  47 (7.58%)            
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## #### tab 12 
## 
## --------Summary descriptives table by 'asqse_risk'---------
## 
## ______________________________________________________ 
##                          0            1      p.overall 
##                        N=2232      N=1187              
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## year_completed_cat:                           <0.001   
##     18               2 (0.09%)    1 (0.08%)            
##     19              473 (21.2%)  215 (18.1%)           
##     20              764 (34.2%)  329 (27.7%)           
##     21              212 (9.50%)  102 (8.59%)           
##     22              156 (6.99%)  90 (7.58%)            
##     23              615 (27.6%)  444 (37.4%)           
##     24               10 (0.45%)   6 (0.51%)            
## sex_male:                                     <0.001   
##     0               1102 (49.4%) 447 (37.7%)           
##     1               1128 (50.6%) 738 (62.3%)           
## race:                                          0.008   
##     Black           170 (9.09%)  127 (12.7%)           
##     Hispanic_Latino 195 (10.4%)  110 (11.0%)           
##     White           1505 (80.5%) 764 (76.3%)           
## momage              30.1 (5.41)  28.9 (5.59)  <0.001   
## momed:                                        <0.001   
##     1                42 (1.88%)  35 (2.95%)            
##     2               553 (24.8%)  502 (42.3%)           
##     3               316 (14.2%)  173 (14.6%)           
##     4               1321 (59.2%) 477 (40.2%)           
## income:                                       <0.001   
##     1               151 (7.15%)  124 (11.2%)           
##     2               136 (6.44%)  102 (9.17%)           
##     3               250 (11.8%)  215 (19.3%)           
##     4               851 (40.3%)  277 (24.9%)           
##     5               357 (16.9%)  246 (22.1%)           
##     8               367 (17.4%)  148 (13.3%)           
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## #### tab 18 
## 
## --------Summary descriptives table by 'asqse_risk'---------
## 
## ______________________________________________________ 
##                          0            1      p.overall 
##                        N=2325      N=1177              
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## year_completed_cat:                           <0.001   
##     18               1 (0.04%)    1 (0.08%)            
##     19              522 (22.5%)  204 (17.3%)           
##     20              714 (30.7%)  279 (23.7%)           
##     21              255 (11.0%)  83 (7.05%)            
##     22              165 (7.10%)  132 (11.2%)           
##     23              659 (28.3%)  474 (40.3%)           
##     24               9 (0.39%)    4 (0.34%)            
## sex_male:                                     <0.001   
##     0               1092 (47.0%) 436 (37.0%)           
##     1               1233 (53.0%) 741 (63.0%)           
## race:                                         <0.001   
##     Black           194 (9.90%)  145 (15.1%)           
##     Hispanic_Latino 238 (12.1%)  120 (12.5%)           
##     White           1528 (78.0%) 698 (72.5%)           
## momage              30.0 (5.35)  28.3 (5.71)  <0.001   
## momed:                                        <0.001   
##     1                57 (2.45%)  56 (4.76%)            
##     2               591 (25.4%)  587 (49.9%)           
##     3               354 (15.2%)  192 (16.3%)           
##     4               1323 (56.9%) 342 (29.1%)           
## income:                                       <0.001   
##     1               171 (7.73%)  163 (14.8%)           
##     2               144 (6.51%)  145 (13.2%)           
##     3               297 (13.4%)  214 (19.4%)           
##     4               821 (37.1%)  189 (17.2%)           
##     5               429 (19.4%)  267 (24.2%)           
##     8               351 (15.9%)  124 (11.3%)           
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## #### tab 24 
## 
## --------Summary descriptives table by 'asqse_risk'---------
## 
## ______________________________________________________ 
##                          0            1      p.overall 
##                        N=1987      N=1023              
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## year_completed_cat:                           <0.001   
##     18               3 (0.15%)    0 (0.00%)            
##     19              455 (22.9%)  248 (24.2%)           
##     20              598 (30.1%)  254 (24.8%)           
##     21              190 (9.56%)  63 (6.16%)            
##     22              152 (7.65%)  107 (10.5%)           
##     23              587 (29.5%)  346 (33.8%)           
##     24               2 (0.10%)    5 (0.49%)            
## sex_male:                                     <0.001   
##     0               911 (45.8%)  348 (34.0%)           
##     1               1076 (54.2%) 675 (66.0%)           
## race:                                         <0.001   
##     Black           188 (11.3%)  136 (16.1%)           
##     Hispanic_Latino 177 (10.7%)  122 (14.5%)           
##     White           1293 (78.0%) 585 (69.4%)           
## momage              29.8 (5.40)  27.7 (5.65)  <0.001   
## momed:                                        <0.001   
##     1                43 (2.16%)  44 (4.30%)            
##     2               593 (29.8%)  511 (50.0%)           
##     3               311 (15.7%)  158 (15.4%)           
##     4               1040 (52.3%) 310 (30.3%)           
## income:                                       <0.001   
##     1               148 (7.84%)  164 (17.0%)           
##     2               130 (6.89%)  123 (12.8%)           
##     3               276 (14.6%)  205 (21.3%)           
##     4               667 (35.3%)  145 (15.1%)           
##     5               373 (19.8%)  225 (23.4%)           
##     8               293 (15.5%)  100 (10.4%)           
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## #### tab 30 
## 
## --------Summary descriptives table by 'asqse_risk'---------
## 
## _____________________________________________________ 
##                          0           1      p.overall 
##                       N=1324       N=655              
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## year_completed_cat:                          <0.001   
##     18               4 (0.30%)   0 (0.00%)            
##     19              308 (23.3%) 181 (27.6%)           
##     20              410 (31.0%) 156 (23.8%)           
##     21              144 (10.9%) 38 (5.80%)            
##     22              100 (7.55%) 67 (10.2%)            
##     23              355 (26.8%) 211 (32.2%)           
##     24               3 (0.23%)   2 (0.31%)            
## sex_male:                                    <0.001   
##     0               589 (44.6%) 214 (32.7%)           
##     1               733 (55.4%) 441 (67.3%)           
## race:                                         0.548   
##     Black           143 (12.9%) 83 (14.8%)            
##     Hispanic_Latino 127 (11.5%) 61 (10.9%)            
##     White           838 (75.6%) 416 (74.3%)           
## momage              29.7 (5.70) 28.0 (5.61)  <0.001   
## momed:                                       <0.001   
##     1               33 (2.49%)  27 (4.12%)            
##     2               377 (28.5%) 348 (53.1%)           
##     3               223 (16.8%) 124 (18.9%)           
##     4               691 (52.2%) 156 (23.8%)           
## income:                                      <0.001   
##     1               91 (7.21%)  99 (16.3%)            
##     2               96 (7.61%)  78 (12.8%)            
##     3               202 (16.0%) 145 (23.8%)           
##     4               435 (34.5%) 100 (16.4%)           
##     5               271 (21.5%) 132 (21.7%)           
##     8               167 (13.2%) 54 (8.88%)            
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## #### tab 36 
## 
## --------Summary descriptives table by 'asqse_risk'---------
## 
## ______________________________________________________ 
##                          0            1      p.overall 
##                        N=1775       N=731              
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## year_completed_cat:                           <0.001   
##     18               2 (0.11%)    0 (0.00%)            
##     19              488 (27.5%)  171 (23.4%)           
##     20              518 (29.2%)  174 (23.8%)           
##     21              174 (9.80%)  42 (5.75%)            
##     22              121 (6.82%)  81 (11.1%)            
##     23              465 (26.2%)  262 (35.8%)           
##     24               7 (0.39%)    1 (0.14%)            
## sex_male:                                     <0.001   
##     0               814 (45.9%)  246 (33.7%)           
##     1               961 (54.1%)  485 (66.3%)           
## race:                                          0.398   
##     Black           213 (14.3%)  78 (13.1%)            
##     Hispanic_Latino 185 (12.4%)  86 (14.5%)            
##     White           1091 (73.3%) 430 (72.4%)           
## momage              29.3 (5.44)  27.7 (5.72)  <0.001   
## momed:                                        <0.001   
##     1                41 (2.31%)  37 (5.06%)            
##     2               575 (32.4%)  369 (50.5%)           
##     3               294 (16.6%)  123 (16.8%)           
##     4               865 (48.7%)  202 (27.6%)           
## income:                                       <0.001   
##     1               129 (7.67%)  111 (16.3%)           
##     2               156 (9.27%)  81 (11.9%)            
##     3               296 (17.6%)  180 (26.4%)           
##     4               559 (33.2%)  101 (14.8%)           
##     5               325 (19.3%)  145 (21.2%)           
##     8               217 (12.9%)  65 (9.52%)            
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## #### tab 48 
## 
## --------Summary descriptives table by 'asqse_risk'---------
## 
## _____________________________________________________ 
##                          0           1      p.overall 
##                       N=1757       N=890              
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## year_completed_cat:                          <0.001   
##     18               2 (0.11%)   1 (0.11%)            
##     19              622 (35.4%) 272 (30.6%)           
##     20              456 (26.0%) 188 (21.1%)           
##     21              151 (8.59%) 50 (5.62%)            
##     22              131 (7.46%) 102 (11.5%)           
##     23              391 (22.3%) 272 (30.6%)           
##     24               4 (0.23%)   5 (0.56%)            
## sex_male:                                    <0.001   
##     0               846 (48.2%) 304 (34.2%)           
##     1               910 (51.8%) 586 (65.8%)           
## race:                                        <0.001   
##     Black           289 (19.3%) 106 (13.9%)           
##     Hispanic_Latino 233 (15.5%) 75 (9.82%)            
##     White           978 (65.2%) 583 (76.3%)           
## momage              28.9 (5.65) 27.4 (5.56)  <0.001   
## momed:                                       <0.001   
##     1               39 (2.22%)  44 (4.94%)            
##     2               666 (37.9%) 452 (50.8%)           
##     3               258 (14.7%) 111 (12.5%)           
##     4               794 (45.2%) 283 (31.8%)           
## income:                                      <0.001   
##     1               197 (11.9%) 137 (16.4%)           
##     2               162 (9.81%) 115 (13.8%)           
##     3               283 (17.1%) 179 (21.5%)           
##     4               520 (31.5%) 132 (15.8%)           
##     5               306 (18.5%) 194 (23.3%)           
##     8               183 (11.1%) 77 (9.23%)            
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## #### tab 60 
## 
## --------Summary descriptives table by 'asqse_risk'---------
## 
## ______________________________________________________ 
##                          0            1      p.overall 
##                        N=1814       N=870              
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ 
## year_completed_cat:                           <0.001   
##     18               0 (0.00%)    1 (0.11%)            
##     19              730 (40.2%)  317 (36.4%)           
##     20              501 (27.6%)  207 (23.8%)           
##     21              139 (7.66%)  60 (6.90%)            
##     22              126 (6.95%)  80 (9.20%)            
##     23              315 (17.4%)  205 (23.6%)           
##     24               3 (0.17%)    0 (0.00%)            
## sex_male:                                     <0.001   
##     0               854 (47.1%)  284 (32.6%)           
##     1               959 (52.9%)  586 (67.4%)           
## race:                                         <0.001   
##     Black           302 (19.4%)  86 (11.4%)            
##     Hispanic_Latino 211 (13.6%)  79 (10.5%)            
##     White           1042 (67.0%) 589 (78.1%)           
## momage              29.0 (5.89)  26.8 (5.76)  <0.001   
## momed:                                        <0.001   
##     1                61 (3.36%)  68 (7.82%)            
##     2               638 (35.2%)  436 (50.1%)           
##     3               285 (15.7%)  124 (14.3%)           
##     4               830 (45.8%)  242 (27.8%)           
## income:                                       <0.001   
##     1               195 (11.6%)  145 (17.6%)           
##     2               165 (9.77%)  112 (13.6%)           
##     3               288 (17.1%)  185 (22.5%)           
##     4               604 (35.8%)  161 (19.6%)           
##     5               308 (18.2%)  162 (19.7%)           
##     8               128 (7.58%)  58 (7.05%)            
## ÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻÂŻ

3.9 Table 3

3.10 Chi square for trend

Cochran-Armitage test

for (i in c(2,6,12,18,24,30,36,48,60)) { #child's age
  j =  df_full %>% 
    filter(!is.na(asqse_quest)) %>%
    filter(year_completed_cat %in% c("19","20","21","22","23") )%>%
    filter(asqse_quest == i) %>%
    select(year_completed_cat, asqse_risk) %>%
    # trend analysis
    {table(.$year_completed_cat, .$asqse_risk)} %>%
    DescTools::CochranArmitageTest(., alternative = c("one.sided"))
  print(i)
  print(j)
  rm(j,i)
}
## Registered S3 method overwritten by 'DescTools':
##   method         from 
##   reorder.factor gdata
## [1] 2
## 
##  Cochran-Armitage test for trend
## 
## data:  .
## Z = -1.9876, dim = 7, p-value = 0.02343
## alternative hypothesis: one.sided
## 
## [1] 6
## 
##  Cochran-Armitage test for trend
## 
## data:  .
## Z = -3.8014, dim = 7, p-value = 7.194e-05
## alternative hypothesis: one.sided
## 
## [1] 12
## 
##  Cochran-Armitage test for trend
## 
## data:  .
## Z = -5.9052, dim = 7, p-value = 1.761e-09
## alternative hypothesis: one.sided
## 
## [1] 18
## 
##  Cochran-Armitage test for trend
## 
## data:  .
## Z = -7.832, dim = 7, p-value = 2.442e-15
## alternative hypothesis: one.sided
## 
## [1] 24
## 
##  Cochran-Armitage test for trend
## 
## data:  .
## Z = -2.4228, dim = 7, p-value = 0.0077
## alternative hypothesis: one.sided
## 
## [1] 30
## 
##  Cochran-Armitage test for trend
## 
## data:  .
## Z = -1.5075, dim = 7, p-value = 0.06584
## alternative hypothesis: one.sided
## 
## [1] 36
## 
##  Cochran-Armitage test for trend
## 
## data:  .
## Z = -5.1863, dim = 7, p-value = 1.073e-07
## alternative hypothesis: one.sided
## 
## [1] 48
## 
##  Cochran-Armitage test for trend
## 
## data:  .
## Z = -5.5076, dim = 7, p-value = 1.819e-08
## alternative hypothesis: one.sided
## 
## [1] 60
## 
##  Cochran-Armitage test for trend
## 
## data:  .
## Z = -3.9487, dim = 7, p-value = 3.93e-05
## alternative hypothesis: one.sided
df_full %>% 
  filter(!is.na(asqse_quest)) %>%
  filter(year_completed_cat %in% c("19","20","21","22","23") )%>%
  filter(asqse_quest == 2) %>%
  select(year_completed_cat, asqse_risk)  %>%
  {table(.$year_completed_cat, .$asqse_risk)} %>%
  DescTools::CochranArmitageTest(., alternative = c("two.sided", "one.sided"))
## 
##  Cochran-Armitage test for trend
## 
## data:  .
## Z = -1.9876, dim = 7, p-value = 0.04686
## alternative hypothesis: two.sided
df_full %>% 
  filter(!is.na(asqse_quest)) %>%
  filter(year_completed_cat %in% c("19","20","21","22","23") )%>%
  select(year_completed_cat, asqse_risk) %>%
  mutate_all(., ~as.factor(.)) %>%
  tableby(year_completed_cat  ~ asqse_risk, data = .) %>% summary(digits = 2, text = T) %>%
  as.data.frame()
df_full %>% 
      filter(momed %in% (1:4)) %>% # 5 is I don't know
      filter(asqse_quest == 30) %>% 
      mutate_at(vars(asqse_risk,sex_male,year_completed_cat),~as.factor(.)) %>%
  glm(asqse_risk ~ year_completed_cat ,
      family = binomial(link = "logit"),
      data = .) %>% anova(.)

3.11 Matching sampling Table 4

3.11.1 Dataset

df_full %>%
  filter(!is.na(asqse_quest)) %>%
  filter(year_completed_cat == "19") %>%
  filter(asqse_quest == 24) %>%
  select(asqse_quest,sex_male, momage, momed, income) %>%
  mutate_all(., ~as.numeric(.)) %>%
  mutate_all(~ifelse(is.na(.x), median(.x, na.rm = TRUE), .x))
df_full_sampling <- df_full %>%
  select(id, 
         year_completed_cat, asqse_quest, race, sex_male, momage, momed, income_poor , asqse_total, asqse_risk) %>%
  mutate(momed = case_when(
    momed == "1" ~ "1",
    momed == "2" ~ "2",
    momed == "3" ~ "3",
    momed == "4" ~ "3",
    TRUE ~ NA_character_
  )) %>%
  mutate(race = case_when(
    race == "White" ~ "1W",
    race == "Black" ~ "2B",
    TRUE ~ "Other"
  )) %>%
  na.omit()
write.csv(df_full_sampling, "df_full_sampling.csv", row.names = F)
df_full_sampling %>%
  tableby(year_completed_cat ~ asqse_risk +  race + momage + momed + income_poor, data =.) %>% summary(text=T, digits=2) %>% as.data.frame()

3.11.2 1 year

3.11.3 Matching - work with dynamic dataframe

library(MatchIt)
# Filter datasets for 2019, 2020, 2021, 2022, and 2023
df_2019 <- df_full_sampling %>%
  filter(year_completed_cat == "19", asqse_quest == "12")
cat("Number of rows for 2019: ", nrow(df_2019), "\n")
## Number of rows for 2019:  635
df_2020 <- df_full_sampling %>%
  filter(year_completed_cat == "20", asqse_quest == "12")
cat("Number of rows for 2020: ", nrow(df_2020), "\n")
## Number of rows for 2020:  964
df_2021 <- df_full_sampling %>%
  filter(year_completed_cat == "21", asqse_quest == "12")
cat("Number of rows for 2021: ", nrow(df_2021), "\n")
## Number of rows for 2021:  279
df_2022 <- df_full_sampling %>%
  filter(year_completed_cat == "22", asqse_quest == "12")
cat("Number of rows for 2022: ", nrow(df_2022), "\n")
## Number of rows for 2022:  215
df_2023 <- df_full_sampling %>%
  filter(year_completed_cat == "23", asqse_quest == "12")
cat("Number of rows for 2023: ", nrow(df_2023), "\n")
## Number of rows for 2023:  933
# Function to perform matching and return matched data
perform_matching <- function(df_main, df_other, main_year_label, other_year_label) {
  set.seed(1)
  data <- bind_rows(df_main, df_other, .id = "year") %>%
    mutate(year = ifelse(year == "1", 1, 0))  # 1 for main year, 0 for the other year
  
  match_obj <- matchit(year ~ asqse_quest + race + sex_male + momage + momed + income_poor,
                       data = data, 
                       ratio = 1,
                       exact = ~ momed + race ,
                       method = "optimal")
  #plot(match_obj, type = 'density')
  matched_data <- match.data(match_obj)
  matched_data <- matched_data %>% mutate(compared_year = other_year_label)
  
  return(matched_data)
}

# Perform matching for each year compared to 2021
matched_data_2019 <- perform_matching(df_2021, df_2019, "2021", "2019")
## Warning: Fewer control units than treated units in some `exact` strata; not all
## treated units will get a match.
matched_data_2020 <- perform_matching(df_2021, df_2020, "2021", "2020")
matched_data_2022 <- perform_matching(df_2021, df_2022, "2021", "2022")
## Warning: Fewer control units than treated units in some `exact` strata; not all
## treated units will get a match.
matched_data_2023 <- perform_matching(df_2021, df_2023, "2021", "2023")

# Combine all matched datasets
all_matched_data_1 <- bind_rows(matched_data_2019, matched_data_2020, matched_data_2022, matched_data_2023)

# Print the first few rows of the combined matched data to verify
#head(all_matched_data)

3.11.4 Fine tuning

all_matched_data_1 = all_matched_data_1 %>% 
  group_by(year_completed_cat) %>%
  arrange(year_completed_cat, row_number()) %>%  
  slice(1:200)

3.11.5 Compare results

all_matched_data_1 %>%
  tableby(year_completed_cat ~ as.factor(asqse_risk) +  race + momage + momed + as.factor(sex_male) + income_poor, data =.) %>% summary(text=T, digits=2) %>% as.data.frame()
df_full_sampling %>% 
  select(momage, year_completed_cat) %>%
  ggplot(., aes(year_completed_cat, momage, group=1)) +
  stat_summary(geom ="line")
## No summary function supplied, defaulting to `mean_se()`

a=df_full_sampling %>% 
  select(momage, year_completed_cat,income_poor) %>%
  ggplot(., aes(year_completed_cat, momage, group=income_poor,color=income_poor)) +
  stat_summary(geom ="line") 
b=all_matched_data %>% 
  select(momage, year_completed_cat,income_poor) %>%
  ggplot(., aes(year_completed_cat, momage, group=income_poor,color=income_poor)) +
  stat_summary(geom ="line")

a+b
rm(a,b)
gmodels::CrossTable(all_matched_data$year_completed_cat,all_matched_data$asqse_risk, expected = T, chisq = T)

3.11.6 2 years

# Filter datasets for 2019, 2020, 2021, 2022, and 2023
df_2019 <- df_full_sampling %>%
  filter(year_completed_cat == "19", asqse_quest == "24")
cat("Number of rows for 2019: ", nrow(df_2019), "\n")
## Number of rows for 2019:  645
df_2020 <- df_full_sampling %>%
  filter(year_completed_cat == "20", asqse_quest == "24")
cat("Number of rows for 2020: ", nrow(df_2020), "\n")
## Number of rows for 2020:  762
df_2021 <- df_full_sampling %>%
  filter(year_completed_cat == "21", asqse_quest == "24")
cat("Number of rows for 2021: ", nrow(df_2021), "\n")
## Number of rows for 2021:  229
df_2022 <- df_full_sampling %>%
  filter(year_completed_cat == "22", asqse_quest == "24")
cat("Number of rows for 2022: ", nrow(df_2022), "\n")
## Number of rows for 2022:  228
df_2023 <- df_full_sampling %>%
  filter(year_completed_cat == "23", asqse_quest == "24")
cat("Number of rows for 2023: ", nrow(df_2023), "\n")
## Number of rows for 2023:  851
# Function to perform matching and return matched data
perform_matching <- function(df_main, df_other, main_year_label, other_year_label) {
  set.seed(1)
  data <- bind_rows(df_main, df_other, .id = "year") %>%
    mutate(year = ifelse(year == "1", 1, 0))  # 1 for main year, 0 for the other year
  
  match_obj <- matchit(year ~ asqse_quest + race + sex_male + momage + momed + income_poor,
                       data = data, 
                       exact = ~ momed + income_poor + sex_male,
                       method = "optimal")
  
  matched_data <- match.data(match_obj)
  matched_data <- matched_data %>% mutate(compared_year = other_year_label)
  
  return(matched_data)
}

# Perform matching for each year compared to 2021
matched_data_2019 <- perform_matching(df_2021, df_2019, "2021", "2019")
matched_data_2020 <- perform_matching(df_2021, df_2020, "2021", "2020")
matched_data_2022 <- perform_matching(df_2021, df_2022, "2021", "2022")
## Warning: Fewer control units than treated units in some `exact` strata; not all
## treated units will get a match.
matched_data_2023 <- perform_matching(df_2021, df_2023, "2021", "2023")

# Combine all matched datasets
all_matched_data_2 <- bind_rows(matched_data_2019, matched_data_2020, matched_data_2022, matched_data_2023)

# Print the first few rows of the combined matched data to verify
#head(all_matched_data)

3.11.7 Fine tuning

all_matched_data_2 = all_matched_data_2 %>% 
  group_by(year_completed_cat) %>%
  arrange(year_completed_cat, row_number()) %>% 
  slice(1:200)

3.11.8 Compare results

all_matched_data_2 %>%
  tableby(year_completed_cat ~ as.factor(asqse_risk) + race + momage + momed + as.factor(sex_male) + income_poor, data =.) %>% summary(text=T, digits=2) %>% as.data.frame()

3.11.9 3 years

# Filter datasets for 2019, 2020, 2021, 2022, and 2023
df_2019 <- df_full_sampling %>%
  filter(year_completed_cat == "19", asqse_quest == "36")
cat("Number of rows for 2019: ", nrow(df_2019), "\n")
## Number of rows for 2019:  614
df_2020 <- df_full_sampling %>%
  filter(year_completed_cat == "20", asqse_quest == "36")
cat("Number of rows for 2020: ", nrow(df_2020), "\n")
## Number of rows for 2020:  615
df_2021 <- df_full_sampling %>%
  filter(year_completed_cat == "21", asqse_quest == "36")
cat("Number of rows for 2021: ", nrow(df_2021), "\n")
## Number of rows for 2021:  199
df_2022 <- df_full_sampling %>%
  filter(year_completed_cat == "22", asqse_quest == "36")
cat("Number of rows for 2022: ", nrow(df_2022), "\n")
## Number of rows for 2022:  184
df_2023 <- df_full_sampling %>%
  filter(year_completed_cat == "23", asqse_quest == "36")
cat("Number of rows for 2023: ", nrow(df_2023), "\n")
## Number of rows for 2023:  664
# Function to perform matching and return matched data
perform_matching <- function(df_main, df_other, main_year_label, other_year_label) {
  set.seed(1) 
  data <- bind_rows(df_main, df_other, .id = "year") %>%
    mutate(year = ifelse(year == "1", 1, 0))  # 1 for main year, 0 for the other year
  
  match_obj <- matchit(year ~ asqse_quest + race + sex_male + momage + momed + income_poor,
                       data = data, 
                       exact = ~ momed + income_poor,
                       method = "optimal")
  
  matched_data <- match.data(match_obj)
  matched_data <- matched_data %>% mutate(compared_year = other_year_label)
  
  return(matched_data)
}

# Perform matching for each year compared to 2021
matched_data_2019 <- perform_matching(df_2021, df_2019, "2021", "2019")
matched_data_2020 <- perform_matching(df_2021, df_2020, "2021", "2020")
matched_data_2022 <- perform_matching(df_2021, df_2022, "2021", "2022")
## Warning: Fewer control units than treated units in some `exact` strata; not all
## treated units will get a match.
matched_data_2023 <- perform_matching(df_2021, df_2023, "2021", "2023")

# Combine all matched datasets
all_matched_data_3 <- bind_rows(matched_data_2019, matched_data_2020, matched_data_2022, matched_data_2023)

3.11.10 Fine tuning

3 years old

all_matched_data_3 = all_matched_data_3 %>% 
  group_by(year_completed_cat) %>%
  arrange(year_completed_cat, row_number()) %>% 
  slice(1:200)

3.11.11 Compare results

all_matched_data_3 %>%
  tableby(year_completed_cat ~ as.factor(asqse_risk) +  race + momage + momed + as.factor(sex_male) + income_poor, data =.) %>% summary(text=T, digits=2) %>% as.data.frame()

3.11.12 4 years

# Filter datasets for 2019, 2020, 2021, 2022, and 2023
df_2019 <- df_full_sampling %>%
  filter(year_completed_cat == "19", asqse_quest == "48")
cat("Number of rows for 2019: ", nrow(df_2019), "\n")
## Number of rows for 2019:  826
df_2020 <- df_full_sampling %>%
  filter(year_completed_cat == "20", asqse_quest == "48")
cat("Number of rows for 2020: ", nrow(df_2020), "\n")
## Number of rows for 2020:  588
df_2021 <- df_full_sampling %>%
  filter(year_completed_cat == "21", asqse_quest == "48")
cat("Number of rows for 2021: ", nrow(df_2021), "\n")
## Number of rows for 2021:  181
df_2022 <- df_full_sampling %>%
  filter(year_completed_cat == "22", asqse_quest == "48")
cat("Number of rows for 2022: ", nrow(df_2022), "\n")
## Number of rows for 2022:  214
df_2023 <- df_full_sampling %>%
  filter(year_completed_cat == "23", asqse_quest == "48")
cat("Number of rows for 2023: ", nrow(df_2023), "\n")
## Number of rows for 2023:  594
# Function to perform matching and return matched data
perform_matching <- function(df_main, df_other, main_year_label, other_year_label) {
  set.seed(1) 
  data <- bind_rows(df_main, df_other, .id = "year") %>%
    mutate(year = ifelse(year == "1", 1, 0))  # 1 for main year, 0 for the other year
  
  match_obj <- matchit(year ~ asqse_quest + race + sex_male + momage + momed + income_poor,
                       data = data, 
                       exact = ~ momed + income_poor,
                       method = "optimal")
  
  matched_data <- match.data(match_obj)
  matched_data <- matched_data %>% mutate(compared_year = other_year_label)
  
  return(matched_data)
}

# Perform matching for each year compared to 2021
matched_data_2019 <- perform_matching(df_2021, df_2019, "2021", "2019")
matched_data_2020 <- perform_matching(df_2021, df_2020, "2021", "2020")
matched_data_2022 <- perform_matching(df_2021, df_2022, "2021", "2022")
## Warning: Fewer control units than treated units in some `exact` strata; not all
## treated units will get a match.
matched_data_2023 <- perform_matching(df_2021, df_2023, "2021", "2023")

# Combine all matched datasets
all_matched_data_4 <- bind_rows(matched_data_2019, matched_data_2020, matched_data_2022, matched_data_2023)

3.11.13 Fine tuning

4 years old

all_matched_data_4 = all_matched_data_4 %>% 
  group_by(year_completed_cat) %>%
  arrange(year_completed_cat, row_number()) %>%
  slice(1:200)

3.11.14 Compare results

all_matched_data_4 %>%
  tableby(year_completed_cat ~ as.factor(asqse_risk) +  race + momage + momed + as.factor(sex_male) + income_poor, data =.) %>% summary(text=T, digits=2) %>% as.data.frame() 

3.11.15 5 years

5 years old

# Filter datasets for 2019, 2020, 2021, 2022, and 2023
df_2019 <- df_full_sampling %>%
  filter(year_completed_cat == "19", asqse_quest == "60")
cat("Number of rows for 2019: ", nrow(df_2019), "\n")
## Number of rows for 2019:  967
df_2020 <- df_full_sampling %>%
  filter(year_completed_cat == "20", asqse_quest == "60")
cat("Number of rows for 2020: ", nrow(df_2020), "\n")
## Number of rows for 2020:  631
df_2021 <- df_full_sampling %>%
  filter(year_completed_cat == "21", asqse_quest == "60")
cat("Number of rows for 2021: ", nrow(df_2021), "\n")
## Number of rows for 2021:  175
df_2022 <- df_full_sampling %>%
  filter(year_completed_cat == "22", asqse_quest == "60")
cat("Number of rows for 2022: ", nrow(df_2022), "\n")
## Number of rows for 2022:  184
df_2023 <- df_full_sampling %>%
  filter(year_completed_cat == "23", asqse_quest == "60")
cat("Number of rows for 2023: ", nrow(df_2023), "\n")
## Number of rows for 2023:  456
# Function to perform matching and return matched data
perform_matching <- function(df_main, df_other, main_year_label, other_year_label) {
  set.seed(1)
  data <- bind_rows(df_main, df_other, .id = "year") %>%
    mutate(year = ifelse(year == "1", 1, 0))  # 1 for main year, 0 for the other year
  
  match_obj <- matchit(year ~ asqse_quest + race + sex_male + momage + momed + income_poor,
                       data = data, 
                       exact = ~ momed + race + sex_male + income_poor,
                       method = "optimal")
  
  matched_data <- match.data(match_obj)
  matched_data <- matched_data %>% mutate(compared_year = other_year_label)
  
  return(matched_data)
}

# Perform matching for each year compared to 2021
matched_data_2019 <- perform_matching(df_2021, df_2019, "2021", "2019")
matched_data_2020 <- perform_matching(df_2021, df_2020, "2021", "2020")
matched_data_2022 <- perform_matching(df_2021, df_2022, "2021", "2022")
## Warning: Fewer control units than treated units in some `exact` strata; not all
## treated units will get a match.
matched_data_2023 <- perform_matching(df_2021, df_2023, "2021", "2023")
## Warning: Fewer control units than treated units in some `exact` strata; not all
## treated units will get a match.
# Combine all matched datasets
all_matched_data <- bind_rows(matched_data_2019, matched_data_2020, matched_data_2022, matched_data_2023)

3.11.16 Fine tuning

all_matched_data = all_matched_data %>% 
  group_by(year_completed_cat) %>%
  arrange(year_completed_cat, row_number()) %>% 
  slice(1:200)

3.11.17 Compare results

all_matched_data %>%
  tableby(year_completed_cat ~ as.factor(asqse_risk) +  race + momage + momed + as.factor(sex_male) + income_poor, data =.) %>% summary(text=T, digits=2) %>% as.data.frame()

3.12 Trend lines

Ungrouped

bind_rows(
  all_matched_data_1 %>% select(asqse_risk, year_completed_cat) %>% mutate(age="1-year-old"),
  all_matched_data_2 %>% select(asqse_risk, year_completed_cat) %>% mutate(age="2-year-old"),
  all_matched_data_3 %>% select(asqse_risk, year_completed_cat) %>% mutate(age="3-year-old"),
  all_matched_data_4 %>% select(asqse_risk, year_completed_cat) %>% mutate(age="4-year-old"),
  all_matched_data %>% select(asqse_risk, year_completed_cat) %>% mutate(age="5-year-old")) %>%
  ggplot(., aes(x = year_completed_cat, y = asqse_risk, group = 0)) + #+  color = age)
  stat_summary(geom = "line", fun = mean, size=1) +
  stat_summary(geom = "errorbar",fun.data = mean_se, width = .1) +
  labs(x = "Year the ASQ-SE was completed", y = "Percentage of Children at Risk", color = "Child's age") +
  theme_bw() + theme(legend.position = "bottom")

grouped

bind_rows(
  all_matched_data_1 %>% select(asqse_risk, year_completed_cat) %>% mutate(age="1-year-old"),
  all_matched_data_2 %>% select(asqse_risk, year_completed_cat) %>% mutate(age="2-year-old"),
  all_matched_data_3 %>% select(asqse_risk, year_completed_cat) %>% mutate(age="3-year-old"),
  all_matched_data_4 %>% select(asqse_risk, year_completed_cat) %>% mutate(age="4-year-old"),
  all_matched_data %>% select(asqse_risk, year_completed_cat) %>% mutate(age="5-year-old")) %>%
  ggplot(., aes(x = year_completed_cat, y = asqse_risk, group = age, color = age)) +
  stat_summary(geom = "line", fun = mean, size=1) +
  stat_summary(geom = "errorbar",fun.data = mean_se, width = .1) +
  labs(x = "Year the ASQ-SE was completed", y = "Percentage of Children at Risk", color = "Child's age") +
  theme_bw() + theme(legend.position = "bottom")

library(ggrepel)
## Warning: pacote 'ggrepel' foi compilado no R versĂŁo 4.4.2
bind_rows(
  all_matched_data_1 %>% select(asqse_risk, year_completed_cat) %>% mutate(age="1-year-old"),
  all_matched_data_2 %>% select(asqse_risk, year_completed_cat) %>% mutate(age="2-year-old"),
  all_matched_data_3 %>% select(asqse_risk, year_completed_cat) %>% mutate(age="3-year-old"),
  all_matched_data_4 %>% select(asqse_risk, year_completed_cat) %>% mutate(age="4-year-old"),
  all_matched_data %>% select(asqse_risk, year_completed_cat) %>% mutate(age="5-year-old")
) %>%
  ggplot(aes(x = as.numeric(as.character(year_completed_cat)), y = asqse_risk, group = age, color = age)) +
  stat_summary(geom = "line", fun = mean, size = 1) +
  stat_summary(geom = "errorbar", fun.data = mean_se, width = .1) +
  # Create a separate data frame for the labels at year 19
  geom_label_repel(
    data = . %>% 
      group_by(age, year_completed_cat) %>% 
      summarise(asqse_risk = mean(asqse_risk), .groups = "drop") %>%
      filter(year_completed_cat == 19),
    aes(label = age),
    nudge_x = -0.5,
    direction = "y",
    segment.size = 1,
    box.padding = 0.5,
    point.padding = 0.1,
    force = 5,
    show.legend = FALSE
  ) +
  # Set a distinct color palette that is colorblind-friendly
  scale_color_manual(values = c(
    "1-year-old" = "black",  # Orange
    "2-year-old" = "blue",  # Blue
    "3-year-old" = "darkgreen",  # Green
    "4-year-old" = "purple",  # Pink/magenta
    "5-year-old" = "red"   # Red/brown
  )) +
  labs(x = "Year the ASQ-SE was completed", 
       y = "Percentage of Children at Risk", 
       color = "Child's age") +
  theme_bw() + 
  theme(legend.position = "bottom")

! done