## Parsed with column specification:
## cols(
## .default = col_double(),
## ParticipantId = col_character(),
## Gender = col_character(),
## Ethnic = col_character(),
## FatherEd = col_character(),
## MedBirth = col_character(),
## Language = col_character(),
## CDIForm = col_character()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
## .default = col_double(),
## ParticipantId = col_character(),
## Gender = col_character(),
## Ethnic = col_character(),
## MotherEd = col_character(),
## FatherEd = col_character(),
## MedBirth = col_character(),
## Language = col_character(),
## CDIForm = col_character()
## )
## See spec(...) for full column specifications.
## Warning: Missing column names filled in: 'X3' [3]
## Parsed with column specification:
## cols(
## .default = col_double(),
## ParticipantId = col_character(),
## id = col_character(),
## X3 = col_logical(),
## whosp = col_character(),
## whoeng = col_character(),
## spcdiby = col_character(),
## engcdiby = col_character(),
## `mgcorig Mother/Guardian origin` = col_character(),
## `fgcorig Father's Country of Origin` = col_character(),
## `chicorig Child's Country of Origin` = col_character(),
## `mgnatlng Mother/Guardian native language` = col_character(),
## `fgnatlng Father/Guardian Native Language` = col_character(),
## `homelng primary language spoken at home` = col_character()
## )
## See spec(...) for full column specifications.
# English CDI: separate the item-level columns (15:811) from the
# participant-level columns and reduce them to one vocabulary total per row.
# Columns 15:811 hold 797 columns (not 680) because the UL / COMPLX items are
# mixed in with the vocabulary items.
ul_col_names <- c("USECMPL", "USEFUT", "USEMISS", "USEPAST", "USEPOSS")
# COMPLX01 - COMPLX37 (originally noted as cols 176:211).
# NOTE(review): 176:211 spans 36 columns but 37 names are generated - confirm.
complx_col_names <- sprintf("COMPLX%02d", 1:37)

# Complexity + UL items kept on their own (42 columns; COMBINE may be missing?)
en_complx_ul <- en_ws %>%
  select(all_of(c(complx_col_names, ul_col_names)))

# Vocabulary items only: the item range minus the COMPLX / UL columns.
en_voc <- en_ws %>%
  select(15:811) %>%
  select(-all_of(complx_col_names), -all_of(ul_col_names))

# Participant-level columns plus Total (row sum of the vocabulary items);
# AgeCDI is dropped.
en_ws_new <- en_ws %>%
  select(-(15:811)) %>%
  mutate(Total = rowSums(en_voc)) %>%
  select(-AgeCDI)
# Spanish CDI: columns 49:752 are taken as the vocabulary items and columns
# 12:48 as the complexity / UL items.
sp_voc <- sp_ws %>% select(49:752)
sp_cmplx_ul <- sp_ws %>% select(12:48)

# Participant-level columns plus Total (row sum of the vocabulary items).
# EngProp / SpanProp are created as NA placeholders so the columns exist when
# this frame is later stacked with the English one.
sp_ws_new <- sp_ws %>%
  select(-(12:752)) %>%
  mutate(
    Total = rowSums(sp_voc),
    EngProp = NA,
    SpanProp = NA
  ) %>%
  select(-c(USEPOSS, USEFUT, USEMISS, USEPAST, SCOMBINE, USECMPL))
# need to recast some vars (e.g., sp_ws$MotherEd is character)
#cdat <- left_join(en_ws %>% mutate(MotherEd = as.numeric(MotherEd),
# FatherEd = as.numeric(FatherEd)),
# sp_ws %>% mutate(MotherEd = as.numeric(MotherEd),
# FatherEd = as.numeric(FatherEd) #%>%
#dplyr::select(-CDIForm, -MedBirth)),
# by=c("ParticipantId", "CDIAge"))
# , "Gender", "Language", "MotherEd", "FatherEd", "BOrder")) #
# intersect(names(en_ws), names(sp_ws))
# Long-format data: one row per CDI administration, English rows first and
# Spanish rows appended below them.
# English FatherEd uses the codes "NR" and "Null"; both are turned into NA
# before the column is converted to numeric.
# NOTE(review): bind_rows() needs compatible column types in both frames;
# confirm sp_ws_new's MotherEd / FatherEd are recast before reaching this point.
admins <- en_ws_new %>%
  mutate(
    FatherEd = as.numeric(replace(FatherEd, FatherEd %in% c("NR", "Null"), NA))
  ) %>%
  bind_rows(sp_ws_new)
# Fill in language-exposure proportions for the Spanish administrations.
# Spanish rows enter `admins` with EngProp / SpanProp set to NA, so each one
# borrows EngProp from an English administration of the same participant whose
# CDIAge is at most `max_age_gap` away (fuzzy age match, +/-1), and SpanProp is
# set to 100 - EngProp.
#
# Compared with looping over the English administrations, iterating over the
# Spanish rows makes the choice of donor explicit:
#   * when several English administrations are within range, the one closest
#     in age is used (ties: first in row order) instead of whichever happened
#     to be processed last;
#   * English rows with a missing EngProp are never used as donors, so they
#     cannot overwrite a valid value with NA;
#   * repeated English rows at the same age cannot produce a length > 1
#     replacement value.
# Spanish rows without any eligible donor keep their NA values.
max_age_gap <- 1
en_rows <- which(admins$Language == "English" & !is.na(admins$EngProp))
sp_rows <- which(admins$Language == "Spanish")

for (sp_row in sp_rows) {
  # English administrations belonging to the same participant
  # (which() drops comparisons that evaluate to NA)
  same_child <- en_rows[
    which(admins$ParticipantId[en_rows] == admins$ParticipantId[sp_row])
  ]
  age_gap <- abs(admins$CDIAge[same_child] - admins$CDIAge[sp_row])
  eligible <- which(age_gap <= max_age_gap)

  if (length(eligible) > 0) {
    donor <- same_child[eligible[which.min(age_gap[eligible])]]
    admins$EngProp[sp_row] <- admins$EngProp[donor]
    admins$SpanProp[sp_row] <- 100 - admins$EngProp[donor]
  }
}
# 4 missing Spanish CDI admin EngProps (no close match for English CDIAge)
#View(admins %>% arrange(ParticipantId, CDIAge))
# Spanish doesn't have SpProp/EngProp: were those never collected during SpCDI admins?
# (for English, SpProp changes across administrations, and SpCDI admins are not always at the same time as EnCDIs)
There are 163 unique participant IDs in the Spanish dataframe (225 observations), and 161 unique participant IDs in the English dataframe (222 observations).
Note: 4 Spanish CDI administrations are missing language exposure proportions. Exposure does not appear to have been collected during Spanish CDI administrations, so these values are copied from the same child's English CDI administration at a matching age (within one month); for these 4 administrations, no such English CDI administration exists.
We center CDIAge and MotherEd, and center/scale proportion exposure.
# Derive the model predictors:
#   CDIAgeCtr   - CDIAge centered on its mean
#   MotherEd    - recast to numeric (it can arrive as character)
#   MotherEdCtr - MotherEd centered on its mean, ignoring missing values
#   EngPropCS / SpanPropCS - exposure proportions centered and scaled;
#                 scale() returns a one-column matrix, hence as.numeric()
# TRUE is spelled out (T is an ordinary, reassignable variable), and both
# means skip NA so a single missing value cannot turn a whole column into NA.
admins <- admins %>%
  mutate(
    CDIAgeCtr = CDIAge - mean(CDIAge, na.rm = TRUE),
    MotherEd = as.numeric(MotherEd),
    MotherEdCtr = MotherEd - mean(MotherEd, na.rm = TRUE),
    EngPropCS = as.numeric(scale(EngProp)),
    SpanPropCS = as.numeric(scale(SpanProp))
  )
# Combined model over both languages: vocabulary Total predicted by Language,
# centered/scaled English exposure and their interaction, plus centered age,
# centered maternal education, gender and birth order, with a random
# intercept for each participant.
m1 <- lmer(
  formula = Total ~ Language * EngPropCS + CDIAgeCtr + MotherEdCtr +
    Gender + BOrder + (1 | ParticipantId),
  data = admins
)
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Total ~ Language * EngPropCS + CDIAgeCtr + MotherEdCtr + Gender +
## BOrder + (1 | ParticipantId)
## Data: admins
##
## REML criterion at convergence: 5451.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.4773 -0.5641 -0.0992 0.4364 3.3829
##
## Random effects:
## Groups Name Variance Std.Dev.
## ParticipantId (Intercept) 6148 78.41
## Residual 11547 107.46
## Number of obs: 440, groups: ParticipantId, 161
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 186.203 19.254 181.662 9.671 < 2e-16 ***
## LanguageSpanish 4.097 10.259 283.881 0.399 0.690
## EngPropCS 67.332 9.931 317.090 6.780 5.88e-11 ***
## CDIAgeCtr 15.852 1.477 426.098 10.733 < 2e-16 ***
## MotherEdCtr 2.414 2.786 163.464 0.867 0.387
## GenderM -19.526 16.485 156.859 -1.184 0.238
## BOrder -6.767 7.953 160.231 -0.851 0.396
## LanguageSpanish:EngPropCS -125.281 10.327 285.570 -12.131 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) LnggSp EngPCS CDIAgC MthrEC GendrM BOrder
## LangugSpnsh -0.262
## EngPropCS 0.012 0.004
## CDIAgeCtr -0.005 0.010 0.085
## MotherEdCtr -0.170 -0.005 -0.276 0.011
## GenderM -0.428 -0.001 -0.071 0.078 0.030
## BOrder -0.744 -0.002 0.038 -0.036 0.221 -0.019
## LnggSp:EPCS -0.001 0.004 -0.506 0.014 -0.002 0.007 -0.001
# English-only model: the same covariates fitted to the English
# administrations, with centered/scaled English exposure as the exposure term
# and a random intercept for each participant.
en_m1 <- lmer(
  formula = Total ~ CDIAgeCtr + MotherEdCtr + EngPropCS +
    Gender + BOrder + (1 | ParticipantId),
  data = admins %>% filter(Language == "English")
)
summary(en_m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Total ~ CDIAgeCtr + MotherEdCtr + EngPropCS + Gender + BOrder +
## (1 | ParticipantId)
## Data: admins %>% filter(Language == "English")
##
## REML criterion at convergence: 2722.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.0501 -0.4847 -0.1516 0.3177 3.5880
##
## Random effects:
## Groups Name Variance Std.Dev.
## ParticipantId (Intercept) 7077 84.13
## Residual 8766 93.63
## Number of obs: 222, groups: ParticipantId, 161
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 193.773 21.181 159.369 9.148 2.63e-16 ***
## CDIAgeCtr 17.808 1.748 166.260 10.189 < 2e-16 ***
## MotherEdCtr 5.185 3.172 161.795 1.635 0.1041
## EngPropCS 61.987 9.807 186.666 6.321 1.86e-09 ***
## GenderM -3.489 18.783 157.234 -0.186 0.8529
## BOrder -15.377 9.083 161.781 -1.693 0.0924 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) CDIAgC MthrEC EngPCS GendrM
## CDIAgeCtr -0.006
## MotherEdCtr -0.176 0.014
## EngPropCS 0.018 0.099 -0.324
## GenderM -0.443 0.080 0.028 -0.085
## BOrder -0.772 -0.036 0.220 0.041 -0.021
# Spanish-only model: the same covariates fitted to the Spanish
# administrations, with centered/scaled Spanish exposure as the exposure term
# and a random intercept for each participant.
sp_m1 <- lmer(
  formula = Total ~ CDIAgeCtr + MotherEdCtr + SpanPropCS +
    Gender + BOrder + (1 | ParticipantId),
  data = admins %>% filter(Language == "Spanish")
)
summary(sp_m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Total ~ CDIAgeCtr + MotherEdCtr + SpanPropCS + Gender + BOrder +
## (1 | ParticipantId)
## Data: admins %>% filter(Language == "Spanish")
##
## REML criterion at convergence: 2704.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.1774 -0.4678 -0.0545 0.3832 3.6588
##
## Random effects:
## Groups Name Variance Std.Dev.
## ParticipantId (Intercept) 10213 101.06
## Residual 8809 93.86
## Number of obs: 218, groups: ParticipantId, 159
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 183.4145 23.6329 157.4090 7.761 1.00e-12 ***
## CDIAgeCtr 13.7569 1.8643 148.4882 7.379 1.05e-11 ***
## MotherEdCtr -0.5712 3.5640 159.8183 -0.160 0.8729
## SpanPropCS 52.2496 10.9483 192.2682 4.772 3.60e-06 ***
## GenderM -35.9405 20.9654 155.6624 -1.714 0.0885 .
## BOrder 1.7858 10.0817 159.0231 0.177 0.8596
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) CDIAgC MthrEC SpnPCS GendrM
## CDIAgeCtr 0.003
## MotherEdCtr -0.185 0.001
## SpanPropCS -0.016 -0.122 0.322
## GenderM -0.446 0.080 0.034 0.069
## BOrder -0.772 -0.038 0.223 -0.043 -0.016