##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## ParticipantId = col_character(),
## Gender = col_character(),
## Ethnic = col_character(),
## FatherEd = col_character(),
## MedBirth = col_character(),
## Language = col_character(),
## CDIForm = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## ParticipantId = col_character(),
## Gender = col_character(),
## Ethnic = col_character(),
## MotherEd = col_character(),
## FatherEd = col_character(),
## MedBirth = col_character(),
## Language = col_character(),
## CDIForm = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
## Warning: Missing column names filled in: 'X3' [3]
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## ParticipantId = col_character(),
## id = col_character(),
## X3 = col_logical(),
## whosp = col_character(),
## whoeng = col_character(),
## spcdiby = col_character(),
## engcdiby = col_character(),
## `mgcorig Mother/Guardian origin` = col_character(),
## `fgcorig Father's Country of Origin` = col_character(),
## `chicorig Child's Country of Origin` = col_character(),
## `mgnatlng Mother/Guardian native language` = col_character(),
## `fgnatlng Father/Guardian Native Language` = col_character(),
## `homelng primary language spoken at home` = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
# English form: separate the item-level responses from the participant-level
# columns. Positions 15:811 hold 797 item columns (not 680) because the usage
# (UL) and complexity (COMPLX) items are mixed in with the vocabulary items.
ul_col_names <- c("USECMPL", "USEFUT", "USEMISS", "USEPAST", "USEPOSS")
# COMPLX01 ... COMPLX37, zero-padded to two digits.
# NOTE(review): an earlier note placed these at cols 176:211, which spans only
# 36 positions for 37 names -- confirm the range.
complx_col_names <- sprintf("COMPLX%02d", 1:37)
# 37 complexity + 5 usage = 42 columns, picked by name
# (missing some? e.g. COMBINE?)
en_complx_ul <- en_ws %>%
  select(all_of(c(complx_col_names, ul_col_names)))
# Vocabulary items: the item block with the usage/complexity columns removed.
en_voc <- en_ws %>%
  select(15:811) %>%
  select(-all_of(c(complx_col_names, ul_col_names)))
# Drop the item block and AgeCDI, then append the per-row vocabulary sum.
# rowSums() is called without na.rm, so any NA item makes that row's Total NA.
en_ws <- en_ws %>%
  select(-(15:811)) %>%
  select(-all_of("AgeCDI")) %>%
  mutate(Total = rowSums(en_voc))
# Spanish form: positions 49:752 are taken as the vocabulary items and
# positions 12:48 as the complexity/usage block.
sp_voc <- sp_ws %>% select(49:752)
sp_cmplx_ul <- sp_ws %>% select(12:48)
# Drop every item-level column (12:752) plus the named usage/combine summary
# columns, then append the per-row vocabulary sum. EngProp and SpanProp are
# added as all-NA placeholder columns.
# rowSums() is called without na.rm, so any NA item makes that row's Total NA.
sp_ws <- sp_ws %>%
  select(-(12:752)) %>%
  select(-all_of(c(
    "USEPOSS", "USEFUT", "USEMISS", "USEPAST", "SCOMBINE", "USECMPL"
  ))) %>%
  mutate(
    Total = rowSums(sp_voc),
    EngProp = NA,
    SpanProp = NA
  )
# need to recast some vars (e.g., sp_ws$MotherEd is character)
#cdat <- left_join(en_ws %>% mutate(MotherEd = as.numeric(MotherEd),
# FatherEd = as.numeric(FatherEd)),
# sp_ws %>% mutate(MotherEd = as.numeric(MotherEd),
# FatherEd = as.numeric(FatherEd) #%>%
#dplyr::select(-CDIForm, -MedBirth)),
# by=c("ParticipantId", "CDIAge"))
# , "Gender", "Language", "MotherEd", "FatherEd", "BOrder")) #
# intersect(names(en_ws), names(sp_ws))
# long format data
# Stack the English and Spanish administrations into one long table.
# In the English table, FatherEd values "NR" and "Null" are blanked to NA
# before the column is converted to numeric.
admins <- bind_rows(
  en_ws %>%
    mutate(
      FatherEd = as.numeric(
        replace(FatherEd, FatherEd %in% c("NR", "Null"), NA)
      )
    ),
  sp_ws
)
# Copy exposure proportions from English onto Spanish administrations.
#
# For every Spanish row, look up the English row with the same ParticipantId
# and CDIAge. If one exists, the Spanish row gets that row's EngProp and
# SpanProp = 100 - EngProp; otherwise both are set to NA. English rows, rows
# with any other Language value, and rows with a missing ParticipantId or
# CDIAge are left untouched.
#
# This replaces a nested participant x age loop that rescanned the whole table
# on every iteration and could fail with a size mismatch when several English
# rows shared one key. Here the first English row per key is used and a
# warning is raised so the duplication is visible.
has_key <- !is.na(admins$ParticipantId) & !is.na(admins$CDIAge)
en_rows <- which(has_key & admins$Language == "English")
sp_rows <- which(has_key & admins$Language == "Spanish")

# Composite participant/age key; "\037" (unit separator) keeps the two parts
# from running together.
admin_key <- paste(admins$ParticipantId, admins$CDIAge, sep = "\037")

if (anyDuplicated(admin_key[en_rows]) > 0) {
  warning(
    "Multiple English administrations share a ParticipantId/CDIAge; ",
    "using the first EngProp for the matching Spanish rows.",
    call. = FALSE
  )
}

# Row index of the matching English administration (NA when there is none);
# indexing with NA yields NA, which is the wanted value for unmatched rows.
matched_en <- en_rows[match(admin_key[sp_rows], admin_key[en_rows])]
matched_eng_prop <- admins$EngProp[matched_en]

admins$EngProp[sp_rows] <- matched_eng_prop
admins$SpanProp[sp_rows] <- 100 - matched_eng_prop
# 7 Spanish CDI administrations are left without an EngProp (use a fuzzy age match instead? e.g. copy EngProp from an English CDI within +/-1 month)
#View(admins %>% arrange(ParticipantId, CDIAge))
# The Spanish data has no SpanProp/EngProp columns: were these never collected during Spanish CDI administrations?
# (In the English data, SpanProp changes across administrations, and Spanish CDI administrations do not always coincide with English ones.)
There are 163 unique participant IDs in the Spanish dataframe (225 observations), and 161 unique participant IDs in the English dataframe (222 observations).
# Combined model over all rows of `admins` (both languages).
# Fixed effects: Language, EngProp and their interaction, plus CDIAge,
# MotherEd, Gender and BOrder; random intercept per ParticipantId.
# The predictors enter uncentered and unscaled here.
# Possible refinement (not applied below):
# center Age, MotherEd, center/scale EngProp
# NOTE(review): the summary output reports Satterthwaite t-tests
# (lmerModLmerTest), so lmer() presumably comes from lmerTest -- confirm.
m1 <- lmer(Total ~ Language * EngProp + CDIAge + MotherEd +
Gender + BOrder + (1 | ParticipantId), data=admins)
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Total ~ Language * EngProp + CDIAge + MotherEd + Gender + BOrder +
## (1 | ParticipantId)
## Data: admins
##
## REML criterion at convergence: 5440.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.4743 -0.5655 -0.0988 0.4359 3.3735
##
## Random effects:
## Groups Name Variance Std.Dev.
## ParticipantId (Intercept) 6172 78.56
## Residual 11573 107.58
## Number of obs: 438, groups: ParticipantId, 161
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -369.1782 59.7165 273.9213 -6.182 2.29e-09 ***
## LanguageSpanish 220.2716 20.6653 283.1639 10.659 < 2e-16 ***
## EngProp 2.7528 0.4092 315.6427 6.728 8.11e-11 ***
## CDIAge 15.8040 1.4815 423.9529 10.668 < 2e-16 ***
## MotherEd 2.4273 2.7908 163.8798 0.870 0.386
## GenderM -19.8199 16.5181 157.3719 -1.200 0.232
## BOrder -6.8678 7.9669 160.5467 -0.862 0.390
## LanguageSpanish:EngProp -5.1143 0.4284 284.6165 -11.939 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) LnggSp EngPrp CDIAge MthrEd GendrM BOrder
## LangugSpnsh -0.164
## EngProp -0.166 0.443
## CDIAge -0.671 -0.008 0.086
## MotherEd -0.611 0.000 -0.276 0.009
## GenderM -0.187 -0.007 -0.071 0.080 0.029
## BOrder -0.367 0.001 0.038 -0.036 0.221 -0.019
## LnggSpns:EP 0.138 -0.867 -0.506 0.015 -0.004 0.008 -0.003
# English-only model: fit on the rows of `admins` with Language == "English".
# Fixed effects: CDIAge, MotherEd, EngProp, Gender and BOrder;
# random intercept per ParticipantId.
en_m1 <- lmer(Total ~ CDIAge + MotherEd + EngProp +
Gender + BOrder + (1 | ParticipantId), data=admins %>% filter(Language=="English"))
summary(en_m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Total ~ CDIAge + MotherEd + EngProp + Gender + BOrder + (1 |
## ParticipantId)
## Data: admins %>% filter(Language == "English")
##
## REML criterion at convergence: 2729.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.0501 -0.4847 -0.1516 0.3177 3.5880
##
## Random effects:
## Groups Name Variance Std.Dev.
## ParticipantId (Intercept) 7077 84.13
## Residual 8766 93.63
## Number of obs: 222, groups: ParticipantId, 161
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -441.9463 68.1234 214.1261 -6.487 5.96e-10 ***
## CDIAge 17.8081 1.7478 166.2603 10.189 < 2e-16 ***
## MotherEd 5.1853 3.1723 161.7954 1.635 0.1041
## EngProp 2.5482 0.4031 186.6661 6.321 1.86e-09 ***
## GenderM -3.4893 18.7825 157.2338 -0.186 0.8529
## BOrder -15.3765 9.0834 161.7809 -1.693 0.0924 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) CDIAge MthrEd EngPrp GendrM
## CDIAge -0.696
## MotherEd -0.611 0.014
## EngProp -0.106 0.099 -0.324
## GenderM -0.187 0.080 0.028 -0.085
## BOrder -0.364 -0.036 0.220 0.041 -0.021
# Spanish-only model: fit on the rows of `admins` with Language == "Spanish".
# Fixed effects: CDIAge, MotherEd, SpanProp, Gender and BOrder;
# random intercept per ParticipantId.
# For Spanish rows, SpanProp is the value filled in earlier in this file as
# 100 - EngProp of the English administration at the same ParticipantId and
# CDIAge (NA when there is no such administration).
sp_m1 <- lmer(Total ~ CDIAge + MotherEd + SpanProp +
Gender + BOrder + (1 | ParticipantId), data=admins %>% filter(Language=="Spanish"))
summary(sp_m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Total ~ CDIAge + MotherEd + SpanProp + Gender + BOrder + (1 |
## ParticipantId)
## Data: admins %>% filter(Language == "Spanish")
##
## REML criterion at convergence: 2688.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.1520 -0.4617 -0.0552 0.3858 3.6377
##
## Random effects:
## Groups Name Variance Std.Dev.
## ParticipantId (Intercept) 10231 101.15
## Residual 8907 94.38
## Number of obs: 216, groups: ParticipantId, 159
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -288.6229 82.7217 206.8204 -3.489 0.000592 ***
## CDIAge 13.9061 1.8843 148.7724 7.380 1.04e-11 ***
## MotherEd -0.6675 3.5769 160.4570 -0.187 0.852190
## SpanProp 2.1273 0.4526 192.6404 4.701 4.93e-06 ***
## GenderM -35.2976 21.0460 156.4961 -1.677 0.095507 .
## BOrder 1.6593 10.1145 159.4750 0.164 0.869901
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) CDIAge MthrEd SpnPrp GendrM
## CDIAge -0.544
## MotherEd -0.736 -0.003
## SpanProp -0.436 -0.127 0.324
## GenderM -0.217 0.084 0.033 0.066
## BOrder -0.315 -0.039 0.223 -0.041 -0.016