Preliminaries.
## [1] "dplyr" "langcog" "tidyr" "ggplot2" "lme4"
##
## Attaching package: 'langcog'
## The following object is masked from 'package:base':
##
## scale
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## %+%(): ggplot2, psych
## alpha(): ggplot2, psych
## filter(): dplyr, stats
## lag(): dplyr, stats
##
## Attaching package: 'ggthemes'
## The following objects are masked from 'package:langcog':
##
## scale_color_solarized, scale_colour_solarized,
## scale_fill_solarized
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
Read in participant data.
# Read the input tables; every path is relative to the working directory.
data <- read.csv(file = "data.csv", header = TRUE)
dem <- read.csv(
  file = "parenting_proj_emilyhembacher_demo2016.csv",
  header = TRUE
)
conditions <- read.csv(file = "joint_attention/conditions.csv")

# Restore the objects stored in the PAQ .RData file into the workspace.
# NOTE(review): presumably this is what defines `ids`, which is joined onto
# the data further down -- confirm against the contents of the file.
load(file = "paq/paq_demo.RData")
# Fix ids: trim whitespace, strip file-name residue, and repair known
# malformed subject IDs.
#
# Every pattern is wrapped in stringr::fixed() so it is matched literally;
# without it the "." in e.g. "_cut.txt" is a regex wildcard that matches any
# character. str_replace() only rewrites the first match in each ID, and the
# steps run in the order listed because some patterns overlap (e.g. "8283_"
# has to be tried before "283_").
data$SID <- data$SID %>%
  stringr::str_trim() %>%
  stringr::str_replace(stringr::fixed("_cut.txt"), "") %>%
  stringr::str_replace(stringr::fixed("8283_"), "") %>%
  stringr::str_replace(stringr::fixed("283_"), "") %>%
  stringr::str_replace(stringr::fixed("_cut_save.txt"), "") %>%
  stringr::str_replace(stringr::fixed("_cut_save"), "") %>%
  stringr::str_replace(stringr::fixed("_cutsave.txt"), "") %>%
  stringr::str_replace(stringr::fixed("parenting_obs_"), "0") %>%
  stringr::str_replace(stringr::fixed("_cut_m4a.txt"), "") %>%
  # Two IDs were recorded without the zero in the date part.
  stringr::str_replace(stringr::fixed("05116_05"), "050116_05") %>%
  stringr::str_replace(stringr::fixed("05116_2"), "050116_02")
Make data frames.
# Assemble the analysis data frame `d`.
# None of the joins pass `by`, so each one matches on whatever column names
# the two tables have in common.
d <- data %>%
  left_join(conditions) %>%
  left_join(dem) %>%
  # Keep only the analysis columns, renamed to short lower-case names.
  transmute(
    sid = SID,
    types = Type.count,
    tokens = Token.count,
    lexdiv = Lexical.diversity,
    condition = Condition,
    video = Video,
    age,
    gender,
    parent_ed
  ) %>%
  # NOTE(review): `ids` is not created in this script; presumably it comes
  # from the loaded .RData file and carries the PAQ scores (AA, EL, RR) keyed
  # by `sid` -- confirm.
  left_join(ids) %>%
  # Drop rows that were never matched to an experimental condition.
  filter(!is.na(condition))
# Per-condition summary of lexical diversity from multi_boot_standard();
# the plot below uses its `mean`, `ci_lower` and `ci_upper` columns.
ms_lex <- d %>%
  group_by(condition) %>%
  multi_boot_standard(col = "lexdiv")

# Bar per condition with the interval drawn as a vertical line.
# The bars are coloured through the `fill` aesthetic, so the solarized palette
# has to be applied with the fill scale; a colour scale has no effect here.
ggplot(ms_lex, aes(x = condition, y = mean, fill = condition)) +
  geom_bar(stat = "identity") +
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
                 position = position_dodge(width = .9)) +
  xlab("Condition") +
  ylab("Lexical Diversity") +
  langcog::scale_fill_solarized() +
  ggthemes::theme_few()
# Per-condition summary of the token count from multi_boot_standard();
# the plot below uses its `mean`, `ci_lower` and `ci_upper` columns.
ms_tok <- d %>%
  group_by(condition) %>%
  multi_boot_standard(col = "tokens")

# Bar per condition with the interval drawn as a vertical line.
# The bars are coloured through the `fill` aesthetic, so the solarized palette
# has to be applied with the fill scale; a colour scale has no effect here.
ggplot(ms_tok, aes(x = condition, y = mean, fill = condition)) +
  geom_bar(stat = "identity") +
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
                 position = position_dodge(width = .9)) +
  xlab("Condition") +
  ylab("Total Number of Word Tokens") +
  langcog::scale_fill_solarized() +
  ggthemes::theme_few()
# Per-condition summary of the type count from multi_boot_standard();
# the plot below uses its `mean`, `ci_lower` and `ci_upper` columns.
ms_type <- d %>%
  group_by(condition) %>%
  multi_boot_standard(col = "types")

# Bar per condition with the interval drawn as a vertical line.
# Plot `ms_type` (the type summary computed just above); the figure was
# previously drawn from `ms_tok`, so it showed token counts under a
# "Word Types" axis label.
# The bars are coloured through the `fill` aesthetic, so the solarized palette
# has to be applied with the fill scale; a colour scale has no effect here.
ggplot(ms_type, aes(x = condition, y = mean, fill = condition)) +
  geom_bar(stat = "identity") +
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
                 position = position_dodge(width = .9)) +
  xlab("Condition") +
  ylab("Total Number of Word Types") +
  langcog::scale_fill_solarized() +
  ggthemes::theme_few()
Prepare data.
# Build the modelling data set from `d`.
lmer_data <- d %>%
  # Keep only rows where all three PAQ scores (AA, EL, RR) are present.
  filter(
    !is.na(AA),
    !is.na(EL),
    !is.na(RR)
  ) %>%
  mutate(
    condition = factor(condition),
    lexdiv = as.numeric(lexdiv),
    # Mean-centre (scale = FALSE, so no rescaling) the PAQ scores and age;
    # as.numeric() turns the value returned by scale() into a plain vector.
    EL = as.numeric(langcog::scale(EL, scale = FALSE)),
    AA = as.numeric(langcog::scale(AA, scale = FALSE)),
    RR = as.numeric(langcog::scale(RR, scale = FALSE)),
    age = as.numeric(langcog::scale(age, scale = FALSE)),
    # Categorical predictors / grouping variable as factors.
    gender = as.factor(gender),
    video = as.factor(video)
  )
Predicting lexical diversity based on experimental condition, PAQ, demographics.
# Mixed model for lexical diversity: condition crossed with each PAQ score,
# plus age, gender and parent education, with a random intercept per video.
# lmerTest is attached and masks lme4::lmer (see the package start-up messages).
maximal_mod <- lmer(
  lexdiv ~ condition * EL + condition * AA + condition * RR +
    age + gender + parent_ed +
    (1 | video),
  data = lmer_data
)
summary(maximal_mod)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula:
## lexdiv ~ condition * EL + condition * AA + condition * RR + age +
## gender + parent_ed + (1 | video)
## Data: lmer_data
##
## REML criterion at convergence: -41.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.8678 -0.5672 -0.1596 0.5007 2.3151
##
## Random effects:
## Groups Name Variance Std.Dev.
## video (Intercept) 8.055e-05 0.008975
## Residual 9.955e-03 0.099776
## Number of obs: 50, groups: video, 6
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.514448 0.059614 27.330000 8.630 2.74e-09 ***
## conditionexp -0.107469 0.032222 37.690000 -3.335 0.00192 **
## EL -0.028839 0.057046 38.970000 -0.506 0.61603
## AA 0.023258 0.046785 38.840000 0.497 0.62190
## RR -0.042137 0.024417 38.310000 -1.726 0.09245 .
## age 0.029031 0.036707 5.520000 0.791 0.46161
## genderM -0.021122 0.033301 38.670000 -0.634 0.52963
## parent_ed -0.015315 0.012676 33.930000 -1.208 0.23532
## conditionexp:EL 0.053452 0.079118 31.840000 0.676 0.50417
## conditionexp:AA -0.011718 0.060339 36.000000 -0.194 0.84710
## conditionexp:RR 0.002142 0.037569 38.910000 0.057 0.95482
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cndtnx EL AA RR age gendrM prnt_d cnd:EL
## conditionxp 0.048
## EL 0.082 0.196
## AA -0.229 0.189 -0.101
## RR -0.312 -0.134 -0.443 -0.020
## age 0.146 0.080 0.175 -0.115 -0.064
## genderM -0.098 0.028 -0.082 0.179 0.150 -0.158
## parent_ed -0.906 -0.340 -0.155 0.082 0.322 -0.136 -0.094
## condtnxp:EL -0.219 -0.157 -0.748 0.086 0.378 -0.130 0.050 0.286
## condtnxp:AA 0.319 -0.045 0.117 -0.812 -0.048 0.164 -0.239 -0.197 -0.277
## condtnxp:RR 0.102 0.097 0.256 0.036 -0.608 -0.054 -0.047 -0.109 -0.302
## cnd:AA
## conditionxp
## EL
## AA
## RR
## age
## genderM
## parent_ed
## condtnxp:EL
## condtnxp:AA
## condtnxp:RR -0.006
Predicting the number of word tokens based on experimental condition, PAQ, demographics.
# Mixed model for the number of word tokens, with the same predictors:
# condition crossed with each PAQ score, plus age, gender and parent
# education, with a random intercept per video.
# This reuses the name `maximal_mod`, replacing any earlier fit stored in it.
maximal_mod <- lmer(
  tokens ~ condition * EL + condition * AA + condition * RR +
    age + gender + parent_ed +
    (1 | video),
  data = lmer_data
)
summary(maximal_mod)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula:
## tokens ~ condition * EL + condition * AA + condition * RR + age +
## gender + parent_ed + (1 | video)
## Data: lmer_data
##
## REML criterion at convergence: 471
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.72599 -0.62886 -0.08587 0.66739 1.83112
##
## Random effects:
## Groups Name Variance Std.Dev.
## video (Intercept) 274.9 16.58
## Residual 4977.6 70.55
## Number of obs: 50, groups: video, 6
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 87.9631 43.8688 30.0400 2.005 0.0540 .
## conditionexp 51.3530 22.9033 37.5500 2.242 0.0309 *
## EL -3.4370 40.7816 38.7000 -0.084 0.9333
## AA -7.6797 33.5899 38.9900 -0.229 0.8204
## RR 19.8059 17.3896 38.0300 1.139 0.2618
## age -26.0479 29.2139 6.2500 -0.892 0.4056
## genderM 5.6041 23.7486 38.2700 0.236 0.8147
## parent_ed 17.2185 9.2143 36.6400 1.869 0.0697 .
## conditionexp:EL -5.5297 57.6030 36.6900 -0.096 0.9240
## conditionexp:AA 39.4475 43.7104 37.5800 0.902 0.3726
## conditionexp:RR -0.4329 26.8385 38.5900 -0.016 0.9872
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cndtnx EL AA RR age gendrM prnt_d cnd:EL
## conditionxp 0.059
## EL 0.079 0.195
## AA -0.245 0.170 -0.099
## RR -0.323 -0.140 -0.441 -0.006
## age 0.127 0.054 0.122 -0.090 -0.038
## genderM -0.083 0.022 -0.080 0.176 0.144 -0.129
## parent_ed -0.902 -0.341 -0.152 0.105 0.336 -0.113 -0.102
## condtnxp:EL -0.227 -0.156 -0.747 0.085 0.382 -0.093 0.047 0.294
## condtnxp:AA 0.338 -0.024 0.119 -0.815 -0.066 0.138 -0.236 -0.223 -0.283
## condtnxp:RR 0.119 0.107 0.261 0.015 -0.610 -0.053 -0.047 -0.126 -0.311
## cnd:AA
## conditionxp
## EL
## AA
## RR
## age
## genderM
## parent_ed
## condtnxp:EL
## condtnxp:AA
## condtnxp:RR 0.023
Predicting the number of word types based on experimental condition, PAQ, demographics.
# Mixed model for the number of word types, with the same predictors:
# condition crossed with each PAQ score, plus age, gender and parent
# education, with a random intercept per video.
# This reuses the name `maximal_mod`, replacing any earlier fit stored in it.
# NOTE(review): the printed summary reports a video variance of 0 for this
# model, i.e. the random intercept is estimated at the boundary.
maximal_mod <- lmer(
  types ~ condition * EL + condition * AA + condition * RR +
    age + gender + parent_ed +
    (1 | video),
  data = lmer_data
)
summary(maximal_mod)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula: types ~ condition * EL + condition * AA + condition * RR + age +
## gender + parent_ed + (1 | video)
## Data: lmer_data
##
## REML criterion at convergence: 373.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.67957 -0.72517 0.07765 0.50239 2.16847
##
## Random effects:
## Groups Name Variance Std.Dev.
## video (Intercept) 0.0 0.00
## Residual 421.5 20.53
## Number of obs: 50, groups: video, 6
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 52.971 12.160 39.000 4.356 9.31e-05 ***
## conditionexp -5.348 6.623 39.000 -0.808 0.4242
## EL -7.319 11.708 39.000 -0.625 0.5355
## AA -7.009 9.594 39.000 -0.731 0.4694
## RR 2.312 5.016 39.000 0.461 0.6475
## age -4.739 7.368 39.000 -0.643 0.5239
## genderM -6.576 6.838 39.000 -0.962 0.3422
## parent_ed 4.539 2.592 39.000 1.751 0.0877 .
## conditionexp:EL 6.117 16.162 39.000 0.378 0.7072
## conditionexp:AA 19.894 12.349 39.000 1.611 0.1152
## conditionexp:RR -2.656 7.712 39.000 -0.344 0.7324
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cndtnx EL AA RR age gendrM prnt_d cnd:EL
## conditionxp 0.046
## EL 0.082 0.196
## AA -0.226 0.194 -0.102
## RR -0.309 -0.132 -0.443 -0.023
## age 0.151 0.086 0.187 -0.120 -0.070
## genderM -0.102 0.029 -0.082 0.180 0.151 -0.165
## parent_ed -0.907 -0.339 -0.155 0.077 0.318 -0.142 -0.092
## condtnxp:EL -0.217 -0.157 -0.748 0.086 0.377 -0.139 0.050 0.284
## condtnxp:AA 0.314 -0.049 0.116 -0.811 -0.044 0.170 -0.239 -0.191 -0.275
## condtnxp:RR 0.098 0.095 0.255 0.040 -0.608 -0.055 -0.047 -0.106 -0.300
## cnd:AA
## conditionxp
## EL
## AA
## RR
## age
## genderM
## parent_ed
## condtnxp:EL
## condtnxp:AA
## condtnxp:RR -0.013
The number of tokens is higher in the experimental condition, while lexical diversity (type-token ratio) is higher in the control condition. The number of types does not differ reliably between conditions in the model above (conditionexp estimate = -5.35, p = .42). Parents may be relatively more repetitive in the experimental condition since they are attempting to stick to a specific prescribed task, but they talk more overall! Demographics and PAQ do not interact with condition, but there is a marginal effect of RR score on lexical diversity (lower lexical diversity for higher RR scores), and marginal effects of parent education on word types and tokens (more types and tokens for higher parent education).