rm(list=ls())
## [1] "loading library"
sol_demo <- read.csv("../../raw_data/sol_demo_all.csv", stringsAsFactors = F)
sol_demo$Sub.Num <- as.character(sol_demo$Sub.Num)
iChart <- read.csv("processed_data/sol-ichart-merged.csv",
check.names=F, stringsAsFactors=F)
iChart$Sub.Num <- as.character(iChart$Sub.Num)
Filter out participants that should not go into analyses based on exclusionary criteria: a) age, b) didn’t know signs in the task, c) not enough ASL exposure.
exclude <- sol_demo %>%
filter(include == "no") %>%
select(Sub.Num, reason_excluded, stimuli, age_peek_months)
include <- sol_demo %>%
filter(include == "yes") %>%
select(Sub.Num, include,
reason_excluded, stimuli,
age_code, signs_produced)
iChart <- left_join(iChart, include, by = c("Sub.Num", "stimuli"))
iChart <- filter(iChart, include == "yes")
Create a clean target image variable.
targets <- c("juice", "cookie", "cup", "ball", "shoe", "kitty",
"doll", "teddy", "book", "birdy", "car", "sock")
make_clean_target <- function (target_image, targets) {
target_img_clean <- targets[str_detect(target_image, targets)]
return(target_img_clean)
}
iChart$clean_target_img <- unlist(sapply(iChart$Target.Image,
function (x) make_clean_target(x, targets)))
Add unknown signs variable. Taken from sol_demo data frame.
ss_unknown_signs <- sol_demo %>%
filter(include == "yes") %>%
select(Sub.Num, parent_report_unknown_signs)
iChart <- left_join(iChart, ss_unknown_signs, by = "Sub.Num")
Now we can filter the iChart, removing the “unknown” signs.
ss_trials_df <- iChart %>%
group_by(Sub.Num, parent_report_unknown_signs) %>%
summarise(Trials = n())
# flag unknown trials
ss_unknown <- iChart %>%
rowwise() %>%
mutate(unknown_trial = ifelse(clean_target_img %in% parent_report_unknown_signs,
"unknown_sign", "known_sign")) %>%
select(Sub.Num, Tr.Num, unknown_trial)
# join this info with the original iChart
iChart$unknown_trial <- ss_unknown$unknown_trial
# now filter
iChart <- filter(iChart, unknown_trial == "known_sign")
# check to make sure our filtering worked correctly
post_filter_n <- iChart %>%
group_by(Sub.Num) %>%
summarise(Trials = n()) %>%
select(Trials)
ss_trials_df$post_filter <- post_filter_n$Trials
ss_trials_df <- ss_trials_df %>%
mutate(trials_removed = as.integer(Trials) - as.integer(post_filter))
Get total number of trials removed because of known signs
sum(ss_trials_df$trials_removed)
## [1] 22
ss_prescreened <- iChart %>%
group_by(Sub.Num) %>%
filter(Prescreen.Notes != "") %>%
summarise(num_prescreened = n())
ss_trials_df <- left_join(ss_trials_df, ss_prescreened, by = "Sub.Num")
ss_trials_df <- ss_trials_df %>%
mutate(good_trials = Trials - sum(trials_removed, num_prescreened, na.rm=T))
iChart <- filter(iChart, Prescreen.Notes == "")
We define too few trials as less than or equal to 25% of the total number of trials in the task.
total_trials <- 32
trials_cut_point <- total_trials * .25
trials_filter <- ss_trials_df %>%
mutate(exclude_few_trials = ifelse(good_trials <= trials_cut_point,
"exclude", "include")) %>%
select(Sub.Num, good_trials, exclude_few_trials)
# get the number of participants removed by filter
trials_filter %>% group_by(exclude_few_trials) %>% summarise(n())
## Source: local data frame [2 x 2]
##
## exclude_few_trials n()
## 1 exclude 5
## 2 include 45
# merge filtering information with iChart
iChart <- left_join(iChart, trials_filter, by = "Sub.Num")
# now filter
iChart <- filter(iChart, exclude_few_trials == "include")
kids_age_descriptives <- iChart %>%
filter(age_code == "child") %>%
select(Sub.Num, Months) %>%
distinct() %>%
summarise(median(Months),
max(Months),
min(Months),
sd(Months),
n()) %>%
print()
## median(Months) max(Months) min(Months) sd(Months) n()
## 1 27 53 16 9.140259 29
# add median split variable to iChart
iChart$age_group <- ifelse(iChart$Months < kids_age_descriptives$`median(Months)`,
"< 26.5 Months",
ifelse(iChart$Months >= kids_age_descriptives$`median(Months)` &
iChart$Months <= kids_age_descriptives$`max(Months)`,
"> 26.5 Months",
"Adults"))
gender_df <- iChart %>%
filter(age_code == "child") %>%
distinct(Sub.Num) %>%
group_by(Sex, age_group) %>%
summarise(count = unique(n())) %>%
print()
## Source: local data frame [4 x 3]
## Groups: Sex
##
## Sex age_group count
## 1 F < 26.5 Months 8
## 2 F > 26.5 Months 9
## 3 M < 26.5 Months 6
## 4 M > 26.5 Months 6
iChart %>% group_by(age_group) %>%
summarise(n_distinct(Sub.Num),
mean(Months),
min(Months),
max(Months))
## Source: local data frame [3 x 5]
##
## age_group n_distinct(Sub.Num) mean(Months) min(Months) max(Months)
## 1 < 26.5 Months 14 20.71308 16 26
## 2 > 26.5 Months 15 36.18715 27 53
## 3 Adults 16 430.57634 246 695
First, we need to process the data, keeping only those trials on which the child was looking at the signer at F0.
includeOffCenter == FALSE -> only include trials child was looking at center at F0
includeOffCenter == TRUE -> include trials child was looking at center, target, or distractor at F0
iChart %>% group_by("0", Response) %>% summarise(Trials = n())
## Source: local data frame [4 x 3]
## Groups: "0"
##
## "0" Response Trials
## 1 0 A 28
## 2 0 C 1015
## 3 0 D 34
## 4 0 T 42
# change all trials to "Vanilla"
iChart$Condition <- "Vanilla"
## define critical onset, change Cs to Ds and everything else to As
iChart <- defineOnsetSOL(iChart, critonset=0, end_critonset=300,
includeOffCenter=FALSE, includeWindow = FALSE)
iChart %>% group_by("0", Response) %>% summarise(Trials = n())
## Source: local data frame [2 x 3]
## Groups: "0"
##
## "0" Response Trials
## 1 0 A 104
## 2 0 D 1015
Datawiz does not tell us which shifts land on a target vs. a disctractor. So we need to use a function that flags each trial as one of the following:
# apply it to each row in our datase
trial_types <- apply(iChart, 1, trial_type_fun)
# merge this information back with the iChart
iChart <- cbind(iChart, trial_types)
iChart %>% group_by(trial_types) %>% summarise(Trials = n())
## Source: local data frame [4 x 2]
##
## trial_types Trials
## 1 C_D 131
## 2 C_T 822
## 3 no_shift 62
## 4 off_signer 104
Next, we compute statistics over long window 0-5000 ms. This will allow us to see a distribution of RTs, which we will use to determine our analysis window.
iChart <- computeStatistics(iChart, startWindow=0, endWindow=5000)
## [1] "### Trials left ###"
## [1] 1100
## [1] 1050
## [1] 1000
## [1] 950
## [1] 900
## [1] 850
## [1] 800
## [1] 750
## [1] 700
## [1] 650
## [1] 600
## [1] 550
## [1] 500
## [1] 450
## [1] 400
## [1] 350
## [1] 300
## [1] 250
## [1] 200
## [1] 150
## [1] 100
## [1] 50
## [1] 0
# get analyisis window where 90% of RTs occur for kids, include all shifts
rts <- filter(iChart, trial_types %in% c("C_T"), age_group != "Adults")
analysis.window <- quantile(rts$RT, probs=c(0.05, 0.95), na.rm=T)
qplot(rts$RT) +
geom_vline(x=analysis.window[1], col="red", lwd=1.5) +
geom_vline(x=analysis.window[2], col="red", lwd=1.5) +
annotate("text", x = 2500, y = 30,
label = "Analysis Window \n (95% RTs)")
Compute statistics over analysis window: 0-2600ms. We use 2600 ms because it is 500 ms longer than the end of our analysis window (2100ms). This allows us to include trials in which the participant to initiates and completes a shift at the very end of the analysis window.
iChart <- computeStatistics(iChart, startWindow=0, endWindow=2500)
## [1] "### Trials left ###"
## [1] 1100
## [1] 1050
## [1] 1000
## [1] 950
## [1] 900
## [1] 850
## [1] 800
## [1] 750
## [1] 700
## [1] 650
## [1] 600
## [1] 550
## [1] 500
## [1] 450
## [1] 400
## [1] 350
## [1] 300
## [1] 250
## [1] 200
## [1] 150
## [1] 100
## [1] 50
## [1] 0
Reject trials with really long RTs and with long gaps. Gaps are defined as a sequence of frames when the child is not looking at either picture or at the signer.
iChart <- filteriChart(iChart, minRT = analysis.window[1], maxRT = analysis.window[2], maxfirstgap=15, maxlonggap=15)
acc_ss <- poolData(meanAccuracy(iChart, startWindowAcc=500, endWindowAcc=2000),
RejectFirstGap=TRUE,RejectLongestGap=TRUE,
RejectRT=FALSE, color=TRUE, dependent="Accuracy",
group="", facet="", dodge="",
xlab="", ylab= "Proportion\n Looking\n to target",
paired=TRUE, miny = 0.2, maxy = 0.80,
size=13, legend.direction="horizontal",
legend.position="bottom",
breaks=c(0.25, 0.50, 0.75))
## Compute reaction time for each age group only on C_T trials
rt_ss <- poolData(filter(iChart, trial_types %in% c("C_T")),
RejectFirstGap=TRUE, RejectLongestGap=TRUE,
RejectRT=TRUE, color=FALSE, dependent="RT", group="",
facet="", dodge="Response",
xlab="", ylab="mean RT (ms)",
paired=TRUE,
miny = 400, maxy=1300,
size=13,
legend.direction = "horizontal",
legend.position="bottom",
breaks=c(400, 800, 1200))
acc <- poolData(meanAccuracy(iChart, startWindowAcc=500, endWindowAcc=2000),
RejectFirstGap=TRUE,RejectLongestGap=TRUE,
RejectRT=FALSE, color=TRUE, dependent="Accuracy",
group="age_group", facet="", dodge="",
xlab="", ylab= "Proportion\n Looking\n to target",
paired=TRUE, miny = 0.2, maxy = 0.80,
size=13, legend.direction="horizontal",
legend.position="bottom",
breaks=c(0.25, 0.50, 0.75))
## Compute reaction time for each age group only on C_T trials
rt <- poolData(filter(iChart, trial_types %in% c("C_T")),
RejectFirstGap=TRUE, RejectLongestGap=TRUE,
RejectRT=TRUE, color=FALSE, dependent="RT", group="age_group",
facet="", dodge="Response",
xlab="", ylab="mean RT (ms)",
paired=TRUE,
miny = 400, maxy=1300,
size=13,
legend.direction = "horizontal",
legend.position="bottom",
breaks=c(400, 800, 1200))
Get mean accuracy and rt for each participant
Some munging to get data frame for analysis. Variables needed for each subject:
# merge acc/rt van1
ss <- merge(acc_ss, rt_ss, by="Sub.Num")
# merge with demo info
ss <- merge(ss, filter(sol_demo, include=="yes"), by="Sub.Num")
# get age bins
ss_age_bins <- iChart %>%
select(Sub.Num, age_group, Months) %>%
distinct()
ss <- merge(ss, ss_age_bins, by="Sub.Num")
# clean up variable names in data frame
names(ss)[names(ss)=="Vanilla"] <- "mean_accuracy"
names(ss)[names(ss)=="Vanilla_D"] <- "mean_rt"
ss_first_shifts <- iChart %>%
filter(trial_types %in% c("C_T", "C_D")) %>%
group_by(Sub.Num, trial_types, age_group) %>%
summarise(count = n())
ss_first_shifts_prop <- ss_first_shifts %>%
group_by(Sub.Num) %>%
summarise(total_trials_shifting = sum(count)) %>%
left_join(ss_first_shifts, by = "Sub.Num") %>%
mutate(shift_prop = round(count / total_trials_shifting, 2))
# add to ss full data frame
ss <- ss_first_shifts %>%
select(Sub.Num, count, trial_types) %>%
spread(trial_types, count) %>%
transmute(Sub.Num = Sub.Num, C_D_count = C_D, C_T_count = C_T) %>%
left_join(ss, by = "Sub.Num")
# add to ss full data frame
ss <- ss_first_shifts_prop %>%
select(Sub.Num, shift_prop, trial_types,
total_trials_shifting) %>%
spread(trial_types, shift_prop) %>%
transmute(Sub.Num = Sub.Num, C_D_prop = C_D, C_T_prop = C_T) %>%
left_join(ss, by = "Sub.Num")
# flag chance first shifters -- need to find a principled way to do this
ss <- ss %>%
mutate(C_D_prop = ifelse(C_T_prop == 1, 0, C_D_prop),
exclude_chance_shifter = ifelse(Sub.Num %in% c("30050", "30018", "30051", "30086", "30088"), "exclude", "include"))
Set up filter to just include kids for correlation analyses.
ss_kids <- filter(ss, age_group != "Adults")
ss_rt <- filter(ss, age_group != "Adults", exclude_chance_shifter == "include")
write.csv(ss_kids, "../bayesian-data-analysis/sol_ss_kids.csv", row.names = F)
Hmisc::rcorr(as.matrix(select(ss_kids, mean_accuracy, C_T_prop, mean_rt, signs_produced, Months)))
## mean_accuracy C_T_prop mean_rt signs_produced Months
## mean_accuracy 1.00 0.55 -0.53 0.46 0.64
## C_T_prop 0.55 1.00 -0.12 0.32 0.35
## mean_rt -0.53 -0.12 1.00 -0.49 -0.34
## signs_produced 0.46 0.32 -0.49 1.00 0.76
## Months 0.64 0.35 -0.34 0.76 1.00
##
## n
## mean_accuracy C_T_prop mean_rt signs_produced Months
## mean_accuracy 29 29 29 28 29
## C_T_prop 29 29 29 28 29
## mean_rt 29 29 29 28 29
## signs_produced 28 28 28 28 28
## Months 29 29 29 28 29
##
## P
## mean_accuracy C_T_prop mean_rt signs_produced Months
## mean_accuracy 0.0020 0.0030 0.0142 0.0002
## C_T_prop 0.0020 0.5351 0.0964 0.0628
## mean_rt 0.0030 0.5351 0.0079 0.0747
## signs_produced 0.0142 0.0964 0.0079 0.0000
## Months 0.0002 0.0628 0.0747 0.0000
ggpairs(data = select(ss_kids, mean_accuracy, C_T_prop, mean_rt, signs_produced, Months),
upper = list(continuous = "smooth", combo = "cor",
params = c(method = "lm", color="darkblue")),
lower = list(continuous = "cor"),
diag = list(continuous = "density")) +
theme_bw()
Just RT.
Hmisc::rcorr(as.matrix(select(ss_rt, mean_rt, signs_produced, Months)))
## mean_rt signs_produced Months
## mean_rt 1.00 -0.51 -0.38
## signs_produced -0.51 1.00 0.72
## Months -0.38 0.72 1.00
##
## n
## mean_rt signs_produced Months
## mean_rt 24 23 24
## signs_produced 23 23 23
## Months 24 23 24
##
## P
## mean_rt signs_produced Months
## mean_rt 0.0132 0.0663
## signs_produced 0.0132 0.0001
## Months 0.0663 0.0001
ggpairs(data = select(ss_rt, mean_rt, signs_produced, Months),
upper = list(continuous = "smooth", combo = "cor",
params = c(method = "lm", color="darkblue")),
lower = list(continuous = "cor"),
diag = list(continuous = "density")) +
theme_bw()
t.test(mean_accuracy ~ age_group, data=ss_kids)
##
## Welch Two Sample t-test
##
## data: mean_accuracy by age_group
## t = -3.3317, df = 26.887, p-value = 0.00252
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.11969437 -0.02844552
## sample estimates:
## mean in group < 26.5 Months mean in group > 26.5 Months
## 0.5935824 0.6676523
t.test(mean_rt ~ age_group, data=ss_kids)
##
## Welch Two Sample t-test
##
## data: mean_rt by age_group
## t = 1.8546, df = 18.91, p-value = 0.07931
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -16.59593 274.07901
## sample estimates:
## mean in group < 26.5 Months mean in group > 26.5 Months
## 1242.685 1113.943
createPlots(iChart, startWindow=0, endWindow=2000, RejectLongestGap=FALSE,
RejectFirstGap=FALSE, RejectRT=FALSE, color=TRUE, smooth=400,
targetEnd=800, carrier="", targets=c(""),
group="age_group", plotStats="PP", miny = 0.4, maxy=0.95, size=15,
legend.direction = "vertical", legend.position=c(0.85, 0.9),
breaks=c(0.25, 0.50, 0.75), x.target=0.33)
ss.et <- read.table("sol-ichart-v1-processed_PP_graphValues_by_subs_0_2000_minRT_33_maxRT_2433_lg_59_fg_34_n_45.txt", header = T)
ss.et$Sub.Num <- as.character(ss.et$Sub.Num)
detach("package:reshape", unload=TRUE)
#strip X from header
names(ss.et) <- gsub("X", "", names(ss.et))
ss.et <- left_join(ss.et, ss_kids, by="Sub.Num")
#melt to long form for plotting
ss.et.long <- reshape2::melt(ss.et,
id.vars=c("Sub.Num", "Condition",
"Months", "signs_produced",
"groupping", "stimuli"),
variable.name = "Time.ms",
value.name = "accuracy")
#convert sub.num to a factor
ss.et.long <- ss.et.long %>%
mutate(Sub.Num = as.factor(Sub.Num),
Time.ms = as.numeric(as.character(Time.ms)),
Months = as.factor(Months),
accuracy = as.numeric(accuracy))
# set breaks for x-axis
breaks <- seq(0,2000, by=500)
qplot(data = ss.et.long, x = Time.ms, y = accuracy,
color=groupping, geom="smooth", method = "loess",
ylim=c(0.2,1), xlab=c("Time (ms) from noun onset"),
ylab=c("Proportion Looking to target")) +
geom_hline(yintercept = 0.5, linetype="dashed") +
scale_x_discrete(breaks = breaks) +
facet_wrap(Months+signs_produced~Sub.Num, ncol=8) +
#guides(color=FALSE) +
geom_vline(xintercept=1200, linetype = "dashed") +
theme(axis.text.x = element_text(angle = 75, hjust = 1)) +
theme_bw()
ms_graph_values <- ss.et.long %>%
group_by(groupping, Time.ms) %>%
summarise(mean_accuracy = mean(accuracy),
ci.high = ci.high(accuracy),
ci.low = ci.low(accuracy))
breaks <- seq(0,2000, by=500)
y_breaks <- seq(0.25,1.0, by = 0.25)
points <- seq(0,1900, by = 100)
qplot(data = ms_graph_values, x = Time.ms, y = mean_accuracy,
color=groupping, geom = "blank", xlab=c("Time in msec from onset of noun"),
ylab=c("ACCURACY\nProportion\nlooking\nto target")) +
geom_point(data = filter(ms_graph_values, Time.ms %in% points), size=3) +
geom_line(size=0.7) +
geom_linerange(data = filter(ms_graph_values, Time.ms %in% points),
aes(ymin=mean_accuracy - ci.low,
ymax=mean_accuracy + ci.high),
width = .03, size=0.3, alpha = 1) +
scale_color_brewer(type = "qual", palette = "Set1") +
scale_x_discrete(breaks = breaks) +
scale_y_continuous(limits=c(0.25, 1.0), breaks = y_breaks) +
geom_hline(yintercept = 0.5, linetype="dashed") +
geom_vline(xintercept=1150, linetype = "dashed") +
guides(color=F) +
theme_bw() +
theme(axis.title.x = element_text(colour="grey40",size=22,
angle=0,hjust=0.5,vjust=0,face="plain"),
axis.title.y = element_text(colour="grey40",size=22,
angle=0,hjust=0.5,vjust=0.5,face="plain"),
axis.text.x = element_text(colour="grey20",size=18,
angle=0,hjust=0.5,vjust=0,face="plain"),
axis.text.y = element_text(colour="grey20",size=18,
angle=0,hjust=0.5,vjust=0,face="plain"),
plot.margin = unit(c(0.5,2,1,1), "cm"))