This script analyzes data from an experiment testing the effectiveness of active vs. passive training in a category learning task.
rm(list=ls())
library(dplyr)    # data manipulation (%>%, group_by, summarise)
library(ggplot2)  # plotting (qplot)
library(lme4)     # mixed-effects models (glmer)
source("useful.R")  # helper functions, including ci.high() and ci.low()
df <- read.csv("../data/act-learn-processed-data.csv")
Exclude participants whose overall performance (averaged across blocks) fell more than three standard deviations from the mean of their group.
ss <- df %>%
filter(trial_type == "test") %>%
group_by(subids, condition) %>%
summarise(mean_accuracy = mean(correct),
ci_high_acc = ci.high(correct),
ci_low_acc = ci.low(correct))
# get the mean and standard deviation of each group
ms_ss <- ss %>%
group_by(condition) %>%
summarise(group_mean = mean(mean_accuracy),
group_sd = sd(mean_accuracy))
m_act <- ms_ss$group_mean[ms_ss$condition == "active"]
sd_act <- ms_ss$group_sd[ms_ss$condition == "active"]
m_rec <- ms_ss$group_mean[ms_ss$condition == "receptive"]
sd_rec <- ms_ss$group_sd[ms_ss$condition == "receptive"]
# flag subject means that are +/- 3sd
ss_include <- ss %>%
mutate(include = ifelse(condition == "active",
ifelse(mean_accuracy > m_act + 3*sd_act |
mean_accuracy < m_act - 3*sd_act, 0, 1),
ifelse(mean_accuracy > m_rec + 3*sd_rec |
mean_accuracy < m_rec - 3*sd_rec, 0, 1))) %>%
select(subids, include, condition)
ss_include %>%
group_by(condition, include) %>%
summarise(n())
## Source: local data frame [3 x 3]
## Groups: condition
##
## condition include n()
## 1 active 0 1
## 2 active 1 24
## 3 receptive 1 23
# merge with full data frame
df <- inner_join(df, select(ss_include, subids, include), by = "subids")
# filter out subjects that are +/- 3sd
df <- df %>% filter(include == 1)
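Demographics of the included sample. The demographic chunks aren't echoed; below is a sketch of per-row summaries that would produce tables like the ones that follow (column names taken from the output headers; the original may have collapsed to one row per subject first):
df %>% group_by(gender) %>% summarise(n_distinct(subids))
df %>% summarise(floor(median(age)))
df %>% summarise(min(age), max(age))
df %>% group_by(language) %>% summarise(n_distinct(subids))
df %>% group_by(education) %>% summarise(n_distinct(subids))
df %>% summarise(mean(exp_length_min))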
## Source: local data frame [2 x 2]
##
## gender n_distinct(subids)
## 1 Female 20
## 2 Male 27
## floor(median(age))
## 1 33
## min(age) max(age)
## 1 22 67
## Source: local data frame [5 x 2]
##
## language n_distinct(subids)
## 1 english 13
## 2 English 31
## 3 Enlgish 1
## 4 Russian 1
## 5 Spanish 1
## Source: local data frame [4 x 2]
##
## education n_distinct(subids)
## 1 Graduated College 21
## 2 Graduated High School 9
## 3 Higher Degree 2
## 4 Some College 15
## Source: local data frame [1 x 1]
##
## mean(exp_length_min)
## 1 19.95854
Get the number of subjects in each condition and decision-boundary dimension (odb_scale).
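The chunk isn't echoed; a sketch that would produce the counts below:
df %>%
  filter(trial_type == "test") %>%
  group_by(condition, odb_scale) %>%
  summarise(n_subs = n_distinct(subids))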
## Source: local data frame [4 x 3]
## Groups: condition
##
## condition odb_scale n_subs
## 1 active orientation_scale 13
## 2 active radius_scale 11
## 3 receptive orientation_scale 11
## 4 receptive radius_scale 12
Histogram of the dependent variable across blocks: correct categorization of antennas on test trials.
Histogram of experiment length, split by condition.
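Neither plotting chunk is echoed; sketches consistent with the two stat_bin warnings below (variable and facet choices are assumptions):
qplot(correct, data=filter(df, trial_type == "test"), facets=.~block)
qplot(exp_length_min, data=df, facets=condition~.)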
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
Get mean accuracy across all trials for each condition.
Plot mean accuracy.
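These chunks aren't echoed; a sketch of both steps, assuming the ci.high()/ci.low() helpers from useful.R:
ms_acc <- df %>%
  filter(trial_type == "test") %>%
  group_by(condition) %>%
  summarise(mean_accuracy = mean(correct),
            ci_high = ci.high(correct),
            ci_low = ci.low(correct))
qplot(x=condition, y=mean_accuracy, data=ms_acc) +
  geom_pointrange(aes(ymin=mean_accuracy - ci_low,
                      ymax=mean_accuracy + ci_high))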
Get mean accuracy for each condition and decision-boundary dimension (odb_scale).
Plot.
Plot accuracy over blocks.
Accuracy by block and dimension.
Plot accuracy over blocks, split by dimension.
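A sketch of the block-level accuracy figure (assumed; the original chunk isn't shown):
ms_block <- df %>%
  filter(trial_type == "test") %>%
  group_by(condition, block) %>%
  summarise(mean_accuracy = mean(correct),
            ci_high = ci.high(correct),
            ci_low = ci.low(correct))
qplot(x=block, y=mean_accuracy, colour=condition, geom="line",
      data=ms_block) +
  geom_pointrange(aes(ymin=mean_accuracy - ci_low,
                      ymax=mean_accuracy + ci_high))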
Analyze the average distance of participants’ samples from the optimal decision boundary.
Rotate the data so that orientation and radius lie on the same dimension.
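The rotation chunk isn't shown; a sketch, assuming sampling trials are labeled "sample" in trial_type and using the response-parameter columns referenced later in the script:
df_sampling <- df %>%
  filter(trial_type == "sample") %>%
  mutate(dim_of_interest = ifelse(odb_scale == "orientation_scale",
                                  orientation_response_param,
                                  radius_response_param),
         other_dim = ifelse(odb_scale == "orientation_scale",
                            radius_response_param,
                            orientation_response_param))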
Plot group-level sampling behavior.
qplot(x=dim_of_interest, y=other_dim, data=df_sampling,
facets=.~block) +
geom_vline(aes(xintercept=300))
Plot individual participant sampling behavior.
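A sketch, faceting the group-level plot by subject (the original chunk isn't shown):
qplot(x=dim_of_interest, y=other_dim, data=df_sampling,
      facets=~subids) +
  geom_vline(aes(xintercept=300))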
Get distance from optimal decision boundary for each sample.
df_sampling <- mutate(df_sampling,
samp_dist_odb = ifelse(odb_scale == "orientation_scale",
abs(odb_param - orientation_response_param),
abs(odb_param - radius_response_param)))
Now get the average distance across subjects.
ms_sampling <- df_sampling %>%
group_by(block) %>%
summarize(mean_samp_dist = mean(samp_dist_odb),
ci_high = ci.high(samp_dist_odb),
ci_low = ci.low(samp_dist_odb))
Now plot.
qplot(x=block, y=mean_samp_dist, data=ms_sampling,
      geom="line", ylim=c(75,225)) +
  geom_pointrange(aes(ymin=mean_samp_dist - ci_low,
                      ymax=mean_samp_dist + ci_high),
                  size=0.6) +
  scale_color_grey()
Compute average response times on sampling trials across blocks.
ms_rt <- df_sampling %>%
group_by(block) %>%
summarise(mean_rt = mean(rt),
ci_high = ci.high(rt),
ci_low = ci.low(rt))
Now plot.
qplot(x=block, y=mean_rt, data=ms_rt,
      geom="line") +
  geom_pointrange(aes(ymin=mean_rt - ci_low,
                      ymax=mean_rt + ci_high),
                  size=0.6) +
  scale_color_grey()
Sample distance and accuracy: do participants who sample closer to the optimal boundary perform better at test?
ss_samp_dist <- df_sampling %>%
filter(condition == "active") %>%
group_by(subids) %>%
summarise(mean_samp_dist = mean(samp_dist_odb),
ci_high_msd = ci.high(samp_dist_odb),
ci_low_msd = ci.low(samp_dist_odb))
ss_mean_acc <- df %>%
filter(condition == "active") %>%
filter(trial_type == "test") %>%
group_by(subids) %>%
summarise(mean_accuracy = mean(correct),
ci_high_acc = ci.high(correct),
ci_low_acc = ci.low(correct))
# join sampling and test acc together
ss_all <- inner_join(ss_samp_dist, ss_mean_acc, by="subids")
Plot the relationship.
qplot(x=mean_samp_dist, y=mean_accuracy, data=ss_all) +
geom_smooth(method="lm")
Welch two-sample t-test for the difference in test-trial accuracy between the active and receptive learning conditions.
df_test <- filter(df, trial_type == "test")
t.test(correct ~ condition, data=df_test)
##
## Welch Two Sample t-test
##
## data: correct by condition
## t = 12.0932, df = 8116.022, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.06923232 0.09601882
## sample estimates:
## mean in group active mean in group receptive
## 0.9200607 0.8374351
This replicates the boost to accuracy in the active learning condition (92.0% vs. 83.7% correct).
Model the relationship between mean sampling distance on training trials and mean accuracy on test trials.
m1 <- lm(mean_accuracy ~ mean_samp_dist, data=ss_all)
summary(m1)
##
## Call:
## lm(formula = mean_accuracy ~ mean_samp_dist, data = ss_all)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.19151 -0.02201 0.01023 0.02735 0.11540
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0358379 0.0518940 19.961 1.39e-15 ***
## mean_samp_dist -0.0008720 0.0003772 -2.312 0.0305 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06858 on 22 degrees of freedom
## Multiple R-squared: 0.1954, Adjusted R-squared: 0.1589
## F-statistic: 5.344 on 1 and 22 DF, p-value: 0.03055
There is a negative relationship: subjects who sampled farther from the optimal boundary were less accurate at test.
Next, predict trial-level correctness from condition using a mixed-effects logistic regression with a random intercept for each subject.
m2 <- glmer(correct ~ condition + (1|subids),
data=filter(df,trial_type=="test"), family=binomial)
summary(m2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: correct ~ condition + (1 | subids)
## Data: filter(df, trial_type == "test")
##
## AIC BIC logLik deviance df.resid
## 5737.2 5758.5 -2865.6 5731.2 9042
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.5981 0.1750 0.2468 0.3621 0.9898
##
## Random effects:
## Groups Name Variance Std.Dev.
## subids (Intercept) 1.008 1.004
## Number of obs: 9045, groups: subids, 47
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.7771 0.2170 12.799 <2e-16 ***
## conditionreceptive -0.7541 0.3066 -2.459 0.0139 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## condtnrcptv -0.707
# add rt on test trials to the model
m3 <- glmer(correct ~ condition + rt + (1|subids),
data=filter(df,trial_type=="test"), family=binomial)
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0342991
## (tol = 0.001, component 2)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
summary(m3)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: correct ~ condition + rt + (1 | subids)
## Data: filter(df, trial_type == "test")
##
## AIC BIC logLik deviance df.resid
## 5721.4 5749.9 -2856.7 5713.4 9041
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.4751 0.1740 0.2469 0.3606 2.6293
##
## Random effects:
## Groups Name Variance Std.Dev.
## subids (Intercept) 1.01 1.005
## Number of obs: 9045, groups: subids, 47
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.835e+00 2.179e-01 13.008 <2e-16 ***
## conditionreceptive -7.560e-01 3.062e-01 -2.469 0.0136 *
## rt -1.478e-05 6.109e-06 -2.419 0.0156 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cndtnr
## condtnrcptv -0.702
## rt -0.103 0.002
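The convergence warnings above suggest rescaling rt; one possible refit (not part of the original analysis):
m3_scaled <- glmer(correct ~ condition + scale(rt) + (1|subids),
                   data=filter(df, trial_type=="test"), family=binomial)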
# add block
m4 <- glmer(correct ~ condition + rt + block + (1|subids),
data=filter(df,trial_type=="test"), family=binomial)
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.025061
## (tol = 0.001, component 2)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
summary(m4)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: correct ~ condition + rt + block + (1 | subids)
## Data: filter(df, trial_type == "test")
##
## AIC BIC logLik deviance df.resid
## 5613.8 5649.4 -2801.9 5603.8 9040
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -9.1621 0.1665 0.2419 0.3451 2.0170
##
## Random effects:
## Groups Name Variance Std.Dev.
## subids (Intercept) 1.044 1.022
## Number of obs: 9045, groups: subids, 47
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.124e+00 2.305e-01 9.212 <2e-16 ***
## conditionreceptive -7.699e-01 3.111e-01 -2.475 0.0133 *
## rt -1.218e-05 6.108e-06 -1.994 0.0462 *
## block 2.179e-01 2.107e-02 10.338 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cndtnr rt
## condtnrcptv -0.672
## rt -0.124 0.002
## block -0.279 -0.008 0.085
Finally, test whether the condition effect depends on the decision-boundary dimension by adding an interaction with odb_scale.
m5 <- glmer(correct ~ condition * odb_scale + (1|subids),
data=filter(df,trial_type=="test"), family=binomial)
summary(m5)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: correct ~ condition * odb_scale + (1 | subids)
## Data: filter(df, trial_type == "test")
##
## AIC BIC logLik deviance df.resid
## 5736.7 5772.2 -2863.3 5726.7 9040
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.4679 0.1762 0.2469 0.3615 0.9922
##
## Random effects:
## Groups Name Variance Std.Dev.
## subids (Intercept) 0.9028 0.9502
## Number of obs: 9045, groups: subids, 47
##
## Fixed effects:
## Estimate Std. Error z value
## (Intercept) 2.773487 0.279362 9.928
## conditionreceptive -0.281325 0.409516 -0.687
## odb_scaleradius_scale -0.007329 0.411378 -0.018
## conditionreceptive:odb_scaleradius_scale -0.887293 0.581741 -1.525
## Pr(>|z|)
## (Intercept) <2e-16 ***
## conditionreceptive 0.492
## odb_scaleradius_scale 0.986
## conditionreceptive:odb_scaleradius_scale 0.127
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cndtnr odb_s_
## condtnrcptv -0.680
## odb_sclrds_ -0.676 0.460
## cndtnrcp:__ 0.477 -0.703 -0.706
Neither the main effect of boundary dimension nor its interaction with condition reaches significance (p = .99 and p = .13, respectively).