This script analyzes data from an experiment testing the effectiveness of active vs. passive training in a category learning task.

Libraries

rm(list=ls())
source("useful.R")

Load data

df <- read.csv("../data/act-learn-processed-data.csv")

Exclusionary criteria

Overall performance (averaged across blocks) was more than three stan- dard deviations below the mean of their group.

ss <- df %>%
    filter(trial_type == "test") %>%
    group_by(subids, condition) %>%
    summarise(mean_accuracy = mean(correct),
              ci_high_acc = ci.high(correct),
              ci_low_acc = ci.low(correct))

# get the mean and standard deviation of each group
ms_ss <- ss %>%
    group_by(condition) %>%
    summarise(group_mean = mean(mean_accuracy),
              group_sd = sd(mean_accuracy))

m_act <- ms_ss$group_mean[ms_ss$condition == "active"]
sd_act <- ms_ss$group_sd[ms_ss$condition == "active"]
m_rec <- ms_ss$group_mean[ms_ss$condition == "receptive"]
sd_rec <- ms_ss$group_sd[ms_ss$condition == "receptive"]

# flag subject means that are +/- 3sd 
ss_include <- ss %>%
    mutate(include = ifelse(condition == "active", 
                            ifelse(mean_accuracy > m_act + 3*sd_act | 
                                       mean_accuracy < m_act - 3*sd_act, 0, 1),
                            ifelse(mean_accuracy > m_rec + 3*sd_rec |
                                mean_accuracy < m_rec - 3*sd_rec, 0, 1))) %>%
    select(subids, include, condition) 

ss_include %>%
    group_by(condition, include) %>%
    summarise(n())

## Source: local data frame [3 x 3]
## Groups: condition
## 
##   condition include n()
## 1    active       0   1
## 2    active       1  24
## 3 receptive       1  23

# merge with full data frame
df <- inner_join(df, select(ss_include, subids, include), by = "subids")

# filter out subjects that are +/- 3sd
df <- df %>% filter(include == 1)

Demographics

## Source: local data frame [2 x 2]
## 
##   gender n_distinct(subids)
## 1 Female                 20
## 2   Male                 27

##   floor(median(age))
## 1                 33

##   min(age) max(age)
## 1       22       67

## Source: local data frame [5 x 2]
## 
##   language n_distinct(subids)
## 1  english                 13
## 2  English                 31
## 3  Enlgish                  1
## 4  Russian                  1
## 5  Spanish                  1

## Source: local data frame [4 x 2]
## 
##               education n_distinct(subids)
## 1     Graduated College                 21
## 2 Graduated High School                  9
## 3         Higher Degree                  2
## 4          Some College                 15

## Source: local data frame [1 x 1]
## 
##   mean(exp_length_min)
## 1             19.95854

Descriptives

Get number of subjects in each condition and order.

## Source: local data frame [4 x 3]
## Groups: condition
## 
##   condition         odb_scale n_subs
## 1    active orientation_scale     13
## 2    active      radius_scale     11
## 3 receptive orientation_scale     11
## 4 receptive      radius_scale     12

Histogram of dependent variable across blocks: correct categorization of antennas on test trials.

Histogram of length of experiment split by condition

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

Overall accuracy analysis

Get mean accuracy across all trials for each condition.

Plot mean accuracy.

Get mean accuracy for each condition and order

Plot.

Accuracy by block analysis

Plot accuracy over blocks

Accuracy by block and dimension.

Plot accuracy over blocks

Evidence selection analysis (active learning)

Analyze the average distance of participants’ samples from the optimal decision boundary.

Rotate, so orientation and radius are on the same dimension.

Plot group level sampling behavior.

qplot(x=dim_of_interest, y=other_dim, data=df_sampling,
      facets=.~block) +
    geom_vline(aes(xintercept=300))

Plot individual participant sampling behavior

Get distance from optimal decision boundary for each sample.

df_sampling <- mutate(df_sampling, 
                      samp_dist_odb = ifelse(odb_scale == "orientation_scale",
                                             abs(odb_param - orientation_response_param),
                                             abs(odb_param - radius_response_param)))

Now get the average distance across subjects

ms_sampling <- df_sampling %>%
    group_by(block) %>%
    summarize(mean_samp_dist = mean(samp_dist_odb),
              ci_high = ci.high(samp_dist_odb),
              ci_low = ci.low(samp_dist_odb))

Now plot.

qplot(x=block, y=mean_samp_dist, data=ms_sampling, 
      geom="line", ylim=c(75,225)) +
    geom_pointrange(aes(ymin=mean_samp_dist - ci_low, 
        ymax=mean_samp_dist + ci_high), 
        width = .05, size=0.6) +
    scale_color_grey()

Compute average response times across blocks for the two different dimensions.

ms_rt <- df_sampling %>%
    group_by(block) %>%
    summarise(mean_rt = mean(rt),
              ci_high = ci.high(rt),
              ci_low = ci.low(rt))

Now plot.

qplot(x=block, y=mean_rt, data=ms_rt, 
      geom="line") +
    geom_pointrange(aes(ymin=mean_rt - ci_low, 
        ymax=mean_rt + ci_high), 
        width = .05, size=0.6) +
    scale_color_grey()

Relationship between sampling and test

Sample distance and accuracy

ss_samp_dist <- df_sampling %>%
    filter(condition == "active") %>%
    group_by(subids) %>%
    summarise(mean_samp_dist = mean(samp_dist_odb),
              ci_high_msd = ci.high(samp_dist_odb),
              ci_low_msd = ci.low(samp_dist_odb))

ss_mean_acc <- df %>%
    filter(condition == "active") %>%
    filter(trial_type == "test") %>%
    group_by(subids) %>%
    summarise(mean_accuracy = mean(correct),
              ci_high_acc = ci.high(correct),
              ci_low_acc = ci.low(correct))

# join sampling and test acc together
ss_all <- inner_join(ss_samp_dist, ss_mean_acc, by="subids")

Plot

qplot(x=mean_samp_dist, y=mean_accuracy, data=ss_all) +
    geom_smooth(method="lm")

Relatiship between Confidence and Accuracy

Statistics

t.test testing difference in accuracy on test trials between active and receptive learning conditions.

df_test <- filter(df, trial_type == "test")
t.test(correct ~ condition, data=df_test)

## 
##  Welch Two Sample t-test
## 
## data:  correct by condition
## t = 12.0932, df = 8116.022, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.06923232 0.09601882
## sample estimates:
##    mean in group active mean in group receptive 
##               0.9200607               0.8374351

Replicated the slight boost to accuracy in the active learning condition.

Models

Model the relationship between mean sampling distance on training trials and mean accuracy on test trials.

m1 <- lm(mean_accuracy ~ mean_samp_dist, data=ss_all)
summary(m1)

## 
## Call:
## lm(formula = mean_accuracy ~ mean_samp_dist, data = ss_all)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.19151 -0.02201  0.01023  0.02735  0.11540 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.0358379  0.0518940  19.961 1.39e-15 ***
## mean_samp_dist -0.0008720  0.0003772  -2.312   0.0305 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06858 on 22 degrees of freedom
## Multiple R-squared:  0.1954, Adjusted R-squared:  0.1589 
## F-statistic: 5.344 on 1 and 22 DF,  p-value: 0.03055

Negative relationship such that those subjects who sampled farther away from the boundary were less accurate on test.

Next, we try to predict correct based on condition.

m2 <- glmer(correct ~ condition + (1|subids), 
            data=filter(df,trial_type=="test"), family=binomial) 
summary(m2)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: correct ~ condition + (1 | subids)
##    Data: filter(df, trial_type == "test")
## 
##      AIC      BIC   logLik deviance df.resid 
##   5737.2   5758.5  -2865.6   5731.2     9042 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.5981  0.1750  0.2468  0.3621  0.9898 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  subids (Intercept) 1.008    1.004   
## Number of obs: 9045, groups:  subids, 47
## 
## Fixed effects:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          2.7771     0.2170  12.799   <2e-16 ***
## conditionreceptive  -0.7541     0.3066  -2.459   0.0139 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## condtnrcptv -0.707

# add rt on test trials to the model
m3 <- glmer(correct ~ condition + rt + (1|subids), 
            data=filter(df,trial_type=="test"), family=binomial)

## Warning: Some predictor variables are on very different scales: consider
## rescaling

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0342991
## (tol = 0.001, component 2)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?

summary(m3)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: correct ~ condition + rt + (1 | subids)
##    Data: filter(df, trial_type == "test")
## 
##      AIC      BIC   logLik deviance df.resid 
##   5721.4   5749.9  -2856.7   5713.4     9041 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.4751  0.1740  0.2469  0.3606  2.6293 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  subids (Intercept) 1.01     1.005   
## Number of obs: 9045, groups:  subids, 47
## 
## Fixed effects:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         2.835e+00  2.179e-01  13.008   <2e-16 ***
## conditionreceptive -7.560e-01  3.062e-01  -2.469   0.0136 *  
## rt                 -1.478e-05  6.109e-06  -2.419   0.0156 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndtnr
## condtnrcptv -0.702       
## rt          -0.103  0.002

# add block
m4 <- glmer(correct ~ condition + rt + block + (1|subids), 
            data=filter(df,trial_type=="test"), family=binomial)

## Warning: Some predictor variables are on very different scales: consider
## rescaling

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.025061
## (tol = 0.001, component 2)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?

summary(m4)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: correct ~ condition + rt + block + (1 | subids)
##    Data: filter(df, trial_type == "test")
## 
##      AIC      BIC   logLik deviance df.resid 
##   5613.8   5649.4  -2801.9   5603.8     9040 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -9.1621  0.1665  0.2419  0.3451  2.0170 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  subids (Intercept) 1.044    1.022   
## Number of obs: 9045, groups:  subids, 47
## 
## Fixed effects:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         2.124e+00  2.305e-01   9.212   <2e-16 ***
## conditionreceptive -7.699e-01  3.111e-01  -2.475   0.0133 *  
## rt                 -1.218e-05  6.108e-06  -1.994   0.0462 *  
## block               2.179e-01  2.107e-02  10.338   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndtnr rt    
## condtnrcptv -0.672              
## rt          -0.124  0.002       
## block       -0.279 -0.008  0.085

m5 <- glmer(correct ~ condition * odb_scale + (1|subids), 
            data=filter(df,trial_type=="test"), family=binomial)
summary(m5)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: correct ~ condition * odb_scale + (1 | subids)
##    Data: filter(df, trial_type == "test")
## 
##      AIC      BIC   logLik deviance df.resid 
##   5736.7   5772.2  -2863.3   5726.7     9040 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.4679  0.1762  0.2469  0.3615  0.9922 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  subids (Intercept) 0.9028   0.9502  
## Number of obs: 9045, groups:  subids, 47
## 
## Fixed effects:
##                                           Estimate Std. Error z value
## (Intercept)                               2.773487   0.279362   9.928
## conditionreceptive                       -0.281325   0.409516  -0.687
## odb_scaleradius_scale                    -0.007329   0.411378  -0.018
## conditionreceptive:odb_scaleradius_scale -0.887293   0.581741  -1.525
##                                          Pr(>|z|)    
## (Intercept)                                <2e-16 ***
## conditionreceptive                          0.492    
## odb_scaleradius_scale                       0.986    
## conditionreceptive:odb_scaleradius_scale    0.127    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndtnr odb_s_
## condtnrcptv -0.680              
## odb_sclrds_ -0.676  0.460       
## cndtnrcp:__  0.477 -0.703 -0.706

Act-Learn Analysis

Kyle MacDonald