Lec5_Quarto

Lecture 5 Tutorial

this is an Quarto version of last class’s tutorial

loading libraries

library(tidyverse)
library(patchwork)

read in data and do a little cleaning

Code

df <- read_csv('dtilearn_behaviour.csv')
fact_cols <- c('sbj','stimx','answer')
df <- df %>%
  mutate(across(all_of(fact_cols), factor)) %>% #recode variables as factors -- across loops over fact_cols
  filter(!is.na(response)) # filter out rows with missing responses

# recode stimx variable
df <- df %>%
  mutate(stim=factor(
    case_when(
      stimx==1|stimx==8~'proto',
      stimx==5|stimx==6~'far',
      T~'near'),
      levels=c('proto','near','far')
    ))

summarise data across runs and subjects

acc.sbj <- df %>% 
  group_by(sbj,run,stim) %>%
  summarise(acc=mean(correct))
rt.sbj <- df %>%
  group_by(sbj,run,stim,correct) %>%
  summarise(mrt=median(rt, na.rm=T)) %>%
  spread(key=correct,value=mrt) %>%
  rename(rtcor=`1`,rtinc=`0`)
byrun.sbj <- full_join(acc.sbj,rt.sbj)
byrun.grp <- byrun.sbj %>%
  group_by(run,stim) %>%
  summarise(n=length(acc),
            macc=mean(acc,na.rm=T), 
            acc.ci=1.96*sd(acc,na.rm=T)/sqrt(n-1),
            mrtcor=mean(rtcor,na.rm=T), 
            rtcor.ci=1.96*sd(rtcor,na.rm=T)/sqrt(n-1),
            mrtinc=mean(rtinc,na.rm=T), 
            rtinc.ci=1.96*sd(rtinc,na.rm=T)/sqrt(n-1))

ggplot syntax review

ggplot(byrun.grp,aes(x=run,y=macc,colour=stim))+
  geom_line()

# add multiple layers to same plot
ggplot(byrun.grp,aes(x=run,y=macc,colour=stim))+
  geom_line()+
  geom_point()

## aesthetic scope: global vs. layer
#   global if defined in ggplot()
#   layer specific if defined within geom_* or stat_*
#   layer aesthetic overrides global
ggplot(byrun.grp,aes(x=run,y=macc))+
  geom_line(aes(colour=stim))+ # colour aesthetic limited to layer
  geom_point()

## each geom has different properties
#    can be directly set or mapped to variable in aes() call
#    look up docs to see list (?geom_line)
ggplot(byrun.grp,aes(x=run,y=macc,group=stim))+
  geom_line(colour='red',linewidth=2,aes(linetype=stim))+
  geom_point(aes(colour=stim,size=run))

## each layer can use different datasets
#    note that the global scope still applies to geom_point after 
#    geom_dotplot defines layer-specific aes
ggplot(byrun.grp,aes(x=factor(run),y=macc,colour=stim))+
  geom_dotplot(data=byrun.sbj, # using a different dataset for this geom!
              aes(x=factor(run),y=acc,fill=stim),binaxis='y', stackdir='center', 
              dotsize=.3,alpha=0.25, binwidth=1/30)+
  geom_point(size=4,aes(shape=stim))+ # back to global definitions in ggplot above
  theme_classic()

Changing titles and axis labels

p <- ggplot(byrun.grp,aes(x=run,y=macc,colour=stim))+
  geom_line()
p

# add axis labels and title
p + xlab('learning block') + ylab('accuracy') + 
  ggtitle('Accuracy',subtitle='block average')

# add labels/title with one function
p + labs(x='learning block', y='accuracy',
         title='Accuracy', subtitle='block average')

# center title by changing parameters in your theme
p + labs(x='learning block', y='accuracy',
        title='Accuracy', subtitle='block average') + 
  theme(plot.title = element_text(hjust = 0.5), 
        plot.subtitle = element_text(hjust = 0.5))

# subscript, superscript, and symbols
#  ~=space, []=subscript, ^=superscript, Greek variables
p + xlab(expression(learning[block])) + ylab(expression(gamma~~~~eta^2))

changing axes limits

byrun.proto = byrun.sbj %>%
  filter(stim=='proto', !is.na(rtcor), !is.na(rtinc))

p <- ggplot(byrun.proto,aes(x=rtcor,y=rtinc))+
  geom_point()+geom_smooth(method='lm')+
  labs(x='correct RT (ms)',y='incorrect RT (ms)',
       title='RT comparison')
p

`geom_smooth()` using formula = 'y ~ x'

# xlim & ylim delete points outside limits
p1 <- p + xlim(1000,2500)+ylim(1000,2500) + ggtitle('RT comparison cut')
p1 # removed rows!!

`geom_smooth()` using formula = 'y ~ x'

Warning: Removed 62 rows containing non-finite outside the scale range
(`stat_smooth()`).

Warning: Removed 62 rows containing missing values or values outside the scale range
(`geom_point()`).

# coord_cartesian simply zooms w/o deleting points
p2 <- p + coord_cartesian(xlim=c(1000,2500),ylim=c(1000,2500))+ ggtitle('RT comparison zoom')
p2

`geom_smooth()` using formula = 'y ~ x'

# patchwork syntax for side-by-side viewing
p1+p2

`geom_smooth()` using formula = 'y ~ x'

Warning: Removed 62 rows containing non-finite outside the scale range
(`stat_smooth()`).
Removed 62 rows containing missing values or values outside the scale range
(`geom_point()`).

`geom_smooth()` using formula = 'y ~ x'

## axes ticks and labels ----
p <- ggplot(byrun.proto,aes(x=rtcor,y=rtinc))+
  geom_point()+geom_smooth(method='lm')+
  labs(x='correct RT (ms)',y='incorrect RT (ms)',
       title='RT comparison')

# set breaks for ticks location
p + scale_x_continuous(breaks=seq(0,3000,by=250))+
    scale_y_continuous(breaks=seq(0,3000,by=250))

`geom_smooth()` using formula = 'y ~ x'

# add in labels to change tick text
p + scale_y_continuous(breaks=seq(1000,3000,by=1000),
                       labels=c('1K','2K','3K'))

`geom_smooth()` using formula = 'y ~ x'

## adjust axis title and text properties with theme function
byrun.sbjc <- byrun.sbj %>%
  filter(!is.na(rtcor),!is.na(rtinc))
p <- ggplot(byrun.sbjc,aes(x=rtcor,y=rtinc,colour=stim))+
  geom_point(alpha=0.4,size=2)+
  geom_smooth(method='lm')+
  labs(x='correct RT (ms)',y='incorrect RT (ms)',
       title='RT comparison')
p

`geom_smooth()` using formula = 'y ~ x'

p + theme(axis.title.x=element_text(size=10,
                                    family='Courier',
                                    face='bold'),
          axis.text.x=element_text(size=8,
                                   angle=45))

`geom_smooth()` using formula = 'y ~ x'

changing legends

# change legend title and level names
p + labs(colour="stimulus type") + 
  scale_colour_discrete(labels=c('prototypes','near','far'))

`geom_smooth()` using formula = 'y ~ x'

# change legend position
p + theme(legend.position='bottom')

`geom_smooth()` using formula = 'y ~ x'

p + theme(legend.position='None')

`geom_smooth()` using formula = 'y ~ x'

labeling points

library(ggrepel)
p + geom_label_repel(aes(label=sbj),data=byrun.sbjc) #try labeling all the points

`geom_smooth()` using formula = 'y ~ x'

Warning: ggrepel: 513 unlabeled data points (too many overlaps). Consider
increasing max.overlaps

byrun.sbjc <- byrun.sbjc %>%
  mutate(longrt=ifelse(rtcor>2500|rtinc>2500,sbj,"")) # make new column that only contains subject number when RT is very long
p + geom_label_repel(aes(label=longrt),data=byrun.sbjc, show.legend = FALSE) + labs(colour="stimulus type") + 
  scale_colour_discrete(labels=c('prototypes','near','far'))

`geom_smooth()` using formula = 'y ~ x'

Depictions of variance

NOTE: Within-subject error bars require extra consideration! check out rcookbook for tips and example code calculating appropriate error bars

# last lecture we covered how to include bootstrapped error bars on the fly
p1 <- ggplot(byrun.sbj,aes(x=run,y=acc,colour=stim))+
  stat_summary(fun=mean,geom="line")+
  stat_summary(fun=mean,geom="point",size=3)+
  stat_summary(fun.data="mean_cl_boot",geom="pointrange")+
  labs(title='bootstrap CI')

# alternatively, you can calculate your error range and add them yourself
p2 <- ggplot(byrun.grp,aes(x=run,y=macc,colour=stim))+
  geom_line() +
  geom_point(size=3) +
  geom_errorbar(aes(ymin=macc-acc.ci,ymax=macc+acc.ci),width=0.2)+
  labs(title='parametric CI')

p1+p2

# regardless of how you generate them you geom choices!
p <- ggplot(byrun.grp,aes(x=run,y=macc,colour=stim))+
  geom_line() +
  geom_point(size=3)

p + geom_pointrange(aes(ymin=macc-acc.ci,ymax=macc+acc.ci))

p + geom_linerange(aes(ymin=macc-acc.ci,ymax=macc+acc.ci))

p + geom_crossbar(aes(ymin=macc-acc.ci,ymax=macc+acc.ci), width=.2)

# ribbon plots are popular when you have continuous x-axis, but take some work
p + geom_ribbon(aes(ymin=macc-acc.ci,ymax=macc+acc.ci))

p + geom_ribbon(aes(ymin=macc-acc.ci,ymax=macc+acc.ci),alpha=0.25)

p + geom_ribbon(aes(ymin=macc-acc.ci,ymax=macc+acc.ci,fill=stim),
                colour=NA,alpha=0.25)

discrete x-axis (review)

# bar plot with errorbars
dodge=position_dodge(0.9) # define positioning for multiple layers
ggplot(byrun.grp,aes(x=factor(run),y=macc,colour=stim,fill=stim))+
  geom_col(position=dodge,colour=NA)+
  geom_errorbar(aes(ymin=macc-acc.ci,ymax=macc+acc.ci),
                width=0.2,colour='grey20',position=dodge)

# showing your full distribution
dodge = position_dodge(0.9)
ggplot(filter(byrun.grp,run<4),aes(x=factor(run),y=macc,fill=stim,colour=stim))+
  geom_violin(data=filter(byrun.sbj,run<4),aes(x=factor(run),y=acc),
              position=dodge,alpha=0.5,colour=NA)+
  geom_pointrange(aes(ymin=macc-acc.ci,ymax=macc+acc.ci),
                  position=dodge)+
  labs(title='learning accuracy',x='learning block',y='p(correct)',
       colour='stimulus type',fill='stimulus type')

stat_smooth transforms data by fitting models

ggplot(byrun.sbj,aes(x=run,y=acc,colour=stim))+
  stat_smooth()

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

ggplot(byrun.sbj,aes(x=run,y=acc,colour=stim))+
  stat_smooth(se=F)

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

ggplot(byrun.sbj,aes(x=run,y=acc,colour=stim))+
  stat_smooth(method='lm',formula=y~x)

ggplot(byrun.sbj,aes(x=run,y=acc,colour=stim))+
  stat_smooth(method='lm',formula=y~log(x))

histograms and densities

ggplot(byrun.sbjc,aes(rtcor,fill=stim))+
  geom_histogram(bins=10,position='dodge',colour=NA)

ggplot(byrun.sbjc,aes(rtcor,fill=stim))+
  geom_density(alpha=0.5)

multiple plots

## separate plots based on variables: ggplot's facet_wrap & facet_grid
p <- ggplot(byrun.sbjc,aes(rtcor,fill=stim,group=run))+
  geom_histogram(bins=20)
p + facet_wrap(~stim)

p + facet_grid(.~stim)

p + facet_grid(stim~run)

## separate plots: patchwork
p1 <- ggplot(byrun.grp,aes(x=run,y=macc,colour=stim))+
  geom_line()
p2 <- ggplot(byrun.sbjc,aes(x=rtcor,y=rtinc,colour=stim))+
  geom_point()
p3 <- ggplot(byrun.sbjc,aes(rtcor,colour=stim))+
  geom_density(alpha=0.5)

# multiple plots with patchwork
p1+p2 # horizontal stacking

p1/p2 # vertical stacking

# change relative areas for plots
p1/p2 &plot_layout(heights=c(1,3))

# grouping plots
p1/(p2|p3)

# change to only 1 legend
{p1+theme(legend.position='top')}/
  {p2+theme(legend.position='None')|p3+theme(legend.position='None')}

# & recurse through each plot for theme change
p1/(p2|p3) & theme_classic()

Saving figures

ggplot(byrun.sbjc,aes(x=rtcor,y=rtinc,colour=stim))+
  geom_point(alpha=0.4,size=2)+
  geom_smooth(method='lm')+
  labs(x='correct RT (ms)',y='incorrect RT (ms)',
       title='RT comparison',colour="stimulus type") + 
  scale_colour_discrete(labels=c('prototypes','near','far'))

`geom_smooth()` using formula = 'y ~ x'

ggsave('amazing.pdf',units='in',width=6,height=4)

`geom_smooth()` using formula = 'y ~ x'

ggsave('amazing.png',dpi=300,units='in',width=6,height=4)

`geom_smooth()` using formula = 'y ~ x'