library(tidyverse)
library(patchwork)Lec5_Quarto
Lecture 5 Tutorial
this is an Quarto version of last class’s tutorial
loading libraries
read in data and do a little cleaning
Code
df <- read_csv('dtilearn_behaviour.csv')
fact_cols <- c('sbj','stimx','answer')
df <- df %>%
mutate(across(all_of(fact_cols), factor)) %>% #recode variables as factors -- across loops over fact_cols
filter(!is.na(response)) # filter out rows with missing responses
# recode stimx variable
df <- df %>%
mutate(stim=factor(
case_when(
stimx==1|stimx==8~'proto',
stimx==5|stimx==6~'far',
T~'near'),
levels=c('proto','near','far')
))summarise data across runs and subjects
acc.sbj <- df %>%
group_by(sbj,run,stim) %>%
summarise(acc=mean(correct))
rt.sbj <- df %>%
group_by(sbj,run,stim,correct) %>%
summarise(mrt=median(rt, na.rm=T)) %>%
spread(key=correct,value=mrt) %>%
rename(rtcor=`1`,rtinc=`0`)
byrun.sbj <- full_join(acc.sbj,rt.sbj)
byrun.grp <- byrun.sbj %>%
group_by(run,stim) %>%
summarise(n=length(acc),
macc=mean(acc,na.rm=T),
acc.ci=1.96*sd(acc,na.rm=T)/sqrt(n-1),
mrtcor=mean(rtcor,na.rm=T),
rtcor.ci=1.96*sd(rtcor,na.rm=T)/sqrt(n-1),
mrtinc=mean(rtinc,na.rm=T),
rtinc.ci=1.96*sd(rtinc,na.rm=T)/sqrt(n-1))ggplot syntax review
ggplot(byrun.grp,aes(x=run,y=macc,colour=stim))+
geom_line()# add multiple layers to same plot
ggplot(byrun.grp,aes(x=run,y=macc,colour=stim))+
geom_line()+
geom_point()## aesthetic scope: global vs. layer
# global if defined in ggplot()
# layer specific if defined within geom_* or stat_*
# layer aesthetic overrides global
ggplot(byrun.grp,aes(x=run,y=macc))+
geom_line(aes(colour=stim))+ # colour aesthetic limited to layer
geom_point() ## each geom has different properties
# can be directly set or mapped to variable in aes() call
# look up docs to see list (?geom_line)
ggplot(byrun.grp,aes(x=run,y=macc,group=stim))+
geom_line(colour='red',linewidth=2,aes(linetype=stim))+
geom_point(aes(colour=stim,size=run))## each layer can use different datasets
# note that the global scope still applies to geom_point after
# geom_dotplot defines layer-specific aes
ggplot(byrun.grp,aes(x=factor(run),y=macc,colour=stim))+
geom_dotplot(data=byrun.sbj, # using a different dataset for this geom!
aes(x=factor(run),y=acc,fill=stim),binaxis='y', stackdir='center',
dotsize=.3,alpha=0.25, binwidth=1/30)+
geom_point(size=4,aes(shape=stim))+ # back to global definitions in ggplot above
theme_classic()Changing titles and axis labels
p <- ggplot(byrun.grp,aes(x=run,y=macc,colour=stim))+
geom_line()
p# add axis labels and title
p + xlab('learning block') + ylab('accuracy') +
ggtitle('Accuracy',subtitle='block average')# add labels/title with one function
p + labs(x='learning block', y='accuracy',
title='Accuracy', subtitle='block average')# center title by changing parameters in your theme
p + labs(x='learning block', y='accuracy',
title='Accuracy', subtitle='block average') +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))# subscript, superscript, and symbols
# ~=space, []=subscript, ^=superscript, Greek variables
p + xlab(expression(learning[block])) + ylab(expression(gamma~~~~eta^2))changing axes limits
byrun.proto = byrun.sbj %>%
filter(stim=='proto', !is.na(rtcor), !is.na(rtinc))
p <- ggplot(byrun.proto,aes(x=rtcor,y=rtinc))+
geom_point()+geom_smooth(method='lm')+
labs(x='correct RT (ms)',y='incorrect RT (ms)',
title='RT comparison')
p`geom_smooth()` using formula = 'y ~ x'
# xlim & ylim delete points outside limits
p1 <- p + xlim(1000,2500)+ylim(1000,2500) + ggtitle('RT comparison cut')
p1 # removed rows!!`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 62 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 62 rows containing missing values or values outside the scale range
(`geom_point()`).
# coord_cartesian simply zooms w/o deleting points
p2 <- p + coord_cartesian(xlim=c(1000,2500),ylim=c(1000,2500))+ ggtitle('RT comparison zoom')
p2`geom_smooth()` using formula = 'y ~ x'
# patchwork syntax for side-by-side viewing
p1+p2`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 62 rows containing non-finite outside the scale range
(`stat_smooth()`).
Removed 62 rows containing missing values or values outside the scale range
(`geom_point()`).
`geom_smooth()` using formula = 'y ~ x'
## axes ticks and labels ----
p <- ggplot(byrun.proto,aes(x=rtcor,y=rtinc))+
geom_point()+geom_smooth(method='lm')+
labs(x='correct RT (ms)',y='incorrect RT (ms)',
title='RT comparison')
# set breaks for ticks location
p + scale_x_continuous(breaks=seq(0,3000,by=250))+
scale_y_continuous(breaks=seq(0,3000,by=250))`geom_smooth()` using formula = 'y ~ x'
# add in labels to change tick text
p + scale_y_continuous(breaks=seq(1000,3000,by=1000),
labels=c('1K','2K','3K'))`geom_smooth()` using formula = 'y ~ x'
## adjust axis title and text properties with theme function
byrun.sbjc <- byrun.sbj %>%
filter(!is.na(rtcor),!is.na(rtinc))
p <- ggplot(byrun.sbjc,aes(x=rtcor,y=rtinc,colour=stim))+
geom_point(alpha=0.4,size=2)+
geom_smooth(method='lm')+
labs(x='correct RT (ms)',y='incorrect RT (ms)',
title='RT comparison')
p`geom_smooth()` using formula = 'y ~ x'
p + theme(axis.title.x=element_text(size=10,
family='Courier',
face='bold'),
axis.text.x=element_text(size=8,
angle=45))`geom_smooth()` using formula = 'y ~ x'
changing legends
# change legend title and level names
p + labs(colour="stimulus type") +
scale_colour_discrete(labels=c('prototypes','near','far'))`geom_smooth()` using formula = 'y ~ x'
# change legend position
p + theme(legend.position='bottom') `geom_smooth()` using formula = 'y ~ x'
p + theme(legend.position='None') `geom_smooth()` using formula = 'y ~ x'
labeling points
library(ggrepel)
p + geom_label_repel(aes(label=sbj),data=byrun.sbjc) #try labeling all the points`geom_smooth()` using formula = 'y ~ x'
Warning: ggrepel: 513 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
byrun.sbjc <- byrun.sbjc %>%
mutate(longrt=ifelse(rtcor>2500|rtinc>2500,sbj,"")) # make new column that only contains subject number when RT is very long
p + geom_label_repel(aes(label=longrt),data=byrun.sbjc, show.legend = FALSE) + labs(colour="stimulus type") +
scale_colour_discrete(labels=c('prototypes','near','far')) `geom_smooth()` using formula = 'y ~ x'
Depictions of variance
NOTE: Within-subject error bars require extra consideration! check out rcookbook for tips and example code calculating appropriate error bars
# last lecture we covered how to include bootstrapped error bars on the fly
p1 <- ggplot(byrun.sbj,aes(x=run,y=acc,colour=stim))+
stat_summary(fun=mean,geom="line")+
stat_summary(fun=mean,geom="point",size=3)+
stat_summary(fun.data="mean_cl_boot",geom="pointrange")+
labs(title='bootstrap CI')
# alternatively, you can calculate your error range and add them yourself
p2 <- ggplot(byrun.grp,aes(x=run,y=macc,colour=stim))+
geom_line() +
geom_point(size=3) +
geom_errorbar(aes(ymin=macc-acc.ci,ymax=macc+acc.ci),width=0.2)+
labs(title='parametric CI')
p1+p2# regardless of how you generate them you geom choices!
p <- ggplot(byrun.grp,aes(x=run,y=macc,colour=stim))+
geom_line() +
geom_point(size=3)
p + geom_pointrange(aes(ymin=macc-acc.ci,ymax=macc+acc.ci))p + geom_linerange(aes(ymin=macc-acc.ci,ymax=macc+acc.ci))p + geom_crossbar(aes(ymin=macc-acc.ci,ymax=macc+acc.ci), width=.2)# ribbon plots are popular when you have continuous x-axis, but take some work
p + geom_ribbon(aes(ymin=macc-acc.ci,ymax=macc+acc.ci))p + geom_ribbon(aes(ymin=macc-acc.ci,ymax=macc+acc.ci),alpha=0.25)p + geom_ribbon(aes(ymin=macc-acc.ci,ymax=macc+acc.ci,fill=stim),
colour=NA,alpha=0.25)discrete x-axis (review)
# bar plot with errorbars
dodge=position_dodge(0.9) # define positioning for multiple layers
ggplot(byrun.grp,aes(x=factor(run),y=macc,colour=stim,fill=stim))+
geom_col(position=dodge,colour=NA)+
geom_errorbar(aes(ymin=macc-acc.ci,ymax=macc+acc.ci),
width=0.2,colour='grey20',position=dodge)# showing your full distribution
dodge = position_dodge(0.9)
ggplot(filter(byrun.grp,run<4),aes(x=factor(run),y=macc,fill=stim,colour=stim))+
geom_violin(data=filter(byrun.sbj,run<4),aes(x=factor(run),y=acc),
position=dodge,alpha=0.5,colour=NA)+
geom_pointrange(aes(ymin=macc-acc.ci,ymax=macc+acc.ci),
position=dodge)+
labs(title='learning accuracy',x='learning block',y='p(correct)',
colour='stimulus type',fill='stimulus type')stat_smooth transforms data by fitting models
ggplot(byrun.sbj,aes(x=run,y=acc,colour=stim))+
stat_smooth()`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
ggplot(byrun.sbj,aes(x=run,y=acc,colour=stim))+
stat_smooth(se=F)`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
ggplot(byrun.sbj,aes(x=run,y=acc,colour=stim))+
stat_smooth(method='lm',formula=y~x)ggplot(byrun.sbj,aes(x=run,y=acc,colour=stim))+
stat_smooth(method='lm',formula=y~log(x))histograms and densities
ggplot(byrun.sbjc,aes(rtcor,fill=stim))+
geom_histogram(bins=10,position='dodge',colour=NA)ggplot(byrun.sbjc,aes(rtcor,fill=stim))+
geom_density(alpha=0.5)multiple plots
## separate plots based on variables: ggplot's facet_wrap & facet_grid
p <- ggplot(byrun.sbjc,aes(rtcor,fill=stim,group=run))+
geom_histogram(bins=20)
p + facet_wrap(~stim)p + facet_grid(.~stim)p + facet_grid(stim~run)## separate plots: patchwork
p1 <- ggplot(byrun.grp,aes(x=run,y=macc,colour=stim))+
geom_line()
p2 <- ggplot(byrun.sbjc,aes(x=rtcor,y=rtinc,colour=stim))+
geom_point()
p3 <- ggplot(byrun.sbjc,aes(rtcor,colour=stim))+
geom_density(alpha=0.5)
# multiple plots with patchwork
p1+p2 # horizontal stackingp1/p2 # vertical stacking# change relative areas for plots
p1/p2 &plot_layout(heights=c(1,3))# grouping plots
p1/(p2|p3)# change to only 1 legend
{p1+theme(legend.position='top')}/
{p2+theme(legend.position='None')|p3+theme(legend.position='None')}# & recurse through each plot for theme change
p1/(p2|p3) & theme_classic() Saving figures
ggplot(byrun.sbjc,aes(x=rtcor,y=rtinc,colour=stim))+
geom_point(alpha=0.4,size=2)+
geom_smooth(method='lm')+
labs(x='correct RT (ms)',y='incorrect RT (ms)',
title='RT comparison',colour="stimulus type") +
scale_colour_discrete(labels=c('prototypes','near','far'))`geom_smooth()` using formula = 'y ~ x'
ggsave('amazing.pdf',units='in',width=6,height=4)`geom_smooth()` using formula = 'y ~ x'
ggsave('amazing.png',dpi=300,units='in',width=6,height=4)`geom_smooth()` using formula = 'y ~ x'