library(tidyverse)
library(patchwork)
Lec5_Quarto
Lecture 5 Tutorial
this is an Quarto version of last class’s tutorial
loading libraries
read in data and do a little cleaning
Code
<- read_csv('dtilearn_behaviour.csv')
df <- c('sbj','stimx','answer')
fact_cols <- df %>%
df mutate(across(all_of(fact_cols), factor)) %>% #recode variables as factors -- across loops over fact_cols
filter(!is.na(response)) # filter out rows with missing responses
# recode stimx variable
<- df %>%
df mutate(stim=factor(
case_when(
==1|stimx==8~'proto',
stimx==5|stimx==6~'far',
stimx~'near'),
Tlevels=c('proto','near','far')
))
summarise data across runs and subjects
<- df %>%
acc.sbj group_by(sbj,run,stim) %>%
summarise(acc=mean(correct))
<- df %>%
rt.sbj group_by(sbj,run,stim,correct) %>%
summarise(mrt=median(rt, na.rm=T)) %>%
spread(key=correct,value=mrt) %>%
rename(rtcor=`1`,rtinc=`0`)
<- full_join(acc.sbj,rt.sbj)
byrun.sbj <- byrun.sbj %>%
byrun.grp group_by(run,stim) %>%
summarise(n=length(acc),
macc=mean(acc,na.rm=T),
acc.ci=1.96*sd(acc,na.rm=T)/sqrt(n-1),
mrtcor=mean(rtcor,na.rm=T),
rtcor.ci=1.96*sd(rtcor,na.rm=T)/sqrt(n-1),
mrtinc=mean(rtinc,na.rm=T),
rtinc.ci=1.96*sd(rtinc,na.rm=T)/sqrt(n-1))
ggplot syntax review
ggplot(byrun.grp,aes(x=run,y=macc,colour=stim))+
geom_line()
# add multiple layers to same plot
ggplot(byrun.grp,aes(x=run,y=macc,colour=stim))+
geom_line()+
geom_point()
## aesthetic scope: global vs. layer
# global if defined in ggplot()
# layer specific if defined within geom_* or stat_*
# layer aesthetic overrides global
ggplot(byrun.grp,aes(x=run,y=macc))+
geom_line(aes(colour=stim))+ # colour aesthetic limited to layer
geom_point()
## each geom has different properties
# can be directly set or mapped to variable in aes() call
# look up docs to see list (?geom_line)
ggplot(byrun.grp,aes(x=run,y=macc,group=stim))+
geom_line(colour='red',linewidth=2,aes(linetype=stim))+
geom_point(aes(colour=stim,size=run))
## each layer can use different datasets
# note that the global scope still applies to geom_point after
# geom_dotplot defines layer-specific aes
ggplot(byrun.grp,aes(x=factor(run),y=macc,colour=stim))+
geom_dotplot(data=byrun.sbj, # using a different dataset for this geom!
aes(x=factor(run),y=acc,fill=stim),binaxis='y', stackdir='center',
dotsize=.3,alpha=0.25, binwidth=1/30)+
geom_point(size=4,aes(shape=stim))+ # back to global definitions in ggplot above
theme_classic()
Changing titles and axis labels
<- ggplot(byrun.grp,aes(x=run,y=macc,colour=stim))+
p geom_line()
p
# add axis labels and title
+ xlab('learning block') + ylab('accuracy') +
p ggtitle('Accuracy',subtitle='block average')
# add labels/title with one function
+ labs(x='learning block', y='accuracy',
p title='Accuracy', subtitle='block average')
# center title by changing parameters in your theme
+ labs(x='learning block', y='accuracy',
p title='Accuracy', subtitle='block average') +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
# subscript, superscript, and symbols
# ~=space, []=subscript, ^=superscript, Greek variables
+ xlab(expression(learning[block])) + ylab(expression(gamma~~~~eta^2)) p
changing axes limits
= byrun.sbj %>%
byrun.proto filter(stim=='proto', !is.na(rtcor), !is.na(rtinc))
<- ggplot(byrun.proto,aes(x=rtcor,y=rtinc))+
p geom_point()+geom_smooth(method='lm')+
labs(x='correct RT (ms)',y='incorrect RT (ms)',
title='RT comparison')
p
`geom_smooth()` using formula = 'y ~ x'
# xlim & ylim delete points outside limits
<- p + xlim(1000,2500)+ylim(1000,2500) + ggtitle('RT comparison cut')
p1 # removed rows!! p1
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 62 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 62 rows containing missing values or values outside the scale range
(`geom_point()`).
# coord_cartesian simply zooms w/o deleting points
<- p + coord_cartesian(xlim=c(1000,2500),ylim=c(1000,2500))+ ggtitle('RT comparison zoom')
p2 p2
`geom_smooth()` using formula = 'y ~ x'
# patchwork syntax for side-by-side viewing
+p2 p1
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 62 rows containing non-finite outside the scale range
(`stat_smooth()`).
Removed 62 rows containing missing values or values outside the scale range
(`geom_point()`).
`geom_smooth()` using formula = 'y ~ x'
## axes ticks and labels ----
<- ggplot(byrun.proto,aes(x=rtcor,y=rtinc))+
p geom_point()+geom_smooth(method='lm')+
labs(x='correct RT (ms)',y='incorrect RT (ms)',
title='RT comparison')
# set breaks for ticks location
+ scale_x_continuous(breaks=seq(0,3000,by=250))+
p scale_y_continuous(breaks=seq(0,3000,by=250))
`geom_smooth()` using formula = 'y ~ x'
# add in labels to change tick text
+ scale_y_continuous(breaks=seq(1000,3000,by=1000),
p labels=c('1K','2K','3K'))
`geom_smooth()` using formula = 'y ~ x'
## adjust axis title and text properties with theme function
<- byrun.sbj %>%
byrun.sbjc filter(!is.na(rtcor),!is.na(rtinc))
<- ggplot(byrun.sbjc,aes(x=rtcor,y=rtinc,colour=stim))+
p geom_point(alpha=0.4,size=2)+
geom_smooth(method='lm')+
labs(x='correct RT (ms)',y='incorrect RT (ms)',
title='RT comparison')
p
`geom_smooth()` using formula = 'y ~ x'
+ theme(axis.title.x=element_text(size=10,
p family='Courier',
face='bold'),
axis.text.x=element_text(size=8,
angle=45))
`geom_smooth()` using formula = 'y ~ x'
changing legends
# change legend title and level names
+ labs(colour="stimulus type") +
p scale_colour_discrete(labels=c('prototypes','near','far'))
`geom_smooth()` using formula = 'y ~ x'
# change legend position
+ theme(legend.position='bottom') p
`geom_smooth()` using formula = 'y ~ x'
+ theme(legend.position='None') p
`geom_smooth()` using formula = 'y ~ x'
labeling points
library(ggrepel)
+ geom_label_repel(aes(label=sbj),data=byrun.sbjc) #try labeling all the points p
`geom_smooth()` using formula = 'y ~ x'
Warning: ggrepel: 513 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
<- byrun.sbjc %>%
byrun.sbjc mutate(longrt=ifelse(rtcor>2500|rtinc>2500,sbj,"")) # make new column that only contains subject number when RT is very long
+ geom_label_repel(aes(label=longrt),data=byrun.sbjc, show.legend = FALSE) + labs(colour="stimulus type") +
p scale_colour_discrete(labels=c('prototypes','near','far'))
`geom_smooth()` using formula = 'y ~ x'
Depictions of variance
NOTE: Within-subject error bars require extra consideration! check out rcookbook for tips and example code calculating appropriate error bars
# last lecture we covered how to include bootstrapped error bars on the fly
<- ggplot(byrun.sbj,aes(x=run,y=acc,colour=stim))+
p1 stat_summary(fun=mean,geom="line")+
stat_summary(fun=mean,geom="point",size=3)+
stat_summary(fun.data="mean_cl_boot",geom="pointrange")+
labs(title='bootstrap CI')
# alternatively, you can calculate your error range and add them yourself
<- ggplot(byrun.grp,aes(x=run,y=macc,colour=stim))+
p2 geom_line() +
geom_point(size=3) +
geom_errorbar(aes(ymin=macc-acc.ci,ymax=macc+acc.ci),width=0.2)+
labs(title='parametric CI')
+p2 p1
# regardless of how you generate them you geom choices!
<- ggplot(byrun.grp,aes(x=run,y=macc,colour=stim))+
p geom_line() +
geom_point(size=3)
+ geom_pointrange(aes(ymin=macc-acc.ci,ymax=macc+acc.ci)) p
+ geom_linerange(aes(ymin=macc-acc.ci,ymax=macc+acc.ci)) p
+ geom_crossbar(aes(ymin=macc-acc.ci,ymax=macc+acc.ci), width=.2) p
# ribbon plots are popular when you have continuous x-axis, but take some work
+ geom_ribbon(aes(ymin=macc-acc.ci,ymax=macc+acc.ci)) p
+ geom_ribbon(aes(ymin=macc-acc.ci,ymax=macc+acc.ci),alpha=0.25) p
+ geom_ribbon(aes(ymin=macc-acc.ci,ymax=macc+acc.ci,fill=stim),
p colour=NA,alpha=0.25)
discrete x-axis (review)
# bar plot with errorbars
=position_dodge(0.9) # define positioning for multiple layers
dodgeggplot(byrun.grp,aes(x=factor(run),y=macc,colour=stim,fill=stim))+
geom_col(position=dodge,colour=NA)+
geom_errorbar(aes(ymin=macc-acc.ci,ymax=macc+acc.ci),
width=0.2,colour='grey20',position=dodge)
# showing your full distribution
= position_dodge(0.9)
dodge ggplot(filter(byrun.grp,run<4),aes(x=factor(run),y=macc,fill=stim,colour=stim))+
geom_violin(data=filter(byrun.sbj,run<4),aes(x=factor(run),y=acc),
position=dodge,alpha=0.5,colour=NA)+
geom_pointrange(aes(ymin=macc-acc.ci,ymax=macc+acc.ci),
position=dodge)+
labs(title='learning accuracy',x='learning block',y='p(correct)',
colour='stimulus type',fill='stimulus type')
stat_smooth transforms data by fitting models
ggplot(byrun.sbj,aes(x=run,y=acc,colour=stim))+
stat_smooth()
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
ggplot(byrun.sbj,aes(x=run,y=acc,colour=stim))+
stat_smooth(se=F)
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
ggplot(byrun.sbj,aes(x=run,y=acc,colour=stim))+
stat_smooth(method='lm',formula=y~x)
ggplot(byrun.sbj,aes(x=run,y=acc,colour=stim))+
stat_smooth(method='lm',formula=y~log(x))
histograms and densities
ggplot(byrun.sbjc,aes(rtcor,fill=stim))+
geom_histogram(bins=10,position='dodge',colour=NA)
ggplot(byrun.sbjc,aes(rtcor,fill=stim))+
geom_density(alpha=0.5)
multiple plots
## separate plots based on variables: ggplot's facet_wrap & facet_grid
<- ggplot(byrun.sbjc,aes(rtcor,fill=stim,group=run))+
p geom_histogram(bins=20)
+ facet_wrap(~stim) p
+ facet_grid(.~stim) p
+ facet_grid(stim~run) p
## separate plots: patchwork
<- ggplot(byrun.grp,aes(x=run,y=macc,colour=stim))+
p1 geom_line()
<- ggplot(byrun.sbjc,aes(x=rtcor,y=rtinc,colour=stim))+
p2 geom_point()
<- ggplot(byrun.sbjc,aes(rtcor,colour=stim))+
p3 geom_density(alpha=0.5)
# multiple plots with patchwork
+p2 # horizontal stacking p1
/p2 # vertical stacking p1
# change relative areas for plots
/p2 &plot_layout(heights=c(1,3)) p1
# grouping plots
/(p2|p3) p1
# change to only 1 legend
+theme(legend.position='top')}/
{p1+theme(legend.position='None')|p3+theme(legend.position='None')} {p2
# & recurse through each plot for theme change
/(p2|p3) & theme_classic() p1
Saving figures
ggplot(byrun.sbjc,aes(x=rtcor,y=rtinc,colour=stim))+
geom_point(alpha=0.4,size=2)+
geom_smooth(method='lm')+
labs(x='correct RT (ms)',y='incorrect RT (ms)',
title='RT comparison',colour="stimulus type") +
scale_colour_discrete(labels=c('prototypes','near','far'))
`geom_smooth()` using formula = 'y ~ x'
ggsave('amazing.pdf',units='in',width=6,height=4)
`geom_smooth()` using formula = 'y ~ x'
ggsave('amazing.png',dpi=300,units='in',width=6,height=4)
`geom_smooth()` using formula = 'y ~ x'