Cleanfig4

Load relevant packages

library(readspss) #package to read the original datafile from OFS
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.2     v dplyr   1.0.6
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(psych)

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

Load data

# Read the cleaned data set (path is relative to the working directory);
# readr guesses the column types and reports them in the message below.
# NOTE(review): Plot4 is not referenced by any of the code shown in this
# document - confirm whether it is still needed.
Plot4 <- read_csv(file = "cleandata.csv")

## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_double(),
##   ParticipantID = col_character(),
##   General_1_MedList = col_character(),
##   General_1_University = col_character()
## )
## i Use `spec()` for the full column specifications.

Figure 4 data tibble:

# Calculate the change in bias relative to the pre-nap test for each cue
# condition. The numbers are group means typed in by hand; the same values
# appear as mean1/mean2 in table3.0 further down this document.
pre_post_change_cued <- 0.31 - 0.21
pre_post_change_uncued <- 0.25 - 0.30
pre_week_change_cued <- 0.40 - 0.21
pre_week_change_uncued <- 0.40 - 0.30

# Create dataframe
# The columns are built from the variables calculated above rather than
# retyping the results, so the tibble updates automatically if a mean changes.
# (Floating-point subtraction gives e.g. 0.0999... instead of exactly 0.1,
# but the printed tibble is rounded and looks the same.)
fig4 <- tibble(
  change_from_pre_to = c("immediate", "week"),
  cued = c(pre_post_change_cued, pre_week_change_cued),
  uncued = c(pre_post_change_uncued, pre_week_change_uncued)
)

print(fig4)

## # A tibble: 2 x 3
##   change_from_pre_to  cued uncued
##   <chr>              <dbl>  <dbl>
## 1 immediate           0.1   -0.05
## 2 week                0.19   0.1

To create a data set, you indicate what “variables” go into each group (these can be seen as what will become the axes and groupings of the graph).

For time1: this includes the two time conditions “immediate” and “week”. The 2 tells rep() how many times to repeat each time point, i.e. twice (one row for the cued condition and one row for the uncued condition).

The next line, bias_change, is where we enter the relevant values calculated previously, in the same order as the rows (immediate cued, immediate uncued, week cued, week uncued).

data.frame() combines these vectors into a data frame, which we store under the name data; the vectors we want included go inside the brackets.

head() lets you view the first rows of the data (up to six by default).

# Create the long-format data set for the plot: one row per
# time point x cue condition.
# `each = 2` repeats every time point twice in a row, while `times = 2`
# repeats the whole cued/uncued pair twice, so the rows line up.
time1 <- rep(c("immediate", "week"), each = 2)
condition <- rep(c("cued", "uncued"), times = 2)
bias_change <- c(0.10, -0.05, 0.19, 0.10)

# `<-` is the conventional R assignment operator (equivalent to `=` here).
data <- data.frame(time1, condition, bias_change)

head(data)

##       time1 condition bias_change
## 1 immediate      cued        0.10
## 2 immediate    uncued       -0.05
## 3      week      cued        0.19
## 4      week    uncued        0.10

ggplot() tells R that we want to graph our data.
Inside the brackets we say which data we are using, followed by the aes() function, which sets the aesthetics of our graph.
The next lines say which variables we want on the x and y axes; R looks these up in the data we provided.
fill = condition tells R that we want a different colour for each condition.
geom_bar adds bars to our graph; position = “dodge” places the bars for the separate conditions side by side instead of stacking them.
stat = “identity” is needed when you give geom_bar a ‘y’ aesthetic. By default geom_bar counts the observations in each group and uses that count as the bar height, which is incompatible with supplying our own y values. Adding stat = “identity” tells R that we want the bar heights to be the values we provide.

# plot
# Grouped bar chart: time point on the x axis, change in bias as bar height,
# one coloured bar per cue condition.
ggplot(data, aes(x = time1, y = bias_change, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge", alpha = 0.7)

Calculating the standard deviations and creating a new variable/tibble

Copying data from table 3 that we need

# Means and SDs copied from Table 3, entered row by row (one row per time
# point) so the layout mirrors the table it was copied from.
table3.0 <- tribble(
  ~timepoint,     ~mean1, ~SD1, ~mean2, ~SD2,
  "Baseline",       0.52, 0.36,   0.60, 0.45,
  "Prenap",         0.21, 0.51,   0.30, 0.44,
  "Postnap",        0.31, 0.44,   0.25, 0.48,
  "1-week delay",   0.40, 0.39,   0.40, 0.47
)

print(table3.0)

## # A tibble: 4 x 5
##   timepoint    mean1   SD1 mean2   SD2
##   <chr>        <dbl> <dbl> <dbl> <dbl>
## 1 Baseline      0.52  0.36  0.6   0.45
## 2 Prenap        0.21  0.51  0.3   0.44
## 3 Postnap       0.31  0.44  0.25  0.48
## 4 1-week delay  0.4   0.39  0.4   0.47

Creating a new tibble to manipulate, where the data is organised by cue condition and only the means are used (the SDs entered above are ignored).

# Reshape the means in table3.0 so that each row is one set of means
# (mean1 / mean2) and each column is a time point. Deriving the values from
# table3.0 instead of retyping them keeps the two tables in agreement.
table3.1 <- table3.0 %>%
  select(timepoint, mean1, mean2) %>% # drop the SD columns
  pivot_longer(
    cols = c(mean1, mean2),
    names_to = "statistics",
    values_to = "mean"
  ) %>%
  pivot_wider(names_from = timepoint, values_from = mean) %>%
  # "1-week delay" is not a syntactic column name, so give it a short one
  rename(Week = `1-week delay`)

print(table3.1)

## # A tibble: 2 x 5
##   statistics Baseline Prenap Postnap  Week
##   <chr>         <dbl>  <dbl>   <dbl> <dbl>
## 1 mean1          0.52   0.21    0.31   0.4
## 2 mean2          0.6    0.3     0.25   0.4

Mutating the data to create a new variable. When you describe the data, you get an SD for the new variable “immediate”.

# Change from pre-nap to post-nap for each row of means.
# .keep = "used" keeps only the columns that the calculation uses
# (Prenap and Postnap) plus the new column.
Immediate <- table3.1 %>%
  mutate(immediate = Postnap - Prenap, .keep = "used")

print(Immediate)

## # A tibble: 2 x 3
##   Prenap Postnap immediate
##    <dbl>   <dbl>     <dbl>
## 1   0.21    0.31      0.1 
## 2   0.3     0.25     -0.05

# Descriptive statistics for every column; psych:: makes explicit which
# package's describe() is being called.
psych::describe(Immediate)

##           vars n mean   sd median trimmed  mad   min  max range skew kurtosis
## Prenap       1 2 0.26 0.06   0.26    0.26 0.07  0.21 0.30  0.09    0    -2.75
## Postnap      2 2 0.28 0.04   0.28    0.28 0.04  0.25 0.31  0.06    0    -2.75
## immediate    3 2 0.03 0.11   0.03    0.03 0.11 -0.05 0.10  0.15    0    -2.75
##             se
## Prenap    0.04
## Postnap   0.03
## immediate 0.07

Mutating data to create a new variable for week delay

# Change from pre-nap to the one-week test for each row of means.
# Note that the Week column is overwritten with the change score here.
# .keep = "used" keeps only Prenap and the (overwritten) Week column.
Weekd <- table3.1 %>%
  mutate(Week = Week - Prenap, .keep = "used")

print(Weekd)

## # A tibble: 2 x 2
##   Prenap  Week
##    <dbl> <dbl>
## 1   0.21  0.19
## 2   0.3   0.1

# Descriptive statistics for every column; psych:: makes explicit which
# package's describe() is being called.
psych::describe(Weekd)

##        vars n mean   sd median trimmed  mad  min  max range skew kurtosis   se
## Prenap    1 2 0.26 0.06   0.26    0.26 0.07 0.21 0.30  0.09    0    -2.75 0.04
## Week      2 2 0.15 0.06   0.15    0.15 0.07 0.10 0.19  0.09    0    -2.75 0.04

# Add both change scores to the table of means in a single mutate() call:
# lower-case `week` is the change from pre-nap to the one-week test, and
# `immediate` is the change from pre-nap to post-nap.
table3.2 <- table3.1 %>%
  mutate(
    week = Week - Prenap,
    immediate = Postnap - Prenap
  )

psych::describe(table3.2)

##             vars n mean   sd median trimmed  mad   min  max range skew kurtosis
## statistics*    1 2 1.50 0.71   1.50    1.50 0.74  1.00 2.00  1.00    0    -2.75
## Baseline       2 2 0.56 0.06   0.56    0.56 0.06  0.52 0.60  0.08    0    -2.75
## Prenap         3 2 0.26 0.06   0.26    0.26 0.07  0.21 0.30  0.09    0    -2.75
## Postnap        4 2 0.28 0.04   0.28    0.28 0.04  0.25 0.31  0.06    0    -2.75
## Week           5 2 0.40 0.00   0.40    0.40 0.00  0.40 0.40  0.00  NaN      NaN
## week           6 2 0.15 0.06   0.15    0.15 0.07  0.10 0.19  0.09    0    -2.75
## immediate      7 2 0.03 0.11   0.03    0.03 0.11 -0.05 0.10  0.15    0    -2.75
##               se
## statistics* 0.50
## Baseline    0.04
## Prenap      0.04
## Postnap     0.03
## Week        0.00
## week        0.04
## immediate   0.07

Making a new data set with SDs. (I don’t think the SDs are fully correct: they don’t seem to match the figure. I think each bar should have its own SD? Maybe a question for Jenny this week.)

# Same long-format data set as before, with an extra column used for the
# error bars.
time1 <- rep(c("immediate", "week"), each = 2)
condition <- rep(c("cued", "uncued"), times = 2)
bias_change <- c(0.10, -0.05, 0.19, 0.10)

# One value per time point, shared by both conditions: 0.11 and 0.06 are the
# `sd` values reported by describe() for `immediate` and `week` above.
# NOTE(review): those are the spread between the two condition means, not
# the variability across participants - confirm which SD the figure needs.
sd <- rep(c(0.11, 0.06), each = 2)

data1 <- data.frame(time1, condition, bias_change, sd)

head(data1)

##       time1 condition bias_change   sd
## 1 immediate      cued        0.10 0.11
## 2 immediate    uncued       -0.05 0.11
## 3      week      cued        0.19 0.06
## 4      week    uncued        0.10 0.06

Adding error bars

Some notes:

alpha controls the opacity of that item e.g. the bar or the error bar
geom_errorbar: specify where on the graph the error bars are in terms of x and y, specify what they are i.e. bias_change + or - SD, indicate colour
importantly, also indicate the position, otherwise the bars will sit at “immediate” and “week” and not on each bar for each condition. That is what position_dodge is for
ylim indicates where the y axis will cut off

# plot
# Grouped bar chart as before, now with error bars of +/- sd around each bar.
ggplot(data1, aes(x = time1, y = bias_change, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge", alpha = 0.7) +
  geom_errorbar(
    aes(
      x = time1,
      ymin = bias_change - sd,
      ymax = bias_change + sd
    ),
    # dodge the error bars as well so each one sits on its own bar
    position = position_dodge(0.9),
    width = 0.4,
    colour = "grey",
    alpha = 0.9
  ) +
  ylim(-0.2, 0.4) # where the y axis cuts off

Cleanfig4

Michelle

01/07/2021