Load relevant packages
library(readspss) #package to read the original datafile from OFS
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.2 v dplyr 1.0.6
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
Load data
# Read cleandata.csv (path relative to the working directory) into Plot4.
# NOTE(review): Plot4 is not referenced again in this section; the tables
# further down are typed in by hand rather than derived from it.
Plot4 <- read_csv("cleandata.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_double(),
## ParticipantID = col_character(),
## General_1_MedList = col_character(),
## General_1_University = col_character()
## )
## i Use `spec()` for the full column specifications.
Figure 4 data tibble:
# Calculate the change in bias relative to the pre-nap mean.
# Each value is (later time point mean) - (pre-nap mean) for one cue condition.
pre_post_change_cued <- 0.31 - 0.21
pre_post_change_uncued <- 0.25 - 0.3
pre_week_change_cued <- 0.40 - 0.21
pre_week_change_uncued <- 0.40 - 0.30

# Create the Figure 4 data frame directly from the change scores above, so the
# table cannot drift out of sync with the calculations. The printed values are
# the same as typing 0.1, 0.19, -0.05 and 0.1 by hand.
fig4 <- tibble(
  change_from_pre_to = c("immediate", "week"),
  cued = c(pre_post_change_cued, pre_week_change_cued),
  uncued = c(pre_post_change_uncued, pre_week_change_uncued)
)
print(fig4)
## # A tibble: 2 x 3
## change_from_pre_to cued uncued
## <chr> <dbl> <dbl>
## 1 immediate 0.1 -0.05
## 2 week 0.19 0.1
When creating a data set, you indicate which “variables” go into each group (these can be seen as what will become the axes of the graph).
For time1: this includes the two time conditions “immediate” and “week”. The 2 indicates how many columns we want for each time point i.e. two (which will be for the cued and uncued conditions).
The next line, bias_change, is where we enter the relevant values calculated previously.
data.frame() then combines these into a data frame named data; the vectors to be included go inside the brackets.
head allows you to view the data
# Build the long-format plotting data: one row per time point x cue condition.
time1 <- rep(c("immediate", "week"), each = 2)
condition <- rep(c("cued", "uncued"), times = 2)
bias_change <- c(0.10, -0.05, 0.19, 0.10)
data <- data.frame(
  time1 = time1,
  condition = condition,
  bias_change = bias_change
)
head(data) # preview the first rows
## time1 condition bias_change
## 1 immediate cued 0.10
## 2 immediate uncued -0.05
## 3 week cued 0.19
## 4 week uncued 0.10
The aes function sets the aesthetics of our graph
#plot
# Grouped bar chart: one bar per cue condition at each time point.
ggplot(data, aes(x = time1, y = bias_change, fill = condition)) +
  # geom_col() takes bar heights straight from y, like stat = "identity"
  geom_col(position = "dodge", alpha = 0.7)
Calculating the standard deviations and creating a new variable/tibble
Copying data from table 3 that we need
# Means and SDs copied from Table 3, laid out one row per time point.
# NOTE(review): mean1/SD1 and mean2/SD2 look like the cued and uncued
# conditions (the means match the change scores used for Figure 4) - confirm.
table3.0 <- tribble(
  ~timepoint,     ~mean1, ~SD1, ~mean2, ~SD2,
  "Baseline",       0.52, 0.36,   0.60, 0.45,
  "Prenap",         0.21, 0.51,   0.30, 0.44,
  "Postnap",        0.31, 0.44,   0.25, 0.48,
  "1-week delay",   0.40, 0.39,   0.40, 0.47
)
print(table3.0)
## # A tibble: 4 x 5
## timepoint mean1 SD1 mean2 SD2
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Baseline 0.52 0.36 0.6 0.45
## 2 Prenap 0.21 0.51 0.3 0.44
## 3 Postnap 0.31 0.44 0.25 0.48
## 4 1-week delay 0.4 0.39 0.4 0.47
Creating a new tibble to manipulate, where the data is organised by cue condition and only the means are used (the SDs entered previously are ignored)
# Means only, one row per condition (mean1, mean2) and one column per
# time point.
table3.1 <- tribble(
  ~statistics, ~Baseline, ~Prenap, ~Postnap, ~Week,
  "mean1",          0.52,    0.21,     0.31,  0.40,
  "mean2",          0.60,    0.30,     0.25,  0.40
)
print(table3.1)
## # A tibble: 2 x 5
## statistics Baseline Prenap Postnap Week
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 mean1 0.52 0.21 0.31 0.4
## 2 mean2 0.6 0.3 0.25 0.4
Mutating data to create a new variable. When you describe the data, you get an SD for the variable “immediate”
# Keep the pre- and post-nap means and add their difference (post - pre)
# as the immediate change score.
Immediate <- table3.1 %>%
  transmute(
    Prenap,
    Postnap,
    immediate = Postnap - Prenap
  )
print(Immediate)
## # A tibble: 2 x 3
## Prenap Postnap immediate
## <dbl> <dbl> <dbl>
## 1 0.21 0.31 0.1
## 2 0.3 0.25 -0.05
# Summary statistics for each column of Immediate. Every column holds only two
# values (one per condition mean), so n = 2 and the sd reported here is the
# spread between the two conditions.
describe(Immediate)
## vars n mean sd median trimmed mad min max range skew kurtosis
## Prenap 1 2 0.26 0.06 0.26 0.26 0.07 0.21 0.30 0.09 0 -2.75
## Postnap 2 2 0.28 0.04 0.28 0.28 0.04 0.25 0.31 0.06 0 -2.75
## immediate 3 2 0.03 0.11 0.03 0.03 0.11 -0.05 0.10 0.15 0 -2.75
## se
## Prenap 0.04
## Postnap 0.03
## immediate 0.07
Mutating data to create a new variable for week delay
# Keep the pre-nap mean and overwrite Week with the one-week change score
# (Week - Prenap).
Weekd <- table3.1 %>%
  transmute(
    Prenap,
    Week = Week - Prenap
  )
print(Weekd)
## # A tibble: 2 x 2
## Prenap Week
## <dbl> <dbl>
## 1 0.21 0.19
## 2 0.3 0.1
# Summary statistics for each column of Weekd. Every column holds only two
# values (one per condition mean), so n = 2 and the sd reported here is the
# spread between the two conditions.
describe(Weekd)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## Prenap 1 2 0.26 0.06 0.26 0.26 0.07 0.21 0.30 0.09 0 -2.75 0.04
## Week 2 2 0.15 0.06 0.15 0.15 0.07 0.10 0.19 0.09 0 -2.75 0.04
# Add both change scores (each relative to Prenap) to the means table in a
# single mutate() call, then summarise every column.
table3.2 <- table3.1 %>%
  mutate(
    week = Week - Prenap,
    immediate = Postnap - Prenap
  )
describe(table3.2)
## vars n mean sd median trimmed mad min max range skew kurtosis
## statistics* 1 2 1.50 0.71 1.50 1.50 0.74 1.00 2.00 1.00 0 -2.75
## Baseline 2 2 0.56 0.06 0.56 0.56 0.06 0.52 0.60 0.08 0 -2.75
## Prenap 3 2 0.26 0.06 0.26 0.26 0.07 0.21 0.30 0.09 0 -2.75
## Postnap 4 2 0.28 0.04 0.28 0.28 0.04 0.25 0.31 0.06 0 -2.75
## Week 5 2 0.40 0.00 0.40 0.40 0.00 0.40 0.40 0.00 NaN NaN
## week 6 2 0.15 0.06 0.15 0.15 0.07 0.10 0.19 0.09 0 -2.75
## immediate 7 2 0.03 0.11 0.03 0.03 0.11 -0.05 0.10 0.15 0 -2.75
## se
## statistics* 0.50
## Baseline 0.04
## Prenap 0.04
## Postnap 0.03
## Week 0.00
## week 0.04
## immediate 0.07
Making a new data set with SDs (I don’t think the SDs are fully correct? They don’t seem the same as in the figure. I think each column has its own SD? Maybe a question for Jenny this week)
# Long-format plotting data (one row per time point x cue condition) with an
# sd column used for the error bars.
time1 <- rep(c("immediate", "week"), each = 2)
condition <- rep(c("cued", "uncued"), times = 2)
bias_change <- c(0.10, -0.05, 0.19, 0.10)
# NOTE(review): 0.11 and 0.06 match the sd that describe() reports for the
# immediate and week change columns, i.e. the spread across the two condition
# means (n = 2), so both conditions at a time point share the same value.
sd <- rep(c(0.11, 0.06), each = 2)
data1 <- data.frame(
  time1 = time1,
  condition = condition,
  bias_change = bias_change,
  sd = sd
)
head(data1) # preview the first rows
## time1 condition bias_change sd
## 1 immediate cued 0.10 0.11
## 2 immediate uncued -0.05 0.11
## 3 week cued 0.19 0.06
## 4 week uncued 0.10 0.06
Adding error bars
Some notes:
alpha controls the opacity of that item e.g. the bar or the error bar
geom_errorbar: specify where on the graph the error bars are in terms of x and y, specify what they are i.e. bias_change + or - SD, indicate colour
importantly, also indicate the position, otherwise the bars will sit at “immediate” and “week” and not on each bar for each condition. That is what position_dodge is for
ylim indicates where the y axis will cut off
# Plot: grouped bars with error bars spanning bias_change -/+ sd.
ggplot(data1, aes(x = time1, y = bias_change, fill = condition)) +
  geom_col(position = "dodge", alpha = 0.7) +
  # x is inherited from the top-level aes(); position_dodge(0.9) places each
  # error bar on its own condition's bar instead of on the time point.
  geom_errorbar(
    aes(ymin = bias_change - sd, ymax = bias_change + sd),
    width = 0.4,
    colour = "grey",
    alpha = 0.9,
    position = position_dodge(0.9)
  ) +
  ylim(-0.2, 0.4) # where the y axis cuts off