library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.2 v dplyr 1.0.6
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotrix)
library(gt)
library(haven)
library(ggpubr)
library(jmv)
library(rstatix)
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
##
## filter
library(here)
## here() starts at C:/Users/miche/Documents/Coding-R/Learning logs
library(readxl)
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:jmv':
##
## pca, reliability
## The following object is masked from 'package:plotrix':
##
## rescale
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
data <- read_sav("~/Coding-R/Learning logs/Humiston & Wamsley 2019 data.sav")
cleandata <- data %>%
filter(exclude == 0)
This weeks goals are focusing mainly on finishing my exploratory analyses, and continuing on with my verification report!
Mainly, I want to have a good start on my third question, and also try out another question I came across during the workshop from this week.
This question was to look at the differences between genders in the study. I was mostly interested if there was a difference between genders in changes of gender bias.
Was TMR more effective with men or women? Perhaps it is more effective with a certain gender, for gender bias?
I thought of this question with the help from Thenuka during the coding discussion in the workshop, along with the example of gender bias given in the paper.
In the paper they mention an example of gender bias which is that when you are asked to think of a doctor, people tend to think of a male rather than a female.
I intend to do some research to confirm this, but I would assume females have less gender bias than males? I’m not too sure if that is true, but it further inspired me to look into this question!
For this question, I think I would observe it by looking at either bias changes across each time point (using a line plot), or using a bar plot to depict bias differences for post nap and 1 week delay measures.
I will also attempt to produce some summary statistics as I don’t think I’ve done that with my other questions yet!
I start by filtering for participants who were played the gender cue. I do this because I want to look at gender bias levels for participants who were cued with gender cues.
If I don’t filter the data, I will be observing data for participants who were either uncued AND cued, which would not be useful.
gendercued <- cleandata %>%
filter(Cue_condition == 1)
Following Jenny’s Q and A, I’ve used the same method to calculate summary statistics!
So far, I have to calculate this separately for each time point, but perhaps there is a shorter way to do this and condense the code. I might need to play with summarise, across, contains again.
baselineIATgender <- gendercued %>%
group_by(General_1_Sex) %>%
summarise(mean = mean(base_IAT_gen),
sd = sd(base_IAT_gen),
n = n(),
se = sd/sqrt(n))
print(baselineIATgender)
## # A tibble: 2 x 5
## General_1_Sex mean sd n se
## <dbl+lbl> <dbl> <dbl> <int> <dbl>
## 1 1 [Male] 0.553 0.526 8 0.186
## 2 2 [Female] 0.437 0.354 9 0.118
prenapIATgender <- gendercued %>%
group_by(General_1_Sex) %>%
summarise(mean = mean(pre_IAT_gen),
sd = sd(pre_IAT_gen),
n = n(),
se = sd/sqrt(n))
print(prenapIATgender)
## # A tibble: 2 x 5
## General_1_Sex mean sd n se
## <dbl+lbl> <dbl> <dbl> <int> <dbl>
## 1 1 [Male] 0.366 0.428 8 0.151
## 2 2 [Female] 0.223 0.272 9 0.0907
postnapIATgender <- gendercued %>%
group_by(General_1_Sex) %>%
summarise(mean = mean(post_IAT_gen),
sd = sd(post_IAT_gen),
n = n(),
se = sd/sqrt(n))
print(postnapIATgender)
## # A tibble: 2 x 5
## General_1_Sex mean sd n se
## <dbl+lbl> <dbl> <dbl> <int> <dbl>
## 1 1 [Male] 0.438 0.395 8 0.140
## 2 2 [Female] -0.0378 0.411 9 0.137
weekIATgender <- gendercued %>%
group_by(General_1_Sex) %>%
summarise(mean = mean(week_IAT_gen),
sd = sd(week_IAT_gen),
n = n(),
se = sd/sqrt(n))
print(weekIATgender)
## # A tibble: 2 x 5
## General_1_Sex mean sd n se
## <dbl+lbl> <dbl> <dbl> <int> <dbl>
## 1 1 [Male] 0.337 0.448 8 0.159
## 2 2 [Female] 0.267 0.477 9 0.159
Following previous figures, I enter my data into a data frame in as similar way.
I group the data based on gender, and time points.
genderdata <- data.frame(
condition = factor(c("male", "female", "male", "female", "male", "female", "male", "female")),
time = factor(c("Baseline", "Prenap", "Postnap", "1-week", "Baseline", "Prenap", "Postnap", "1-week")),
levels = c("Baseline", "Prenap", "Postnap", "1-week"),
bias_av = c(0.5526989, 0.3664675, 0.4379670, 0.3372313, 0.4374292, 0.2231499, -0.0378266, 0.2665315),
se = c(0.1860581, 0.15124648, 0.1397863, 0.1585409, 0.1179810, 0.09070483, 0.1371307, 0.1590986))
Now to put this into a plot! Again, I use the same premises from the line plot I made in my verification report.
ggplot(data = genderdata, aes(
x = factor(time, level = c("Baseline", "Prenap", "Postnap", "1-week")),
y = bias_av,
colour = condition,
group = condition)) +
geom_line() +
geom_errorbar(aes(
x= time,
ymin=bias_av-se,
ymax=bias_av+se),
width=0.1, colour="grey", alpha= 0.9) +
labs(x= "time")
Oh no! This doesn’t look quite right. I’m not quite sure why the lines are connected, and why some time points don’t have a data point!
When I double check my data set I do enter 8 points of data, so this is strange. I have a feeling it might be related to how some of the data points are negative, but I’m not sure how it has this effect.
Perhaps my data frame is set out wrong, lets have a better look:
print(genderdata)
## condition time levels bias_av se
## 1 male Baseline Baseline 0.5526989 0.18605810
## 2 female Prenap Prenap 0.3664675 0.15124648
## 3 male Postnap Postnap 0.4379670 0.13978630
## 4 female 1-week 1-week 0.3372313 0.15854090
## 5 male Baseline Baseline 0.4374292 0.11798100
## 6 female Prenap Prenap 0.2231499 0.09070483
## 7 male Postnap Postnap -0.0378266 0.13713070
## 8 female 1-week 1-week 0.2665315 0.15909860
Hm, it appears the time points aren’t lining up properly with the conditions (gender). There is no male prenap score, and no female baseline score?
Lets try do deduce how this happened.
The female baseline score is 0.4374292. It has been assigned to the male baseline score? The male prenap score is 0.3664675 and has been assigned to the female prenap score? Are they all mixed up?
Lets check using the male baseline: 0.5526989. Hm, nope. Only the female baseline score and male prenap score.
Looking at the data frame, I think the problem may be how I laid out my conditions. I alternated male and female, perhaps it was supposed to be not alternated?
Looking back at my other plots, this seems to be a case! (I think I just got a little rusty by not doing much coding earlier in the week haha)
genderdata <- data.frame(
condition = factor(c("male", "male", "male", "male", "female", "female", "female", "female")),
time = factor(c("Baseline", "Prenap", "Postnap", "1-week", "Baseline", "Prenap", "Postnap", "1-week")),
levels = c("Baseline", "Prenap", "Postnap", "1-week"),
bias_av = c(0.5526989, 0.3664675, 0.4379670, 0.3372313, 0.4374292, 0.2231499, -0.0378266, 0.2665315),
se = c(0.1860581, 0.15124648, 0.1397863, 0.1585409, 0.1179810, 0.09070483, 0.1371307, 0.1590986))
ggplot(data = genderdata, aes(
x = factor(time, level = c("Baseline", "Prenap", "Postnap", "1-week")),
y = bias_av,
colour = condition,
group = condition)) +
geom_line() +
geom_errorbar(aes(
x= time,
ymin=bias_av-se,
ymax=bias_av+se),
width=0.1, colour="grey", alpha= 0.9) +
labs(x= "time")
Ta da! It worked yay!
I find this data super interesting! Here’s a few points I noted:
However, I will note, in this plot I am not comparing across cued and uncued conditions. Therefore there may be gender differences, but this does not show us that TMR works for either gender.
Perhaps a better way to plot this data would be to make two separate plots, one for each gender, with cued and uncued conditions. Then by putting these plots side by side you could compare trends?
Lets try it out with females (since they seemed to have the largest reduction in bias)
I just need to calculate the data for females who were not cued for gender bias.These people were cued with the race cue.
I start by making a new data set to filter out only female participants.
sex_female <- cleandata %>%
filter(General_1_Sex == 2)
I then apply the same code I used for gender cued participants, organising the data by which cue was played during the nape (i.e. cued vs uncued)
Baseline
baseIATfemale <- sex_female %>%
group_by(Cue_condition) %>%
summarise(mean = mean(base_IAT_gen),
sd = sd(base_IAT_gen),
n = n(),
se = sd/sqrt(n))
print(baseIATfemale)
## # A tibble: 2 x 5
## Cue_condition mean sd n se
## <dbl+lbl> <dbl> <dbl> <int> <dbl>
## 1 1 [race cue played] 0.437 0.354 9 0.118
## 2 2 [gender cue played] 0.403 0.321 7 0.121
Prenap
preIATfemale <- sex_female %>%
group_by(Cue_condition) %>%
summarise(mean = mean(pre_IAT_gen),
sd = sd(pre_IAT_gen),
n = n(),
se = sd/sqrt(n))
print(preIATfemale)
## # A tibble: 2 x 5
## Cue_condition mean sd n se
## <dbl+lbl> <dbl> <dbl> <int> <dbl>
## 1 1 [race cue played] 0.223 0.272 9 0.0907
## 2 2 [gender cue played] 0.199 0.483 7 0.183
Postnap
postIATfemale <- sex_female %>%
group_by(Cue_condition) %>%
summarise(mean = mean(post_IAT_gen),
sd = sd(post_IAT_gen),
n = n(),
se = sd/sqrt(n))
print(postIATfemale)
## # A tibble: 2 x 5
## Cue_condition mean sd n se
## <dbl+lbl> <dbl> <dbl> <int> <dbl>
## 1 1 [race cue played] -0.0378 0.411 9 0.137
## 2 2 [gender cue played] 0.377 0.539 7 0.204
Week
weekIATfemale <- sex_female %>%
group_by(Cue_condition) %>%
summarise(mean = mean(week_IAT_gen),
sd = sd(week_IAT_gen),
n = n(),
se = sd/sqrt(n))
print(weekIATfemale)
## # A tibble: 2 x 5
## Cue_condition mean sd n se
## <dbl+lbl> <dbl> <dbl> <int> <dbl>
## 1 1 [race cue played] 0.267 0.477 9 0.159
## 2 2 [gender cue played] 0.405 0.174 7 0.0659
Putting this data into a data frame
female_genderdata <- data.frame(
condition = factor(c("cued", "cued", "cued", "cued", "uncued", "uncued", "uncued", "uncued")),
time = factor(c("Baseline", "Prenap", "Postnap", "1-week", "Baseline", "Prenap", "Postnap", "1-week")),
levels = c("Baseline", "Prenap", "Postnap", "1-week"),
bias_av = c(0.4374292, 0.2231499, -0.0378266, 0.2665315, 0.4027796, 0.1991437, 0.3774958, 0.4054752),
se = c(0.1179810, 0.09070483, 0.1371307, 0.1590986, 0.12137730, 0.1826184, 0.2037286, 0.06591996))
ggplot(data = female_genderdata, aes(
x = factor(time, level = c("Baseline", "Prenap", "Postnap", "1-week")),
y = bias_av,
colour = condition,
group = condition)) +
geom_line() +
geom_errorbar(aes(
x= time,
ymin=bias_av-se,
ymax=bias_av+se),
width=0.1, colour="grey", alpha= 0.9) +
labs(x= "time")
Now this is even more insightful!! TMR appears to have an effect for the post nap measure! (however, this effect seems to be reduced at 1 week). To double check, lets run some statistical analyses.
I’ll use the ttestIS function from the JMV function.
ttestIS(formula = pre_IAT_race ~ Cue_condition, data = sex_female)
##
## INDEPENDENT SAMPLES T-TEST
##
## Independent Samples T-Test
## ---------------------------------------------------------------------
## Statistic df p
## ---------------------------------------------------------------------
## pre_IAT_race Student's t -1.231246 14.00000 0.2385142
## ---------------------------------------------------------------------
P > 0.05, therefore no!
ttestIS(formula = post_IAT_race ~ Cue_condition, data = sex_female)
##
## INDEPENDENT SAMPLES T-TEST
##
## Independent Samples T-Test
## ----------------------------------------------------------------------
## Statistic df p
## ----------------------------------------------------------------------
## post_IAT_race Student's t -1.206085 14.00000 0.2477725
## ----------------------------------------------------------------------
Interestingly, this effect is not significant!
ttestIS(formula = week_IAT_race ~ Cue_condition, data = sex_female)
##
## INDEPENDENT SAMPLES T-TEST
##
## Independent Samples T-Test
## ----------------------------------------------------------------------
## Statistic df p
## ----------------------------------------------------------------------
## week_IAT_race Student's t -2.179954 14.00000 0.0468250
## ----------------------------------------------------------------------
P = 0.0468250 < 0.05
Therefore this effect is significant! There is a difference between cued and uncued conditions for the week delay time point.
It could therefore be suggested that TMR is effective for females for gender bias, but only after some time.
Does type of compensation influence the effect of TMR?
This is a question as this was one of the ways the presented study differed from the original study. However, I’m not so confident in this question, as I’m not sure if this is a good/valid research question, even though it does look at how the presented study differed from the original study.
In the original study, participants were compensated with only course credits.
For my plot, I will be looking at differential bias change, which is how much bias changed between each time point.
I begin this question by selecting the relevant variables for analysis. The variable compensation indicates how the participants were compensated: either by cash or course credits.
compensationdata <- cleandata %>%
select(compensation, preIATcued, preIATuncued, postIATcued, postIATuncued, weekIATcued, weekIATuncued)
print(compensationdata)
## # A tibble: 31 x 7
## compensation preIATcued preIATuncued postIATcued postIATuncued weekIATcued
## <dbl+lbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 [cash] 0.559 0.215 0.682 0.467 0.204
## 2 1 [cash] -0.134 0.340 0.0446 -0.0569 0.459
## 3 1 [cash] 0.511 0.380 -0.00258 0.682 0.399
## 4 1 [cash] -0.0293 -0.942 -0.246 0.950 0.923
## 5 2 [course cred~ 0.305 -0.241 0.442 -0.292 -0.0187
## 6 2 [course cred~ 0.148 0.247 0.989 0.258 0.561
## 7 2 [course cred~ 0.0338 0.211 0.684 0.927 -0.0686
## 8 2 [course cred~ 1.10 0.127 0.737 0.334 0.254
## 9 2 [course cred~ 0.271 0.338 -0.563 -0.691 0.778
## 10 2 [course cred~ 0.728 0.519 0.463 -0.265 0.0808
## # ... with 21 more rows, and 1 more variable: weekIATuncued <dbl>
I filter for participants who were given cash.
I then mutate the data to calculate differential bias change, followed by using std.error to calculate standard error.
Finally I calculate summary statistics.
cash <- cleandata %>%
filter(compensation == 1)
cash_biaschange <- cash %>%
mutate(cuedimmediateIAT = postIATcued - preIATcued,
cuedweekIAT = weekIATcued - preIATcued)
std.error(cash_biaschange)
## Warning in is.data.frame(x): NAs introduced by coercion
## Warning in is.data.frame(x): NAs introduced by coercion
## Warning in is.data.frame(x): NAs introduced by coercion
## ParticipantID exclude cue_presented
## NA 0.00000000 0.00000000
## heard_cue_report heard_cue_exit predicted_cue
## 0.00000000 0.16666667 0.22473329
## Cue_condition Counterbias_order Sound_assignment
## 0.14864710 0.14213381 0.15075567
## IAT1_order IAT234_order IAT_order
## 0.15075567 0.15075567 0.15075567
## compensation General_1_Age General_1_Sex
## 0.00000000 0.24099960 0.14864710
## General_1_Race General_1_English General_1_EnglishYrs
## 0.00000000 0.00000000 NA
## General_1_Caffeine General_1_CaffCups General_1_CaffHrsAgo
## 0.13055824 0.00000000 0.57735027
## General_1_SleepDisor General_1_MentalDiso General_1_Meds
## 0.08333333 0.00000000 0.14213381
## General_1_MedList General_1_University General_1_UniYears
## NA NA 0.25990480
## Demo_1_Ethnic Demo_1_Racial Demo_1_Gender
## 0.00000000 0.00000000 0.14864710
## Demo_1_NonParticipat Epworth_1_Read Epworth_1_TV
## NA 0.19299605 0.12309149
## Epworth_1_Public Epworth_1_Passenger Epworth_1_LyingDown
## 0.14864710 0.24099960 0.30151134
## Epworth_1_Talking Epworth_1_Lunch Epworth_1_Traffic
## 0.08333333 0.21320072 0.00000000
## Epworth_total AlertTest_1_Concentr_1 AlertTest_1_Refresh_1
## 0.79891024 4.08248290 5.42976272
## AlertTest_1_Feel AlertTest_2_Concentr_1 AlertTest_2_Refresh_1
## 0.25623537 3.28564370 4.81999204
## AlertTest_2_Feel AlertTest_3_Concentr_1 AlertTest_3_Refresh_1
## 0.35798962 3.77026206 4.28335236
## AlertTest_3_Feel AlertTest_4_Concentr_1 AlertTest_4_Refresh_1
## 0.27872199 5.39359890 6.75424568
## AlertTest_4_Feel S1_ExitQ_1_sound S1_ExitQ_1_soundaffect
## 0.35906214 0.00000000 0.00000000
## S1_ExitQ_2_sound S1_ExitQ_3_sound S1_ExitQ_4_sound
## 0.00000000 0.08333333 0.00000000
## S1_ExitQ_4_soundaffect S1_ExitQ_5_sound S1_ExitQ_5_soundaffect
## 0.00000000 0.00000000 0.00000000
## S2_ExitQ_1_sound S2_ExitQ_1_soundaffect S2_ExitQ_2_sound
## 0.00000000 0.00000000 0.00000000
## S2_ExitQ_3_sound S2_ExitQ_4_sound S2_ExitQ_4_soundaffect
## 0.08333333 0.00000000 0.00000000
## S2_ExitQ_5_sound S2_ExitQ_5_soundaffect Total_sleep
## 0.00000000 0.00000000 2.11222392
## Wake_amount NREM1_amount NREM2_amount
## 2.11222392 0.90836461 4.08346861
## SWS_amount REM_amount SWSxREM
## 3.19830684 2.28120892 71.06781155
## cue_minutes baseIATcued baseIATuncued
## 3.71989176 0.08538123 0.13307249
## preIATcued preIATuncued postIATcued
## 0.13539860 0.13464486 0.07369271
## postIATuncued weekIATcued weekIATuncued
## 0.11300636 0.12059749 0.12910852
## postnap_change_cued postnap_change_uncued week_change_cued
## 0.12809607 0.21193407 0.00000000
## week_change_uncued diff_biaschange_cued diff_biaschange_uncued
## 0.00000000 0.15049754 0.12381492
## diff_biaschange base_IAT_race base_IAT_gen
## 0.13803354 0.14101822 0.07948214
## pre_IAT_race pre_IAT_gen post_IAT_race
## 0.16902486 0.09044665 0.11121321
## week_IAT_race post_IAT_gen week_IAT_gen
## 0.13778564 0.07640354 0.10976183
## filter_$ cues_total cuedimmediateIAT
## 0.00000000 55.79837638 0.12809607
## cuedweekIAT
## 0.19506203
cashmeans <- cash_biaschange %>%
select(cuedimmediateIAT, cuedweekIAT) %>%
na.omit() %>%
summarise(across(contains("IAT"), list(mean = "mean", sd = "sd")))
print(cashmeans)
## # A tibble: 1 x 4
## cuedimmediateIAT_mean cuedimmediateIAT_sd cuedweekIAT_mean cuedweekIAT_sd
## <dbl> <dbl> <dbl> <dbl>
## 1 0.0215 0.444 0.297 0.676
The same procedure is used for participants compensated with course credits.
credit <- compensationdata %>%
filter(compensation == 2)
credit_biaschange <- credit %>%
mutate(cuedimmediateIAT = postIATcued - preIATcued,
cuedweekIAT = weekIATcued - postIATcued)
std.error(credit_biaschange)
## compensation preIATcued preIATuncued postIATcued
## 0.00000000 0.12152902 0.09848596 0.11085579
## postIATuncued weekIATcued weekIATuncued cuedimmediateIAT
## 0.12262517 0.08616589 0.11162652 0.13874998
## cuedweekIAT
## 0.12155428
credit_biaschangemeans <- credit_biaschange %>%
select(cuedimmediateIAT, cuedweekIAT) %>%
summarise(across(contains("IAT"), list(mean = "mean", sd = "sd")))
print(credit_biaschangemeans)
## # A tibble: 1 x 4
## cuedimmediateIAT_mean cuedimmediateIAT_sd cuedweekIAT_mean cuedweekIAT_sd
## <dbl> <dbl> <dbl> <dbl>
## 1 0.143 0.605 -0.0222 0.530
I think this data would be best displayed in a table.
I create a tibble to be converted into a table.
tablecompensation <- tibble(
Label = c("Immediate", "Week Delay"),
mean1 = c(0.02145958, 0.2972033 ),
SD1 = c(0.4437378 , 0.6757147),
mean2 = c(0.1429766 , -0.02220309 ),
SD2 = c(0.6047972 , 0.5298428)
)
tablecompensation %>%
gt() %>%
tab_header("Implicit bias levels by compensation") %>%
fmt_number(columns = vars(mean1, mean2, SD1, SD2)) %>%
tab_spanner(
label = "Cash",
columns = c(mean1, SD1)
) %>%
tab_spanner(
label = "Course Credit",
columns = c(mean2, SD2)
) %>%
cols_label(mean1 = "mean", SD1 = "SD", mean2 = "mean", SD1 = "SD", Label = "")
## Warning: `columns = vars(...)` has been deprecated in gt 0.3.0:
## * please use `columns = c(...)` instead
## Warning: `columns = vars(...)` has been deprecated in gt 0.3.0:
## * please use `columns = c(...)` instead
| Implicit bias levels by compensation | ||||
|---|---|---|---|---|
| Cash | Course Credit | |||
| mean | SD | mean | SD2 | |
| Immediate | 0.02 | 0.44 | 0.14 | 0.60 |
| Week Delay | 0.30 | 0.68 | −0.02 | 0.53 |
Now I will plot the data.
compensation_dataframe <- data.frame(
Time = c(rep("Immediate", 2), rep("Week Delay", 2)),
condition = rep(c("cash","course credit"),2),
bias_change = c(0.02145958, 0.1429766, 0.2972033, -0.02220309),
se= c(0.12809607, 0.13874998, 0.19506203, 0.12155428))
Plot
ggplot(data = compensation_dataframe, aes(
x = Time,
y = bias_change,
fill = condition
)) +
geom_bar(position = "dodge", stat = "identity", alpha=0.7) +
geom_errorbar(aes(
x= Time,
ymin=bias_change-se,
ymax=bias_change+se),
width=0.2, colour="grey", alpha= 0.9, position = position_dodge(0.9) )+
ylim(-0.2, 0.5)
From the looks of the plot, it doesn’t seem like there are any significant differences in bias change between conditions.
It seems like the next steps are to keep working on the gender question, particularly statistical analyses. This leads me to a question.
Is there a way to statistically analyse the differences between time points? I tried to use the t-test method, but I realised that it wouldn’t work because I wouldn’t be comparing the means between two conditions (if that makes sense). Say I want to see if there is a significant difference between the prenap and post nap time points, does that mean I would have to calculate differential bias change between time points, then do a t-test on those means?
Just because in my gender question, comparing means across time points seems like it would be interesting as the trend patterns are different for different conditions.
Also not sure if I’m allowed to ask this, but is the compensation question worthwhile? The main reason why I chose it, was because it was based off my summary and reaction, where I noted that the presented study may have failed to replicate results as they made small changes from the original study (such as compensation methods) which could have potentially affected results.