library(tidyverse)
library(here)
library(janitor)
# Read the input for this step: data/combined/4_zdiff_clean.csv, resolved
# relative to the project root via here().
bins <- here("data", "combined", "4_zdiff_clean.csv") %>%
  read_csv()
Make a new screening column, `bin_outlier`, which is TRUE if the Zdiff value for a particular bin is more than 2 SD above the mean Zdiff value for that trial for that participant/emotion/muscle.
# Flag outlying bins within each participant x emotion x muscle x trial.
# meanZdiff / sdZdiff are the mean and SD of Zdiff over the rows of that
# group, and bin_outlier is TRUE when a bin's Zdiff is more than 2 SD above
# the group mean (only the upper tail is screened). The flag is built as a
# logical straight from the comparison instead of as "TRUE"/"FALSE" strings.
# NOTE(review): mean()/sd() are called without na.rm, so a single NA Zdiff
# makes every flag in that group NA -- confirm the input contains no NAs.
# The result stays grouped by pp_no, emotion, muscle, trial.
bins_out <- bins %>%
  group_by(pp_no, emotion, muscle, trial) %>%
  mutate(
    meanZdiff = mean(Zdiff),
    sdZdiff = sd(Zdiff),
    bin_outlier = Zdiff > meanZdiff + 2 * sdZdiff
  )
Make sure the new screening column is a logical (TRUE/FALSE) vector.
# Coerce bin_outlier to a logical vector so it holds TRUE/FALSE (or NA).
bins_out <- bins_out %>%
  mutate(bin_outlier = as.logical(bin_outlier))
How many bins are bad?
# Keep only the rows flagged as outlying bins (rows where the flag is NA are
# dropped by filter()). nrow(outlier_true) then gives the number of bad bins.
outlier_true <- filter(bins_out, bin_outlier)
Pick a participant (pp) that has an example of bad bins and work out how to set the bad bin values to NA.
# Pull out one participant / emotion / trial that is known to contain a
# flagged bin, to use as a small worked example.
# emotion is a numeric column, so it is compared against the number 626
# rather than the string "626" (which only matched via implicit coercion).
example <- bins_out %>%
  filter(pp_no == "pp401", emotion == 626, trial == "trial4")

# Inspect the columns and the first few values of the example rows.
glimpse(example)
## Rows: 12
## Columns: 10
## Groups: pp_no, emotion, muscle, trial [2]
## $ pp_no <chr> "pp401", "pp401", "pp401", "pp401", "pp401", "pp401"…
## $ condition <chr> "dyn", "dyn", "dyn", "dyn", "dyn", "dyn", "dyn", "dy…
## $ emotion <dbl> 626, 626, 626, 626, 626, 626, 626, 626, 626, 626, 62…
## $ trial <chr> "trial4", "trial4", "trial4", "trial4", "trial4", "t…
## $ muscle <chr> "brow", "brow", "brow", "brow", "brow", "brow", "che…
## $ bin <chr> "diff_bin1", "diff_bin2", "diff_bin3", "diff_bin4", …
## $ Zdiff <dbl> -0.093680571, -0.350499307, -0.380517733, 2.67917902…
## $ meanZdiff <dbl> 0.3021934, 0.3021934, 0.3021934, 0.3021934, 0.302193…
## $ sdZdiff <dbl> 1.17541707, 1.17541707, 1.17541707, 1.17541707, 1.17…
## $ bin_outlier <lgl> FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALS…
Use `mutate()` and `replace()` to set the Zdiff value to NA wherever `bin_outlier` is TRUE.
# Try the replacement on the small example first: Zdiff becomes NA on rows
# where bin_outlier is TRUE and is left untouched elsewhere (including rows
# where the flag is NA). bin_outlier is already logical, so it is used as the
# index directly instead of being compared with the string "TRUE".
test <- example %>%
  mutate(Zdiff = replace(Zdiff, bin_outlier, NA_real_))
# Apply the screening to the full data set: Zdiff becomes NA on rows where
# bin_outlier is TRUE and is left untouched elsewhere (including rows where
# the flag is NA). bin_outlier is already logical, so it is used as the index
# directly instead of being compared with the string "TRUE".
bins_screen <- bins_out %>%
  mutate(Zdiff = replace(Zdiff, bin_outlier, NA_real_))
# Drop the helper columns created for screening (meanZdiff, sdZdiff,
# bin_outlier), leaving the original input columns with the screened Zdiff.
# Columns are removed by name rather than kept by position (1:7), so the
# result does not silently depend on column order. The grouping is removed
# as well, since it is no longer needed once screening is done.
bins_screen <- bins_screen %>%
  ungroup() %>%
  select(-c(meanZdiff, sdZdiff, bin_outlier))

# Save the screened data for the next step of the pipeline.
write_csv(bins_screen, here("data", "combined", "5_zdiff_binscreened.csv"))