https://rpubs.com/jenrichmond/emgz
library(tidyverse)
library(here)
library(skimr)
options(scipen = 999) #set no scientific notation
Normally, to center a variable, you would subtract the mean of all data points from each individual data point. With scale(), this can be accomplished in one simple call.
Center variable A using the scale() function
# scale = FALSE: the mean is subtracted but values are not divided by the SD
scale(A, center = TRUE, scale = FALSE)
You can generate z-scores for variable A using the scale() function
# center = TRUE and scale = TRUE: subtract the mean, then divide by the SD
scale(A, center = TRUE, scale = TRUE)
# Read the cleaned, combined data set from data/combined/
data <- read_csv(
  here("data", "combined", "2_clean_no_bl_outliers.csv")
)

# Keep a single participant (pp403) to try the z-scoring on
pp403 <- filter(data, pp_no == "pp403")
# Spread muscle into columns so each muscle gets its own column of rms values
widepp403 <- pivot_wider(pp403, names_from = muscle, values_from = rms)

# Add z-scored copies of brow and cheek: scale() with center = TRUE and
# scale = TRUE subtracts the mean and divides by the standard deviation
widepp403$zbrow <- scale(
  widepp403$brow,
  center = TRUE,
  scale = TRUE
)
widepp403$zcheek <- scale(
  widepp403$cheek,
  center = TRUE,
  scale = TRUE
)

# Sanity check: the new zbrow and zcheek columns should have a mean of 0
summary(widepp403)
## pp_no condition emotion bin
## Length:210 Length:210 Min. :626.0 Length:210
## Class :character Class :character 1st Qu.:626.0 Class :character
## Mode :character Mode :character Median :727.0 Mode :character
## Mean :770.8
## 3rd Qu.:828.0
## Max. :929.0
##
## bin_no trial brow cheek
## Min. :0 Length:210 Min. : 3.382 Min. : 2.934
## 1st Qu.:1 Class :character 1st Qu.: 5.577 1st Qu.: 3.673
## Median :3 Mode :character Median : 7.280 Median : 4.241
## Mean :3 Mean : 8.506 Mean : 6.801
## 3rd Qu.:5 3rd Qu.: 9.325 3rd Qu.: 5.708
## Max. :6 Max. :66.192 Max. :120.989
## NA's :7 NA's :7
## zbrow.V1 zcheek.V1
## Min. :-0.818495 Min. :-0.380274
## 1st Qu.:-0.467962 1st Qu.:-0.307551
## Median :-0.195849 Median :-0.251692
## Mean : 0.000000 Mean : 0.000000
## 3rd Qu.: 0.130802 3rd Qu.:-0.107447
## Max. : 9.215078 Max. :11.227675
## NA's :7 NA's :7
Make the data wide, then use mutate() to create the new z-score variables within each participant.
# Reshape so each muscle has its own column of rms values, then z-score brow
# and cheek separately within each participant (pp_no).
data_zscore <- data %>%
  pivot_wider(
    names_from = muscle,
    values_from = rms
  ) %>%
  group_by(pp_no) %>%
  mutate(
    # scale() returns a one-column matrix; as.numeric() stores the z-scores
    # as plain numeric vectors so later reshaping sees ordinary columns
    Zbrow = as.numeric(scale(brow, center = TRUE, scale = TRUE)),
    Zcheek = as.numeric(scale(cheek, center = TRUE, scale = TRUE))
  ) %>%
  # grouping was only needed for the per-participant z-scores
  ungroup()

# Overall means of Zbrow and Zcheek should be 0
summary(data_zscore)
## pp_no condition emotion bin
## Length:10836 Length:10836 Min. :626.0 Length:10836
## Class :character Class :character 1st Qu.:701.8 Class :character
## Mode :character Mode :character Median :727.0 Mode :character
## Mean :776.7
## 3rd Qu.:828.0
## Max. :929.0
##
## bin_no trial brow cheek
## Min. :0 Length:10836 Min. : 0.7779 Min. : 1.504
## 1st Qu.:1 Class :character 1st Qu.: 4.4947 1st Qu.: 3.621
## Median :3 Mode :character Median : 7.2542 Median : 5.139
## Mean :3 Mean : 9.6268 Mean : 10.168
## 3rd Qu.:5 3rd Qu.: 11.0048 3rd Qu.: 9.662
## Max. :6 Max. :220.8444 Max. :176.645
## NA's :700 NA's :924
## Zbrow Zcheek
## Min. :-2.9500 Min. :-1.3829
## 1st Qu.:-0.5080 1st Qu.:-0.5146
## Median :-0.2471 Median :-0.2893
## Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.2121 3rd Qu.: 0.1066
## Max. :13.5634 Max. :11.2277
## NA's :700 NA's :924
Note: you can also compute the new z-variables manually:
# Manual equivalent of the scale() approach: z = (x - mean) / sd, computed
# within each participant. The wide table is built here from `data` because
# no `data_wide` object is created earlier in this script.
data_z_manual <- data %>%
  pivot_wider(
    names_from = muscle,
    values_from = rms
  ) %>%
  group_by(pp_no) %>%
  mutate(
    # brow and cheek contain NAs; without na.rm = TRUE every z-score for a
    # participant with any missing value would be NA (scale() omits NAs)
    Zbrow = (brow - mean(brow, na.rm = TRUE)) / sd(brow, na.rm = TRUE),
    Zcheek = (cheek - mean(cheek, na.rm = TRUE)) / sd(cheek, na.rm = TRUE)
  ) %>%
  ungroup()
We need to calculate difference scores from baseline. To do that, BIN needs to be made wide so that calculations can be done across columns. It's a bit difficult to do that for both muscles at the same time, so let's separate them and work out the difference scores for brow and cheek separately.
Add a muscle column to the brow data, just to make things easier to join back together later.
# Brow-only table: label every row with muscle = "brow" and keep just the
# identifying columns plus the brow z-score
brow_z <- data_zscore %>%
  mutate(muscle = "brow") %>%
  select(
    pp_no,
    condition,
    emotion,
    bin,
    trial,
    muscle,
    Zbrow
  )
Make bin wide and rename bin_0 as BL (i.e. baseline).
# One column per bin holding the brow z-score; bin_0 is relabelled BL
brow_z_wide <- brow_z %>%
  pivot_wider(
    names_from = bin,
    values_from = Zbrow
  ) %>%
  rename(BL = bin_0)
Use the wide columns to calculate the difference between each bin column and BL, creating a new set of columns starting with “diff”. Then drop the BL column and all columns starting with bin (i.e. the raw z scores).
Note - this df contains, for each bin, the difference between stimulus and baseline, so POSITIVE difference scores = greater activity during STIM than BL and NEGATIVE difference scores = greater activity during BL than STIM.
# Baseline-corrected brow scores: each bin minus BL
brow_z_diff <- brow_z_wide %>%
  mutate(
    diff_bin1 = bin_1 - BL,
    diff_bin2 = bin_2 - BL,
    diff_bin3 = bin_3 - BL,
    diff_bin4 = bin_4 - BL,
    diff_bin5 = bin_5 - BL,
    diff_bin6 = bin_6 - BL
  ) %>%
  # keep only the diff_* columns alongside the identifiers
  select(-BL, -starts_with("bin"))
# Back to long format: one row per bin, with the diff_bin* column name in
# `bin` and the difference score in `Zdiff`
brow_z_diff_long <- brow_z_diff %>%
  pivot_longer(
    cols = diff_bin1:diff_bin6,
    names_to = "bin",
    values_to = "Zdiff"
  )
library(naniar)
##
## Attaching package: 'naniar'
## The following object is masked from 'package:skimr':
##
## n_complete
# Plot where values are missing in the long brow difference scores
# (vis_miss() is available once naniar is attached)
vis_miss(brow_z_diff_long)
Add a muscle column to the cheek data, to make things easier to join back together later.
# Cheek-only table: label every row with muscle = "cheek" and keep just the
# identifying columns plus the cheek z-score
cheek_z <- data_zscore %>%
  mutate(muscle = "cheek") %>%
  select(
    pp_no,
    condition,
    emotion,
    bin,
    trial,
    muscle,
    Zcheek
  )
Make bin wide and rename bin_0 as BL (i.e. baseline).
# One column per bin holding the cheek z-score; bin_0 is relabelled BL
cheek_z_wide <- cheek_z %>%
  pivot_wider(
    names_from = bin,
    values_from = Zcheek
  ) %>%
  rename(BL = bin_0)
Note - this df contains, for each bin, the difference between stimulus and baseline, so POSITIVE difference scores = greater activity during STIM than BL and NEGATIVE difference scores = greater activity during BL than STIM.
# Baseline-corrected cheek scores: each bin minus BL
cheek_z_diff <- cheek_z_wide %>%
  mutate(
    diff_bin1 = bin_1 - BL,
    diff_bin2 = bin_2 - BL,
    diff_bin3 = bin_3 - BL,
    diff_bin4 = bin_4 - BL,
    diff_bin5 = bin_5 - BL,
    diff_bin6 = bin_6 - BL
  ) %>%
  # keep only the diff_* columns alongside the identifiers
  select(-BL, -starts_with("bin"))
# Back to long format: one row per bin, with the diff_bin* column name in
# `bin` and the difference score in `Zdiff`
cheek_z_diff_long <- cheek_z_diff %>%
  pivot_longer(
    cols = diff_bin1:diff_bin6,
    names_to = "bin",
    values_to = "Zdiff"
  )
# Stack the cheek and brow difference scores (cheek rows first) and sort them
zdiff_cheek_brow <- cheek_z_diff_long %>%
  bind_rows(brow_z_diff_long) %>%
  arrange(pp_no, emotion, trial, muscle, bin)

# Save the combined z-score difference data next to the other combined files
write_csv(
  zdiff_cheek_brow,
  here("data", "combined", "4_zdiff_clean.csv")
)