https://rpubs.com/jenrichmond/emgz

load packages

library(tidyverse)
library(here)
library(skimr)

options(scipen = 999) #set no scientific notation

The process

how to: calculate zscores

Normally, to center a variable, you would subtract the mean of all data points from each individual data point. With scale(), this can be accomplished in one simple call.

Center variable A using the scale() function

scale(A, center = TRUE, scale = FALSE)

You can generate z-scores for variable A using the scale() function

scale(A, center = TRUE, scale = TRUE)

read in data

# Load the combined dataset; here() builds the file path from the project root.
data <- here("data", "combined", "2_clean_no_bl_outliers.csv") %>%
  read_csv()

Try it out on one pp first

# Pull out a single participant to trial the z-scoring approach.
pp403 <- filter(data, pp_no == "pp403")

# Reshape so that each muscle gets its own column of rms values.
widepp403 <- pivot_wider(pp403, names_from = muscle, values_from = rms)

# Standardise each muscle column with scale(), asking explicitly for both
# centering (center = TRUE) and scaling (scale = TRUE), and store the results
# as the new columns zbrow and zcheek.
widepp403$zbrow <- with(widepp403, scale(brow, center = TRUE, scale = TRUE))
widepp403$zcheek <- with(widepp403, scale(cheek, center = TRUE, scale = TRUE))

# Sanity check: summary() prints descriptives for every column. The Mean of
# the new zbrow and zcheek columns should be 0, which is what we expect after
# z-scoring.
summary(widepp403)
##     pp_no            condition            emotion          bin           
##  Length:210         Length:210         Min.   :626.0   Length:210        
##  Class :character   Class :character   1st Qu.:626.0   Class :character  
##  Mode  :character   Mode  :character   Median :727.0   Mode  :character  
##                                        Mean   :770.8                     
##                                        3rd Qu.:828.0                     
##                                        Max.   :929.0                     
##                                                                          
##      bin_no     trial                brow            cheek        
##  Min.   :0   Length:210         Min.   : 3.382   Min.   :  2.934  
##  1st Qu.:1   Class :character   1st Qu.: 5.577   1st Qu.:  3.673  
##  Median :3   Mode  :character   Median : 7.280   Median :  4.241  
##  Mean   :3                      Mean   : 8.506   Mean   :  6.801  
##  3rd Qu.:5                      3rd Qu.: 9.325   3rd Qu.:  5.708  
##  Max.   :6                      Max.   :66.192   Max.   :120.989  
##                                 NA's   :7        NA's   :7        
##       zbrow.V1            zcheek.V1     
##  Min.   :-0.818495   Min.   :-0.380274  
##  1st Qu.:-0.467962   1st Qu.:-0.307551  
##  Median :-0.195849   Median :-0.251692  
##  Mean   : 0.000000   Mean   : 0.000000  
##  3rd Qu.: 0.130802   3rd Qu.:-0.107447  
##  Max.   : 9.215078   Max.   :11.227675  
##  NA's   :7           NA's   :7

Now do all the data

make data wide, use mutate to make new z variables

# Z-score brow and cheek activity within each participant.
#   1. Reshape so that each muscle has its own column of rms values.
#   2. Standardise within pp_no, so each participant's scores are centred on
#      their own mean and scaled by their own SD.
# scale() returns a one-column matrix, so as.numeric() is used to store a
# plain numeric vector rather than a matrix column. ungroup() drops the pp_no
# grouping so that it does not silently carry over into later steps.
data_zscore <- data %>%
  pivot_wider(names_from = muscle, 
              values_from = rms) %>%
  group_by(pp_no) %>%
  mutate(
    Zbrow = as.numeric(scale(brow, center = TRUE, scale = TRUE)),
    Zcheek = as.numeric(scale(cheek, center = TRUE, scale = TRUE))
  ) %>%
  ungroup()

# Sanity check: the Mean of Zbrow and Zcheek should be 0.
summary(data_zscore)
##     pp_no            condition            emotion          bin           
##  Length:10836       Length:10836       Min.   :626.0   Length:10836      
##  Class :character   Class :character   1st Qu.:701.8   Class :character  
##  Mode  :character   Mode  :character   Median :727.0   Mode  :character  
##                                        Mean   :776.7                     
##                                        3rd Qu.:828.0                     
##                                        Max.   :929.0                     
##                                                                          
##      bin_no     trial                brow              cheek        
##  Min.   :0   Length:10836       Min.   :  0.7779   Min.   :  1.504  
##  1st Qu.:1   Class :character   1st Qu.:  4.4947   1st Qu.:  3.621  
##  Median :3   Mode  :character   Median :  7.2542   Median :  5.139  
##  Mean   :3                      Mean   :  9.6268   Mean   : 10.168  
##  3rd Qu.:5                      3rd Qu.: 11.0048   3rd Qu.:  9.662  
##  Max.   :6                      Max.   :220.8444   Max.   :176.645  
##                                 NA's   :700        NA's   :924      
##      Zbrow             Zcheek       
##  Min.   :-2.9500   Min.   :-1.3829  
##  1st Qu.:-0.5080   1st Qu.:-0.5146  
##  Median :-0.2471   Median :-0.2893  
##  Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.2121   3rd Qu.: 0.1066  
##  Max.   :13.5634   Max.   :11.2277  
##  NA's   :700       NA's   :924

note: can also make new z-variables manually:

# Manual equivalent of the scale() approach: z = (x - mean) / sd, computed
# within each participant.
# The wide data is rebuilt here because no data_wide object is created earlier
# in this script.
# na.rm = TRUE is needed because brow and cheek contain missing values; without
# it mean() and sd() return NA and every z-score for that participant would be
# NA. scale() already leaves missing values out when centering and scaling.
data_z_manual <- data %>%
  pivot_wider(names_from = muscle, values_from = rms) %>%
  group_by(pp_no) %>%
  mutate(
    Zbrow = (brow - mean(brow, na.rm = TRUE)) / sd(brow, na.rm = TRUE),
    Zcheek = (cheek - mean(cheek, na.rm = TRUE)) / sd(cheek, na.rm = TRUE)
  ) %>%
  ungroup()

Calculating Difference Scores

Need to calculate difference scores from baseline. Need to make BIN wide to allow for calculations across columns. It's a bit difficult to do that for both muscles at the same time, so let's separate and work out the difference scores for brow and cheek separately.

BROW FIRST

Add a muscle column

Just to make things easier to join back together later

# Keep the identifying columns plus the brow z-scores, and label every row
# with muscle = "brow" so brow and cheek rows can be told apart once the two
# tables are stacked back together.
brow_z <- data_zscore %>%
  select(pp_no, condition, emotion, bin, trial, Zbrow) %>%
  mutate(muscle = "brow", .before = Zbrow)

Make the bin column wide

… and rename bin_0 as BL (i.e. baseline)

# Spread the bins across columns (one column per value of bin, filled with the
# brow z-scores), then relabel the bin_0 column as BL, the baseline.
brow_z_wide <- pivot_wider(brow_z, names_from = bin, values_from = Zbrow)
brow_z_wide <- rename(brow_z_wide, BL = bin_0)

mutate() diff scores

Use wide columns to calculate the difference between each bin column and BL, creating a new set of columns starting with “diff”, drop BL column and all columns starting with bin (i.e. raw z scores).

note - This df contains for each bin the difference between stimulus and baseline, so POSITIVE difference scores = greater activity during STIM than BL and NEGATIVE difference scores = greater activity during BL than STIM

# Baseline-correct each bin: diff_binN = bin_N - BL.
# Positive values mean more activity in that bin than at baseline; negative
# values mean more activity at baseline.
# Afterwards drop BL and every column whose name starts with "bin" (the raw
# z-scores), leaving only the identifiers and the diff_ columns.
brow_z_diff <- brow_z_wide %>%
  mutate(
    diff_bin1 = bin_1 - BL,
    diff_bin2 = bin_2 - BL,
    diff_bin3 = bin_3 - BL,
    diff_bin4 = bin_4 - BL,
    diff_bin5 = bin_5 - BL,
    diff_bin6 = bin_6 - BL
  ) %>%
  select(-c(BL, starts_with("bin")))

Make data long again

# Stack the six diff_ columns back into long format: the source column name
# goes into "bin" and the difference score into "Zdiff".
brow_z_diff_long <- pivot_longer(
  brow_z_diff,
  cols = diff_bin1:diff_bin6,
  names_to = "bin",
  values_to = "Zdiff"
)

checking on missingness

library(naniar)
## 
## Attaching package: 'naniar'
## The following object is masked from 'package:skimr':
## 
##     n_complete
# Visualise where values are missing in the long brow difference-score data.
vis_miss(brow_z_diff_long)

NOW CHEEK

add a muscle column

…to make things easier to join back together later

# Keep the identifying columns plus the cheek z-scores, and label every row
# with muscle = "cheek" so cheek and brow rows can be told apart once the two
# tables are stacked back together.
cheek_z <- data_zscore %>%
  select(pp_no, condition, emotion, bin, trial, Zcheek) %>%
  mutate(muscle = "cheek", .before = Zcheek)

Make the bin column wide

…and rename bin_0 as BL (i.e. baseline)

# Spread the bins across columns (one column per value of bin, filled with the
# cheek z-scores), then relabel the bin_0 column as BL, the baseline.
cheek_z_wide <- pivot_wider(cheek_z, names_from = bin, values_from = Zcheek)
cheek_z_wide <- rename(cheek_z_wide, BL = bin_0)

mutate() diff scores

  • Use wide columns to calculate the difference between each bin column and BL, creating a new set of columns starting with “diff”, drop BL column and all columns starting with bin (i.e. raw z scores).

note - This df contains for each bin the difference between stimulus and baseline, so POSITIVE difference scores = greater activity during STIM than BL and NEGATIVE difference scores = greater activity during BL than STIM

# Baseline-correct each bin: diff_binN = bin_N - BL.
# Positive values mean more activity in that bin than at baseline; negative
# values mean more activity at baseline.
# Afterwards drop BL and every column whose name starts with "bin" (the raw
# z-scores), leaving only the identifiers and the diff_ columns.
cheek_z_diff <- cheek_z_wide %>%
  mutate(
    diff_bin1 = bin_1 - BL,
    diff_bin2 = bin_2 - BL,
    diff_bin3 = bin_3 - BL,
    diff_bin4 = bin_4 - BL,
    diff_bin5 = bin_5 - BL,
    diff_bin6 = bin_6 - BL
  ) %>%
  select(-c(BL, starts_with("bin")))

make data long again

# Stack the six diff_ columns back into long format: the source column name
# goes into "bin" and the difference score into "Zdiff".
cheek_z_diff_long <- pivot_longer(
  cheek_z_diff,
  cols = diff_bin1:diff_bin6,
  names_to = "bin",
  values_to = "Zdiff"
)

BIND CHEEK AND BROW TOGETHER

# Stack the cheek rows on top of the brow rows, then sort so that each
# participant's rows sit together, ordered by emotion, trial, muscle and bin.
zdiff_cheek_brow <- cheek_z_diff_long %>%
  bind_rows(brow_z_diff_long) %>%
  arrange(pp_no, emotion, trial, muscle, bin)

write to csv

# Save the combined difference scores to data/combined/ for later steps.
write_csv(
  zdiff_cheek_brow,
  here("data", "combined", "4_zdiff_clean.csv")
)