# load packages 
library(foreign) 
library(ggplot2) 
library(car)
## Loading required package: carData
library(pastecs) 
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:pastecs':
## 
##     first, last
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
library(ez)
## Registered S3 methods overwritten by 'lme4':
##   method                          from
##   cooks.distance.influence.merMod car 
##   influence.merMod                car 
##   dfbeta.influence.merMod         car 
##   dfbetas.influence.merMod        car
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble  3.1.2     ✓ purrr   0.3.4
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x tidyr::extract() masks pastecs::extract()
## x dplyr::filter()  masks stats::filter()
## x dplyr::first()   masks pastecs::first()
## x dplyr::lag()     masks stats::lag()
## x dplyr::last()    masks pastecs::last()
## x dplyr::recode()  masks car::recode()
## x purrr::some()    masks car::some()
library(tidyr)
library(ggpubr)
library(rstatix)
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
## 
##     filter

Set working directory

setwd("/Users/sudimac/Desktop/Data_analysis/behavioural/PSS_stress/")

# open data frame
PSS_Q = read_excel("PSS_data_final.xlsx", sheet = 1)

Recoding items

# rename items first 
PSS_Q$X4=PSS_Q$X4_R_response
PSS_Q$X4=car::recode(PSS_Q$X4, "1=5; 2=4; 3=3; 4=2; 5=1")
#View(PSS_Q$X4) #make sure reverse function worked 

PSS_Q$X5=PSS_Q$X5_R_response
PSS_Q$X5=car::recode(PSS_Q$X5, "1=5; 2=4; 3=3; 4=2; 5=1")
#View(PSS_Q$X5) 

PSS_Q$X7=PSS_Q$X7_R_response
PSS_Q$X7=car::recode(PSS_Q$X7, "1=5; 2=4; 3=3; 4=2; 5=1")
#View(PSS_Q$X7) 

PSS_Q$X8=PSS_Q$X8_R_response
PSS_Q$X8=car::recode(PSS_Q$X8, "1=5; 2=4; 3=3; 4=2; 5=1")
#View(PSS_Q$X8)
# renaming items 
X1=PSS_Q$X1_response
X2=PSS_Q$X2_response
X3=PSS_Q$X3_response 
X4=PSS_Q$X4
X5=PSS_Q$X5
X6=PSS_Q$X6_response
X7=PSS_Q$X7
X8=PSS_Q$X8
X9=PSS_Q$X9_response
X10=PSS_Q$X10_response

Create smaller data frame

# create new variable for stress
#sum scores 
PSS_Q$stress= X1+X2+X3+X4+X5+X6+X7+X8+X9+X10

#create data frame
PSS_new= PSS_Q[,c("subject", "time","condition", "stress")]
PSS_new
## # A tibble: 124 x 4
##    subject time  condition stress
##      <dbl> <chr> <chr>      <dbl>
##  1       1 pre   urban         16
##  2       2 pre   nature        16
##  3       1 post  urban         11
##  4       2 post  nature        17
##  5       3 pre   urban         15
##  6       4 pre   nature        16
##  7       3 post  urban         18
##  8       5 pre   urban         12
##  9       4 post  nature        30
## 10       5 post  urban         33
## # … with 114 more rows
summary(PSS_new)
##     subject          time            condition             stress    
##  Min.   : 1.00   Length:124         Length:124         Min.   :10.0  
##  1st Qu.:18.00   Class :character   Class :character   1st Qu.:16.0  
##  Median :34.50   Mode  :character   Mode  :character   Median :22.0  
##  Mean   :34.61                                         Mean   :22.4  
##  3rd Qu.:52.00                                         3rd Qu.:27.0  
##  Max.   :68.00                                         Max.   :45.0

Exclude participants without post data

PSS_new = subset (PSS_new, subject!= '65')
PSS_new = subset (PSS_new, subject!= '67')

Outliers

upper_limit = mean (PSS_new$stress) + 2.5 * sd(PSS_new$stress)
#42.1

lower_limit = mean (PSS_new$stress) - 2.5 * sd(PSS_new$stress)
#2.63
# no outliers

#n = 63 - 2 not recorded posttest daat = 61 participants

Creating factors and numerical variable

# creating within factor (time)
PSS_new$time= factor (PSS_new$time, levels = c("pre", "post"))

# creating between factor (environment)
PSS_new$condition = factor (PSS_new$condition, levels = c("urban", "nature"))

# creating numerical variable
PSS_new$stress = as.numeric(PSS_new$stress)

Descriptive statistics

PSS_new %>%
  group_by(time, condition) %>%
  get_summary_stats(stress, type = "mean_sd")
## # A tibble: 4 x 6
##   time  condition variable     n  mean    sd
##   <fct> <fct>     <chr>    <dbl> <dbl> <dbl>
## 1 pre   urban     stress      30  17.0  4.61
## 2 pre   nature    stress      31  18.2  4.73
## 3 post  urban     stress      30  26.6  7.25
## 4 post  nature    stress      31  27.7  7.92

Boxplot

# create boxplot 
bxp = ggboxplot(
  PSS_new, x = "time", y = "stress",
  color = "condition", palette = "jco"
)
bxp

ANOVA assumptions

Levene’s test for equality of variances

leveneTest(PSS_new$stress ~ condition, PSS_new, center=mean)
## Levene's Test for Homogeneity of Variance (center = mean)
##        Df F value Pr(>F)
## group   1  0.1346 0.7144
##       120
#histogram
histogram = ggplot(PSS_new, aes(PSS_new$stress)) + geom_histogram(aes(y=..density..), binwidth=1.5, fill='white', colour='black') + geom_density()  + labs(x= "Stress", y="Density")
histogram
## Warning: Use of `PSS_new$stress` is discouraged. Use `stress` instead.

## Warning: Use of `PSS_new$stress` is discouraged. Use `stress` instead.

#histogram by time
qplot(stress, data = PSS_new, geom = 'density', color = time, fill = time, alpha = I(.4))

#histogram by condition
qplot(stress, data = PSS_new, geom = 'density', color = condition, fill = time, alpha = I(.4))

Normality test

#Shapiro-Wilk normality test 
shapiro.test(PSS_new$stress)
## 
##  Shapiro-Wilk normality test
## 
## data:  PSS_new$stress
## W = 0.94743, p-value = 0.0001229
#Shapiro-Wilk normality test by condition and time
shapiro = PSS_new %>% group_by(condition,time) %>%
  shapiro_test(stress)
print(shapiro)
## # A tibble: 4 x 5
##   time  condition variable statistic      p
##   <fct> <fct>     <chr>        <dbl>  <dbl>
## 1 pre   urban     stress       0.924 0.0350
## 2 post  urban     stress       0.972 0.591 
## 3 pre   nature    stress       0.944 0.103 
## 4 post  nature    stress       0.928 0.0375
#Data are not normally distributed!
#plot
ggqqplot(PSS_new, 'stress', ggteme = theme_bw()) + facet_grid(condition ~ time, labeller= 'label_both')

Data are not normally distributed –> Robust mixed ANOVA

library(WRS2)
bwtrim(PSS_new$stress ~ PSS_new$condition*PSS_new$time, id = subject, data = PSS_new)
## Call:
## bwtrim(formula = PSS_new$stress ~ PSS_new$condition * PSS_new$time, 
##     id = subject, data = PSS_new)
## 
##                                  value df1     df2 p.value
## PSS_new$condition               0.6796   1 32.3332  0.4158
## PSS_new$time                   79.9567   1 33.9359  0.0000
## PSS_new$condition:PSS_new$time  0.0006   1 33.9359  0.9814
# Factor 'time' is significant.
# Interaction 'condition x time' is not significant.

Plot

#apa format
apatheme=theme_bw()+ theme(panel.grid.major=element_blank(),
                           panel.grid.minor=element_blank(),
                           panel.border=element_blank(),
                           axis.line=element_line(),
                           text=element_text(family='Times'),
                           legend.title=element_blank())

#barplot
PSS_barplot =ggplot(PSS_new, aes(time, stress , fill = condition)) + 
  stat_summary(PSS_new = mean, geom = "bar", position="dodge") + 
  stat_summary(PSS_new= mean_cl_boot, geom = "errorbar", position=position_dodge(width=0.90), width = 0.2) + 
  labs(x = "time", y = "Perceived stress score", fill = "condition") + apatheme + ggtitle("Perceived stress") +
  theme(plot.title = element_text(size = 30, hjust = 0.5))+ #title centered and size
  theme(legend.text = element_text(size = 12)) + # legend size
  theme(axis.text=element_text(size=25),
        axis.title=element_text(size=25))
## Warning: Ignoring unknown parameters: PSS_new

## Warning: Ignoring unknown parameters: PSS_new
PSS_barplot
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`