# load packages 
library(foreign) 
library(ggplot2) 
library(car)

## Loading required package: carData

library(pastecs) 
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:pastecs':
## 
##     first, last

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readxl)
library(ez)

## Registered S3 methods overwritten by 'lme4':
##   method                          from
##   cooks.distance.influence.merMod car 
##   influence.merMod                car 
##   dfbeta.influence.merMod         car 
##   dfbetas.influence.merMod        car

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✓ tibble  3.1.2     ✓ purrr   0.3.4
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x tidyr::extract() masks pastecs::extract()
## x dplyr::filter()  masks stats::filter()
## x dplyr::first()   masks pastecs::first()
## x dplyr::lag()     masks stats::lag()
## x dplyr::last()    masks pastecs::last()
## x dplyr::recode()  masks car::recode()
## x purrr::some()    masks car::some()

library(tidyr)
library(ggpubr)
library(rstatix)

## 
## Attaching package: 'rstatix'

## The following object is masked from 'package:stats':
## 
##     filter

Set working directory

setwd("/Users/sudimac/Desktop/Data_analysis/behavioural/PSS_stress/")

# open data frame
PSS_Q = read_excel("PSS_data_final.xlsx", sheet = 1)

Recoding items

# rename items first 
PSS_Q$X4=PSS_Q$X4_R_response
PSS_Q$X4=car::recode(PSS_Q$X4, "1=5; 2=4; 3=3; 4=2; 5=1")
#View(PSS_Q$X4) #make sure reverse function worked 

PSS_Q$X5=PSS_Q$X5_R_response
PSS_Q$X5=car::recode(PSS_Q$X5, "1=5; 2=4; 3=3; 4=2; 5=1")
#View(PSS_Q$X5) 

PSS_Q$X7=PSS_Q$X7_R_response
PSS_Q$X7=car::recode(PSS_Q$X7, "1=5; 2=4; 3=3; 4=2; 5=1")
#View(PSS_Q$X7) 

PSS_Q$X8=PSS_Q$X8_R_response
PSS_Q$X8=car::recode(PSS_Q$X8, "1=5; 2=4; 3=3; 4=2; 5=1")
#View(PSS_Q$X8)

# renaming items 
X1=PSS_Q$X1_response
X2=PSS_Q$X2_response
X3=PSS_Q$X3_response 
X4=PSS_Q$X4
X5=PSS_Q$X5
X6=PSS_Q$X6_response
X7=PSS_Q$X7
X8=PSS_Q$X8
X9=PSS_Q$X9_response
X10=PSS_Q$X10_response

Create smaller data frame

# create new variable for stress
#sum scores 
PSS_Q$stress= X1+X2+X3+X4+X5+X6+X7+X8+X9+X10

#create data frame
PSS_new= PSS_Q[,c("subject", "time","condition", "stress")]
PSS_new

## # A tibble: 124 x 4
##    subject time  condition stress
##      <dbl> <chr> <chr>      <dbl>
##  1       1 pre   urban         16
##  2       2 pre   nature        16
##  3       1 post  urban         11
##  4       2 post  nature        17
##  5       3 pre   urban         15
##  6       4 pre   nature        16
##  7       3 post  urban         18
##  8       5 pre   urban         12
##  9       4 post  nature        30
## 10       5 post  urban         33
## # … with 114 more rows

summary(PSS_new)

##     subject          time            condition             stress    
##  Min.   : 1.00   Length:124         Length:124         Min.   :10.0  
##  1st Qu.:18.00   Class :character   Class :character   1st Qu.:16.0  
##  Median :34.50   Mode  :character   Mode  :character   Median :22.0  
##  Mean   :34.61                                         Mean   :22.4  
##  3rd Qu.:52.00                                         3rd Qu.:27.0  
##  Max.   :68.00                                         Max.   :45.0

Exclude participants without post data

PSS_new = subset (PSS_new, subject!= '65')
PSS_new = subset (PSS_new, subject!= '67')

Outliers

upper_limit = mean (PSS_new$stress) + 2.5 * sd(PSS_new$stress)
#42.1

lower_limit = mean (PSS_new$stress) - 2.5 * sd(PSS_new$stress)
#2.63
# no outliers

#n = 63 - 2 not recorded posttest daat = 61 participants

Creating factors and numerical variable

# creating within factor (time)
PSS_new$time= factor (PSS_new$time, levels = c("pre", "post"))

# creating between factor (environment)
PSS_new$condition = factor (PSS_new$condition, levels = c("urban", "nature"))

# creating numerical variable
PSS_new$stress = as.numeric(PSS_new$stress)

Descriptive statistics

PSS_new %>%
  group_by(time, condition) %>%
  get_summary_stats(stress, type = "mean_sd")

## # A tibble: 4 x 6
##   time  condition variable     n  mean    sd
##   <fct> <fct>     <chr>    <dbl> <dbl> <dbl>
## 1 pre   urban     stress      30  17.0  4.61
## 2 pre   nature    stress      31  18.2  4.73
## 3 post  urban     stress      30  26.6  7.25
## 4 post  nature    stress      31  27.7  7.92

Boxplot

# create boxplot 
bxp = ggboxplot(
  PSS_new, x = "time", y = "stress",
  color = "condition", palette = "jco"
)
bxp

ANOVA assumptions

Levene’s test for equality of variances

leveneTest(PSS_new$stress ~ condition, PSS_new, center=mean)

## Levene's Test for Homogeneity of Variance (center = mean)
##        Df F value Pr(>F)
## group   1  0.1346 0.7144
##       120

#histogram
histogram = ggplot(PSS_new, aes(PSS_new$stress)) + geom_histogram(aes(y=..density..), binwidth=1.5, fill='white', colour='black') + geom_density()  + labs(x= "Stress", y="Density")
histogram

## Warning: Use of `PSS_new$stress` is discouraged. Use `stress` instead.

## Warning: Use of `PSS_new$stress` is discouraged. Use `stress` instead.

#histogram by time
qplot(stress, data = PSS_new, geom = 'density', color = time, fill = time, alpha = I(.4))

#histogram by condition
qplot(stress, data = PSS_new, geom = 'density', color = condition, fill = time, alpha = I(.4))

Normality test

#Shapiro-Wilk normality test 
shapiro.test(PSS_new$stress)

## 
##  Shapiro-Wilk normality test
## 
## data:  PSS_new$stress
## W = 0.94743, p-value = 0.0001229

#Shapiro-Wilk normality test by condition and time
shapiro = PSS_new %>% group_by(condition,time) %>%
  shapiro_test(stress)
print(shapiro)

## # A tibble: 4 x 5
##   time  condition variable statistic      p
##   <fct> <fct>     <chr>        <dbl>  <dbl>
## 1 pre   urban     stress       0.924 0.0350
## 2 post  urban     stress       0.972 0.591 
## 3 pre   nature    stress       0.944 0.103 
## 4 post  nature    stress       0.928 0.0375

#Data are not normally distributed!

#plot
ggqqplot(PSS_new, 'stress', ggteme = theme_bw()) + facet_grid(condition ~ time, labeller= 'label_both')

Data are not normally distributed –> Robust mixed ANOVA

library(WRS2)
bwtrim(PSS_new$stress ~ PSS_new$condition*PSS_new$time, id = subject, data = PSS_new)

## Call:
## bwtrim(formula = PSS_new$stress ~ PSS_new$condition * PSS_new$time, 
##     id = subject, data = PSS_new)
## 
##                                  value df1     df2 p.value
## PSS_new$condition               0.6796   1 32.3332  0.4158
## PSS_new$time                   79.9567   1 33.9359  0.0000
## PSS_new$condition:PSS_new$time  0.0006   1 33.9359  0.9814

# Factor 'time' is significant.
# Interaction 'condition x time' is not significant.

Plot

#apa format
apatheme=theme_bw()+ theme(panel.grid.major=element_blank(),
                           panel.grid.minor=element_blank(),
                           panel.border=element_blank(),
                           axis.line=element_line(),
                           text=element_text(family='Times'),
                           legend.title=element_blank())

#barplot
PSS_barplot =ggplot(PSS_new, aes(time, stress , fill = condition)) + 
  stat_summary(PSS_new = mean, geom = "bar", position="dodge") + 
  stat_summary(PSS_new= mean_cl_boot, geom = "errorbar", position=position_dodge(width=0.90), width = 0.2) + 
  labs(x = "time", y = "Perceived stress score", fill = "condition") + apatheme + ggtitle("Perceived stress") +
  theme(plot.title = element_text(size = 30, hjust = 0.5))+ #title centered and size
  theme(legend.text = element_text(size = 12)) + # legend size
  theme(axis.text=element_text(size=25),
        axis.title=element_text(size=25))

## Warning: Ignoring unknown parameters: PSS_new

## Warning: Ignoring unknown parameters: PSS_new

PSS_barplot

## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

PSS

Sonja Sudimac

7/14/2021

Set working directory

Recoding items

Create smaller data frame

Exclude participants without post data

Outliers

Creating factors and numerical variable

Descriptive statistics

Boxplot

ANOVA assumptions

Levene’s test for equality of variances

Normality test

Data are not normally distributed –> Robust mixed ANOVA

Plot