# load packages
library(ggplot2) 
library(car)
## Loading required package: carData
library(pastecs) 
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:pastecs':
## 
##     first, last
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
library(ez)
## Registered S3 methods overwritten by 'lme4':
##   method                          from
##   cooks.distance.influence.merMod car 
##   influence.merMod                car 
##   dfbeta.influence.merMod         car 
##   dfbetas.influence.merMod        car
library(openxlsx)
library(apaTables)
library(rstatix)
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
## 
##     filter
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:car':
## 
##     logit
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:pastecs':
## 
##     extract
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble  3.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x psych::%+%()      masks ggplot2::%+%()
## x psych::alpha()    masks ggplot2::alpha()
## x tidyr::extract()  masks pastecs::extract()
## x rstatix::filter() masks dplyr::filter(), stats::filter()
## x dplyr::first()    masks pastecs::first()
## x dplyr::lag()      masks stats::lag()
## x dplyr::last()     masks pastecs::last()
## x dplyr::recode()   masks car::recode()
## x purrr::some()     masks car::some()
library(readxl)

Set working directory

setwd("/Users/sudimac/Desktop/Data_analysis/behavioural/PANAS_affect/")
#open data frame
panas = read_excel("Panas_data_final.xlsx", sheet =1)
## New names:
## * time -> time...2
## * time -> time...5

Excluded participants

##pp23 and pp65 who have missing data are already excluded, n = 61

PANAS - positive affect

Calculating score

panas$positive = panas$q1_panas_P_response + 
  panas$q3_panas_P_response + 
  panas$q5_panas_P_response + 
  panas$q9_panas_P_response +
  panas$q10_panas_P_response + 
  panas$q12_panas_P_response + 
  panas$q14_panas_P_response + 
  panas$q16_panas_P_response +
  panas$q17_panas_P_response + 
  panas$q19_panas_P_response

PANAS - negative affect

Calculating score

panas$negative = panas$q2_panas_N_response +
  panas$q4_panas_N_response +
  panas$q6_panas_N_response +
  panas$q7_panas_N_response +
  panas$q8_panas_N_response +
  panas$q11_panas_N_response +
  panas$q13_panas_N_response +
  panas$q15_panas_N_response +
  panas$q18_panas_N_response +
  panas$q20_panas_N_response

Making smaller data frame

panas_new = panas[,c(4, 5, 6, 28, 29)]
names(panas_new)
## [1] "subject"   "time...5"  "condition" "positive"  "negative"
summary(panas_new)
##     subject        time...5          condition            positive    
##  Min.   : 1.00   Length:122         Length:122         Min.   :13.00  
##  1st Qu.:18.00   Class :character   Class :character   1st Qu.:23.00  
##  Median :35.00   Mode  :character   Mode  :character   Median :27.50  
##  Mean   :34.82                                         Mean   :27.16  
##  3rd Qu.:52.00                                         3rd Qu.:32.00  
##  Max.   :68.00                                         Max.   :43.00  
##     negative    
##  Min.   :10.00  
##  1st Qu.:11.00  
##  Median :12.00  
##  Mean   :14.36  
##  3rd Qu.:16.75  
##  Max.   :38.00
panas_new$time = panas_new$time...5

Convert to factor and numerical variables

# creating within factor (time)
panas_new$time = factor (panas_new$time, levels = c("pre", "post"))

# creating between factor (environment)
panas_new$condition = factor (panas_new$condition, levels = c("urban", "nature"))

# creating numerical variables
#positive affect
panas_new$positive = as.numeric(panas_new$positive)
#negative affect
panas_new$negative = as.numeric(panas_new$negative)

Positive affect

Descriptive statistics (longer)

describeBy(panas_new$positive,list(panas_new$condition,panas_new$time),
           mat=TRUE,digits=2)
##     item group1 group2 vars  n  mean   sd median trimmed   mad min max range
## X11    1  urban    pre    1 30 29.63 5.86   29.0   29.58  4.45  18  42    24
## X12    2 nature    pre    1 31 27.87 4.53   28.0   28.00  4.45  18  35    17
## X13    3  urban   post    1 30 26.80 6.80   26.5   26.54  5.93  14  43    29
## X14    4 nature   post    1 31 24.42 8.42   24.0   23.88 10.38  13  43    30
##      skew kurtosis   se
## X11  0.03    -0.34 1.07
## X12 -0.19    -0.98 0.81
## X13  0.32    -0.34 1.24
## X14  0.39    -0.88 1.51

Descritptive statistics (summary)

panas_new %>%
  group_by(time, condition) %>%
  get_summary_stats(positive, type = "mean_sd")
## # A tibble: 4 x 6
##   condition time  variable     n  mean    sd
##   <fct>     <fct> <chr>    <dbl> <dbl> <dbl>
## 1 urban     pre   positive    30  29.6  5.86
## 2 nature    pre   positive    31  27.9  4.53
## 3 urban     post  positive    30  26.8  6.80
## 4 nature    post  positive    31  24.4  8.42
#apatable with descriptive statistics
apatable = apa.2way.table(condition, time, positive, panas_new, 
                          filename = "PANAS_positive descriptive statistics table.doc", 
                          table.number = 4, show.conf.interval = FALSE, 
                          show.marginal.means = FALSE,
                          landscape = TRUE)

Boxplot

library(ggpubr)
# create boxplot 
bxp = ggboxplot(
  panas_new, x = "time", y = "positive",
  color = "condition", palette = "jco"
)
bxp

Histogram

histogram = ggplot(panas_new, aes(positive)) + geom_histogram(aes(y=..density..), binwidth=1.5, fill='white', colour='black') + geom_density()  + labs(x= "Score", y="Density")
histogram

#histogram by time
qplot(positive, data = panas_new, geom = 'density', color = time, fill = time, alpha = I(.4))

#histogram by condition
qplot(positive, data = panas_new, geom = 'density', color = condition, fill = time, alpha = I(.4))

ANOVA assumptions

Levene’s test for equality of variances

leveneTest(panas_new$positive ~ condition, panas_new, center=mean)
## Levene's Test for Homogeneity of Variance (center = mean)
##        Df F value Pr(>F)
## group   1  0.6547   0.42
##       120

Normality test

#Shapiro-Wilk normality test by condition and time

shapiro = panas_new %>% group_by(condition,time) %>%
  shapiro_test(positive)
print(shapiro)
## # A tibble: 4 x 5
##   condition time  variable statistic     p
##   <fct>     <fct> <chr>        <dbl> <dbl>
## 1 urban     pre   positive     0.968 0.474
## 2 urban     post  positive     0.976 0.711
## 3 nature    pre   positive     0.960 0.298
## 4 nature    post  positive     0.951 0.162
#Data are normally distributed!
#plot
ggqqplot(panas_new, 'positive', ggteme = theme_bw()) + facet_grid(condition ~ time, labeller= 'label_both')

Outliers

upper_limit = mean (panas_new$positive) + 2.5 * SD (panas_new$positive)
#44.04

lower_limit = mean (panas_new$positive) - 2.5 * SD (panas_new$positive)
#10.28
# no outliers

ezANOVA

positive = ezANOVA(data = panas_new,
                   dv = positive,
                   within= time,
                   between= condition,
                   wid = subject,
                   type = 3,
                   detailed = TRUE)
## Warning: Converting "subject" to factor for ANOVA.
## Warning: Data is unbalanced (unequal N per group). Make sure you specified a
## well-considered value for the type argument to ezANOVA().
positive
## $ANOVA
##           Effect DFn DFd          SSn      SSd            F            p p<.05
## 1    (Intercept)   1  59 90109.631315 3767.877 1410.9983990 6.722933e-43     *
## 2      condition   1  59   130.844430 3767.877    2.0488518 1.575971e-01      
## 3           time   1  59   301.110744 1312.922   13.5312938 5.092911e-04     *
## 4 condition:time   1  59     2.914023 1312.922    0.1309501 7.187418e-01      
##            ges
## 1 0.9466248980
## 2 0.0251061749
## 3 0.0559486804
## 4 0.0005732075
# interaction condition x time not significant (p = 0.718)
# factor time significant (p < 0.001)
#plot results table 
apa.ezANOVA.table(positive,
                  table.number = 1,
                  filename="anova PANAS positive emotions 02.06.2021.doc")
## 
## 
## Table 1 
## 
## ANOVA results
##  
## 
##         Predictor df_num df_den   SS_num  SS_den       F    p ges
##       (Intercept)      1     59 90109.63 3767.88 1411.00 .000 .95
##         condition      1     59   130.84 3767.88    2.05 .158 .03
##              time      1     59   301.11 1312.92   13.53 .001 .06
##  condition x time      1     59     2.91 1312.92    0.13 .719 .00
## 
## Note. df_num indicates degrees of freedom numerator. df_den indicates degrees of freedom denominator. 
## SS_num indicates sum of squares numerator. SS_den indicates sum of squares denominator. 
## ges indicates generalized eta-squared.
## 

Plot results

#apa format
apatheme=theme_bw()+ theme(panel.grid.major=element_blank(),
                           panel.grid.minor=element_blank(),
                           panel.border=element_blank(),
                           axis.line=element_line(),
                           text=element_text(family='Times'),
                           legend.title=element_blank())

bar_postive = ggplot(panas_new, aes(time, positive , fill = condition)) + 
  stat_summary(panas_new = mean, geom = "bar", position="dodge") + 
  stat_summary(panas_new= mean_cl_boot, geom = "errorbar", position=position_dodge(width=0.90), width = 0.2) + 
  labs(x = "time", y = "Positive affect", fill = "time") + apatheme + ggtitle("PANAS - Positive affect") +
  theme(plot.title = element_text(size = 30, hjust = 0.5))+ #title centered and size
  theme(legend.text = element_text(size = 20)) + # legend size
  theme(axis.text=element_text(size=25),
        axis.title=element_text(size=25))
## Warning: Ignoring unknown parameters: panas_new

## Warning: Ignoring unknown parameters: panas_new
bar_postive
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

Negative affect

Outliers

upper_limit = mean (panas_new$negative) + 2.5 * SD (panas_new$negative)
#28.21

lower_limit = mean (panas_new$negative) - 2.5 * SD (panas_new$negative)
#0.5

#Outliers: pp1 (score = 36) and pp 28 (score = 29)

Exclude outliers

panas_negative_nooutliers = subset (panas_new, subject!= '1')
panas_negative_nooutliers = subset (panas_negative_nooutliers , subject!= '28')

#n = 61 -2 outliers = 59

Descriptive statistics

Descriptive statistics (longer)

# Report median and IQR since the data are not notmally distributed.
describeBy(panas_negative_nooutliers$negative,list(panas_negative_nooutliers$condition,panas_negative_nooutliers$time),
           mat=TRUE, IQR = TRUE, digits=2)
##     item group1 group2 vars  n  mean   sd median trimmed  mad min max range
## X11    1  urban    pre    1 29 13.28 4.93     12   12.40 2.97  10  34    24
## X12    2 nature    pre    1 30 13.17 3.29     12   12.62 2.22  10  23    13
## X13    3  urban   post    1 29 13.90 5.51     12   12.96 2.97  10  38    28
## X14    4 nature   post    1 30 14.93 4.53     14   14.42 5.19  10  25    15
##     skew kurtosis   se IQR
## X11 2.67     8.08 0.92   4
## X12 1.31     1.03 0.60   3
## X13 2.92     9.88 1.02   4
## X14 0.60    -0.78 0.83   7

Descriptive statistics (summary)

panas_negative_nooutliers %>%
  group_by(time, condition) %>%
  get_summary_stats(negative, type = "mean_sd")
## # A tibble: 4 x 6
##   condition time  variable     n  mean    sd
##   <fct>     <fct> <chr>    <dbl> <dbl> <dbl>
## 1 urban     pre   negative    29  13.3  4.93
## 2 nature    pre   negative    30  13.2  3.29
## 3 urban     post  negative    29  13.9  5.50
## 4 nature    post  negative    30  14.9  4.53
#apatable with descriptive statistics
apatable = apa.2way.table(condition, time, negative, panas_negative_nooutliers, 
                          filename = "PANAS_negative descriptive statistics table.doc", table.number = 4,
                          show.conf.interval = FALSE, 
                          show.marginal.means = FALSE,
                          landscape = TRUE)

ANOVA assumptions

Levene’s test for equality of variances

leveneTest(negative ~ condition, panas_negative_nooutliers, center=mean)
## Levene's Test for Homogeneity of Variance (center = mean)
##        Df F value Pr(>F)
## group   1  0.0017 0.9671
##       116

Boxplot

library(ggpubr)
# create boxplot 
bxp = ggboxplot(
  panas_negative_nooutliers, x = "time", y = "positive",
  color = "condition", palette = "jco"
)
bxp

Histogram

histogram = ggplot(panas_negative_nooutliers, aes(negative)) + geom_histogram(aes(y=..density..), binwidth=1.5, fill='white', colour='black') + geom_density()  + labs(x= "Score", y="Density")
histogram

#histogram by time
qplot(negative, data = panas_negative_nooutliers, geom = 'density', color = time, fill = time, alpha = I(.4))

#histogram by condition
qplot(negative, data = panas_negative_nooutliers, geom = 'density', color = condition, fill = time, alpha = I(.4))

Normality test

#Shapiro-Wilk normality test by condition and time

shapiro = panas_negative_nooutliers %>% group_by(condition,time) %>%
  shapiro_test(negative)
print(shapiro)
## # A tibble: 4 x 5
##   condition time  variable statistic           p
##   <fct>     <fct> <chr>        <dbl>       <dbl>
## 1 urban     pre   negative     0.657 0.000000532
## 2 urban     post  negative     0.646 0.000000388
## 3 nature    pre   negative     0.839 0.000364   
## 4 nature    post  negative     0.894 0.00600
#Data are not normally distributed!
#plot
ggqqplot(panas_new, 'negative', ggteme = theme_bw()) + facet_grid(condition ~ time, labeller= 'label_both')

Robust mixed ANOVA

library(WRS2)
## 
## Attaching package: 'WRS2'
## The following objects are masked from 'package:apaTables':
## 
##     goggles, viagra
bwtrim(negative ~ condition*time, id = subject, data = panas_negative_nooutliers)
## Call:
## bwtrim(formula = negative ~ condition * time, id = subject, data = panas_negative_nooutliers)
## 
##                 value df1     df2 p.value
## condition      1.8402   1 35.3100  0.1835
## time           5.7605   1 30.4663  0.0227
## condition:time 1.4962   1 30.4663  0.2306
# interaction condition x time not significant (p = 0.23)
# factor time significant (p = 0.02)

Plot results

#apa format
bar_negative =ggplot(panas_new, aes(time, negative , fill = condition)) + 
  stat_summary(panas_new = mean, geom = "bar", position="dodge") + 
  stat_summary(panas_new= mean_cl_boot, geom = "errorbar", position=position_dodge(width=0.90), width = 0.2) + 
  labs(x = "time", y = "Negative affect", fill = "time") + apatheme + ggtitle("PANAS - Negative affect") +
  theme(plot.title = element_text(size = 30, hjust = 0.5))+ #title centered and size
  theme(legend.text = element_text(size = 20)) + # legend size
  theme(axis.text=element_text(size=25),
        axis.title=element_text(size=25))
## Warning: Ignoring unknown parameters: panas_new

## Warning: Ignoring unknown parameters: panas_new
bar_negative
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`