knitr::opts_chunk$set(echo=TRUE, warning = FALSE)


# Install required R libraries if not installed already

Packages pre analyzu

list.of.packages <- c("haven", "lme4", "viridis", "ggplot2", "tidyverse", "hrbrthemes")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

# Load required libraries
lapply(list.of.packages, require, quietly = TRUE, warn.conflicts = FALSE, character.only = TRUE)
## ── Attaching packages ─────────────────────────── tidyverse 1.3.0 ──
## ✔ tibble  2.1.3     ✔ dplyr   0.8.3
## ✔ tidyr   1.0.0     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ✔ purrr   0.3.3
## ── Conflicts ────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ tidyr::pack()   masks Matrix::pack()
## ✖ tidyr::unpack() masks Matrix::unpack()
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see http://bit.ly/arialnarrow
## [[1]]
## [1] TRUE
## 
## [[2]]
## [1] TRUE
## 
## [[3]]
## [1] TRUE
## 
## [[4]]
## [1] TRUE
## 
## [[5]]
## [1] TRUE
## 
## [[6]]
## [1] TRUE
# Load data
data <- read_sav("DATA-SM 1_2019HPMood.sav")

Zo 2 rychle grafy

Boxplot + individualne data

data %>%
  ggplot(aes(x=factor(person), y=swb)) +
  geom_boxplot(varwidth = F, na.rm = T) +
  scale_fill_viridis(discrete = TRUE) +
  geom_jitter(color="black", size=0.2, alpha=.3, height = .3, width = .2) +
  theme_ipsum() +
  theme(
    legend.position="none",
    plot.title = element_text(size=11)
  ) +
  ggtitle("SWB per participant") +
  xlab("Participant #") +
  ylab("SWB")
## Don't know how to automatically pick scale for object of type haven_labelled. Defaulting to continuous.

“Oscilacia” hodnot u participantov

Vidno vyraznu mortalitu s casom (vo webovom vystupe to nie je dobre vidno). Ak ta je korelovana so SWB (vid nizsie), tak odhad SWB bude biased.

overUnderMean <- with(data, tapply(X = swb, INDEX = person, FUN = function(x)
  ifelse(mean(x, na.rm = T) < mean(data$swb, na.rm = T), 0, 1)))
maxObs <- NA
for(i in 1:51){
  maxObs[i] <- max(data[data$person == i,]$zaznam)
}

Korelacia medzi priemernym SWB participanta a tym, kedy participant prestal zaznamenavat

round(cor(maxObs, with(data, tapply(X = swb, INDEX = person, FUN = function(x) mean(x, na.rm = T)))), 2)
## [1] 0.2
ggplot(data,aes(x = zaznam, y = swb)) +
  geom_line(na.rm = T, size = .5) +
  facet_grid(factor(person) ~ ., space = "free") +
  theme(axis.line=element_blank(),axis.text.x=element_blank(),
        axis.text.y=element_blank(),axis.ticks=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),legend.position="none",
        panel.background=element_blank(),panel.border=element_blank(),panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),plot.background=element_blank())
## Don't know how to automatically pick scale for object of type haven_labelled. Defaulting to continuous.

Linear mixed-effects model

data participantov modelovane individualne

Hodnota 60 zodpoveda priemeru 3.4

rescaleTo100 <- function(x) (((x-1)/(5 - 1)) * 100)
rescaleTo100(3.4)
## [1] 60
model <- lmer(swb - 3.4 ~ 1 + (1|person), data = data)
summary(model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: swb - 3.4 ~ 1 + (1 | person)
##    Data: data
## 
## REML criterion at convergence: 7172.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7619 -0.5625  0.2154  0.6566  2.9563 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  person   (Intercept) 0.2013   0.4486  
##  Residual             0.4630   0.6805  
## Number of obs: 3384, groups:  person, 51
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  0.03295    0.06395   0.515

Komentar

Intercept zodpoveda rozdielu priemeru vzorky od hodnoty 60. Ten je prakticky identicky s hodnotou 60. Z tabulky Random effects je zrejme, ze existuje variabilita v SWB. Variabilita medzi ludmi je .20, intrasubjektova var je .46. Variabilita between-SS ako aj variabilita within-SS je tak znacna.

Signifikantna variancia?

# Null model
# Nulovy variance component pre participantov
data$IDconst <- rep(1, each = length(data$person))
data$IDconst[[1]] <- 0
nullModel <- lmer(swb - 3.4 ~ 1 + (1|IDconst), data = data, REML = T)
## boundary (singular) fit: see ?isSingular
anova(nullModel, model)
## refitting model(s) with ML (instead of REML)
## Data: data
## Models:
## nullModel: swb - 3.4 ~ 1 + (1 | IDconst)
## model: swb - 3.4 ~ 1 + (1 | person)
##           Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## nullModel  3 8261.5 8279.9 -4127.8   8255.5                             
## model      3 7175.0 7193.4 -3584.5   7169.0 1086.5      0  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Pozrite na hodnotu chi^2 v likelihood ratio (de facto test signifikancie velkosti variancie random effektu participantov). Variabilita na urovni participantov je velka. Spravne by sa mal robit klasicky Q test, toto je iba narychlo realizovany LRTest - berte to ako aproximaciu (modely nie su nested, preto nula stupnov volnosti). Data su ale tak jednoznacne, ze vysledok by bol velmi podobny.

Mikrosimulacia

Predpokladam nulovy populacny rozdiel s hodnotou 60. Prva predpoklada sigmu na urovni variability Vasej vzorky, druha variabilitu zistenu Capic et al (priblizna hodnota). Pri opakovanom samplingu sa da ocakavat nasledujuca pravdepodobnost, ze zisteny rozdiel od 60 bude max taky velky (ako bolo zistene) alebo mensi. Hustota pravdepodobnosti Vami zistenych dat spada medzi dve cervene linie.

Ak by realny populacny rozdiel od 60 bol cokolvek ine nez 0, pravdepodobnost pozorovania Vami ziskanych dat by bola nizsie. Zalozene na 1e7 simulacii.

set.seed(123)
replications <- rnorm(1e7, mean = 0, sd = sd(as.numeric(ranef(model)$person[[1]])))
hist(replications, breaks = 1e5, freq = F, xlab = "dSWB", main = "Vyberova var rozdielu SWB od 60 pri between-SS SD = .44 (tento vyskum)", xlim = c(-1.5, 1.5))
abline(v=fixef(model), col = "red", lwd=1)
abline(v=-fixef(model), col = "red", lwd=1)

paste("Pravdepodobnost = ", round(as.numeric(table(replications < fixef(model) & replications > -fixef(model))[2])/length(replications)*100 , 2), "%")
## [1] "Pravdepodobnost =  5.96 %"
set.seed(123)
replications <- rnorm(1e7, mean = 0, sd = .2)
hist(replications, breaks = 1e5, freq = F, xlab = "dSWB", main = "Vyberova variabilita rozdielu SWB od 60 pri sigma = .2 (Capic)", xlim = c(-1.5, 1.5))
abline(v=fixef(model), col = "red", lwd=1)
abline(v=-fixef(model), col = "red", lwd=1)

paste("Pravdepodobnost = ", round(as.numeric(table(replications < fixef(model) & replications > -fixef(model))[2])/length(replications)*100 , 2), "%")
## [1] "Pravdepodobnost =  13.09 %"