knitr::opts_chunk$set(echo=TRUE, warning = FALSE)
# Install required R libraries if not installed already
list.of.packages <- c("haven", "lme4", "viridis", "ggplot2", "tidyverse", "hrbrthemes")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
# Load required libraries
lapply(list.of.packages, require, quietly = TRUE, warn.conflicts = FALSE, character.only = TRUE)
## ── Attaching packages ─────────────────────────── tidyverse 1.3.0 ──
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 1.0.0 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ✔ purrr 0.3.3
## ── Conflicts ────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ tidyr::unpack() masks Matrix::unpack()
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see http://bit.ly/arialnarrow
## [[1]]
## [1] TRUE
##
## [[2]]
## [1] TRUE
##
## [[3]]
## [1] TRUE
##
## [[4]]
## [1] TRUE
##
## [[5]]
## [1] TRUE
##
## [[6]]
## [1] TRUE
# Load data
data <- read_sav("DATA-SM 1_2019HPMood.sav")
data %>%
ggplot(aes(x=factor(person), y=swb)) +
geom_boxplot(varwidth = F, na.rm = T) +
scale_fill_viridis(discrete = TRUE) +
geom_jitter(color="black", size=0.2, alpha=.3, height = .3, width = .2) +
theme_ipsum() +
theme(
legend.position="none",
plot.title = element_text(size=11)
) +
ggtitle("SWB per participant") +
xlab("Participant #") +
ylab("SWB")
## Don't know how to automatically pick scale for object of type haven_labelled. Defaulting to continuous.
Vidno vyraznu mortalitu s casom (vo webovom vystupe to nie je dobre vidno). Ak ta je korelovana so SWB (vid nizsie), tak odhad SWB bude biased.
overUnderMean <- with(data, tapply(X = swb, INDEX = person, FUN = function(x)
ifelse(mean(x, na.rm = T) < mean(data$swb, na.rm = T), 0, 1)))
maxObs <- NA
for(i in 1:51){
maxObs[i] <- max(data[data$person == i,]$zaznam)
}
data participantov modelovane individualne
Hodnota 60 zodpoveda priemeru 3.4
rescaleTo100 <- function(x) (((x-1)/(5 - 1)) * 100)
rescaleTo100(3.4)
## [1] 60
model <- lmer(swb - 3.4 ~ 1 + (1|person), data = data)
summary(model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: swb - 3.4 ~ 1 + (1 | person)
## Data: data
##
## REML criterion at convergence: 7172.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7619 -0.5625 0.2154 0.6566 2.9563
##
## Random effects:
## Groups Name Variance Std.Dev.
## person (Intercept) 0.2013 0.4486
## Residual 0.4630 0.6805
## Number of obs: 3384, groups: person, 51
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.03295 0.06395 0.515
Intercept zodpoveda rozdielu priemeru vzorky od hodnoty 60. Ten je prakticky identicky s hodnotou 60. Z tabulky Random effects je zrejme, ze existuje variabilita v SWB. Variabilita medzi ludmi je .20, intrasubjektova var je .46. Variabilita between-SS ako aj variabilita within-SS je tak znacna.
# Null model
# Nulovy variance component pre participantov
data$IDconst <- rep(1, each = length(data$person))
data$IDconst[[1]] <- 0
nullModel <- lmer(swb - 3.4 ~ 1 + (1|IDconst), data = data, REML = T)
## boundary (singular) fit: see ?isSingular
anova(nullModel, model)
## refitting model(s) with ML (instead of REML)
## Data: data
## Models:
## nullModel: swb - 3.4 ~ 1 + (1 | IDconst)
## model: swb - 3.4 ~ 1 + (1 | person)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## nullModel 3 8261.5 8279.9 -4127.8 8255.5
## model 3 7175.0 7193.4 -3584.5 7169.0 1086.5 0 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Pozrite na hodnotu chi^2 v likelihood ratio (de facto test signifikancie velkosti variancie random effektu participantov). Variabilita na urovni participantov je velka. Spravne by sa mal robit klasicky Q test, toto je iba narychlo realizovany LRTest - berte to ako aproximaciu (modely nie su nested, preto nula stupnov volnosti). Data su ale tak jednoznacne, ze vysledok by bol velmi podobny.
Predpokladam nulovy populacny rozdiel s hodnotou 60. Prva predpoklada sigmu na urovni variability Vasej vzorky, druha variabilitu zistenu Capic et al (priblizna hodnota). Pri opakovanom samplingu sa da ocakavat nasledujuca pravdepodobnost, ze zisteny rozdiel od 60 bude max taky velky (ako bolo zistene) alebo mensi. Hustota pravdepodobnosti Vami zistenych dat spada medzi dve cervene linie.
Ak by realny populacny rozdiel od 60 bol cokolvek ine nez 0, pravdepodobnost pozorovania Vami ziskanych dat by bola nizsie. Zalozene na 1e7 simulacii.
set.seed(123)
replications <- rnorm(1e7, mean = 0, sd = sd(as.numeric(ranef(model)$person[[1]])))
hist(replications, breaks = 1e5, freq = F, xlab = "dSWB", main = "Vyberova var rozdielu SWB od 60 pri between-SS SD = .44 (tento vyskum)", xlim = c(-1.5, 1.5))
abline(v=fixef(model), col = "red", lwd=1)
abline(v=-fixef(model), col = "red", lwd=1)
paste("Pravdepodobnost = ", round(as.numeric(table(replications < fixef(model) & replications > -fixef(model))[2])/length(replications)*100 , 2), "%")
## [1] "Pravdepodobnost = 5.96 %"
set.seed(123)
replications <- rnorm(1e7, mean = 0, sd = .2)
hist(replications, breaks = 1e5, freq = F, xlab = "dSWB", main = "Vyberova variabilita rozdielu SWB od 60 pri sigma = .2 (Capic)", xlim = c(-1.5, 1.5))
abline(v=fixef(model), col = "red", lwd=1)
abline(v=-fixef(model), col = "red", lwd=1)
paste("Pravdepodobnost = ", round(as.numeric(table(replications < fixef(model) & replications > -fixef(model))[2])/length(replications)*100 , 2), "%")
## [1] "Pravdepodobnost = 13.09 %"