library(tidyverse)
library(brms)
library(gridExtra)
library(car)
library(Hmisc)
library(knitr)
# Loading Data
scl <- read.table('data/average_scr_early.csv', header = TRUE, sep = ',')
data_fs <- filter(scl, cond1 == 'safe', cond2 == 'fake') %>% select(scr, trait, state)
data_ft <- filter(scl, cond1 == 'threat', cond2 == 'fake') %>% select(scr, trait, state)
# Scale by sample mean and std
data_fs$trait = scale(data_fs$trait)
data_fs$state = scale(data_fs$state)
data_ft$trait = scale(data_ft$trait)
data_ft$state = scale(data_ft$state)
scl %>% as_tibble() %>% head() %>% kable(format = 'pipe', align = 'c')
| subject | trait | state | scr | cond1 | cond2 |
|---|---|---|---|---|---|
| MAX_101 | 27 | 22 | -0.1475170 | safe | fake |
| MAX_102 | 47 | 44 | -0.0743359 | safe | fake |
| MAX_103 | 33 | 32 | -0.0431998 | safe | fake |
| MAX_104 | 24 | 21 | -0.0938506 | safe | fake |
| MAX_105 | 56 | 28 | -0.1658941 | safe | fake |
| MAX_106 | 38 | 33 | -0.0798314 | safe | fake |
# Plot average SCR
dens_plot <- function(data, var, color='#69b3a2',
title="",
xlab='SCR'){
var = enquo(var)
plot = data %>%
ggplot(aes(x = !!var)) +
geom_histogram(aes(y = ..density..), bins = 30, alpha = 0.4, fill = 'gray') +
geom_vline(aes(xintercept = mean(!!var)), linetype = "dashed", size = 1) +
geom_density(alpha = 0.8, fill = color, color = color) +
ggtitle(title) +
xlab(xlab) +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5, size = 14))
return(plot)
}
scatter_plot <- function(data, y, x,
title=""){
y = enquo(y)
x = enquo(x)
plot = data %>%
ggplot(aes(x = !!x, y = !!y)) +
geom_point(shape = 1, size = 3) +
stat_smooth(method = 'lm') +
ggtitle(title) +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5, size = 14))
return(plot)
}
data_plot <- function(data, cond="FakeSafe"){
scr_dist = dens_plot(data, scr, title = paste0("Distribution of SCR for ", cond, " \n at Early Phrase"), xlab='SCR')
trait_dist = dens_plot(data, trait, title = paste0("Distribution of Trait Anxiety Score \n for ", cond, " at Early Phrase"), xlab='trait')
state_dist = dens_plot(data, state, title = paste0("Distribution of State Anxiety Score \n for ", cond, " at Early Phrase"), xlab='state')
scr_trait_scatter = scatter_plot(data, scr, trait, title = "SCR vs Trait Anxiety Score")
scr_state_scatter = scatter_plot(data, scr, state, title = "SCR vs State Anxiety Score")
state_trait_scatter = scatter_plot(data, state, trait, title = "State vs Trait Anxiety Score")
trait_scr_scatter = scatter_plot(data, trait, scr, title = "Trait Anxiety Score vs SCR")
state_scr_scatter = scatter_plot(data, state, scr, title = "State Anxiety Score vs SCR")
trait_state_scatter = scatter_plot(data, trait, state, title = "Trait vs State Anxiety Score")
plot = grid.arrange(scr_dist, scr_trait_scatter, scr_state_scatter,
trait_scr_scatter, trait_dist, trait_state_scatter,
state_scr_scatter, state_trait_scatter, state_dist,
nrow = 3, ncol =3)
return(plot)
}
data_plot(data_fs, cond='FakeSafe')
fit_mle_fs <- lm(scr ~ 1 + trait + state, data = data_fs)
summary(fit_mle_fs)
##
## Call:
## lm(formula = scr ~ 1 + trait + state, data = data_fs)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.24800 -0.02969 0.01266 0.04918 0.10438
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.083546 0.006045 -13.821 <2e-16 ***
## trait -0.005539 0.007576 -0.731 0.466
## state 0.007395 0.007576 0.976 0.331
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0651 on 113 degrees of freedom
## Multiple R-squared: 0.008656, Adjusted R-squared: -0.00889
## F-statistic: 0.4933 on 2 and 113 DF, p-value: 0.6119
par(mfrow=c(2,2))
plot(fit_mle_fs)
Diagnosis Plots of MLE for FakeSafe
sqrt(vif(fit_mle_fs)) > 2
## trait state
## FALSE FALSE
# Bayesian model
# Number of iterations for the MCMC sampling
iterations <- 2000
# Number of chains to run
chains <- 4
SCALE <- 1
ns <- iterations*chains/2
# Model Specification
modelForm = "scr ~ 1 + trait + state"
priorRBA_fs <- get_prior(formula = modelForm, data = data_fs, family = 'student')
priorRBA_fs$prior[1] <- "student_t(3,0,10)"
priorRBA_fs$prior[4] <- "student_t(3,0,10)"
priorRBA_fs$prior[5] <- "gamma(3.325,0.1)"
priorRBA_fs$prior[6] <- "student_t(3,0,10)"
print(priorRBA_fs)
## prior class coef group resp dpar nlpar bound source
## student_t(3,0,10) b default
## student_t(3,0,10) b state (vectorized)
## student_t(3,0,10) b trait (vectorized)
## student_t(3,0,10) Intercept default
## gamma(3.325,0.1) nu default
## student_t(3,0,10) sigma default
fit_fs <- brm(modelForm,
data = data_fs,
chains = chains,
family = 'student',
prior = priorRBA_fs,
inits=0, iter=iterations,
control = list(adapt_delta = 0.99, max_treedepth = 15),
refresh = 0
)
summary(fit_fs)
## Family: student
## Links: mu = identity; sigma = identity; nu = identity
## Formula: scr ~ 1 + trait + state
## Data: data_fs (Number of observations: 116)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.08 0.01 -0.09 -0.07 1.00 2721 2736
## trait -0.01 0.01 -0.02 0.01 1.00 2520 2663
## state 0.01 0.01 -0.01 0.02 1.00 2619 2432
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.06 0.01 0.05 0.07 1.00 1560 1660
## nu 22.59 15.97 4.58 63.90 1.00 1572 1709
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
color_map = c('Prior' = '#3489eb', 'Posterior' = '#69b3a2')
dist_plot <- function(model, prior_name, posterior_name, xlim = c(-0.5, 0.5), xlab){
posterior_data <- posterior_samples(model)
prior_data <- tibble(trait = rstudent_t(8e4, 3, 0, 10),
state = rstudent_t(8e4, 3, 0, 10),
sigma = rstudent_t(8e4, 3, 0, 10),
nu = rgamma(8e4, 3.325, 0.1))
prior_name = enquo(prior_name)
posterior_name = enquo(posterior_name)
posterior_data %>%
ggplot() +
geom_density(data = prior_data, aes(x = !!prior_name, fill = 'Prior'),
color = 'transparent', alpha = 0.8) +
geom_density(aes(x = !!posterior_name, fill = 'Posterior'),
color = 'transparent', alpha = 0.9) +
scale_fill_manual(name = 'Prior or Posterior',
breaks = c('Prior', 'Posterior'),
values = color_map) +
scale_color_manual(values = c('transparent', '#a8a8a8')) +
ylab(NULL) + xlab(xlab) +
xlim(xlim) +
theme(plot.title = element_text(hjust = 0.5),
panel.grid = element_blank(),
panel.background = element_rect(fill = 'white', color = 'black'))
}
p_fs_trait <- dist_plot(fit_fs, trait, b_trait, xlim = c(-0.2, 0.2), xlab='trait')
p_fs_state <- dist_plot(fit_fs, state, b_state, xlim = c(-0.2, 0.2), xlab='state')
p_fs_sigma <- dist_plot(fit_fs, sigma, sigma, xlim = c(-0.2, 0.2), xlab='sigma')
p_fs_nu <- dist_plot(fit_fs, nu, nu, xlim = c(0, 100), xlab='nu')
grid.arrange(p_fs_trait, p_fs_state, p_fs_sigma, p_fs_nu, nrow = 2)
residual_plot <- function(model){
res = residuals(model, summary = TRUE)
scr_predit = fitted(model, summary = TRUE)
res_pre_df = tibble(res = res[, 1], fitted = scr_predit[, 1])
plot = res_pre_df %>%
ggplot(aes(x = fitted, y = res)) +
geom_point(shape = 1, size = 3) +
geom_smooth(se = FALSE, color = '#3489eb') +
geom_hline(yintercept = 0, linetype = 'dashed') +
ylab('Residuals') + xlab('Fitted Values') +
ggtitle('Residuals vs Fitted') +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
return(plot)
}
residual_plot(fit_fs)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
pp_check_fs <- pp_check(fit_fs, nsamples = 100)
pp_2d_fs <- pp_check(fit_fs, "stat_2d")
grid.arrange(pp_check_fs, pp_2d_fs, nrow = 1)
p_plus <- function(model, var, n = ns){
pd = posterior_samples(model)
return(sum(pd[var] > 0, na.rm = TRUE) / n)
}
cat(paste0("The P+ value of Trait anxiety score for FakeSafe is ", p_plus(fit_fs, "b_trait"), "\n"))
## The P+ value of Trait anxiety score for FakeSafe is 0.22975
cat(paste0("The P+ value of State anxiety score for FakeSafe is ", p_plus(fit_fs, "b_state")))
## The P+ value of State anxiety score for FakeSafe is 0.80775
data_plot(data_ft, cond='FakeThreat')
fit_mle_ft <- lm(scr ~ 1 + trait + state, data = data_ft)
summary(fit_mle_ft)
##
## Call:
## lm(formula = scr ~ 1 + trait + state, data = data_ft)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.20948 -0.10237 -0.05949 0.02857 0.97462
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.088553 0.016989 5.212 8.5e-07 ***
## trait 0.008338 0.021291 0.392 0.696
## state 0.008382 0.021291 0.394 0.695
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.183 on 113 degrees of freedom
## Multiple R-squared: 0.006744, Adjusted R-squared: -0.01084
## F-statistic: 0.3836 on 2 and 113 DF, p-value: 0.6823
par(mfrow=c(2,2))
plot(fit_mle_ft)
Diagnosis Plots of MLE for FakeThreat
priorRBA_ft <- get_prior(formula = modelForm, data = data_ft, family = 'student')
priorRBA_ft$prior[1] <- "student_t(3,0,10)"
priorRBA_ft$prior[4] <- "student_t(3,0,10)"
priorRBA_ft$prior[5] <- "gamma(3.325,0.1)"
priorRBA_ft$prior[6] <- "student_t(3,0,10)"
print(priorRBA_ft)
## prior class coef group resp dpar nlpar bound source
## student_t(3,0,10) b default
## student_t(3,0,10) b state (vectorized)
## student_t(3,0,10) b trait (vectorized)
## student_t(3,0,10) Intercept default
## gamma(3.325,0.1) nu default
## student_t(3,0,10) sigma default
fit_ft <- brm(modelForm,
data = data_ft,
chains = chains,
family = 'student',
prior = priorRBA_ft,
inits=0, iter=iterations,
control = list(adapt_delta = 0.99, max_treedepth = 15),
refresh = 0
)
summary(fit_ft)
## Family: student
## Links: mu = identity; sigma = identity; nu = identity
## Formula: scr ~ 1 + trait + state
## Data: data_ft (Number of observations: 116)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.03 0.01 0.01 0.05 1.00 1784 2678
## trait -0.00 0.01 -0.03 0.02 1.00 2024 2492
## state 0.00 0.01 -0.02 0.03 1.00 1597 2210
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.07 0.01 0.05 0.09 1.00 1444 1832
## nu 2.13 0.55 1.28 3.45 1.00 1470 1382
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
p_ft_trait <- dist_plot(fit_ft, trait, b_trait, xlim = c(-0.2, 0.2), xlab='trait')
p_ft_state <- dist_plot(fit_ft, state, b_state, xlim = c(-0.2, 0.2), xlab='state')
p_ft_sigma <- dist_plot(fit_ft, sigma, sigma, xlim = c(-0.2, 0.2), xlab='sigma')
p_ft_nu <- dist_plot(fit_ft, nu, nu, xlim = c(0, 10), xlab='nu')
grid.arrange(p_ft_trait, p_ft_state, p_ft_sigma, p_ft_nu, nrow = 2)
residual_plot(fit_ft)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
pp_check_ft <- pp_check(fit_ft, nsamples = 100)
pp_2d_ft <- pp_check(fit_ft, "stat_2d")
grid.arrange(pp_check_ft, pp_2d_ft, nrow = 1)
cat(paste0("The P+ value of Trait anxiety score for FakeThreat is ", p_plus(fit_ft, "b_trait"), "\n"))
## The P+ value of Trait anxiety score for FakeThreat is 0.34975
cat(paste0("The P+ value of State anxiety score for FakeThreat is ", p_plus(fit_ft, "b_state")))
## The P+ value of State anxiety score for FakeThreat is 0.68075