Load necessary libraries

library(ggplot2) # For data visualization library(brms) # For Bayesian regression library(posterior) # For posterior analysis library(bayesplot) # For posterior plots library(tidybayes) # For tidyverse-style summaries of posterior draws library(ggdist) # For plotting posteriors with ggplot2 library(bayestestR) # For Bayesian hypothesis testing library(dplyr) #For data frame manipulation library(tidyr) #For data tidying and reshaping library(HDInterval) # For calculating HDI

The data ————————————————

df <- read.csv(“/Users/adiiore/Desktop/selfesteem.csv”)

df <- df[, c(“frequency_of_use”, “gender”, “self_esteem”)] #Exclude the “age” variable from the dataset

Self-esteem as a function of gender: visualize the relationship between gender and self-esteem.

ggplot(df, aes(x = gender, y = self_esteem)) + geom_point() + labs(title = “Relationship between Gender and Self-Esteem”, x = “Gender”, y = “Self-Esteem”)

Self-esteem as a function of gender and number of hours:

ggplot(df, aes(factor(frequency_of_use), self_esteem)) + geom_boxplot() + labs(title = “Relationship between Frequency of Use and Self-Esteem”, x = “Frequency of Use”, y = “Self-Esteem”)

Set Priors ————————————————

Get the default priors for the model

get_prior( self_esteem ~ gender + frequency_of_use, family = gaussian(), data = df )

Define the hyper parameters for the normal priors

priors <- set_prior(“normal(-4, 2)”, class = “b”, coef = “frequency_of_use”) + set_prior(“student_t(3, 0.5, 0.5)”, class = “b”, coef = “genderMale”) + set_prior(“student_t(3, 24.8, 4.2)”, class = “Intercept”) + set_prior(“student_t(3, 0, 4.2)”, class = “sigma”)

validate priors

validate_prior( self_esteem ~ gender + frequency_of_use, family = gaussian(), data = df, prior = priors )

Posterior Model ————————————————

fit <- brm( self_esteem ~ gender + frequency_of_use, family = gaussian(), data = df, prior = priors, backend = “cmdstanr”, seed = 1234 )

Check and validate the fit of the posterior distribution

fit

I have a proper fit of the posterior distribution - all the RHats are 1.00 (smaller than 1.01) and all the ESS values are greater than 1000

the estimate for `frequency_of_use` indicates that there is a negative relationship between the frequency of use and the self-esteem variable.

#the estimate of ‘gender’ suggests that, on average, there is a slight increase in self-esteem for individuals classified as Male compared to those classified as Female. #However, it’s important to note that this effect is relatively small, as the estimate is close to zero.

mcmc_trace(fit, pars = c(“b_Intercept”, “b_genderMale”, “b_frequency_of_use”, “sigma”))

The trace plot looks good, the chains are mix well with each other

Posterior expected value distributions of females and males with frequency value of 8 hours ————————————————

grid <- data.frame(gender = c(“Female”, “Male”), frequency_of_use = c(8, 8))

#posterior expected value distributions of females and males

grid <- grid |> add_epred_rvars(fit) grid

Create the graph

ggplot(grid, aes(y = interaction(gender, frequency_of_use))) + stat_slab(aes(xdist = .epred), fill = “#6495ED”, alpha = 0.7, color = “black”) + labs(title = “Posterior Expected Value Distributions of Self-Esteem”, x = “Expected Value of Self-Esteem”, y = “Gender and Frequency of Use”) + theme_minimal() + theme(plot.title = element_text(size = 16, face = “bold”), axis.title = element_text(size = 12), axis.text = element_text(size = 10))

Central measures

post_theta <- grid$.epred # Extract the posterior draws of the expected values post_theta

CM <- point_estimate(post_theta) names(post_theta) <- grid$gender

CM_long <- CM %>% mutate(gender = names(post_theta)) %>% select(-Parameter) %>% pivot_longer(-gender, names_to = “Measure”)

CM_long

p <- ggplot(grid, aes(y = gender)) + stat_slab(aes(xdist = .epred), fill = “#FFA500”, alpha = 0.7, color = “black”) + geom_point(aes(x = value, color = Measure, shape = Measure), data = CM_long, size = 4, alpha = 0.6) + labs(title = “Posterior Predictive Distribution: Expected Values”, x = “Expected Value”, y = “Gender”) + theme_minimal() + theme(plot.title = element_text(size = 16, face = “bold”), axis.title = element_text(size = 12), axis.text = element_text(size = 10))

High Density Interval

describe_posterior(post_theta, ci_method = “HDI”, test = NULL)

ggplot(grid, aes(y = gender)) + stat_slabinterval(aes(xdist = .epred, fill = after_stat(level)), point_interval = “median_hdci”, .width = c(0.5, 0.8, 0.95, 1))

The differences between males and females ————————————————

The posterior expected value distribution of the differences in self esteem score estimate

grid <- data.frame(gender = c(“Female”, “Male”), frequency_of_use = c(8, 8)) grid <- grid |> add_epred_rvars(fit) |> mutate( gender_diff = .epred[gender == “Male”] - .epred[gender == “Female”] )

grid

#p-direction & Estimate ROPE (in range of -0.5, 0.5)

p_direction <- grid$gender_diff[grid$frequency_of_use == 8] |> describe_posterior(rope_range = c(-0.5, 0.5), ci_method = “HDI”) |> mutate(Parameter = grid$gender[grid$frequency_of_use == 8]) p_direction

#p-direction - If indeed the probability that women have higher Self-Esteem than men or the opposite #p-direction is 66.45% - The probability of difference between men and women is 66.45%

Visualization of the distributions and ROPE

p <- grid %>% filter(frequency_of_use == 8) %>% ggplot(aes(y = gender)) + stat_slabinterval(aes(xdist = gender_diff), fill = “#FFA500”, alpha = 0.7, color = “black”) + geom_vline(xintercept = 0, linewidth = 1, color = “red”) + geom_vline(xintercept = -0.5, linewidth = 1, linetype = “dashed”, color = “red”) + geom_vline(xintercept = 0.5, linewidth = 1, linetype = “dashed”, color = “red”) + geom_rect(aes(xmin = -0.5, xmax = 0.5, ymin = -Inf, ymax = Inf), fill = “red”, alpha = 0.01) + labs(title = “Distribution of Difference in Self-Esteem Score (Female - Male) for Frequency of Use = 8”, x = “Difference of Self-Esteem Score”, y = “Gender”) + theme_minimal() + theme(plot.title = element_text(size = 16, face = “bold”), axis.title = element_text(size = 12), axis.text = element_text(size = 10))

#PPD for the difference between females and males ———————————————— ppd_grid <- expand.grid(gender = unique(df$gender), frequency_of_use = unique(df$frequency_of_use)) |> add_predicted_rvars(fit) |> group_by(frequency_of_use)|> mutate( PPD_gender_diff = .prediction[gender == “Male”] - .prediction[gender == “Female”] )

ppd_grid <- ppd_grid |> add_predicted_rvars(fit) ppd_grid

Create the graph

ggplot(ppd_grid, aes(y = interaction(gender, frequency_of_use))) + stat_slab(aes(xdist = .prediction), fill = “#FFA500”, alpha = 0.7, color = “black”) + labs(title = “PPD: Difference in Self-Esteem Score (Female - Male)”, x = “Predicted Difference of Self-Esteem Score”, y = “Gender and Frequency of Use”) + theme_minimal() + theme(plot.title = element_text(size = 16, face = “bold”), axis.title = element_text(size = 12), axis.text = element_text(size = 10))

head(draws_of(ppd_grid$.prediction))

probability for a random female-male pair to have the female exhibit

a better score across different levels of frequency_of_use.

prob_female_better <- mean(ppd_grid$PPD_gender_diff > 0) prob_female_better

#the average probability across all levels of frequency_of_use combined prob_female_better <- mean(ppd_grid$PPD_gender_diff > 0) prob_female_better_combined <- mean(prob_female_better) prob_female_better_combined

Load necessary libraries

The data ————————————————

Self-esteem as a function of gender: visualize the relationship between gender and self-esteem.

Self-esteem as a function of gender and number of hours:

Self-esteem as a function of gender and number of hours, with facets:

Set Priors ————————————————

Get the default priors for the model

Define the hyper parameters for the normal priors

validate priors

Posterior Model ————————————————

Check and validate the fit of the posterior distribution

I have a proper fit of the posterior distribution - all the RHats are 1.00 (smaller than 1.01) and all the ESS values are greater than 1000

the estimate for frequency_of_use indicates that there is a negative relationship between the frequency of use and the self-esteem variable.

The trace plot looks good, the chains are mix well with each other

Posterior expected value distributions of females and males with frequency value of 8 hours ————————————————

Create the graph

Central measures

High Density Interval

The differences between males and females ————————————————

The posterior expected value distribution of the differences in self esteem score estimate

Visualization of the distributions and ROPE

Create the graph

probability for a random female-male pair to have the female exhibit

a better score across different levels of frequency_of_use.

the estimate for `frequency_of_use` indicates that there is a negative relationship between the frequency of use and the self-esteem variable.