Load necessary libraries

library(ggplot2) # For data visualization library(brms) # For Bayesian regression library(posterior) # For posterior analysis library(bayesplot) # For posterior plots library(tidybayes) # For tidyverse-style summaries of posterior draws library(ggdist) # For plotting posteriors with ggplot2 library(bayestestR) # For Bayesian hypothesis testing library(dplyr) #For data frame manipulation library(tidyr) #For data tidying and reshaping library(HDInterval) # For calculating HDI

The data ————————————————

df <- read.csv(“/Users/adiiore/Desktop/selfesteem.csv”)

df <- df[, c(“frequency_of_use”, “gender”, “self_esteem”)] #Exclude the “age” variable from the dataset

Self-esteem as a function of gender: visualize the relationship between gender and self-esteem.

ggplot(df, aes(x = gender, y = self_esteem)) + geom_point() + labs(title = “Relationship between Gender and Self-Esteem”, x = “Gender”, y = “Self-Esteem”)

Self-esteem as a function of gender and number of hours:

ggplot(df, aes(factor(frequency_of_use), self_esteem)) + geom_boxplot() + labs(title = “Relationship between Frequency of Use and Self-Esteem”, x = “Frequency of Use”, y = “Self-Esteem”)

Self-esteem as a function of gender and number of hours, with facets:

ggplot(df, aes(factor(frequency_of_use), self_esteem)) + geom_boxplot() + facet_grid(. ~ gender) + labs(title = “Relationship between Frequency of Use and Self-Esteem (Grouped by Gender)”, x = “Frequency of Use”, y = “Self-Esteem”)

Set Priors ————————————————

Get the default priors for the model

get_prior( self_esteem ~ gender + frequency_of_use, family = gaussian(), data = df )

Define the hyper parameters for the normal priors

priors <- set_prior(“normal(-4, 2)”, class = “b”, coef = “frequency_of_use”) + set_prior(“student_t(3, 0.5, 0.5)”, class = “b”, coef = “genderMale”) + set_prior(“student_t(3, 24.8, 4.2)”, class = “Intercept”) + set_prior(“student_t(3, 0, 4.2)”, class = “sigma”)

validate priors

validate_prior( self_esteem ~ gender + frequency_of_use, family = gaussian(), data = df, prior = priors )

Posterior Model ————————————————

fit <- brm( self_esteem ~ gender + frequency_of_use, family = gaussian(), data = df, prior = priors, backend = “cmdstanr”, seed = 1234 )

Check and validate the fit of the posterior distribution

fit

I have a proper fit of the posterior distribution - all the RHats are 1.00 (smaller than 1.01) and all the ESS values are greater than 1000

the estimate for frequency_of_use indicates that there is a negative relationship between the frequency of use and the self-esteem variable.

#the estimate of ‘gender’ suggests that, on average, there is a slight increase in self-esteem for individuals classified as Male compared to those classified as Female. #However, it’s important to note that this effect is relatively small, as the estimate is close to zero.

mcmc_trace(fit, pars = c(“b_Intercept”, “b_genderMale”, “b_frequency_of_use”, “sigma”))

The trace plot looks good, the chains are mix well with each other

Posterior expected value distributions of females and males with frequency value of 8 hours ————————————————

grid <- data.frame(gender = c(“Female”, “Male”), frequency_of_use = c(8, 8))

#posterior expected value distributions of females and males

grid <- grid |> add_epred_rvars(fit) grid

Create the graph

ggplot(grid, aes(y = interaction(gender, frequency_of_use))) + stat_slab(aes(xdist = .epred), fill = “#6495ED”, alpha = 0.7, color = “black”) + labs(title = “Posterior Expected Value Distributions of Self-Esteem”, x = “Expected Value of Self-Esteem”, y = “Gender and Frequency of Use”) + theme_minimal() + theme(plot.title = element_text(size = 16, face = “bold”), axis.title = element_text(size = 12), axis.text = element_text(size = 10))

Central measures

post_theta <- grid$.epred # Extract the posterior draws of the expected values post_theta

CM <- point_estimate(post_theta) names(post_theta) <- grid$gender

CM_long <- CM %>% mutate(gender = names(post_theta)) %>% select(-Parameter) %>% pivot_longer(-gender, names_to = “Measure”)

CM_long

p <- ggplot(grid, aes(y = gender)) + stat_slab(aes(xdist = .epred), fill = “#FFA500”, alpha = 0.7, color = “black”) + geom_point(aes(x = value, color = Measure, shape = Measure), data = CM_long, size = 4, alpha = 0.6) + labs(title = “Posterior Predictive Distribution: Expected Values”, x = “Expected Value”, y = “Gender”) + theme_minimal() + theme(plot.title = element_text(size = 16, face = “bold”), axis.title = element_text(size = 12), axis.text = element_text(size = 10))

p

High Density Interval

describe_posterior(post_theta, ci_method = “HDI”, test = NULL)

ggplot(grid, aes(y = gender)) + stat_slabinterval(aes(xdist = .epred, fill = after_stat(level)), point_interval = “median_hdci”, .width = c(0.5, 0.8, 0.95, 1))

The differences between males and females ————————————————

The posterior expected value distribution of the differences in self esteem score estimate

grid <- data.frame(gender = c(“Female”, “Male”), frequency_of_use = c(8, 8)) grid <- grid |> add_epred_rvars(fit) |> mutate( gender_diff = .epred[gender == “Male”] - .epred[gender == “Female”] )

grid

#p-direction & Estimate ROPE (in range of -0.5, 0.5)

p_direction <- grid\(gender_diff[grid\)frequency_of_use == 8] |> describe_posterior(rope_range = c(-0.5, 0.5), ci_method = “HDI”) |> mutate(Parameter = grid\(gender[grid\)frequency_of_use == 8]) p_direction

#p-direction - If indeed the probability that women have higher Self-Esteem than men or the opposite #p-direction is 66.45% - The probability of difference between men and women is 66.45%

Visualization of the distributions and ROPE

p <- grid %>% filter(frequency_of_use == 8) %>% ggplot(aes(y = gender)) + stat_slabinterval(aes(xdist = gender_diff), fill = “#FFA500”, alpha = 0.7, color = “black”) + geom_vline(xintercept = 0, linewidth = 1, color = “red”) + geom_vline(xintercept = -0.5, linewidth = 1, linetype = “dashed”, color = “red”) + geom_vline(xintercept = 0.5, linewidth = 1, linetype = “dashed”, color = “red”) + geom_rect(aes(xmin = -0.5, xmax = 0.5, ymin = -Inf, ymax = Inf), fill = “red”, alpha = 0.01) + labs(title = “Distribution of Difference in Self-Esteem Score (Female - Male) for Frequency of Use = 8”, x = “Difference of Self-Esteem Score”, y = “Gender”) + theme_minimal() + theme(plot.title = element_text(size = 16, face = “bold”), axis.title = element_text(size = 12), axis.text = element_text(size = 10))

p

#PPD for the difference between females and males ———————————————— ppd_grid <- expand.grid(gender = unique(df\(gender), frequency_of_use = unique(df\)frequency_of_use)) |> add_predicted_rvars(fit) |> group_by(frequency_of_use)|> mutate( PPD_gender_diff = .prediction[gender == “Male”] - .prediction[gender == “Female”] )

ppd_grid <- ppd_grid |> add_predicted_rvars(fit) ppd_grid

Create the graph

ggplot(ppd_grid, aes(y = interaction(gender, frequency_of_use))) + stat_slab(aes(xdist = .prediction), fill = “#FFA500”, alpha = 0.7, color = “black”) + labs(title = “PPD: Difference in Self-Esteem Score (Female - Male)”, x = “Predicted Difference of Self-Esteem Score”, y = “Gender and Frequency of Use”) + theme_minimal() + theme(plot.title = element_text(size = 16, face = “bold”), axis.title = element_text(size = 12), axis.text = element_text(size = 10))

head(draws_of(ppd_grid$.prediction))

probability for a random female-male pair to have the female exhibit

a better score across different levels of frequency_of_use.

prob_female_better <- mean(ppd_grid$PPD_gender_diff > 0) prob_female_better

#the average probability across all levels of frequency_of_use combined prob_female_better <- mean(ppd_grid$PPD_gender_diff > 0) prob_female_better_combined <- mean(prob_female_better) prob_female_better_combined