# Load the age-gap dataset from a local CSV file into a data frame.
data <- read.csv(
  file = "C:\\Users\\91630\\OneDrive\\Desktop\\statistics\\age_gaps.CSV"
)
library(ggplot2)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.3.3
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.3.3
library(boot)
library(broom)
library(lindia)
## Warning: package 'lindia' was built under R version 4.3.3

I am going to work on the encoded_gender column, a binary encoding of character_1_gender (0 = "man", 1 = any other value).

# Binary response for the logistic regression: 0 when character 1 is recorded
# as "man", 1 for any other value (missing values stay NA).
data$encoded_gender <- as.numeric(data$character_1_gender != "man")

I selected age_difference, release_year, actor_1_age, and couple_number as my explanatory variables.

# Fit a logistic regression (binomial family, logit link) of encoded_gender
# on the four chosen explanatory variables.
model <- glm(
  encoded_gender ~ age_difference + release_year + actor_1_age + couple_number,
  family = binomial(link = "logit"),
  data = data
)

# Print the coefficient table, deviances and AIC for the fitted model.
summary(model)
## 
## Call:
## glm(formula = encoded_gender ~ age_difference + release_year + 
##     actor_1_age + couple_number, family = binomial(link = "logit"), 
##     data = data)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -30.999206  13.093538  -2.368   0.0179 *  
## age_difference  -0.135770   0.019006  -7.143  9.1e-13 ***
## release_year     0.015353   0.006555   2.342   0.0192 *  
## actor_1_age     -0.005824   0.010585  -0.550   0.5821    
## couple_number    0.035809   0.097700   0.367   0.7140    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1107.20  on 1154  degrees of freedom
## Residual deviance:  972.38  on 1150  degrees of freedom
## AIC: 982.38
## 
## Number of Fisher Scoring iterations: 6

Intercept: -30.999206 (the estimated log-odds of encoded_gender = 1 when every explanatory variable equals zero)

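age_difference (-0.135770): holding the other variables fixed, each additional year of age difference lowers the log-odds of encoded_gender = 1 by about 0.136, i.e. it multiplies the odds by exp(-0.135770) ≈ 0.873.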

Confidence interval for the “age_difference” coefficient:

# Pull the age_difference estimate and its standard error directly from the
# fitted model instead of retyping them from the printed summary. This keeps
# the interval in sync with `model` and uses the unrounded values.
coef_table <- coef(summary(model))
coef_age_difference <- coef_table["age_difference", "Estimate"]
se_age_difference <- coef_table["age_difference", "Std. Error"]

# Wald interval: estimate +/- z * SE, where z is the standard-normal quantile
# for a two-sided 95% interval (approximately 1.96).
conf_level <- 0.95
z_crit <- qnorm(1 - (1 - conf_level) / 2)
lower_bound <- coef_age_difference - z_crit * se_age_difference
upper_bound <- coef_age_difference + z_crit * se_age_difference

# Report the interval rounded to three decimals.
cat(
  "95% Confidence Interval for the coefficient of age_difference: (",
  round(lower_bound, 3), ",", round(upper_bound, 3), ")\n"
)
## 95% Confidence Interval for the coefficient of age_difference: ( -0.173 , -0.099 )

Plotting the age_difference coefficient’s confidence interval:

# One-row data frame holding the point estimate and interval bounds so that
# ggplot2 can map them to aesthetics.
confidence_interval <- data.frame(
  coefficient = "age_difference",
  estimate    = coef_age_difference,
  lower       = lower_bound,
  upper       = upper_bound
)

# Horizontal bar for the estimate with an error bar spanning the interval.
ggplot(
  data = confidence_interval,
  mapping = aes(x = coefficient, y = estimate)
) +
  geom_col(fill = "pink", width = 0.5) +
  geom_errorbar(
    mapping = aes(ymin = lower, ymax = upper),
    width = 0.2,
    color = "purple"
  ) +
  labs(
    title = "95% Confidence Interval for Coefficient of age_difference",
    x = "Coefficient",
    y = "Estimate"
  ) +
  # Flip axes so the coefficient name sits on the vertical axis.
  coord_flip() +
  theme_minimal()