# Load the age-gap dataset from a local CSV file into a data frame.
# NOTE(review): this is an absolute, machine-specific Windows path; the script
# will only run where this exact file exists.
data <- read.csv(
  file = "C:\\Users\\varsh\\OneDrive\\Desktop\\Gitstuff\\age_gaps.CSV"
)
library(ggplot2)
library(ggthemes)
library(ggrepel)
library(boot)
library(broom)
library(lindia)

The binary response variable is encoded_gender, which is 0 when character_1_gender is "man" and 1 otherwise.

# Binary recode of character_1_gender: 0 for "man", 1 for any other value.
data[["encoded_gender"]] <- ifelse(
  test = data[["character_1_gender"]] == "man",
  yes = 0,
  no = 1
)

The explanatory variables I chose are age_difference, release_year, actor_1_age, and couple_number.

# Fit a logistic regression (binomial family, logit link) of encoded_gender
# on the four explanatory variables, then print the coefficient table.
model <- glm(
  formula = encoded_gender ~ age_difference + release_year +
    actor_1_age + couple_number,
  family = binomial(link = "logit"),
  data = data
)

summary(model)
## 
## Call:
## glm(formula = encoded_gender ~ age_difference + release_year + 
##     actor_1_age + couple_number, family = binomial(link = "logit"), 
##     data = data)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -30.999206  13.093538  -2.368   0.0179 *  
## age_difference  -0.135770   0.019006  -7.143  9.1e-13 ***
## release_year     0.015353   0.006555   2.342   0.0192 *  
## actor_1_age     -0.005824   0.010585  -0.550   0.5821    
## couple_number    0.035809   0.097700   0.367   0.7140    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1107.20  on 1154  degrees of freedom
## Residual deviance:  972.38  on 1150  degrees of freedom
## AIC: 982.38
## 
## Number of Fisher Scoring iterations: 6

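Intercept (-30.999206): the estimated log-odds that encoded_gender = 1 when every explanatory variable equals zero. Because a release year of zero is not a realistic value, the intercept has no useful interpretation on its own.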

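Age_difference (-0.135770): holding the other variables constant, each additional year of age difference lowers the log-odds that encoded_gender = 1 by about 0.136 (odds ratio exp(-0.135770) ≈ 0.87). The effect is highly significant (p = 9.1e-13).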

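Release Year (0.015353): holding the other variables constant, each later release year raises the log-odds that encoded_gender = 1 by about 0.015 (odds ratio exp(0.015353) ≈ 1.015). The effect is significant at the 5% level (p = 0.0192).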

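Actor_1_age (-0.005824): the estimated change in the log-odds that encoded_gender = 1 per additional year of actor 1's age is close to zero and not statistically significant (p = 0.5821).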

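Couple Number (0.035809): the estimated change in the log-odds that encoded_gender = 1 per one-unit increase in couple_number is small and not statistically significant (p = 0.7140).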

95% confidence interval for the coefficient of age_difference:

# 95% Wald confidence interval for the age_difference coefficient.
# The estimate and standard error are read from the fitted model instead of
# being copied by hand from the printed summary, so they stay in sync with
# `model` if the data or the model specification changes.
coef_table <- coef(summary(model))
coef_age_difference <- coef_table["age_difference", "Estimate"]
se_age_difference <- coef_table["age_difference", "Std. Error"]

# Standard normal critical value for a two-sided 95% interval (about 1.96).
z_crit <- qnorm(0.975)
lower_bound <- coef_age_difference - z_crit * se_age_difference
upper_bound <- coef_age_difference + z_crit * se_age_difference

cat("95% Confidence Interval for the coefficient of age_difference: (", round(lower_bound, 3), ",", round(upper_bound, 3), ")\n")
## 95% Confidence Interval for the coefficient of age_difference: ( -0.173 , -0.099 )

Plot of the 95% confidence interval for the coefficient of age_difference:

# One-row data frame with the age_difference estimate and its interval bounds.
confidence_interval <- data.frame(
  coefficient = "age_difference",
  estimate = coef_age_difference,
  lower = lower_bound,
  upper = upper_bound
)

# Horizontal bar for the estimate, with an error bar spanning lower..upper.
ggplot(
  data = confidence_interval,
  mapping = aes(x = coefficient, y = estimate)
) +
  geom_col(fill = "grey", width = 0.5) +
  geom_errorbar(
    mapping = aes(ymin = lower, ymax = upper),
    width = 0.2,
    color = "black"
  ) +
  ggtitle("95% Confidence Interval for Coefficient of age_difference") +
  xlab("Coefficient") +
  ylab("Estimate") +
  coord_flip() +
  theme_minimal()