# Read age_gaps.CSV into the data frame `data` used by the model below.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this exact file exists. Consider a project-relative path.
data <- read.csv(
  "C:\\Users\\91630\\OneDrive\\Desktop\\statistics\\age_gaps.CSV"
)
library(ggplot2)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.3.3
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.3.3
library(boot)
library(broom)
library(lindia)
## Warning: package 'lindia' was built under R version 4.3.3
# Linear model: actor_1_age explained by age_difference, release_year and
# couple_number. `model` is the input to every diagnostic plot further down.
model <- lm(
  formula = actor_1_age ~ age_difference + release_year + couple_number,
  data = data
)
# Coefficient table, residual quantiles and overall fit statistics.
summary(model)
## 
## Call:
## lm(formula = actor_1_age ~ age_difference + release_year + couple_number, 
##     data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.723  -4.943  -0.566   3.712  36.430 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -141.64135   26.70573  -5.304 1.36e-07 ***
## age_difference    0.92008    0.02640  34.857  < 2e-16 ***
## release_year      0.08553    0.01331   6.425 1.93e-10 ***
## couple_number     1.11478    0.29163   3.823 0.000139 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.243 on 1151 degrees of freedom
## Multiple R-squared:  0.5185, Adjusted R-squared:  0.5172 
## F-statistic: 413.1 on 3 and 1151 DF,  p-value: < 2.2e-16
  1. Fitted Values vs Residuals
# Residuals of `model` against its fitted values, with a smoother overlaid to
# expose any trend; se = FALSE omits the confidence ribbon.
gg_resfitted(model) + geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

  2. X Values vs Residuals

    # Residual plots of `model` against its predictor (X) variables. The
    # return value is stored in `residual_plots`, so it is not auto-printed.
    # NOTE(review): `residual_plots` is not referenced again in the visible
    # part of this file; confirm the plots are actually displayed.
    residual_plots <- gg_resX(model)

  3. Residuals Histogram

    # Histogram of the residuals of `model`. No bin count is passed, so the
    # stat_bin default of 30 bins applies (see the message printed below).
    gg_reshist(model)
    ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

  4. QQ-Plots
# Quantile-quantile plot of the residuals of `model`, used to judge how
# closely they follow the reference distribution.
gg_qqplot(model)

  5. Cook’s Distance Plot
# Cook's distance for each observation in `model`, to spot influential points.
# NOTE(review): the "matlab" threshold is presumably lindia's
# 3 * mean(Cook's distance) rule; confirm against the package docs.
gg_cooksd(model, threshold = "matlab")