install.packages("tidyverse", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpMWwXQk/downloaded_packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
install.packages("psych", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpMWwXQk/downloaded_packages
library(psych)
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
install.packages("knitr",repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpMWwXQk/downloaded_packages
library("knitr")
install.packages("kableExtra",repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpMWwXQk/downloaded_packages
library("kableExtra")
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
install.packages("apaTables", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpMWwXQk/downloaded_packages
library(apaTables)
install.packages("modelsummary", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpMWwXQk/downloaded_packages
library("modelsummary")
## 
## Attaching package: 'modelsummary'
## 
## The following object is masked from 'package:psych':
## 
##     SD
install.packages("margins", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpMWwXQk/downloaded_packages
library(margins)
library(ggplot2)
data <- read_csv("affair.csv")
## Rows: 6366 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (9): affair, rate_marriage, age, yrs_married, children, religious, educ,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data$rate_marriage <- factor(data$rate_marriage)
Logit_model2.1 <- glm(affair ~  rate_marriage + yrs_married + age + religious + occupation, data = data, family = "binomial")
summary(Logit_model2.1)
## 
## Call:
## glm(formula = affair ~ rate_marriage + yrs_married + age + religious + 
##     occupation, family = "binomial", data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0543  -0.8063  -0.5748   0.9941   2.3494  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     2.211808   0.339627   6.512 7.39e-11 ***
## rate_marriage2 -0.415525   0.263891  -1.575  0.11535    
## rate_marriage3 -0.788912   0.246372  -3.202  0.00136 ** 
## rate_marriage4 -1.675918   0.242104  -6.922 4.44e-12 ***
## rate_marriage5 -2.419855   0.243072  -9.955  < 2e-16 ***
## yrs_married     0.115507   0.009414  12.270  < 2e-16 ***
## age            -0.065531   0.009992  -6.559 5.43e-11 ***
## religious      -0.374411   0.034646 -10.807  < 2e-16 ***
## occupation      0.130942   0.031477   4.160 3.18e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 8005.1  on 6365  degrees of freedom
## Residual deviance: 6936.6  on 6357  degrees of freedom
## AIC: 6954.6
## 
## Number of Fisher Scoring iterations: 4
summary(margins(Logit_model2.1))
##          factor     AME     SE        z      p   lower   upper
##             age -0.0120 0.0018  -6.6313 0.0000 -0.0155 -0.0084
##      occupation  0.0239 0.0057   4.1785 0.0000  0.0127  0.0351
##  rate_marriage2 -0.0881 0.0539  -1.6360 0.1018 -0.1937  0.0175
##  rate_marriage3 -0.1735 0.0498  -3.4859 0.0005 -0.2711 -0.0759
##  rate_marriage4 -0.3721 0.0484  -7.6853 0.0000 -0.4670 -0.2772
##  rate_marriage5 -0.5038 0.0481 -10.4747 0.0000 -0.5980 -0.4095
##       religious -0.0683 0.0061 -11.1365 0.0000 -0.0803 -0.0563
##     yrs_married  0.0211 0.0017  12.7715 0.0000  0.0178  0.0243

Testing

Extra Lets predict the probability of an affair at each value of rate_marriege

We will start by calculating the predicted probability of admission at each value of rank, holding other variable at their means.

newdata1 <- with(data, data.frame(age = mean(age),
                                yrs_married = mean(yrs_married),
                                religious = mean(religious),
                                occupation = mean(occupation),
                                rate_marriage = factor(1:5)))
newdata1
##        age yrs_married religious occupation rate_marriage
## 1 29.08286    9.009425   2.42617   3.424128             1
## 2 29.08286    9.009425   2.42617   3.424128             2
## 3 29.08286    9.009425   2.42617   3.424128             3
## 4 29.08286    9.009425   2.42617   3.424128             4
## 5 29.08286    9.009425   2.42617   3.424128             5
newdata1$rateP <- predict(Logit_model2.1, newdata = newdata1, type = "response")
newdata1
##        age yrs_married religious occupation rate_marriage     rateP
## 1 29.08286    9.009425   2.42617   3.424128             1 0.7081869
## 2 29.08286    9.009425   2.42617   3.424128             2 0.6156370
## 3 29.08286    9.009425   2.42617   3.424128             3 0.5244013
## 4 29.08286    9.009425   2.42617   3.424128             4 0.3123139
## 5 29.08286    9.009425   2.42617   3.424128             5 0.1775172
newdata2 <- with(data, data.frame(age = rep(seq(from = 17, to = 42, length.out = 100),5), yrs_married = mean(yrs_married),
                                religious = mean(religious),
                                occupation = mean(occupation),
                                rate_marriage = factor(rep(1:5, each = 100 ))))
newdata3 <- cbind(newdata2, predict(Logit_model2.1, newdata = newdata2, type = "link", se = TRUE))

newdata3 <- within(newdata3, {
  PredictedProb <- plogis(fit)
  LL <- plogis(fit - (1.96 * se.fit))
  UL <- plogis(fit + (1.96 * se.fit))
  })
ggplot(newdata3, aes(x = age, y = PredictedProb)) + geom_ribbon(aes(ymin = LL,
    ymax = UL, fill = rate_marriage), alpha = 0.2) + geom_line(aes(colour = rate_marriage),
    size = 1)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.