install.packages("tidyverse", repos = "https://cloud.r-project.org")
##
## The downloaded binary packages are in
## /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpMWwXQk/downloaded_packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
install.packages("psych", repos = "https://cloud.r-project.org")
##
## The downloaded binary packages are in
## /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpMWwXQk/downloaded_packages
library(psych)
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
install.packages("knitr",repos = "https://cloud.r-project.org")
##
## The downloaded binary packages are in
## /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpMWwXQk/downloaded_packages
library("knitr")
install.packages("kableExtra",repos = "https://cloud.r-project.org")
##
## The downloaded binary packages are in
## /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpMWwXQk/downloaded_packages
library("kableExtra")
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
install.packages("apaTables", repos = "https://cloud.r-project.org")
##
## The downloaded binary packages are in
## /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpMWwXQk/downloaded_packages
library(apaTables)
install.packages("modelsummary", repos = "https://cloud.r-project.org")
##
## The downloaded binary packages are in
## /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpMWwXQk/downloaded_packages
library("modelsummary")
##
## Attaching package: 'modelsummary'
##
## The following object is masked from 'package:psych':
##
## SD
install.packages("margins", repos = "https://cloud.r-project.org")
##
## The downloaded binary packages are in
## /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpMWwXQk/downloaded_packages
library(margins)
library(ggplot2)
data <- read_csv("affair.csv")
## Rows: 6366 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (9): affair, rate_marriage, age, yrs_married, children, religious, educ,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data$rate_marriage <- factor(data$rate_marriage)
Logit_model2.1 <- glm(affair ~ rate_marriage + yrs_married + age + religious + occupation, data = data, family = "binomial")
summary(Logit_model2.1)
##
## Call:
## glm(formula = affair ~ rate_marriage + yrs_married + age + religious +
## occupation, family = "binomial", data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0543 -0.8063 -0.5748 0.9941 2.3494
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.211808 0.339627 6.512 7.39e-11 ***
## rate_marriage2 -0.415525 0.263891 -1.575 0.11535
## rate_marriage3 -0.788912 0.246372 -3.202 0.00136 **
## rate_marriage4 -1.675918 0.242104 -6.922 4.44e-12 ***
## rate_marriage5 -2.419855 0.243072 -9.955 < 2e-16 ***
## yrs_married 0.115507 0.009414 12.270 < 2e-16 ***
## age -0.065531 0.009992 -6.559 5.43e-11 ***
## religious -0.374411 0.034646 -10.807 < 2e-16 ***
## occupation 0.130942 0.031477 4.160 3.18e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 8005.1 on 6365 degrees of freedom
## Residual deviance: 6936.6 on 6357 degrees of freedom
## AIC: 6954.6
##
## Number of Fisher Scoring iterations: 4
summary(margins(Logit_model2.1))
## factor AME SE z p lower upper
## age -0.0120 0.0018 -6.6313 0.0000 -0.0155 -0.0084
## occupation 0.0239 0.0057 4.1785 0.0000 0.0127 0.0351
## rate_marriage2 -0.0881 0.0539 -1.6360 0.1018 -0.1937 0.0175
## rate_marriage3 -0.1735 0.0498 -3.4859 0.0005 -0.2711 -0.0759
## rate_marriage4 -0.3721 0.0484 -7.6853 0.0000 -0.4670 -0.2772
## rate_marriage5 -0.5038 0.0481 -10.4747 0.0000 -0.5980 -0.4095
## religious -0.0683 0.0061 -11.1365 0.0000 -0.0803 -0.0563
## yrs_married 0.0211 0.0017 12.7715 0.0000 0.0178 0.0243
Testing
We will start by calculating the predicted probability of admission
at each value of rank, holding other variable at their means.
newdata1 <- with(data, data.frame(age = mean(age),
yrs_married = mean(yrs_married),
religious = mean(religious),
occupation = mean(occupation),
rate_marriage = factor(1:5)))
newdata1
## age yrs_married religious occupation rate_marriage
## 1 29.08286 9.009425 2.42617 3.424128 1
## 2 29.08286 9.009425 2.42617 3.424128 2
## 3 29.08286 9.009425 2.42617 3.424128 3
## 4 29.08286 9.009425 2.42617 3.424128 4
## 5 29.08286 9.009425 2.42617 3.424128 5
newdata1$rateP <- predict(Logit_model2.1, newdata = newdata1, type = "response")
newdata1
## age yrs_married religious occupation rate_marriage rateP
## 1 29.08286 9.009425 2.42617 3.424128 1 0.7081869
## 2 29.08286 9.009425 2.42617 3.424128 2 0.6156370
## 3 29.08286 9.009425 2.42617 3.424128 3 0.5244013
## 4 29.08286 9.009425 2.42617 3.424128 4 0.3123139
## 5 29.08286 9.009425 2.42617 3.424128 5 0.1775172
newdata2 <- with(data, data.frame(age = rep(seq(from = 17, to = 42, length.out = 100),5), yrs_married = mean(yrs_married),
religious = mean(religious),
occupation = mean(occupation),
rate_marriage = factor(rep(1:5, each = 100 ))))
newdata3 <- cbind(newdata2, predict(Logit_model2.1, newdata = newdata2, type = "link", se = TRUE))
newdata3 <- within(newdata3, {
PredictedProb <- plogis(fit)
LL <- plogis(fit - (1.96 * se.fit))
UL <- plogis(fit + (1.96 * se.fit))
})
ggplot(newdata3, aes(x = age, y = PredictedProb)) + geom_ribbon(aes(ymin = LL,
ymax = UL, fill = rate_marriage), alpha = 0.2) + geom_line(aes(colour = rate_marriage),
size = 1)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
