library(tidyverse)
library(gt)
library(readr)
# Load the admissions data set.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file exists. Consider a project-relative path.
admissions <- read_csv("C:/Users/Lenovo/Downloads/admissions.csv")

# Open the spreadsheet-style viewer only in interactive sessions; View() has
# no use (and can fail) when the script is run with Rscript or knitted.
if (interactive()) {
  View(admissions)
}

Plotting admission outcome against GPA

# Scatter plot of `admitted` against `gpa`. Points are jittered (vertical
# spread 0.05) and drawn semi-transparent so overlapping rows stay visible.
ggplot(data = admissions, mapping = aes(x = gpa, y = admitted)) +
  geom_jitter(height = 0.05, alpha = 0.1)

There appears to be a positive relationship between GPA and admission: applicants with a higher GPA are admitted more often.

# Logistic regression of `admitted` on `gpa`, fitted to the rows of
# `admissions`; summary() prints the coefficient table and deviances.
model <- glm(
  formula = admitted ~ gpa,
  family = "binomial",
  data = admissions
)
summary(model)
## 
## Call:
## glm(formula = admitted ~ gpa, family = "binomial", data = admissions)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -12.0352     0.4917  -24.48   <2e-16 ***
## gpa           4.0802     0.1649   24.74   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2906.6  on 2099  degrees of freedom
## Residual deviance: 1527.4  on 2098  degrees of freedom
## AIC: 1531.4
## 
## Number of Fisher Scoring iterations: 5

The p-value for the GPA coefficient is below 2e-16, so GPA is a highly significant predictor of admission.

Plotting with a fitted logistic-regression curve

# Jittered scatter plot of `admitted` against `gpa`, overlaid with a binomial
# GLM curve fitted by geom_smooth (confidence band disabled via se = FALSE).
ggplot(data = admissions, mapping = aes(x = gpa, y = admitted)) +
  geom_jitter(height = 0.05, alpha = 0.1) +
  geom_smooth(
    method = "glm",
    method.args = list(family = "binomial"),
    se = FALSE
  )

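Next, we summarize the data by GPA: for each GPA value we compute the proportion of applicants admitted (the mean of `admitted`) and the number of applicants.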

# For each distinct `gpa` value, compute the mean of `admitted` (the share
# admitted) and the number of rows in that group.
adm_sum <- summarize(
  group_by(admissions, gpa),
  prop_adm = mean(admitted),
  count = n()
)

# Render the summary as a styled gt table.
opt_stylize(gt(adm_sum), style = 2, color = "red")
gpa prop_adm count
2.0 0.01 100
2.1 0.02 100
2.2 0.03 100
2.3 0.03 100
2.4 0.11 100
2.5 0.08 100
2.6 0.23 100
2.7 0.45 100
2.8 0.27 100
2.9 0.38 100
3.0 0.64 100
3.1 0.63 100
3.2 0.81 100
3.3 0.73 100
3.4 0.93 100
3.5 0.83 100
3.6 0.94 100
3.7 0.93 100
3.8 0.97 100
3.9 0.98 100
4.0 0.99 100

Fitting the same logistic model to the grouped proportions, weighted by the number of applicants in each GPA group

# Binomial GLM on the per-GPA summary table: the response is the group mean
# of `admitted` (`prop_adm`) and the group size `count` is passed as weights.
model2 <- glm(
  formula = prop_adm ~ gpa,
  family = "binomial",
  data = adm_sum,
  weights = count
)
summary(model2)
## 
## Call:
## glm(formula = prop_adm ~ gpa, family = "binomial", data = adm_sum, 
##     weights = count)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -12.0352     0.4918  -24.47   <2e-16 ***
## gpa           4.0802     0.1649   24.74   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1429.780  on 20  degrees of freedom
## Residual deviance:   50.493  on 19  degrees of freedom
## AIC: 135.07
## 
## Number of Fisher Scoring iterations: 4

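The AIC is lower (135.07 vs. 1531.4), but this does not mean the grouped model fits better: the coefficient estimates and standard errors are essentially identical to those of the first model. The AIC and deviance differ only because the likelihood is computed on grouped (binomial) data rather than individual (0/1) observations, so the two AIC values are not directly comparable.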