Admitted

library(readr)
library(readxl)

# Read the admissions data set into a tibble.
# NOTE(review): this is an absolute, machine-specific Windows path; the script
# will only run where that exact file exists -- consider a project-relative
# path.
Admission_Predict <- read_csv(
  file = "C:/Users/USER/Desktop/data_science_portfolio/Admission_Predict.csv"
)

## Rows: 400 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (9): Serial No., GRE Score, TOEFL Score, University Rating, SOP, LOR, CG...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

library(stringr)

# Normalise the column names in three steps:
#   1. replace every whitespace character with "_"
#   2. lower-case the whole name
#   3. title-case the result
# local() keeps the intermediate vector out of the global environment.
names(Admission_Predict) <- local({
  underscored <- str_replace_all(names(Admission_Predict), "\\s", "_")
  lowered <- str_to_lower(underscored)
  str_to_title(lowered)
})

# Preview the first rows to confirm the renamed columns.
head(Admission_Predict)

## # A tibble: 6 × 9
##   Serial_no. Gre_score Toefl_score University_rating   Sop   Lor  Cgpa Research
##        <dbl>     <dbl>       <dbl>             <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1          1       337         118                 4   4.5   4.5  9.65        1
## 2          2       324         107                 4   4     4.5  8.87        1
## 3          3       316         104                 3   3     3.5  8           1
## 4          4       322         110                 3   3.5   2.5  8.67        1
## 5          5       314         103                 2   2     3    8.21        0
## 6          6       330         115                 5   4.5   3    9.34        1
## # ℹ 1 more variable: Chance_of_admit <dbl>

# List the cleaned column names (for a data frame, colnames() returns names()).
colnames(Admission_Predict)

## [1] "Serial_no."        "Gre_score"         "Toefl_score"      
## [4] "University_rating" "Sop"               "Lor"              
## [7] "Cgpa"              "Research"          "Chance_of_admit"

# Logistic regression ----
# Model Chance_of_admit as a function of Sop, Gre_score and Lor using a
# binomial GLM.
g <- glm(
  formula = Chance_of_admit ~ Sop + Gre_score + Lor,
  family = "binomial",
  data = Admission_Predict
)

# Coefficient table, deviances and AIC for the fitted model.
summary(g)

## 
## Call:
## glm(formula = Chance_of_admit ~ Sop + Gre_score + Lor, family = "binomial", 
##     data = Admission_Predict)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -65.30172    7.65939  -8.526  < 2e-16 ***
## Sop           0.99611    0.26188   3.804 0.000143 ***
## Gre_score     0.18664    0.02382   7.836 4.64e-15 ***
## Lor           0.65551    0.26487   2.475 0.013329 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 550.51  on 399  degrees of freedom
## Residual deviance: 251.74  on 396  degrees of freedom
## AIC: 259.74
## 
## Number of Fisher Scoring iterations: 6

library(ggplot2)

# Jittered scatter of Chance_of_admit against Cgpa, overlaid with a
# single-predictor logistic curve (no confidence band).
# NOTE(review): height = 0.5 jitters each point by up to +/-0.5 on the y axis,
# so points at 0 and at 1 spread until their bands meet at 0.5; a smaller
# height may show the two outcome levels more clearly.
ggplot(data = Admission_Predict, mapping = aes(x = Cgpa, y = Chance_of_admit)) +
  geom_jitter(height = 0.5, alpha = 0.1) +
  geom_smooth(
    method = "glm",
    method.args = list(family = "binomial"),
    se = FALSE
  ) +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# For every distinct Cgpa value, compute the mean of Chance_of_admit (`prop`)
# and the number of rows in the group (`count`).
# NOTE(review): the object name `sum` shadows base::sum(); later chunks refer
# to it by this name, so renaming has to be done across the whole script.
sum <- Admission_Predict %>%
  group_by(Cgpa) %>%
  summarise(
    prop = mean(Chance_of_admit),
    count = n()
  )

sum

## # A tibble: 168 × 3
##     Cgpa  prop count
##    <dbl> <dbl> <int>
##  1  6.8      0     1
##  2  7.2      0     1
##  3  7.25     0     1
##  4  7.28     0     1
##  5  7.3      0     1
##  6  7.34     0     2
##  7  7.36     0     1
##  8  7.4      0     1
##  9  7.43     0     2
## 10  7.46     0     3
## # ℹ 158 more rows

# Scatter plot of the per-Cgpa mean of Chance_of_admit.
ggplot(data = sum, mapping = aes(x = Cgpa, y = prop)) +
  geom_point()

model2

# Binomial GLM on the aggregated data: the response is the per-Cgpa
# proportion `prop`, and `count` (rows per group) is supplied as the weights.
model2 <- glm(
  formula = prop ~ Cgpa,
  family = "binomial",
  data = sum,
  weights = count
)

# Coefficient table, deviances and AIC for the aggregated model.
summary(model2)

## 
## Call:
## glm(formula = prop ~ Cgpa, family = "binomial", data = sum, weights = count)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -52.4698     5.3325  -9.840   <2e-16 ***
## Cgpa          6.0403     0.6149   9.824   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 397.334  on 167  degrees of freedom
## Residual deviance:  84.681  on 166  degrees of freedom
## AIC: 143.84
## 
## Number of Fisher Scoring iterations: 6

# Per-Cgpa proportions with a fitted logistic curve.
# `prop` is a proportion, so the binomial fit needs the group size as weights.
# Without them glm() warns "non-integer #successes in a binomial glm!" and
# gives a one-row group the same influence as a larger one. geom_smooth()
# passes the `weight` aesthetic to glm() as `weights`, so the curve here is
# fitted the same way as `model2` (prop ~ Cgpa, weights = count).
ggplot(sum, aes(x = Cgpa, y = prop)) +
  geom_point() +
  geom_smooth(
    aes(weight = count),
    method = "glm",
    method.args = list(family = "binomial"),
    se = FALSE
  )

## `geom_smooth()` using formula = 'y ~ x'

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

Admitted

mugo

24 February 2024

model2