R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(knitr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(ggplot2)
library(ggpubr)
library(modelr)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(broom)
## 
## Attaching package: 'broom'
## 
## The following object is masked from 'package:modelr':
## 
##     bootstrap
# Fix the random seed so any later randomised steps give repeatable results.
set.seed(123)

# Locate the raw data file. The path is assembled with file.path() and passed
# straight to read.csv() instead of calling setwd(), so the session's working
# directory is left untouched.
data_dir <- file.path(
  "/Users", "saitejaravulapalli", "Documents", "IUPUI_SEM 01",
  "Intro to Statistic in R", "DATA SET"
)
data_path <- file.path(data_dir, "student dropout.csv")

# The file is semicolon-delimited and has a header row.
data <- read.csv(data_path, sep = ";", header = TRUE)

# Print the column names and types of the loaded data frame.
str(data)
## 'data.frame':    4424 obs. of  37 variables:
##  $ Marital.status                                : int  1 1 1 1 2 2 1 1 1 1 ...
##  $ Application.mode                              : int  17 15 1 17 39 39 1 18 1 1 ...
##  $ Application.order                             : int  5 1 5 2 1 1 1 4 3 1 ...
##  $ Course                                        : int  171 9254 9070 9773 8014 9991 9500 9254 9238 9238 ...
##  $ Daytime.evening.attendance.                   : int  1 1 1 1 0 0 1 1 1 1 ...
##  $ Previous.qualification                        : int  1 1 1 1 1 19 1 1 1 1 ...
##  $ Previous.qualification..grade.                : num  122 160 122 122 100 ...
##  $ Nacionality                                   : int  1 1 1 1 1 1 1 1 62 1 ...
##  $ Mother.s.qualification                        : int  19 1 37 38 37 37 19 37 1 1 ...
##  $ Father.s.qualification                        : int  12 3 37 37 38 37 38 37 1 19 ...
##  $ Mother.s.occupation                           : int  5 3 9 5 9 9 7 9 9 4 ...
##  $ Father.s.occupation                           : int  9 3 9 3 9 7 10 9 9 7 ...
##  $ Admission.grade                               : num  127 142 125 120 142 ...
##  $ Displaced                                     : int  1 1 1 1 0 0 1 1 0 1 ...
##  $ Educational.special.needs                     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Debtor                                        : int  0 0 0 0 0 1 0 0 0 1 ...
##  $ Tuition.fees.up.to.date                       : int  1 0 0 1 1 1 1 0 1 0 ...
##  $ Gender                                        : int  1 1 1 0 0 1 0 1 0 0 ...
##  $ Scholarship.holder                            : int  0 0 0 0 0 0 1 0 1 0 ...
##  $ Age.at.enrollment                             : int  20 19 19 20 45 50 18 22 21 18 ...
##  $ International                                 : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ Curricular.units.1st.sem..credited.           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Curricular.units.1st.sem..enrolled.           : int  0 6 6 6 6 5 7 5 6 6 ...
##  $ Curricular.units.1st.sem..evaluations.        : int  0 6 0 8 9 10 9 5 8 9 ...
##  $ Curricular.units.1st.sem..approved.           : int  0 6 0 6 5 5 7 0 6 5 ...
##  $ Curricular.units.1st.sem..grade.              : num  0 14 0 13.4 12.3 ...
##  $ Curricular.units.1st.sem..without.evaluations.: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Curricular.units.2nd.sem..credited.           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Curricular.units.2nd.sem..enrolled.           : int  0 6 6 6 6 5 8 5 6 6 ...
##  $ Curricular.units.2nd.sem..evaluations.        : int  0 6 0 10 6 17 8 5 7 14 ...
##  $ Curricular.units.2nd.sem..approved.           : int  0 6 0 5 6 5 8 0 6 2 ...
##  $ Curricular.units.2nd.sem..grade.              : num  0 13.7 0 12.4 13 ...
##  $ Curricular.units.2nd.sem..without.evaluations.: int  0 0 0 0 0 5 0 0 0 0 ...
##  $ Unemployment.rate                             : num  10.8 13.9 10.8 9.4 13.9 16.2 15.5 15.5 16.2 8.9 ...
##  $ Inflation.rate                                : num  1.4 -0.3 1.4 -0.8 -0.3 0.3 2.8 2.8 0.3 1.4 ...
##  $ GDP                                           : num  1.74 0.79 1.74 -3.12 0.79 -0.92 -4.06 -4.06 -0.92 3.51 ...
##  $ Target                                        : chr  "Dropout" "Graduate" "Dropout" "Graduate" ...
# Recode the outcome as numeric: 1 when Target is "Graduate", 0 for every
# other value.
# NOTE(review): any status other than "Graduate" (e.g. an "Enrolled" level, if
# the data contains one) is also coded 0 here, not only "Dropout" -- confirm
# this grouping is intended.
data$Target <- as.numeric(data$Target == "Graduate")

# Logistic regression of the recoded outcome on age at enrollment and
# admission grade.
model <- glm(
  Target ~ Age.at.enrollment + Admission.grade,
  family = binomial,
  data = data
)

# Coefficient table, deviances and AIC for the fitted model.
summary(model)
## 
## Call:
## glm(formula = Target ~ Age.at.enrollment + Admission.grade, family = binomial, 
##     data = data)
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -0.949996   0.295953  -3.210  0.00133 ** 
## Age.at.enrollment -0.056417   0.004567 -12.353  < 2e-16 ***
## Admission.grade    0.017701   0.002193   8.071 6.99e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6133.0  on 4423  degrees of freedom
## Residual deviance: 5889.8  on 4421  degrees of freedom
## AIC: 5895.8
## 
## Number of Fisher Scoring iterations: 4
# 95% confidence intervals for the model coefficients (on the log-odds scale).
confint(model, level = 0.95)
## Waiting for profiling to be done...
##                         2.5 %      97.5 %
## (Intercept)       -1.53074793 -0.37033358
## Age.at.enrollment -0.06547837 -0.04756894
## Admission.grade    0.01341922  0.02201884
# Scatter plot of the 0/1 outcome against age at enrollment.
ggplot(data = data, mapping = aes(Age.at.enrollment, Target)) +
  geom_point() +
  labs(title = "Scatter Plot: Age at Enrollment vs. Target")

# Scatter plot of the 0/1 outcome against admission grade.
ggplot(data = data, mapping = aes(Admission.grade, Target)) +
  geom_point() +
  labs(title = "Scatter Plot: Admission Grade vs. Target")