Loading my data set:

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the comma-delimited student data set into a tibble.
# read_delim() is used with an explicit delimiter; column types are guessed
# by readr (see the column specification message printed below).
dataset <- read_delim(
  file = "C:/Users/MSKR/MASTERS_ADS/STATISTICS_SEM1/DATA_SET_1.csv",
  delim = ","
)
## Rows: 4424 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): Target
## dbl (36): Marital status, Application mode, Application order, Course, Dayti...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Creating a custom table by adding recoded versions of the necessary categorical columns:

# Build the analysis table: a copy of the raw data plus four derived columns.
# All columns are referenced by bare name inside a single mutate(), so every
# expression is evaluated against the same table.
dataset_1 <- mutate(
  dataset,
  # Marital status code -> label. Missing codes stay NA; any other code
  # falls back to "no".
  # NOTE(review): the spelling "legally seperated" is kept unchanged so that
  # existing label values do not change; confirm before correcting it.
  marital_status = case_when(
    is.na(`Marital status`) ~ NA_character_,
    `Marital status` == 1 ~ "single",
    `Marital status` == 2 ~ "married",
    `Marital status` == 3 ~ "widower",
    `Marital status` == 4 ~ "divorced",
    `Marital status` == 5 ~ "facto union",
    `Marital status` == 6 ~ "legally seperated",
    .default = "no"
  ),
  # The source column name ends in a tab character (it prints as
  # `Daytime/evening attendance\t`). Spell the tab as an escape so the lookup
  # does not depend on invisible whitespace surviving copy/paste.
  day_eve_class = if_else(
    .data[["Daytime/evening attendance\t"]] == 1, "day", "evening"
  ),
  # Row-wise mean of the 1st- and 2nd-semester grades.
  sem_results = rowMeans(cbind(
    `Curricular units 1st sem (grade)`,
    `Curricular units 2nd sem (grade)`
  )),
  # Numeric outcome code: Graduate = 1, Enrolled = 2, Dropout = 0.
  # Unknown labels become NA so the column stays numeric instead of being
  # silently coerced to character by a string fallback.
  target = case_when(
    Target == "Graduate" ~ 1,
    Target == "Enrolled" ~ 2,
    Target == "Dropout" ~ 0,
    .default = NA_real_
  )
)

Logistic Regression Model:

# Binary subset for the logistic model: drop every row whose target code is 2
# (the "Enrolled" students), then print the resulting tibble.
df_bin <- dataset_1 %>%
  filter(target != 2)
df_bin
## # A tibble: 3,630 × 41
##    `Marital status` `Application mode` `Application order` Course
##               <dbl>              <dbl>               <dbl>  <dbl>
##  1                1                 17                   5    171
##  2                1                 15                   1   9254
##  3                1                  1                   5   9070
##  4                1                 17                   2   9773
##  5                2                 39                   1   8014
##  6                2                 39                   1   9991
##  7                1                  1                   1   9500
##  8                1                 18                   4   9254
##  9                1                  1                   3   9238
## 10                1                  1                   1   9238
## # ℹ 3,620 more rows
## # ℹ 37 more variables: `Daytime/evening attendance\t` <dbl>,
## #   `Previous qualification` <dbl>, `Previous qualification (grade)` <dbl>,
## #   Nacionality <dbl>, `Mother's qualification` <dbl>,
## #   `Father's qualification` <dbl>, `Mother's occupation` <dbl>,
## #   `Father's occupation` <dbl>, `Admission grade` <dbl>, Displaced <dbl>,
## #   `Educational special needs` <dbl>, Debtor <dbl>, …
# Make sure the response is stored as a number before fitting.
df_bin <- mutate(df_bin, target = as.numeric(target))

# Logistic regression model: binomial GLM of the target code on the two
# entry grades and the two semester grades.
logit_model <- glm(
  formula = target ~ `Previous qualification (grade)` + `Admission grade` +
    `Curricular units 1st sem (grade)` + `Curricular units 2nd sem (grade)`,
  family = binomial,
  data = df_bin
)
summary(logit_model)
## 
## Call:
## glm(formula = target ~ `Previous qualification (grade)` + `Admission grade` + 
##     `Curricular units 1st sem (grade)` + `Curricular units 2nd sem (grade)`, 
##     family = binomial, data = df_bin)
## 
## Coefficients:
##                                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                        -5.686198   0.507221 -11.210  < 2e-16 ***
## `Previous qualification (grade)`    0.010695   0.004076   2.624 0.008695 ** 
## `Admission grade`                   0.012588   0.003817   3.298 0.000973 ***
## `Curricular units 1st sem (grade)`  0.020274   0.019223   1.055 0.291567    
## `Curricular units 2nd sem (grade)`  0.282426   0.018089  15.613  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4859.8  on 3629  degrees of freedom
## Residual deviance: 3369.5  on 3625  degrees of freedom
## AIC: 3379.5
## 
## Number of Fisher Scoring iterations: 5

Confidence Interval for the 2nd-semester grade coefficient:

To construct the 95% confidence interval for the coefficient of `Curricular units 2nd sem (grade)`, I take its estimate and standard error from the model summary and compute estimate ± 1.96 × standard error.

# Row/element name of the 2nd-semester grade term in the fitted model.
sem2_term <- "`Curricular units 2nd sem (grade)`"

# Point estimate and its standard error, both taken from the fitted model.
coef_estimate <- coef(logit_model)[sem2_term]
std_error <- coef(summary(logit_model))[sem2_term, "Std. Error"]

# Calculate the 95% CI: estimate -/+ 1.96 standard errors.
margin <- 1.96 * std_error
ci_lower <- coef_estimate - margin
ci_upper <- coef_estimate + margin
c(ci_lower, ci_upper)
## `Curricular units 2nd sem (grade)` `Curricular units 2nd sem (grade)` 
##                          0.2469712                          0.3178807