R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Load libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ✔ readr     2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read dataset
# Build the full path to the CSV instead of calling setwd(): the working
# directory of the session stays untouched and the data location is defined
# in one place. Adjust `data_dir` when running on another machine.
data_dir <- "/Users/saitejaravulapalli/Documents/IUPUI_SEM 01/Intro to Statistic in R/DATA SET"
data_path <- file.path(data_dir, "student dropout.csv")

# Fail early with a readable message if the file is not where we expect it.
if (!file.exists(data_path)) {
  stop("Dataset not found: ", data_path, call. = FALSE)
}

# The file is read as semicolon-delimited with a header row.
student_dropout <- read.csv(data_path, sep = ";", header = TRUE)

# Set seed
# Fixes the RNG state. None of the steps visible in this script draw random
# numbers, so this only matters for any sampling added later.
set.seed(123)

# Set 1
# Cut-offs used to flag a student as a "high achiever".
admission_threshold <- 130
min_age <- 18
max_age <- 25
previous_qualification_threshold <- 140

# Add a 1/0 column `High_Achiever` to a copy of the data: 1 when every
# criterion below holds for the row, 0 otherwise.
# NOTE(review): both age comparisons are strict, so students whose age equals
# min_age or max_age are NOT flagged -- confirm this is intended.
set1 <- mutate(
  student_dropout,
  High_Achiever = ifelse(
    Age.at.enrollment > min_age &
      Age.at.enrollment < max_age &
      Admission.grade > admission_threshold &
      `Previous.qualification..grade.` > previous_qualification_threshold,
    1,
    0
  )
)

# Correlation
# Pearson correlation between the High_Achiever indicator and a 1/0 dropout
# indicator. Correlating the indicator with itself always yields exactly 1,
# so the second argument must be a different variable.
# NOTE(review): assumes the dataset has a `Target` column in which the value
# "Dropout" marks students who dropped out -- confirm against the CSV header.
# Any output rendered before this change has to be regenerated.
stopifnot("Target" %in% names(set1))
dropout_flag1 <- as.integer(set1$Target == "Dropout")
correlation1 <- cor.test(set1$High_Achiever, dropout_flag1, method = "pearson")
print(correlation1)
## 
##  Pearson's product-moment correlation
## 
## data:  set1$High_Achiever and set1$High_Achiever
## t = 3155544133, df = 4422, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  1 1
## sample estimates:
## cor 
##   1
# Set 2
# Add a 1/0 column `Financially_stable` to a copy of the data: 1 when
# Tuition.fees.up.to.date is 1, Debtor is 0 and Scholarship.holder is 1;
# 0 otherwise.
set2 <- mutate(
  student_dropout,
  Financially_stable = ifelse(
    Debtor == 0 & Tuition.fees.up.to.date == 1 & Scholarship.holder == 1,
    1,
    0
  )
)

# Correlation
# Pearson correlation between the Financially_stable indicator and a 1/0
# dropout indicator. Correlating the indicator with itself always yields
# exactly 1, so the second argument must be a different variable.
# NOTE(review): assumes the dataset has a `Target` column in which the value
# "Dropout" marks students who dropped out -- confirm against the CSV header.
# Any output rendered before this change has to be regenerated.
stopifnot("Target" %in% names(set2))
dropout_flag2 <- as.integer(set2$Target == "Dropout")
correlation2 <- cor.test(
  set2$Financially_stable,
  dropout_flag2,
  method = "pearson"
)
print(correlation2)
## 
##  Pearson's product-moment correlation
## 
## data:  set2$Financially_stable and set2$Financially_stable
## t = Inf, df = 4422, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  1 1
## sample estimates:
## cor 
##   1
# Set 3
# Add a 1/0 column `merit_student` to a copy of the data: 1 when the
# unweighted mean of the four grade columns is greater than 80, 0 otherwise.
# NOTE(review): the four grades are averaged as-is; confirm they are all on
# the same scale before relying on the cut-off of 80.
set3 <- mutate(
  student_dropout,
  merit_student = ifelse(
    (
      `Previous.qualification..grade.` +
        Admission.grade +
        `Curricular.units.1st.sem..grade.` +
        `Curricular.units.2nd.sem..grade.`
    ) / 4 > 80,
    1,
    0
  )
)
# Correlation
# Pearson correlation between the merit_student indicator and a 1/0 dropout
# indicator. Correlating the indicator with itself always yields exactly 1,
# so the second argument must be a different variable.
# NOTE(review): assumes the dataset has a `Target` column in which the value
# "Dropout" marks students who dropped out -- confirm against the CSV header.
# Any output rendered before this change has to be regenerated.
stopifnot("Target" %in% names(set3))
dropout_flag3 <- as.integer(set3$Target == "Dropout")
correlation3 <- cor.test(set3$merit_student, dropout_flag3, method = "pearson")
print(correlation3)
## 
##  Pearson's product-moment correlation
## 
## data:  set3$merit_student and set3$merit_student
## t = Inf, df = 4422, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  1 1
## sample estimates:
## cor 
##   1
# Plots
# One bar chart per indicator column; each bar counts the rows that share a
# value of the indicator (treated as a categorical variable via factor()).

# Set 1: High_Achiever
ggplot(data = set1, mapping = aes(x = factor(High_Achiever))) +
  geom_bar(position = "dodge", fill = "blue") +
  labs(
    title = "Distribution of High Achievers",
    x = "High Achiever",
    y = "Count"
  )

# Set 2: Financially_stable
ggplot(data = set2, mapping = aes(x = factor(Financially_stable))) +
  geom_bar(position = "dodge", fill = "green") +
  labs(
    title = "Distribution of Financially Stable Students",
    x = "Financially Stable",
    y = "Count"
  )

# Set 3: merit_student
ggplot(data = set3, mapping = aes(x = factor(merit_student))) +
  geom_bar(position = "dodge", fill = "orange") +
  labs(
    title = "Distribution of Merit Students",
    x = "Merit Student",
    y = "Count"
  )

In the Set 1

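A student is considered a high achiever when the admission grade is above 130, the age at enrollment is strictly between 18 and 25, and the previous qualification grade is above 140. If the student is a high achiever, the value is 1; if the student is not a high achiever, the value is 0.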

In the set 2

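A student is considered financially stable when tuition fees are up to date (Tuition.fees.up.to.date = 1), the student is not a debtor (Debtor = 0), and the student holds a scholarship (Scholarship.holder = 1). If the student is financially stable, the value is 1; if the student is not financially stable, the value is 0.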

In the set 3

A student is considered a merit student if the average of the previous qualification grade, admission grade, 1st semester grade, and 2nd semester grade is more than 80. If the student is a merit student, the value is 1; if the student is not a merit student, the value is 0.