This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Load libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read dataset
# The full path is built with file.path() instead of calling setwd(), so
# loading the data does not change the session's working directory as a
# side effect. Adjust data_dir to wherever the CSV lives on your machine.
data_dir <- "/Users/saitejaravulapalli/Documents/IUPUI_SEM 01/Intro to Statistic in R/DATA SET"
# The file is semicolon-delimited and has a header row.
student_dropout <- read.csv(
  file.path(data_dir, "student dropout.csv"),
  sep = ";",
  header = TRUE
)
# Set seed
set.seed(123)
# Set 1
# Cut-offs used by the High_Achiever indicator below.
previous_qualification_threshold <- 140
admission_threshold <- 130
min_age <- 18
max_age <- 25
# High_Achiever is 1 when both the admission grade and the previous
# qualification grade exceed their thresholds and the age at enrollment lies
# strictly between min_age and max_age; otherwise it is 0.
set1 <- mutate(
  student_dropout,
  High_Achiever = ifelse(
    `Previous.qualification..grade.` > previous_qualification_threshold &
      Admission.grade > admission_threshold &
      Age.at.enrollment > min_age &
      Age.at.enrollment < max_age,
    1,
    0
  )
)
# Correlation
# NOTE(review): both arguments are the same column, so this tests
# High_Achiever against itself and says nothing about any other variable.
# Confirm which second variable was intended.
correlation1 <- cor.test(set1$High_Achiever, set1$High_Achiever, method = "pearson")
print(correlation1)
##
## Pearson's product-moment correlation
##
## data: set1$High_Achiever and set1$High_Achiever
## t = 3155544133, df = 4422, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 1 1
## sample estimates:
## cor
## 1
# Set 2
# Financially_stable is 1 when the student holds a scholarship, is not a
# debtor, and has tuition fees up to date; otherwise it is 0.
set2 <- mutate(
  student_dropout,
  Financially_stable = ifelse(
    Scholarship.holder == 1 &
      Debtor == 0 &
      Tuition.fees.up.to.date == 1,
    1,
    0
  )
)
# Correlation
# NOTE(review): both arguments are the same column, so this tests
# Financially_stable against itself and says nothing about any other
# variable. Confirm which second variable was intended.
correlation2 <- cor.test(set2$Financially_stable, set2$Financially_stable, method = "pearson")
print(correlation2)
##
## Pearson's product-moment correlation
##
## data: set2$Financially_stable and set2$Financially_stable
## t = Inf, df = 4422, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 1 1
## sample estimates:
## cor
## 1
# Set 3
# merit_student is 1 when the mean of the four grade columns (previous
# qualification, admission, 1st-semester and 2nd-semester curricular units)
# is greater than 80; otherwise it is 0.
set3 <- mutate(
  student_dropout,
  merit_student = ifelse(
    (
      `Previous.qualification..grade.` +
        Admission.grade +
        `Curricular.units.1st.sem..grade.` +
        `Curricular.units.2nd.sem..grade.`
    ) / 4 > 80,
    1,
    0
  )
)
# Correlation
# NOTE(review): both arguments are the same column, so this tests
# merit_student against itself and says nothing about any other variable.
# Confirm which second variable was intended.
correlation3 <- cor.test(set3$merit_student, set3$merit_student, method = "pearson")
print(correlation3)
##
## Pearson's product-moment correlation
##
## data: set3$merit_student and set3$merit_student
## t = Inf, df = 4422, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 1 1
## sample estimates:
## cor
## 1
# Plots
# One bar chart per derived indicator; each 0/1 column is converted to a
# factor so the x axis shows two discrete categories.

# Counts of High_Achiever = 0 vs 1
ggplot(set1, aes(x = factor(High_Achiever))) +
  geom_bar(position = "dodge", fill = "blue") +
  labs(
    title = "Distribution of High Achievers",
    x = "High Achiever",
    y = "Count"
  )

# Counts of Financially_stable = 0 vs 1
ggplot(set2, aes(x = factor(Financially_stable))) +
  geom_bar(position = "dodge", fill = "green") +
  labs(
    title = "Distribution of Financially Stable Students",
    x = "Financially Stable",
    y = "Count"
  )

# Counts of merit_student = 0 vs 1
ggplot(set3, aes(x = factor(merit_student))) +
  geom_bar(position = "dodge", fill = "orange") +
  labs(
    title = "Distribution of Merit Students",
    x = "Merit Student",
    y = "Count"
  )
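A student is considered a high achiever if the admission grade is above 130, the age at enrollment is strictly between 18 and 25, and the previous qualification grade is above 140. If the student is a high achiever then the value is 1, and if the student is not a high achiever then the value is 0.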
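A student is considered financially stable if their tuition fees are up to date, they are not a debtor, and they hold a scholarship. If the student is financially stable then the value is 1, and if the student is not financially stable then the value is 0.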
A student is considered a merit student if the average of the previous qualification grade, admission grade, 1st-semester grade, and 2nd-semester grade is more than 80. If the student is a merit student then the value is 1, and if the student is not a merit student then the value is 0.