# Set 1: Relationship between Age and Admission Grade
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the student records (the path is resolved against the working directory).
df <- read.csv(file = "./Downloads/students_dropout_and_academic_success.csv")

# Bin age at enrollment into four right-closed bands -- (0, 18], (18, 25],
# (25, 40] and (40, Inf] -- then average the admission grade within each band,
# ignoring missing grades. group_by() creates the Age_Group column on the fly.
set1 <- df %>%
  group_by(
    Age_Group = cut(
      x = Age_at_enrollment,
      breaks = c(0, 18, 25, 40, Inf),
      labels = c("Young", "Young Adult", "Adult", "Senior")
    )
  ) %>%
  summarise(Avg_Admission_Grade = mean(x = Admission_grade, na.rm = TRUE))

# Set 2: Relationship between Gender, Displacement, and Admission Grade
# One row per Gender x Displaced combination, holding the mean admission grade
# (missing grades ignored). `.groups = "drop"` returns an ungrouped tibble; the
# default would keep the result grouped by Gender and emit an informational
# message about the grouped output.
set2 <- df %>%
  group_by(Gender, Displaced) %>%
  summarise(
    Avg_Admission_Grade_Set2 = mean(Admission_grade, na.rm = TRUE),
    .groups = "drop"
  )
# Set 3: Average previous-qualification grade per previous-qualification code
# (one row per distinct Previous_qualification value; missing grades ignored).
set3 <- summarise(
  group_by(df, Previous_qualification),
  Avg_Previous_Qualification_Grade = mean(
    Previous_qualification_grade,
    na.rm = TRUE
  )
)
# View the results
# cat() writes the label as plain text (no `[1]` index or quotes), matching the
# cat()-based labels used for the correlation results later in the script.
cat("Set 1:\n")
## Set 1:
print(set1)
## # A tibble: 4 × 2
##   Age_Group   Avg_Admission_Grade
##   <fct>                     <dbl>
## 1 Young                      130.
## 2 Young Adult                126.
## 3 Adult                      126.
## 4 Senior                     128.
# Label and show the gender/displacement summary; cat() prints the label as
# plain text, without the `[1]` index and quotes that print() adds to strings.
cat("Set 2:\n")
## Set 2:
print(set2)
## # A tibble: 4 × 3
## # Groups:   Gender [2]
##   Gender Displaced Avg_Admission_Grade_Set2
##    <int>     <int>                    <dbl>
## 1      0         0                     127.
## 2      0         1                     127.
## 3      1         0                     127.
## 4      1         1                     127.
# Label and show the previous-qualification summary; cat() prints the label as
# plain text, without the `[1]` index and quotes that print() adds to strings.
cat("Set 3:\n")
## Set 3:
print(set3)
## # A tibble: 17 × 2
##    Previous_qualification Avg_Previous_Qualification_Grade
##                     <int>                            <dbl>
##  1                      1                             132.
##  2                      2                             126.
##  3                      3                             131.
##  4                      4                             154.
##  5                      5                             140 
##  6                      6                             131.
##  7                      9                             133.
##  8                     10                             125.
##  9                     12                             128.
## 10                     14                             133.
## 11                     15                             152.
## 12                     19                             130.
## 13                     38                             128.
## 14                     39                             140.
## 15                     40                             130.
## 16                     42                             139.
## 17                     43                             153.

Set 1: Relationship between Age and Admission Grade

Insights:

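The data has been grouped into four age groups based on age at enrollment: Young (18 or younger), Young Adult (over 18 to 25), Adult (over 25 to 40), and Senior (over 40). The average admission grade is calculated for each age group.

Significance:

This set allows us to understand how admission grades vary across different age groups. It provides insights into whether there is a notable difference in admission grades based on the age at enrollment. In this data the group averages all fall between roughly 126 and 130, so the differences between age groups are small.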

Set 2: Relationship between Gender, Displacement, and Admission Grade

Insights:

The data has been grouped by Gender and Displacement. The average admission grade is calculated for each combination of gender and displacement status.

Significance:

This set helps us explore whether there are differences in admission grades based on both gender and displacement status. It provides insights into potential disparities in admission grades related to gender and displacement. In this data all four combinations average about 127, so no disparity is visible at the level of group means.

Set 3: Relationship between Previous Qualification and Previous Qualification Grade

Insights:

The data has been grouped by Previous Qualification. The average previous qualification grade is calculated for each previous qualification code.

Significance:

This set allows us to compare the average grade obtained in the previous qualification across the different types of previous qualification. Note that it summarises the previous qualification grade, not the admission grade; the relationship between previous qualification and admission grade is examined separately through the correlation coefficient.

library(ggplot2)
# Visualization for Set 1: one bar per age group, bar height = mean grade.
# geom_col() draws bars directly from the y values in the data.
ggplot(data = set1, mapping = aes(x = Age_Group, y = Avg_Admission_Grade)) +
  geom_col(fill = "skyblue") +
  ggtitle("Average Admission Grade by Age Group") +
  xlab("Age Group") +
  ylab("Average Admission Grade")

This bar plot illustrates the average admission grade across different age groups. It compares the mean admission grade of young, young adult, adult, and senior students; it does not show how grades are spread within each group.

# Visualization for Set 2

# Gender and Displaced are stored as integer codes; turn them into factors so
# ggplot2 treats them as categories (discrete axis and fill scale).
# `$<-` is used rather than mutate() so this also works if set2 is still
# grouped by Gender.
set2$Gender <- as.factor(set2$Gender)
set2$Displaced <- as.factor(set2$Displaced)

# One bar per Gender x Displaced combination, coloured by Gender.
ggplot(
  data = set2,
  mapping = aes(
    x = interaction(Gender, Displaced),
    y = Avg_Admission_Grade_Set2,
    fill = Gender
  )
) +
  geom_col(position = "dodge", color = "black") +
  scale_fill_manual(name = "Gender", values = c("lightblue", "pink")) +
  labs(
    title = "Average Admission Grade by Gender and Displacement",
    x = "Gender and Displacement",
    y = "Average Admission Grade"
  ) +
  # Tilt the combined "Gender.Displaced" tick labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

This plot visualizes the average admission grade based on gender and displacement status. The interaction function is used to create a combined factor variable for gender and displacement, and the plot is colored by gender.

# Visualization for Set 3
# The Previous_qualification codes are non-contiguous integers (1-6, 9, 10, ...,
# 43 in the summary above). Mapped as a number they would sit on a continuous
# axis, spacing the bars by code value and leaving wide empty gaps; mapping the
# codes as a factor gives one evenly spaced, labelled bar per code.
ggplot(
  set3,
  aes(x = factor(Previous_qualification), y = Avg_Previous_Qualification_Grade)
) +
  geom_col(fill = "lightcoral") +
  labs(
    title = "Average Previous Qualification Grade by Previous Qualification",
    x = "Previous Qualification",
    y = "Average Previous Qualification Grade"
  )

This bar plot displays the average grades in the previous qualification across different levels of previous qualifications. It gives insights into the academic performance in previous qualifications.

# Set 1: Relationship between Age and Admission Grade
# Correlation between the two columns, using only rows where both are present.
cor_set1 <- cor(df$Age_at_enrollment, df$Admission_grade, use = "complete.obs")

# Set 2: Relationship between Gender, Displacement, and Admission Grade
# Selecting the columns by name keeps their names as the row/column names of
# the resulting 3 x 3 correlation matrix, so every coefficient is identifiable.
cor_set2 <- cor(
  df[, c("Gender", "Displaced", "Admission_grade")],
  use = "complete.obs"
)

# Set 3: Relationship between Previous Qualification and Admission Grade
# NOTE(review): Previous_qualification looks like a category code rather than
# an ordered quantity -- confirm that a linear correlation is meaningful here.
cor_set3 <- cor(df$Previous_qualification, df$Admission_grade, use = "complete.obs")

# Display correlation coefficients
cat("Set 1 Correlation Coefficient:", cor_set1, "\n")
## Set 1 Correlation Coefficient: -0.02991536
# cor_set2 is a matrix: cat() would flatten it into nine unlabelled numbers, so
# print it (rounded) with its row and column names instead.
cat("Set 2 Correlation Coefficients:\n")
## Set 2 Correlation Coefficients:
print(round(cor_set2, 4))
##                  Gender Displaced Admission_grade
## Gender           1.0000   -0.1258          0.0083
## Displaced       -0.1258    1.0000         -0.0005
## Admission_grade  0.0083   -0.0005          1.0000
cat("Set 3 Correlation Coefficient:", cor_set3, "\n")
## Set 3 Correlation Coefficient: 0.184183

Set 1: Age and Admission Grade

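The correlation coefficient here indicates the strength and direction of the linear relationship between age at enrollment and admission grade. A positive correlation would suggest that older students tend to have higher admission grades. The computed coefficient is about -0.03, which is very close to zero, so there is essentially no linear relationship between age and admission grade in this data.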

Interpretation:

A positive correlation would suggest that, on average, as the age at enrollment increases, the admission grade tends to increase. A negative correlation would suggest the opposite. It’s important to note that correlation does not imply causation. Even if there is a correlation, it doesn’t mean that age directly causes changes in the admission grade.

Set 2: Gender, Displacement, and Admission Grade

Here, the correlation coefficient will show the linear relationship between gender, displacement, and admission grade. A positive or negative correlation will indicate the direction and strength of these relationships.

Interpretation:

Positive correlation would imply that, on average, higher admission grades are associated with a specific gender or displacement status. Negative correlation would imply the opposite. For a set of variables the result is a correlation matrix with one coefficient per pair of variables, not a single overall measure, so each pair must be interpreted individually. Here the coefficients with admission grade are about 0.008 for gender and -0.0005 for displacement, both effectively zero, while gender and displacement correlate with each other at about -0.13.

Set 3: Previous Qualification and Admission Grade

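The correlation coefficient in this set indicates how previous qualification is linearly related to admission grade. A positive correlation suggests that higher previous qualification codes are associated with higher admission grades. The computed coefficient is about 0.18, a weak positive relationship. Because previous qualification is recorded as a code, this value should be read with caution unless the codes are ordered by level of qualification.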

Interpretation:

A positive correlation implies that, on average, students with higher previous qualification codes tend to have higher admission grades. A negative correlation would imply the opposite. The observed value of about 0.18 points to a weak positive association. As with the other sets, correlation does not imply causation, and the coefficient only captures a linear relationship between the two variables.