# Read the student dataset from a CSV file (path is relative to the working
# directory) and print a compact overview of its columns and their types.
data <- read.csv(file = "student-mat 2.csv")
str(object = data)
## 'data.frame': 395 obs. of 32 variables:
## $ school : chr "GP" "GP" "GP" "GP" ...
## $ sex : chr "F" "F" "F" "F" ...
## $ age : int 18 17 15 15 16 16 16 17 15 15 ...
## $ address : chr "U" "U" "U" "U" ...
## $ famsize : chr "GT3" "GT3" "LE3" "GT3" ...
## $ Pstatus : chr "A" "T" "T" "T" ...
## $ Medu : int 4 1 1 4 3 4 2 4 3 3 ...
## $ Fedu : int 4 1 1 2 3 3 2 4 2 4 ...
## $ Mjob : chr "at_home" "at_home" "at_home" "health" ...
## $ Fjob : chr "teacher" "other" "other" "services" ...
## $ reason : chr "course" "course" "other" "home" ...
## $ guardian : chr "mother" "father" "mother" "mother" ...
## $ traveltime: int 2 1 1 1 1 1 1 2 1 1 ...
## $ studytime : int 2 2 2 3 2 2 2 2 2 2 ...
## $ failures : int 0 0 3 0 0 0 0 0 0 0 ...
## $ schoolsup : chr "yes" "no" "yes" "no" ...
## $ famsup : chr "no" "yes" "no" "yes" ...
## $ paid : chr "no" "no" "yes" "yes" ...
## $ activities: chr "no" "no" "no" "yes" ...
## $ nursery : chr "yes" "no" "yes" "yes" ...
## $ higher : chr "yes" "yes" "yes" "yes" ...
## $ internet : chr "no" "yes" "yes" "yes" ...
## $ famrel : int 4 5 4 3 4 5 4 4 4 5 ...
## $ freetime : int 3 3 3 2 3 4 4 1 2 5 ...
## $ goout : int 4 3 2 2 2 2 4 4 2 1 ...
## $ Dalc : int 1 1 2 1 1 1 1 1 1 1 ...
## $ Walc : int 1 1 3 1 2 2 1 1 1 1 ...
## $ health : int 3 3 3 5 5 5 3 1 1 5 ...
## $ absences : int 6 4 10 2 4 10 0 6 0 0 ...
## $ G1 : int 5 5 7 15 6 15 12 6 16 14 ...
## $ G2 : int 6 5 8 14 10 15 12 5 18 15 ...
## $ G3 : int 6 6 10 15 10 15 11 6 19 15 ...
# Per-column summary statistics for the whole data frame.
summary(object = data)
## school sex age address
## Length:395 Length:395 Min. :15.0 Length:395
## Class :character Class :character 1st Qu.:16.0 Class :character
## Mode :character Mode :character Median :17.0 Mode :character
## Mean :16.7
## 3rd Qu.:18.0
## Max. :22.0
## famsize Pstatus Medu Fedu
## Length:395 Length:395 Min. :0.000 Min. :0.000
## Class :character Class :character 1st Qu.:2.000 1st Qu.:2.000
## Mode :character Mode :character Median :3.000 Median :2.000
## Mean :2.749 Mean :2.522
## 3rd Qu.:4.000 3rd Qu.:3.000
## Max. :4.000 Max. :4.000
## Mjob Fjob reason guardian
## Length:395 Length:395 Length:395 Length:395
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## traveltime studytime failures schoolsup
## Min. :1.000 Min. :1.000 Min. :0.0000 Length:395
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:0.0000 Class :character
## Median :1.000 Median :2.000 Median :0.0000 Mode :character
## Mean :1.448 Mean :2.035 Mean :0.3342
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:0.0000
## Max. :4.000 Max. :4.000 Max. :3.0000
## famsup paid activities nursery
## Length:395 Length:395 Length:395 Length:395
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## higher internet famrel freetime
## Length:395 Length:395 Min. :1.000 Min. :1.000
## Class :character Class :character 1st Qu.:4.000 1st Qu.:3.000
## Mode :character Mode :character Median :4.000 Median :3.000
## Mean :3.944 Mean :3.235
## 3rd Qu.:5.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000
## goout Dalc Walc health
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:3.000
## Median :3.000 Median :1.000 Median :2.000 Median :4.000
## Mean :3.109 Mean :1.481 Mean :2.291 Mean :3.554
## 3rd Qu.:4.000 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## absences G1 G2 G3
## Min. : 0.000 Min. : 3.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.000 1st Qu.: 8.00 1st Qu.: 9.00 1st Qu.: 8.00
## Median : 4.000 Median :11.00 Median :11.00 Median :11.00
## Mean : 5.709 Mean :10.91 Mean :10.71 Mean :10.42
## 3rd Qu.: 8.000 3rd Qu.:13.00 3rd Qu.:13.00 3rd Qu.:14.00
## Max. :75.000 Max. :19.00 Max. :19.00 Max. :20.00
# Preview the first six rows (head()'s default row count, spelled out).
head(x = data, n = 6L)
## school sex age address famsize Pstatus Medu Fedu Mjob Fjob reason
## 1 GP F 18 U GT3 A 4 4 at_home teacher course
## 2 GP F 17 U GT3 T 1 1 at_home other course
## 3 GP F 15 U LE3 T 1 1 at_home other other
## 4 GP F 15 U GT3 T 4 2 health services home
## 5 GP F 16 U GT3 T 3 3 other other home
## 6 GP M 16 U LE3 T 4 3 services other reputation
## guardian traveltime studytime failures schoolsup famsup paid activities
## 1 mother 2 2 0 yes no no no
## 2 father 1 2 0 no yes no no
## 3 mother 1 2 3 yes no yes no
## 4 mother 1 3 0 no yes yes yes
## 5 father 1 2 0 no yes yes no
## 6 mother 1 2 0 no yes yes yes
## nursery higher internet famrel freetime goout Dalc Walc health absences G1 G2
## 1 yes yes no 4 3 4 1 1 3 6 5 6
## 2 no yes yes 5 3 3 1 1 3 4 5 5
## 3 yes yes yes 4 3 2 2 3 3 10 7 8
## 4 yes yes yes 3 2 2 1 1 5 2 15 14
## 5 yes yes no 4 3 2 1 2 5 4 6 10
## 6 yes yes yes 5 4 2 1 2 5 10 15 15
## G3
## 1 6
## 2 6
## 3 10
## 4 15
## 5 10
## 6 15
# Row subsets of `data`, built with explicit row indexing. which() discards
# rows whose condition evaluates to NA, matching how subset() treats them.

# Split by sex.
male_students   <- data[which(data$sex == "M"), , drop = FALSE]
female_students <- data[which(data$sex == "F"), , drop = FALSE]

# Split by weekend alcohol consumption; a Walc of 3 falls in neither group.
high_walc <- data[which(data$Walc >= 4), , drop = FALSE]
low_walc  <- data[which(data$Walc <= 2), , drop = FALSE]

# Split by study time.
high_study <- data[which(data$studytime >= 3), , drop = FALSE]
low_study  <- data[which(data$studytime <= 2), , drop = FALSE]

# Split by address code.
urban_students <- data[which(data$address == "U"), , drop = FALSE]
rural_students <- data[which(data$address == "R"), , drop = FALSE]

# Split by internet access.
internet_yes <- data[which(data$internet == "yes"), , drop = FALSE]
internet_no  <- data[which(data$internet == "no"), , drop = FALSE]
# Histogram of the final grade (G3) for the male subset.
hist(
  x = male_students[["G3"]],
  xlab = "Final Grade (G3)",
  main = "Histogram of Final Grade - Male Students",
  border = "black",
  col = "lightblue"
)
This histogram shows the distribution of final grades among male students.
# Histogram of the final grade (G3) for the female subset.
hist(
  x = female_students[["G3"]],
  xlab = "Final Grade (G3)",
  main = "Histogram of Final Grade - Female Students",
  border = "black",
  col = "lightpink"
)
This histogram shows the distribution of final grades among female students.
# Histogram of absences for the students in the high_walc subset.
hist(
  x = high_walc[["absences"]],
  xlab = "Absences",
  main = "Histogram of Absences - High Walc Students",
  border = "black",
  col = "orange"
)
This histogram shows the distribution of absences among students with high weekend alcohol consumption (Walc >= 4).
# Scatter plot of final grade (G3) against study time, drawn with solid
# points (pch = 19).
plot(
  x = data[["studytime"]],
  y = data[["G3"]],
  xlab = "Study Time",
  ylab = "Final Grade (G3)",
  main = "Study Time vs Final Grade",
  pch = 19,
  col = "blue"
)
This scatter plot shows the relationship between study time and final grade.
# Scatter plot of final grade (G3) against number of absences, drawn with
# solid points (pch = 19).
plot(
  x = data[["absences"]],
  y = data[["G3"]],
  xlab = "Absences",
  ylab = "Final Grade (G3)",
  main = "Absences vs Final Grade",
  pch = 19,
  col = "red"
)
This scatter plot shows the relationship between absences and final grade.
# Scatter plot of final grade (G3) against weekend alcohol consumption (Walc),
# drawn with solid points (pch = 19).
plot(
  x = data[["Walc"]],
  y = data[["G3"]],
  xlab = "Weekend Alcohol Consumption",
  ylab = "Final Grade (G3)",
  main = "Weekend Alcohol Consumption vs Final Grade",
  pch = 19,
  col = "darkgreen"
)
This scatter plot shows the relationship between weekend alcohol consumption and final grade.
# Box plots of the final grade (G3), one box per value of `sex`.
# The formula interface orders the groups by sorted level, i.e. "F" before
# "M", and `col` is applied in that order. The colours are therefore listed
# female-first so each sex keeps the colour used in its histogram
# (female = lightpink, male = lightblue); the previous order had them swapped.
boxplot(G3 ~ sex, data = data,
        main = "Final Grade by Sex",
        xlab = "Sex",
        ylab = "Final Grade (G3)",
        col = c("lightpink", "lightblue"))
This boxplot compares final grades between male and female students.
# Box plots of the final grade (G3), one box per value of `address`.
# Fill colours are applied to the groups in the order they are drawn.
boxplot(
  G3 ~ address,
  data = data,
  col = c("lightgreen", "lightyellow"),
  xlab = "Address",
  ylab = "Final Grade (G3)",
  main = "Final Grade by Address"
)
This boxplot compares final grades between urban (U) and rural (R) students.
# Box plots of the final grade (G3), one box per value of `internet`.
# Fill colours are applied to the groups in the order they are drawn.
boxplot(
  G3 ~ internet,
  data = data,
  col = c("orange", "cyan"),
  xlab = "Internet Access",
  ylab = "Final Grade (G3)",
  main = "Final Grade by Internet Access"
)
This boxplot compares final grades based on internet access.
# Mosaic plot of the sex x internet contingency table (sex along the x axis).
mosaicplot(
  table(data[["sex"]], data[["internet"]]),
  xlab = "Sex",
  ylab = "Internet Access",
  main = "Sex vs Internet Access",
  color = TRUE
)
This mosaic plot shows the relationship between sex and internet access.
# Mosaic plot of the address x higher contingency table (address along the
# x axis).
mosaicplot(
  table(data[["address"]], data[["higher"]]),
  xlab = "Address",
  ylab = "Higher Education",
  main = "Address vs Higher Education",
  color = TRUE
)
This mosaic plot shows the relationship between address and higher education interest.
# Derive a pass/fail label from the final grade: G3 of 10 or more is "Pass",
# anything lower is "Fail". The label is stored as a new column on `data`.
data[["pass_fail"]] <- ifelse(
  test = data[["G3"]] >= 10,
  yes = "Pass",
  no = "Fail"
)

# Mosaic plot of the schoolsup x pass_fail contingency table (school support
# along the x axis).
mosaicplot(
  table(data[["schoolsup"]], data[["pass_fail"]]),
  xlab = "School Support",
  ylab = "Pass / Fail",
  main = "School Support vs Pass/Fail",
  color = TRUE
)
This mosaic plot shows the relationship between school support and the pass/fail result, where a final grade (G3) of 10 or higher counts as a pass.