#1
#2 sex - student's sex (binary: 'F' - female or 'M' - male)
#3 age - student's age (numeric: from 15 to 22)
#4 address - student's home address type (binary: 'U' - urban or 'R' - rural)
#5 famsize - family size (binary: 'LE3' - less or equal to 3 or 'GT3' - greater than 3)
#6 Pstatus - parent's cohabitation status (binary: 'T' - living together or 'A' - apart)
#7 Medu - mother's education (numeric: 0 - none, 1 - primary education (4th grade), 2 - 5th to 9th grade, 3 - secondary education or 4 - higher education)
#8 Fedu - father's education (numeric: 0 - none, 1 - primary education (4th grade), 2 - 5th to 9th grade, 3 - secondary education or 4 - higher education)
#9 Mjob - mother's job (nominal: 'teacher', 'health' care related, civil 'services' (e.g. administrative or police), 'at_home' or 'other')
#10 Fjob - father's job (nominal: 'teacher', 'health' care related, civil 'services' (e.g. administrative or police), 'at_home' or 'other')
#11 reason - reason to choose this school (nominal: close to 'home', school 'reputation', 'course' preference or 'other')
#12 guardian - student's guardian (nominal: 'mother', 'father' or 'other')
#13 traveltime - home to school travel time (numeric: 1 - <15 min., 2 - 15 to 30 min., 3 - 30 min. to 1 hour, or 4 - >1 hour)
#14 studytime - weekly study time (numeric: 1 - <2 hours, 2 - 2 to 5 hours, 3 - 5 to 10 hours, or 4 - >10 hours)
#15 failures - number of past class failures (numeric: n if 1<=n<3, else 4)
#16 schoolsup - extra educational support (binary: yes or no)
#17 famsup - family educational support (binary: yes or no)
#18 paid - extra paid classes within the course subject (Math or Portuguese) (binary: yes or no)
#19 activities - extra-curricular activities (binary: yes or no)
#20 nursery - attended nursery school (binary: yes or no)
#21 higher - wants to take higher education (binary: yes or no)
#22 internet - Internet access at home (binary: yes or no)
#23 romantic - with a romantic relationship (binary: yes or no)
#24 famrel - quality of family relationships (numeric: from 1 - very bad to 5 - excellent)
#25 freetime - free time after school (numeric: from 1 - very low to 5 - very high)
#26 goout - going out with friends (numeric: from 1 - very low to 5 - very high)
#27 Dalc - workday alcohol consumption (numeric: from 1 - very low to 5 - very high)
#28 Walc - weekend alcohol consumption (numeric: from 1 - very low to 5 - very high)
#29 health - current health status (numeric: from 1 - very bad to 5 - very good)
#30 absences - number of school absences (numeric: from 0 to 93)
#3
# Load the student data set from student-mat.csv.
# The file is addressed through an explicit path instead of calling setwd(),
# so running the script does not change the session's working directory.
data_dir <- "~/Data 101"
student_csv <- file.path(data_dir, "student-mat.csv")
# Fail early with a readable message when the data file is not where expected.
if (!file.exists(path.expand(student_csv))) {
  stop("Data file not found: ", student_csv, call. = FALSE)
}
student_df <- read.csv(student_csv)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.1.3
library(ggplot2)
library(psych)
## Warning: package 'psych' was built under R version 4.1.3
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(highcharter)
## Warning: package 'highcharter' was built under R version 4.1.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
# Print the structure of the data set: each column's type and first values.
utils::str(student_df)
## 'data.frame': 395 obs. of 33 variables:
## $ school : chr "GP" "GP" "GP" "GP" ...
## $ sex : chr "F" "F" "F" "F" ...
## $ age : int 18 17 15 15 16 16 16 17 15 15 ...
## $ address : chr "U" "U" "U" "U" ...
## $ famsize : chr "GT3" "GT3" "LE3" "GT3" ...
## $ Pstatus : chr "A" "T" "T" "T" ...
## $ Medu : int 4 1 1 4 3 4 2 4 3 3 ...
## $ Fedu : int 4 1 1 2 3 3 2 4 2 4 ...
## $ Mjob : chr "at_home" "at_home" "at_home" "health" ...
## $ Fjob : chr "teacher" "other" "other" "services" ...
## $ reason : chr "course" "course" "other" "home" ...
## $ guardian : chr "mother" "father" "mother" "mother" ...
## $ traveltime: int 2 1 1 1 1 1 1 2 1 1 ...
## $ studytime : int 2 2 2 3 2 2 2 2 2 2 ...
## $ failures : int 0 0 3 0 0 0 0 0 0 0 ...
## $ schoolsup : chr "yes" "no" "yes" "no" ...
## $ famsup : chr "no" "yes" "no" "yes" ...
## $ paid : chr "no" "no" "yes" "yes" ...
## $ activities: chr "no" "no" "no" "yes" ...
## $ nursery : chr "yes" "no" "yes" "yes" ...
## $ higher : chr "yes" "yes" "yes" "yes" ...
## $ internet : chr "no" "yes" "yes" "yes" ...
## $ romantic : chr "no" "no" "no" "yes" ...
## $ famrel : int 4 5 4 3 4 5 4 4 4 5 ...
## $ freetime : int 3 3 3 2 3 4 4 1 2 5 ...
## $ goout : int 4 3 2 2 2 2 4 4 2 1 ...
## $ Dalc : int 1 1 2 1 1 1 1 1 1 1 ...
## $ Walc : int 1 1 3 1 2 2 1 1 1 1 ...
## $ health : int 3 3 3 5 5 5 3 1 1 5 ...
## $ absences : int 6 4 10 2 4 10 0 6 0 0 ...
## $ G1 : int 5 5 7 15 6 15 12 6 16 14 ...
## $ G2 : int 6 5 8 14 10 15 12 5 18 15 ...
## $ G3 : int 6 6 10 15 10 15 11 6 19 15 ...
# Print a per-column summary of the data set (quartiles and mean for the
# numeric columns; length, class and mode for the character columns).
summary(object = student_df)
## school sex age address
## Length:395 Length:395 Min. :15.0 Length:395
## Class :character Class :character 1st Qu.:16.0 Class :character
## Mode :character Mode :character Median :17.0 Mode :character
## Mean :16.7
## 3rd Qu.:18.0
## Max. :22.0
## famsize Pstatus Medu Fedu
## Length:395 Length:395 Min. :0.000 Min. :0.000
## Class :character Class :character 1st Qu.:2.000 1st Qu.:2.000
## Mode :character Mode :character Median :3.000 Median :2.000
## Mean :2.749 Mean :2.522
## 3rd Qu.:4.000 3rd Qu.:3.000
## Max. :4.000 Max. :4.000
## Mjob Fjob reason guardian
## Length:395 Length:395 Length:395 Length:395
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## traveltime studytime failures schoolsup
## Min. :1.000 Min. :1.000 Min. :0.0000 Length:395
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:0.0000 Class :character
## Median :1.000 Median :2.000 Median :0.0000 Mode :character
## Mean :1.448 Mean :2.035 Mean :0.3342
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:0.0000
## Max. :4.000 Max. :4.000 Max. :3.0000
## famsup paid activities nursery
## Length:395 Length:395 Length:395 Length:395
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## higher internet romantic famrel
## Length:395 Length:395 Length:395 Min. :1.000
## Class :character Class :character Class :character 1st Qu.:4.000
## Mode :character Mode :character Mode :character Median :4.000
## Mean :3.944
## 3rd Qu.:5.000
## Max. :5.000
## freetime goout Dalc Walc
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:1.000
## Median :3.000 Median :3.000 Median :1.000 Median :2.000
## Mean :3.235 Mean :3.109 Mean :1.481 Mean :2.291
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:2.000 3rd Qu.:3.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## health absences G1 G2
## Min. :1.000 Min. : 0.000 Min. : 3.00 Min. : 0.00
## 1st Qu.:3.000 1st Qu.: 0.000 1st Qu.: 8.00 1st Qu.: 9.00
## Median :4.000 Median : 4.000 Median :11.00 Median :11.00
## Mean :3.554 Mean : 5.709 Mean :10.91 Mean :10.71
## 3rd Qu.:5.000 3rd Qu.: 8.000 3rd Qu.:13.00 3rd Qu.:13.00
## Max. :5.000 Max. :75.000 Max. :19.00 Max. :19.00
## G3
## Min. : 0.00
## 1st Qu.: 8.00
## Median :11.00
## Mean :10.42
## 3rd Qu.:14.00
## Max. :20.00
### 1. First Period Grade (G1):
###    a. The minimum score is 3 and the maximum score achieved is 19. The mean score is 10.91.
###    b. The median is 11.
### 2. Second Period Grade (G2):
###    a. The minimum score is 0 and the maximum score achieved is 19. The mean score is 10.71.
###    b. The median is 11.
### 3. Final Grade (G3):
###    a. The maximum score is 20 and the mean score is 10.42, making the mean final grade lower than the mean first and second period grades.
# Density of the final grade (G3), with one curve per father's job.
# geom_density() puts the estimated density on the y axis, so that axis is
# labelled "Density"; the job category is conveyed by the colour/fill legend,
# which is therefore the element titled "Father's Job".
ggplot(student_df, aes(x = G3, color = Fjob, fill = Fjob)) +
  geom_density(alpha = 0.4) +
  labs(
    title = "Final Grade According to Father's Job",
    x = "G3",
    y = "Density",
    color = "Father's Job",
    fill = "Father's Job"
  )
# Density of the final grade (G3), with one curve per mother's job.
# geom_density() puts the estimated density on the y axis, so that axis is
# labelled "Density"; the job category is conveyed by the colour/fill legend,
# which is therefore the element titled "Mother's Job".
ggplot(student_df, aes(x = G3, color = Mjob, fill = Mjob)) +
  geom_density(alpha = 0.4) +
  labs(
    title = "Final Grade According to Mother's Job",
    x = "G3",
    y = "Density",
    color = "Mother's Job",
    fill = "Mother's Job"
  )
# List the column names of the data set.
names(student_df)
## [1] "school" "sex" "age" "address" "famsize"
## [6] "Pstatus" "Medu" "Fedu" "Mjob" "Fjob"
## [11] "reason" "guardian" "traveltime" "studytime" "failures"
## [16] "schoolsup" "famsup" "paid" "activities" "nursery"
## [21] "higher" "internet" "romantic" "famrel" "freetime"
## [26] "goout" "Dalc" "Walc" "health" "absences"
## [31] "G1" "G2" "G3"
# Subset of the integer-valued columns (age, ordinal ratings, absences and the
# three grades) used for the histogram grid below.
data1 <- dplyr::select(
  student_df,
  age, Medu, Fedu, traveltime, studytime, failures, famrel, freetime,
  goout, Dalc, Walc, health, absences, G1, G2, G3
)
# One histogram per selected column. `FALSE` is spelled out because `F` is an
# ordinary variable that can be reassigned, unlike the reserved word `FALSE`.
multi.hist(data1, freq = FALSE, dcol = "blue", dlty = c("dotted", "solid"))
# Dodged bar chart of how many students obtained each final grade (G3),
# split by one grouping column.
#
#   data          data frame containing `G3` and the column named by `fill_var`
#   fill_var      name (string) of the column mapped to the bar fill
#   legend_title  title shown on the fill legend
#
# Returns the ggplot object; calling it at top level prints the plot.
plot_g3_counts <- function(data, fill_var, legend_title) {
  ggplot(data, aes(x = G3, fill = .data[[fill_var]])) +
    geom_bar(position = "dodge") +
    theme(axis.text.x = element_text(angle = 90)) +
    # geom_bar() maps the row count to y, and coord_flip() only swaps where the
    # axes are drawn. The y label therefore describes a count, while the
    # grouping variable is named on the fill legend.
    labs(x = "G3", y = "Number of Students", fill = legend_title) +
    coord_flip()
}

plot_g3_counts(student_df, "famsup", "Family Support")
plot_g3_counts(student_df, "nursery", "Nursery")
plot_g3_counts(student_df, "internet", "Internet")
plot_g3_counts(student_df, "romantic", "Romantic Relationships")
plot_g3_counts(student_df, "paid", "Paid Courses")
# Box plots of the final grade (G3) against four grouping columns. In every
# plot the boxes are coloured by the `higher` column with the same two-colour
# palette.

# G3 by home address type.
ggboxplot(
  data = student_df,
  x = "address",
  y = "G3",
  xlab = "Address",
  ylab = "G3 Score",
  color = "higher",
  palette = c("#00AFBB", "#E7B800")
) +
  ggtitle("Higher Education G3 Score based on Address")

# G3 by sex.
ggboxplot(
  data = student_df,
  x = "sex",
  y = "G3",
  xlab = "Sex",
  ylab = "G3 Score",
  color = "higher",
  palette = c("#00AFBB", "#E7B800")
) +
  ggtitle("Higher Education G3 Score based on Sex")

# G3 by school.
ggboxplot(
  data = student_df,
  x = "school",
  y = "G3",
  xlab = "School",
  ylab = "G3 Score",
  color = "higher",
  palette = c("#00AFBB", "#E7B800")
) +
  ggtitle("Higher Education G3 Score based on School")

# G3 by age.
ggboxplot(
  data = student_df,
  x = "age",
  y = "G3",
  xlab = "Age",
  ylab = "G3 Score",
  color = "higher",
  palette = c("#00AFBB", "#E7B800")
) +
  ggtitle("Higher Education G3 Score based on Age")
# Copy of the data with an extra `period_grade` column that bands the final
# grade (G3). Conditions are tried in order, so each row gets the first band
# whose upper bound it falls under:
#   G3 < 6.5  -> "Failing"
#   G3 < 13   -> "Good"
#   G3 < 20.1 -> "Very Good"
# Rows matching none of the conditions are left as NA.
student_df5 <- dplyr::mutate(
  student_df,
  period_grade = dplyr::case_when(
    G3 < 6.5 ~ "Failing",
    G3 < 13 ~ "Good",
    G3 < 20.1 ~ "Very Good"
  )
)
# Scatter plot of the second-period grade (G2) against the final grade (G3),
# one panel per grade band, each with a linear fit and its confidence ribbon.
ggplot(data = student_df5, aes(x = G3, y = G2, color = period_grade)) +
  geom_point() +
  facet_wrap(~period_grade) +
  labs(title = "Final Grade x Second Period Grade") +
  # The model formula is stated explicitly so geom_smooth() does not have to
  # pick a default, which also avoids its "using formula 'y ~ x'" message.
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    color = "red",
    fill = "#69b3a2",
    se = TRUE
  )
# Interactive bar chart of the final grade (G3) against travel time, with one
# series per grade band (`period_grade`). The legend is placed at the top right.
highchart() %>%
  hc_add_series(
    data = student_df5,
    type = "bar",
    mapping = hcaes(x = traveltime, y = G3, group = period_grade)
  ) %>%
  hc_title(text = " Effect of Travel Time on Grades") %>%
  hc_xAxis(title = list(text = "Travel Time")) %>%
  hc_yAxis(title = list(text = "Final Grade")) %>%
  hc_legend(align = "right", verticalAlign = "top")
#4