The purpose of the assignment is to analyse yearly changes in thickness of Ozone layer and predict the changes for the next 5 years
# Show the working directory the session starts in.
getwd()
## [1] "/Users/mohammadrazzak/Documents/University/RMIT/Machine Learning/Assignment phase 1/phase 2"
# Switch to the project folder only when it exists on this machine, so the
# script does not abort with "cannot change working directory" elsewhere.
# When the folder is absent, the CSV files are read from the current directory.
project_dir <- "/Users/mohammadrazzak/Documents/University/RMIT/Machine Learning/Assignment phase 1/phase 2"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}
# Read the semicolon-delimited file "student-mat.csv"; trim_ws = TRUE strips
# surrounding whitespace from each field while parsing.
student_mat <- read_delim("student-mat.csv",
  delim = ";", escape_double = FALSE, trim_ws = TRUE
)
## Parsed with column specification:
## cols(
## .default = col_character(),
## age = col_integer(),
## Medu = col_integer(),
## Fedu = col_integer(),
## traveltime = col_integer(),
## studytime = col_integer(),
## failures = col_integer(),
## famrel = col_integer(),
## freetime = col_integer(),
## goout = col_integer(),
## Dalc = col_integer(),
## Walc = col_integer(),
## health = col_integer(),
## absences = col_integer(),
## G1 = col_integer(),
## G2 = col_integer(),
## G3 = col_integer()
## )
## See spec(...) for full column specifications.
# Read the semicolon-delimited file "student-por.csv" with the same parsing
# options as the first data set (fields trimmed, no doubled-quote escaping).
student_por <- read_delim(
  file = "student-por.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)
## Parsed with column specification:
## cols(
## .default = col_character(),
## age = col_integer(),
## Medu = col_integer(),
## Fedu = col_integer(),
## traveltime = col_integer(),
## studytime = col_integer(),
## failures = col_integer(),
## famrel = col_integer(),
## freetime = col_integer(),
## goout = col_integer(),
## Dalc = col_integer(),
## Walc = col_integer(),
## health = col_integer(),
## absences = col_integer(),
## G1 = col_integer(),
## G2 = col_integer(),
## G3 = col_integer()
## )
## See spec(...) for full column specifications.
# Stack the two data sets row-wise into a single table.
total <- rbind(student_mat, student_por)

# Explicit mapping from each original column name to its descriptive label.
# Renaming by name (not by position) means a reordered input file cannot end
# up with silently mislabelled columns.
column_labels <- c(
  school = "school",
  sex = "sex",
  age = "age",
  address = "address",
  famsize = "Family_Size",
  Pstatus = "Parental_Status",
  Medu = "Mothers_Education",
  Fedu = "Fathers_Education",
  Mjob = "Mothers_Job",
  Fjob = "Fathers_Job",
  reason = "Enrollment_Reason",
  guardian = "Guardian",
  traveltime = "Travel_Time",
  studytime = "Study_Time",
  failures = "Class_Repeat",
  schoolsup = "Extra_Edu_Support",
  famsup = "Family_Edu_Support",
  paid = "Extra_paid_classes",
  activities = "Extra_Curricular_Activities",
  nursery = "Nursery_Schooling",
  higher = "Higher_Education",
  internet = "Home_Internet",
  romantic = "Romantic_Relationship",
  famrel = "Family_Relationship",
  freetime = "Free_Time_Post_School",
  goout = "Socialising",
  Dalc = "Daily_Alcohol_Consumption",
  Walc = "Weekend_Alcohol_Consumption",
  health = "Health_Status",
  absences = "School_Absences",
  G1 = "Sem1_Grade",
  G2 = "Sem2_Grade",
  G3 = "Final_Grade"
)

# Fail loudly if the input contains a column the mapping does not know about,
# instead of producing NA or shifted column names.
unknown_cols <- setdiff(names(total), names(column_labels))
if (length(unknown_cols) > 0) {
  stop(
    "Unexpected column(s) in input data: ",
    paste(unknown_cols, collapse = ", "),
    call. = FALSE
  )
}
names(total) <- unname(column_labels[names(total)])

# Preview the first rows of the renamed table.
head(total)
## # A tibble: 6 x 33
## school sex age address Family_Size Parental_Status Mothers_Education
## <chr> <chr> <int> <chr> <chr> <chr> <int>
## 1 GP F 18 U GT3 A 4
## 2 GP F 17 U GT3 T 1
## 3 GP F 15 U LE3 T 1
## 4 GP F 15 U GT3 T 4
## 5 GP F 16 U GT3 T 3
## 6 GP M 16 U LE3 T 4
## # ... with 26 more variables: Fathers_Education <int>, Mothers_Job <chr>,
## # Fathers_Job <chr>, Enrollment_Reason <chr>, Guardian <chr>,
## # Travel_Time <int>, Study_Time <int>, Class_Repeat <int>,
## # Extra_Edu_Support <chr>, Family_Edu_Support <chr>,
## # Extra_paid_classes <chr>, Extra_Curricular_Activities <chr>,
## # Nursery_Schooling <chr>, Higher_Education <chr>, Home_Internet <chr>,
## # Romantic_Relationship <chr>, Family_Relationship <int>,
## # Free_Time_Post_School <int>, Socialising <int>,
## # Daily_Alcohol_Consumption <int>, Weekend_Alcohol_Consumption <int>,
## # Health_Status <int>, School_Absences <int>, Sem1_Grade <int>,
## # Sem2_Grade <int>, Final_Grade <int>
# Derive the binary target: a final grade of 10 or more is a "Pass",
# anything lower is a "Fail".
total$Pass_Fail <- ifelse(total$Final_Grade >= 10, "Pass", "Fail")
# View() opens a data viewer window, which is only meaningful (and only
# reliably available) in an interactive session; skip it when rendering or
# running the script in batch mode.
if (interactive()) {
  View(total)
}
# Preview the first rows, now including the Pass_Fail column.
head(total)
## # A tibble: 6 x 34
## school sex age address Family_Size Parental_Status Mothers_Education
## <chr> <chr> <int> <chr> <chr> <chr> <int>
## 1 GP F 18 U GT3 A 4
## 2 GP F 17 U GT3 T 1
## 3 GP F 15 U LE3 T 1
## 4 GP F 15 U GT3 T 4
## 5 GP F 16 U GT3 T 3
## 6 GP M 16 U LE3 T 4
## # ... with 27 more variables: Fathers_Education <int>, Mothers_Job <chr>,
## # Fathers_Job <chr>, Enrollment_Reason <chr>, Guardian <chr>,
## # Travel_Time <int>, Study_Time <int>, Class_Repeat <int>,
## # Extra_Edu_Support <chr>, Family_Edu_Support <chr>,
## # Extra_paid_classes <chr>, Extra_Curricular_Activities <chr>,
## # Nursery_Schooling <chr>, Higher_Education <chr>, Home_Internet <chr>,
## # Romantic_Relationship <chr>, Family_Relationship <int>,
## # Free_Time_Post_School <int>, Socialising <int>,
## # Daily_Alcohol_Consumption <int>, Weekend_Alcohol_Consumption <int>,
## # Health_Status <int>, School_Absences <int>, Sem1_Grade <int>,
## # Sem2_Grade <int>, Final_Grade <int>, Pass_Fail <chr>
# Number of rows and columns in the combined table.
c(nrow(total), ncol(total))
## [1] 1044 34
# Compact listing of every column with its type and first few values.
utils::str(total)
## Classes 'tbl_df', 'tbl' and 'data.frame': 1044 obs. of 34 variables:
## $ school : chr "GP" "GP" "GP" "GP" ...
## $ sex : chr "F" "F" "F" "F" ...
## $ age : int 18 17 15 15 16 16 16 17 15 15 ...
## $ address : chr "U" "U" "U" "U" ...
## $ Family_Size : chr "GT3" "GT3" "LE3" "GT3" ...
## $ Parental_Status : chr "A" "T" "T" "T" ...
## $ Mothers_Education : int 4 1 1 4 3 4 2 4 3 3 ...
## $ Fathers_Education : int 4 1 1 2 3 3 2 4 2 4 ...
## $ Mothers_Job : chr "at_home" "at_home" "at_home" "health" ...
## $ Fathers_Job : chr "teacher" "other" "other" "services" ...
## $ Enrollment_Reason : chr "course" "course" "other" "home" ...
## $ Guardian : chr "mother" "father" "mother" "mother" ...
## $ Travel_Time : int 2 1 1 1 1 1 1 2 1 1 ...
## $ Study_Time : int 2 2 2 3 2 2 2 2 2 2 ...
## $ Class_Repeat : int 0 0 3 0 0 0 0 0 0 0 ...
## $ Extra_Edu_Support : chr "yes" "no" "yes" "no" ...
## $ Family_Edu_Support : chr "no" "yes" "no" "yes" ...
## $ Extra_paid_classes : chr "no" "no" "yes" "yes" ...
## $ Extra_Curricular_Activities: chr "no" "no" "no" "yes" ...
## $ Nursery_Schooling : chr "yes" "no" "yes" "yes" ...
## $ Higher_Education : chr "yes" "yes" "yes" "yes" ...
## $ Home_Internet : chr "no" "yes" "yes" "yes" ...
## $ Romantic_Relationship : chr "no" "no" "no" "yes" ...
## $ Family_Relationship : int 4 5 4 3 4 5 4 4 4 5 ...
## $ Free_Time_Post_School : int 3 3 3 2 3 4 4 1 2 5 ...
## $ Socialising : int 4 3 2 2 2 2 4 4 2 1 ...
## $ Daily_Alcohol_Consumption : int 1 1 2 1 1 1 1 1 1 1 ...
## $ Weekend_Alcohol_Consumption: int 1 1 3 1 2 2 1 1 1 1 ...
## $ Health_Status : int 3 3 3 5 5 5 3 1 1 5 ...
## $ School_Absences : int 6 4 10 2 4 10 0 6 0 0 ...
## $ Sem1_Grade : int 5 5 7 15 6 15 12 6 16 14 ...
## $ Sem2_Grade : int 6 5 8 14 10 15 12 5 18 15 ...
## $ Final_Grade : int 6 6 10 15 10 15 11 6 19 15 ...
## $ Pass_Fail : chr "Fail" "Fail" "Pass" "Pass" ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 33
## .. ..$ school : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ sex : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ age : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ address : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ famsize : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ Pstatus : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ Medu : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ Fedu : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ Mjob : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ Fjob : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ reason : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ guardian : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ traveltime: list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ studytime : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ failures : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ schoolsup : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ famsup : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ paid : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ activities: list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ nursery : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ higher : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ internet : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ romantic : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ famrel : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ freetime : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ goout : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ Dalc : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ Walc : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ health : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ absences : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ G1 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ G2 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ G3 : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
Feature Summary before Data Preprocessing
# Summarise every column of `total` and render the result as a captioned table.
knitr::kable(
  summarizeColumns(total),
  caption = "Feature Summary before Data Preprocessing"
)
| name | type | na | mean | disp | median | mad | min | max | nlevs |
|---|---|---|---|---|---|---|---|---|---|
| school | character | 0 | NA | 0.2605364 | NA | NA | 272 | 772 | 2 |
| sex | character | 0 | NA | 0.4339080 | NA | NA | 453 | 591 | 2 |
| age | integer | 0 | 16.7260536 | 1.2399747 | 17 | 1.4826 | 15 | 22 | 0 |
| address | character | 0 | NA | 0.2729885 | NA | NA | 285 | 759 | 2 |
| Family_Size | character | 0 | NA | 0.2931034 | NA | NA | 306 | 738 | 2 |
| Parental_Status | character | 0 | NA | 0.1159004 | NA | NA | 121 | 923 | 2 |
| Mothers_Education | integer | 0 | 2.6034483 | 1.1249066 | 3 | 1.4826 | 0 | 4 | 0 |
| Fathers_Education | integer | 0 | 2.3879310 | 1.0999381 | 2 | 1.4826 | 0 | 4 | 0 |
| Mothers_Job | character | 0 | NA | 0.6178161 | NA | NA | 82 | 399 | 5 |
| Fathers_Job | character | 0 | NA | 0.4406130 | NA | NA | 41 | 584 | 5 |
| Enrollment_Reason | character | 0 | NA | 0.5881226 | NA | NA | 108 | 430 | 4 |
| Guardian | character | 0 | NA | 0.3026820 | NA | NA | 73 | 728 | 3 |
| Travel_Time | integer | 0 | 1.5229885 | 0.7317274 | 1 | 0.0000 | 1 | 4 | 0 |
| Study_Time | integer | 0 | 1.9703065 | 0.8343532 | 2 | 1.4826 | 1 | 4 | 0 |
| Class_Repeat | integer | 0 | 0.2643678 | 0.6561418 | 0 | 0.0000 | 0 | 3 | 0 |
| Extra_Edu_Support | character | 0 | NA | 0.1139847 | NA | NA | 119 | 925 | 2 |
| Family_Edu_Support | character | 0 | NA | 0.3869732 | NA | NA | 404 | 640 | 2 |
| Extra_paid_classes | character | 0 | NA | 0.2107280 | NA | NA | 220 | 824 | 2 |
| Extra_Curricular_Activities | character | 0 | NA | 0.4942529 | NA | NA | 516 | 528 | 2 |
| Nursery_Schooling | character | 0 | NA | 0.2001916 | NA | NA | 209 | 835 | 2 |
| Higher_Education | character | 0 | NA | 0.0852490 | NA | NA | 89 | 955 | 2 |
| Home_Internet | character | 0 | NA | 0.2078544 | NA | NA | 217 | 827 | 2 |
| Romantic_Relationship | character | 0 | NA | 0.3553640 | NA | NA | 371 | 673 | 2 |
| Family_Relationship | integer | 0 | 3.9358238 | 0.9334008 | 4 | 1.4826 | 1 | 5 | 0 |
| Free_Time_Post_School | integer | 0 | 3.2011494 | 1.0315068 | 3 | 1.4826 | 1 | 5 | 0 |
| Socialising | integer | 0 | 3.1561303 | 1.1525747 | 3 | 1.4826 | 1 | 5 | 0 |
| Daily_Alcohol_Consumption | integer | 0 | 1.4942529 | 0.9117143 | 1 | 0.0000 | 1 | 5 | 0 |
| Weekend_Alcohol_Consumption | integer | 0 | 2.2844828 | 1.2851048 | 2 | 1.4826 | 1 | 5 | 0 |
| Health_Status | integer | 0 | 3.5431034 | 1.4247034 | 4 | 1.4826 | 1 | 5 | 0 |
| School_Absences | integer | 0 | 4.4348659 | 6.2100166 | 2 | 2.9652 | 0 | 75 | 0 |
| Sem1_Grade | integer | 0 | 11.2136015 | 2.9833939 | 11 | 2.9652 | 0 | 19 | 0 |
| Sem2_Grade | integer | 0 | 11.2461686 | 3.2850711 | 11 | 2.9652 | 0 | 19 | 0 |
| Final_Grade | integer | 0 | 11.3419540 | 3.8647958 | 11 | 2.9652 | 0 | 20 | 0 |
| Pass_Fail | character | 0 | NA | 0.2203065 | NA | NA | 230 | 814 | 2 |
Excess whitespace characters removed
# Strip leading and trailing whitespace from every character column; columns
# of any other type are passed through unchanged.
# `total[] <- lapply(...)` replaces the columns in place while keeping the
# table's class and attributes, and avoids sapply(), whose return type
# (vector, matrix or list) depends on the shape of its input.
total[] <- lapply(total, function(column) {
  if (is.character(column)) trimws(column) else column
})
Number of grades and their frequencies
# Bar chart of the number of students per school.
# Built with ggplot() + geom_bar() because qplot() is deprecated in ggplot2.
q01 <- ggplot(total, aes(x = school)) +
  geom_bar()
# Bar chart of Pass/Fail counts over all students.
p19 <- ggplot(total, aes(x = Pass_Fail)) +
  geom_bar(fill = "lavender") +
  labs(x = "Final Grade")
# The same Pass/Fail chart split into one panel per school.
p18 <- p19 +
  facet_grid(~school) +
  ggtitle("Barplot of School by \n Final Grade")
# Place both charts side by side with automatic panel labels.
plot_grid(q01, p18, labels = "AUTO")
Study Time: Studying 2-5 hours a week seems to be enough to pass the final exams.
Family Educational Support: Most students had educational support from their family. We can see that students with family support tend to do well.
Extra Paid Classes: Not many students opted for extra paid classes. The majority of students seem to have done well without extra paid classes.
Extra-Curricular Activities: It seems that taking part in extra-curricular activities didn't affect student grades. Almost half of the students did not show interest in getting involved in after-school activities.
Romantic Relationship: We can see that the students who were romantically involved had a low pass rate.
## $title
## [1] "Barplot of \n Socialising by \n Final Grade"
##
## $subtitle
## NULL
##
## attr(,"class")
## [1] "labels"
Socialising: Students who socialised mostly passed the exams; socialising didn't affect the grades of most of them. However, students who socialised a lot also failed their final exam.
# Remove the three grade columns by name. Dropping by name (rather than by
# position 31:33) cannot hit the wrong columns if the layout changes, and
# re-running this step does not remove further columns.
total <- total[!(names(total) %in% c("Sem1_Grade", "Sem2_Grade", "Final_Grade"))]
# Keep a copy taken before the character columns are converted to factors.
data <- total
# Convert every character column to a factor.
total <- total %>% mutate_if(is.character, as.factor)
The attributes were combined with the Pass/Fail target feature to understand the reasons why students pass or fail. The data exploration showed that school, age, sex, travel time, address, etc. had an impact on students' grades.