Load Dataset

# Import the student records from the CSV file into a data frame.
data <- read.csv(file = "student-mat 2.csv")

# Compact overview of the data frame: dimensions, column names and types.
str(object = data)
## 'data.frame':    395 obs. of  32 variables:
##  $ school    : chr  "GP" "GP" "GP" "GP" ...
##  $ sex       : chr  "F" "F" "F" "F" ...
##  $ age       : int  18 17 15 15 16 16 16 17 15 15 ...
##  $ address   : chr  "U" "U" "U" "U" ...
##  $ famsize   : chr  "GT3" "GT3" "LE3" "GT3" ...
##  $ Pstatus   : chr  "A" "T" "T" "T" ...
##  $ Medu      : int  4 1 1 4 3 4 2 4 3 3 ...
##  $ Fedu      : int  4 1 1 2 3 3 2 4 2 4 ...
##  $ Mjob      : chr  "at_home" "at_home" "at_home" "health" ...
##  $ Fjob      : chr  "teacher" "other" "other" "services" ...
##  $ reason    : chr  "course" "course" "other" "home" ...
##  $ guardian  : chr  "mother" "father" "mother" "mother" ...
##  $ traveltime: int  2 1 1 1 1 1 1 2 1 1 ...
##  $ studytime : int  2 2 2 3 2 2 2 2 2 2 ...
##  $ failures  : int  0 0 3 0 0 0 0 0 0 0 ...
##  $ schoolsup : chr  "yes" "no" "yes" "no" ...
##  $ famsup    : chr  "no" "yes" "no" "yes" ...
##  $ paid      : chr  "no" "no" "yes" "yes" ...
##  $ activities: chr  "no" "no" "no" "yes" ...
##  $ nursery   : chr  "yes" "no" "yes" "yes" ...
##  $ higher    : chr  "yes" "yes" "yes" "yes" ...
##  $ internet  : chr  "no" "yes" "yes" "yes" ...
##  $ famrel    : int  4 5 4 3 4 5 4 4 4 5 ...
##  $ freetime  : int  3 3 3 2 3 4 4 1 2 5 ...
##  $ goout     : int  4 3 2 2 2 2 4 4 2 1 ...
##  $ Dalc      : int  1 1 2 1 1 1 1 1 1 1 ...
##  $ Walc      : int  1 1 3 1 2 2 1 1 1 1 ...
##  $ health    : int  3 3 3 5 5 5 3 1 1 5 ...
##  $ absences  : int  6 4 10 2 4 10 0 6 0 0 ...
##  $ G1        : int  5 5 7 15 6 15 12 6 16 14 ...
##  $ G2        : int  6 5 8 14 10 15 12 5 18 15 ...
##  $ G3        : int  6 6 10 15 10 15 11 6 19 15 ...
# Per-column summary: quartiles and mean for the numeric columns; length,
# class and mode for the character columns.
summary(object = data)
##     school              sex                 age         address         
##  Length:395         Length:395         Min.   :15.0   Length:395        
##  Class :character   Class :character   1st Qu.:16.0   Class :character  
##  Mode  :character   Mode  :character   Median :17.0   Mode  :character  
##                                        Mean   :16.7                     
##                                        3rd Qu.:18.0                     
##                                        Max.   :22.0                     
##    famsize            Pstatus               Medu            Fedu      
##  Length:395         Length:395         Min.   :0.000   Min.   :0.000  
##  Class :character   Class :character   1st Qu.:2.000   1st Qu.:2.000  
##  Mode  :character   Mode  :character   Median :3.000   Median :2.000  
##                                        Mean   :2.749   Mean   :2.522  
##                                        3rd Qu.:4.000   3rd Qu.:3.000  
##                                        Max.   :4.000   Max.   :4.000  
##      Mjob               Fjob              reason            guardian        
##  Length:395         Length:395         Length:395         Length:395        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    traveltime      studytime        failures       schoolsup        
##  Min.   :1.000   Min.   :1.000   Min.   :0.0000   Length:395        
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:0.0000   Class :character  
##  Median :1.000   Median :2.000   Median :0.0000   Mode  :character  
##  Mean   :1.448   Mean   :2.035   Mean   :0.3342                     
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:0.0000                     
##  Max.   :4.000   Max.   :4.000   Max.   :3.0000                     
##     famsup              paid            activities          nursery         
##  Length:395         Length:395         Length:395         Length:395        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     higher            internet             famrel         freetime    
##  Length:395         Length:395         Min.   :1.000   Min.   :1.000  
##  Class :character   Class :character   1st Qu.:4.000   1st Qu.:3.000  
##  Mode  :character   Mode  :character   Median :4.000   Median :3.000  
##                                        Mean   :3.944   Mean   :3.235  
##                                        3rd Qu.:5.000   3rd Qu.:4.000  
##                                        Max.   :5.000   Max.   :5.000  
##      goout            Dalc            Walc           health     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:3.000  
##  Median :3.000   Median :1.000   Median :2.000   Median :4.000  
##  Mean   :3.109   Mean   :1.481   Mean   :2.291   Mean   :3.554  
##  3rd Qu.:4.000   3rd Qu.:2.000   3rd Qu.:3.000   3rd Qu.:5.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##     absences            G1              G2              G3       
##  Min.   : 0.000   Min.   : 3.00   Min.   : 0.00   Min.   : 0.00  
##  1st Qu.: 0.000   1st Qu.: 8.00   1st Qu.: 9.00   1st Qu.: 8.00  
##  Median : 4.000   Median :11.00   Median :11.00   Median :11.00  
##  Mean   : 5.709   Mean   :10.91   Mean   :10.71   Mean   :10.42  
##  3rd Qu.: 8.000   3rd Qu.:13.00   3rd Qu.:13.00   3rd Qu.:14.00  
##  Max.   :75.000   Max.   :19.00   Max.   :19.00   Max.   :20.00
# Preview the first six rows (six is the default for `n`).
head(data, n = 6L)
##   school sex age address famsize Pstatus Medu Fedu     Mjob     Fjob     reason
## 1     GP   F  18       U     GT3       A    4    4  at_home  teacher     course
## 2     GP   F  17       U     GT3       T    1    1  at_home    other     course
## 3     GP   F  15       U     LE3       T    1    1  at_home    other      other
## 4     GP   F  15       U     GT3       T    4    2   health services       home
## 5     GP   F  16       U     GT3       T    3    3    other    other       home
## 6     GP   M  16       U     LE3       T    4    3 services    other reputation
##   guardian traveltime studytime failures schoolsup famsup paid activities
## 1   mother          2         2        0       yes     no   no         no
## 2   father          1         2        0        no    yes   no         no
## 3   mother          1         2        3       yes     no  yes         no
## 4   mother          1         3        0        no    yes  yes        yes
## 5   father          1         2        0        no    yes  yes         no
## 6   mother          1         2        0        no    yes  yes        yes
##   nursery higher internet famrel freetime goout Dalc Walc health absences G1 G2
## 1     yes    yes       no      4        3     4    1    1      3        6  5  6
## 2      no    yes      yes      5        3     3    1    1      3        4  5  5
## 3     yes    yes      yes      4        3     2    2    3      3       10  7  8
## 4     yes    yes      yes      3        2     2    1    1      5        2 15 14
## 5     yes    yes       no      4        3     2    1    2      5        4  6 10
## 6     yes    yes      yes      5        4     2    1    2      5       10 15 15
##   G3
## 1  6
## 2  6
## 3 10
## 4 15
## 5 10
## 6 15

Subsetting the Dataset

# Split the data into comparison groups. `which()` keeps only the rows where
# the condition is TRUE, so rows with a missing value in the tested column are
# left out of both groups.

# By sex.
male_students <- data[which(data$sex == "M"), , drop = FALSE]
female_students <- data[which(data$sex == "F"), , drop = FALSE]

# By weekend alcohol consumption; rows with Walc == 3 fall in neither group.
high_walc <- data[which(data$Walc >= 4), , drop = FALSE]
low_walc <- data[which(data$Walc <= 2), , drop = FALSE]

# By study time; studytime is an integer column, so the two groups together
# cover every non-missing row.
high_study <- data[which(data$studytime >= 3), , drop = FALSE]
low_study <- data[which(data$studytime <= 2), , drop = FALSE]

# By home address code ("U" or "R").
urban_students <- data[which(data$address == "U"), , drop = FALSE]
rural_students <- data[which(data$address == "R"), , drop = FALSE]

# By internet access ("yes" or "no").
internet_yes <- data[which(data$internet == "yes"), , drop = FALSE]
internet_no <- data[which(data$internet == "no"), , drop = FALSE]

Histogram Plots

Histogram 1

# Histogram of the final grade (G3) for the male subset, using the default
# bin breaks.
hist(
  x = male_students$G3,
  col = "lightblue",
  border = "black",
  main = "Histogram of Final Grade - Male Students",
  xlab = "Final Grade (G3)"
)

This histogram shows the distribution of final grades among male students.

Histogram 2

# Histogram of the final grade (G3) for the female subset, using the default
# bin breaks.
hist(
  x = female_students$G3,
  col = "lightpink",
  border = "black",
  main = "Histogram of Final Grade - Female Students",
  xlab = "Final Grade (G3)"
)

This histogram shows the distribution of final grades among female students.

Histogram 3

# Histogram of the number of absences within the high weekend-alcohol subset
# (`high_walc`), using the default bin breaks.
hist(
  x = high_walc$absences,
  col = "orange",
  border = "black",
  main = "Histogram of Absences - High Walc Students",
  xlab = "Absences"
)

This histogram shows the distribution of absences among students with high weekend alcohol consumption (Walc of 4 or higher).

Scatter Plots

Scatter Plot 1

# Study time (x) against final grade (y), one point per student.
# Both columns are integer-valued, so many students share the same (x, y)
# position and fully opaque points would hide how many observations sit on
# each spot. Semi-transparent points make crowded positions appear darker.
# NOTE(review): needs a graphics device that supports transparency (the usual
# png/pdf devices do) -- confirm for the output format in use.
plot(data$studytime, data$G3,
     main = "Study Time vs Final Grade",
     xlab = "Study Time",
     ylab = "Final Grade (G3)",
     col = adjustcolor("blue", alpha.f = 0.3),
     pch = 19)

This scatter plot shows the relationship between study time and final grade.

Scatter Plot 2

# Number of absences (x) against final grade (y), one point per student.
# Both columns are integer-valued, so students with the same absence count and
# grade are drawn on top of each other. Semi-transparent points make crowded
# positions appear darker instead of looking like a single observation.
# NOTE(review): needs a graphics device that supports transparency (the usual
# png/pdf devices do) -- confirm for the output format in use.
plot(data$absences, data$G3,
     main = "Absences vs Final Grade",
     xlab = "Absences",
     ylab = "Final Grade (G3)",
     col = adjustcolor("red", alpha.f = 0.3),
     pch = 19)

This scatter plot shows the relationship between absences and final grade.

Scatter Plot 3

# Weekend alcohol consumption (x) against final grade (y), one point per
# student. Both columns are integer-valued, so many students share the same
# (x, y) position and fully opaque points would hide how many observations sit
# on each spot. Semi-transparent points make crowded positions appear darker.
# NOTE(review): needs a graphics device that supports transparency (the usual
# png/pdf devices do) -- confirm for the output format in use.
plot(data$Walc, data$G3,
     main = "Weekend Alcohol Consumption vs Final Grade",
     xlab = "Weekend Alcohol Consumption",
     ylab = "Final Grade (G3)",
     col = adjustcolor("darkgreen", alpha.f = 0.3),
     pch = 19)

This scatter plot shows the relationship between weekend alcohol consumption and final grade.

Boxplots

Boxplot 1

# Side-by-side boxplots of the final grade (G3) for each sex.
# Boxes are drawn in factor-level order, which for the sex codes is "F" then
# "M". The fill colours are looked up by level name so that each sex gets the
# same colour as in its histogram (male = lightblue, female = lightpink)
# instead of depending on the positional order of `col`.
sex_colours <- c("F" = "lightpink", "M" = "lightblue")
boxplot(G3 ~ sex, data = data,
        main = "Final Grade by Sex",
        xlab = "Sex",
        ylab = "Final Grade (G3)",
        col = unname(sex_colours[levels(as.factor(data$sex))]))

This boxplot compares final grades between male and female students.

Boxplot 2

# Side-by-side boxplots of the final grade (G3), one box per address code.
boxplot(
  G3 ~ address,
  data = data,
  col = c("lightgreen", "lightyellow"),
  main = "Final Grade by Address",
  xlab = "Address",
  ylab = "Final Grade (G3)"
)

This boxplot compares final grades between students with an urban (U) and a rural (R) home address.

Boxplot 3

# Side-by-side boxplots of the final grade (G3), one box per internet value.
boxplot(
  G3 ~ internet,
  data = data,
  col = c("orange", "cyan"),
  main = "Final Grade by Internet Access",
  xlab = "Internet Access",
  ylab = "Final Grade (G3)"
)

This boxplot compares final grades based on internet access.

Mosaic Plots

Mosaic Plot 1

# Mosaic plot of the sex-by-internet contingency table: tile areas are
# proportional to the number of students in each combination.
mosaicplot(
  x = table(data$sex, data$internet),
  color = TRUE,
  main = "Sex vs Internet Access",
  xlab = "Sex",
  ylab = "Internet Access"
)

This mosaic plot shows the relationship between sex and internet access.

Mosaic Plot 2

# Mosaic plot of the address-by-higher contingency table: tile areas are
# proportional to the number of students in each combination.
mosaicplot(
  x = table(data$address, data$higher),
  color = TRUE,
  main = "Address vs Higher Education",
  xlab = "Address",
  ylab = "Higher Education"
)

This mosaic plot shows the relationship between address and higher education interest.

Mosaic Plot 3

# Derive a pass/fail label from the final grade: below 10 is "Fail", 10 or
# above is "Pass" (a missing grade stays missing).
data[["pass_fail"]] <- ifelse(data[["G3"]] < 10, "Fail", "Pass")

# Mosaic plot of the schoolsup-by-pass_fail contingency table: tile areas are
# proportional to the number of students in each combination.
mosaicplot(
  x = table(data$schoolsup, data$pass_fail),
  color = TRUE,
  main = "School Support vs Pass/Fail",
  xlab = "School Support",
  ylab = "Pass / Fail"
)

This mosaic plot shows the relationship between school support and the pass/fail result, where a pass is a final grade (G3) of at least 10.