College alcoholism affects millions of students every year. The college years are some of the most popular times to experiment with alcohol. Roughly 80% of college students – four out of every five – consume alcohol to some degree. It’s estimated that 50% of those students engage in binge drinking, which involves consuming too much alcohol in too little time. This document gives a brief overview of student alcohol consumption, making it easier to analyse through various plots.

# Load the student records from CSV and print summary statistics per column
# NOTE(review): the path is absolute and machine-specific -- confirm it exists
# on the machine where this is run.
data <- read.csv(file = "C:/Users/chvss/OneDrive/Documents/student-mat.csv")
summary(object = data)
##     school              sex                 age         address         
##  Length:395         Length:395         Min.   :15.0   Length:395        
##  Class :character   Class :character   1st Qu.:16.0   Class :character  
##  Mode  :character   Mode  :character   Median :17.0   Mode  :character  
##                                        Mean   :16.7                     
##                                        3rd Qu.:18.0                     
##                                        Max.   :22.0                     
##    famsize            Pstatus               Medu            Fedu      
##  Length:395         Length:395         Min.   :0.000   Min.   :0.000  
##  Class :character   Class :character   1st Qu.:2.000   1st Qu.:2.000  
##  Mode  :character   Mode  :character   Median :3.000   Median :2.000  
##                                        Mean   :2.749   Mean   :2.522  
##                                        3rd Qu.:4.000   3rd Qu.:3.000  
##                                        Max.   :4.000   Max.   :4.000  
##      Mjob               Fjob              reason            guardian        
##  Length:395         Length:395         Length:395         Length:395        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    traveltime      studytime        failures       schoolsup        
##  Min.   :1.000   Min.   :1.000   Min.   :0.0000   Length:395        
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:0.0000   Class :character  
##  Median :1.000   Median :2.000   Median :0.0000   Mode  :character  
##  Mean   :1.448   Mean   :2.035   Mean   :0.3342                     
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:0.0000                     
##  Max.   :4.000   Max.   :4.000   Max.   :3.0000                     
##     famsup              paid            activities          nursery         
##  Length:395         Length:395         Length:395         Length:395        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     higher            internet           romantic             famrel     
##  Length:395         Length:395         Length:395         Min.   :1.000  
##  Class :character   Class :character   Class :character   1st Qu.:4.000  
##  Mode  :character   Mode  :character   Mode  :character   Median :4.000  
##                                                           Mean   :3.944  
##                                                           3rd Qu.:5.000  
##                                                           Max.   :5.000  
##     freetime         goout            Dalc            Walc      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:2.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :3.000   Median :3.000   Median :1.000   Median :2.000  
##  Mean   :3.235   Mean   :3.109   Mean   :1.481   Mean   :2.291  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:2.000   3rd Qu.:3.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##      health         absences            G1              G2       
##  Min.   :1.000   Min.   : 0.000   Min.   : 3.00   Min.   : 0.00  
##  1st Qu.:3.000   1st Qu.: 0.000   1st Qu.: 8.00   1st Qu.: 9.00  
##  Median :4.000   Median : 4.000   Median :11.00   Median :11.00  
##  Mean   :3.554   Mean   : 5.709   Mean   :10.91   Mean   :10.71  
##  3rd Qu.:5.000   3rd Qu.: 8.000   3rd Qu.:13.00   3rd Qu.:13.00  
##  Max.   :5.000   Max.   :75.000   Max.   :19.00   Max.   :19.00  
##        G3       
##  Min.   : 0.00  
##  1st Qu.: 8.00  
##  Median :11.00  
##  Mean   :10.42  
##  3rd Qu.:14.00  
##  Max.   :20.00
# Inspect the data frame's structure: column names, types and leading values
str(object = data)
## 'data.frame':    395 obs. of  33 variables:
##  $ school    : chr  "GP" "GP" "GP" "GP" ...
##  $ sex       : chr  "F" "F" "F" "F" ...
##  $ age       : int  18 17 15 15 16 16 16 17 15 15 ...
##  $ address   : chr  "U" "U" "U" "U" ...
##  $ famsize   : chr  "GT3" "GT3" "LE3" "GT3" ...
##  $ Pstatus   : chr  "A" "T" "T" "T" ...
##  $ Medu      : int  4 1 1 4 3 4 2 4 3 3 ...
##  $ Fedu      : int  4 1 1 2 3 3 2 4 2 4 ...
##  $ Mjob      : chr  "at_home" "at_home" "at_home" "health" ...
##  $ Fjob      : chr  "teacher" "other" "other" "services" ...
##  $ reason    : chr  "course" "course" "other" "home" ...
##  $ guardian  : chr  "mother" "father" "mother" "mother" ...
##  $ traveltime: int  2 1 1 1 1 1 1 2 1 1 ...
##  $ studytime : int  2 2 2 3 2 2 2 2 2 2 ...
##  $ failures  : int  0 0 3 0 0 0 0 0 0 0 ...
##  $ schoolsup : chr  "yes" "no" "yes" "no" ...
##  $ famsup    : chr  "no" "yes" "no" "yes" ...
##  $ paid      : chr  "no" "no" "yes" "yes" ...
##  $ activities: chr  "no" "no" "no" "yes" ...
##  $ nursery   : chr  "yes" "no" "yes" "yes" ...
##  $ higher    : chr  "yes" "yes" "yes" "yes" ...
##  $ internet  : chr  "no" "yes" "yes" "yes" ...
##  $ romantic  : chr  "no" "no" "no" "yes" ...
##  $ famrel    : int  4 5 4 3 4 5 4 4 4 5 ...
##  $ freetime  : int  3 3 3 2 3 4 4 1 2 5 ...
##  $ goout     : int  4 3 2 2 2 2 4 4 2 1 ...
##  $ Dalc      : int  1 1 2 1 1 1 1 1 1 1 ...
##  $ Walc      : int  1 1 3 1 2 2 1 1 1 1 ...
##  $ health    : int  3 3 3 5 5 5 3 1 1 5 ...
##  $ absences  : int  6 4 10 2 4 10 0 6 0 0 ...
##  $ G1        : int  5 5 7 15 6 15 12 6 16 14 ...
##  $ G2        : int  6 5 8 14 10 15 12 5 18 15 ...
##  $ G3        : int  6 6 10 15 10 15 11 6 19 15 ...

Data layer: the data layer specifies the source of the information to be visualized, i.e. the student alcohol consumption data frame (`data`) read above.

library(ggplot2)
# Data layer: attach the data frame to an otherwise empty plot and title it
ggplot(data) +
  labs(title = "student alcohol consumption dataset")

Aesthetic layer: map the variables of the dataset to visual aesthetics such as the x-axis, the y-axis and colour.

# Aesthetic layer: sex on x, age on y, reason as colour (no geom is added yet,
# so only the axes are drawn)
ggplot(data, mapping = aes(x = sex, y = age, colour = reason)) +
  labs(title = "student alcohol consumption")

Geometric layer: the geometric layer controls how the data is displayed, using points, lines, histograms, bars or boxplots.

# Geometric layer: draw one point per student, coloured by reason
ggplot(data, aes(x = age, y = Medu, colour = reason)) +
  geom_point() +
  ggtitle("age vs Medu") +
  xlab("age") +
  ylab("Medu")

Next, a scatter plot in which the point size encodes a third variable.

# Adding size

# Scatter plot of absences against age; point size is driven by Medu
ggplot(data, aes(x = age, y = absences, size = Medu)) +
  geom_point() +
  ggtitle("age vs absences") +
  xlab("age") +
  ylab("absences")

# Scatter plot of family support against age: colour distinguishes each age,
# point shape distinguishes the family-support value
ggplot(
  data,
  aes(x = age, y = famsup, colour = factor(age), shape = factor(famsup))
) +
  geom_point() +
  ggtitle("age vs family support") +
  xlab("age") +
  ylab("family support")

scatter plot

# Convert age to a factor in place (this changes `data` for all later code),
# then plot reason against the discrete age levels
data[["age"]] <- factor(data[["age"]])
ggplot(data, mapping = aes(x = factor(age), y = reason)) +
  geom_point()

# Histogram of the students' health score
# health is an integer score from 1 to 5 (see the summary output above), so a
# bin width of 1 gives one bar per score; a width of 5 would lump the scores
# into one or two bars. The y-axis of a histogram is the number of students in
# each bin, so it is labelled "count" rather than "age".
ggplot(data = data, aes(x = health)) +
  geom_histogram(binwidth = 1, color = "purple", fill = "orange") +
  labs(
    title = "Histogram of student health status",
    x = "health",
    y = "count"
  )

# Bar chart counting students per health score; bars are shaded by the score
ggplot(data, mapping = aes(x = as.factor(health), fill = health)) +
  geom_bar()

Pie chart: the pie chart below shows the values of the `reason` column (the students' reason for choosing their school) and the percentage share of each reason.

# Pie chart of the reason column, each slice labelled "<reason> <percent>%"
reason <- table(data[["reason"]])
# Whole-number percentage share of every category
share <- round(100 * prop.table(reason))
data.labels <- paste0(names(reason), " ", share, "%")
pie(
  reason,
  labels = data.labels,
  clockwise = TRUE,
  col = heat.colors(length(data.labels)),
  main = "Frequency of reason"
)

Creating a pie chart showing the percentage share of each job category among the students' mothers (the `Mjob` column).

# Pie chart of the Mjob column, each slice labelled "<job> <percent>%"
# In the UCI Student Performance data dictionary Mjob is the mother's job (one
# row per student), so the title names it as such instead of describing the
# slices as jobs of male alcohol consumers.
Mjob <- table(data$Mjob)
# Whole-number percentage share of every job category
share <- round(Mjob / sum(Mjob) * 100)
data.labels <- paste0(names(Mjob), " ", share, "%")
pie(
  Mjob,
  labels = data.labels,
  clockwise = TRUE,
  col = heat.colors(length(data.labels)),
  main = "Frequency of mother's job"
)

Creating a similar pie chart for the job categories of the students' fathers (the `Fjob` column).

# Pie chart of the Fjob column, each slice labelled "<job> <percent>%"
# In the UCI Student Performance data dictionary Fjob is the father's job (one
# row per student), so the title names it as such instead of "(female)".
Fjob <- table(data$Fjob)
# Whole-number percentage share of every job category
share <- round(Fjob / sum(Fjob) * 100)
data.labels <- paste0(names(Fjob), " ", share, "%")
pie(
  Fjob,
  labels = data.labels,
  clockwise = TRUE,
  col = heat.colors(length(data.labels)),
  main = "Frequency of father's job"
)