Exercise Week 5 + 6

Loading the Data

# Read the student table that was copied to the system clipboard; the first
# row holds the column names.
# NOTE(review): "clipboard" only works in an interactive, platform-dependent
# session -- consider reading from a saved file so the analysis is reproducible.
# stringsAsFactors = TRUE keeps the text columns (gender, X2nd, USG) as factors
# on R >= 4.0 too, which the factor-based plot() calls further down rely on.
dfStudent <- read.table(file = "clipboard", header = TRUE,
                        stringsAsFactors = TRUE)
dfStudent
##    student gender X2nd age a1 a2 a3 a4 a5 totala hw2 hw3 hw4 hw5 totalhw
## 1   gerrit      m  MBO  21  1  1  1  1  1      5   1   1   1   1       4
## 2     saar      f HAVO  23  1  1  1  1  1      5   1   0   1   1       3
## 3     lars      m  VWO  19  1  1  1  1  1      5   1   1   1   1       4
## 4     henk      m  MBO  23  1  1  1  1  1      5   1   1   1   1       4
## 5    klara      f  MBO  19  1  0  1  0  1      3   0   1   0   1       2
## 6      sem      m HAVO  20  1  1  1  1  1      5   1   1   1   1       4
## 7      liv      f HAVO  19  1  1  1  1  1      5   1   1   1   1       4
## 8    rinus      m  VWO  19  1  1  1  0  1      4   1   1   0   1       3
## 9     tess      f  MBO  19  0  1  1  0  0      2   1   1   0   0       2
## 10     tim      m HAVO  18  1  1  1  1  1      5   1   0   1   1       3
## 11    lisa      f HAVO  19  1  1  1  1  1      5   1   1   1   1       4
## 12   lotte      f HAVO  24  1  1  1  1  1      5   1   1   1   1       4
## 13   tinus      m  MBO  26  1  0  1  0  1      3   1   0   0   0       1
## 14   karel      m  MBO  20  1  1  1  0  1      4   1   1   1   1       4
## 15    mila      f  VWO  19  1  1  1  1  1      5   1   1   1   1       4
## 16 betrand      m HAVO  21  1  1  1  1  1      5   1   1   1   1       4
## 17    liam      m  MBO  19  1  1  1  0  1      4   1   1   0   1       3
## 18   janus      m  MBO  18  1  1  1  1  1      5   1   1   1   1       4
## 19    riek      f HAVO  19  1  0  0  1  1      3   1   0   0   1       2
## 20     zus      f HAVO  20  1  0  1  1  1      4   1   1   0   1       3
## 21  jayden      m HAVO  19  1  1  1  0  1      4   1   1   0   1       3
## 22     ans      f HAVO  22  1  1  1  1  1      5   1   1   1   1       4
## 23    sien      f  MBO  21  1  1  1  1  0      4   1   1   1   0       3
## 24  thomas      m  VWO  20  1  1  1  1  1      5   1   1   1   1       4
## 25    loek      m HAVO  24  1  1  1  0  1      4   1   1   1   0       3
## 26    daan      m  VWO  20  1  1  0  0  1      3   1   0   0   0       1
## 27    bert      m HAVO  19  1  1  1  1  1      5   1   1   1   1       4
## 28    noah      m HAVO  21  1  1  1  0  1      4   1   0   0   1       2
## 29   thijs      m  VWO  20  1  1  1  1  1      5   1   1   1   1       4
## 30   jesse      m HAVO  18  1  0  1  1  1      4   1   0   1   1       3
## 31   julia      f  VWO  20  1  1  0  1  1      4   1   1   1   1       4
## 32    piet      m  MBO  20  1  1  1  0  1      4   1   1   1   1       4
## 33     jet      f  MBO  21  1  1  1  1  1      5   1   1   1   1       4
## 34    bram      m  MBO  21  1  1  0  1  0      3   1   0   1   0       2
## 35     ans      f HAVO  25  1  1  0  0  1      3   1   1   0   0       2
## 36     eva      f HAVO  20  1  1  1  1  0      4   1   1   1   0       3
## 37    mees      m  MBO  24  1  1  1  1  1      5   1   1   1   1       4
## 38    levi      m HAVO  21  1  0  1  0  1      3   1   0   1   0       2
## 39    luuk      m  MBO  20  1  1  1  1  1      5   1   1   1   1       4
## 40     zoë      f  MBO  19  1  1  1  1  1      5   1   1   1   1       4
## 41   sjaak      m HAVO  20  1  0  1  1  1      4   1   1   1   1       4
## 42    theo      m  MBO  21  1  1  1  1  1      5   1   1   0   1       3
## 43   lucas      m HAVO  21  1  1  0  1  1      4   1   1   0   1       3
## 44  nellie      f  VWO  22  1  0  1  0  1      3   1   1   0   1       3
##    exam USG
## 1     7   S
## 2     6   S
## 3     8   G
## 4     9   G
## 5     6   S
## 6     7   S
## 7     8   G
## 8     9   G
## 9     4   U
## 10    9   G
## 11    8   G
## 12    8   G
## 13    3   U
## 14    4   U
## 15    8   G
## 16    5   U
## 17    6   S
## 18    7   S
## 19    5   U
## 20    6   S
## 21    4   U
## 22    8   G
## 23    6   S
## 24    9   G
## 25    5   U
## 26    2   U
## 27    7   S
## 28    5   U
## 29    9   G
## 30    5   U
## 31    8   G
## 32    5   U
## 33    4   U
## 34    4   U
## 35    3   U
## 36    7   S
## 37    6   S
## 38    4   U
## 39    8   G
## 40    9   G
## 41    6   S
## 42    4   U
## 43    6   S
## 44    5   U

1. Frequency distribution for

Gender

# Count how many students fall into each gender category.
table(dfStudent[["gender"]])
## 
##  f  m 
## 17 27

Secondary School

# Count how many students come from each type of secondary school.
table(dfStudent[["X2nd"]])
## 
## HAVO  MBO  VWO 
##   20   16    8

Age Class (17-<20; 20-<23; 23-<)

# Age classes requested by the exercise: 17-<20, 20-<23 and 23 or older.
age <- dfStudent$age
range(age)
## [1] 18 26
# One set of breaks covers all three classes; the open upper bound (Inf)
# keeps the oldest student (26) in the last class instead of dropping them.
age.breaks <- c(17, 20, 23, Inf)
age.breaks
## [1]  17  20  23 Inf
# right = FALSE makes every interval closed on the left: [lower, upper).
age.cut <- cut(age, breaks = age.breaks, right = FALSE)
age.freq <- table(age.cut)
age.freq
## age.cut
##  [17,20)  [20,23) [23,Inf) 
##       15       22        7

Attendance (0-<2; 2-<4; 4; 5)

# Attendance classes requested by the exercise: 0-<2, 2-<4, 4 and 5.
dfStudent$totala
##  [1] 5 5 5 5 3 5 5 4 2 5 5 5 3 4 5 5 4 5 3 4 4 5 4 5 4 3 5 4 5 4 4 4 5 3 3
## [36] 4 5 3 5 5 4 5 4 3
# A single cut() with breaks at 0, 2, 4, 5 and 6 yields exactly those four
# classes. right = FALSE makes each interval [lower, upper), so for these
# whole-number counts [4,5) holds the 4s and [5,6) holds the 5s.
attendance.breaks <- c(0, 2, 4, 5, 6)
attendance.cut <- cut(dfStudent$totala, breaks = attendance.breaks, right = FALSE)
attendance.freq <- table(attendance.cut)
attendance.freq
## attendance.cut
## [0,2) [2,4) [4,5) [5,6) 
##     0     9    14    21

Homework Submitted (0-<2; 2-<4; 4; 5)

# Homework classes requested by the exercise: 0-<2, 2-<4, 4 and 5.
hw <- dfStudent$totalhw
# One cut() over all class boundaries; `right` is spelled out in full rather
# than relying on partial argument matching. right = FALSE makes each
# interval [lower, upper).
hw.breaks <- c(0, 2, 4, 5, 6)
hw.cut <- cut(hw, breaks = hw.breaks, right = FALSE)
hw.freq <- table(hw.cut)
hw.freq
## hw.cut
## [0,2) [2,4) [4,5) [5,6) 
##     2    20    22     0

2. Central tendencies for

Results

# Summary statistics, mean and median of the exam grades.
summary(dfStudent$exam)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   5.000   6.000   6.182   8.000   9.000
mean(dfStudent$exam)
## [1] 6.181818
median(dfStudent$exam)
## [1] 6
# base::mode() reports the storage mode ("numeric"), not the statistical
# mode, so take the most frequent value(s) from a frequency table instead.
# Ties are all returned: grades 6 and 8 both occur 8 times.
exam.counts <- table(dfStudent$exam)
as.numeric(names(exam.counts)[exam.counts == max(exam.counts)])
## [1] 6 8

Number of attendance

# Summary statistics, mean and median of the attendance counts.
summary(dfStudent$totala)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00    4.00    4.00    4.25    5.00    5.00
mean(dfStudent$totala)
## [1] 4.25
median(dfStudent$totala)
## [1] 4
# base::mode() reports the storage mode ("numeric"), not the statistical
# mode, so take the most frequent value(s) from a frequency table instead.
attendance.counts <- table(dfStudent$totala)
as.numeric(names(attendance.counts)[attendance.counts == max(attendance.counts)])
## [1] 5

Number of homework submitted

# Summary statistics, mean and median of the number of homework submissions.
summary(dfStudent$totalhw)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    3.00    3.50    3.25    4.00    4.00
mean(dfStudent$totalhw)
## [1] 3.25
median(dfStudent$totalhw)
## [1] 3.5
# base::mode() reports the storage mode ("numeric"), not the statistical
# mode, so take the most frequent value(s) from a frequency table instead.
homework.counts <- table(dfStudent$totalhw)
as.numeric(names(homework.counts)[homework.counts == max(homework.counts)])
## [1] 4

3. Data dispersion for

Results

# Spread of the exam grades: variance, interquartile range and standard
# deviation.
with(dfStudent, var(exam))
## [1] 3.687104
with(dfStudent, IQR(exam))
## [1] 3
with(dfStudent, sd(exam))
## [1] 1.920183

Number of attendance

# Spread of the attendance counts: variance, interquartile range and
# standard deviation.
with(dfStudent, var(totala))
## [1] 0.7034884
with(dfStudent, IQR(totala))
## [1] 1
with(dfStudent, sd(totala))
## [1] 0.8387421

Number of homework submitted

# Spread of the homework submission counts: variance, interquartile range
# and standard deviation.
with(dfStudent, var(totalhw))
## [1] 0.7965116
with(dfStudent, IQR(totalhw))
## [1] 1
with(dfStudent, sd(totalhw))
## [1] 0.892475

4. Relation between:

Gender and USG

# Plot exam category (USG) against gender. Both columns are categorical, so
# they are wrapped in factor(): plot() then dispatches on a factor even when
# the text columns were read as character vectors (the read.table default
# since R 4.0).
plot(factor(dfStudent$gender), factor(dfStudent$USG),
     xlab = "Gender", ylab = "USG")

2nd and USG

# Plot exam category (USG) against secondary-school type. Both columns are
# categorical, so they are wrapped in factor(): plot() then dispatches on a
# factor even when the text columns were read as character vectors (the
# read.table default since R 4.0). The point symbol (pch) is dropped because
# a factor-by-factor plot draws no points.
plot(factor(dfStudent$X2nd), factor(dfStudent$USG),
     xlab = "2nd", ylab = "USG")

Attendance and result

# Scatter plot of the exam grade against the number of attendances.
with(dfStudent,
     plot(totala, exam, pch = 16, xlab = "Attendance", ylab = "Results"))

Attendance and USG

# Scatter plot of exam category (USG) against the number of attendances.
# USG is categorical, so it is converted to a factor and plotted by its
# integer level codes; the default y axis is suppressed and redrawn with the
# level names so the categories are readable instead of showing bare codes.
# factor() also makes this work when USG was read as a character vector.
usg <- factor(dfStudent$USG)
plot(dfStudent$totala, as.integer(usg), pch = 16,
     xlab = "Attendance", ylab = "USG", yaxt = "n")
axis(2, at = seq_along(levels(usg)), labels = levels(usg))

Homework submitted and USG

# Scatter plot of exam category (USG) against the number of homework
# submissions. USG is categorical, so it is converted to a factor and plotted
# by its integer level codes; the default y axis is suppressed and redrawn
# with the level names so the categories are readable instead of showing bare
# codes. factor() also makes this work when USG was read as a character vector.
usg <- factor(dfStudent$USG)
plot(dfStudent$totalhw, as.integer(usg), pch = 16,
     xlab = "Homework Submitted", ylab = "USG", yaxt = "n")
axis(2, at = seq_along(levels(usg)), labels = levels(usg))

5. Correlation analysis

a. The correlation between the variables NrSubmittedHomeworkAssignments (here: totalhw), NrAttendances (here: totala) and the target variable Exam (here: exam, the exam result)

# Pairwise correlations between the homework count, the exam grade and the
# attendance count (cor() with its default method).
score.cols <- c("totalhw", "exam", "totala")
correl <- cor(dfStudent[score.cols])
correl
##           totalhw      exam    totala
## totalhw 1.0000000 0.6649499 0.8155232
## exam    0.6649499 1.0000000 0.6642296
## totala  0.8155232 0.6642296 1.0000000

b. Show the correlations in a corrgram and in a corrplot. You need to install the corresponding R packages (corrgram and corrplot) to be able to use these.

library(corrgram)
## Warning: package 'corrgram' was built under R version 3.2.4
# Draw the correlation matrix as a corrgram: shaded cells in the lower
# triangle, pie glyphs in the upper triangle and the variable names as text;
# order = TRUE asks corrgram to reorder the variables.
corrgram(
  correl,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Student Doing Home Work, Student Attendance and Exam Results Correlation "
)

library(corrplot)
## Warning: package 'corrplot' was built under R version 3.2.4
# Draw the correlation matrix with corrplot. The heading is passed through
# corrplot's own `title` argument, and `mar` reserves room at the top of the
# figure: corrplot's default margins are all zero, which clips the title.
corrplot(
  correl,
  title = "Student Doing Home Work, Student Attendance and Exam Results Correlation ",
  mar = c(0, 0, 2, 0)
)