This is an R Markdown document for the exercises of weeks 5 and 6. Analyse the dataset by showing:
# Load the student results table from a file picked interactively; the first
# row of the file holds the column names.
# stringsAsFactors is set explicitly because its default changed to FALSE in
# R 4.0.0, and the plots of gender, X2nd and USG further down need these
# columns to be factors.
ssr <- read.table(file.choose(), header = TRUE, stringsAsFactors = TRUE)
ssr
## student gender X2nd age a1 a2 a3 a4 a5 totala hw2 hw3 hw4 hw5 totalhw
## 1 gerrit m MBO 21 1 1 1 1 1 5 1 1 1 1 4
## 2 saar f HAVO 23 1 1 1 1 1 5 1 0 1 1 3
## 3 lars m VWO 19 1 1 1 1 1 5 1 1 1 1 4
## 4 henk m MBO 23 1 1 1 1 1 5 1 1 1 1 4
## 5 klara f MBO 19 1 0 1 0 1 3 0 1 0 1 2
## 6 sem m HAVO 20 1 1 1 1 1 5 1 1 1 1 4
## 7 liv f HAVO 19 1 1 1 1 1 5 1 1 1 1 4
## 8 rinus m VWO 19 1 1 1 0 1 4 1 1 0 1 3
## 9 tess f MBO 19 0 1 1 0 0 2 1 1 0 0 2
## 10 tim m HAVO 18 1 1 1 1 1 5 1 0 1 1 3
## 11 lisa f HAVO 19 1 1 1 1 1 5 1 1 1 1 4
## 12 lotte f HAVO 24 1 1 1 1 1 5 1 1 1 1 4
## 13 tinus m MBO 26 1 0 1 0 1 3 1 0 0 0 1
## 14 karel m MBO 20 1 1 1 0 1 4 1 1 1 1 4
## 15 mila f VWO 19 1 1 1 1 1 5 1 1 1 1 4
## 16 betrand m HAVO 21 1 1 1 1 1 5 1 1 1 1 4
## 17 liam m MBO 19 1 1 1 0 1 4 1 1 0 1 3
## 18 janus m MBO 18 1 1 1 1 1 5 1 1 1 1 4
## 19 riek f HAVO 19 1 0 0 1 1 3 1 0 0 1 2
## 20 zus f HAVO 20 1 0 1 1 1 4 1 1 0 1 3
## 21 jayden m HAVO 19 1 1 1 0 1 4 1 1 0 1 3
## 22 ans f HAVO 22 1 1 1 1 1 5 1 1 1 1 4
## 23 sien f MBO 21 1 1 1 1 0 4 1 1 1 0 3
## 24 thomas m VWO 20 1 1 1 1 1 5 1 1 1 1 4
## 25 loek m HAVO 24 1 1 1 0 1 4 1 1 1 0 3
## 26 daan m VWO 20 1 1 0 0 1 3 1 0 0 0 1
## 27 bert m HAVO 19 1 1 1 1 1 5 1 1 1 1 4
## 28 noah m HAVO 21 1 1 1 0 1 4 1 0 0 1 2
## 29 thijs m VWO 20 1 1 1 1 1 5 1 1 1 1 4
## 30 jesse m HAVO 18 1 0 1 1 1 4 1 0 1 1 3
## 31 julia f VWO 20 1 1 0 1 1 4 1 1 1 1 4
## 32 piet m MBO 20 1 1 1 0 1 4 1 1 1 1 4
## 33 jet f MBO 21 1 1 1 1 1 5 1 1 1 1 4
## 34 bram m MBO 21 1 1 0 1 0 3 1 0 1 0 2
## 35 ans f HAVO 25 1 1 0 0 1 3 1 1 0 0 2
## 36 eva f HAVO 20 1 1 1 1 0 4 1 1 1 0 3
## 37 mees m MBO 24 1 1 1 1 1 5 1 1 1 1 4
## 38 levi m HAVO 21 1 0 1 0 1 3 1 0 1 0 2
## 39 luuk m MBO 20 1 1 1 1 1 5 1 1 1 1 4
## 40 zoë f MBO 19 1 1 1 1 1 5 1 1 1 1 4
## 41 sjaak m HAVO 20 1 0 1 1 1 4 1 1 1 1 4
## 42 theo m MBO 21 1 1 1 1 1 5 1 1 0 1 3
## 43 lucas m HAVO 21 1 1 0 1 1 4 1 1 0 1 3
## 44 nellie f VWO 22 1 0 1 0 1 3 1 1 0 1 3
## exam USG
## 1 7 S
## 2 6 S
## 3 8 G
## 4 9 G
## 5 6 S
## 6 7 S
## 7 8 G
## 8 9 G
## 9 4 U
## 10 9 G
## 11 8 G
## 12 8 G
## 13 3 U
## 14 4 U
## 15 8 G
## 16 5 U
## 17 6 S
## 18 7 S
## 19 5 U
## 20 6 S
## 21 4 U
## 22 8 G
## 23 6 S
## 24 9 G
## 25 5 U
## 26 2 U
## 27 7 S
## 28 5 U
## 29 9 G
## 30 5 U
## 31 8 G
## 32 5 U
## 33 4 U
## 34 4 U
## 35 3 U
## 36 7 S
## 37 6 S
## 38 4 U
## 39 8 G
## 40 9 G
## 41 6 S
## 42 4 U
## 43 6 S
## 44 5 U
# Count the students per gender; the counts are printed and then drawn with
# plot().
table( ssr$gender)
##
## f m
## 17 27
plot(table( ssr$gender))
# Count the students per level of the X2nd column (labelled "Secondary school"
# in a later plot).
table( ssr$X2nd)
##
## HAVO MBO VWO
## 20 16 8
#c. age class (17-<20; 20-<23; 23-<)
max(ssr$age)
## [1] 26
# Bin the ages into the three requested half-open classes with cut().
# Indexing such as ssr$age[17-20] is NOT a range selection: 17-20 is -3, so it
# returns every age except the third one. The last class is open-ended, so its
# upper break is Inf rather than the observed maximum.
age.breaks <- c(17, 20, 23, Inf)
age.cut <- cut(ssr$age, breaks = age.breaks, right = FALSE)
age.freq <- table(age.cut)
age.freq
## age.cut
## [17,20) [20,23) [23,Inf)
## 15 22 7
# One bar per age class; hist() takes a single data vector, so the class
# counts are drawn with barplot() instead.
barplot(age.freq, col = c("red1", "yellow", "plum3"),
        xlab = "Age class", ylab = "Number of students")
# Frequency of every individual age, for reference.
table(ssr$age)
##
## 18 19 20 21 22 23 24 25 26
## 3 12 11 9 2 2 3 1 1
plot(table(ssr$age))
d. attendance (0-<2; 2-<4; 4; 5)
ssr$totala
## [1] 5 5 5 5 3 5 5 4 2 5 5 5 3 4 5 5 4 5 3 4 4 5 4 5 4 3 5 4 5 4 4 4 5 3 3
## [36] 4 5 3 5 5 4 5 4 3
# Tabulate attendance in the four requested classes (0-<2; 2-<4; 4; 5) with a
# single cut(). With right = FALSE every interval is closed on the left, so
# [4,5) holds exactly the value 4 and [5,6) exactly the value 5.
at.breaks <- c(0, 2, 4, 5, 6)
at.cut <- cut(ssr$totala, breaks = at.breaks, right = FALSE)
at.freq <- table(at.cut)
at.freq
## at.cut
## [0,2) [2,4) [4,5) [5,6)
## 0 9 14 21
# Frequency of the total number of submitted homework assignments, using
# unit-wide left-closed intervals from 0 up to 5.
hw <- 0:5
hw.cut <- cut(ssr$totalhw, breaks = hw, right = FALSE)
table(hw.cut)
## hw.cut
## [0,1) [1,2) [2,3) [3,4) [4,5)
## 0 2 7 13 22
# mode() only reports the storage mode of an object ("numeric"), not the
# statistical mode. stat.mode() returns the most frequent value(s) of a
# numeric vector, i.e. all of them when several values tie.
stat.mode <- function(x) {
  freq <- table(x)
  as.numeric(names(freq)[freq == max(freq)])
}
mean(ssr$exam)
## [1] 6.181818
# The grades 6 and 8 both occur eight times, so the exam result is bimodal.
stat.mode(ssr$exam)
## [1] 6 8
median(ssr$exam)
## [1] 6
b. Number of attendances
# mode() only reports the storage mode of an object ("numeric"), not the
# statistical mode. stat.mode() returns the most frequent value(s) of a
# numeric vector, i.e. all of them when several values tie.
stat.mode <- function(x) {
  freq <- table(x)
  as.numeric(names(freq)[freq == max(freq)])
}
# Mode, mean and median of each 0/1 attendance column a1..a5.
stat.mode(ssr$a1)
## [1] 1
mean(ssr$a1)
## [1] 0.9772727
median(ssr$a1)
## [1] 1
stat.mode(ssr$a2)
## [1] 1
mean(ssr$a2)
## [1] 0.8181818
median(ssr$a2)
## [1] 1
stat.mode(ssr$a3)
## [1] 1
mean(ssr$a3)
## [1] 0.8636364
median(ssr$a3)
## [1] 1
stat.mode(ssr$a4)
## [1] 1
mean(ssr$a4)
## [1] 0.6818182
median(ssr$a4)
## [1] 1
stat.mode(ssr$a5)
## [1] 1
mean(ssr$a5)
## [1] 0.9090909
median(ssr$a5)
## [1] 1
c. Number of homework assignments submitted
# mode() only reports the storage mode of an object ("numeric"), not the
# statistical mode. stat.mode() returns the most frequent value(s) of a
# numeric vector, i.e. all of them when several values tie.
stat.mode <- function(x) {
  freq <- table(x)
  as.numeric(names(freq)[freq == max(freq)])
}
# Mode, mean and median of each 0/1 homework column hw2..hw5.
stat.mode(ssr$hw2)
## [1] 1
mean(ssr$hw2)
## [1] 0.9772727
median(ssr$hw2)
## [1] 1
stat.mode(ssr$hw3)
## [1] 1
mean(ssr$hw3)
## [1] 0.7954545
median(ssr$hw3)
## [1] 1
stat.mode(ssr$hw4)
## [1] 1
mean(ssr$hw4)
## [1] 0.6818182
median(ssr$hw4)
## [1] 1
stat.mode(ssr$hw5)
## [1] 1
mean(ssr$hw5)
## [1] 0.7954545
median(ssr$hw5)
## [1] 1
# Sum of the exam column over all students.
exam.total <- sum(ssr$exam)
exam.total
## [1] 272
number of attendance
# Grand total over the five attendance columns a1..a5.
sum(ssr[, c("a1", "a2", "a3", "a4", "a5")])
## [1] 187
number of homework submitted
# Grand total over the four homework columns hw2..hw5.
sum(ssr[, c("hw2", "hw3", "hw4", "hw5")])
## [1] 143
4. Relation between: gender and USG
# Plot USG against gender.
with(ssr, plot(gender, USG, xlab = "Gender", ylab = "USG"))
2nd and USG
# Plot USG against the X2nd column.
with(ssr, plot(X2nd, USG, xlab = "Secondary school", ylab = "USG"))
attendance and result
# Scatter plot of the exam result against total attendance.
with(ssr, plot(totala, exam, xlab = "Attendance", ylab = "Exam result"))
attendance and USG
# Plot USG against total attendance.
with(ssr, plot(totala, USG, xlab = "Attendance", ylab = "USG"))
homework submitted and USG
# Plot USG against the total number of submitted homework assignments.
with(ssr, plot(totalhw, USG, xlab = "Homework submited", ylab = "USG"))
Week 6 a. The correlation between each of the variables NrSubmittedHomeworkAssignments (or any shorter name) and NrAttendances, and the target variable Exam (expressing the exam result).
# Exam result against attendance and against submitted homework, drawn side
# by side. par() returns the previous settings, which are restored afterwards
# so that later plots are not squeezed into the 1 x 2 layout.
old.par <- par(mfrow = c(1, 2))
plot(ssr$totala, ssr$exam, col = "red1",
     xlab = "Number of attendance", ylab = "Exam result")
plot(ssr$totalhw, ssr$exam, col = "red1",
     xlab = "Number of Submitted homeworks", ylab = "Exam result")
par(old.par)
# The same two relations on a log-log scale; the axis labels state the
# transformation because the plotted values are logarithms, not raw counts.
old.par <- par(mfrow = c(1, 2))
plot(log(ssr$totala), log(ssr$exam), pch = 16, col = "red1",
     xlab = "log(Number of attendance)", ylab = "log(Exam result)")
plot(log(ssr$totalhw), log(ssr$exam), pch = 16, col = "red1",
     xlab = "log(Number of Submitted homeworks)", ylab = "log(Exam result)")
par(old.par)
In addition, show the correlations in a corrgram and in a corrplot. You need to install the proper R packages to be able to use these.
# Install corrgram on first use (over https), then attach it.
# requireNamespace() only checks availability; library() does the attaching
# and stops with an error if the package is still missing.
if (!requireNamespace("corrgram", quietly = TRUE)) {
  install.packages("corrgram", repos = "https://cran.rstudio.com/")
}
library("corrgram")
## Warning: package 'corrgram' was built under R version 3.2.4
# Show the first six rows of the data as a reminder of the column layout.
head(ssr, n = 6L)
## student gender X2nd age a1 a2 a3 a4 a5 totala hw2 hw3 hw4 hw5 totalhw
## 1 gerrit m MBO 21 1 1 1 1 1 5 1 1 1 1 4
## 2 saar f HAVO 23 1 1 1 1 1 5 1 0 1 1 3
## 3 lars m VWO 19 1 1 1 1 1 5 1 1 1 1 4
## 4 henk m MBO 23 1 1 1 1 1 5 1 1 1 1 4
## 5 klara f MBO 19 1 0 1 0 1 3 0 1 0 1 2
## 6 sem m HAVO 20 1 1 1 1 1 5 1 1 1 1 4
## exam USG
## 1 7 S
## 2 6 S
## 3 8 G
## 4 9 G
## 5 6 S
## 6 7 S
# Collect the three numeric variables to correlate into one matrix; the
# argument names given to cbind() become the column names.
# totalhw is spelled out in full so the lookup does not depend on partial
# matching of column names by `$`.
exam.attencanse.homework <- cbind(
  Total.Attendance = ssr$totala,
  Exam.Result = ssr$exam,
  Total.Homework = ssr$totalhw
)
exam.attencanse.homework
## Total.Attendance Exam.Result Total.Homework
## [1,] 5 7 4
## [2,] 5 6 3
## [3,] 5 8 4
## [4,] 5 9 4
## [5,] 3 6 2
## [6,] 5 7 4
## [7,] 5 8 4
## [8,] 4 9 3
## [9,] 2 4 2
## [10,] 5 9 3
## [11,] 5 8 4
## [12,] 5 8 4
## [13,] 3 3 1
## [14,] 4 4 4
## [15,] 5 8 4
## [16,] 5 5 4
## [17,] 4 6 3
## [18,] 5 7 4
## [19,] 3 5 2
## [20,] 4 6 3
## [21,] 4 4 3
## [22,] 5 8 4
## [23,] 4 6 3
## [24,] 5 9 4
## [25,] 4 5 3
## [26,] 3 2 1
## [27,] 5 7 4
## [28,] 4 5 2
## [29,] 5 9 4
## [30,] 4 5 3
## [31,] 4 8 4
## [32,] 4 5 4
## [33,] 5 4 4
## [34,] 3 4 2
## [35,] 3 3 2
## [36,] 4 7 3
## [37,] 5 6 4
## [38,] 3 4 2
## [39,] 5 8 4
## [40,] 5 9 4
## [41,] 4 6 4
## [42,] 5 4 3
## [43,] 4 6 3
## [44,] 3 5 3
# Pairwise correlations (cor() default method) between attendance, exam
# result and homework, drawn as a corrgram with shaded cells below the
# diagonal and pie glyphs above it.
correlation <- cor(x = exam.attencanse.homework)
corrgram(
  correlation,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = ""
)
# Install corrplot on first use (over https), then attach it.
# requireNamespace() only checks availability; library() does the attaching
# and stops with an error if the package is still missing.
if (!requireNamespace("corrplot", quietly = TRUE)) {
  install.packages("corrplot", repos = "https://cran.rstudio.com/")
}
library("corrplot")
## Warning: package 'corrplot' was built under R version 3.2.4
# Draw the same correlation matrix with corrplot's default settings.
corrplot(correlation)