R Markdown

This is an R Markdown document for the exercises of weeks 5 and 6. It analyses the dataset by showing:

# Load the student dataset (header row, whitespace-separated columns) from a
# file picked interactively.
# stringsAsFactors = TRUE is stated explicitly: since R 4.0.0 read.table()
# defaults to FALSE, and the plots of gender, X2nd and USG further down rely
# on these columns being factors.
ssr <- read.table(file.choose(), header = TRUE, stringsAsFactors = TRUE)
ssr
##    student gender X2nd age a1 a2 a3 a4 a5 totala hw2 hw3 hw4 hw5 totalhw
## 1   gerrit      m  MBO  21  1  1  1  1  1      5   1   1   1   1       4
## 2     saar      f HAVO  23  1  1  1  1  1      5   1   0   1   1       3
## 3     lars      m  VWO  19  1  1  1  1  1      5   1   1   1   1       4
## 4     henk      m  MBO  23  1  1  1  1  1      5   1   1   1   1       4
## 5    klara      f  MBO  19  1  0  1  0  1      3   0   1   0   1       2
## 6      sem      m HAVO  20  1  1  1  1  1      5   1   1   1   1       4
## 7      liv      f HAVO  19  1  1  1  1  1      5   1   1   1   1       4
## 8    rinus      m  VWO  19  1  1  1  0  1      4   1   1   0   1       3
## 9     tess      f  MBO  19  0  1  1  0  0      2   1   1   0   0       2
## 10     tim      m HAVO  18  1  1  1  1  1      5   1   0   1   1       3
## 11    lisa      f HAVO  19  1  1  1  1  1      5   1   1   1   1       4
## 12   lotte      f HAVO  24  1  1  1  1  1      5   1   1   1   1       4
## 13   tinus      m  MBO  26  1  0  1  0  1      3   1   0   0   0       1
## 14   karel      m  MBO  20  1  1  1  0  1      4   1   1   1   1       4
## 15    mila      f  VWO  19  1  1  1  1  1      5   1   1   1   1       4
## 16 betrand      m HAVO  21  1  1  1  1  1      5   1   1   1   1       4
## 17    liam      m  MBO  19  1  1  1  0  1      4   1   1   0   1       3
## 18   janus      m  MBO  18  1  1  1  1  1      5   1   1   1   1       4
## 19    riek      f HAVO  19  1  0  0  1  1      3   1   0   0   1       2
## 20     zus      f HAVO  20  1  0  1  1  1      4   1   1   0   1       3
## 21  jayden      m HAVO  19  1  1  1  0  1      4   1   1   0   1       3
## 22     ans      f HAVO  22  1  1  1  1  1      5   1   1   1   1       4
## 23    sien      f  MBO  21  1  1  1  1  0      4   1   1   1   0       3
## 24  thomas      m  VWO  20  1  1  1  1  1      5   1   1   1   1       4
## 25    loek      m HAVO  24  1  1  1  0  1      4   1   1   1   0       3
## 26    daan      m  VWO  20  1  1  0  0  1      3   1   0   0   0       1
## 27    bert      m HAVO  19  1  1  1  1  1      5   1   1   1   1       4
## 28    noah      m HAVO  21  1  1  1  0  1      4   1   0   0   1       2
## 29   thijs      m  VWO  20  1  1  1  1  1      5   1   1   1   1       4
## 30   jesse      m HAVO  18  1  0  1  1  1      4   1   0   1   1       3
## 31   julia      f  VWO  20  1  1  0  1  1      4   1   1   1   1       4
## 32    piet      m  MBO  20  1  1  1  0  1      4   1   1   1   1       4
## 33     jet      f  MBO  21  1  1  1  1  1      5   1   1   1   1       4
## 34    bram      m  MBO  21  1  1  0  1  0      3   1   0   1   0       2
## 35     ans      f HAVO  25  1  1  0  0  1      3   1   1   0   0       2
## 36     eva      f HAVO  20  1  1  1  1  0      4   1   1   1   0       3
## 37    mees      m  MBO  24  1  1  1  1  1      5   1   1   1   1       4
## 38    levi      m HAVO  21  1  0  1  0  1      3   1   0   1   0       2
## 39    luuk      m  MBO  20  1  1  1  1  1      5   1   1   1   1       4
## 40    zoë      f  MBO  19  1  1  1  1  1      5   1   1   1   1       4
## 41   sjaak      m HAVO  20  1  0  1  1  1      4   1   1   1   1       4
## 42    theo      m  MBO  21  1  1  1  1  1      5   1   1   0   1       3
## 43   lucas      m HAVO  21  1  1  0  1  1      4   1   1   0   1       3
## 44  nellie      f  VWO  22  1  0  1  0  1      3   1   1   0   1       3
##    exam USG
## 1     7   S
## 2     6   S
## 3     8   G
## 4     9   G
## 5     6   S
## 6     7   S
## 7     8   G
## 8     9   G
## 9     4   U
## 10    9   G
## 11    8   G
## 12    8   G
## 13    3   U
## 14    4   U
## 15    8   G
## 16    5   U
## 17    6   S
## 18    7   S
## 19    5   U
## 20    6   S
## 21    4   U
## 22    8   G
## 23    6   S
## 24    9   G
## 25    5   U
## 26    2   U
## 27    7   S
## 28    5   U
## 29    9   G
## 30    5   U
## 31    8   G
## 32    5   U
## 33    4   U
## 34    4   U
## 35    3   U
## 36    7   S
## 37    6   S
## 38    4   U
## 39    8   G
## 40    9   G
## 41    6   S
## 42    4   U
## 43    6   S
## 44    5   U
  1. Frequency distributions for:
  a. Gender
# Frequency of each gender level in the dataset.
table( ssr$gender)
## 
##  f  m 
## 17 27
# Plot the same frequency table (plot method for table objects).
plot(table( ssr$gender))

  b. Secondary school
# Frequency of each secondary-school type (column X2nd).
table(ssr[["X2nd"]])
## 
## HAVO  MBO  VWO 
##   20   16    8
  c. age class (17-<20; 20-<23; 23-<)
# c. age class (17-<20; 20-<23; 23-<)
# Ages are binned with cut(); an index expression such as ssr$age[17-20] would
# only drop element 3 (index -3) instead of selecting an age range.
max(ssr$age)
## [1] 26
# Left-closed classes; the last class is open-ended so that the maximum age
# printed above always falls inside it.
age.class <- cut(ssr$age, breaks = c(17, 20, 23, Inf),
                 labels = c("17-<20", "20-<23", "23-<"), right = FALSE)
age.freq <- table(age.class)
age.freq
## age.class
## 17-<20 20-<23   23-< 
##     15     22      7
# One bar per age class.
barplot(age.freq, col = c("red1", "yellow", "plum3"),
        xlab = "Age class", ylab = "Frequency")

# Frequency of every individual age, for reference.
table(ssr$age)
## 
## 18 19 20 21 22 23 24 25 26 
##  3 12 11  9  2  2  3  1  1
plot(table(ssr$age))

d. attendance (0-<2; 2-<4; 4; 5)

ssr$totala
##  [1] 5 5 5 5 3 5 5 4 2 5 5 5 3 4 5 5 4 5 3 4 4 5 4 5 4 3 5 4 5 4 4 4 5 3 3
## [36] 4 5 3 5 5 4 5 4 3
# One cut() call produces all four requested classes (0-<2; 2-<4; 4; 5) in a
# single table. Intervals are left-closed; the open-ended last break makes
# the top class hold every total of 5 or more.
at.breaks <- c(0, 2, 4, 5, Inf)
at.cut <- cut(ssr$totala, breaks = at.breaks,
              labels = c("0-<2", "2-<4", "4", "5"), right = FALSE)
at.freq <- table(at.cut)
at.freq
## at.cut
## 0-<2 2-<4    4    5 
##    0    9   14   21
  e. homework submitted (0-<2; 2-<4; 4; 5)
# Bin the homework totals into the requested classes (0-<2; 2-<4; 4; 5).
# Intervals are left-closed; the open-ended last break forms the top class.
hw <- c(0, 2, 4, 5, Inf)
hw.cut <- cut(ssr$totalhw, hw, labels = c("0-<2", "2-<4", "4", "5"), right = FALSE)
table(hw.cut)
## hw.cut
## 0-<2 2-<4    4    5 
##    2   20   22    0
  2. Central tendencies for: a. Results
# Statistical mode: the most frequent value(s) of x.
# Base R's mode() reports the storage mode (e.g. "numeric"), so it cannot be
# used for this. Returns every value tied for the highest count, sorted;
# NA values are ignored and an empty input is returned unchanged.
stat.mode <- function(x) {
  x <- x[!is.na(x)]
  if (length(x) == 0) {
    return(x)
  }
  values <- unique(x)
  counts <- tabulate(match(x, values))
  sort(values[counts == max(counts)])
}
mean(ssr$exam)
## [1] 6.181818
stat.mode(ssr$exam)
## [1] 6 8
median(ssr$exam)
## [1] 6

b.Number of attendance

# The number of attendances per student is the total in totala.
stat.mode(ssr$totala)
## [1] 5
mean(ssr$totala)
## [1] 4.25
median(ssr$totala)
## [1] 4
# Mean of each per-session 0/1 column, i.e. the share of students present.
colMeans(ssr[, c("a1", "a2", "a3", "a4", "a5")])
##        a1        a2        a3        a4        a5 
## 0.9772727 0.8181818 0.8636364 0.6818182 0.9090909

c.Number of homework submitted

# The number of submitted homeworks per student is the total in totalhw.
stat.mode(ssr$totalhw)
## [1] 4
mean(ssr$totalhw)
## [1] 3.25
median(ssr$totalhw)
## [1] 3.5
# Mean of each per-assignment 0/1 column, i.e. the share of students who
# submitted it.
colMeans(ssr[, c("hw2", "hw3", "hw4", "hw5")])
##       hw2       hw3       hw4       hw5 
## 0.9772727 0.7954545 0.6818182 0.7954545
  3. Data dispersion for: results
# Spread of the exam results: range, sample variance and standard deviation.
# (A plain sum() only totals the values and says nothing about dispersion.)
range(ssr$exam)
## [1] 2 9
var(ssr$exam)
## [1] 3.687104
sd(ssr$exam)
## [1] 1.920183

number of attendance

# Spread of the per-student attendance totals.
range(ssr$totala)
## [1] 2 5
var(ssr$totala)
## [1] 0.7034884
sd(ssr$totala)
## [1] 0.8387421

number of homework submitted

# Spread of the per-student homework totals.
range(ssr$totalhw)
## [1] 1 4
var(ssr$totalhw)
## [1] 0.7965116
sd(ssr$totalhw)
## [1] 0.892475

4. Relation between: gender and USG

# Gender against the USG category.
with(ssr, plot(gender, USG, xlab = "Gender", ylab = "USG"))

2nd and USG

# Secondary-school type against the USG category.
with(ssr, plot(X2nd, USG, xlab = "Secondary school", ylab = "USG"))

attendance and result

# Attendance total against the exam result.
with(ssr, plot(totala, exam, xlab = "Attendance", ylab = "Exam result"))

attendance and USG

# Attendance total against the USG category.
with(ssr, plot(totala, USG, xlab = "Attendance", ylab = "USG"))

homework submitted and USG

# Homework total against the USG category.
with(ssr, plot(totalhw, USG, xlab = "Homework submited", ylab = "USG"))

Week 6 a. the correlation between any of the variables NrSubmittedHomeworkAssignments (or any shorter name), NrAttendances and the target variable Exam (expressing the exam result)

# Exam result against attendance and against homework, side by side.
# par() returns the previous settings; they are restored afterwards so later
# plots are not squeezed into the 1 x 2 layout.
old.par <- par(mfrow = c(1, 2))
plot(ssr$totala, ssr$exam, col = "red1", xlab = "Number of attendance", ylab = "Exam result")
plot(ssr$totalhw, ssr$exam, col = "red1", xlab = "Number of Submitted homeworks", ylab = "Exam result")
par(old.par)

# The same two relations on log-transformed axes; the labels name the
# transformation so the scale is not mistaken for the raw counts.
old.par <- par(mfrow = c(1, 2))
plot(log(ssr$totala), log(ssr$exam), pch = 16, col = "red1", xlab = "log(Number of attendance)", ylab = "log(Exam result)")
plot(log(ssr$totalhw), log(ssr$exam), pch = 16, col = "red1", xlab = "log(Number of Submitted homeworks)", ylab = "log(Exam result)")
par(old.par)

In addition, show the correlations in a corrgram and in a corrplot. You need to install the proper R packages to be able to use these

# Install corrgram only when it is not available yet (over HTTPS), then
# attach it with library(), which stops with an error if attaching fails.
if (!requireNamespace("corrgram", quietly = TRUE)) {
  install.packages("corrgram", repos = "https://cran.rstudio.com/")
}
library("corrgram")
## Warning: package 'corrgram' was built under R version 3.2.4
# Quick look at the first rows of the dataset.
head(ssr)
##   student gender X2nd age a1 a2 a3 a4 a5 totala hw2 hw3 hw4 hw5 totalhw
## 1  gerrit      m  MBO  21  1  1  1  1  1      5   1   1   1   1       4
## 2    saar      f HAVO  23  1  1  1  1  1      5   1   0   1   1       3
## 3    lars      m  VWO  19  1  1  1  1  1      5   1   1   1   1       4
## 4    henk      m  MBO  23  1  1  1  1  1      5   1   1   1   1       4
## 5   klara      f  MBO  19  1  0  1  0  1      3   0   1   0   1       2
## 6     sem      m HAVO  20  1  1  1  1  1      5   1   1   1   1       4
##   exam USG
## 1    7   S
## 2    6   S
## 3    8   G
## 4    9   G
## 5    6   S
## 6    7   S
# Numeric matrix holding the attendance total, exam result and homework total
# of every student, one named column each.
# The homework column is spelled out in full (totalhw) instead of relying on
# partial matching of `$`.
exam.attencanse.homework <- cbind(
  Total.Attendance = ssr$totala,
  Exam.Result = ssr$exam,
  Total.Homework = ssr$totalhw
)
exam.attencanse.homework
##       Total.Attendance Exam.Result Total.Homework
##  [1,]                5           7              4
##  [2,]                5           6              3
##  [3,]                5           8              4
##  [4,]                5           9              4
##  [5,]                3           6              2
##  [6,]                5           7              4
##  [7,]                5           8              4
##  [8,]                4           9              3
##  [9,]                2           4              2
## [10,]                5           9              3
## [11,]                5           8              4
## [12,]                5           8              4
## [13,]                3           3              1
## [14,]                4           4              4
## [15,]                5           8              4
## [16,]                5           5              4
## [17,]                4           6              3
## [18,]                5           7              4
## [19,]                3           5              2
## [20,]                4           6              3
## [21,]                4           4              3
## [22,]                5           8              4
## [23,]                4           6              3
## [24,]                5           9              4
## [25,]                4           5              3
## [26,]                3           2              1
## [27,]                5           7              4
## [28,]                4           5              2
## [29,]                5           9              4
## [30,]                4           5              3
## [31,]                4           8              4
## [32,]                4           5              4
## [33,]                5           4              4
## [34,]                3           4              2
## [35,]                3           3              2
## [36,]                4           7              3
## [37,]                5           6              4
## [38,]                3           4              2
## [39,]                5           8              4
## [40,]                5           9              4
## [41,]                4           6              4
## [42,]                5           4              3
## [43,]                4           6              3
## [44,]                3           5              3
# Pairwise correlations between the three columns of the matrix.
correlation <- cor(x = exam.attencanse.homework)
# Corrgram of that correlation matrix: shaded cells below the diagonal, pie
# glyphs above it, variable names on the diagonal, variables reordered.
corrgram(
  correlation,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = ""
)

# Install corrplot only when it is not available yet (over HTTPS), then
# attach it with library(), which stops with an error if attaching fails.
if (!requireNamespace("corrplot", quietly = TRUE)) {
  install.packages("corrplot", repos = "https://cran.rstudio.com/")
}
library("corrplot")
## Warning: package 'corrplot' was built under R version 3.2.4
# Draw the correlation matrix with corrplot's default settings.
corrplot(correlation)