require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Download the absenteeism CSV into a base data frame. The file is fetched
# over the network on every run, so this step needs connectivity.
Absent <- read.csv(
  file = "http://www.personal.psu.edu/dlp/w540/quine.csv",
  header = TRUE,
  sep = ","
)
# Wrap the data frame in a tibble for compact printing. `tbl_df()` is
# deprecated since dplyr 1.0.0; `as_tibble()` (re-exported by dplyr) is the
# supported replacement and yields the same tibble.
Absenteeism <- as_tibble(Absent)
# Auto-print a preview of the first rows.
Absenteeism
## Source: local data frame [146 x 4]
##
## Eth Sex Lrn Days
## (int) (int) (int) (int)
## 1 1 1 0 2
## 2 1 1 0 11
## 3 1 1 0 14
## 4 1 1 1 5
## 5 1 1 1 5
## 6 1 1 1 13
## 7 1 1 1 20
## 8 1 1 1 22
## 9 1 1 0 6
## 10 1 1 0 6
## .. ... ... ... ...
# Transposed preview: one line per column showing its type and leading values.
glimpse(Absenteeism)
## Observations: 146
## Variables: 4
## $ Eth (int) 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ Sex (int) 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ Lrn (int) 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...
## $ Days (int) 2, 11, 14, 5, 5, 13, 20, 22, 6, 6, 15, 7, 14, 6, 32, 53, ...
# Per-column min, quartiles, median, mean and max of the four numeric columns.
summary(Absenteeism)
## Eth Sex Lrn Days
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 5.00
## Median :0.0000 Median :0.0000 Median :1.0000 Median :11.00
## Mean :0.4726 Mean :0.4521 Mean :0.5548 Mean :16.46
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:22.75
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :81.00
# Compare absence days between the two Eth codes (0 vs 1). The group names
# (na = Eth 0, ab = Eth 1) follow the existing variable names; the meaning of
# the codes is not defined in this script -- TODO confirm against the data.
Days.na <- with(Absenteeism, Days[Eth == 0])
Days.ab <- with(Absenteeism, Days[Eth == 1])
# Default t.test() arguments: two-sided Welch (unequal-variance) test.
t.test(Days.na, Days.ab)
##
## Welch Two Sample t-test
##
## data: Days.na and Days.ab
## t = -3.4358, df = 126.85, p-value = 0.0007991
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -14.262384 -3.837747
## sample estimates:
## mean of x mean of y
## 12.18182 21.23188
If α = .05, then the p-value, 0.0007991, is less than α. Therefore, we can reject the null hypothesis that there is no difference between aboriginal absent days and non-aboriginal absent days.
# Compare absence days between the two Sex codes (0 vs 1). The group names
# (female = Sex 0, male = Sex 1) follow the existing variable names; the
# coding itself is not defined in this script -- TODO confirm against the data.
Days.female <- with(Absenteeism, Days[Sex == 0])
Days.male <- with(Absenteeism, Days[Sex == 1])
# Default t.test() arguments: two-sided Welch (unequal-variance) test.
t.test(Days.female, Days.male)
##
## Welch Two Sample t-test
##
## data: Days.female and Days.male
## t = -1.0058, df = 136.35, p-value = 0.3163
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -8.096135 2.637044
## sample estimates:
## mean of x mean of y
## 15.22500 17.95455
If α = .05, then the p-value, 0.3163, is greater than α. Therefore, we cannot reject the null hypothesis that there is no difference between female absent days and male absent days.
# Compare absence days between the two Lrn codes (0 vs 1). The group names
# (slow = Lrn 0, average = Lrn 1) follow the existing variable names; the
# coding itself is not defined in this script -- TODO confirm against the data.
Days.slow <- with(Absenteeism, Days[Lrn == 0])
Days.average <- with(Absenteeism, Days[Lrn == 1])
# Default t.test() arguments: two-sided Welch (unequal-variance) test.
t.test(Days.slow, Days.average)
##
## Welch Two Sample t-test
##
## data: Days.slow and Days.average
## t = 0.4078, df = 115.88, p-value = 0.6842
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.403432 6.686813
## sample estimates:
## mean of x mean of y
## 17.09231 15.95062
If α = .05, then the p-value, 0.6842, is greater than α. Therefore, we cannot reject the null hypothesis that there is no difference between average learners absent days and slow learners absent days.