require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Download the absenteeism data (columns Eth, Sex, Lrn, Days) as a plain
# data frame. `header` and `sep` are spelled out even though they match the
# read.csv() defaults.
Absent <- read.csv(
  file = "http://www.personal.psu.edu/dlp/w540/quine.csv",
  header = TRUE,
  sep = ","
)
# tbl_df() is deprecated in dplyr; as_tibble() is its supported replacement.
Absenteeism <- as_tibble(Absent)
# Auto-print the tibble to preview the first rows.
Absenteeism
## Source: local data frame [146 x 4]
## 
##      Eth   Sex   Lrn  Days
##    (int) (int) (int) (int)
## 1      1     1     0     2
## 2      1     1     0    11
## 3      1     1     0    14
## 4      1     1     1     5
## 5      1     1     1     5
## 6      1     1     1    13
## 7      1     1     1    20
## 8      1     1     1    22
## 9      1     1     0     6
## 10     1     1     0     6
## ..   ...   ...   ...   ...
# Transposed overview of the tibble: one line per column showing its type and
# leading values.
glimpse(Absenteeism)
## Observations: 146
## Variables: 4
## $ Eth  (int) 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ Sex  (int) 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ Lrn  (int) 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...
## $ Days (int) 2, 11, 14, 5, 5, 13, 20, 22, 6, 6, 15, 7, 14, 6, 32, 53, ...
# Per-column summary statistics (min, quartiles, median, mean, max).
summary(Absenteeism)
##       Eth              Sex              Lrn              Days      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   : 0.00  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.: 5.00  
##  Median :0.0000   Median :0.0000   Median :1.0000   Median :11.00  
##  Mean   :0.4726   Mean   :0.4521   Mean   :0.5548   Mean   :16.46  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:22.75  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :81.00
# Split days absent by Eth; the accompanying interpretation treats Eth == 1 as
# aboriginal and Eth == 0 as non-aboriginal.
# The mask is taken from Absenteeism itself (not the separate Absent data
# frame) so the selected rows always belong to the table being subset.
Days.na <- Absenteeism$Days[Absenteeism$Eth == 0]
Days.ab <- Absenteeism$Days[Absenteeism$Eth == 1]
# Two-sample t-test on the group means; with two vectors and default
# arguments t.test() runs the Welch (unequal-variance) version.
t.test(Days.na, Days.ab)
## 
##  Welch Two Sample t-test
## 
## data:  Days.na and Days.ab
## t = -3.4358, df = 126.85, p-value = 0.0007991
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -14.262384  -3.837747
## sample estimates:
## mean of x mean of y 
##  12.18182  21.23188

If α = .05, then the p-value, 0.0007991, is less than α. Therefore, we can reject the null hypothesis that there is no difference between the mean number of days absent for aboriginal students and for non-aboriginal students.

# Split days absent by Sex; the variable names treat Sex == 0 as female and
# Sex == 1 as male.
# The mask is taken from Absenteeism itself (not the separate Absent data
# frame) so the selected rows always belong to the table being subset.
Days.female <- Absenteeism$Days[Absenteeism$Sex == 0]
Days.male <- Absenteeism$Days[Absenteeism$Sex == 1]
# Two-sample t-test on the group means; with two vectors and default
# arguments t.test() runs the Welch (unequal-variance) version.
t.test(Days.female, Days.male)
## 
##  Welch Two Sample t-test
## 
## data:  Days.female and Days.male
## t = -1.0058, df = 136.35, p-value = 0.3163
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -8.096135  2.637044
## sample estimates:
## mean of x mean of y 
##  15.22500  17.95455

If α = .05, then the p-value, 0.3163, is greater than α. Therefore, we cannot reject the null hypothesis that there is no difference between the mean number of days absent for female students and for male students.

# Split days absent by Lrn.
# NOTE(review): the names assume Lrn == 0 codes slow learners and Lrn == 1
# codes average learners -- confirm against the data dictionary.
# The mask is taken from Absenteeism itself (not the separate Absent data
# frame) so the selected rows always belong to the table being subset.
Days.slow <- Absenteeism$Days[Absenteeism$Lrn == 0]
Days.average <- Absenteeism$Days[Absenteeism$Lrn == 1]
# Two-sample t-test on the group means; with two vectors and default
# arguments t.test() runs the Welch (unequal-variance) version.
t.test(Days.slow, Days.average)
## 
##  Welch Two Sample t-test
## 
## data:  Days.slow and Days.average
## t = 0.4078, df = 115.88, p-value = 0.6842
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.403432  6.686813
## sample estimates:
## mean of x mean of y 
##  17.09231  15.95062

If α = .05, then the p-value, 0.6842, is greater than α. Therefore, we cannot reject the null hypothesis that there is no difference between the mean number of days absent for slow learners and for average learners.