Hi all! This section is about hypothesis testing.

one sample t.test
one sample prop.test
two group comparisons

Download McDonalds.csv here

Download MShopping.csv here

Download Cola.csv here

Download PERatio.csv here

one sample t.test

# example 1
# One-sample t-tests on the `Time` column of McDonalds.csv.
McDonalds <- read.csv("McDonalds.csv")
str(McDonalds)

## 'data.frame':    25 obs. of  1 variable:
##  $ Time: num  155 143 140 152 188 ...

# Extract the column explicitly instead of attach()-ing the data frame:
# attach() changes the search path, and attached columns can be silently
# masked by workspace objects that happen to share their name.
Time <- McDonalds$Time
# t.test with the default null hypothesis mu = 0
t.test(Time)

## 
##  One Sample t-test
## 
## data:  Time
## t = 44.257, df = 24, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  151.6068 166.4388
## sample estimates:
## mean of x 
##  159.0228

## manual calculation
m <- mean(Time)
s <- sd(Time)
n <- length(Time)
se <- s / sqrt(n) # standard error of the mean
dof <- n - 1 # degrees of freedom (not named `df`, which is the F density)
# qt(p, dof) returns the t quantile q with P(T <= q) = p. Using p = 0.975
# leaves 2.5% in each tail: the critical value of a two-sided 95% interval.
t95 <- qt(0.975, dof)


## manual calculation of the t statistic for H0: mu = 0
## (not named `t`, which is the matrix transpose function)
t_stat <- (m - 0) / se

## manual calculation of confidence interval at 0.05 significance level
ci <- m + c(-1, 1) * t95 * se
ci

## [1] 151.6068 166.4388

# example 3
# two-sided test against a hypothesised mean of 174.22
t.test(Time, mu = 174.22)

## 
##  One Sample t-test
## 
## data:  Time
## t = -4.2294, df = 24, p-value = 0.0002946
## alternative hypothesis: true mean is not equal to 174.22
## 95 percent confidence interval:
##  151.6068 166.4388
## sample estimates:
## mean of x 
##  159.0228

# one-sided test: is the true mean less than 174.22?
t.test(Time, mu = 174.22, alternative = "less")

## 
##  One Sample t-test
## 
## data:  Time
## t = -4.2294, df = 24, p-value = 0.0001473
## alternative hypothesis: true mean is less than 174.22
## 95 percent confidence interval:
##      -Inf 165.1703
## sample estimates:
## mean of x 
##  159.0228

# done with the extracted column (takes the place of the former detach())
rm(Time)

one sample prop.test

# example 2
# One-sample proportion test on the Yes/No answers in MShopping.csv.
MShopping <- read.csv("MShopping.csv")
str(MShopping)

## 'data.frame':    465 obs. of  1 variable:
##  $ MShopping: Factor w/ 2 levels "No","Yes": 1 1 2 2 2 2 1 1 2 1 ...

# table() counts the "Yes" & "No" answers. The result is stored as `counts`
# so that the function table() is not masked by a variable of the same name.
counts <- table(MShopping)
counts

## MShopping
##  No Yes 
## 140 325

n <- nrow(MShopping)

# proportion test of H0: p = 0.5 for the "Yes" count (second entry);
# "correct = FALSE" means the Yates continuity correction is not applied
prop.test(counts[2], n, correct = FALSE)

## 
##  1-sample proportions test without continuity correction
## 
## data:  counts[2] out of n, null probability 0.5
## X-squared = 73.602, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.6557399 0.7388497
## sample estimates:
##         p 
## 0.6989247

# manual calculation of p
p <- counts[2] / n
p

##       Yes 
## 0.6989247

# observed counts vs. the counts expected under H0: p = 0.5
# (a goodness-of-fit layout, not a 2x2 contingency table)
ct <- rbind(counts, c(n / 2, n / 2))
rownames(ct) <- c("Observed", "Expected")
ct

##             No   Yes
## Observed 140.0 325.0
## Expected 232.5 232.5

# manual chi square calculation: sum of (observed - expected)^2 / expected,
# taken from `ct` rather than retyping the counts
chi2 <- sum((ct["Observed", ] - ct["Expected", ])^2 / ct["Expected", ])
chi2

## [1] 73.60215

# manual calculation of the confidence interval reported by prop.test():
# the Wilson score interval, built from the standard normal quantile.
# (A t quantile is not appropriate here: the t distribution applies to means
# with an estimated variance, not to proportions.)
z <- qnorm(0.975)
p_hat <- as.numeric(p) # drop the "Yes" label so the interval prints unnamed
shrink <- 1 + z^2 / n
centre <- (p_hat + z^2 / (2 * n)) / shrink
half_width <- z * sqrt(p_hat * (1 - p_hat) / n + z^2 / (4 * n^2)) / shrink
centre + c(-1, 1) * half_width

## [1] 0.6557399 0.7388497

two group comparisons

# example 4
# Two-sample comparison of the Normal and EndAisle columns of Cola.csv.
# Each test is evaluated inside the data frame with with() instead of
# attach()/detach(), so the search path is never modified.
cola <- read.csv("Cola.csv")
str(cola)

## 'data.frame':    10 obs. of  2 variables:
##  $ Normal  : int  22 34 52 62 30 40 64 84 56 59
##  $ EndAisle: int  52 71 76 54 67 83 66 90 77 84

# normality check by Shapiro-Wilk test; both p-values are large (not < 0.05),
# so normality is not rejected for either group
with(cola, shapiro.test(Normal))
with(cola, shapiro.test(EndAisle))

## 
##  Shapiro-Wilk normality test
## 
## data:  Normal
## W = 0.96721, p-value = 0.8638

## 
##  Shapiro-Wilk normality test
## 
## data:  EndAisle
## W = 0.9534, p-value = 0.7088

# variance equality check (F test, appropriate for normal distributions);
# the large p-value means equal variances are not rejected
with(cola, var.test(x = Normal, y = EndAisle))

## 
##  F test to compare two variances
## 
## data:  Normal and EndAisle
## F = 2.2289, num df = 9, denom df = 9, p-value = 0.2482
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5536233 8.9734747
## sample estimates:
## ratio of variances 
##           2.228884

# t.test, use var.equal = TRUE (pooled-variance two-sample test)
with(cola, t.test(Normal, EndAisle, var.equal = TRUE))

## 
##  Two Sample t-test
## 
## data:  Normal and EndAisle
## t = -3.0446, df = 18, p-value = 0.006975
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -36.6743  -6.7257
## sample estimates:
## mean of x mean of y 
##      50.3      72.0

# just try and see the difference if var.equal = FALSE (Welch test)
with(cola, t.test(Normal, EndAisle, var.equal = FALSE))

## 
##  Welch Two Sample t-test
## 
## data:  Normal and EndAisle
## t = -3.0446, df = 15.723, p-value = 0.007849
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -36.831299  -6.568701
## sample estimates:
## mean of x mean of y 
##      50.3      72.0

# example 5
# Paired comparison of Year1 and Year2 for the same 9 companies in PERatio.csv.
# Each test is evaluated inside the data frame with with() instead of
# attach()/detach(), so the search path is never modified.
peratio <- read.csv("PERatio.csv")
str(peratio)

## 'data.frame':    9 obs. of  3 variables:
##  $ Company: int  1 2 3 4 5 6 7 8 9
##  $ Year1  : num  8.9 38.1 43 34 34.5 15.2 20.3 19.9 61.9
##  $ Year2  : num  12.7 45.4 10 27.2 22.8 ...

# normality test of each year's values; Year2 has p < 0.05, so normality is
# rejected for it
with(peratio, shapiro.test(Year1))
with(peratio, shapiro.test(Year2))

## 
##  Shapiro-Wilk normality test
## 
## data:  Year1
## W = 0.95123, p-value = 0.7035

## 
##  Shapiro-Wilk normality test
## 
## data:  Year2
## W = 0.76551, p-value = 0.008197

# Ansari-Bradley test: a non-parametric comparison of scale (spread), used
# because Year2 is not normal
# note p-value here is large and not < 0.05 for significance
with(peratio, ansari.test(Year1, Year2))

## 
##  Ansari-Bradley test
## 
## data:  Year1 and Year2
## AB = 43, p-value = 0.795
## alternative hypothesis: true ratio of scales is not equal to 1

# t.test with paired = TRUE: the test is run on the within-company
# differences. var.equal only affects the unpaired two-sample test and has no
# effect when paired = TRUE, so it is not passed.
# NOTE: the assumption behind a paired t-test is normality of the differences
# (e.g. shapiro.test(Year1 - Year2)), not of each column separately.
with(peratio, t.test(Year1, Year2, paired = TRUE))

## 
##  Paired t-test
## 
## data:  Year1 and Year2
## t = -0.69909, df = 8, p-value = 0.5043
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -21.63607  11.56941
## sample estimates:
## mean of the differences 
##               -5.033333

# example 6
# Yes/No survey counts for two groups; the third entry of each vector is that
# group's total, and the Total column adds the two groups together.
Beachcomber <- c(163,64,227)
Windsurfer <- c(154,108,262)
survey <- data.frame(Beachcomber, Windsurfer, Total = Beachcomber + Windsurfer,
                     row.names = c("Yes", "No", "Total"))
survey

##       Beachcomber Windsurfer Total
## Yes           163        154   317
## No             64        108   172
## Total         227        262   489

# proportion test: "Yes" counts (row 1) out of the group totals (row 3),
# without the Yates continuity correction
prop.test(as.integer(survey[1, 1:2]), as.integer(survey[3, 1:2]), correct = FALSE)

## 
##  2-sample test for equality of proportions without continuity
##  correction
## 
## data:  as.integer(survey[1, 1:2]) out of as.integer(survey[3, 1:2])
## X-squared = 9.0526, df = 1, p-value = 0.002623
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.04673792 0.21381291
## sample estimates:
##    prop 1    prop 2 
## 0.7180617 0.5877863

# manual calculation of chi^2 for the 2x2 table, using the shortcut
#   N * (a*d - b*c)^2 / (product of the two row totals and two column totals)
# The counts are read from `survey` rather than retyped, so the result stays
# in step with the table above.
cells <- as.matrix(survey[c("Yes", "No"), c("Beachcomber", "Windsurfer")])
grand_total <- survey["Total", "Total"]
cross_diff <- cells[1, 1] * cells[2, 2] - cells[1, 2] * cells[2, 1]
chi2 <- cross_diff^2 * grand_total /
  (prod(rowSums(cells)) * prod(colSums(cells)))
chi2

## [1] 9.052598

Hypothesis Testing

Chng Yan Hao

3 to 7 April 2017

Hi all! This section is about hypothesis testing.

Download McDonalds.csv here

Download MShopping.csv here

Download Cola.csv here

Download PERatio.csv here

one sample t.test

one sample prop.test

two group comparisons

Return to contents page