Hi all! This section is about hypothesis testing.

Download McDonalds.csv here

Download MShopping.csv here

Download Cola.csv here

Download PERatio.csv here

one sample t.test

# Example 1: one-sample t-test on the Time column of McDonalds.csv
McDonalds <- read.csv("McDonalds.csv")
str(McDonalds)
## 'data.frame':    25 obs. of  1 variable:
##  $ Time: num  155 143 140 152 188 ...
# with() evaluates the call inside the data frame, so `Time` resolves without
# attach()-ing McDonalds to the search path (attach() can silently mask or be
# masked by global variables and must be paired with detach()).
# With no `mu` given, t.test() tests H0: true mean = 0 (two-sided).
with(McDonalds, t.test(Time))
## 
##  One Sample t-test
## 
## data:  Time
## t = 44.257, df = 24, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  151.6068 166.4388
## sample estimates:
## mean of x 
##  159.0228
# Manual calculation of the same quantities
m <- mean(McDonalds$Time)
s <- sd(McDonalds$Time)
n <- length(McDonalds$Time)
df <- n - 1 # degrees of freedom
# qt(p, df) returns the quantile q with P(T <= q) = p for a t distribution;
# p = 0.975 leaves 2.5% in the upper tail, i.e. the two-sided 95% critical value
t95 <- qt(0.975, df)


# t statistic for H0: mu = 0 (named t_stat so that base::t() is not masked)
t_stat <- (m - 0) / (s / sqrt(n))

# 95% confidence interval (0.05 significance level): mean +/- t95 * s / sqrt(n)
ci <- m + c(-1, 1) * t95 * s / sqrt(n)
ci
## [1] 151.6068 166.4388
# Example 3: same data, tested against a hypothesised mean of 174.22
with(McDonalds, t.test(Time, mu = 174.22))
## 
##  One Sample t-test
## 
## data:  Time
## t = -4.2294, df = 24, p-value = 0.0002946
## alternative hypothesis: true mean is not equal to 174.22
## 95 percent confidence interval:
##  151.6068 166.4388
## sample estimates:
## mean of x 
##  159.0228
# One-sided version: alternative hypothesis is "true mean is less than 174.22"
with(McDonalds, t.test(Time, mu = 174.22, alternative = "less"))
## 
##  One Sample t-test
## 
## data:  Time
## t = -4.2294, df = 24, p-value = 0.0001473
## alternative hypothesis: true mean is less than 174.22
## 95 percent confidence interval:
##      -Inf 165.1703
## sample estimates:
## mean of x 
##  159.0228

one sample prop.test

# Example 2: one-sample proportion test on the MShopping column of MShopping.csv
# stringsAsFactors = TRUE keeps the column a factor (as in the str() output
# below) on R >= 4.0, where read.csv() otherwise returns character columns.
MShopping <- read.csv("MShopping.csv", stringsAsFactors = TRUE)
str(MShopping)
## 'data.frame':    465 obs. of  1 variable:
##  $ MShopping: Factor w/ 2 levels "No","Yes": 1 1 2 2 2 2 1 1 2 1 ...
# table() counts the "No" and "Yes" responses. The result is stored as `counts`
# so that the base function table() is not masked by a variable of the same name.
counts <- table(MShopping)
counts
## MShopping
##  No Yes 
## 140 325
n <- nrow(MShopping)

# Proportion test of the "Yes" share against the default null p = 0.5;
# correct = FALSE means Yates' continuity correction is not applied
prop.test(counts[2], n, correct = FALSE)
## 
##  1-sample proportions test without continuity correction
## 
## data:  counts[2] out of n, null probability 0.5
## X-squared = 73.602, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.6557399 0.7388497
## sample estimates:
##         p 
## 0.6989247
# Manual calculation of the sample proportion of "Yes"
p <- counts[2] / n
p
##       Yes 
## 0.6989247
# Observed counts next to the counts expected under H0: p = 0.5 (n/2 each)
ct <- rbind(counts, c(n / 2, n / 2))
rownames(ct) <- c("Observed", "Expected")
ct
##             No   Yes
## Observed 140.0 325.0
## Expected 232.5 232.5
# Manual chi-square: sum over cells of (observed - expected)^2 / expected,
# read from `ct` rather than retyping the counts by hand
chi2 <- sum((ct["Observed", ] - ct["Expected", ])^2 / ct["Expected", ])
chi2
## [1] 73.60215
df <- n - 1

# Rough interval p +/- q * se, with q taken from a t distribution with
# df = n - 1 = 464. This approximation does NOT reproduce the interval printed
# by prop.test() above (0.6557399, 0.7388497); per the prop.test documentation
# that one is a Wilson score interval, so a small discrepancy is expected.
se <- sqrt(p * (1 - p) / n)
p + c(-1, 1) * qt(0.975, df) * se
## [1] 0.6571216 0.7407278

two group comparisons

# Example 4: compare the Normal and EndAisle columns of Cola.csv
cola <- read.csv("Cola.csv")
str(cola)
## 'data.frame':    10 obs. of  2 variables:
##  $ Normal  : int  22 34 52 62 30 40 64 84 56 59
##  $ EndAisle: int  52 71 76 54 67 83 66 90 77 84
# Each test is wrapped in with(cola, ...) so the column names resolve inside
# the data frame; this replaces attach()/detach(), which alter the search path.
# Normality check with the Shapiro-Wilk test: both p-values are large (not
# < 0.05), so there is no significant evidence against normality
with(cola, shapiro.test(Normal))
with(cola, shapiro.test(EndAisle))
## 
##  Shapiro-Wilk normality test
## 
## data:  Normal
## W = 0.96721, p-value = 0.8638
## 
##  Shapiro-Wilk normality test
## 
## data:  EndAisle
## W = 0.9534, p-value = 0.7088
# Equality-of-variance check (F test, appropriate for normal samples)
with(cola, var.test(x = Normal, y = EndAisle))
## 
##  F test to compare two variances
## 
## data:  Normal and EndAisle
## F = 2.2289, num df = 9, denom df = 9, p-value = 0.2482
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5536233 8.9734747
## sample estimates:
## ratio of variances 
##           2.228884
# Two-sample t-test; var.equal = TRUE because the F test above did not reject
# equal variances
with(cola, t.test(Normal, EndAisle, var.equal = TRUE))
## 
##  Two Sample t-test
## 
## data:  Normal and EndAisle
## t = -3.0446, df = 18, p-value = 0.006975
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -36.6743  -6.7257
## sample estimates:
## mean of x mean of y 
##      50.3      72.0
# For comparison, the same test with var.equal = FALSE (Welch's t-test):
# note the fractional df and the slightly wider interval
with(cola, t.test(Normal, EndAisle, var.equal = FALSE))
## 
##  Welch Two Sample t-test
## 
## data:  Normal and EndAisle
## t = -3.0446, df = 15.723, p-value = 0.007849
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -36.831299  -6.568701
## sample estimates:
## mean of x mean of y 
##      50.3      72.0

# Example 5: paired comparison of the Year1 and Year2 columns of PERatio.csv
peratio <- read.csv("PERatio.csv")
str(peratio)
## 'data.frame':    9 obs. of  3 variables:
##  $ Company: int  1 2 3 4 5 6 7 8 9
##  $ Year1  : num  8.9 38.1 43 34 34.5 15.2 20.3 19.9 61.9
##  $ Year2  : num  12.7 45.4 10 27.2 22.8 ...

# Each test is wrapped in with(peratio, ...) so the column names resolve inside
# the data frame; this replaces attach()/detach(), which alter the search path.
# Normality test: Year2 has p = 0.008197 (< 0.05), so normality is rejected
# for that column
with(peratio, shapiro.test(Year1))
with(peratio, shapiro.test(Year2))
## 
##  Shapiro-Wilk normality test
## 
## data:  Year1
## W = 0.95123, p-value = 0.7035
## 
##  Shapiro-Wilk normality test
## 
## data:  Year2
## W = 0.76551, p-value = 0.008197
# Ansari-Bradley test: compares the scale of two samples without assuming
# normality. The p-value here is large (not < 0.05), so there is no
# significant evidence that the scales differ
with(peratio, ansari.test(Year1, Year2))
## 
##  Ansari-Bradley test
## 
## data:  Year1 and Year2
## AB = 43, p-value = 0.795
## alternative hypothesis: true ratio of scales is not equal to 1
# Paired t-test. With paired = TRUE the test is a one-sample t-test on the
# differences Year1 - Year2, so var.equal has no effect and is not passed.
# NOTE(review): the assumption that matters here is normality of the paired
# differences, which the checks above do not examine - consider testing it.
with(peratio, t.test(Year1, Year2, paired = TRUE))
## 
##  Paired t-test
## 
## data:  Year1 and Year2
## t = -0.69909, df = 8, p-value = 0.5043
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -21.63607  11.56941
## sample estimates:
## mean of the differences 
##               -5.033333

# Example 6: compare the proportion of "Yes" answers between two groups
# Each vector holds the Yes count, the No count and their total, in that order
Beachcomber <- c(163, 64, 227)
Windsurfer <- c(154, 108, 262)
survey <- data.frame(Beachcomber, Windsurfer, Total = Beachcomber + Windsurfer,
                     row.names = c("Yes", "No", "Total"))
survey
##       Beachcomber Windsurfer Total
## Yes           163        154   317
## No             64        108   172
## Total         227        262   489
# Two-sample proportion test: "Yes" counts (row 1) out of the group totals
# (row 3); correct = FALSE means no continuity correction
prop.test(as.integer(survey[1, 1:2]), as.integer(survey[3, 1:2]),
          correct = FALSE)
## 
##  2-sample test for equality of proportions without continuity
##  correction
## 
## data:  as.integer(survey[1, 1:2]) out of as.integer(survey[3, 1:2])
## X-squared = 9.0526, df = 1, p-value = 0.002623
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.04673792 0.21381291
## sample estimates:
##    prop 1    prop 2 
## 0.7180617 0.5877863
# Manual chi-square for a 2x2 table with cells a, b / c, d:
#   (a*d - b*c)^2 * N / (product of the row totals * product of the column totals)
# The cells are read from `survey` so the statistic follows the data instead of
# hand-copied numbers.
cells <- as.matrix(survey[c("Yes", "No"), c("Beachcomber", "Windsurfer")])
cross_diff <- cells[1, 1] * cells[2, 2] - cells[1, 2] * cells[2, 1]
chi2 <- cross_diff^2 * sum(cells) /
  (prod(rowSums(cells)) * prod(colSums(cells)))
chi2
## [1] 9.052598

Return to contents page