Chapter.6 ; 두 집단의 비교와 분산분석

Is a weight-loss drug effective?

체중 감소량의 차이
독립표본 자료

# confint() has no base-R method for "htest" objects; the one-line interval
# display used here comes from the method supplied by UsingR, so the package
# must be attached before the first confint() call on a test result.
library(UsingR)

# Weight loss in the two independent samples.
x <- c(0, 0, 0, 2, 4, 5, 13, 14, 14, 14, 15, 17, 17)
y <- c(0, 6, 7, 8, 11, 13, 15, 16, 16, 16, 17, 18)
# Two-sample t-test of mean(x) - mean(y) assuming a common variance.
ans <- t.test(x, y, var.equal = TRUE)
# Show only the confidence interval of the test.
confint(ans)

## (-8.33, 2.19) with 95 percent confidence

# Side-by-side boxplots of the two samples, labelled placebo (x) and
# ephedra (y).
boxplot(list(placebo = x, ephedra = y), col = "grey")

# F test comparing the variances of x and y.
var.test(x, y) # check the common-variance assumption

## 
##  F test to compare two variances
## 
## data:  x and y
## F = 1.5802, num df = 12, denom df = 11, p-value = 0.4568
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.4607529 5.2486187
## sample estimates:
## ratio of variances 
##           1.580204

# Without var.equal = TRUE, t.test() runs the Welch two-sample test
# (see the title of the printed result).
t.test(x, y)

## 
##  Welch Two Sample t-test
## 
## data:  x and y
## t = -1.2185, df = 22.538, p-value = 0.2356
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -8.289271  2.148245
## sample estimates:
## mean of x mean of y 
##  8.846154 11.916667

Do two types of shoes differ in the amount of wear?

마모량의 차이
짝자료

# Packages for this example.
# NOTE(review): `shoes` is presumably the dataset shipped with MASS — confirm.
library(MASS)
library(UsingR)
# List the components of `shoes`.
names(shoes)

## [1] "A" "B"

# One-sample t-test on the element-wise differences A - B, with a 90%
# confidence level.
ans1 <- t.test(shoes$A - shoes$B, conf.level = 0.9)
# Show only the confidence interval of the test.
confint(ans1)

## (-0.63, -0.19) with 90 percent confidence

# Same comparison through the paired interface of t.test(), again at the
# 90% confidence level.
ans2 <- t.test(shoes$A, shoes$B, paired = TRUE, conf.level = 0.9)
# With paired = TRUE, t.test() computes the differences itself.
confint(ans2)

## (-0.63, -0.19) with 90 percent confidence

Two-sample t-test

p24 level 비교
300mg vs 600mg

# p24 levels for the two dose groups.
# NOTE(review): presumably x is the 300mg group and y the 600mg group — confirm.
x <- c(284, 279, 289, 292, 287, 295, 285, 279, 306, 298)
y <- c(298, 307, 297, 279, 291, 335, 299, 300, 306, 291)
# F test comparing the variances of the two groups.
var.test(x, y)

## 
##  F test to compare two variances
## 
## data:  x and y
## F = 0.34183, num df = 9, denom df = 9, p-value = 0.1256
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.0849059 1.3762082
## sample estimates:
## ratio of variances 
##          0.3418306

# Pooled-variance two-sample t-test. TRUE is spelled out because T is an
# ordinary variable that can be reassigned, which would silently turn this
# into a Welch test.
t.test(x, y, var.equal = TRUE)

## 
##  Two Sample t-test
## 
## data:  x and y
## t = -2.034, df = 18, p-value = 0.05696
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -22.1584072   0.3584072
## sample estimates:
## mean of x mean of y 
##     289.4     300.3

Wilcoxon’s rank-sum test

두 직원의 서비스 시간 비교

# Service times of the two employees being compared.
A <- c(5.8, 1.0, 1.1, 2.1, 2.5, 1.1, 1.0, 1.2, 3.2, 2.7)
B <- c(1.5, 2.7, 6.6, 4.6, 1.1, 1.2, 5.7, 3.2, 1.2, 1.3)
# Wilcoxon rank-sum test; the data contain ties, so R warns that an exact
# p-value cannot be computed (see the warning in the output).
wilcox.test(A, B)

## Warning in wilcox.test.default(A, B): cannot compute exact p-value with ties

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  A and B
## W = 34, p-value = 0.2394
## alternative hypothesis: true location shift is not equal to 0

library(UsingR)
# conf.int is a logical switch and conf.level carries the level. Passing 0.95
# as conf.int only worked because a non-zero number is treated as TRUE and
# the default conf.level happens to be 0.95.
confint(wilcox.test(A, B, conf.int = TRUE, conf.level = 0.95))

## Warning in wilcox.test.default(A, B, conf.int = 0.95): cannot compute exact p-
## value with ties

## Warning in wilcox.test.default(A, B, conf.int = 0.95): cannot compute exact
## confidence intervals with ties

## (-2.50, 1.00) with 95 percent confidence

Paired t-test

점수

# Paired scores: element i of x and element i of y form one pair.
x <- c(77, 56, 64, 60, 57, 53, 72, 62, 65, 66)
y <- c(88, 74, 83, 68, 58, 50, 67, 64, 74, 60)
# Paired t-test on x - y. TRUE is spelled out because T is a reassignable
# variable, not a reserved word.
t.test(x, y, paired = TRUE)

## 
##  Paired t-test
## 
## data:  x and y
## t = -1.8904, df = 9, p-value = 0.09128
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -11.862013   1.062013
## sample estimates:
## mean difference 
##            -5.4

Wilcoxon’s signed rank test

점수

# Paired scores: element i of x and element i of y form one pair.
x <- c(77, 56, 64, 60, 57, 53, 72, 62, 65, 66)
y <- c(88, 74, 83, 68, 58, 50, 67, 64, 74, 60)
# Wilcoxon signed-rank test on the paired differences. TRUE is spelled out
# because T is a reassignable variable, not a reserved word.
wilcox.test(x, y, paired = TRUE)

## 
##  Wilcoxon signed rank exact test
## 
## data:  x and y
## V = 12, p-value = 0.1309
## alternative hypothesis: true location shift is not equal to 0

library(UsingR)
# conf.int is a logical switch and conf.level carries the level; 0.95 passed
# as conf.int was merely treated as TRUE. T is also replaced by TRUE because
# T can be reassigned.
confint(wilcox.test(x, y, paired = TRUE, conf.int = TRUE, conf.level = 0.95))

## (-13.00, 2.00) with 95 percent confidence

One-way ANOVA

세 달 비교

# Observations for the three months being compared.
may <- c(2166, 1568, 2233, 1882, 2019)
sep <- c(2279, 2075, 2131, 2009, 1793)
dec <- c(2226, 2154, 2583, 2010, 2190)
# Long format: one row per observation, with `values` holding the measurement
# and `ind` a factor naming its month (levels ordered may, sep, dec).
ex5 <- data.frame(
  values = c(may, sep, dec),
  ind = rep(
    factor(c("may", "sep", "dec"), levels = c("may", "sep", "dec")),
    times = c(length(may), length(sep), length(dec))
  )
)
ex5

##    values ind
## 1    2166 may
## 2    1568 may
## 3    2233 may
## 4    1882 may
## 5    2019 may
## 6    2279 sep
## 7    2075 sep
## 8    2131 sep
## 9    2009 sep
## 10   1793 sep
## 11   2226 dec
## 12   2154 dec
## 13   2583 dec
## 14   2010 dec
## 15   2190 dec

# One-way comparison of means across the levels of `ind`, assuming equal
# variances. TRUE is spelled out because T is a reassignable variable; if it
# were masked, the test would silently drop the equal-variance assumption.
oneway.test(values ~ ind, data = ex5, var.equal = TRUE)

## 
##  One-way analysis of means
## 
## data:  values and ind
## F = 1.7862, num df = 2, denom df = 12, p-value = 0.2094

# Fit the one-way ANOVA model of `values` on the grouping factor `ind`.
res <- aov(values ~ ind, data = ex5)
# Print the fitted object (sums of squares and degrees of freedom).
res

## Call:
##    aov(formula = values ~ ind, data = ex5)
## 
## Terms:
##                      ind Residuals
## Sum of Squares  174664.1  586719.6
## Deg. of Freedom        2        12
## 
## Residual standard error: 221.1183
## Estimated effects may be unbalanced

# ANOVA table for the fit: Df, Sum Sq, Mean Sq, F value and Pr(>F).
summary(res)

##             Df Sum Sq Mean Sq F value Pr(>F)
## ind          2 174664   87332   1.786  0.209
## Residuals   12 586720   48893

# Tukey pairwise comparisons of the group means at a 95% family-wise
# confidence level.
TukeyHSD(res)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = values ~ ind, data = ex5)
## 
## $ind
##          diff      lwr     upr     p adj
## sep-may  83.8 -289.294 456.894 0.8231586
## dec-may 259.0 -114.094 632.094 0.1949625
## dec-sep 175.2 -197.894 548.294 0.4467189

# Plot the Tukey pairwise comparison results.
plot(TukeyHSD(res))

Kruskal-Wallis test

세 시험 비교

# Scores from the three tests being compared (group sizes 6, 5 and 7).
x <- c(63, 64, 95, 64, 60, 85)
y <- c(58, 56, 51, 84, 77)
z <- c(85, 79, 59, 89, 80, 71, 43)
# Long format: one row per score, with `ind` a factor naming the test it
# belongs to (levels ordered test1, test2, test3).
ex6 <- data.frame(
  values = c(x, y, z),
  ind = rep(
    factor(c("test1", "test2", "test3"), levels = c("test1", "test2", "test3")),
    times = c(length(x), length(y), length(z))
  )
)
# Kruskal-Wallis rank sum test of `values` across the three groups.
kruskal.test(values ~ ind, data = ex6)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  values by ind
## Kruskal-Wallis chi-squared = 1.7753, df = 2, p-value = 0.4116

# Print the long-format three-month data again before reusing it below.
ex5

##    values ind
## 1    2166 may
## 2    1568 may
## 3    2233 may
## 4    1882 may
## 5    2019 may
## 6    2279 sep
## 7    2075 sep
## 8    2131 sep
## 9    2009 sep
## 10   1793 sep
## 11   2226 dec
## 12   2154 dec
## 13   2583 dec
## 14   2010 dec
## 15   2190 dec

# Kruskal-Wallis rank sum test of `values` across the levels of `ind` in ex5.
kruskal.test(values ~ ind, data = ex5)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  values by ind
## Kruskal-Wallis chi-squared = 2.18, df = 2, p-value = 0.3362

Chapter.6 ; 두 집단의 비교와 분산분석

Joy, Son

2022-09-13

Is a weight-loss drug effective?

Do two types of shoes differ in the amount of wear?

Two-sample t-test

Wilcoxon’s rank-sum test

Paired t-test

Wilcoxon’s signed rank test

One-way ANOVA

Kruskal-Wallis test