Chapter 5: 추정과 가설검정

t-distribution

자유도가 증가할수록 표준정규분포에 가까워짐

# Evaluate the N(0, 1) density and two t densities on one grid over [-4, 4].
x <- seq(from = -4, to = 4, by = 0.05)
Std.Normal <- dnorm(x)
t2 <- dt(x, df = 2)
t30 <- dt(x, df = 30)

# Solid line: standard normal. The y-range is fixed so every curve fits.
plot(x, Std.Normal, type = "l", ylim = c(0, 0.5), ylab = "density")

# Overlay the t curves; line types 2 and 3 line up with the legend entries.
invisible(Map(
  function(dens, style) lines(x, dens, lty = style),
  list(t2, t30),
  2:3
))
legend(
  x = 1.5, y = 0.508,
  legend = c("Standard normal", "t(2)", "t(30)"),
  lty = 1:3
)

QQ plot of N(0,1) and t(2)

t(2) 분포는 양쪽 꼬리가 두텁기 때문에, QQ plot에서 표본 분위수가 작은 백분위수에서는 정규분포의 이론적 분위수보다 작고 큰 백분위수에서는 이론적 분위수보다 큼

# Two panels side by side: normal Q-Q plot of an N(0, 1) sample (left)
# and of a t(2) sample (right).
par(mfrow = c(1, 2))

# Draw order is kept (normal first, then t) so the RNG stream is consumed
# exactly as before.
r.normal <- rnorm(n = 100)
r.t <- rt(n = 100, df = 2)

# Left panel: normal sample with its reference line.
qqnorm(y = r.normal)
qqline(y = r.normal)

# Right panel: t(2) sample with its reference line.
qqnorm(y = r.t)
qqline(y = r.t)

t.test() function

정규분포 가정

library(UsingR)

## Loading required package: MASS

## Loading required package: HistData

## Loading required package: Hmisc

## Loading required package: lattice

## Loading required package: survival

## Loading required package: Formula

## Loading required package: ggplot2

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:base':
## 
##     format.pval, units

## 
## Attaching package: 'UsingR'

## The following object is masked from 'package:survival':
## 
##     cancer

# Eight sample measurements used for a small-sample interval for the mean.
ozs <- c(
  1.95, 1.80, 2.10, 1.82,
  1.75, 2.01, 1.83, 1.90
)

# 90% t-based confidence interval, printed through confint() on the htest
# result. NOTE(review): that method presumably comes from UsingR — confirm.
CI <- t.test(x = ozs, conf.level = 0.90)
confint(CI)

## (1.82, 1.97) with 90 percent confidence

# Normal Q-Q plot with reference line to check the normality assumption.
qqnorm(y = ozs)
qqline(y = ozs) # approximately linear

For the top 200 CEOs’ pay in 2000

# Ten pay values; they span several orders of magnitude, so the interval is
# computed on the log scale.
pay.00 <- c(
  110, 12, 2.5, 98, 1017,
  540, 54, 4.3, 150, 432
)

# Signed-rank 90% confidence interval for log(pay), mapped back to the
# original scale with exp().
ans <- wilcox.test(x = log(pay.00), conf.level = 0.90, conf.int = TRUE)
confint(ans, transform = exp) # inverse of log

## (19.36, 254.56) with 90 percent confidence

# Box plot on the raw scale ...
boxplot(x = pay.00, xlab = "CEO")

# ... and on the log scale for comparison.
boxplot(x = log(pay.00), xlab = "log.CEO")

Does the actual mpg of a new SUV match the advertised 17mpg?

# Nine observed mpg values to compare against the advertised 17 mpg.
mpg <- c(11.4, 13.1, 14.7, 15.0, 15.5, 15.6, 15.9, 16.0, 16.8)

# One-sample, one-sided t-test of H0: mean = 17 against H1: mean < 17.
# `alternative` is spelled out in full; the abbreviated `alt` only worked
# through partial argument matching.
t.test(mpg, mu = 17, alternative = "less")

## 
##  One Sample t-test
## 
## data:  mpg
## t = -3.8011, df = 8, p-value = 0.002614
## alternative hypothesis: true mean is less than 17
## 95 percent confidence interval:
##      -Inf 15.92166
## sample estimates:
## mean of x 
##  14.88889

# Check the normality assumption behind the t-test: box plot (left panel)
# and normal Q-Q plot with reference line (right panel).
par(mfrow = c(1, 2))
boxplot(mpg)
qqnorm(mpg)
qqline(mpg)

Signed rank test for the number of recruits

# Attach UsingR, load the salmon.rate data set (presumably provided by that
# package) and print its five-number summary plus mean.
library(UsingR)
data("salmon.rate")
summary(salmon.rate)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.000619 0.002747 0.005879 0.013969 0.014505 0.125744

# One-sided Wilcoxon signed rank test on the log scale:
# H0: location of log(salmon.rate) equals log(0.005); H1: it is greater.
# `alternative` is written out in full; the abbreviated `alt` only worked
# through partial argument matching.
ans <- wilcox.test(
  log(salmon.rate),
  mu = log(0.005),
  alternative = "greater"
)
ans

## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  log(salmon.rate)
## V = 2077, p-value = 0.065
## alternative hypothesis: true location is greater than -5.298317

# Unrounded p-value from the stored test result.
ans$p.value

## [1] 0.06499583

# Box plots of salmon.rate scaled by 100, on the raw and on the log scale.
boxplot(
  list(salmon.rate * 100, log(salmon.rate * 100)),
  names = c("rate", "log rate")
)

정규성 검토

# Ten values entered in tenths and rescaled by 10.
ex4 <- c(
  104, 121, 147, 147, 140,
  145, 146, 149, 160, 168
) / 10

# Text stem-and-leaf display as a first look at the shape.
stem(x = ex4)

## 
##   The decimal point is at the |
## 
##   10 | 4
##   12 | 1
##   14 | 056779
##   16 | 08

# Graphical normality check: box plot (left) and normal Q-Q plot (right).
par(mfrow = c(1, 2))
boxplot(x = ex4, main = "Box plot")
qqnorm(y = ex4)
qqline(y = ex4)

# Formal normality check on the raw data.
shapiro.test(x = ex4)

## 
##  Shapiro-Wilk normality test
## 
## data:  ex4
## W = 0.89071, p-value = 0.1727

# Back to a single panel for the Box-Cox profile log-likelihood plot of an
# intercept-only model.
par(mfrow = c(1, 1))
library("MASS")
bc <- boxcox(ex4 ~ 1, lambda = seq(from = -6, to = 6))

# Pick the lambda at which the returned profile log-likelihood is largest.
lambda <- with(bc, x[which.max(y)])
lambda

## [1] 3.69697

# Box-Cox power transform with the selected lambda, then re-test normality.
bcex4 <- (ex4^lambda - 1) / lambda
shapiro.test(x = bcex4)

## 
##  Shapiro-Wilk normality test
## 
## data:  bcex4
## W = 0.94025, p-value = 0.5558

# Same graphical check as above, now on the transformed data.
par(mfrow = c(1, 2))
boxplot(x = bcex4, main = "Box plot")
qqnorm(y = bcex4)
qqline(y = bcex4)

prop.test() function

# Approximate 95% confidence interval for a proportion: 466 successes out of
# 1013 trials.
ans <- prop.test(x = 466, n = 1013, conf.level = 0.95)

# Components available on the returned htest object.
names(ans)

## [1] "statistic"   "parameter"   "p.value"     "estimate"    "null.value" 
## [6] "conf.int"    "alternative" "method"      "data.name"

# Extract the interval by exact component name.
ans[["conf.int"]]

## [1] 0.4290475 0.4912989
## attr(,"conf.level")
## [1] 0.95

Exact CI for p

# Exact binomial 95% confidence interval for the same counts
# (466 successes out of 1013 trials).
ans <- binom.test(x = 466, n = 1013, conf.level = 0.95)
ans[["conf.int"]]

## [1] 0.4289889 0.4912836
## attr(,"conf.level")
## [1] 0.95

# Attach UsingR (a no-op if it is already attached) and print the interval
# stored in `ans` through confint().
# NOTE(review): the confint() method for htest objects presumably comes from
# UsingR — confirm.
library(UsingR)
confint(ans)

## (0.43, 0.49) with 95 percent confidence

Does the 2001 figure show an increase from 11.3%?

# One-sided, one-sample proportion test: 5850 successes out of 50000,
# H0: p = 0.113 against H1: p > 0.113.
# `alternative` is spelled out in full; the abbreviated `alt` only worked
# through partial argument matching.
ans <- prop.test(x = 5850, n = 50000, p = 0.113, alternative = "greater")
ans

## 
##  1-sample proportions test with continuity correction
## 
## data:  5850 out of 50000, null probability 0.113
## X-squared = 7.9417, df = 1, p-value = 0.002415
## alternative hypothesis: true p is greater than 0.113
## 95 percent confidence interval:
##  0.1146464 1.0000000
## sample estimates:
##     p 
## 0.117

# Unrounded p-value from the stored test result.
ans$p.value

## [1] 0.002415415

Chi-square distribution

# Chi-square densities for 1, 5 and 20 degrees of freedom on a grid over
# [0, 30].
x <- seq(from = 0, to = 30, by = 0.05)
chis1 <- dchisq(x, df = 1)
chis5 <- dchisq(x, df = 5)
chis20 <- dchisq(x, df = 20)

# Solid line: Chisq(1). The y-axis is capped at 1 via ylim.
plot(x, chis1, type = "l", ylim = c(0, 1), ylab = "density")

# Dashed and dotted overlays for the larger degrees of freedom.
lines(x = x, y = chis5, lty = 2)
lines(x = x, y = chis20, lty = 3)
legend(
  x = 23.1, y = 1,
  legend = c("Chisq(1)", "Chisq(5)", "Chisq(20)"),
  lty = 1:3
)

Chapter 5: 추정과 가설검정

Joy, Son

2022-09-03

t-distribution

QQ plot of N(0,1) and t(2)

t.test() function

For the top 200 CEOs’ pay in 2000

Does the actual mpg of a new SUV match the advertised 17mpg?

Signed rank test for the number of recruits

정규성 검토

prop.test() function

Exact CI for p

Does the 2001 figure show an increase from 11.3%?

Chi-square distribution