t-distribution
# Density curves on a common grid: the standard normal against t
# distributions with 2 and 30 degrees of freedom.
x <- seq(from = -4, to = 4, by = 0.05)
Std.Normal <- dnorm(x, mean = 0, sd = 1)
t2 <- dt(x, df = 2)
t30 <- dt(x, df = 30)

# The normal curve is drawn first with line type 1; both t curves are then
# overlaid in one call with line types 2 and 3, all in the default colour.
plot(x, Std.Normal, type = "l", ylim = c(0, 0.5), ylab = "density")
matlines(x, cbind(t2, t30), lty = 2:3, col = 1)
legend(
  x = 1.5, y = 0.508,
  legend = c("Standard normal", "t(2)", "t(30)"),
  lty = 1:3
)

QQ plot of N(0,1) and t(2)
- t(2) 분포는 양쪽 꼬리가 두텁기 때문에, 표본 분위수가 작은 백분위수에서는 이론적인 값보다 작고 큰 백분위수에서는 이론적인 값보다 큼
# Normal Q-Q plots, side by side, for 100 draws from N(0, 1) and 100 draws
# from t(2).
# par() returns the settings it replaces; they are put back at the end so
# the two-panel layout does not carry over to the plots that follow.
old_par <- par(mfrow = c(1, 2))
r.normal <- rnorm(100)
r.t <- rt(100, df = 2)
qqnorm(r.normal)
qqline(r.normal)
qqnorm(r.t)
qqline(r.t)
par(old_par)

t.test() function
# The confint() calls on t.test()/wilcox.test() results below use the
# method supplied by UsingR, so the package must be attached before the
# first such call. The lines that follow are its recorded attach messages.
library(UsingR)
## Loading required package: MASS
## Loading required package: HistData
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
##
## Attaching package: 'UsingR'
## The following object is masked from 'package:survival':
##
## cancer
ozs <- c(1.95, 1.80, 2.10, 1.82, 1.75, 2.01, 1.83, 1.90)
# 90% t-based confidence interval for the mean of ozs.
CI <- t.test(ozs, conf.level = 0.9)
confint(CI)
## (1.82, 1.97) with 90 percent confidence
# Normal Q-Q plot of the sample, used to judge the normality assumption.
qqnorm(ozs)
qqline(ozs) # approximately linear

For the top 200 CEOs’ pay in 2000
# confint() on a wilcox.test() result (including its `transform` argument)
# relies on the method from UsingR; attach it here so this section does not
# depend on an earlier, unseen library() call.
library(UsingR)
pay.00 <- c(110, 12, 2.5, 98, 1017, 540, 54, 4.3, 150, 432)
# Signed-rank confidence interval computed on the log scale, then mapped
# back to the original scale with exp().
ans <- wilcox.test(log(pay.00), conf.int = TRUE, conf.level = 0.9)
confint(ans, transform = exp) # inverse of log
## (19.36, 254.56) with 90 percent confidence
# Box plots of the raw values and of their logarithms.
boxplot(pay.00, xlab = "CEO")

boxplot(log(pay.00), xlab = "log.CEO")

Does the actual mpg of a new SUV match the advertised 17mpg?
mpg <- c(11.4, 13.1, 14.7, 15.0, 15.5, 15.6, 15.9, 16.0, 16.8)
# One-sided, one-sample t-test of mean = 17 against mean < 17.
# `alternative` is written out in full instead of relying on partial
# argument matching of `alt`.
t.test(mpg, mu = 17, alternative = "less")
##
## One Sample t-test
##
## data: mpg
## t = -3.8011, df = 8, p-value = 0.002614
## alternative hypothesis: true mean is less than 17
## 95 percent confidence interval:
## -Inf 15.92166
## sample estimates:
## mean of x
## 14.88889
# Box plot and normal Q-Q plot side by side; the previous layout is
# restored afterwards so later plots are not squeezed into half a page.
old_par <- par(mfrow = c(1, 2))
boxplot(mpg)
qqnorm(mpg)
qqline(mpg)
par(old_par)

Signed rank test for the number of recruits
# salmon.rate is loaded via data() after attaching UsingR.
library(UsingR)
data("salmon.rate")
summary(salmon.rate)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000619 0.002747 0.005879 0.013969 0.014505 0.125744
# Signed-rank test on the log scale: is the location above log(0.005)?
# `alternative` is written out in full rather than partially matched.
ans <- wilcox.test(
  log(salmon.rate),
  mu = log(0.005),
  alternative = "greater"
)
ans
##
## Wilcoxon signed rank test with continuity correction
##
## data: log(salmon.rate)
## V = 2077, p-value = 0.065
## alternative hypothesis: true location is greater than -5.298317
# Unrounded p-value; this is the call behind the value recorded below.
ans$p.value
## [1] 0.06499583
# Rates (as percentages) next to their logarithms.
boxplot(
  list(salmon.rate * 100, log(salmon.rate * 100)),
  names = c("rate", "log rate")
)

정규성 검토
# Ten observations, entered as integers and scaled down by a factor of 10.
ex4 <- c(104, 121, 147, 147, 140, 145, 146, 149, 160, 168) / 10
stem(ex4)
##
## The decimal point is at the |
##
## 10 | 4
## 12 | 1
## 14 | 056779
## 16 | 08
# Box plot and normal Q-Q plot side by side.
par(mfrow = c(1, 2))
boxplot(ex4, main = "Box plot")
qqnorm(ex4)
qqline(ex4)

# Formal normality test; this is the call behind the output recorded below.
shapiro.test(ex4)
##
## Shapiro-Wilk normality test
##
## data: ex4
## W = 0.89071, p-value = 0.1727
# Back to a single-panel layout for the Box-Cox profile plot.
par(mfrow = c(1, 1))
library(MASS)
# Profile log-likelihood of the Box-Cox parameter for an intercept-only
# model of ex4; bc$x holds the lambda values and bc$y the log-likelihoods.
bc <- boxcox(ex4 ~ 1, lambda = seq(-6, 6))

# Lambda with the largest log-likelihood among the evaluated points.
lambda <- bc$x[which.max(bc$y)]
lambda
## [1] 3.69697
# Box-Cox transform. The lambda -> 0 limit of the formula is log(), which
# is used there to avoid dividing by zero should the maximiser land on 0.
bcex4 <- if (abs(lambda) < 1e-8) log(ex4) else (ex4^lambda - 1) / lambda
shapiro.test(bcex4)
##
## Shapiro-Wilk normality test
##
## data: bcex4
## W = 0.94025, p-value = 0.5558
# Box plot and normal Q-Q plot of the transformed data side by side; the
# previous layout is restored afterwards so it does not leak into later plots.
old_par <- par(mfrow = c(1, 2))
boxplot(bcex4, main = "Box plot")
qqnorm(bcex4)
qqline(bcex4)
par(old_par)

prop.test() function
# 95% confidence interval for a proportion from prop.test():
# 466 successes out of 1013 trials.
ans <- prop.test(466, 1013, conf.level = 0.95)
names(ans)
## [1] "statistic" "parameter" "p.value" "estimate" "null.value"
## [6] "conf.int" "alternative" "method" "data.name"
# Extract the interval; this is the call behind the output recorded below.
ans$conf.int
## [1] 0.4290475 0.4912989
## attr(,"conf.level")
## [1] 0.95
Exact CI for p
# Exact binomial test for 466 successes in 1013 trials; its 95% interval
# is printed first as the raw component of the result.
ans <- binom.test(x = 466, n = 1013, conf.level = 0.95)
ans[["conf.int"]]
## [1] 0.4289889 0.4912836
## attr(,"conf.level")
## [1] 0.95
# The same interval, printed through confint() after attaching UsingR.
library(UsingR)
confint(ans)
## (0.43, 0.49) with 95 percent confidence
Chi-square distribution
# Chi-square densities with 1, 5 and 20 degrees of freedom on a common grid.
x <- seq(from = 0, to = 30, by = 0.05)
chis1 <- dchisq(x, df = 1)
chis5 <- dchisq(x, df = 5)
chis20 <- dchisq(x, df = 20)

# The df = 1 curve is drawn first with line type 1; the other two are then
# overlaid in one call with line types 2 and 3, all in the default colour.
plot(x, chis1, type = "l", ylim = c(0, 1), ylab = "density")
matlines(x, cbind(chis5, chis20), lty = 2:3, col = 1)
legend(
  x = 23.1, y = 1,
  legend = c("Chisq(1)", "Chisq(5)", "Chisq(20)"),
  lty = 1:3
)
