code 6.1
set.seed(12365)
x<-rnorm(100000, 70, 10)
k<-4
length(x)
## [1] 100000
theta1<-c()
for (i in 1:100)
{
theta1<-append(theta1, sum(sample(x, size=10, replace=FALSE))/(10-k))
}
mean(theta1)
## [1] 116.9818
## 분모 값 변화에 따른 추정량 기댓값 변화
theta1<-c()
theta1
## NULL
sim1<- function(k)
{
theta1<-c()
for (i in 1:100)
{
theta1<-append(theta1, sum(sample(x, size=10, replace=FALSE))/(10-k))
}
y<-mean(theta1)
return(y)
}
i<-c(4, 3, 2, 1, 0, -1, -2, -3)
tapply(i, i, sim1)
## -3 -2 -1 0 1 2 3
## 53.92218 58.65781 63.72558 69.98569 77.44002 87.51300 99.67509
## 4
## 116.00243
code 6.2
#ggplot2 소환
library(ggplot2)
#사용하고자 하는 함수를 먼저 정의한다. 표준정규분포라면 이런거 안해도 된다
dnorm1 <- function(x)
{
y <-dnorm(x, mean=10, sd=5)
return(y)
}
dnorm2 <- function(x)
{
y <-dnorm(x, mean=10, sd=8)
return(y)
}
dnorm3 <- function(x)
{
y <-dnorm(x, mean=7, sd=3)
return(y)
}
#그리자^^
ggplot(data.frame(x=c(-20,40)),aes(x=x)) +
stat_function(fun=dnorm1, color="blue", size=1) +
stat_function(fun=dnorm2, color="gray", size=1) +
stat_function(fun=dnorm3, color="green", size=1) +
geom_vline(xintercept=10, color="red", size=1)

code 6.3
set.seed(1234)
theta.hat<-seq(-20, 40, by=0.1)
mse<-data.frame(theta.hat)
mse$pdf1<-dnorm(mse$theta.hat, mean=10, sd=5)
mse$pdf2<-dnorm(mse$theta.hat, mean=10, sd=8)
mse$pdf3<-dnorm(mse$theta.hat, mean=7, sd=3)
mse$theta<-10
head(mse)
## theta.hat pdf1 pdf2 pdf3 theta
## 1 -20.0 1.215177e-09 4.407446e-05 3.426591e-19 10
## 2 -19.9 1.369834e-09 4.618603e-05 4.622845e-19 10
## 3 -19.8 1.543557e-09 4.839120e-05 6.229797e-19 10
## 4 -19.7 1.738616e-09 5.069374e-05 8.386019e-19 10
## 5 -19.6 1.957542e-09 5.309753e-05 1.127601e-18 10
## 6 -19.5 2.203153e-09 5.560663e-05 1.514510e-18 10
mse$mse1<-0.1*mse$pdf1*(mse$theta.hat - mse$theta)^2 #여기서 0.1을 왜 곱하나?
mse$mse2<-0.1*mse$pdf2*(mse$theta.hat - mse$theta)^2 #적분이 아니니까 bin scale만큼 튀겨주는 것
mse$mse3<-0.1*mse$pdf3*(mse$theta.hat - mse$theta)^2
apply(mse[,6:8], 2, sum)
## mse1 mse2 mse3
## 25.00000 63.82338 18.00000
## code 6.4
mu<-60
sigma<-15
n1<-5
n2<-20
n3<-100
#사용하고자 하는 함수를 먼저 정의한다. 표준정규분포라면 이런거 안해도 된다
dnorm1 <- function(x)
{
y <-dnorm(x, mean=mu, sd=sigma/n1^.5)
return(y)
}
dnorm2 <- function(x)
{
y <-dnorm(x, mean=mu, sd=sigma/n2^.5)
return(y)
}
dnorm3 <- function(x)
{
y <-dnorm(x, mean=mu, sd=sigma/n3^.5)
return(y)
}
#그리자^^
library(ggplot2)
ggplot(data.frame(x=c(50,70)),aes(x=x)) +
stat_function(fun=dnorm1, color="blue", size=1) +
stat_function(fun=dnorm2, color="gray", size=1) +
stat_function(fun=dnorm3, color="green", size=1) +
geom_vline(xintercept=60, color="red", size=1)

xbar<-seq(50, 70, by=0.1)
pdf<-dnorm(xbar, mean=mu, sd=sigma/n1^.5)
c1<-data.frame(xbar, pdf)
c1$type<-"pdf1"
pdf<-dnorm(xbar, mean=mu, sd=sigma/n2^.5)
c2<-data.frame(xbar, pdf)
c2$type<-"pdf2"
pdf<-dnorm(xbar, mean=mu, sd=sigma/n3^.5)
c3<-data.frame(xbar, pdf)
c3$type<-"pdf3"
consistency<-rbind(c1, c2, c3)
head(consistency)
## xbar pdf type
## 1 50.0 0.01957737 pdf1
## 2 50.1 0.02001507 pdf1
## 3 50.2 0.02045801 pdf1
## 4 50.3 0.02090610 pdf1
## 5 50.4 0.02135926 pdf1
## 6 50.5 0.02181740 pdf1
ggplot(consistency, aes(x=xbar, y=pdf, color=type)) + geom_line() +
geom_vline(xintercept=60, color="red", size=1)

code 6.5
set.seed(12345)
gender<-round(runif(min=0, max=1.1, 30),0)
income<-round(runif(min=0, max=500, 30),0)
t.test(income, conf.level=0.95)
##
## One Sample t-test
##
## data: income
## t = 8.4201, df = 29, p-value = 2.799e-09
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 179.7362 295.0638
## sample estimates:
## mean of x
## 237.4
gender<-c(1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,1,0)
income<-c(497,429,140,191,438,421,467,184,371,420,
497,369,350,206, 7,375,285,486,421,129,
260,197,406,148,177,187,182,276, 31,375)
t.test(income, conf.level=0.95)
##
## One Sample t-test
##
## data: income
## t = 11.526, df = 29, p-value = 2.396e-12
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 244.6281 350.1719
## sample estimates:
## mean of x
## 297.4
#신뢰구간 노가다로 구하기
alpha<-0.05
xbar<-mean(income)
n<-30
s<-sd(income)
t.right<-qt(1-alpha/2, n-1)
t.left<-qt(alpha/2, n-1)
se<-s/n^.5
lcl<-xbar+t.left*se
ucl<-xbar+t.right*se
lcl
## [1] 244.6281
ucl
## [1] 350.1719
code 6.6
id<-seq(1, 4)
confidence<-data.frame(id)
confidence$n <- 30
confidence$s <- 141.325645
confidence$alpha <- seq(0.025, 0.1, by=0.025)
confidence$cl <- with(confidence, 1-alpha)
confidence$t.right<-with(confidence, qt(1-alpha/2, n-1))
confidence$t.left <-with(confidence, qt(alpha/2, n-1))
confidence$se<-with(confidence, s/n^.5)
confidence$lcl<-with(confidence, xbar+t.left*se)
confidence$ucl<-with(confidence, xbar+t.right*se)
confidence
## id n s alpha cl t.right t.left se lcl ucl
## 1 1 30 141.3256 0.025 0.975 2.363846 -2.363846 25.80241 236.4071 358.3929
## 2 2 30 141.3256 0.050 0.950 2.045230 -2.045230 25.80241 244.6281 350.1719
## 3 3 30 141.3256 0.075 0.925 1.846826 -1.846826 25.80241 249.7474 345.0526
## 4 4 30 141.3256 0.100 0.900 1.699127 -1.699127 25.80241 253.5584 341.2416
#error bar를 그리자
ggplot(confidence, aes(x=cl, y=xbar)) + geom_point()+
geom_errorbar(aes(ymin=lcl, ymax=ucl), width=.01)+
geom_text(aes(label=cl), hjust=-.2)

code 6.7
alpha<-0.05
n<-30
s<-141.325645
cl<-qchisq(alpha/2, n-1)
cu<-qchisq(1-alpha/2, n-1)
lcl<-(n-1)*s^2/cu
ucl<-(n-1)*s^2/cl
variance<-data.frame(alpha, n, s, cl, cu, lcl, ucl)
variance
## alpha n s cl cu lcl ucl
## 1 0.05 30 141.3256 16.04707 45.72229 12668.12 36094.76
code 6.8
alpha<-0.05
phat<-0.7
n<-30
cl=qnorm(alpha/2)
cu=qnorm(1-alpha/2)
se<-(phat*(1-phat)/n)^.5
lcl<-phat+cl*se
ucl<-phat+cu*se
confidence<-data.frame(alpha, phat, n, cl, cu, se, lcl, ucl)
confidence
## alpha phat n cl cu se lcl ucl
## 1 0.05 0.7 30 -1.959964 1.959964 0.083666 0.5360176 0.8639824
## prop.test 문 사용
gender<-c(1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,1,0)
income<-c(497,429,140,191,438,421,467,184,371,420,
497,369,350,206, 7,375,285,486,421,129,
260,197,406,148,177,187,182,276, 31,375)
prop.test(x=21, n=30, conf.level=0.95) #정확 신뢰한계
##
## 1-sample proportions test with continuity correction
##
## data: 21 out of 30, null probability 0.5
## X-squared = 4.0333, df = 1, p-value = 0.04461
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.5044209 0.8458720
## sample estimates:
## p
## 0.7
prop.test(x=21, n=30, conf.level=0.95, correct=FALSE) #정확 신뢰한계
##
## 1-sample proportions test without continuity correction
##
## data: 21 out of 30, null probability 0.5
## X-squared = 4.8, df = 1, p-value = 0.02846
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.5212421 0.8333525
## sample estimates:
## p
## 0.7