code 6.1

set.seed(12365)
x<-rnorm(100000, 70, 10)
k<-4

length(x)
## [1] 100000
theta1<-c()

for (i in 1:100)
{
  theta1<-append(theta1, sum(sample(x, size=10, replace=FALSE))/(10-k))
}

mean(theta1)
## [1] 116.9818
## 분모 값 변화에 따른 추정량 기댓값 변화

theta1<-c()
theta1
## NULL
sim1<- function(k)
{
    theta1<-c()

    for (i in 1:100)
    {
        theta1<-append(theta1, sum(sample(x, size=10, replace=FALSE))/(10-k))
    }

    y<-mean(theta1)
    return(y)
}

i<-c(4, 3, 2, 1, 0, -1, -2, -3)

tapply(i, i, sim1)
##        -3        -2        -1         0         1         2         3 
##  53.92218  58.65781  63.72558  69.98569  77.44002  87.51300  99.67509 
##         4 
## 116.00243

code 6.2

#ggplot2 소환
library(ggplot2)

#사용하고자 하는 함수를 먼저 정의한다. 표준정규분포라면 이런거 안해도 된다
dnorm1 <- function(x) 
{
    y <-dnorm(x, mean=10, sd=5)
    return(y)
}

dnorm2 <- function(x) 
{
    y <-dnorm(x, mean=10, sd=8)
    return(y)
}

dnorm3 <- function(x) 
{
    y <-dnorm(x, mean=7, sd=3)
    return(y)
}

#그리자^^
ggplot(data.frame(x=c(-20,40)),aes(x=x)) +
    stat_function(fun=dnorm1, color="blue", size=1) +
    stat_function(fun=dnorm2, color="gray", size=1) +
    stat_function(fun=dnorm3, color="green", size=1) +
    geom_vline(xintercept=10, color="red", size=1)

code 6.3

set.seed(1234)

theta.hat<-seq(-20, 40, by=0.1)

mse<-data.frame(theta.hat)

mse$pdf1<-dnorm(mse$theta.hat, mean=10, sd=5)
mse$pdf2<-dnorm(mse$theta.hat, mean=10, sd=8)
mse$pdf3<-dnorm(mse$theta.hat, mean=7,  sd=3)
mse$theta<-10

head(mse)
##   theta.hat         pdf1         pdf2         pdf3 theta
## 1     -20.0 1.215177e-09 4.407446e-05 3.426591e-19    10
## 2     -19.9 1.369834e-09 4.618603e-05 4.622845e-19    10
## 3     -19.8 1.543557e-09 4.839120e-05 6.229797e-19    10
## 4     -19.7 1.738616e-09 5.069374e-05 8.386019e-19    10
## 5     -19.6 1.957542e-09 5.309753e-05 1.127601e-18    10
## 6     -19.5 2.203153e-09 5.560663e-05 1.514510e-18    10
mse$mse1<-0.1*mse$pdf1*(mse$theta.hat - mse$theta)^2 #여기서 0.1을 왜 곱하나?
mse$mse2<-0.1*mse$pdf2*(mse$theta.hat - mse$theta)^2 #적분이 아니니까 bin scale만큼 튀겨주는 것
mse$mse3<-0.1*mse$pdf3*(mse$theta.hat - mse$theta)^2



apply(mse[,6:8], 2, sum)
##     mse1     mse2     mse3 
## 25.00000 63.82338 18.00000
## code 6.4

mu<-60
sigma<-15
n1<-5
n2<-20
n3<-100


#사용하고자 하는 함수를 먼저 정의한다. 표준정규분포라면 이런거 안해도 된다
dnorm1 <- function(x) 
{
    y <-dnorm(x, mean=mu, sd=sigma/n1^.5)
    return(y)
}

dnorm2 <- function(x) 
{
    y <-dnorm(x, mean=mu, sd=sigma/n2^.5)
    return(y)
}

dnorm3 <- function(x) 
{
    y <-dnorm(x, mean=mu, sd=sigma/n3^.5)
    return(y)
}

#그리자^^

library(ggplot2)

ggplot(data.frame(x=c(50,70)),aes(x=x)) +
    stat_function(fun=dnorm1, color="blue", size=1) +
    stat_function(fun=dnorm2, color="gray", size=1) +
    stat_function(fun=dnorm3, color="green", size=1) +
    geom_vline(xintercept=60, color="red", size=1)

xbar<-seq(50, 70, by=0.1)
pdf<-dnorm(xbar, mean=mu, sd=sigma/n1^.5)
c1<-data.frame(xbar, pdf)
c1$type<-"pdf1"

pdf<-dnorm(xbar, mean=mu, sd=sigma/n2^.5)
c2<-data.frame(xbar, pdf)
c2$type<-"pdf2"

pdf<-dnorm(xbar, mean=mu, sd=sigma/n3^.5)
c3<-data.frame(xbar, pdf)
c3$type<-"pdf3"

consistency<-rbind(c1, c2, c3)

head(consistency)
##   xbar        pdf type
## 1 50.0 0.01957737 pdf1
## 2 50.1 0.02001507 pdf1
## 3 50.2 0.02045801 pdf1
## 4 50.3 0.02090610 pdf1
## 5 50.4 0.02135926 pdf1
## 6 50.5 0.02181740 pdf1
ggplot(consistency, aes(x=xbar, y=pdf, color=type)) + geom_line() +
    geom_vline(xintercept=60, color="red", size=1)

code 6.5

set.seed(12345)
gender<-round(runif(min=0, max=1.1, 30),0)
income<-round(runif(min=0, max=500, 30),0)

t.test(income, conf.level=0.95)
## 
##  One Sample t-test
## 
## data:  income
## t = 8.4201, df = 29, p-value = 2.799e-09
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  179.7362 295.0638
## sample estimates:
## mean of x 
##     237.4
gender<-c(1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,1,0)
income<-c(497,429,140,191,438,421,467,184,371,420,
         497,369,350,206,  7,375,285,486,421,129,
         260,197,406,148,177,187,182,276, 31,375)

t.test(income, conf.level=0.95)
## 
##  One Sample t-test
## 
## data:  income
## t = 11.526, df = 29, p-value = 2.396e-12
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  244.6281 350.1719
## sample estimates:
## mean of x 
##     297.4
#신뢰구간 노가다로 구하기


alpha<-0.05
xbar<-mean(income)
n<-30
s<-sd(income)
t.right<-qt(1-alpha/2, n-1)
t.left<-qt(alpha/2, n-1)

se<-s/n^.5
lcl<-xbar+t.left*se
ucl<-xbar+t.right*se

lcl
## [1] 244.6281
ucl
## [1] 350.1719

code 6.6

id<-seq(1, 4)

confidence<-data.frame(id)

confidence$n <- 30
confidence$s <- 141.325645
confidence$alpha <- seq(0.025, 0.1, by=0.025)
confidence$cl <- with(confidence, 1-alpha)
confidence$t.right<-with(confidence, qt(1-alpha/2, n-1))
confidence$t.left <-with(confidence, qt(alpha/2,   n-1))
confidence$se<-with(confidence, s/n^.5)
confidence$lcl<-with(confidence, xbar+t.left*se)
confidence$ucl<-with(confidence, xbar+t.right*se)

confidence
##   id  n        s alpha    cl  t.right    t.left       se      lcl      ucl
## 1  1 30 141.3256 0.025 0.975 2.363846 -2.363846 25.80241 236.4071 358.3929
## 2  2 30 141.3256 0.050 0.950 2.045230 -2.045230 25.80241 244.6281 350.1719
## 3  3 30 141.3256 0.075 0.925 1.846826 -1.846826 25.80241 249.7474 345.0526
## 4  4 30 141.3256 0.100 0.900 1.699127 -1.699127 25.80241 253.5584 341.2416
#error bar를 그리자
ggplot(confidence, aes(x=cl, y=xbar)) + geom_point()+
    geom_errorbar(aes(ymin=lcl, ymax=ucl), width=.01)+
    geom_text(aes(label=cl), hjust=-.2)

code 6.7

alpha<-0.05
n<-30
s<-141.325645

cl<-qchisq(alpha/2,   n-1) 
cu<-qchisq(1-alpha/2, n-1) 

lcl<-(n-1)*s^2/cu 
ucl<-(n-1)*s^2/cl

variance<-data.frame(alpha, n, s, cl, cu, lcl, ucl)

variance
##   alpha  n        s       cl       cu      lcl      ucl
## 1  0.05 30 141.3256 16.04707 45.72229 12668.12 36094.76

code 6.8

alpha<-0.05
phat<-0.7
n<-30

cl=qnorm(alpha/2)
cu=qnorm(1-alpha/2)

se<-(phat*(1-phat)/n)^.5

lcl<-phat+cl*se
ucl<-phat+cu*se

confidence<-data.frame(alpha, phat, n, cl, cu, se, lcl, ucl)
confidence
##   alpha phat  n        cl       cu       se       lcl       ucl
## 1  0.05  0.7 30 -1.959964 1.959964 0.083666 0.5360176 0.8639824
## prop.test 문 사용
gender<-c(1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,0,1,0)
income<-c(497,429,140,191,438,421,467,184,371,420,
         497,369,350,206,  7,375,285,486,421,129,
         260,197,406,148,177,187,182,276, 31,375)

prop.test(x=21, n=30, conf.level=0.95)  #정확 신뢰한계
## 
##  1-sample proportions test with continuity correction
## 
## data:  21 out of 30, null probability 0.5
## X-squared = 4.0333, df = 1, p-value = 0.04461
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.5044209 0.8458720
## sample estimates:
##   p 
## 0.7
prop.test(x=21, n=30, conf.level=0.95, correct=FALSE)   #정확 신뢰한계
## 
##  1-sample proportions test without continuity correction
## 
## data:  21 out of 30, null probability 0.5
## X-squared = 4.8, df = 1, p-value = 0.02846
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.5212421 0.8333525
## sample estimates:
##   p 
## 0.7