Chapter5 参数估计

0.1 无偏性模拟
0.2 有效性模拟
0.3 一致性模拟
0.4 一个总体均值的置信区间
- 0.4.1 大样本下，总体方差未知，用样本方差代替
- 0.4.2 小样本下，总体方差未知
0.5 两个总体均值之差
0.6 一个总体比例的区间估计
- 0.6.1 大样本下
0.7 两个总体比例之差的置信区间
- 0.7.1 大样本下
0.8 一个总体方差的估计：首先假定总体服从正态分布
0.9 两个总体方差比的估计

0.1 无偏性模拟

x=vector();m=vector();v=vector()
n=10
for (i in 1:10000){
  d=rnorm(n,50,10)#生成正态分布样本，均值50，标准差10
  x=append(x,mean(d))#样本均值
  m=append(m,median(d))#样本中位数
  v=append(v,var(d))#样本方差
}
data.frame(mean(x),mean(m),mean(v))

##    mean.x.  mean.m.  mean.v.
## 1 50.01912 50.03753 100.2282

0.2 有效性模拟

x=vector();m=vector();
n=10
for (i in 1:10000){
  d=rnorm(n)
  x=append(x,mean(d))#样本均值
  m=append(m,median(d))#样本中位数
}
data.frame(mean(x),mean(m),mean(v))

##        mean.x.     mean.m.  mean.v.
## 1 -0.003001812 -0.00321979 100.2282

#绘制样本均值、样本中位数分布直方图
par(mfrow=c(1,2),mai=c(0.7,0.7,0.4,0.2),cex=0.8)
hist(x,probability = T,col = 'lightblue',xlim = c(-1.5,1.5),ylim = c(0,1.2),xlab = '样本均值',main='样本均值的分布',cex.main=0.8)
lines(density(x),col='red',lwd=2)
hist(m,probability = T,col = 'lightblue',xlim = c(-1.5,1.5),ylim = c(0,1.2),xlab = '中位数',main='样本中位数的分布',cex.main=0.8)
lines(density(m),col='red',lwd=2)

0.3 一致性模拟

set.seed(12)
N=rnorm(1000,50,10)
mu=mean(N)
xbar10=mean(sample(N,10,replace = F))
xbar100=mean(sample(N,100,replace = F))
xbar500=mean(sample(N,500,replace = F))
xbar900=mean(sample(N,900,replace = F))
data.frame(总体均值=mu,xbar10,xbar100,xbar500,xbar900)

##   总体均值   xbar10  xbar100  xbar500 xbar900
## 1 49.73563 49.17006 50.66988 49.94489 49.7666

data.frame('d10'=(xbar10-mu),'d100'=(xbar100-mu),'d500'=(xbar500-mu),'d900'=(xbar900-mu))#样本均值与总体均值的差值，随n的增大而减小，一致性的体现

##          d10      d100     d500       d900
## 1 -0.5655775 0.9342487 0.209254 0.03096257

0.4 一个总体均值的置信区间

0.4.1 大样本下，总体方差未知，用样本方差代替

load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch5\\example5_1.RData")
library(BSDA)

## Loading required package: lattice

## 
## Attaching package: 'BSDA'

## The following object is masked from 'package:datasets':
## 
##     Orange

z.test(example5_1$耗油量,mu=0,sigma.x=sd(example5_1$耗油量),conf.level=0.90)

## 
##  One-sample z-Test
## 
## data:  example5_1$耗油量
## z = 99.575, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 90 percent confidence interval:
##  7.835887 8.099113
## sample estimates:
## mean of x 
##    7.9675

z.test(example5_1$耗油量,mu=0,sigma.x=sd(example5_1$耗油量),conf.level=0.90)$conf.int#只输出置信区间的信息

## [1] 7.835887 8.099113
## attr(,"conf.level")
## [1] 0.9

0.4.2 小样本下，总体方差未知

load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch5\\example5_2.RData")
t.test(example5_2,conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  example5_2
## t = 54.565, df = 24, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  101.3748 109.3452
## sample estimates:
## mean of x 
##    105.36

t.test(example5_2,conf.level = 0.95)$conf.int#只输出置信区间的信息

## [1] 101.3748 109.3452
## attr(,"conf.level")
## [1] 0.95

0.5 两个总体均值之差

0.5.1 独立大样本下

load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch5\\example5_3.RData")
library(BSDA)
z.test(example5_3$男性工资,example5_3$女性工资, mu=0,sigma.x=sd(example5_3$男性工资),sigma.y=sd(example5_3$女性工资))$conf.int

## [1] 1826.052 2212.398
## attr(,"conf.level")
## [1] 0.95

0.5.2 独立小样本下

\[\sigma_1^2=\sigma_2^2\]

load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch5\\example5_4.RData")
t.test(x=example5_4$方法一,y=example5_4$方法二,var.equal=TRUE)$conf.int

## [1] 0.1402936 7.2597064
## attr(,"conf.level")
## [1] 0.95

    #$\sigma_1^2≠sigma_2^2$
t.test(x=example5_4$方法一,y=example5_4$方法二,var.equal=FALSE)$conf.int

## [1] 0.1384265 7.2615735
## attr(,"conf.level")
## [1] 0.95

0.5.3 配对样本下

load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch5\\example5_5.RData")
t.test(example5_5$试卷A,example5_5$试卷B,paired = TRUE)$conf.int

## [1]  6.327308 15.672692
## attr(,"conf.level")
## [1] 0.95

0.6 一个总体比例的区间估计

0.6.1 大样本下

n=500;x=325;p=x/n;q=qnorm(0.975)#qnorm gives the quantile function
LCI=p-q*sqrt(p*(1-p)/n);UCI=p+q*sqrt(p*(1-p)/n)
data.frame(LCI,UCI)

##         LCI       UCI
## 1 0.6081925 0.6918075

library(Hmisc)

## Loading required package: survival

## Loading required package: Formula

## Loading required package: ggplot2

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:base':
## 
##     format.pval, units

n=500;x=325
binconf(x,n,alpha = 0.05,method='all')#asymptotic的结果与上式相同

##            PointEst     Lower     Upper
## Exact          0.65 0.6064011 0.6918131
## Wilson         0.65 0.6071929 0.6905198
## Asymptotic     0.65 0.6081925 0.6918075

0.7 两个总体比例之差的置信区间

0.7.1 大样本下

p1=225/500;p2=128/400;q=qnorm(0.975)
LCI=(p1-p2)-q*sqrt(p1*(1-p1)/500+p2*(1-p2)/400);UCI=(p1-p2)+q*sqrt(p1*(1-p1)/500+p2*(1-p2)/400)
data.frame(LCI,UCI)

##          LCI       UCI
## 1 0.06682346 0.1931765

0.8 一个总体方差的估计：首先假定总体服从正态分布

load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch5\\example5_2.RData")
library(TeachingDemos)

## 
## Attaching package: 'TeachingDemos'

## The following objects are masked from 'package:Hmisc':
## 
##     cnvrt.coords, subplot

## The following object is masked from 'package:BSDA':
## 
##     z.test

sigma.test(example5_2$食品重量,conf.level = 0.95)$conf.int

## [1]  56.82897 180.38811
## attr(,"conf.level")
## [1] 0.95

0.9 两个总体方差比的估计

load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch5\\example5_4.RData")
var.test(example5_4$方法一,example5_4$方法二,alternative='two.sided')$conf.int

## [1] 0.2378836 2.8704428
## attr(,"conf.level")
## [1] 0.95