Sampling Distribution

page-52 Example:

x <- c()                                                  #當持續使用相同變數(例如：x)時，務必記得一開始先歸零(放空)，以避免重複執行下， x 累積了之前的結果

for (i in 1:10000) {x<-rbind(x,sum(sample(1:6,2,rep=T))/2)}  #模擬擲骰子2次的平均，做10000次，求整體平均數與變異數
mean(x) ; var(x)    # 理論值是 mean(x)=3.50, var(x)=35/24=1.4583333...

## [1] 3.49565

##          [,1]
## [1,] 1.467103

page-52

par(mfrow=c(1,2))   # 調整成一列兩欄的圖形配置
x<-c()
for (i in 1:10000) {x<-rbind(x,(sample(1:6,1)+sample(1:6,1))/2)}
hist(x,seq(0,7,.5),main= "size=10,000")    # ((6-1)/0.5)+1=11(組)
hist(x,seq(0,7,.01) ,main= "size=10,000")

page-52 練習：

1.上例不設定seq，圖形有何變化?

(R會將資料自動分組，可能得到與我們想像有差距的圖形輸出)

par(mfrow=c(1,1))  # 調整回一列一欄的圖形配置
hist(x,main= "size=10,000 ")

2.改成投擲一顆公正骰子5次的平均，抽樣分配又有何變化

x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(1:6,5,rep=T))/5)}  # 取後放回很重要! 
# 模擬擲骰子5次的平均，做10000次
hist(x, main= "size=10,000 ")

hist(x,seq(0,7,.01),main= "size=10,000")   # 調整直方圖

3.考慮從班上1-50號同學中隨機抽出三位，模擬所得座號平均之抽樣分配

# 取後放回
x<-c()
for (i in 1:10000) {x<-rbind(x,(sample(1:50,1)+sample(1:50,1)+sample(1:50,1))/3)}
##  上一行也可以寫成 for ( i in 1:10000) {x<-rbind(x,sum(sample(1:50,3,rep=T))/3)} 
hist(x, seq(0,50,1/3), main="size =10,000")

# 取後不放回
x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(1:50,3,rep=F))/3)}  #

模擬隨機抽班上三位同學(抽後不放回)所得座號的平均，做10000次

hist(x, main= "size=10,000 ")

par(mfrow=c(1,2))
hist(x,seq(0,50,.01),main= "size=10,000")   # 調整直方圖
hist(x,seq(0,50,1/3),main= "size=10,000")   # 調整直方圖

(重要!) page-53 練習：

# 取後放回
par(mfrow=c(1,3))
x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(c(2,2,3,9,9),2,rep=T))/2)}  # 
## 以取後放回方式抽出兩球得號碼平均，做10000次
hist(x, main= "size=10,000 ")
hist(x,seq(0,10,.5),main= "size=10,000")   # 調整直方圖
hist(x,seq(0,10,.01),main= "size=10,000")   # 調整直方圖

# 取後不放回
par(mfrow=c(1,3))
x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(c(2,2,3,9,9),2,rep=F))/2)}  
## 以取後不放回方式抽出兩球得號碼平均，做10000次
hist(x, main= "size=10,000 ")
hist(x,seq(0,10,.5),main= "size=10,000")   # 調整直方圖
hist(x,seq(0,10,.01),main= "size=10,000")   # 調整直方圖

模擬取後放回與不放回各10000次，並將結果並列比較

par(mfrow=c(1,2))    
x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(c(2,2,3,9,9),2,rep=T))/2)}
hist(x,seq(0,10,.01),main= "size=10,000, with replacement")
x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(c(2,2,3,9,9),2,rep=F))/2)}
hist(x,seq(0,10,.01),main= "size=10,000, without replacement")

page-54 Example:

Example: 用電腦模擬10000次結果，求得平均數與變異數的平均情形，與理論值比較

x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(1:6,3,rep=T))/3)}
mean(x) ; var(x)

## [1] 3.4956

##        [,1]
## [1,] 0.9715

理論值是 \(\mu=E(x)=3.50, \ \sigma^2 = var(x)=\frac{35}{36} \approx 0.9722222...\)

page-054 Example：

par(mfrow=c(1,3))   ## 調整為 1x3 的圖形配置
x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(1:6,100,rep=T))/100)}
mean(x) ; var(x)

## [1] 3.499772

##            [,1]
## [1,] 0.02954668

hist(x,seq(0,7,.01),main= "size=10,000, n=100")
x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(1:6,400,rep=T))/400)}
mean(x) ; var(x)

## [1] 3.499362

##             [,1]
## [1,] 0.007181995

hist(x,seq(0,7,.01),main= "size=10,000, n=400")
x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(1:6,10000,rep=T))/10000)}
mean(x) ; var(x)     # 理論值是 mean(x)=3.50, var(x)=35/120000=0.000291666...

## [1] 3.500165

##              [,1]
## [1,] 0.0002959452

hist(x,seq(0,7,.01),main= "size=10,000, n=10,000")

理論值分別是 \(\mu=E(x)=3.50\) (1)\(\sigma^2 = var(x)=\frac{35}{1200} \approx 0.0291666...\) (2)\(\sigma^2 = var(x)=\frac{35}{4800} \approx 0.007291666...\) (3)\(\sigma^2 = var(x)=\frac{35}{120000} \approx 0.000291666...\)

page-55

# 利用R模擬 n=10 的情況，實驗100次
par(mfrow=c(1,2)) 
x<-c()
for (i in 1:100) {x<-rbind(x, rbinom(1,10,0.5) /10)}  
hist(x,seq(0,1,.001),main="size=100, n=10")
x<-c()
for (i in 1:100) {x<-rbind(x, rbinom(1,10,0.5) /10)}  
hist(x, prob=T, main="size=100, n=10")
curve(dnorm(x, 0.5, sqrt(0.025)), add=T)

page-55 練習：

par(mfrow=c(1,3)) 
x<-c()
for (i in 1:10000) {x<-rbind(x, rbinom(1,10,0.5) /10)}  
hist(x,freq=F,main="size=10000, n=10")
curve(dnorm(x, 0.5, sqrt(0.025)), add=T)    #n=10時，近似常態效果不佳
x<-c()
for (i in 1:10000) {x<-rbind(x, rbinom(1,100,0.5) /100)}  
hist(x,prob=T,main="size=10000, n=100")
curve(dnorm(x, 0.5, sqrt(0.0025)), add=T)
x<-c()
for (i in 1:10000) {x<-rbind(x, rbinom(1,10000,0.5) /10000)}  
hist(x,prob=T,main="size=10000, n=10000")
curve(dnorm(x, 0.5, sqrt(0.000025)), add=T)

page-56

par(mfrow=c(3,3))                   # p接近0.5時，近似常態效果佳
for (k in 1:9){ 
  x<-c() ;
  for (i in 1:10000) {x<-rbind(x, rbinom(1,30,0.1*k) /30)} ; 
  hist(x,seq(0,1,.001),main=paste("size=10,000, n=30, p=", 0.1*k))}

page-56 練習：

# 伯努力分配執行n次(二項分配)模擬抽樣與理論常態近似結果比較
par(mfrow=c(1,3)) 
x<-c()
for (i in 1:10000) {x<-rbind(x, rbinom(1,3,0.3) /3)}  
hist(x,freq=F,main="size=10000, n=3")
curve(dnorm(x, 0.3, sqrt(0.07)), add=T)  
x<-c()
for (i in 1:10000) {x<-rbind(x, rbinom(1,10,0.3) /10)}  
hist(x,freq=F,main="size=10000, n=10")
curve(dnorm(x, 0.3, sqrt(0.021)), add=T)  
x<-c()
for (i in 1:10000) {x<-rbind(x, rbinom(1,30,0.3) /30)}  
hist(x,freq=F,main="size=10000, n=30")
curve(dnorm(x, 0.3, sqrt(0.007)), add=T)  # n達到30時，近似常態效果佳

page-56 練習：

# 標準常態分配的理論值(曲線)與模擬抽樣結果
par(mfrow=c(1,4)) 
x <- c()
for (i in 1:10000) {x <- rbind(x, rnorm(1) /1)}  
hist(x, freq=F, main="size=10000, n=1")
curve(dnorm(x, 0, sqrt(1)), add=T) 
x <- c()
for (i in 1:10000) {x<-rbind(x, sum(rnorm(3)) /3)}  
hist(x, freq=F, main="size=10000, n=3")
curve(dnorm(x, 0, sqrt(1/3)), add=T)
x <- c()
for (i in 1:10000) {x<-rbind(x, sum(rnorm(10)) /10)}  
hist(x, freq=F, main="size=10000, n=10")
curve(dnorm(x, 0, sqrt(1/10)), add=T) 
x <- c()
for (i in 1:10000) {x<-rbind(x, sum(rnorm(30)) /30)}  
hist(x,freq=F,main="size=10000, n=30")
curve(dnorm(x, 0, sqrt(1/30)), add=T)

# <補充> ## n = 1,4,16,64 ，可看出標準差大致以2倍速度減少
par(mfrow=c(1,4))   
x<-c(); for ( i in 1:10000) {x<-rbind(x, rnorm(1) )}  
hist(x,seq(-4,4,.001),main="size=10,000, n=1")     # 若出現錯誤可設定(-5,5)
x<-c(); for ( i in 1:10000) {x<-rbind(x, sum(rnorm(4) )/4)}  
hist(x,seq(-4,4,.001),main="size=10,000, n=4")
x<-c(); for ( i in 1:10000) {x<-rbind(x, sum(rnorm(16))/16)}  
hist(x,seq(-4,4,.001),main="size=10,000, n=16")
x<-c(); for ( i in 1:10000) {x<-rbind(x, sum(rnorm(64)) /64)}  
hist(x,seq(-4,4,.001),main="size=10,000, n=64")

# <補充> 繪製標準常態分配之 p.d.f. 與 t分配 (自由度 3, 10, 30) 之 p.d.f.  
par(mfrow=c(1,1)) 
curve(dnorm(x),from=-4,to=4,col="red")
curve(dt(x,3),from=-4,to=4,add=T,col="blue")
curve(dt(x,10),from=-4,to=4,add=T,col="green")
curve(dt(x,30),from=-4,to=4,add=T,col="black")

page-57 Example：

# (1)小樣本，用t分配
(1-pt(0.9035,9))       # [1] 0.1949014

## [1] 0.1949014

# (2)大樣本，用常態分配近似
(1-pnorm(2.875))       # [1] 0.002020137

## [1] 0.002020137

# (3)大樣本，用常態分配近似，查P(-3 < Z < 2)
(pnorm(2)-pnorm(-3))   # [1] 0.9759

## [1] 0.9759

以下針對前述小題條件，透過模擬10000次方式計算比較

x<-0 ; count<-0    # 小樣本
for (i in 1:10000) {x<-sum(rnorm(10,66,14))/10; ifelse(x>70,count<-count+1,count<-count) }  
(p<-count/10000)

## [1] 0.1848

x<-0 ; count<-0    # 大樣本
for (i in 1:10000) {x<-sum(rnorm(100,66,14))/100; ifelse(x>70,count<-count+1,count<-count) }  
(p<-count/10000)

## [1] 0.002

x<-0 ; count<-0    # 常態的例子
for (i in 1:10000) {x<-sum(rnorm(36,66,12))/36; ifelse(x>60 & x<70 ,count<-count+1,count<-count) }  
(p<-count/10000)

## [1] 0.9748

x<-0 ; count<-0    # 均勻分配的例子 (B+A=132,B-A=sqrt(144*12))
for (i in 1:10000) {x<-sum(runif(36,45.21539,86.78461))/36; ifelse(x>60 & x<70 ,count<-count+1,count<-count) }  
(p<-count/10000)

## [1] 0.9751

page-59 Example：

(qchisq(0.90,5))        # [1] 9.236357

## [1] 9.236357

#卡方分配 p.d.f.，v=3,5,10
curve(dchisq(x,3),from=0,to=20,col="red")
curve(dchisq(x,5),from=0,to=20,add=T,col="blue")
curve(dchisq(x,10),from=0,to=20,add=T,col="green")

#卡方分配 p.d.f.v=15 和 常態分配 N(15,30) p.d.f 的比較
curve(dchisq(x,15),from=0,to=30)
curve(dnorm(x,15,sqrt(30)),from=0,to=30,add=T,col="blue")

page-60

(0.33-0.2)/sqrt((0.2)*(0.8)/100)     # 王同學得票數計算之Z分數

## [1] 3.25

(0.15-0.2)/sqrt((0.2)*(0.8)/100)     # 李同學得票數計算之Z分數

## [1] -1.25

# 模擬100張有效票的開票結果(五位參選人勢均力敵)
x<-0
x<-sample(1:5,100,rep=T)
length(x[x[]==1])

## [1] 25

length(x[x[]==2])

## [1] 19

length(x[x[]==3])

## [1] 23

length(x[x[]==4])

## [1] 18

length(x[x[]==5])

## [1] 15

# 模擬100張有效票的開票結果(五位參選人估計得票率分別為 0.3, 0.2, 0.1, 0.2, 0.2)
x<-0
x<-sample(1:5,100,rep=T,prob=c(0.3,0.2,0.1,0.2,0.2))
length(x[x[]==1])

## [1] 29

length(x[x[]==2])

## [1] 19

length(x[x[]==3])

## [1] 14

length(x[x[]==4])

## [1] 16

length(x[x[]==5])

## [1] 22

# 五位參選人勢均力敵，某位候選人(2)在估計得票率為 0.2 的情況下，模擬10000次投票計算當選率
x<-0; count<-0
for (i in 1:10000){x<-sample(1:5,100,rep=T); ifelse(length(x[x[]==2])>19,count <- count+1,count <- count)}
(p <- count/10000)

## [1] 0.534

y<-0                    # 多項分配觀點
y<-rmultinom(10000,size=100,prob=c(rep(1/5,5)))
(p <- length(y[y[2,]>19])/50000)

## [1] 0.5455

z<-0; count<-0          # 二項分配觀點
for (i in 1:10000){z<-rbinom(1,100,1/5); ifelse(z[]>19,count <- count+1,count <- count)}
(p <- count/10000)

## [1] 0.5467

1-pbinom(19,100,1/5)    # 二項分配累積機率觀點

## [1] 0.5398386

Sampling Distribution

ggcchen

2023-09-03

page-52 Example:

page-52

page-52 練習：

1.上例不設定seq，圖形有何變化?

2.改成投擲一顆公正骰子5次的平均，抽樣分配又有何變化

3.考慮從班上1-50號同學中隨機抽出三位，模擬所得座號平均之抽樣分配

(重要!) page-53 練習：

page-54 Example:

Example: 用電腦模擬10000次結果，求得平均數與變異數的平均情形，與理論值比較

page-054 Example：

page-55

page-55 練習：

page-56

page-56 練習：

page-56 練習：

page-57 Example：

page-59 Example：

page-60