page-03 Example:

x <- c()                                                  #務必記得一開始先歸零(放空),以避免重複執行下, x 累積了之前的結果
for (i in 1:10000) {x<-rbind(x,sum(sample(1:6,2,rep=T))/2)}  #模擬擲骰子2次的平均,做10000次,求整體平均數與變異數
mean(x) ; var(x)    # 理論值是 mean(x)=3.50, var(x)=35/24=1.4583333...
## [1] 3.51935
##          [,1]
## [1,] 1.469698

page-03

par(mfrow=c(1,2))   # 調整成一列兩欄的圖形配置
x<-c()
for (i in 1:10000) {x<-rbind(x,(sample(1:6,1)+sample(1:6,1))/2)}
hist(x,seq(0,7,.5),main= "size=10,000")    # ((6-1)/0.5)+1=11(組)
hist(x,seq(0,7,.01) ,main= "size=10,000")

page-03 練習:

1.上例不設定seq,圖形有何變化?

(R會將資料自動分組,可能得到與我們想像有差距的圖形輸出)

par(mfrow=c(1,1))  # 調整回一列一欄的圖形配置
hist(x,main= "size=10,000 ")

2.改成投擲一顆公正骰子5次的平均,抽樣分配又有何變化

x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(1:6,5,rep=T))/5)}  # 取後放回很重要! 
# 模擬擲骰子5次的平均,做10000次
hist(x, main= "size=10,000 ")

hist(x,seq(0,7,.01),main= "size=10,000")   # 調整直方圖

3.考慮從班上1-50號同學中隨機抽出三位,模擬所得座號平均之抽樣分配

# 取後放回
x<-c()
for (i in 1:10000) {x<-rbind(x,(sample(1:50,1)+sample(1:50,1)+sample(1:50,1))/3)}
##  上一行也可以寫成 for ( i in 1:10000) {x<-rbind(x,sum(sample(1:50,3,rep=T))/3)} 
hist(x, seq(0,50,1/3), main="size =10,000")

# 取後不放回
x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(1:50,3,rep=F))/3)}  # 
  • 模擬隨機抽班上三位同學(抽後不放回)所得座號的平均,做10000次
hist(x, main= "size=10,000 ")

par(mfrow=c(1,2))
hist(x,seq(0,50,.01),main= "size=10,000")   # 調整直方圖
hist(x,seq(0,50,1/3),main= "size=10,000")   # 調整直方圖

page-04 練習:

# 取後放回
par(mfrow=c(1,3))
x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(c(2,2,3,9,9),2,rep=T))/2)}  # 
## 以取後放回方式抽出兩球得號碼平均,做10000次
hist(x, main= "size=10,000 ")
hist(x,seq(0,10,.5),main= "size=10,000")   # 調整直方圖
hist(x,seq(0,10,.01),main= "size=10,000")   # 調整直方圖

# 取後不放回
par(mfrow=c(1,3))
x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(c(2,2,3,9,9),2,rep=F))/2)}  
## 以取後不放回方式抽出兩球得號碼平均,做10000次
hist(x, main= "size=10,000 ")
hist(x,seq(0,10,.5),main= "size=10,000")   # 調整直方圖
hist(x,seq(0,10,.01),main= "size=10,000")   # 調整直方圖

par(mfrow=c(1,2))    
x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(c(2,2,3,9,9),2,rep=T))/2)}
hist(x,seq(0,10,.01),main= "size=10,000, with replacement")
x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(c(2,2,3,9,9),2,rep=F))/2)}
hist(x,seq(0,10,.01),main= "size=10,000, without replacement")

page-05 Example:

Example: 用電腦模擬10000次結果,求得平均數與變異數的平均情形,與理論值比較

x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(1:6,3,rep=T))/3)}
mean(x) ; var(x)     
## [1] 3.485467
##           [,1]
## [1,] 0.9915768
  • 理論值是 \(\mu=E(x)=3.50, \ \sigma^2 = var(x)=\frac{35}{36} \approx 0.9722222...\)

page-05 Example:

par(mfrow=c(1,3))   ## 調整為 1x3 的圖形配置
x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(1:6,100,rep=T))/100)}
mean(x) ; var(x)     
## [1] 3.497438
##            [,1]
## [1,] 0.02931243
hist(x,seq(0,7,.01),main= "size=10,000, n=100")
x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(1:6,400,rep=T))/400)}
mean(x) ; var(x)     
## [1] 3.499651
##             [,1]
## [1,] 0.007275753
hist(x,seq(0,7,.01),main= "size=10,000, n=400")
x<-c()
for (i in 1:10000) {x<-rbind(x,sum(sample(1:6,10000,rep=T))/10000)}
mean(x) ; var(x)     # 理論值是 mean(x)=3.50, var(x)=35/120000=0.000291666...
## [1] 3.500141
##              [,1]
## [1,] 0.0002913706
hist(x,seq(0,7,.01),main= "size=10,000, n=10,000")

page-06

# 利用R模擬 n=10 的情況,實驗100次
par(mfrow=c(1,2)) 
x<-c()
for (i in 1:100) {x<-rbind(x, rbinom(1,10,0.5) /10)}  
hist(x,seq(0,1,.001),main="size=100, n=10")
x<-c()
for (i in 1:100) {x<-rbind(x, rbinom(1,10,0.5) /10)}  
hist(x, prob=T, main="size=100, n=10")
curve(dnorm(x, 0.5, sqrt(0.025)), add=T)

page-06 練習:

par(mfrow=c(1,3)) 
x<-c()
for (i in 1:10000) {x<-rbind(x, rbinom(1,10,0.5) /10)}  
hist(x,freq=F,main="size=10000, n=10")
curve(dnorm(x, 0.5, sqrt(0.025)), add=T)    #n=10時,近似常態效果不佳
x<-c()
for (i in 1:10000) {x<-rbind(x, rbinom(1,100,0.5) /100)}  
hist(x,prob=T,main="size=10000, n=100")
curve(dnorm(x, 0.5, sqrt(0.0025)), add=T)
x<-c()
for (i in 1:10000) {x<-rbind(x, rbinom(1,10000,0.5) /10000)}  
hist(x,prob=T,main="size=10000, n=10000")
curve(dnorm(x, 0.5, sqrt(0.000025)), add=T)

page-07

par(mfrow=c(3,3))                   # p接近0.5時,近似常態效果佳
for (k in 1:9){ 
  x<-c() ;
  for (i in 1:10000) {x<-rbind(x, rbinom(1,30,0.1*k) /30)} ; 
  hist(x,seq(0,1,.001),main=paste("size=10,000, n=30, p=", 0.1*k))}

page-07 練習:

# 伯努力分配執行n次(二項分配)模擬抽樣與理論常態近似結果比較
par(mfrow=c(1,3)) 
x<-c()
for (i in 1:10000) {x<-rbind(x, rbinom(1,3,0.3) /3)}  
hist(x,freq=F,main="size=10000, n=3")
curve(dnorm(x, 0.3, sqrt(0.07)), add=T)  
x<-c()
for (i in 1:10000) {x<-rbind(x, rbinom(1,10,0.3) /10)}  
hist(x,freq=F,main="size=10000, n=10")
curve(dnorm(x, 0.3, sqrt(0.021)), add=T)  
x<-c()
for (i in 1:10000) {x<-rbind(x, rbinom(1,30,0.3) /30)}  
hist(x,freq=F,main="size=10000, n=30")
curve(dnorm(x, 0.3, sqrt(0.007)), add=T)  # n達到30時,近似常態效果佳

page-07 練習:

# 標準常態分配的理論值(曲線)與模擬抽樣結果
par(mfrow=c(1,4)) 
x <- c()
for (i in 1:10000) {x <- rbind(x, rnorm(1) /1)}  
hist(x, freq=F, main="size=10000, n=1")
curve(dnorm(x, 0, sqrt(1)), add=T) 
x <- c()
for (i in 1:10000) {x<-rbind(x, sum(rnorm(3)) /3)}  
hist(x, freq=F, main="size=10000, n=3")
curve(dnorm(x, 0, sqrt(1/3)), add=T)
x <- c()
for (i in 1:10000) {x<-rbind(x, sum(rnorm(10)) /10)}  
hist(x, freq=F, main="size=10000, n=10")
curve(dnorm(x, 0, sqrt(1/10)), add=T) 
x <- c()
for (i in 1:10000) {x<-rbind(x, sum(rnorm(30)) /30)}  
hist(x,freq=F,main="size=10000, n=30")
curve(dnorm(x, 0, sqrt(1/30)), add=T)     

# <補充> ## n = 1,4,16,64 ,可看出標準差大致以2倍速度減少
par(mfrow=c(1,4))   
x<-c(); for ( i in 1:10000) {x<-rbind(x, rnorm(1) )}  
hist(x,seq(-4,4,.001),main="size=10,000, n=1")     # 若出現錯誤可設定(-5,5)
x<-c(); for ( i in 1:10000) {x<-rbind(x, sum(rnorm(4) )/4)}  
hist(x,seq(-4,4,.001),main="size=10,000, n=4")
x<-c(); for ( i in 1:10000) {x<-rbind(x, sum(rnorm(16))/16)}  
hist(x,seq(-4,4,.001),main="size=10,000, n=16")
x<-c(); for ( i in 1:10000) {x<-rbind(x, sum(rnorm(64)) /64)}  
hist(x,seq(-4,4,.001),main="size=10,000, n=64")

# <補充> 繪製標準常態分配之 p.d.f. 與 t分配 (自由度 3, 10, 30) 之 p.d.f.  
par(mfrow=c(1,1)) 
curve(dnorm(x),from=-4,to=4,col="red")
curve(dt(x,3),from=-4,to=4,add=T,col="blue")
curve(dt(x,10),from=-4,to=4,add=T,col="green")
curve(dt(x,30),from=-4,to=4,add=T,col="black")

page-08 Example:

# (1)小樣本,用t分配
(1-pt(0.9035,9))       # [1] 0.1949014
## [1] 0.1949014
# (2)大樣本,用常態分配近似
(1-pnorm(2.875))       # [1] 0.002020137
## [1] 0.002020137
# (3)大樣本,用常態分配近似,查P(-3 < Z < 2)
(pnorm(2)-pnorm(-3))   # [1] 0.9759
## [1] 0.9759
x<-0 ; count<-0    # 小樣本
for (i in 1:10000) {x<-sum(rnorm(10,66,14))/10; ifelse(x>70,count<-count+1,count<-count) }  
(p<-count/10000)   
## [1] 0.1826
x<-0 ; count<-0    # 大樣本
for (i in 1:10000) {x<-sum(rnorm(100,66,14))/100; ifelse(x>70,count<-count+1,count<-count) }  
(p<-count/10000)   
## [1] 0.0022
x<-0 ; count<-0    # 常態的例子
for (i in 1:10000) {x<-sum(rnorm(36,66,12))/36; ifelse(x>60 & x<70 ,count<-count+1,count<-count) }  
(p<-count/10000)
## [1] 0.9751
x<-0 ; count<-0    # 均勻分配的例子 (B+A=132,B-A=sqrt(144*12))
for (i in 1:10000) {x<-sum(runif(36,45.21539,86.78461))/36; ifelse(x>60 & x<70 ,count<-count+1,count<-count) }  
(p<-count/10000)
## [1] 0.9728

page-10 Example:

(qchisq(0.90,5))        # [1] 9.236357
## [1] 9.236357
#卡方分配 p.d.f.,v=3,5,10
curve(dchisq(x,3),from=0,to=20,col="red")
curve(dchisq(x,5),from=0,to=20,add=T,col="blue")
curve(dchisq(x,10),from=0,to=20,add=T,col="green")

#卡方分配 p.d.f.v=15 和 常態分配 N(15,30) p.d.f 的比較
curve(dchisq(x,15),from=0,to=30)
curve(dnorm(x,15,sqrt(30)),from=0,to=30,add=T,col="blue")

page-11

(0.33-0.2)/sqrt((0.2)*(0.8)/100)     # 王同學得票數計算之Z分數
## [1] 3.25
(0.15-0.2)/sqrt((0.2)*(0.8)/100)     # 李同學得票數計算之Z分數
## [1] -1.25
# 模擬100張有效票的開票結果(五位參選人勢均力敵)
x<-0
x<-sample(1:5,100,rep=T)
length(x[x[]==1])
## [1] 25
length(x[x[]==2])
## [1] 21
length(x[x[]==3])
## [1] 18
length(x[x[]==4])
## [1] 21
length(x[x[]==5])
## [1] 15
# 模擬100張有效票的開票結果(五位參選人估計得票率分別為 0.3, 0.2, 0.1, 0.2, 0.2)
x<-0
x<-sample(1:5,100,rep=T,prob=c(0.3,0.2,0.1,0.2,0.2))
length(x[x[]==1])
## [1] 27
length(x[x[]==2])
## [1] 18
length(x[x[]==3])
## [1] 11
length(x[x[]==4])
## [1] 23
length(x[x[]==5])
## [1] 21
# 五位參選人勢均力敵,某位候選人(2)在估計得票率為 0.2 的情況下,模擬10000次投票計算當選率
x<-0; count<-0
for (i in 1:10000){x<-sample(1:5,100,rep=T); ifelse(length(x[x[]==2])>19,count <- count+1,count <- count)}
(p <- count/10000)
## [1] 0.5356
y<-0                    # 多項分配觀點
y<-rmultinom(10000,size=100,prob=c(rep(1/5,5)))
(p <- length(y[y[2,]>19])/50000)
## [1] 0.5427
z<-0; count<-0          # 二項分配觀點
for (i in 1:10000){z<-rbinom(1,100,1/5); ifelse(z[]>19,count <- count+1,count <- count)}
(p <- count/10000)
## [1] 0.5399
1-pbinom(19,100,1/5)    # 二項分配累積機率觀點
## [1] 0.5398386

page-12 Example:

t1<-0;t2<-0;t3<-0;t4<-0;t5<-0;x<-0   #以下程式碼有額外多加一個估計量t5(即x-bar)列入比較
for (i in 1:10000) 
{x<-rnorm(4,15,10);
t1<-rbind(t1,(x[1]+2*x[2]+2*x[3]-2*x[4])/3);
t2<-rbind(t2,(2*x[1]+(-1)*x[2]+2*x[3]+x[4])/3);
t3<-rbind(t3,(2*x[1]+x[2]+x[3]+2*x[4])/6);
t4<-rbind(t4,(x[1]+2*x[2]+2*x[3]+(-1)*x[4])/4);
t5<-rbind(t5,(x[1]+x[2]+x[3]+x[4])/4)}
cat("mean(theta1)=",mean(t1),"var(theta1)=",var(t1),"theorem=",13/9*100,"\n",
    "mean(theta2)=",mean(t2),"var(theta2)=",var(t2),"theorem=",10/9*100,"\n",
    "mean(theta3)=",mean(t3),"var(theta3)=",var(t3),"theorem=",10/36*100,"\n",
    "mean(theta4)=",mean(t4),"var(theta4)=",var(t4),"theorem=",10/16*100,"\n",
    "mean(theta5)=",mean(t5),"var(theta5)=",var(t5),"theorem=",4/16*100)
## mean(theta1)= 14.87907 var(theta1)= 145.173 theorem= 144.4444 
##  mean(theta2)= 19.94605 var(theta2)= 112.2418 theorem= 111.1111 
##  mean(theta3)= 14.9377 var(theta3)= 28.01794 theorem= 27.77778 
##  mean(theta4)= 14.89811 var(theta4)= 63.14811 theorem= 62.5 
##  mean(theta5)= 14.92743 var(theta5)= 25.35235 theorem= 25

page-13 用R畫信賴區間

count<-0                                        # count用來計算不包含0的C.I.個數
plot(0:4,-2:2,type="n",xlab=" ",ylab=" ")
for(i in 1:100){
  x<-rnorm(30);
  xl<-mean(x)+qnorm(0.025)*1/sqrt(30);
  xu<-mean(x)+qnorm(0.975)*1/sqrt(30);
  
  segments(0.04*i,xl,0.04*i,xu);
  abline(0,0);
  
  ifelse(xl>0 | xu<0, count <-count+1, count<-count)
}

(count)
## [1] 4
count<-0                                      # 畫成直的
plot(-2:2,0:4,type="n",xlab=" ",ylab=" ")
for(i in 1:100){
  x<-rnorm(30);
  xl<-mean(x)+qnorm(0.025)*1/sqrt(30);
  xu<-mean(x)+qnorm(0.975)*1/sqrt(30);
  
  segments(xl,0.04*i,xu,0.04*i);
  lines(c(0,0),c(0,4));
  
  ifelse(xl>0 | xu<0, count <-count+1, count<-count)
}

(count)
## [1] 11

page-14 Example:

x <- c(65, 56, 78, 82, 60, 90, 69, 76)
(mean(x))           # [1] 72
## [1] 72
(qnorm(0.975))      # [1] 1.959964
## [1] 1.959964
(lb <- mean(x)-qnorm(0.975)*10/sqrt(8))   # [1] 65.07048
## [1] 65.07048
(ub <- mean(x)+qnorm(0.975)*10/sqrt(8))   # [1] 78.92952
## [1] 78.92952
(qt(0.975,7))       # [1] 2.364624   
## [1] 2.364624
sd(x)               # [1] 11.55113
## [1] 11.55113
(lb <- mean(x)-qt(0.975,7)*sd(x)/sqrt(8))   # [1] 62.34301
## [1] 62.34301
(ub <- mean(x)+qt(0.975,7)*sd(x)/sqrt(8))   # [1] 81.65699
## [1] 81.65699

page-14 練習:

x<-rnorm(10,72,10)                                  

L1<-mean(x)-qnorm(0.975)*10/sqrt(length(x))
U1<-mean(x)+qnorm(0.975)*10/sqrt(length(x))
cat("Lower bound=",L1,"Upper bound=",U1)    # 母體標準差已知
## Lower bound= 63.61439 Upper bound= 76.01029
L2<-mean(x)-qt(0.975,9)*sd(x)/sqrt(length(x))
U2<-mean(x)+qt(0.975,9)*sd(x)/sqrt(length(x))
cat("Lower bound=",L2,"Upper bound=",U2)    # 母體標準差未知
## Lower bound= 61.38492 Upper bound= 78.23977

page-15 Example:

va <- var(c(65, 56, 78, 82, 60, 90, 69, 76))   # [1] 133.4286
qchisq(0.025,7)        # [1] 1.689869     
## [1] 1.689869
qchisq(0.975,7)        # [1] 16.01276 
## [1] 16.01276
(lb=(8-1)*va/qchisq(0.975,7))   # 下限 Lower Bound
## [1] 58.32847
(ub=(8-1)*va/qchisq(0.025,7))   # 上限 Upper Bound
## [1] 552.7055
qchisq(0.05,7)        # [1] 2.16735     
## [1] 2.16735
qchisq(0.95,7)        # [1] 14.06714     
## [1] 14.06714
(lb=sqrt((8-1)*va/qchisq(0.95,7)))   # 下限 Lower Bound
## [1] 8.148366
(ub=sqrt((8-1)*va/qchisq(0.05,7)))   # 上限 Upper Bound
## [1] 20.75912

page-16 練習:

x<-rnorm(10,50,10)                              
lb1<-9*var(x)/qchisq(0.975,9)
ub1<-9*var(x)/qchisq(0.025,9)
cat("Lower bound=",lb1,"Upper bound=",ub1)    # 母體標準差已知
## Lower bound= 32.05921 Upper bound= 225.8396
lb2<-sqrt(9*var(x)/qchisq(0.95,9))
ub2<-sqrt(9*var(x)/qchisq(0.05,9))
cat("Lower bound=",lb2,"Upper bound=",ub2)    # 母體標準差未知
## Lower bound= 6.0038 Upper bound= 13.54285
sd(x)
## [1] 8.231747
x<-rnorm(100,50,10)
lb1<-99*var(x)/qchisq(0.975,99)
ub1<-99*var(x)/qchisq(0.025,99) 
cat("Lower bound=",lb1,"Upper bound=",ub1)    # 母體標準差已知
## Lower bound= 84.61043 Upper bound= 148.1145
lb2<-sqrt(99*var(x)/qchisq(0.95,99))
ub2<-sqrt(99*var(x)/qchisq(0.05,99))
cat("Lower bound=",lb2,"Upper bound=",ub2)    # 母體標準差未知
## Lower bound= 9.390351 Upper bound= 11.8756
sd(x)
## [1] 10.47645

page-17

ph<-0.255
(lb<-ph-qnorm(0.975)*sqrt(ph*(1-ph)/800))   ## [1] 0.2247969
## [1] 0.2247969
(ub<-ph+qnorm(0.975)*sqrt(ph*(1-ph)/800))   ## [1] 0.2852031
## [1] 0.2852031

page-19 Example:

b_x1<-32000; b_x2<-29000; v_1<-3000^2; v_2<-4000^2; 
(b_x1-b_x2-qnorm(0.99)*sqrt(v_1/12+v_2/10))     ## [1] -566.2238
## [1] -566.2238
(b_x1-b_x2+qnorm(0.99)*sqrt(v_1/12+v_2/10))     ## [1] 6566.224
## [1] 6566.224
## 在原題的條件下,可得一包含0之信賴區間,顯示在較嚴格的標準下(alpha=0.02),兩公司初階主管的薪資無法呈現顯著的差異。


b_x1<-32000; b_x2<-29000; v_1<-3000^2; v_2<-4000^2; 
(b_x1-b_x2-qnorm(0.95)*sqrt(v_1/12+v_2/10))    ## [1] 478.4871
## [1] 478.4871
(b_x1-b_x2+qnorm(0.95)*sqrt(v_1/12+v_2/10))    ## [1] 5521.513
## [1] 5521.513
## 可得一不包含0之信賴區間,顯示在較寬鬆的標準下(alpha=0.10),兩公司初階主管的薪資,才有機會呈現顯著的差異。

page-20 Example:

(alb <- 498-qnorm(0.975)*50/sqrt(100))    # [1] 488.2002
## [1] 488.2002
(aub <- 498+qnorm(0.975)*50/sqrt(100))    # [1] 507.7998
## [1] 507.7998
(blb <- 522-qnorm(0.975)*30/sqrt(120))    # [1] 516.6324
## [1] 516.6324
(bub <- 522+qnorm(0.975)*30/sqrt(120))    # [1] 527.3676
## [1] 527.3676
# 由於兩校學生個別之信賴區間沒有重疊,可判斷兩校學生之多益成績似乎有顯著的差異。
(ablb <- (498-522)-qnorm(0.975)*sqrt(50^2/100+30^2/120))     #[1] -35.17351
## [1] -35.17351
(abub <- (498-522)+qnorm(0.975)*sqrt(50^2/100+30^2/120))     #[1] -12.82649
## [1] -12.82649
# 由信賴區間不包含0,可判斷兩校學生之多益成績似乎有顯著的差異。

page-20 Example: (兩母體變異數未知且相等)

x1<-c(65 ,71, 88 , 76, 72)
x2<-c(75 ,80, 79 , 64 , 93)
mean(x1)         ##  [1] 74.4
## [1] 74.4
mean(x2)         ##  [1] 78.2
## [1] 78.2
var(x1)          ##  [1] 73.3
## [1] 73.3
var(x2)          ##  [1] 108.7
## [1] 108.7
(spool_sq<-(4*var(x1)+4*var(x2))/(5+5-2))     ##  [1] 91
## [1] 91
(lb<-mean(x1)-mean(x2)-qt(0.975,8)*sqrt(spool_sq/5+spool_sq/5))   ##   [1] -17.71268
## [1] -17.71268
(ub<-mean(x1)-mean(x2)+qt(0.975,8)*sqrt(spool_sq/5+spool_sq/5))   ##   [1] 10.11268
## [1] 10.11268
x1<-c(65 ,71, 88 , 76, 72)
x2<-c(75 ,80, 79 , 64 , 93)
t.test(x1, x2, mu=0, alternative = "two.sided", var.equal = T) # 檢定,順便求信賴區間
## 
##  Two Sample t-test
## 
## data:  x1 and x2
## t = -0.62984, df = 8, p-value = 0.5464
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -17.71268  10.11268
## sample estimates:
## mean of x mean of y 
##      74.4      78.2
count<-0                                     # count用來計算不包含 -4 的C.I.個數
plot(0:100,-50:50,type="n",xlab=" ",ylab=" ")
for(i in 1:100){
  x1<-rnorm(5,75,10);                 
  x2<-rnorm(5,79,12);
  (spool_sq<-(4*var(x1)+4*var(x2))/(5+5-2))
  lb<-mean(x1)-mean(x2)-qt(0.975,8)*sqrt(spool_sq/5+spool_sq/5);
  ub<-mean(x1)-mean(x2)+qt(0.975,8)*sqrt(spool_sq/5+spool_sq/5);
  
  segments(i,lb,i,ub);
  abline(-4,0)
  
  ifelse(lb> -4 | ub< -4, count <-count+1, count<-count)
}

(count)
## [1] 6

page-21 Example:

(lb<-(0.84-0.95)-qnorm(0.975)*sqrt(0.84*0.16/100+0.95*0.05/100)) ## [1] -0.1935919
## [1] -0.1935919
(ub<-(0.84-0.95)+qnorm(0.975)*sqrt(0.84*0.16/100+0.95*0.05/100)) ## [1] -0.02640805
## [1] -0.02640805
pb <- (84+95)/(100+100)
(lb<-(0.84-0.95)-qnorm(0.975)*sqrt(pb*(1-pb)/100+pb*(1-pb)/100)) ## [1] -0.1949707
## [1] -0.1949707
(ub<-(0.84-0.95)+qnorm(0.975)*sqrt(pb*(1-pb)/100+pb*(1-pb)/100)) ## [1] -0.02502929
## [1] -0.02502929

page-23 Example:

(lb<-(250/160)*1/qf(0.975,24,24))    ##  [1] 0.6885452
## [1] 0.6885452
(ub<-(250/160)*qf(0.975,24,24))      ##  [1] 3.545746
## [1] 3.545746
(lb<-(250/160)*1/qf(0.95,24,24))    ##  [1] 0.7876459
## [1] 0.7876459
(ub<-(250/160)*qf(0.95,24,24))      ##  [1] 3.099624   
## [1] 3.099624

page-31 Example:

page-31 Example:

x<-c(65, 56, 78, 82, 60, 90, 69, 76, 72, 62)
(lb<-mean(x)-qnorm(0.975)*10/sqrt(10)) ## [1] 64.80205
## [1] 64.80205
(ub<-mean(x)+qnorm(0.975)*10/sqrt(10)) ## [1] 77.19795
## [1] 77.19795
(lb<-mean(x)-qt(0.975,9)*sd(x)/sqrt(10)) ## [1] 63.36953
## [1] 63.36953
(ub<-mean(x)+qt(0.975,9)*sd(x)/sqrt(10)) ## [1] 78.63047
## [1] 78.63047
x<-c(65, 56, 78, 82, 60, 90, 69, 76, 72, 62)
t.test(x,mu=78,alternative="two.sided")
## 
##  One Sample t-test
## 
## data:  x
## t = -2.0752, df = 9, p-value = 0.06779
## alternative hypothesis: true mean is not equal to 78
## 95 percent confidence interval:
##  63.36953 78.63047
## sample estimates:
## mean of x 
##        71

page-32 Example:

#  1. 臨界值:計算得 c = 123.2897公克,抽樣之平均添加物重量為123公克小於c,故不拒絕 H0
(c <- 120 + qnorm(0.95)*12/sqrt(36))    ## [1] 123.2897
## [1] 123.2897
# 2. 檢定統計量(大樣本,用z近似t):計算得 z = 1.5 < 1.645 = z_{0.05} ,故不拒絕 H0
(z <- (123-120)/(12/sqrt(36)))          ## [1] 1.5
## [1] 1.5
# 3. 信賴區間(單尾):計算得mu之95%單(右)尾信賴區間為 (119.7103, 無窮大), mu0 = 120 被包含在內,故不拒絕 H0
(lb <- 123-qnorm(0.95)*12/sqrt(36))     ## [1] 119.7103
## [1] 119.7103
# 4. p值:計算得p-value = 0.0668072 > 0.05 ,故不拒絕 H0
(pv <- 1-pnorm(1.5))                    ## [1] 0.0668072
## [1] 0.0668072

page-033 Example:

# 1. 臨界值:計算得 c = 595.6137公克,抽樣之平均罐頭重量為596公克大於c,故不拒絕 H0
(c <- 600 - qnorm(0.95)*16/sqrt(36))    ## [1] 595.6137
## [1] 595.6137
# 2. 檢定統計量(大樣本,用z近似t):計算得 z = -1.5 > -1.645 = -z_{0.05} ,故不拒絕 H0
(z <- (596-600)/(16/sqrt(36)))          ## [1] -1.5
## [1] -1.5
# 3. 信賴區間(單尾):計算得mu之95%單(左)尾信賴區間為 (負無窮大, 600.3863),因 mu_{0} = 600 被包含在內,故不拒絕 H0
(lb <- 596+qnorm(0.95)*16/sqrt(36))     ## [1] 600.3863
## [1] 600.3863
# 4. p值:計算得p-value = 0.0668072 > 0.05 ,故不拒絕 H0
(pv <- pnorm(-1.5))                    ## [1] 0.0668072
## [1] 0.0668072

page-34 Example:

# 得 t = 1.697749 < 2.024394 = qt(0.975,20+20-2),故不拒絕 H0
(sp2 <- (14^2+12^2)/2)
## [1] 170
(t <- ((601-594)-0) / sqrt(sp2*(1/10)) )
## [1] 1.697749
(qt(0.975,38))
## [1] 2.024394
#  得 t = -2.2 < -1.75305 = qt(0.05,15),故拒絕 H0
(t <- (183.5-200)/(30/sqrt(16)))
## [1] -2.2
(qt(0.05,15))
## [1] -1.75305
#  得 z = -1.212678 > -1.959964 = qnorm(0.025),故不拒絕 H0
(pb <- (28+36)/(100+100))
## [1] 0.32
(z <- (((28/100)-(36/100))-0)/sqrt(2*pb*(1-pb)/100) )
## [1] -1.212678
(qnorm(0.025))
## [1] -1.959964
# 亦可使用兩母體比例檢定(卡方適合度檢定當獨立母體個數k=2時,結果有透過連續性修正處理),p-value = 0.2886 > 0.05,故不拒絕 H0
prop.test(c(28,36),c(100,100))
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(28, 36) out of c(100, 100)
## X-squared = 1.1259, df = 1, p-value = 0.2886
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.21882198  0.05882198
## sample estimates:
## prop 1 prop 2 
##   0.28   0.36

page-34 Example:

# 檢定 H0:mu中-mu西 <= 20 ,v.s. H1:mu中-mu西 > 20
# 得 t = 0.4945 <  1.734064 = qt(0.95, 18),故不拒絕 H0
chi<-c(101, 125, 112, 94, 109, 113, 113, 123, 116, 121)
west<-c(82, 100, 90, 74, 83, 94,    95, 98, 94, 97)
var.test(chi,west)      # 先檢定變異數是否一致,結果差異未達顯著                          
## 
##  F test to compare two variances
## 
## data:  chi and west
## F = 1.3217, num df = 9, denom df = 9, p-value = 0.6845
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.3282955 5.3212202
## sample estimates:
## ratio of variances 
##           1.321716
t.test(chi,west,mu=20,alternative="greater",paired=T, var.equal = T)
## 
##  Paired t-test
## 
## data:  chi and west
## t = 2.1764, df = 9, p-value = 0.02875
## alternative hypothesis: true difference in means is greater than 20
## 95 percent confidence interval:
##  20.31549      Inf
## sample estimates:
## mean of the differences 
##                      22
(qt(0.95,18))
## [1] 1.734064

page-36 Example:

(z <- (26/200-1/10)/sqrt((0.13)*(0.87)/200 ))
## [1] 1.261551
(qnorm(0.95))
## [1] 1.644854
# 亦可使用單一母體比例檢定(卡方適合度檢定),p-value = 0.09743 > 0.05,故不拒絕 H0
prop.test(26,200,0.1, alternative ="greater")
## 
##  1-sample proportions test with continuity correction
## 
## data:  26 out of 200, null probability 0.1
## X-squared = 1.6806, df = 1, p-value = 0.09743
## alternative hypothesis: true p is greater than 0.1
## 95 percent confidence interval:
##  0.09361944 1.00000000
## sample estimates:
##    p 
## 0.13
(qnorm(0.95))
## [1] 1.644854
# 亦即 (x/200-0.1)/sqrt(((x/200)*(1-x/200)/200)) > 1.644854 =>
# (x-20)^2/(x*(200-x)) > 1.644854^2/200 =  0.01352772 =>
# (x-20)^2 >   0.01352772 * x (200-x) => 1.01352772*x^2 -42.705544*x + 400 >0
polyroot(c(400,-42.705544,1.01352772))   # 解出 x > = 28.08133 ,要超過28.08133次(達29次或以上)
## [1] 14.05422+0i 28.08133-0i
# 超過qnorm(0.975) = 1.959964 或低於 qnorm(0.025) = -1.959964 才有足夠證據拒絕 H0 (此為近似解, p=1/10 偏態嚴重)
(qnorm(0.975)); (qnorm(0.025))
## [1] 1.959964
## [1] -1.959964
# 亦即 (x/200-0.1)/sqrt(((x/200)*(1-x/200)/200)) > 1.959964 =>
# (x-20)^2/(x*(200-x)) > 1.959964^2/200 =  0.01920729 =>
# (x-20)^2 >   0.01920729 * x (200-x) => 1.01920729*x^2 -43.841458*x + 400 > 0
polyroot(c(400,-43.841458,1.01920729))   # 解出 x > 29.88116 ,要超過29.88116次(達30次或以上)
## [1] 13.13409+0i 29.88116-0i
# 亦或 (x/200-0.1)/sqrt(((x/200)*(1-x/200)/200)) < -1.959964 =>
# (x-20)^2/(x*(200-x)) > (1.959964)^2/200 =  0.01920729 =>
# (x-20)^2 >   0.01920729 * x (200-x) => 1.01920729*x^2 -43.841458*x + 400 > 0
polyroot(c(400,-43.841458,1.01920729))   # 解出 x < 13.13409 ,要低於13.13409次(達13次或以下)
## [1] 13.13409+0i 29.88116-0i
# 另解:將200號碼的產出視作執行200次伯努力實驗,成功次數 x 服從 B(200,1/10)
for (i in 1:40){ cat("i=",i,"c.d.f.=",pbinom(i,200,1/10) ,"\n") }
## i= 1 c.d.f.= 1.638346e-08 
## i= 2 c.d.f.= 1.897119e-07 
## i= 3 c.d.f.= 1.460788e-06 
## i= 4 c.d.f.= 8.416395e-06 
## i= 5 c.d.f.= 3.871193e-05 
## i= 6 c.d.f.= 0.0001481125 
## i= 7 c.d.f.= 0.0004849967 
## i= 8 c.d.f.= 0.001388034 
## i= 9 c.d.f.= 0.003528566 
## i= 10 c.d.f.= 0.00807125 
## i= 11 c.d.f.= 0.01678953 
## i= 12 c.d.f.= 0.03204653 
## i= 13 c.d.f.= 0.05656204 
## i= 14 c.d.f.= 0.09294618 
## i= 15 c.d.f.= 0.1430754 
## i= 16 c.d.f.= 0.2074776 
## i= 17 c.d.f.= 0.2849286 
## i= 18 c.d.f.= 0.3724195 
## i= 19 c.d.f.= 0.4655385 
## i= 20 c.d.f.= 0.5591748 
## i= 21 c.d.f.= 0.6483522 
## i= 22 c.d.f.= 0.7289722 
## i= 23 c.d.f.= 0.7982976 
## i= 24 c.d.f.= 0.855106 
## i= 25 c.d.f.= 0.8995427 
## i= 26 c.d.f.= 0.9327753 
## i= 27 c.d.f.= 0.9565715 
## i= 28 c.d.f.= 0.9729078 
## i= 29 c.d.f.= 0.9836734 
## i= 30 c.d.f.= 0.9904917 
## i= 31 c.d.f.= 0.9946462 
## i= 32 c.d.f.= 0.9970841 
## i= 33 c.d.f.= 0.9984631 
## i= 34 c.d.f.= 0.9992157 
## i= 35 c.d.f.= 0.9996123 
## i= 36 c.d.f.= 0.9998142 
## i= 37 c.d.f.= 0.9999137 
## i= 38 c.d.f.= 0.9999611 
## i= 39 c.d.f.= 0.999983 
## i= 40 c.d.f.= 0.9999928
# 由 c.d.f. 可得知當成功次數 x < = 11 次 或 > = 29 次 時,有足夠證據推翻H0,故"7"出現次數在12~28次之間,搖獎機可算是公正的 

page-36 Example:

# 檢定 H0:p1 = p2 ,v.s. H1:p1 not equal to p2,p-value =  0.03836 < 0.05,故拒絕 H0
prop.test(c(273,378),c(350,450))
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(273, 378) out of c(350, 450)
## X-squared = 4.2888, df = 1, p-value = 0.03836
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.117591731 -0.002408269
## sample estimates:
## prop 1 prop 2 
##   0.78   0.84

page-37 Example:

qchisq(0.025,9)        # [1] 2.700389     
## [1] 2.700389
qchisq(0.975,9)        # [1] 19.02277     
## [1] 19.02277
(lb=(10-1)*4.5^2/qchisq(0.975,9))  # 下限 Lower Bound
## [1] 9.580625
(ub=(10-1)*4.5^2/qchisq(0.025,9))  # 上限 Upper Bound
## [1] 67.49026
qchisq(0.05,9)        # [1] 3.325113     
## [1] 3.325113
qchisq(0.95,9)        # [1] 16.91898     
## [1] 16.91898
(lb=sqrt((10-1)*4.5^2/qchisq(0.95,9)))  # 下限 Lower Bound
## [1] 3.282061
(ub=sqrt((10-1)*4.5^2/qchisq(0.05,9)))  # 上限 Upper Bound
## [1] 7.403389
# 1. 檢定洗選蛋與鐵蛋重量之變異數是否一致,結果差異達到顯著
(FS <- 4.5^2/1.2^2)    # [1] 14.0625
## [1] 14.0625
qf(0.95,9,9)           # [1] 3.178893
## [1] 3.178893
# F = 14.0625 > 3.178893 = qf(0.95,9,9),故拒絕H0
# 2. 檢定洗選蛋是否比鐵蛋重16公克以上
# H0:mu1-mu2<=16 v.s. H1:mu1-mu2>16
# 因變異數有顯著差異,故使用調整的自由度 phi = 10.27356
(phi <- (4.5^2/10+1.2^2/10)^2/((4.5^2/10)^2/9+(1.2^2/10)^2/9))
## [1] 10.27356
(td <- ((48-28)-16)/sqrt(4.5^2/10 + 1.2^2/10))
## [1] 2.716003
(qt(0.95,10.27356))  
## [1] 1.807576
# 因為td = 2.716003 > 1.807576 = qt(0.95,10.27356),故拒絕H0
we <- rnorm(10,48,4.5)
ie <- rnorm(10,28,1.2)
(mean(we));(sd(we))     # 模擬抽取洗選蛋10顆的平均重量與標準差
## [1] 46.26646
## [1] 5.011966
(mean(ie));(sd(ie))     # 模擬抽取鐵蛋10顆的平均重量與標準差
## [1] 27.69196
## [1] 0.9855127
t.test(we,ie,mu=16,alternative="greater",paired=F,var.equal=F)
## 
##  Welch Two Sample t-test
## 
## data:  we and ie
## t = 1.5938, df = 9.6949, p-value = 0.07151
## alternative hypothesis: true difference in means is greater than 16
## 95 percent confidence interval:
##  15.6375     Inf
## sample estimates:
## mean of x mean of y 
##  46.26646  27.69196

page-37 Example:

# (1) 檢定兩類股票的平均股利之變異數是否一致,結果差異未達顯著
x1<-c(6.82,3.35,4.60,6.32,5.84,5.66,4.76,5.55,6.20,7.42,6.40)
x2<-c(4.11,4.81,3.68,5.20,4.65,5.89,3.74,4.88,5.68,5.26)
var.test(x1,x2)
## 
##  F test to compare two variances
## 
## data:  x1 and x2
## F = 2.2417, num df = 10, denom df = 9, p-value = 0.2399
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5655428 8.4714342
## sample estimates:
## ratio of variances 
##           2.241735
# (2)   檢定當顯著水準α = 0.05時,兩類股票的平均股利是否相同
t.test(x1,x2,mu=0,alternative="t", var.equal=T)
## 
##  Two Sample t-test
## 
## data:  x1 and x2
## t = 2.1742, df = 19, p-value = 0.04253
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.03473545 1.82526455
## sample estimates:
## mean of x mean of y 
##      5.72      4.79
# (3)   求兩類股票平均股利差的90%信賴區間
t.test(x1,x2,mu=0,alternative="t", var.equal=T, conf.level=0.90)
## 
##  Two Sample t-test
## 
## data:  x1 and x2
## t = 2.1742, df = 19, p-value = 0.04253
## alternative hypothesis: true difference in means is not equal to 0
## 90 percent confidence interval:
##  0.1903853 1.6696147
## sample estimates:
## mean of x mean of y 
##      5.72      4.79

page-39 Example:

sell<-read.csv("anova1.csv",header=T)
library(reshape2)
## Warning: 套件 'reshape2' 是用 R 版本 4.1.2 來建造的
sell <- melt(sell,id.vars=c(),na.rm = T, variable.name = "area",value.name = "numbers")
anova_sell<-aov(numbers~factor(area),data=sell)
summary(anova_sell)
##              Df Sum Sq Mean Sq F value Pr(>F)  
## factor(area)  2     68   34.00     6.5  0.011 *
## Residuals    13     68    5.23                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

page-40 Example:

x<-c("145-155"=30,"155-165"=42,"165-175"=60,"175-185"=34,"185-195"=34)
chisq.test(x)  ##  or chisq.test(x, p=c(1/5,1/5,1/5,1/5,1/5))
## 
##  Chi-squared test for given probabilities
## 
## data:  x
## X-squared = 14.4, df = 4, p-value = 0.006122

page-40 Example:

x<-c("1點"=30,"2點"=11,"3點"=15,"4點"=24,"5點"=14,"6點"=26)
chisq.test(x, p=c(rep(1/6,6)))  ##  or  chisq.test(x)
## 
##  Chi-squared test for given probabilities
## 
## data:  x
## X-squared = 14.7, df = 5, p-value = 0.01172

page-41 Example:

# (1) 卡方獨立性檢定
black<-c(20,27,23,14,16)
white<-c(28,54,52,31,35)
chisq.test(rbind(black,white))
## 
##  Pearson's Chi-squared test
## 
## data:  rbind(black, white)
## X-squared = 1.9282, df = 4, p-value = 0.749
# (2) 比例檢定
prop.test(c(47,53),c(129,171))   
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(47, 53) out of c(129, 171)
## X-squared = 0.74969, df = 1, p-value = 0.3866
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.06057317  0.16937230
## sample estimates:
##    prop 1    prop 2 
## 0.3643411 0.3099415

page-43 Example:

# (1) 讀資料畫散佈圖
pulse<-read.csv("pulse.csv",header=T)
attach(pulse)                           # 連結到變數名稱以利運用
plot(時間,心跳數)

# (2)   散佈圖顯示兩者有強烈負相關
# (3)   相關係數與散佈圖反映的情況一致(去掉一個離群值,負相關更高)
cor(時間,心跳數)                        #  [1] -0.5953083
## [1] -0.5953083
cor(時間[時間[]<30],心跳數[時間[]<30])  #  [1] -0.7387117
## [1] -0.7387117
# (4)   計時單位改變不影響相關係數
cor(60*時間,心跳數)                     #  [1] -0.5953083
## [1] -0.5953083
# (5)   建立線性模式,決定簡單迴歸方程式  
runp <- lm(心跳數~時間)
summary(runp)
## 
## Call:
## lm(formula = 心跳數 ~ 時間)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.252  -5.177   1.912   5.622  11.683 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 219.3602    23.4177   9.367 6.01e-09 ***
## 時間         -2.8941     0.8524  -3.395  0.00273 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.788 on 21 degrees of freedom
## Multiple R-squared:  0.3544, Adjusted R-squared:  0.3236 
## F-statistic: 11.53 on 1 and 21 DF,  p-value: 0.002729

page-45 Example:

# (1)、(2) 讀取資料,建立線性模式,決定簡單迴歸方程式
pd <- read.table("product.txt",header=T)
plot(pd$x,pd$y)

reg_pd <- lm(y~x,data=pd)
summary(reg_pd)
## 
## Call:
## lm(formula = y ~ x, data = pd)
## 
## Residuals:
##      1      2      3      4      5 
## -2.895  2.863 -1.190  3.948 -2.725 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  17.9804     4.7363   3.796  0.03209 * 
## x             1.8105     0.2114   8.566  0.00334 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.697 on 3 degrees of freedom
## Multiple R-squared:  0.9607, Adjusted R-squared:  0.9476 
## F-statistic: 73.38 on 1 and 3 DF,  p-value: 0.003344