本次实验要求使用舍选法得到正态分布的随机数。根据已有知识,服从于正态分布的随机变量\(Y\),有\(Y\sim N\left( \mu ,{{\sigma }^{2}} \right)\),其定义域为\((-\infty ,+\infty )\),密度函数为\(h(x)=\frac{1}{\sqrt{2\pi {{\sigma }^{2}}}}\exp \left( -\frac{{{\left( x-\mu \right)}^{2}}}{2{{\sigma }^{2}}} \right)\)。若\(X\sim N\left( 0,1 \right)\),\(Y\)可以表示为\(Y=\mu X+\sigma\)。
现考虑\(X\)(密度函数:\(f(x)=\frac{1}{\sqrt{2\pi }}\exp \left( -\frac{{{x}^{2}}}{2} \right)\)),有函数上界\(M\left( x \right)\)对\(\forall x\),应该使用第一类抽样法。采用标准指数分布为试投密度\(g\left( x \right)={{e}^{-x}}\)。那么有\(f\left( x \right)\le {{C}_{g}}g\left( x \right)\),取 \({{C}_{g}}=\underset{x}{\mathop{\max }}\,\left\{ \frac{f\left( x \right)}{g\left( x \right)} \right\}\) 。下求\({{C}_{g}}\):
设: \[ \varphi \left( x \right)=\frac{f\left( x \right)}{g\left( x \right)}={\frac{1}{\sqrt{2\pi }}\exp \left( -\frac{{{x}^{2}}}{2} \right)}/{\exp \left( -x \right)}\; \] 其导数: \[ {{\varphi }^{'}}\left( x \right)=\frac{f\left( x \right)}{g\left( x \right)}=\frac{1}{\sqrt{2\pi }}\exp \left( -\frac{{{x}^{2}}}{2}+x \right)\centerdot \left( -x+1 \right) \]
驻点: \[ {{\varphi }^{'}}\left( x \right)=0\Rightarrow {{x}_{0}}=1 \] 带入\({{\varphi }^{''}}\left( {{x}_{0}} \right)<0\),可知它为极大值点
如此便求得: \[ {{C}_{g}}=\varphi \left( {{x}_{0}} \right)=\varphi \left( 1 \right)=\sqrt{\frac{e}{2\pi }}\approx 1.3155 \]
f <- function(x) {
(sqrt(2 / pi) * exp(-1 / 2 * x ^ 2)) / exp(-x)
}# 创建函数f(x)
data <- data.frame(x = 0:0.1:10, y = f(0:0.1:10))# 取0:0.1:10的数据绘图
ggplot(
data,
aes(x = x, y = y)
) +
geom_line(
color = 'cornflowerblue',
size = 1.5
) +
geom_point(
aes(x = 1, y = f(1)),
color = 'red', size = 3
) +
annotate(
"rect",
xmin = 1.5,
xmax = 3.5,
ymin = 1.23,
ymax = 1.29,
alpha = 0.2,
fill = 'lightgrey'
) +
annotate(
#增加表示均值的标签
"text",
x = 2.5,
y = 1.27,
label = 'Maximum point',
alpha = 0.5,
color = 'black'
) +
labs(
title = 'Generate normal random numbers',
x = 'x-axis', y = 'density'
) +
theme_light()
optimize(f, c(0, 100), tol = 0.00001, maximum = TRUE)# optimize函数计算极大值点和极大值
## $maximum
## [1] 0.9999999
##
## $objective
## [1] 1.315489
可以看出,理论推导和试验验证符合的很好,于是取 \[ {{C}_{g}}=\sqrt{\frac{2e}{\pi }}\approx 1.3155 \]
此时半正态密度函数\({{f}_{1}}\left( x \right)\)与\({{C}_{g}}g\left( x \right)\)的密度函数比较:
g1 <- function(x){
sqrt(2 / pi) * exp(-1 / 2 * x ^ 2)
}
g2 <- function(x){
sqrt(2*exp(1)/pi)*exp(-x)
}
ggplot(NULL, aes(x=x, colour = Legend)) +
stat_function(data = data.frame(x = 0:10, Legend = factor(1)), fun = g1,
size = 1.2) +
stat_function(data = data.frame(x = 0:10, Legend = factor(2)), fun = g2,
size = 1.2) +
geom_point(
aes(x = 1, y = g1(1)),
color = 'red', size = 3
) +
scale_colour_manual(values = c('darkorange', "purple"), labels = c("Half-dnorm(0,1)", "cg*dexp(1)"))+labs(
title = 'f1(x) && Cg*g(x) Plot',
x = 'x-axis', y = 'density'
) +
theme_light()
可以发现:\({{C}_{g}}g\left( x \right)\)的密度曲线恰好“覆盖”了半正态密度函数\({{f}_{1}}\left( x \right)\)密度曲线,所取\({{C}_{g}}\)符合要求。
Step 1: 输入想要抽取的随机数个数\(n\),期望随机数服从的正态分布的均值\(Miu\),标准差\(Sigma\);
Step 2: 建立空向量\(z\)存储抽取的随机数,计数器\(i=0\);
Step 3: 若\(i\le n\),抽取\(x\sim N\left( 0,1 \right)\),\(r\sim e\left( 1 \right)\);否则转Step5;
Step 4: 若 \({{C}_{g}}r\centerdot {{f}_{e\left( 1 \right)}}\left( x \right)\le {{f}_{N\left( 0,1 \right)}}\left( x \right)\)
SubStep 4.1:若\(runif(1)<\frac{1}{2}\),接受\(x\),\(z\left[ i \right]\leftarrow x\);否则接受\(-x\),\(z\left[ i \right]\leftarrow -x\),
SubStep 4.2:计数器加一:\(i\leftarrow i+1\),转Step 3;
否则转Step 3;
Step 5: 输出:表格、图像。
MyNormal( )函数用于抽取服从任意一维正态分布的随机数。
MyNormal(100,1,4)
## num rf
## 1 1 -1.5207483
## 2 2 0.3363276
## 3 3 -3.2643723
## 4 4 -3.6237959
## 5 5 6.3579906
## 6 6 -3.8213302
## 7 7 -0.4436386
## 8 8 2.3285127
## 9 9 1.2263719
## 10 10 -4.0794447
MyNormal(1000,1,4)
## num rf
## 1 1 1.3700008
## 2 2 1.6835225
## 3 3 -2.3326142
## 4 4 -3.6604311
## 5 5 3.4222585
## 6 6 2.7055533
## 7 7 3.8056851
## 8 8 -3.5088324
## 9 9 -2.7863660
## 10 10 -0.9751812
MyNormal(10000,1,4)
## num rf
## 1 1 3.579991
## 2 2 -3.819953
## 3 3 11.198451
## 4 4 1.502526
## 5 5 8.921007
## 6 6 0.427411
## 7 7 3.671110
## 8 8 -6.983073
## 9 9 -3.810043
## 10 10 5.288325
MyNormal(10000,1,2)
## num rf
## 1 1 2.1889829
## 2 2 -0.8011811
## 3 3 -2.2460005
## 4 4 1.2190525
## 5 5 -0.1169991
## 6 6 2.2583028
## 7 7 1.2684008
## 8 8 0.4848765
## 9 9 3.1476955
## 10 10 0.0380693
MyNormal(10000,4,2)
## num rf
## 1 1 5.7939440
## 2 2 3.8921345
## 3 3 4.3799757
## 4 4 0.9066728
## 5 5 6.1261894
## 6 6 4.6997117
## 7 7 1.1323761
## 8 8 2.9959920
## 9 9 2.0104318
## 10 10 2.6606709
MyNormal <- function(n, Miu, Sigma) {
# -----------------------------------
# input n:抽取随机数的个数,数据类型:int
# input Miu:待求正态分布的均值,数据类型:double
# input Sigma:待求正态分布的方差,数据类型:double
# -----------------------------------
i <- 0# 计数变量i
z <- c()#建立空向量Z储存抽取的随机数,
while (i <= n) {
x <- rexp(1, rate = 1)# 抽取x~e(1)
r <- runif(1, 0, 1)# 抽取r~U[0,1]
cg <- sqrt(2*exp(1)/pi)# 已求
if (cg * r * dexp(x, rate = 1) <= dnorm(x, mean = 0, sd = 1)) {# cg*r*g(x)<=f(x)
if(runif(1)<1/2){
z[i] <- x# 接受x
}else{
z[i] <- -x# 接受-x
}
i <- i + 1# 计数加1
} else{
next# 重新抽取
}
}
Z <- data.frame(num = 1:n, rf = Sigma * z + Miu)# Z用来储存抽得的随机数
#表1:前十个抽样点
print(head(Z,10L))
#图1:抽样点分布
pic1 <- ggplot(Z)+
geom_point(
aes(x = num,y = rf),
color = 'cornflowerblue'
)+
geom_hline(
yintercept = c(Miu,Miu+3*Sigma,Miu-3*Sigma),
color = c('tomato','darkblue','darkblue'),
linetype = c('dashed', 'dotted','dotted'),
size = c(1,0.8,0.8)
)+
labs(#加题目
title = 'Random Sampling'
) +
theme_light()#改主题
#图2:抽样点密度曲线和理论密度曲线对比
pic2 <- ggplot(Z) +
geom_histogram(#绘制抽得的随机数直方图
aes(x = rf, y = ..density..),
bins = 30,
fill = 'lightgreen',
color = 'darkgrey'
) +
geom_density(#绘制抽得的随机数密度曲线
aes(x = rf, y = ..density.., color = 'density'),
size = 1.5
) +
stat_function(#绘制理论密度曲线
aes(x = rf, color = 'norm'),
fun = dnorm,
args = list(mean = Miu, sd = Sigma),
size = 1.2
) +
geom_vline(#绘制均值虚线
aes(xintercept = Miu),
linetype = "dashed",
size = 1
) +
annotate(#增加表示均值的标签
"text",
x = Miu,
y = 0.01,
label = 'mean (norm)',
alpha = 0.5,
color = 'black'
) +
labs(#加题目,改x,y轴
title = 'Generate normal random numbers',
x = 'x-axis',
y = 'density'
) +
guides(#改图例题目
color = guide_legend(title = "Legend")
) +
theme_light()#改主题
pic1/pic2#竖排排列
}