# 定义函数
f <- function(p) {
return(choose(50, 7) * p^7 * (1-p)^43)
}
# 生成一组在0到1之间的p值
p_values <- seq(0, 1, length.out = 100)
# 计算对应的f(p)值
f_values <- f(p_values)
# 画图
plot(p_values, f_values, type = "l", col = "blue", lwd = 2,
xlab = "p", ylab = expression(choose(50, 7)* p^7 * (1-p)^43),
main = expression(f(p) == choose(50, 7) * p^7 * (1-p)^43))
### 构造似然函数‘lik.bin’
lik.bin <- function(p){
gamma(n+1)/gamma(y+1)/gamma(n-y+1)*p^y*(1-p)^(n-y)
}
n <- 50
y <- 7
注:gamma(n+1)=n!,gamma(y+1)=y!,gamma(n-y+1)=(n-y)!
curve(lik.bin(x), xlim=c(0,1),ylim=c(0,0.16), xlab='p', ylab='likelihood',cex.lab=1.5, cex.axis=1.5)
### Frequentist approach to estimating p: Maximum likelihood
estimation(估计p的频率论方法:极大似然估计) \[
l(p)=\log \left(\begin{array}{l}
n \\
y
\end{array}\right)+y \log (p)+(n-y) \log (1-p), 0<p<1
\] curvature of log likelihood is related to sample variance of
the MLE:对数似然的曲率与最大似然的样本方差有关 \[
\begin{gathered}
\frac{d l(p)}{d p}=\frac{y}{p}-\frac{n-y}{1-p}=0 \\
\hat{p}=\frac{y}{n}
\end{gathered}
\] \[
\hat{p}=\frac{7}{50}=0.14 \text {. }
\] ### Sampling distribution of the MLE最大似然值的抽样分布 \[
\hat{\theta} \simeq N\left(\theta,-E\left(l^{\prime
\prime}(\hat{\theta})\right)^{-1}\right)
\] \[
\hat{p} \simeq N\left(p, \frac{\hat{p}(1-\hat{p})}{n}\right) .
\]
The estimated standard deviation of an estimator is called its standard error.估计量的估计标准差称为它的标准误差。 standard error: \(se(\hat{p})=\sqrt{\frac{\hat{p}(1-\hat{p})}{n}}\) confidence interval (CI) for population proportion p can be calculated using asymptotic sampling distribution: \[ \left(\hat{p}-z_{1-\alpha / 2}^* s e(\hat{p}), \hat{p}+z_{1-\alpha / 2}^* s e(\hat{p})\right) \] ### Exact confidence interval of p \[\left(\frac{1}{1+\frac{n-y+1}{y} F_{1-\alpha / 2}(2(n-y+1), 2 y)}, \frac{\frac{y+1}{n-y} F_{1-\alpha / 2}(2(y+1), 2(n-y))}{1+\frac{y+1}{n-y} F_{1-\alpha / 2}(2(y+1), 2(n-y))}\right) .\] ### R code for frequentist estimation:approximate based on MLE using proportional test
prop.test(7, 50, conf.level=0.90, correct=FALSE)
##
## 1-sample proportions test without continuity correction
##
## data: 7 out of 50, null probability 0.5
## X-squared = 25.92, df = 1, p-value = 3.559e-07
## alternative hypothesis: true p is not equal to 0.5
## 90 percent confidence interval:
## 0.0777209 0.2392390
## sample estimates:
## p
## 0.14
CI.p <- function(n, y, alpha){
CI_left <- (1+(n-y+1)/y*qf(1-alpha/2,2*(n-y+1), 2*y))^(-1);
CI_right <- (y+1)/(n-y)*qf(1-alpha/2,
2*(y+1), 2*(n-y))/(1+(y+1)/(n-y)*qf(1-alpha/2,2*(y+1), 2*(n-y)));
p_exact <- c(CI_left, CI_right);
return(p_exact)
}
n <- 50;
y <- 7;
alpha <- 0.10;
CI.p(n, y, alpha)
## [1] 0.0675967 0.2469352
binom.test(7, 50, conf.level=0.90)
##
## Exact binomial test
##
## data: 7 and 50
## number of successes = 7, number of trials = 50, p-value = 2.099e-07
## alternative hypothesis: true probability of success is not equal to 0.5
## 90 percent confidence interval:
## 0.0675967 0.2469352
## sample estimates:
## probability of success
## 0.14
curve(dunif(x), xlim=c(0,1),ylim=c(0.6,1.4), xlab='p', ylab='prior density',cex.lab=1.5, cex.axis=1.5)
curve(dbeta(x, 1, 1))
curve(dbeta(x, 1, 2))
curve(dnorm(x, 0, 1), xlim=c(-4,4), ylim=c(0, 1),col='red', lwd=1, lty=1)
curve(dnorm(x, 0, 0.5), xlim=c(-4,4), col='blue', lwd=3, lty=3, add=T)
par(mfrow=c(1,2))
curve(dbeta(x, 8, 44), xlim=c(0,1), ylim=c(0,8),
xlab='p', ylab='Prior density',
cex.lab=1.5, cex.axis=1.5)
library(LearnBayes)
midpt = seq(0.025, 0.975, by = 0.1)
prior = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0)
#prior = prior/sum(prior)
#par(mfrow=c(2,1))
# use plot
p = seq(0,1, length=500)
plot(p,histprior(p,midpt,prior),type='l',
ylab='Prior density',
cex.lab=1.5, cex.axis=1.5)
library(LearnBayes)
midpt = seq(0.05, 0.95, by = 0.1)
prior = c(2.5, 5, 2, 0.5, rep(0,6))
prior = prior/sum(prior)
#par(mfrow=c(2,1))
# use plot
plot(p,histprior(p,midpt,prior),type='l', col=2)
# use curve with histprior function given by Jim Albert in his package
curve(histprior(x,midpt,prior), from=0, to=1,
ylab="Prior density",ylim=c(0,5),add=T)
curve(dbeta(x, 8, 44), xlim=c(0,1),ylim=c(0,8), xlab='p', ylab='pi(p|y)',cex.lab=1.5, cex.axis=1.5)
\[ f(x ; \alpha, \beta)=\frac{x^{\alpha-1}(1-x)^{\beta-1}}{B(\alpha, \beta)}I{[0,1]} \]
其中, \(\alpha\) 和 \(\beta\) 是 Beta 分布的形状参数,其必须大于0,当 \(\alpha=\beta=1\)时是均匀分布,而 \(B(\alpha, \beta)\) 是 Beta 函数。
\[ B(\alpha, \beta)=\int_0^1 t^{\alpha-1} \cdot(1-t)^{\beta-1} d t \]
期望: Beta 分布的期望可以通过如下公式计算: \[ E[X]=\frac{\alpha}{\alpha^{+} \beta} \]
方差: Beta 分布的方差可以通过以下公式计算: \[ \operatorname{Var}[X]=\frac{\alpha \beta}{(\alpha+\beta)^2(\alpha+\beta+1)} \]
par(mfrow=c(2,2))
curve(dbeta(x, 10, 40), xlim=c(0,1),ylim=c(0,8),
main='Beta(10,40)', xlab='p',
ylab='prior density',cex.lab=1.5, cex.axis=1.5)
curve(dbeta(x, 5, 20), xlim=c(0,1),ylim=c(0,8),
main='Beta(5,20)', xlab='p', ylab='prior density',
cex.lab=1.5, cex.axis=1.5)
curve(dbeta(x, 2.5, 10), xlim=c(0,1),ylim=c(0,8),
main='Beta(2.5,10)', xlab='p', ylab='prior density',
cex.lab=1.5, cex.axis=1.5)
curve(dbeta(x, 1.25, 5), xlim=c(0,1),ylim=c(0,8),
main='Beta(1.25,5)', xlab='p', ylab='prior density',
cex.lab=1.5, cex.axis=1.5)
par(mfrow=c(2,2))
prior1=c(10,40)
data=c(7,43)
par(cex = 0.6, lwd = 0.8) # 调整文字大小和线条宽度
triplot(prior1,data)
prior2=c(5, 20)
triplot(prior2,data,where="topright")
prior3=c(2.5, 10)
triplot(prior3,data)
prior4=c(1.25, 5)
triplot(prior4,data)
lik.bin.norm <- function(p, n, y){
p^y*(1-p)^(n-y)/beta(y+1, n-y+1)
}
par(mfrow=c(2,2))
curve(dbeta(x, 10, 40), xlim=c(0,1),ylim=c(0,12), lty=1, col=1, main="Prior: Beta(10, 40)\n Data: y=7, n-y=43", xlab='p', ylab='Density', cex.lab=1.5, cex.axis=1.5)
curve(lik.bin.norm(x, 50, 7), lty=2, col=1, add=T)
curve(dbeta(x, 17, 83), lty=4, col=2, add=T)#这是后验。后验参数满足:$\alpha+y=17,\beta+n-y=83$
legend('topright', c("Prior", "Likelihood", "Posterior"), lty=c(1,2,4), col=c(1,1,2), cex=1.2)
##################################
curve(dbeta(x, 5, 20), xlim=c(0,1),ylim=c(0,12), lty=1, col=1, main="Prior: Beta(5, 20)\n Data: y=7, n-y=43", xlab='p', ylab='Density', cex.lab=1.5, cex.axis=1.5)
curve(lik.bin.norm(x, 50, 7), lty=2, col=1, add=T)
curve(dbeta(x, 12, 63), lty=4, col=2, add=T)
legend('topright', c("Prior", "Likelihood", "Posterior"), lty=c(1,2,4), col=c(1,1,2), cex=1.2)
##################################
curve(dbeta(x, 2.5, 10), xlim=c(0,1),ylim=c(0,12), lty=1, col=1, main="Prior: Beta(2.5, 10)\n Data: y=7, n-y=43", xlab='p', ylab='Density', cex.lab=1.5, cex.axis=1.5)
curve(lik.bin.norm(x, 50, 7), lty=2, col=1, add=T)
curve(dbeta(x, 9.5, 53), lty=4, col=2, add=T)
legend('topright', c("Prior", "Likelihood", "Posterior"), lty=c(1,2,4), col=c(1,1,2), cex=1.2)
######################################
curve(dbeta(x, 1.25, 5),
xlim=c(0,1), ylim=c(0,12),
lty=1, col=1,
main="Prior: Beta(1.25, 5)\n Data: y=7, n-y=43",
xlab='p', ylab='Density',
cex.lab=1.5, cex.axis=1.5)
curve(lik.bin.norm(x, 50, 7), lty=2, col=1, add=T)
curve(dbeta(x, 8.25, 48), lty=4, col=2, add=T)
legend('topright',
c("Prior", "Likelihood", "Posterior"),
lty=c(1,2,4),
col=c(1,1,2),
cex=1.2)
g()=^{-} e^{/ 2}
> ?sd
> sd(1:2)
[1] 0.7071068
> help("dgamma")
> qbeta(c(0.025,0.975),10,40)
[1] 0.1024494 0.3202212
> ?plot
> seq(0,1,0.01)
[1] 0.00 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 0.10 0.11 0.12 0.13 0.14
[16] 0.15 0.16 0.17 0.18 0.19 0.20 0.21 0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29
[31] 0.30 0.31 0.32 0.33 0.34 0.35 0.36 0.37 0.38 0.39 0.40 0.41 0.42 0.43 0.44
[46] 0.45 0.46 0.47 0.48 0.49 0.50 0.51 0.52 0.53 0.54 0.55 0.56 0.57 0.58 0.59
[61] 0.60 0.61 0.62 0.63 0.64 0.65 0.66 0.67 0.68 0.69 0.70 0.71 0.72 0.73 0.74
[76] 0.75 0.76 0.77 0.78 0.79 0.80 0.81 0.82 0.83 0.84 0.85 0.86 0.87 0.88 0.89
[91] 0.90 0.91 0.92 0.93 0.94 0.95 0.96 0.97 0.98 0.99 1.00
> x<- seq(0.005,0.995,length=100)
> x
[1] 0.005 0.015 0.025 0.035 0.045 0.055 0.065 0.075 0.085 0.095 0.105 0.115 0.125
[14] 0.135 0.145 0.155 0.165 0.175 0.185 0.195 0.205 0.215 0.225 0.235 0.245 0.255
[27] 0.265 0.275 0.285 0.295 0.305 0.315 0.325 0.335 0.345 0.355 0.365 0.375 0.385
[40] 0.395 0.405 0.415 0.425 0.435 0.445 0.455 0.465 0.475 0.485 0.495 0.505 0.515
[53] 0.525 0.535 0.545 0.555 0.565 0.575 0.585 0.595 0.605 0.615 0.625 0.635 0.645
[66] 0.655 0.665 0.675 0.685 0.695 0.705 0.715 0.725 0.735 0.745 0.755 0.765 0.775
[79] 0.785 0.795 0.805 0.815 0.825 0.835 0.845 0.855 0.865 0.875 0.885 0.895 0.905
[92] 0.915 0.925 0.935 0.945 0.955 0.965 0.975 0.985 0.995
> y<-dbeta(x,10,40)
> plot(x,y,type = "l")
> plot(x,y,type = "l")
> plot(x,y,type = "o")
> plot(x,y, "l")
> plot(x,y,type = "l",xlab = "pi",ylab = "density",main = "Beta(10,40)")
> plot(x,y,type = "l",xlab = expression(pi),ylab = "density",main = "Beta(10,40)")
```seq(0,1,0.01)
[1] 0.00 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 0.10 0.11 0.12 0.13 0.14
[16] 0.15 0.16 0.17 0.18 0.19 0.20 0.21 0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29
[31] 0.30 0.31 0.32 0.33 0.34 0.35 0.36 0.37 0.38 0.39 0.40 0.41 0.42 0.43 0.44
[46] 0.45 0.46 0.47 0.48 0.49 0.50 0.51 0.52 0.53 0.54 0.55 0.56 0.57 0.58 0.59
[61] 0.60 0.61 0.62 0.63 0.64 0.65 0.66 0.67 0.68 0.69 0.70 0.71 0.72 0.73 0.74
[76] 0.75 0.76 0.77 0.78 0.79 0.80 0.81 0.82 0.83 0.84 0.85 0.86 0.87 0.88 0.89
[91] 0.90 0.91 0.92 0.93 0.94 0.95 0.96 0.97 0.98 0.99 1.00
> x<- seq(0.005,0.995,length=100)
> x
[1] 0.005 0.015 0.025 0.035 0.045 0.055 0.065 0.075 0.085 0.095 0.105 0.115 0.125
[14] 0.135 0.145 0.155 0.165 0.175 0.185 0.195 0.205 0.215 0.225 0.235 0.245 0.255
[27] 0.265 0.275 0.285 0.295 0.305 0.315 0.325 0.335 0.345 0.355 0.365 0.375 0.385
[40] 0.395 0.405 0.415 0.425 0.435 0.445 0.455 0.465 0.475 0.485 0.495 0.505 0.515
[53] 0.525 0.535 0.545 0.555 0.565 0.575 0.585 0.595 0.605 0.615 0.625 0.635 0.645
[66] 0.655 0.665 0.675 0.685 0.695 0.705 0.715 0.725 0.735 0.745 0.755 0.765 0.775
[79] 0.785 0.795 0.805 0.815 0.825 0.835 0.845 0.855 0.865 0.875 0.885 0.895 0.905
[92] 0.915 0.925 0.935 0.945 0.955 0.965 0.975 0.985 0.995
> y<-dbeta(x,10,40)
> plot(x,y,type = "l")
> plot(x,y,type = "l")
> plot(x,y,type = "o")
> plot(x,y, "l")
> plot(x,y,type = "l",xlab = "pi",ylab = "density",main = "Beta(10,40)")
> plot(x,y,type = "l",xlab = expression(pi),ylab = "density",main = "Beta(10,40)")
> qbeta(c(0,1),0.5,0.5)
[1] 0 1
> x<-seq(qnorm(0.005,20,5),qnorm(0.995,20,5),length=100)
> y<-dnorm(x,20,5)
> plot(x,y,"l")
> x<-seq(-100,100,length=100000)
> y<-dnorm(x,20,5)
> plot(x,y,"l")
> x<-seq(-50,90,length=100000)
> y<-dnorm(x,20,5)
> plot(x,y,"l")
> x<-seq(qnorm(0.005,20,5),qnorm(0.995,20,5),length=100)
> y<-dnorm(x,20,5)
> plot(x,y,"l")
> qnorm(0.005,20,5)
[1] 7.120853
> qnorm(0.995,20,5)
[1] 32.87915
prior=c(3,10)
data=c(10,6)
triplot(prior,data,where="topright")
triplot(c(2.5,10),c(7,43))
triplot(c(2.5,10),c(7,43),"center")
For the beta density with parameters \(\alpha=2\) and \(\beta=7\), 1.Referring to Table A.2, calculate the mean and mode as functions of the parameters
# 定义参数
alpha <- 2
beta <- 7
mean <- alpha / (alpha + beta)
mode <- (alpha - 1) / (alpha + beta - 2)
print(paste0("Mean: ", mean))
## [1] "Mean: 0.222222222222222"
print(paste0("Mode: ", mode))
## [1] "Mode: 0.142857142857143"
2.Use an R function to dertermine the median and a 90% central interval
qbeta(c(0.5),2,7)
## [1] 0.2011312
qbeta(c(0.05,0.95),2,7)
## [1] 0.04638926 0.47067941
3.Plot the density
x<-seq(0.005,0.995,length=100)
y<-dbeta(x,2,7)
plot(x,y,"l")
3.Plot the density
x<-seq(0.005,0.995,length=100)
y1<-dbeta(x,0.5,0.5)
y2<-dbeta(x,10.2,1.5)
y3<-dbeta(x,1.5,10.2)
y4<-dbeta(x,100,62)
plot(x,y1,type = "l",col="red")
lines(x,y2,type = "o",col="green")
lines(x,y3,type = "p",col="blue")
lines(x,y4,type = "l",col="black")
par(mfrow=c(2,2))
curve(dbeta(x, 0.5,0.5), xlim=c(0,1),
main='Beta(0.5,0.5)', xlab='p',
ylab='prior density',cex.lab=1.5, cex.axis=1.5)
curve(dbeta(x, 10.2,1.5), xlim=c(0,1),
main='Beta(10.2,1.5)', xlab='p', ylab='prior density',
cex.lab=1.5, cex.axis=1.5)
curve(dbeta(x, 1.5,10.2), xlim=c(0,1),
main='Beta(1.5,10.2)', xlab='p', ylab='prior density',
cex.lab=1.5, cex.axis=1.5)
curve(dbeta(x, 100,62), xlim=c(0,1),
main='Beta(100,62)', xlab='p', ylab='prior density',
cex.lab=1.5, cex.axis=1.5)
1.U(0,1)=Beta(1,1)
y4<-dbeta(x,1,1)
plot(x,y4,type = "l",col="red")
Estimating a proportion with a discrete prior Bob claims to have ESP. To test this claim, you propose the following experiment. You will select one card from four large cards with different geometric figures, and Bob will try to identify it. Let \(p\) denote the probability that Bob is correct in identifying the figure for a single card. You believe that Bob has no ESP ability \((p=.25)\), but there is a small chance that \(p\) is either larger or smaller than .25. After some thought, you place the following prior distribution on \(p\) :
p <- c(0,0.125,0.25,.375,.500,.625,.750,.875,1)
prior<-c(.001,.001,.950,.008,.008,.008,.008,.008,.008)
data<-c(6,4)
rbind(p,prior)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## p 0.000 0.125 0.25 0.375 0.500 0.625 0.750 0.875 1.000
## prior 0.001 0.001 0.95 0.008 0.008 0.008 0.008 0.008 0.008
post<-pdisc(p,prior,data)
round(rbind(p,prior,post),3)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## p 0.000 0.125 0.25 0.375 0.500 0.625 0.750 0.875 1.000
## prior 0.001 0.001 0.95 0.008 0.008 0.008 0.008 0.008 0.008
## post 0.000 0.000 0.73 0.034 0.078 0.094 0.055 0.009 0.000
Suppose that the experiment is repeated ten times and Bob is correct six times and incorrect four times.Using the function pdisc, find the posterior probabilities of these values of \(p\). What is your posterior probability that Bob has no ESP ability? ####Bob has no ESP ability \((p=0.73)\)
For the quitting school example, we want to see the influence of sample size(amount of information from data).
Plot the likelihood, prior and posterior on the same graph given the sample sizes and successes \((n,y)=(50,7),(100,14),(150,21),(250,35)\).
what can you get about the influence of \(n\) on the posterior distribution?
par(mfrow=c(2,2))
prior=c(10,40)
data1=c(50,7)
par(cex = 0.8, lwd = 0.8) # 调整文字大小和线条宽度
triplot(prior,data1,where="topleft")
data2=c(100,14)
triplot(prior,data2,where="topleft")
data3=c(150,21)
triplot(prior,data3,where="topleft")
data4=c(250,35)
triplot(prior,data4,where="topleft")
随着样本容量的增加,后验概率密度与似然函数越接近 As \(n\) increased,the density of posterior get
closer to the likelihood function.
(b)If we use beta(1.25,5) instead,what can you get?
par(mfrow=c(2,2))
prior=c(1.25,5)
data1=c(50,7)
par(cex = 0.7, lwd = 0.8) # 调整文字大小和线条宽度
triplot(prior,data1,where="topleft")
data2=c(100,14)
triplot(prior,data2,where="topleft")
data3=c(150,21)
triplot(prior,data3,where="topleft")
data4=c(250,35)
triplot(prior,data4,where="topleft")
现象更明显了,当样本容量增加到250+35=285时,后验分布和似然函数非常接近。
Estimating a proportion with a histogram prior Consider the following experiment. Hold a penny on edge on a flat hard surface, and spin it with your fingers. Let \(p\) denote the probability that it lands heads. To estimate this probability, we will use a histogram to model our prior beliefs about \(p\). Divide the interval \([0,1]\) into the ten subintervals \([0, .1]\), \([.1, .2], \cdots,[.9,1]\), and specify probabilities that \(p\) is in each interval. Next spin the penny 20 times and count the number of successes (heads) and failures (tails). Simulate from the posterior distribution by
midpt=seq(0.05,1,by = 0.1)
#p <- c(0,0.125,0.25,.375,.500,.625,.750,.875,1)
prior<-c(.01,.02,.03,.04,.4,.4,.04,.03,.02,.01)
#data<-c(6,4)
#p=seq(.01,.99,by=.01)
curve(histprior(x,midpt,prior),from=0,to=1,ylab="Prior density")
s<-0:20
f<-20-s
curve(histprior(x,midpt,prior)*dbeta(x,s+1,f+1),
from=0,to=1,ylab="Posterior density")
midpt=seq(0.05,1,by = 0.1)
prior<-c(.01,.02,.03,.04,.4,.4,.04,.03,.02,.01)
p=seq(.01,.99,by=.01)
plot(p,histprior(p,midpt,prior),ylab="Prior density",type = "l")
s<-0:20
f<-20-s
plot(p,histprior(p,midpt,prior)*dbeta(p,s+1,f+1),
ylab="Posterior density",type = "l")
round(cbind(p,histprior(p,midpt,prior)*dbeta(p,s+1,f+1)),3)
## p
## [1,] 0.01 0.172
## [2,] 0.02 0.057
## [3,] 0.03 0.021
## [4,] 0.04 0.008
## [5,] 0.05 0.003
## [6,] 0.06 0.001
## [7,] 0.07 0.000
## [8,] 0.08 0.000
## [9,] 0.09 0.000
## [10,] 0.10 0.000
## [11,] 0.11 0.000
## [12,] 0.12 0.000
## [13,] 0.13 0.000
## [14,] 0.14 0.000
## [15,] 0.15 0.000
## [16,] 0.16 0.000
## [17,] 0.17 0.000
## [18,] 0.18 0.000
## [19,] 0.19 0.000
## [20,] 0.20 0.000
## [21,] 0.21 0.000
## [22,] 0.22 0.004
## [23,] 0.23 0.020
## [24,] 0.24 0.049
## [25,] 0.25 0.084
## [26,] 0.26 0.113
## [27,] 0.27 0.125
## [28,] 0.28 0.118
## [29,] 0.29 0.098
## [30,] 0.30 0.096
## [31,] 0.31 0.063
## [32,] 0.32 0.037
## [33,] 0.33 0.019
## [34,] 0.34 0.009
## [35,] 0.35 0.004
## [36,] 0.36 0.001
## [37,] 0.37 0.000
## [38,] 0.38 0.000
## [39,] 0.39 0.000
## [40,] 0.40 0.000
## [41,] 0.41 0.000
## [42,] 0.42 0.000
## [43,] 0.43 0.000
## [44,] 0.44 0.001
## [45,] 0.45 0.007
## [46,] 0.46 0.026
## [47,] 0.47 0.077
## [48,] 0.48 0.182
## [49,] 0.49 0.363
## [50,] 0.50 0.621
## [51,] 0.51 0.928
## [52,] 0.52 1.222
## [53,] 0.53 1.428
## [54,] 0.54 1.481
## [55,] 0.55 1.363
## [56,] 0.56 1.107
## [57,] 0.57 0.787
## [58,] 0.58 0.481
## [59,] 0.59 0.248
## [60,] 0.60 0.010
## [61,] 0.61 0.003
## [62,] 0.62 0.001
## [63,] 0.63 0.000
## [64,] 0.64 0.000
## [65,] 0.65 0.000
## [66,] 0.66 0.000
## [67,] 0.67 0.000
## [68,] 0.68 0.000
## [69,] 0.69 0.000
## [70,] 0.70 0.000
## [71,] 0.71 0.000
## [72,] 0.72 0.001
## [73,] 0.73 0.003
## [74,] 0.74 0.008
## [75,] 0.75 0.017
## [76,] 0.76 0.032
## [77,] 0.77 0.056
## [78,] 0.78 0.085
## [79,] 0.79 0.116
## [80,] 0.80 0.092
## [81,] 0.81 0.091
## [82,] 0.82 0.073
## [83,] 0.83 0.041
## [84,] 0.84 0.013
## [85,] 0.85 0.000
## [86,] 0.86 0.000
## [87,] 0.87 0.000
## [88,] 0.88 0.000
## [89,] 0.89 0.000
## [90,] 0.90 0.000
## [91,] 0.91 0.000
## [92,] 0.92 0.000
## [93,] 0.93 0.000
## [94,] 0.94 0.000
## [95,] 0.95 0.000
## [96,] 0.96 0.000
## [97,] 0.97 0.000
## [98,] 0.98 0.000
## [99,] 0.99 0.000
The posterior density of \(p\) on a grid of values on \((0,1)\) is
round(rbind(p,histprior(p,midpt,prior)*dbeta(p,s+1,f+1)),3)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## p 0.010 0.020 0.030 0.040 0.050 0.060 0.07 0.08 0.09 0.1 0.11 0.12 0.13
## 0.172 0.057 0.021 0.008 0.003 0.001 0.00 0.00 0.00 0.0 0.00 0.00 0.00
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
## p 0.14 0.15 0.16 0.17 0.18 0.19 0.2 0.21 0.220 0.23 0.240 0.250 0.260
## 0.00 0.00 0.00 0.00 0.00 0.00 0.0 0.00 0.004 0.02 0.049 0.084 0.113
## [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38] [,39]
## p 0.270 0.280 0.290 0.300 0.310 0.320 0.330 0.340 0.350 0.360 0.37 0.38 0.39
## 0.125 0.118 0.098 0.096 0.063 0.037 0.019 0.009 0.004 0.001 0.00 0.00 0.00
## [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50] [,51] [,52]
## p 0.4 0.41 0.42 0.43 0.440 0.450 0.460 0.470 0.480 0.490 0.500 0.510 0.520
## 0.0 0.00 0.00 0.00 0.001 0.007 0.026 0.077 0.182 0.363 0.621 0.928 1.222
## [,53] [,54] [,55] [,56] [,57] [,58] [,59] [,60] [,61] [,62] [,63] [,64] [,65]
## p 0.530 0.540 0.550 0.560 0.570 0.580 0.590 0.60 0.610 0.620 0.63 0.64 0.65
## 1.428 1.481 1.363 1.107 0.787 0.481 0.248 0.01 0.003 0.001 0.00 0.00 0.00
## [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73] [,74] [,75] [,76] [,77] [,78]
## p 0.66 0.67 0.68 0.69 0.7 0.71 0.720 0.730 0.740 0.750 0.760 0.770 0.780
## 0.00 0.00 0.00 0.00 0.0 0.00 0.001 0.003 0.008 0.017 0.032 0.056 0.085
## [,79] [,80] [,81] [,82] [,83] [,84] [,85] [,86] [,87] [,88] [,89] [,90] [,91]
## p 0.790 0.800 0.810 0.820 0.830 0.840 0.85 0.86 0.87 0.88 0.89 0.9 0.91
## 0.116 0.092 0.091 0.073 0.041 0.013 0.00 0.00 0.00 0.00 0.00 0.0 0.00
## [,92] [,93] [,94] [,95] [,96] [,97] [,98] [,99]
## p 0.92 0.93 0.94 0.95 0.96 0.97 0.98 0.99
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
p<-seq(0.0,1,length=500)
s<-0:20
f<-20-s
Pip = dbeta(p,s+1,f+1)
Lp = histprior(p,midpt,prior)
post1 = Lp*Pip
post1 = post1/sum(post1)
ps = sample(p,5000,replace = TRUE,prob = post1)
par(mfrow=c(1,2))
hist(ps,breaks = 30,xlab = "p",main = "")
ps1 = sample(p,500,replace = TRUE,prob = post1)
hist(ps1,breaks = 30,xlab = "p",main = "")
区间概率没有太大变化 ### chapter2 Albert 画图
p=seq(0.05,0.95,by=0.1)
prior=c(1,5.2,8,7.2,4.6,2.1,0.7,0.1,0,0)
prior=prior/sum(prior)
plot(p,prior,type = "h",ylab = "Prior Probability")
data=c(11,16)
post=pdisc(p,prior,data)
round(cbind(p,prior,post),2)
## p prior post
## [1,] 0.05 0.03 0.00
## [2,] 0.15 0.18 0.00
## [3,] 0.25 0.28 0.13
## [4,] 0.35 0.25 0.48
## [5,] 0.45 0.16 0.33
## [6,] 0.55 0.07 0.06
## [7,] 0.65 0.02 0.00
## [8,] 0.75 0.00 0.00
## [9,] 0.85 0.00 0.00
## [10,] 0.95 0.00 0.00
library(lattice)
PRIOR=data.frame("prior",p,prior)
PRIOR
## X.prior. p prior
## 1 prior 0.05 0.034602076
## 2 prior 0.15 0.179930796
## 3 prior 0.25 0.276816609
## 4 prior 0.35 0.249134948
## 5 prior 0.45 0.159169550
## 6 prior 0.55 0.072664360
## 7 prior 0.65 0.024221453
## 8 prior 0.75 0.003460208
## 9 prior 0.85 0.000000000
## 10 prior 0.95 0.000000000
POST=data.frame("posterior",p,post)
POST
## X.posterior. p post
## 1 posterior 0.05 1.451714e-08
## 2 posterior 0.15 2.256022e-03
## 3 posterior 0.25 1.291349e-01
## 4 posterior 0.35 4.767910e-01
## 5 posterior 0.45 3.338350e-01
## 6 posterior 0.55 5.587820e-02
## 7 posterior 0.65 2.098297e-03
## 8 posterior 0.75 6.642742e-06
## 9 posterior 0.85 0.000000e+00
## 10 posterior 0.95 0.000000e+00
names(PRIOR)=c("Type","P","Probability")
names(POST)=c("Type","P","Probability")
data=rbind(PRIOR,POST)
xyplot(Probability~P|Type,data,layout=c(1,2),type="h",lwd=3,col="blue")
POST[3,3]+POST[4,3]+POST[5,3]
## [1] 0.9397608
quantile2=list(p=.9,x=.5)
quantile2
## $p
## [1] 0.9
##
## $x
## [1] 0.5
quantile1=list(p=.5,x=.3)
quantile1
## $p
## [1] 0.5
##
## $x
## [1] 0.3
beta.select(quantile1,quantile2)
## [1] 3.26 7.19
a = 3.26
b = 7.19
s = 11
f = 16
curve(dbeta(x,a+s,b+f),from = 0,to = 1,xlab = "p",ylab ="Density",lty = 1,lwd = 4 )
curve(dbeta(x,s+1,f+1),add = TRUE,lty = 2,lwd = 4 )
curve(dbeta(x,a,b),add = TRUE,lty = 3,lwd = 4 )
legend(.7,4,c("Prior","Likelihood","Posterior"),lty = c(3,2,1),lwd = c(3,3,3))
1 - pbeta(0.5,a+s, b+f)
## [1] 0.0690226
qbeta(0.5,a+s, b+f)
## [1] 0.3786304
qbeta(c(0.05,0.95),a+s, b+f)
## [1] 0.2555267 0.5133608
ps = rbeta(1000,a+s,b+f)
hist(ps,xlab = "p",main="")
sum(ps >= 0.5)/1000
## [1] 0.069
quantile(ps,c(0.05,0.95))
## 5% 95%
## 0.2573519 0.5099952
midpt = seq(0.05,0.95,by = 0.1)
prior = c(1,5.2,8,7.2,4.6,2.1,0.7,0.1,0,0)
prior = prior/sum(prior)
curve(histprior(x,midpt,prior),from = 0,to = 1,
ylab = "Prior density", ylim=c(0,.3))
curve(histprior(x,midpt,prior)*dbeta(x,s+1,f+1),
from=0,to=1,ylab="Posterior density")
### Figure2.7
p = seq(0,1,length=500)
post = histprior(p,midpt,prior)*dbeta(p,s+1,f+1)
post = post/sum(post)
ps = sample(p,replace = TRUE, prob = post)
hist(ps, xlab = "p",main = "")
p = seq(0.05,0.95,by = .1)
prior = c(1,5.2,8,7.2,4.6,2.1,0.7,0.1,0,0)
prior = prior/sum(prior)
m = 20; ys = 0:20
pred = pdiscp(p,prior,m,ys)
round(cbind(0:20,pred),3)
## pred
## [1,] 0 0.020
## [2,] 1 0.044
## [3,] 2 0.069
## [4,] 3 0.092
## [5,] 4 0.106
## [6,] 5 0.112
## [7,] 6 0.110
## [8,] 7 0.102
## [9,] 8 0.089
## [10,] 9 0.074
## [11,] 10 0.059
## [12,] 11 0.044
## [13,] 12 0.031
## [14,] 13 0.021
## [15,] 14 0.013
## [16,] 15 0.007
## [17,] 16 0.004
## [18,] 17 0.002
## [19,] 18 0.001
## [20,] 19 0.000
## [21,] 20 0.000
ab=c(3.26,7.19)
m=20;ys=0:20
pred = pbetap(ab,m,ys)
p=rbeta(1000,3.26,7.19)
y = rbinom(1000,20,p)
table(y)
## y
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## 18 42 65 108 87 96 120 101 96 76 56 46 40 18 16 7 3 4 1
freq = table(y)
ys = as.integer(names(freq))
predprob = freq/sum(freq)
plot(ys, predprob, type = "h", xlab = "y",
ylab = "Predictive Probability")
dist=cbind(ys, predprob)
dist
## ys predprob
## 0 0 0.018
## 1 1 0.042
## 2 2 0.065
## 3 3 0.108
## 4 4 0.087
## 5 5 0.096
## 6 6 0.120
## 7 7 0.101
## 8 8 0.096
## 9 9 0.076
## 10 10 0.056
## 11 11 0.046
## 12 12 0.040
## 13 13 0.018
## 14 14 0.016
## 15 15 0.007
## 16 16 0.003
## 17 17 0.004
## 18 18 0.001
covprob = .9
discint(dist, covprob)
## $prob
## 12
## 0.933
##
## $set
## 1 2 3 4 5 6 7 8 9 10 11 12
## 1 2 3 4 5 6 7 8 9 10 11 12