Normal sampling model (known variance) with a normal prior on the mean

One point observation

Let \(y\vert \mu \sim N(\mu,\sigma^2=\tau^{-1}),\) and suppose that \(\tau>0\), the precision parameter, is known, and that the prior for \(\mu\) is normal with mean \(m\in{\mathbb R}\) and precision \(t>0\). This is \[\begin{eqnarray*} \color{blue}{y \vert \mu} &\sim& N(\mu,\sigma^2=\tau^{-1}) \\ \color{red}{\mu\vert m,t} &\sim& N(m,s^2 = t^{-1}). \end{eqnarray*}\] Then, the posterior distribution of \(\mu\) is: \[\begin{eqnarray*} \pi(\mu\vert y) &=& {\color{blue}{\left(\frac{\tau}{2\pi}\right)^{1/2} \exp\left\{-\frac{\tau}{2}(y-\mu)^2\right\}}} \times {\color{red}{\left(\frac{t}{2\pi}\right)^{1/2} \exp\left\{-\frac{t}{2}(\mu-m)^2\right\}}} \\ & \propto& \exp\left\{ -\frac{\tau}{2}(y-\mu)^2 -\frac{t}{2}(\mu-m)^2\right\} \\ & \ldots & \text{ after some algebra ...} \\ & \propto& N\left\{ \frac{ \tau y + t m}{\tau + t},\mbox{precision }=(\tau+t) \right\}. \end{eqnarray*}\]

This is, the posterior distribution is again normal with mean \(\mu' = \frac{ \tau y + t m}{\tau + t}\) and precision \(\tau' = \tau+t\). This implies that the Normal prior is a conjugate prior for the normal sampling model with known variance (or, equivalently, known precision). The following R code shows the influence of the choice of the hyperparamters (the parameters of the prior distribution) on the shape of posterior distribution for the case when \(y=0\) and \(\tau = 1\). Note that, in the R command dnorm(), we need to specify the mean and the standard deviation.

# See some details of the R command dnorm() with ?dnorm


###############################################################
# Effect of t (the precision of the prior)
###############################################################

# m = 0, t = 1/2
m <- 0; tau <- 1; t <- 1/2; y <- 0  
sigma <- 1/sqrt(t) # Prior standard deviation
mup <- (tau*y + m*t)/(tau + t)  # Posterior mean
taup <- tau+ t  # Posterior precision
sigmap = 1/sqrt(taup)   # Posterior standard deviation
# Likelihood, Prior, and Posterior functions
likelihood <- Vectorize(function(mu)  dnorm(mu,y,1/sqrt(tau)))
pi <- Vectorize(function(mu)  dnorm(mu,m,sigma))
post1 <- Vectorize(function(mu)  dnorm(mu,mup,sigmap))
curve(post1,-5,5, xlab=~mu, ylab="Density", main="Normal prior: m=0, t=1/2",lwd=2,n=10000,ylim=c(0,0.6))
curve(pi, xlab=~theta,lwd=1,lty = 2, add=T,n=10000)
curve(likelihood, xlab=~theta,lwd=1,lty = 3, add=T,n=10000)
legend(2, 0.5, c("Posterior","Prior","Likelihood"), col=c("black","black","black"),
       text.col = "black", lty = c(1,2,3), lwd = c(2,1,1),
       merge = TRUE, bg = "gray90",cex=1)

# m = 0, t = 1
m <- 0; tau <- 1; t <- 1; y <- 0  
sigma <- 1/sqrt(t) # Prior standard deviation
mup <- (tau*y + m*t)/(tau + t)  # Posterior mean
taup <- tau+ t  # Posterior precision
sigmap = 1/sqrt(taup)   # Posterior standard deviation
# Likelihood, Prior, and Posterior functions
likelihood <- Vectorize(function(mu)  dnorm(mu,y,1/sqrt(tau)))
pi <- Vectorize(function(mu)  dnorm(mu,m,sigma))
post1 <- Vectorize(function(mu)  dnorm(mu,mup,sigmap))
curve(post1,-5,5, xlab=~mu, ylab="Density", main="Normal prior: m=0, t=1",lwd=2,n=10000,ylim=c(0,0.6))
curve(pi, xlab=~theta,lwd=1,lty = 2, add=T,n=10000)
curve(likelihood, xlab=~theta,lwd=1,lty = 3, add=T,n=10000)
legend(2, 0.5, c("Posterior","Prior","Likelihood"), col=c("black","black","black"),
       text.col = "black", lty = c(1,2,3), lwd = c(2,1,1),
       merge = TRUE, bg = "gray90",cex=1)

# m = 0, t = 1/10
m <- 0; tau <- 1; t <- 1/10; y <- 0  
sigma <- 1/sqrt(t) # Prior standard deviation
mup <- (tau*y + m*t)/(tau + t)  # Posterior mean
taup <- tau+ t  # Posterior precision
sigmap = 1/sqrt(taup)   # Posterior standard deviation
# Likelihood, Prior, and Posterior functions
likelihood <- Vectorize(function(mu)  dnorm(mu,y,1/sqrt(tau)))
pi <- Vectorize(function(mu)  dnorm(mu,m,sigma))
post1 <- Vectorize(function(mu)  dnorm(mu,mup,sigmap))
curve(post1,-5,5, xlab=~mu, ylab="Density", main="Normal prior: m=0, t=1/10",lwd=2,n=10000,ylim=c(0,0.5))
curve(pi, xlab=~theta,lwd=1,lty = 2, add=T,n=10000)
curve(likelihood, xlab=~theta,lwd=1,lty = 3, add=T,n=10000)
legend(2, 0.5, c("Posterior","Prior","Likelihood"), col=c("black","black","black"),
       text.col = "black", lty = c(1,2,3), lwd = c(2,1,1),
       merge = TRUE, bg = "gray90",cex=1)

# m = 0, t = 1/100
m <- 0; tau <- 1; t <- 1/100; y <- 0  
sigma <- 1/sqrt(t) # Prior standard deviation
mup <- (tau*y + m*t)/(tau + t)  # Posterior mean
taup <- tau+ t  # Posterior precision
sigmap = 1/sqrt(taup)   # Posterior standard deviation
# Likelihood, Prior, and Posterior functions
likelihood <- Vectorize(function(mu)  dnorm(mu,y,1/sqrt(tau)))
pi <- Vectorize(function(mu)  dnorm(mu,m,sigma))
post1 <- Vectorize(function(mu)  dnorm(mu,mup,sigmap))
curve(post1,-5,5, xlab=~mu, ylab="Density", main="Normal prior: m=0, t=1/100",lwd=2,n=10000,ylim=c(0,0.5))
curve(pi, xlab=~theta,lwd=1,lty = 2, add=T,n=10000)
curve(likelihood, xlab=~theta,lwd=1,lty = 3, add=T,n=10000)
legend(2, 0.5, c("Posterior","Prior","Likelihood"), col=c("black","black","black"),
       text.col = "black", lty = c(1,2,3), lwd = c(2,1,1),
       merge = TRUE, bg = "gray90",cex=1)

###############################################################
# Effect of m (the mean of the prior)
###############################################################

# m = -2, t = 1
m <- -2; tau <- 1; t <- 1; y <- 0  
sigma <- 1/sqrt(t) # Prior standard deviation
mup <- (tau*y + m*t)/(tau + t)  # Posterior mean
taup <- tau+ t  # Posterior precision
sigmap = 1/sqrt(taup)   # Posterior standard deviation
# Likelihood, Prior, and Posterior functions
likelihood <- Vectorize(function(mu)  dnorm(mu,y,1/sqrt(tau)))
pi <- Vectorize(function(mu)  dnorm(mu,m,sigma))
post1 <- Vectorize(function(mu)  dnorm(mu,mup,sigmap))
curve(post1,-5,5, xlab=~mu, ylab="Density", main="Normal prior: m=-2, t=1",lwd=2,n=10000,ylim=c(0,0.6))
curve(pi, xlab=~theta,lwd=1,lty = 2, add=T,n=10000)
curve(likelihood, xlab=~theta,lwd=1,lty = 3, add=T,n=10000)
legend(2, 0.5, c("Posterior","Prior","Likelihood"), col=c("black","black","black"),
       text.col = "black", lty = c(1,2,3), lwd = c(2,1,1),
       merge = TRUE, bg = "gray90",cex=1)

# m = -1, t = 1
m <- -1; tau <- 1; t <- 1; y <- 0  
sigma <- 1/sqrt(t) # Prior standard deviation
mup <- (tau*y + m*t)/(tau + t)  # Posterior mean
taup <- tau+ t  # Posterior precision
sigmap = 1/sqrt(taup)   # Posterior standard deviation
# Likelihood, Prior, and Posterior functions
likelihood <- Vectorize(function(mu)  dnorm(mu,y,1/sqrt(tau)))
pi <- Vectorize(function(mu)  dnorm(mu,m,sigma))
post1 <- Vectorize(function(mu)  dnorm(mu,mup,sigmap))
curve(post1,-5,5, xlab=~mu, ylab="Density", main="Normal prior: m=-1, t=1",lwd=2,n=10000,ylim=c(0,0.6))
curve(pi, xlab=~theta,lwd=1,lty = 2, add=T,n=10000)
curve(likelihood, xlab=~theta,lwd=1,lty = 3, add=T,n=10000)
legend(2, 0.5, c("Posterior","Prior","Likelihood"), col=c("black","black","black"),
       text.col = "black", lty = c(1,2,3), lwd = c(2,1,1),
       merge = TRUE, bg = "gray90",cex=1)

# m = 0, t = 1
m <- 0; tau <- 1; t <- 1; y <- 0  
sigma <- 1/sqrt(t) # Prior standard deviation
mup <- (tau*y + m*t)/(tau + t)  # Posterior mean
taup <- tau+ t  # Posterior precision
sigmap = 1/sqrt(taup)   # Posterior standard deviation
# Likelihood, Prior, and Posterior functions
likelihood <- Vectorize(function(mu)  dnorm(mu,y,1/sqrt(tau)))
pi <- Vectorize(function(mu)  dnorm(mu,m,sigma))
post1 <- Vectorize(function(mu)  dnorm(mu,mup,sigmap))
curve(post1,-5,5, xlab=~mu, ylab="Density", main="Normal prior: m=0, t=1",lwd=2,n=10000,ylim=c(0,0.6))
curve(pi, xlab=~theta,lwd=1,lty = 2, add=T,n=10000)
curve(likelihood, xlab=~theta,lwd=1,lty = 3, add=T,n=10000)
legend(2, 0.5, c("Posterior","Prior","Likelihood"), col=c("black","black","black"),
       text.col = "black", lty = c(1,2,3), lwd = c(2,1,1),
       merge = TRUE, bg = "gray90",cex=1)

# m = 1, t = 1
m <- 1; tau <- 1; t <- 1; y <- 0  
sigma <- 1/sqrt(t) # Prior standard deviation
mup <- (tau*y + m*t)/(tau + t)  # Posterior mean
taup <- tau+ t  # Posterior precision
sigmap = 1/sqrt(taup)   # Posterior standard deviation
# Likelihood, Prior, and Posterior functions
likelihood <- Vectorize(function(mu)  dnorm(mu,y,1/sqrt(tau)))
pi <- Vectorize(function(mu)  dnorm(mu,m,sigma))
post1 <- Vectorize(function(mu)  dnorm(mu,mup,sigmap))
curve(post1,-5,5, xlab=~mu, ylab="Density", main="Normal prior: m=1, t=1",lwd=2,n=10000,ylim=c(0,0.6))
curve(pi, xlab=~theta,lwd=1,lty = 2, add=T,n=10000)
curve(likelihood, xlab=~theta,lwd=1,lty = 3, add=T,n=10000)
legend(-4, 0.5, c("Posterior","Prior","Likelihood"), col=c("black","black","black"),
       text.col = "black", lty = c(1,2,3), lwd = c(2,1,1),
       merge = TRUE, bg = "gray90",cex=1)

# m = 2, t = 1
m <- 2; tau <- 1; t <- 1; y <- 0  
sigma <- 1/sqrt(t) # Prior standard deviation
mup <- (tau*y + m*t)/(tau + t)  # Posterior mean
taup <- tau+ t  # Posterior precision
sigmap = 1/sqrt(taup)   # Posterior standard deviation
# Likelihood, Prior, and Posterior functions
likelihood <- Vectorize(function(mu)  dnorm(mu,y,1/sqrt(tau)))
pi <- Vectorize(function(mu)  dnorm(mu,m,sigma))
post1 <- Vectorize(function(mu)  dnorm(mu,mup,sigmap))
curve(post1,-5,5, xlab=~mu, ylab="Density", main="Normal prior: m=2, t=1",lwd=2,n=10000,ylim=c(0,0.6))
curve(pi, xlab=~theta,lwd=1,lty = 2, add=T,n=10000)
curve(likelihood, xlab=~theta,lwd=1,lty = 3, add=T,n=10000)
legend(-4, 0.5, c("Posterior","Prior","Likelihood"), col=c("black","black","black"),
       text.col = "black", lty = c(1,2,3), lwd = c(2,1,1),
       merge = TRUE, bg = "gray90",cex=1)

\(n \geq 1\) observations

Suppose that we have
\[\begin{eqnarray*} y_i,\dots,y_n \mid \mu &\stackrel{iid}{\sim}& N(\mu,\sigma^2=1/\tau),\\ \mu &\sim& N(m,t), \end{eqnarray*}\]

where \(\tau\) is the precision parameter, which is assumed to be known. Then, after some algebra, we can find that the posterior distribution of the mean \(\mu\) is normal with mean \((n\tau \bar y + m t)/(n\tau + t)\) and precision \(n\tau+t\). The following R code shows the effect of \(n\) for a normal prior with \(m=0\), \(t=1\), \(\tau=1\).

###############################################################
# Effect of n 
###############################################################

# Simulated sample n = 2
n <- 2
set.seed(123)
y <- rnorm(n)
my <- mean(y)
summary(y)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.5605 -0.4779 -0.3953 -0.3953 -0.3128 -0.2302

m <- 0; tau <- 1; t <- 1; 
sigma <- 1/sqrt(t) # Prior standard deviation
mup <- (tau*n*my + m*t)/(n*tau + t)  # Posterior mean
taup <- n*tau+ t  # Posterior precision
sigmap = 1/sqrt(taup)   # Posterior standard deviation
# Likelihood, Prior, and Posterior functions
pi <- Vectorize(function(mu)  dnorm(mu,m,sigma))
post1 <- Vectorize(function(mu)  dnorm(mu,mup,sigmap))
curve(post1,-3,3, xlab=~mu, ylab="Density", main="Normal prior: n=2",lwd=2,n=10000,ylim=c(0,0.8))
curve(pi, xlab=~theta,lwd=1,lty = 2, add=T,n=10000)
legend(1, 0.75, c("Posterior","Prior"), col=c("black","black"),
       text.col = "black", lty = c(1,2), lwd = c(2,1),
       merge = TRUE, bg = "gray90",cex=1)

# Simulated sample n = 10
n <- 10
set.seed(123)
y <- rnorm(n)
my <- mean(y)
hist(y)

summary(y)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -1.26500 -0.53180 -0.07983  0.07463  0.37800  1.71500

m <- 0; tau <- 1; t <- 1; 
sigma <- 1/sqrt(t) # Prior standard deviation
mup <- (tau*n*my + m*t)/(n*tau + t)  # Posterior mean
taup <- n*tau+ t  # Posterior precision
sigmap = 1/sqrt(taup)   # Posterior standard deviation
# Likelihood, Prior, and Posterior functions
pi <- Vectorize(function(mu)  dnorm(mu,m,sigma))
post1 <- Vectorize(function(mu)  dnorm(mu,mup,sigmap))
curve(post1,-3,3, xlab=~mu, ylab="Density", main="Normal prior: n=10",lwd=2,n=10000,ylim=c(0,1.5))
curve(pi, xlab=~theta,lwd=1,lty = 2, add=T,n=10000)
legend(1, 1.5, c("Posterior","Prior"), col=c("black","black"),
       text.col = "black", lty = c(1,2), lwd = c(2,1),
       merge = TRUE, bg = "gray90",cex=1)

# Simulated sample n = 25
n <- 25
set.seed(123)
y <- rnorm(n)
my <- mean(y)
hist(y)

summary(y)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -1.96700 -0.62500 -0.21800 -0.03333  0.46090  1.78700

m <- 0; tau <- 1; t <- 1; 
sigma <- 1/sqrt(t) # Prior standard deviation
mup <- (tau*n*my + m*t)/(n*tau + t)  # Posterior mean
taup <- n*tau+ t  # Posterior precision
sigmap = 1/sqrt(taup)   # Posterior standard deviation
# Likelihood, Prior, and Posterior functions
pi <- Vectorize(function(mu)  dnorm(mu,m,sigma))
post2 <- Vectorize(function(mu)  dnorm(mu,mup,sigmap))
curve(post2,-3,3, xlab=~mu, ylab="Density", main="Normal prior: n=25",lwd=2,n=10000,ylim=c(0,2.25))
curve(pi, xlab=~theta,lwd=1,lty = 2, add=T,n=10000)
legend(1, 2, c("Posterior","Prior"), col=c("black","black"),
       text.col = "black", lty = c(1,2), lwd = c(2,1),
       merge = TRUE, bg = "gray90",cex=1)

# Simulated sample n = 50
n <- 50
set.seed(123)
y <- rnorm(n)
my <- mean(y)
hist(y)

summary(y)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -1.96700 -0.55930 -0.07264  0.03440  0.69820  2.16900

m <- 0; tau <- 1; t <- 1; 
sigma <- 1/sqrt(t) # Prior standard deviation
mup <- (tau*n*my + m*t)/(n*tau + t)  # Posterior mean
taup <- n*tau+ t  # Posterior precision
sigmap = 1/sqrt(taup)   # Posterior standard deviation
# Likelihood, Prior, and Posterior functions
pi <- Vectorize(function(mu)  dnorm(mu,m,sigma))
post3 <- Vectorize(function(mu)  dnorm(mu,mup,sigmap))
curve(post3,-3,3, xlab=~mu, ylab="Density", main="Normal prior: n=50",lwd=2,n=10000,ylim=c(0,3))
curve(pi, xlab=~theta,lwd=1,lty = 2, add=T,n=10000)
legend(1, 2.5, c("Posterior","Prior"), col=c("black","black"),
       text.col = "black", lty = c(1,2), lwd = c(2,1),
       merge = TRUE, bg = "gray90",cex=1)

# Simulated sample n = 100
n <- 100
set.seed(123)
y <- rnorm(n)
my <- mean(y)
hist(y)

summary(y)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -2.30900 -0.49390  0.06176  0.09041  0.69180  2.18700

m <- 0; tau <- 1; t <- 1; 
sigma <- 1/sqrt(t) # Prior standard deviation
mup <- (tau*n*my + m*t)/(n*tau + t)  # Posterior mean
taup <- n*tau+ t  # Posterior precision
sigmap = 1/sqrt(taup)   # Posterior standard deviation
# Likelihood, Prior, and Posterior functions
pi <- Vectorize(function(mu)  dnorm(mu,m,sigma))
post4 <- Vectorize(function(mu)  dnorm(mu,mup,sigmap))
curve(post4,-3,3, xlab=~mu, ylab="Density", main="Normal prior: n=100",lwd=2,n=10000,ylim=c(0,4.1))
curve(pi, xlab=~theta,lwd=1,lty = 2, add=T,n=10000)
legend(1, 4, c("Posterior","Prior"), col=c("black","black"),
       text.col = "black", lty = c(1,2), lwd = c(2,1),
       merge = TRUE, bg = "gray90",cex=1)

The Normal-Normal Bayesian model (known variance)

F. Javier Rubio

23 November, 2017

Normal sampling model (known variance) with a normal prior on the mean

One point observation

\(n \geq 1\) observations