1. Posterior distribution under a Normal Inverse-Gamma prior
Assume \(y \sim N\left( X \beta, \sigma^{2}
V\right)\), where \(V\) is known
and \(\left\{\beta,
\sigma^{2}\right\}\) are unknown. We use a Normal Inverse-Gamma
(NIG) prior: \[
\begin{align}
P(\beta, \sigma^{2}) &= NIG \left(\beta, \sigma^{2} \mid m_{0},
M_{0}, a_{0}, b_{0}\right) \\
&= IG\left(\sigma^{2} \mid a_{0}, b_{0}\right) \cdot
N\left(\beta \mid m_{0}, \sigma^{2} M_{0}\right) \\
\end{align}
\]
The posterior distribution is given by:
\[
\boxed{
\begin{align}\label{eq:post_dist}
P\left(\beta, \sigma^{2} \mid y\right) & \propto
\left(\frac{1}{\sigma^{2}}\right)^{a_{0}+p+1}
e^{-\frac{b_{0}}{\sigma^{2}}}
e^{-\frac{1}{2 \sigma^{2}} [Q \left(\beta, m_{0}, M_{0}\right)+Q
\left(y, X \beta, V\right)]}\;
\end{align}
}
\]
Click for details
\[
\begin{align}
P\left(\beta, \sigma^{2} \mid y\right) & \propto NIG\left(\beta,
\sigma^{2} \mid m_{0}, M_{0}, a_{0}, b_{0}\right) \cdot N\left(y \mid X
\beta, \sigma^{2} V\right) \nonumber\\
& \propto IG\left(\sigma^{2} \mid a_{0}, b_{0}\right) \cdot
N\left(\beta \mid m_{0}, \sigma^{2} M_{0}\right) \cdot N\left(y \mid X
\beta, \sigma^{2} V\right) \nonumber\\
& \propto \frac{b_0^{a_0}}{\Gamma\left(a_{0}\right)}
\left(\frac{1}{\sigma^{2}}\right)^{a_{0}+1}
e^{-\frac{b_{0}}{\sigma^{2}}}
\frac{1}{(2 \pi \sigma^{2})^{\frac{p}{2}}\left|
M_{0}\right|^{\frac{1}{2}}} e^{-\frac{1}{2 \sigma^{2}} Q \left(\beta,
m_{0}, M_{0}\right)}
\frac{1}{(2 \pi \sigma^{2})^{\frac{p}{2}}\left| V\right|^{\frac{1}{2}}}
e^{-\frac{1}{2 \sigma^{2}} Q \left(y, X \beta, V\right)} \nonumber\\
& \propto \left(\frac{1}{\sigma^{2}}\right)^{a_{0}+p+1}
e^{-\frac{b_{0}}{\sigma^{2}}}
e^{-\frac{1}{2 \sigma^{2}} [Q \left(\beta, m_{0}, M_{0}\right)+Q
\left(y, X \beta, V\right)]}\;
\end{align}
\]
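where \(Q(x, m, M)=(x-m)^{\top} M^{-1}
(x-m)\).
Note that the exponent \(a_{0}+p+1\) is obtained by taking \(y\) to have the same length \(p\) as \(\beta\); if \(y\) has length \(n \neq p\), the factor from \(N\left(y \mid X \beta, \sigma^{2} V\right)\) is \((2 \pi \sigma^{2})^{-\frac{n}{2}}\), the exponent becomes \(a_{0}+\frac{n+p}{2}+1\), and the update for the shape parameter below reads \(a_{1}=a_{0}+\frac{n}{2}\).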
We can further simplify the sum of the two quadratic forms by completing the square in \(\beta\): \[
\boxed{
\begin{align}\label{eq:multivariate_completion_square}
Q \left(\beta, m_{0}, M_{0}\right)+Q \left(y, X \beta, V\right)
&= (\beta - M_{1}m_{1})^{\top}M_{1}^{-1}(\beta - M_{1}m_{1})
+c^{\ast}\;
\end{align}
}
\]
Click for details
\[
\begin{align}
Q \left(\beta, m_{0}, M_{0}\right)+Q \left(y, X \beta, V\right) &=
(\beta - m_{0})^{\top}M_{0}^{-1}(\beta - m_{0}) + (y -
X\beta)^{\top}V^{-1}(y - X\beta)\; \nonumber\\
&= \beta^{\top}M_{0}^{-1}\beta - 2\beta^{\top}M_{0}^{-1}m_{0} +
m_{0}^{\top}M_{0}^{-1}m_{0} \nonumber\\
&\qquad + \beta^{\top}X^{\top}V^{-1}X\beta - 2\beta^{\top}
X^{\top}V^{-1}y + y^{\top}V^{-1}y \nonumber\\
&= \beta^{\top} \left(M_{0}^{-1} + X^{\top}V^{-1}X\right) \beta -
2\beta^{\top}\left(M_{0}^{-1}m_{0} + X^{\top}V^{-1}y\right) \nonumber\\
&\qquad + m_{0}^{\top} M_{0}^{-1}m_{0} + y^{\top}V^{-1}y \nonumber
\\
&= \beta^{\top}M_{1}^{-1}\beta - 2\beta^{\top} m_{1} +
c\nonumber\\
&= (\beta - M_{1}m_{1})^{\top}M_{1}^{-1}(\beta - M_{1}m_{1}) -
m_{1}^{\top}M_{1}m_{1} +c \nonumber\\
&= (\beta - M_{1}m_{1})^{\top}M_{1}^{-1}(\beta - M_{1}m_{1})
+c^{\ast}\;
\end{align}
\]
where \(M_{1}\) is a symmetric
positive definite matrix, \(m_{1}\) is
a vector, and \(c\) and \(c^{\ast}\) are scalars given by:
\[
\begin{align}
M_{1}^{-1} &= M_{0}^{-1} + X^{\top}V^{-1}X\; \\
m_{1} &= M_{0}^{-1}m_{0} + X^{\top}V^{-1}y\; \\
c &= m_{0}^{\top} M_{0}^{-1}m_{0} + y^{\top}V^{-1}y\; \\
c^{\ast} &= c - m_{1}^{\top}M_{1}m_{1} = m_{0}^{\top} M_{0}^{-1}m_{0} +
y^{\top}V^{-1}y - m_{1}^{\top}M_{1}m_{1}\;
\end{align}
\]
Note: \(M_{1}\), \(m_{1}\), \(c\) and \(c^{\ast}\) do not depend upon \(\beta\).
Then, we have:
\[
\boxed{
\begin{align}
P\left(\beta, \sigma^{2} \mid y\right)
& \propto NIG\left(\beta, \sigma^{2} \mid M_{1}m_{1}, M_{1}, a_{1},
b_{1}\right) \;
\end{align}
}
\]
Click for details
\[
\begin{align}
P\left(\beta, \sigma^{2} \mid y\right) & \propto
\left(\frac{1}{\sigma^{2}}\right)^{a_{0}+p+1}
e^{-\frac{b_{0}}{\sigma^{2}}}
e^{-\frac{1}{2 \sigma^{2}} ((\beta -
M_{1}m_{1})^{\top}M_{1}^{-1}(\beta - M_{1}m_{1}) +c^{\ast})}\\
& \propto \left(\frac{1}{\sigma^{2}}\right)^{a_{0}+p+1}
e^{-\frac{b_{0}+\frac{c^{\ast}}{2}}{\sigma^{2}}}
e^{-\frac{1}{2 \sigma^{2}} (\beta - M_{1}m_{1})^{\top}M_{1}^{-1}(\beta
- M_{1}m_{1})}\\
& \propto \left(\frac{1}{\sigma^{2}}\right)^{a_{0}+\frac{p}{2}+1}
e^{-\frac{b_{0}+\frac{c^{\ast}}{2}}{\sigma^{2}}}
\left(\frac{1}{\sigma^{2}}\right)^{\frac{p}{2}}
e^{-\frac{1}{2 \sigma^{2}} (\beta - M_{1}m_{1})^{\top}M_{1}^{-1}(\beta
- M_{1}m_{1})}\\
& \propto IG\left(\sigma^{2} \mid a_{0}+\frac{p}{2},
b_{0}+\frac{c^{\ast}}{2} \right) \cdot N\left(\beta \mid M_{1}m_{1},
\sigma^{2} M_{1}\right) \\
&= IG\left(\sigma^{2} \mid a_{1}, b_{1} \right) \cdot N\left(\beta
\mid M_{1}m_{1}, \sigma^{2} M_{1}\right) \\
&= NIG\left(\beta, \sigma^{2} \mid M_{1}m_{1}, M_{1}, a_{1},
b_{1}\right) \;
\end{align}
\]
where
\[
\begin{align}
m_{1}&=M_{0}^{-1} m_{0}+X^{\top} V^{-1} y \; \\
M_{1}^{-1} &=M_{0}^{-1}+X^{\top} V^{-1} X \; \\
a_{1}&=a_{0}+\frac{p}{2} \; \\
b_{1}&=b_{0}+\frac{c^{\ast}}{2}= b_{0}+\frac{1}{2}\left(m_{0}^{\top}
M_{0}^{-1} m_{0}+y^{\top} V^{-1} y-m_{1}^{\top} M_{1} m_{1}\right)\;
\end{align}
\]
2. Updating Form of the Posterior Distribution
To calculate \(M_1\), we utilize the
well-known Sherman-Woodbury-Morrison
identity in matrix algebra: \[\begin{equation}\label{ShermanWoodburyMorrison}
\left(A + BDC\right)^{-1} = A^{-1} -
A^{-1}B\left(D^{-1}+CA^{-1}B\right)^{-1}CA^{-1}
\end{equation}\] where \(A\) and
\(D\) are square matrices that are
invertible and \(B\) and \(C\) are rectangular (square if \(A\) and \(D\) have the same dimensions) matrices such
that the multiplications are well-defined. This identity is easily
verified by multiplying the right-hand side by \(A + BDC\) and simplifying to reduce it to
the identity matrix.
\[
\begin{aligned}
M_1 & = (M_{0}^{-1} + X^{\top}V^{-1}X)^{-1} \\
& = M_0-M_0 X^{\top}\left(V+X M_0 X^{\top}\right)^{-1} X M_0 \\
& = M_0-M_0 X^{\top} Q^{-1} X M_0
\end{aligned}
\]
where \(Q = V + X M_0 X^{\top}\) (from here on \(Q\) denotes this matrix, not the quadratic form \(Q(x, m, M)\) defined earlier).
We can show that: \[
\boxed{
\begin{align}
M_1 m_1 & =m_0+M_0 X^{\top} Q^{-1}\left(y-X m_0\right) \;.
\end{align}
}
\]
Click for details
\[\begin{align}
M_1 m_1 & = \left(M_0^{-1}+X^{\top} V^{-1} X\right)^{-1} m_1 \\
& = [M_0-M_0 X^{\top}\left(V+X M_0 X^{\top}\right)^{-1} X M_0]m_1
\\
& = (M_0-M_0 X^{\top} Q^{-1} X M_0) m_1 \\
& = (M_0-M_0 X^{\top} Q^{-1} X M_0)(M_0^{-1} m_0+X^{\top} V^{-1}
y) \\
& = m_0+M_0 X^{\top} V^{-1} y-M_0 X^{\top} Q^{-1} X m_0 - M_0
X^{\top} Q^{-1} X M_0 X^{\top} V^{-1} y \\
& = m_0+M_0 X^{\top}\left(I-Q^{-1} X M_0 X^{\top}\right) V^{-1} y
- M_0 X^{\top} Q^{-1} X m_0 \\
& = m_0+M_0 X^{\top} Q^{-1}\left(Q-X M_0 X^{\top}\right)V^{-1} y -
M_0 X^{\top} Q^{-1} X m_0 \\
& \left(\text { since } Q=V+X M_0 X^{\top}\right) \\
& = m_0+M_0 X^{\top} Q^{-1}(V) V^{-1} y-M_0 X^{\top} Q^{-1} X m_0
\\
& = m_0+M_0 X^{\top} Q^{-1} y-M_0 X^{\top} Q^{-1} X m_0 \\
& = m_0+M_0 X^{\top} Q^{-1}\left(y-X m_0\right) \\
\end{align}\]
Furthermore, we can simplify the scalar term appearing in \(b_1\): \[
\boxed{
\begin{align}
m_0^{\top} M_0^{-1} m_0+y^{\top} V^{-1} y-m_1^{\top} M_1 m_1 & =
\left(y-X m_0\right)^{\top} Q^{-1}\left(y-X m_0\right) \;.
\end{align}
}
\]
Click for details
\[\begin{align}
m_0^{\top} M_0^{-1} m_0+y^{\top} V^{-1} y-m_1^{\top} M_1 m_1 & =
m_0^{\top} M_0^{-1} m_0+y^{\top} V^{-1} y-m_1^{\top} [m_0+M_0 X^{\top}
Q^{-1} (y - X m_0)] \\
& = m_0^{\top} M_0^{-1} m_0+y^{\top} V^{-1} y-m_1^{\top} m_0 -
m_1^{\top} M_0 X^{\top} Q^{-1}\left(y-X m_0\right) \\
& = m_0^{\top} M_0^{-1} m_0+y^{\top} V^{-1} y
-m_0^{\top}\left(M_0^{-1} m_0+X^{\top} V^{-1} y\right) \\
& \qquad \qquad \qquad - m_1^{\top} M_0 X^{\top} Q^{-1}\left(y-X
m_0\right) \\
& = y^{\top} V^{-1} y-y^{\top} V^{-1} X m_0 - m_1^{\top} M_0
X^{\top} Q^{-1}\left(y-X m_0\right) \\
& = y^{\top} V^{-1}\left(y-X m_0 \right)-m_1^{\top} M_0 X^{\top}
Q^{-1}\left(y-X m_0\right) \\
& =y^{\top} V^{-1}\left(y-X m_0\right)-\underbrace{m_1^{\top} M_0
X^{\top} Q^{-1}\left(y-X m_0\right)}_{\substack{\text { simplify from
left to right }}} \\
& =y^{\top} V^{-1}\left(y-X m_0\right)-\left(M_0 m_1\right)^{\top}
X^{\top} Q^{-1}\left(y-X m_0\right) \\
& =y^{\top} V^{-1}\left(y-X m_0\right)-\left(m_0+M_0 X^{\top}
V^{-1} y\right)^{\top} X^{\top} Q^{-1}\left(y-X m_0\right) \\
& =y^{\top} V^{-1}\left(y-X m_0\right)-\left(X m_0+X M_0 X^{\top}
V^{-1} y\right)^{\top} Q^{-1}\left(y-X m_0\right)\\
& =y^{\top} V^{-1}\left(y-X m_0\right) -\left(Q^{-1} X
m_0+Q^{-1}\left(X M_0 X^{\top}\right)V^{-1} y\right)^{\top}\left(y-X m_0\right)
\\
& =y^{\top} V^{-1}\left(y-X m_0\right)-[Q^{-1} X m_0+Q^{-1}(Q-V)
V^{-1} y]^{\top}(y-X m_0) \\
& =y^{\top} V^{-1}\left(y-X m_0\right) -\left(Q^{-1} X m_0+V^{-1}
y- Q^{-1} y \right)^{\top}\left(y-X m_0\right) \\
& =y^{\top} V^{-1}\left(y-X m_0\right) -[V^{-1} y+Q^{-1}\left(X
m_0-y\right)]^{\top}\left(y-X m_0\right) \\
& =y^{\top} V^{-1}\left(y-X m_0\right)-y^{\top} V^{-1}\left(y-X
m_0\right) +\left(y-X m_0\right)^{\top} Q^{-1}\left(y-X m_0\right) \\
& =\left(y-X m_0\right)^{\top} Q^{-1}\left(y-X m_0\right) \\
\end{align}\]
So, we get the following updating form of the posterior distribution
from Bayesian Linear Regression:
\[
\boxed{
\begin{aligned}
P\left(\beta, \sigma^2 \mid y\right) &\propto I G\left(\sigma^2 \mid
a_0, b_0\right) N\left(\beta \mid m_0, \sigma^2 M_0\right) N(y \mid X
\beta, \sigma^2 V)\\
& \propto I G\left(\sigma^2 \mid a_1, b_1\right) N\left(\beta \mid
\tilde{m}_1, \sigma^2 \tilde{M}_1\right)\\
\end{aligned}
}
\] where,
\[
\begin{aligned}
\tilde{m}_1 & =M_1 m_1=m_0+M_0 X^{\top} Q^{-1}\left(y-X m_0\right)
\\
\tilde{M}_1 & =M_1=M_0-M_0 X^{\top} Q^{-1} X M_0 \\
Q & =V+X M_0 X^{\top} \\
a_1 & =a_0+\frac{p}{2} \\
b_1 & =b_0+\frac{1}{2}\left(y-X m_0\right)^{\top} Q^{-1}\left(y-X
m_0\right)
\end{aligned}
\]