Title

This is an R HTML document. When you click the Knit HTML button a web page will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(MASS)
library(coda)
#Bayesian analysis of simulated data using a gamma likelihood function
#and Jeffreys prior on gamma dist parameters
#derivation of Jeffreys not shown



#simulate data from gamma(3,6) distibution
set.seed(848)
n <- 30
a <- 3
b <- 6
data<-rgamma(n, shape=a, rate=b)
prod = prod(data) #for liklihood ratios
sum = sum(data) #for liklihood ratios
data

##  [1] 0.6584520 0.2159348 0.3527761 0.8196269 0.7027882 0.4004256 0.1423023
##  [8] 0.6388360 0.7269984 0.6119328 0.3873454 0.3896036 0.4543655 0.2381978
## [15] 0.2727744 0.5910706 0.5319610 0.3538756 0.6798906 0.5459093 1.3275193
## [22] 1.0673249 0.4382307 0.6646942 1.0054855 0.5808849 0.3238377 0.1757317
## [29] 0.1937139 0.5186247

cov = matrix(0,2,2)
cov[,1] = c(1,0)
cov[,2] = c(0,1)
#drive our parameters around a paramter space by sampling
#from a bivariate_normal(mu, cov) distribution
#parameters: mu->vector containing position of (a,b) parameters at step in MCMC
#            cov-> var/cov matrix (assumed)
#return: new vector of parameters (a,b) to be proposed for next step of MCMC
driver <- function(mu, cov){
  return(mvrnorm(n = 1, mu, cov))
}



#compute ratio of likelihood functions for two different sets of (a,b)
#assuming data comes from gamma dist
#Parameters: a,b -> current shape and rate parameters in MCMC
#            propa, probb -> proposed shape and rate parameters
#returns likelihood ratio, used to calculate acceptance rate in MCMC sim
likehoodratio<-function(a, b, propa, propb){
  return((((propb^propa)/(b^a))^n)*prod^(propa-a)*
           ((gamma(a)/gamma(propa))^n)*exp((b-propb)*sum))
}



#compute the prior ratio based on current and proposed values of a and b
#hard coded Jeffery's prior, could have used trigamma function instead
#Parameters: a,b -> current shape and rate parameters in MCMC
#            propa, probb -> proposed shape and rate parameters
#returns pior ratio, used to calculate acceptance rate in MCMC sim
priorratio<-function(a,b,propa,propb){
  numerator = 0
  den = 0
  for(i in 0:100){
    numerator = numerator+(a)/((b^2)*(i+a)^2)
    den = den+(propa)/((propb^2)*(i+propa)^2)
  }
  numerator = sqrt(numerator)
  den = sqrt(den)
  return(den/numerator)
}



#parent function to compute acceptance rate for MCMC
#that is to be compared to a uniform random variable on the interval 0,1
#Parameters: cura,curb -> current shape and rate parameters in MCMC
#            propa, probb -> proposed shape and rate parameters
#returns alpha, used to dictate next step in MCMC sim
dist <- function(cura, curb, propa, propb){
  l <- likehoodratio(cura, curb, propa, propb)
  p <- priorratio(cura, curb, propa, propb)
  return(l*p)
}



#metropolis-hastings algorithm
#Parameters: init -> initial starting point for MCMC sim (irrelevant)
#            chain -> empty matrix to retain accepted vectors of (a, b)
#returns:    matrix with posterior samples of f(a, b)
metro_hast <- function(init, chain){
  acc = 0                           #number of acceptances

  for(i in 1:(length(chain)/2)){    #interate through empty nx2 matrix
    prop <- driver(init, cov)       #propose new values of a and b from driver
    if(0<prop[1]&&0<prop[2]){       #values must be positive for gamma dist
      p_1 <- dist(init[1], init[2], prop[1], prop[2]) #acceptance ratio

      if(runif(1)<p_1){
        init<-prop                    #accept proposed step with probability p_1
        acc = acc+1                   #note we accpepted something
        chain[i,]<-init}              #store values
    }
  }

  print(acc/(length(chain)/2))      #print % of accepted steps
  chain=na.omit(chain)              #clear chain of NAs
  return(chain)                     #return MCMC walk
}

init <- c(5,5) #initialize
chain <- matrix(NA, nrow = 100000, ncol = length(init))
mcmc <- metro_hast(init, chain)   #run algorithm, accepted 32.401% of the time

## [1] 0.32401

mcmc=mcmc[1000:(length(mcmc)/2),] #get rid of first 1k vectors stored, (burn-in)

mcmc.obj <- mcmc(mcmc)
summary(mcmc.obj)

## 
## Iterations = 1:31402
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 31402 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##       Mean    SD Naive SE Time-series SE
## [1,] 4.095 0.978 0.005519        0.02401
## [2,] 7.682 1.927 0.010874        0.04915
## 
## 2. Quantiles for each variable:
## 
##       2.5%   25%   50%   75%  97.5%
## var1 2.392 3.396 4.021 4.722  6.181
## var2 4.367 6.293 7.545 8.921 11.768

autocorr.plot(mcmc.obj) #data appears very correlated with around last 30 points

plot(mcmc.obj)          #good mixing

#diagnostic test of MCMC



#Gelman and Rubin 
mcmc1 <- mcmc(metro_hast(init, chain)[1:25000,])

## [1] 0.32753

mcmc2 <- mcmc(metro_hast(init, chain)[1:25000,])

## [1] 0.32592

mcmc3 <- mcmc(metro_hast(init, chain)[1:25000,])

## [1] 0.32761

mcmc4 <- mcmc(metro_hast(init, chain)[1:25000,])

## [1] 0.32802

mcmc5 <- mcmc(metro_hast(init, chain)[1:25000,])

## [1] 0.32137

mh.list <- mcmc.list(list(mcmc1, mcmc2, mcmc3, mcmc4, mcmc5))
gelman.diag(mh.list)

## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## [1,]          1       1.00
## [2,]          1       1.01
## 
## Multivariate psrf
## 
## 1

gelman.plot(mh.list)

#psrf=1, appears we have converged to stationary distribution



#geweke
geweke.diag(mcmc.obj)

## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##     var1     var2 
## -0.03172  0.15577

#very low variences, looks like the first half and second half are 
#statistically similar 



#Raftery and Lewis
raftery.diag(mcmc.obj,q=0.025,r=0.005,s=0.95)

## 
## Quantile (q) = 0.025
## Accuracy (r) = +/- 0.005
## Probability (s) = 0.95 
##                                        
##  Burn-in  Total Lower bound  Dependence
##  (M)      (N)   (Nmin)       factor (I)
##  20       25800 3746         6.89      
##  20       22064 3746         5.89

#dependence factor 6.89 and 5.89, I beleive this is due to high auto correlations
#Although not shown I did thin my data, when thinned we obtaina acceptable values
#I will continue with unthinned data



#Heidelberg & Welch
heidel.diag(mcmc.obj)

##                                    
##      Stationarity start     p-value
##      test         iteration        
## var1 passed       1         0.884  
## var2 passed       1         0.889  
##                              
##      Halfwidth Mean Halfwidth
##      test                    
## var1 passed    4.09 0.0471   
## var2 passed    7.68 0.0963

#passes both tests, there is good evidence we have samples from the 
#posterior distribution

#Marginal posterior densities of shape and rate parameters

#take the marginal posteriors to be the the column vectors of our mcmc sample
densplot(mcmc.obj[,1], main = 'Marginal Posterior of Shape Parameter')

densplot(mcmc.obj[,2], main = 'Marginal Posterior of Rate Parameter')

hpd<-function(x,p){
  dx<-density(x)
  md<-dx$x[dx$y==max(dx$y)]
  px<-dx$y/sum(dx$y)
  pxs<--sort(-px)
  ct<-min(pxs[cumsum(pxs)< p])
  list(hpdr=range(dx$x[px>=ct]),mode=md)
}

hpd(mcmc.obj[,1], .95) #(2.293340, 6.062022)

## $hpdr
## [1] 2.293340 6.062022
## 
## $mode
## [1] 3.797356

hpd(mcmc.obj[,2], .95) #(4.144687, 11.516936)

## $hpdr
## [1]  4.144687 11.516936
## 
## $mode
## [1] 7.243641

#actual values of shape and rate parameters within 95% CI 


###################