This is an R HTML document. When you click the Knit HTML button a web page will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(MASS) library(coda) #Bayesian analysis of simulated data using a gamma likelihood function #and Jeffreys prior on gamma dist parameters #derivation of Jeffreys not shown #simulate data from gamma(3,6) distibution set.seed(848) n <- 30 a <- 3 b <- 6 data<-rgamma(n, shape=a, rate=b) prod = prod(data) #for liklihood ratios sum = sum(data) #for liklihood ratios data
## [1] 0.6584520 0.2159348 0.3527761 0.8196269 0.7027882 0.4004256 0.1423023 ## [8] 0.6388360 0.7269984 0.6119328 0.3873454 0.3896036 0.4543655 0.2381978 ## [15] 0.2727744 0.5910706 0.5319610 0.3538756 0.6798906 0.5459093 1.3275193 ## [22] 1.0673249 0.4382307 0.6646942 1.0054855 0.5808849 0.3238377 0.1757317 ## [29] 0.1937139 0.5186247
cov = matrix(0,2,2) cov[,1] = c(1,0) cov[,2] = c(0,1) #drive our parameters around a paramter space by sampling #from a bivariate_normal(mu, cov) distribution #parameters: mu->vector containing position of (a,b) parameters at step in MCMC # cov-> var/cov matrix (assumed) #return: new vector of parameters (a,b) to be proposed for next step of MCMC driver <- function(mu, cov){ return(mvrnorm(n = 1, mu, cov)) } #compute ratio of likelihood functions for two different sets of (a,b) #assuming data comes from gamma dist #Parameters: a,b -> current shape and rate parameters in MCMC # propa, probb -> proposed shape and rate parameters #returns likelihood ratio, used to calculate acceptance rate in MCMC sim likehoodratio<-function(a, b, propa, propb){ return((((propb^propa)/(b^a))^n)*prod^(propa-a)* ((gamma(a)/gamma(propa))^n)*exp((b-propb)*sum)) } #compute the prior ratio based on current and proposed values of a and b #hard coded Jeffery's prior, could have used trigamma function instead #Parameters: a,b -> current shape and rate parameters in MCMC # propa, probb -> proposed shape and rate parameters #returns pior ratio, used to calculate acceptance rate in MCMC sim priorratio<-function(a,b,propa,propb){ numerator = 0 den = 0 for(i in 0:100){ numerator = numerator+(a)/((b^2)*(i+a)^2) den = den+(propa)/((propb^2)*(i+propa)^2) } numerator = sqrt(numerator) den = sqrt(den) return(den/numerator) } #parent function to compute acceptance rate for MCMC #that is to be compared to a uniform random variable on the interval 0,1 #Parameters: cura,curb -> current shape and rate parameters in MCMC # propa, probb -> proposed shape and rate parameters #returns alpha, used to dictate next step in MCMC sim dist <- function(cura, curb, propa, propb){ l <- likehoodratio(cura, curb, propa, propb) p <- priorratio(cura, curb, propa, propb) return(l*p) } #metropolis-hastings algorithm #Parameters: init -> initial starting point for MCMC sim (irrelevant) # chain -> empty matrix to retain accepted vectors of (a, b) #returns: matrix with posterior samples of f(a, b) metro_hast <- function(init, chain){ acc = 0 #number of acceptances for(i in 1:(length(chain)/2)){ #interate through empty nx2 matrix prop <- driver(init, cov) #propose new values of a and b from driver if(0<prop[1]&&0<prop[2]){ #values must be positive for gamma dist p_1 <- dist(init[1], init[2], prop[1], prop[2]) #acceptance ratio if(runif(1)<p_1){ init<-prop #accept proposed step with probability p_1 acc = acc+1 #note we accpepted something chain[i,]<-init} #store values } } print(acc/(length(chain)/2)) #print % of accepted steps chain=na.omit(chain) #clear chain of NAs return(chain) #return MCMC walk } init <- c(5,5) #initialize chain <- matrix(NA, nrow = 100000, ncol = length(init)) mcmc <- metro_hast(init, chain) #run algorithm, accepted 32.401% of the time
## [1] 0.32401
mcmc=mcmc[1000:(length(mcmc)/2),] #get rid of first 1k vectors stored, (burn-in) mcmc.obj <- mcmc(mcmc) summary(mcmc.obj)
## ## Iterations = 1:31402 ## Thinning interval = 1 ## Number of chains = 1 ## Sample size per chain = 31402 ## ## 1. Empirical mean and standard deviation for each variable, ## plus standard error of the mean: ## ## Mean SD Naive SE Time-series SE ## [1,] 4.095 0.978 0.005519 0.02401 ## [2,] 7.682 1.927 0.010874 0.04915 ## ## 2. Quantiles for each variable: ## ## 2.5% 25% 50% 75% 97.5% ## var1 2.392 3.396 4.021 4.722 6.181 ## var2 4.367 6.293 7.545 8.921 11.768
autocorr.plot(mcmc.obj) #data appears very correlated with around last 30 points
plot(mcmc.obj) #good mixing
#diagnostic test of MCMC #Gelman and Rubin mcmc1 <- mcmc(metro_hast(init, chain)[1:25000,])
## [1] 0.32753
mcmc2 <- mcmc(metro_hast(init, chain)[1:25000,])
## [1] 0.32592
mcmc3 <- mcmc(metro_hast(init, chain)[1:25000,])
## [1] 0.32761
mcmc4 <- mcmc(metro_hast(init, chain)[1:25000,])
## [1] 0.32802
mcmc5 <- mcmc(metro_hast(init, chain)[1:25000,])
## [1] 0.32137
mh.list <- mcmc.list(list(mcmc1, mcmc2, mcmc3, mcmc4, mcmc5)) gelman.diag(mh.list)
## Potential scale reduction factors: ## ## Point est. Upper C.I. ## [1,] 1 1.00 ## [2,] 1 1.01 ## ## Multivariate psrf ## ## 1
gelman.plot(mh.list)
#psrf=1, appears we have converged to stationary distribution #geweke geweke.diag(mcmc.obj)
## ## Fraction in 1st window = 0.1 ## Fraction in 2nd window = 0.5 ## ## var1 var2 ## -0.03172 0.15577
#very low variences, looks like the first half and second half are #statistically similar #Raftery and Lewis raftery.diag(mcmc.obj,q=0.025,r=0.005,s=0.95)
## ## Quantile (q) = 0.025 ## Accuracy (r) = +/- 0.005 ## Probability (s) = 0.95 ## ## Burn-in Total Lower bound Dependence ## (M) (N) (Nmin) factor (I) ## 20 25800 3746 6.89 ## 20 22064 3746 5.89
#dependence factor 6.89 and 5.89, I beleive this is due to high auto correlations #Although not shown I did thin my data, when thinned we obtaina acceptable values #I will continue with unthinned data #Heidelberg & Welch heidel.diag(mcmc.obj)
## ## Stationarity start p-value ## test iteration ## var1 passed 1 0.884 ## var2 passed 1 0.889 ## ## Halfwidth Mean Halfwidth ## test ## var1 passed 4.09 0.0471 ## var2 passed 7.68 0.0963
#passes both tests, there is good evidence we have samples from the #posterior distribution #Marginal posterior densities of shape and rate parameters #take the marginal posteriors to be the the column vectors of our mcmc sample densplot(mcmc.obj[,1], main = 'Marginal Posterior of Shape Parameter')
densplot(mcmc.obj[,2], main = 'Marginal Posterior of Rate Parameter')
hpd<-function(x,p){ dx<-density(x) md<-dx$x[dx$y==max(dx$y)] px<-dx$y/sum(dx$y) pxs<--sort(-px) ct<-min(pxs[cumsum(pxs)< p]) list(hpdr=range(dx$x[px>=ct]),mode=md) } hpd(mcmc.obj[,1], .95) #(2.293340, 6.062022)
## $hpdr ## [1] 2.293340 6.062022 ## ## $mode ## [1] 3.797356
hpd(mcmc.obj[,2], .95) #(4.144687, 11.516936)
## $hpdr ## [1] 4.144687 11.516936 ## ## $mode ## [1] 7.243641
#actual values of shape and rate parameters within 95% CI ###################