JAGS Analysis of Copy Number

Prerequisites

You need to have R installed, and the R package rjags, which requires the installation of JAGS.

Data

The data are here in the files freq.txt, and ethnicity.txt. Load these into R using

freq <- read.table("https://www.staff.ncl.ac.uk/i.j.wilson/data/freq.txt", header=TRUE)
ethnicity <- factor(scan("https://www.staff.ncl.ac.uk/i.j.wilson/data/ethnicity.txt", what=character(0)))

We need to create a list to hold all the data used in the analysis

all_data <- list(nrows=nrow(freq), 
                 y=freq, 
                 n=rowSums(freq),
                 nethnicities=nlevels(ethnicity),
                 ethnicity=ethnicity,
                 z = 7, m = 340,  
                 n_point = c(525, 736), x_point=c(18, 37)
                 )

The model can be read from a file. Here I use a string to hold the model.

modelstring <- "
model {
  for (i in 1:nrows) {
    x[i,1] <- 2*p[ethnicity[i],1]*p[ethnicity[i],2] + p[ethnicity[i],1]*p[ethnicity[i],1]  
    x[i,2] <- 2*p[ethnicity[i],1]*p[ethnicity[i],3] + p[ethnicity[i],2]*p[ethnicity[i],2]
    x[i,3] <- 2*p[ethnicity[i],2]*p[ethnicity[i],3] + p[ethnicity[i],3]*p[ethnicity[i],3]
    y[i,] ~ dmulti(x[i,], n[i])
  }
  for (j in 1:nethnicities) {
    for (k in 1:3) {
      a[j,k] ~ dexp(1)                                   ## These lines give a dirichlet
      p[j,k] <- a[j,k]/(a[j,1] + a[j,2] + a[j,3])        ## prior for p
    }
  }
  ## Lets assume the rate of mutations is the same for each, but just estimate it from Caucasians
  r <- 2*mu*(1-p[1,1])/(p[1,1]+2*mu*(1-p[1,1]))
  z ~ dbinom(r, m)
  mu ~ dgamma(1, 1000)                                   ## Prior for the mutation rate

  ## The point mutation process
  beta <- 2*pD/(p[1,1]+2*pD)
  for (ii in 1:2) {
    x_point[ii] ~ dbinom(beta, n_point[ii])
  }
  pD ~ dbeta(1,100)                                      ## Prior for the frequency of point mutations

  #================== Calculate Predicted Disease Frequencies and Carrier Frequencies=================
  for (ii in 1:nethnicities) {
     p1[ii] <- p[ii,2] - pD                                     ## normal copy number 1
     p1carrier[ii] <- 2*p[ii,1]*p1[ii]                          ## CN 1 and a disease allele
     p2carrier[ii] <- 2*p[ii,3]*p[ii,1] + 2*p1[ii]*pD           ## CN 2 and a disease allele
     p3carrier[ii] <- 2*p[ii,3]*pD                              ## CN3 carrier

     pcarrier[ii] <-  p1carrier[ii] + p2carrier[ii] + p3carrier[ii]   ## probability of being a carrier
     pdisease[ii] <- pcarrier[ii]*pcarrier[ii]*(0.25+0.5*mu+0.25*mu*mu) 
                   + 2*pcarrier[ii]*(1-pcarrier[ii])*(0.5*mu+0.5*mu*mu)  
                   + (1-pcarrier[ii])*(1-pcarrier[ii])*mu*mu    ## probability a child has the disease 
  }
}

"

Run Analysis

To run the analysis, you load the model. Here I use sensible initial values for a (and hence p) and mu.

library(rjags)                  ## Load the rjags library

## Loading required package: coda

## Linked to JAGS 4.2.0

## Loaded modules: basemod,bugs

init <- list(mu=0.0001, a=matrix(c(0.02,0.9,0.08), ncol=3, nrow=nlevels(ethnicity), byrow=T))
jags <- jags.model(textConnection(modelstring),
                   data = all_data,
                   n.chains = 4, inits = init)

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 54
##    Unobserved stochastic nodes: 20
##    Total graph size: 1154
## 
## Initializing model

update(jags, n.iter = 5000)    ## burn-in the model before collecting samples 

output <- coda.samples(model=jags, variable.names = c("p",  "mu", "pD", "pcarrier", "pdisease", "p2carrier"), n.iter = 20000, thin=5)

summary(output)

## 
## Iterations = 6005:26000
## Thinning interval = 5 
## Number of chains = 4 
## Sample size per chain = 4000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                   Mean        SD  Naive SE Time-series SE
## mu           1.313e-04 4.700e-05 3.715e-07      3.715e-07
## p[1,1]       1.092e-02 2.178e-04 1.722e-06      1.815e-06
## p[2,1]       6.989e-03 8.311e-04 6.571e-06      6.985e-06
## p[3,1]       1.105e-02 4.374e-04 3.458e-06      3.592e-06
## p[4,1]       6.741e-03 6.405e-04 5.064e-06      5.037e-06
## p[5,1]       1.008e-02 2.361e-03 1.866e-05      1.874e-05
## p[6,1]       9.721e-03 4.722e-04 3.733e-06      4.694e-06
## p[1,2]       9.491e-01 4.716e-04 3.728e-06      3.750e-06
## p[2,2]       7.166e-01 4.217e-03 3.334e-05      3.333e-05
## p[3,2]       9.536e-01 8.825e-04 6.977e-06      7.014e-06
## p[4,2]       9.148e-01 2.150e-03 1.700e-05      1.634e-05
## p[5,2]       9.079e-01 6.846e-03 5.412e-05      5.371e-05
## p[6,2]       9.337e-01 1.198e-03 9.475e-06      1.035e-05
## p[1,3]       3.996e-02 4.148e-04 3.279e-06      3.279e-06
## p[2,3]       2.765e-01 4.115e-03 3.253e-05      3.259e-05
## p[3,3]       3.531e-02 7.677e-04 6.069e-06      6.051e-06
## p[4,3]       7.848e-02 2.056e-03 1.625e-05      1.560e-05
## p[5,3]       8.198e-02 6.399e-03 5.059e-05      5.021e-05
## p[6,3]       5.661e-02 1.099e-03 8.688e-06      9.718e-06
## p2carrier[1] 1.354e-03 7.233e-05 5.718e-07      5.718e-07
## p2carrier[2] 4.228e-03 4.661e-04 3.685e-06      3.911e-06
## p2carrier[3] 1.264e-03 7.599e-05 6.008e-07      5.982e-07
## p2carrier[4] 1.522e-03 1.221e-04 9.656e-07      9.657e-07
## p2carrier[5] 2.114e-03 4.129e-04 3.264e-06      3.248e-06
## p2carrier[6] 1.574e-03 8.761e-05 6.926e-07      7.617e-07
## pD           2.538e-04 3.528e-05 2.789e-07      2.773e-07
## pcarrier[1]  2.209e-02 4.421e-04 3.495e-06      3.684e-06
## pcarrier[2]  1.438e-02 1.639e-03 1.296e-05      1.377e-05
## pcarrier[3]  2.235e-02 8.586e-04 6.788e-06      7.040e-06
## pcarrier[4]  1.389e-02 1.265e-03 9.999e-06      9.944e-06
## pcarrier[5]  2.044e-02 4.618e-03 3.651e-05      3.665e-05
## pcarrier[6]  1.975e-02 9.286e-04 7.341e-06      9.233e-06
## pdisease[1]  1.250e-04 5.107e-06 4.038e-08      4.270e-08
## pdisease[2]  5.425e-05 1.219e-05 9.639e-08      1.022e-07
## pdisease[3]  1.279e-04 9.786e-06 7.737e-08      8.066e-08
## pdisease[4]  5.047e-05 9.076e-06 7.176e-08      7.133e-08
## pdisease[5]  1.124e-04 5.084e-05 4.019e-07      4.046e-07
## pdisease[6]  1.003e-04 9.370e-06 7.407e-08      9.271e-08
## 
## 2. Quantiles for each variable:
## 
##                   2.5%       25%       50%       75%     97.5%
## mu           5.569e-05 9.740e-05 1.258e-04 1.592e-04 2.382e-04
## p[1,1]       1.050e-02 1.077e-02 1.092e-02 1.106e-02 1.135e-02
## p[2,1]       5.463e-03 6.411e-03 6.959e-03 7.545e-03 8.716e-03
## p[3,1]       1.019e-02 1.075e-02 1.104e-02 1.133e-02 1.193e-02
## p[4,1]       5.559e-03 6.297e-03 6.723e-03 7.152e-03 8.074e-03
## p[5,1]       6.038e-03 8.401e-03 9.899e-03 1.153e-02 1.522e-02
## p[6,1]       8.813e-03 9.404e-03 9.708e-03 1.003e-02 1.067e-02
## p[1,2]       9.482e-01 9.488e-01 9.491e-01 9.494e-01 9.500e-01
## p[2,2]       7.082e-01 7.137e-01 7.166e-01 7.194e-01 7.247e-01
## p[3,2]       9.519e-01 9.531e-01 9.537e-01 9.542e-01 9.553e-01
## p[4,2]       9.105e-01 9.133e-01 9.148e-01 9.162e-01 9.189e-01
## p[5,2]       8.940e-01 9.035e-01 9.081e-01 9.126e-01 9.208e-01
## p[6,2]       9.313e-01 9.329e-01 9.337e-01 9.345e-01 9.360e-01
## p[1,3]       3.916e-02 3.969e-02 3.996e-02 4.024e-02 4.078e-02
## p[2,3]       2.685e-01 2.737e-01 2.765e-01 2.793e-01 2.847e-01
## p[3,3]       3.384e-02 3.479e-02 3.530e-02 3.583e-02 3.684e-02
## p[4,3]       7.449e-02 7.710e-02 7.848e-02 7.986e-02 8.257e-02
## p[5,3]       6.999e-02 7.761e-02 8.185e-02 8.619e-02 9.514e-02
## p[6,3]       5.448e-02 5.588e-02 5.661e-02 5.734e-02 5.877e-02
## p2carrier[1] 1.221e-03 1.304e-03 1.351e-03 1.401e-03 1.505e-03
## p2carrier[2] 3.367e-03 3.901e-03 4.207e-03 4.540e-03 5.180e-03
## p2carrier[3] 1.124e-03 1.212e-03 1.261e-03 1.314e-03 1.421e-03
## p2carrier[4] 1.295e-03 1.438e-03 1.519e-03 1.601e-03 1.773e-03
## p2carrier[5] 1.406e-03 1.821e-03 2.079e-03 2.366e-03 3.014e-03
## p2carrier[6] 1.407e-03 1.515e-03 1.573e-03 1.632e-03 1.753e-03
## pD           1.892e-04 2.294e-04 2.521e-04 2.762e-04 3.287e-04
## pcarrier[1]  2.124e-02 2.179e-02 2.209e-02 2.239e-02 2.297e-02
## pcarrier[2]  1.136e-02 1.324e-02 1.432e-02 1.548e-02 1.778e-02
## pcarrier[3]  2.068e-02 2.176e-02 2.234e-02 2.291e-02 2.407e-02
## pcarrier[4]  1.156e-02 1.301e-02 1.386e-02 1.470e-02 1.653e-02
## pcarrier[5]  1.251e-02 1.716e-02 2.010e-02 2.327e-02 3.046e-02
## pcarrier[6]  1.797e-02 1.912e-02 1.973e-02 2.036e-02 2.162e-02
## pdisease[1]  1.153e-04 1.215e-04 1.249e-04 1.283e-04 1.352e-04
## pdisease[2]  3.370e-05 4.552e-05 5.320e-05 6.194e-05 8.137e-05
## pdisease[3]  1.095e-04 1.212e-04 1.277e-04 1.343e-04 1.479e-04
## pdisease[4]  3.488e-05 4.407e-05 4.988e-05 5.595e-05 7.052e-05
## pdisease[5]  4.071e-05 7.581e-05 1.037e-04 1.385e-04 2.360e-04
## pdisease[6]  8.299e-05 9.390e-05 9.988e-05 1.063e-04 1.200e-04

Graphical Summary

For graphics I use the ggplot2 library.

library(ggplot2)
### convert output to a column matrix
xx <-as.matrix(output)
## Plot histogram of mutation rate
ggplot(as.data.frame(xx), aes(mu)) + geom_histogram(bins=40) + scale_x_continuous(name=expression(mu), breaks=c(0,1e-4,2e-4,3e-4,4e-4), labels=c("0","0.0001","0.0002","0.0003","0.0004")) + ggtitle("Posterior Mutation Rate")

## Extract frequencies of CN 0
p0 <- xx[,2:7]
colnames(p0) <- levels(ethnicity)
p0stack <- stack(as.data.frame(p0))
colnames(p0stack) <- c("mu", "ethnicity")
ggplot(p0stack, aes(mu, col=ethnicity)) + geom_density() + scale_x_log10(name=expression(p[0]), breaks=c(0.005, 0.01, 0.02, 0.04), labels=c("0.005","0.01","0.02","0.04"))

JAGS Analysis of Copy Number

Ian Wilson

05 November, 2018

Prerequisites

Data

Run Analysis

Graphical Summary