https://genviz.org/module-04-expression/0004/02/01/DifferentialExpression/ SQ NOTE: can run ALL

library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
## 
##     windows
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse()   masks IRanges::collapse()
## ✖ dplyr::combine()    masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count()      masks matrixStats::count()
## ✖ dplyr::desc()       masks IRanges::desc()
## ✖ tidyr::expand()     masks S4Vectors::expand()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::first()      masks S4Vectors::first()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()     masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename()     masks S4Vectors::rename()
## ✖ dplyr::slice()      masks IRanges::slice()
# Read in the raw read counts
rawCounts <- read.delim("http://genomedata.org/gen-viz-workshop/intro_to_deseq2/tutorial/E-GEOD-50760-raw-counts.tsv")
head(rawCounts)
##           Gene.ID Gene.Name SRR975551 SRR975552 SRR975553 SRR975554 SRR975555
## 1 ENSG00000000003    TSPAN6      6617      1352      1492      3390      1464
## 2 ENSG00000000005      TNMD        69         1        20        23        12
## 3 ENSG00000000419      DPM1      2798       714       510      1140      1667
## 4 ENSG00000000457     SCYL3       486       629       398       239       383
## 5 ENSG00000000460  C1orf112       466       342        73       227       193
## 6 ENSG00000000938       FGR        75        95       158       107       135
##   SRR975556 SRR975557 SRR975558 SRR975559 SRR975560 SRR975561 SRR975562
## 1      1251       207      1333      2126      1799      1362      3435
## 2         4        20         2         3         6        10        15
## 3       322       273       621      1031       677       480      1194
## 4       290       164       452       172       229       264       297
## 5        35        38       184       174        68        46       173
## 6        75       236       254       121       107        94        90
##   SRR975563 SRR975564 SRR975565 SRR975566 SRR975567 SRR975568 SRR975569
## 1      1711       487      4326      3652      1090      2460      2893
## 2         2         3        21         7         3         4        18
## 3       634       679      1347       849       692      1112       756
## 4       548       232       454       172       412       331       609
## 5        81       194       241       158       223       170       117
## 6       113       360        84       227       168        95       111
##   SRR975570 SRR975571 SRR975572 SRR975573 SRR975574 SRR975575 SRR975576
## 1       691      2271      2187       999      1283       906      1359
## 2         4        20        17        23        10         5         4
## 3       278       743       488       376       300       406       320
## 4       225       539       446       325       286       296       256
## 5        38        76        73        63        56        68        33
## 6        35       212        92        72       107        83        69
##   SRR975577 SRR975578 SRR975579 SRR975580 SRR975581 SRR975582 SRR975583
## 1      1309      1008       310      2704       674      1121      1776
## 2        19        10        13        21         5        19         3
## 3       389       271       213       783       268       467       452
## 4       355       243       159       324       257       360       409
## 5        59        21        17       131        52        57        86
## 6        88        70        55        93        71        91        74
##   SRR975584 SRR975585 SRR975586 SRR975587 SRR975588 SRR975589 SRR975590
## 1       981      1185      2012      3190      1837      1177      3055
## 2        10         7         2         6        11         4         6
## 3       419       473       607      1274       774       804       968
## 4       310       430       248       314       591       447       260
## 5        70        65        79       251       108       330       197
## 6        96        82        77       215        99       712       111
##   SRR975591 SRR975592 SRR975593 SRR975594 SRR975595 SRR975596 SRR975597
## 1      1621      1276      2488      1009      2786      1717      1691
## 2         3         8        10         0        18         7        10
## 3      1105       866      1339       250       852      1095      1066
## 4       364       160       466       177       244       156       256
## 5       166       176       167        48       121       105       172
## 6       616       191       563       132        81       150       148
##   SRR975598 SRR975599 SRR975600 SRR975601 SRR975602 SRR975603 SRR975604
## 1      6192       722       680      2608      2436      1739      2134
## 2        40        14         1        16         0         2         7
## 3      1273       405       597       974      1000      1287      1250
## 4       422        95       255       331       165       347       349
## 5       293        73       148       180       160       288       100
## 6        72       111       579       108        65       593       643
# Read in the sample mappings
sampleData <- read.delim("http://genomedata.org/gen-viz-workshop/intro_to_deseq2/tutorial/E-GEOD-50760-experiment-design.tsv")
head(sampleData)
##         Run Sample.Characteristic.biopsy.site.
## 1 SRR975551                      primary tumor
## 2 SRR975552                      primary tumor
## 3 SRR975553                      primary tumor
## 4 SRR975554                      primary tumor
## 5 SRR975555                      primary tumor
## 6 SRR975556                      primary tumor
##   Sample.Characteristic.Ontology.Term.biopsy.site.
## 1             http://www.ebi.ac.uk/efo/EFO_0000616
## 2             http://www.ebi.ac.uk/efo/EFO_0000616
## 3             http://www.ebi.ac.uk/efo/EFO_0000616
## 4             http://www.ebi.ac.uk/efo/EFO_0000616
## 5             http://www.ebi.ac.uk/efo/EFO_0000616
## 6             http://www.ebi.ac.uk/efo/EFO_0000616
##   Sample.Characteristic.disease. Sample.Characteristic.Ontology.Term.disease.
## 1              colorectal cancer         http://www.ebi.ac.uk/efo/EFO_0005842
## 2              colorectal cancer         http://www.ebi.ac.uk/efo/EFO_0005842
## 3              colorectal cancer         http://www.ebi.ac.uk/efo/EFO_0005842
## 4              colorectal cancer         http://www.ebi.ac.uk/efo/EFO_0005842
## 5              colorectal cancer         http://www.ebi.ac.uk/efo/EFO_0005842
## 6              colorectal cancer         http://www.ebi.ac.uk/efo/EFO_0005842
##   Sample.Characteristic.disease.staging.
## 1             Stage IV Colorectal Cancer
## 2             Stage IV Colorectal Cancer
## 3             Stage IV Colorectal Cancer
## 4             Stage IV Colorectal Cancer
## 5             Stage IV Colorectal Cancer
## 6             Stage IV Colorectal Cancer
##   Sample.Characteristic.Ontology.Term.disease.staging.
## 1                                                   NA
## 2                                                   NA
## 3                                                   NA
## 4                                                   NA
## 5                                                   NA
## 6                                                   NA
##   Sample.Characteristic.individual.
## 1                             AMC_2
## 2                             AMC_3
## 3                             AMC_5
## 4                             AMC_6
## 5                             AMC_7
## 6                             AMC_8
##   Sample.Characteristic.Ontology.Term.individual.
## 1                                              NA
## 2                                              NA
## 3                                              NA
## 4                                              NA
## 5                                              NA
## 6                                              NA
##   Sample.Characteristic.organism. Sample.Characteristic.Ontology.Term.organism.
## 1                    Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 2                    Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 3                    Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 4                    Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 5                    Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 6                    Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
##   Sample.Characteristic.organism.part.
## 1                                colon
## 2                                colon
## 3                                colon
## 4                                colon
## 5                                colon
## 6                                colon
##   Sample.Characteristic.Ontology.Term.organism.part. Factor.Value.biopsy.site.
## 1      http://purl.obolibrary.org/obo/UBERON_0001155             primary tumor
## 2      http://purl.obolibrary.org/obo/UBERON_0001155             primary tumor
## 3      http://purl.obolibrary.org/obo/UBERON_0001155             primary tumor
## 4      http://purl.obolibrary.org/obo/UBERON_0001155             primary tumor
## 5      http://purl.obolibrary.org/obo/UBERON_0001155             primary tumor
## 6      http://purl.obolibrary.org/obo/UBERON_0001155             primary tumor
##   Factor.Value.Ontology.Term.biopsy.site. Analysed
## 1    http://www.ebi.ac.uk/efo/EFO_0000616      Yes
## 2    http://www.ebi.ac.uk/efo/EFO_0000616      Yes
## 3    http://www.ebi.ac.uk/efo/EFO_0000616      Yes
## 4    http://www.ebi.ac.uk/efo/EFO_0000616      Yes
## 5    http://www.ebi.ac.uk/efo/EFO_0000616      Yes
## 6    http://www.ebi.ac.uk/efo/EFO_0000616      Yes
# Also save a copy for later
sampleData_v2 <- sampleData
# Convert count data to a matrix of appropriate form that DEseq2 can read
geneID <- rawCounts$Gene.ID
sampleIndex <- grepl("SRR\\d+", colnames(rawCounts))
rawCounts <- as.matrix(rawCounts[,sampleIndex])
rownames(rawCounts) <- geneID
head(rawCounts)
##                 SRR975551 SRR975552 SRR975553 SRR975554 SRR975555 SRR975556
## ENSG00000000003      6617      1352      1492      3390      1464      1251
## ENSG00000000005        69         1        20        23        12         4
## ENSG00000000419      2798       714       510      1140      1667       322
## ENSG00000000457       486       629       398       239       383       290
## ENSG00000000460       466       342        73       227       193        35
## ENSG00000000938        75        95       158       107       135        75
##                 SRR975557 SRR975558 SRR975559 SRR975560 SRR975561 SRR975562
## ENSG00000000003       207      1333      2126      1799      1362      3435
## ENSG00000000005        20         2         3         6        10        15
## ENSG00000000419       273       621      1031       677       480      1194
## ENSG00000000457       164       452       172       229       264       297
## ENSG00000000460        38       184       174        68        46       173
## ENSG00000000938       236       254       121       107        94        90
##                 SRR975563 SRR975564 SRR975565 SRR975566 SRR975567 SRR975568
## ENSG00000000003      1711       487      4326      3652      1090      2460
## ENSG00000000005         2         3        21         7         3         4
## ENSG00000000419       634       679      1347       849       692      1112
## ENSG00000000457       548       232       454       172       412       331
## ENSG00000000460        81       194       241       158       223       170
## ENSG00000000938       113       360        84       227       168        95
##                 SRR975569 SRR975570 SRR975571 SRR975572 SRR975573 SRR975574
## ENSG00000000003      2893       691      2271      2187       999      1283
## ENSG00000000005        18         4        20        17        23        10
## ENSG00000000419       756       278       743       488       376       300
## ENSG00000000457       609       225       539       446       325       286
## ENSG00000000460       117        38        76        73        63        56
## ENSG00000000938       111        35       212        92        72       107
##                 SRR975575 SRR975576 SRR975577 SRR975578 SRR975579 SRR975580
## ENSG00000000003       906      1359      1309      1008       310      2704
## ENSG00000000005         5         4        19        10        13        21
## ENSG00000000419       406       320       389       271       213       783
## ENSG00000000457       296       256       355       243       159       324
## ENSG00000000460        68        33        59        21        17       131
## ENSG00000000938        83        69        88        70        55        93
##                 SRR975581 SRR975582 SRR975583 SRR975584 SRR975585 SRR975586
## ENSG00000000003       674      1121      1776       981      1185      2012
## ENSG00000000005         5        19         3        10         7         2
## ENSG00000000419       268       467       452       419       473       607
## ENSG00000000457       257       360       409       310       430       248
## ENSG00000000460        52        57        86        70        65        79
## ENSG00000000938        71        91        74        96        82        77
##                 SRR975587 SRR975588 SRR975589 SRR975590 SRR975591 SRR975592
## ENSG00000000003      3190      1837      1177      3055      1621      1276
## ENSG00000000005         6        11         4         6         3         8
## ENSG00000000419      1274       774       804       968      1105       866
## ENSG00000000457       314       591       447       260       364       160
## ENSG00000000460       251       108       330       197       166       176
## ENSG00000000938       215        99       712       111       616       191
##                 SRR975593 SRR975594 SRR975595 SRR975596 SRR975597 SRR975598
## ENSG00000000003      2488      1009      2786      1717      1691      6192
## ENSG00000000005        10         0        18         7        10        40
## ENSG00000000419      1339       250       852      1095      1066      1273
## ENSG00000000457       466       177       244       156       256       422
## ENSG00000000460       167        48       121       105       172       293
## ENSG00000000938       563       132        81       150       148        72
##                 SRR975599 SRR975600 SRR975601 SRR975602 SRR975603 SRR975604
## ENSG00000000003       722       680      2608      2436      1739      2134
## ENSG00000000005        14         1        16         0         2         7
## ENSG00000000419       405       597       974      1000      1287      1250
## ENSG00000000457        95       255       331       165       347       349
## ENSG00000000460        73       148       180       160       288       100
## ENSG00000000938       111       579       108        65       593       643
# Convert sample variable mappings to an appropriate form that DESeq2 can read
head(sampleData)
##         Run Sample.Characteristic.biopsy.site.
## 1 SRR975551                      primary tumor
## 2 SRR975552                      primary tumor
## 3 SRR975553                      primary tumor
## 4 SRR975554                      primary tumor
## 5 SRR975555                      primary tumor
## 6 SRR975556                      primary tumor
##   Sample.Characteristic.Ontology.Term.biopsy.site.
## 1             http://www.ebi.ac.uk/efo/EFO_0000616
## 2             http://www.ebi.ac.uk/efo/EFO_0000616
## 3             http://www.ebi.ac.uk/efo/EFO_0000616
## 4             http://www.ebi.ac.uk/efo/EFO_0000616
## 5             http://www.ebi.ac.uk/efo/EFO_0000616
## 6             http://www.ebi.ac.uk/efo/EFO_0000616
##   Sample.Characteristic.disease. Sample.Characteristic.Ontology.Term.disease.
## 1              colorectal cancer         http://www.ebi.ac.uk/efo/EFO_0005842
## 2              colorectal cancer         http://www.ebi.ac.uk/efo/EFO_0005842
## 3              colorectal cancer         http://www.ebi.ac.uk/efo/EFO_0005842
## 4              colorectal cancer         http://www.ebi.ac.uk/efo/EFO_0005842
## 5              colorectal cancer         http://www.ebi.ac.uk/efo/EFO_0005842
## 6              colorectal cancer         http://www.ebi.ac.uk/efo/EFO_0005842
##   Sample.Characteristic.disease.staging.
## 1             Stage IV Colorectal Cancer
## 2             Stage IV Colorectal Cancer
## 3             Stage IV Colorectal Cancer
## 4             Stage IV Colorectal Cancer
## 5             Stage IV Colorectal Cancer
## 6             Stage IV Colorectal Cancer
##   Sample.Characteristic.Ontology.Term.disease.staging.
## 1                                                   NA
## 2                                                   NA
## 3                                                   NA
## 4                                                   NA
## 5                                                   NA
## 6                                                   NA
##   Sample.Characteristic.individual.
## 1                             AMC_2
## 2                             AMC_3
## 3                             AMC_5
## 4                             AMC_6
## 5                             AMC_7
## 6                             AMC_8
##   Sample.Characteristic.Ontology.Term.individual.
## 1                                              NA
## 2                                              NA
## 3                                              NA
## 4                                              NA
## 5                                              NA
## 6                                              NA
##   Sample.Characteristic.organism. Sample.Characteristic.Ontology.Term.organism.
## 1                    Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 2                    Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 3                    Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 4                    Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 5                    Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 6                    Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
##   Sample.Characteristic.organism.part.
## 1                                colon
## 2                                colon
## 3                                colon
## 4                                colon
## 5                                colon
## 6                                colon
##   Sample.Characteristic.Ontology.Term.organism.part. Factor.Value.biopsy.site.
## 1      http://purl.obolibrary.org/obo/UBERON_0001155             primary tumor
## 2      http://purl.obolibrary.org/obo/UBERON_0001155             primary tumor
## 3      http://purl.obolibrary.org/obo/UBERON_0001155             primary tumor
## 4      http://purl.obolibrary.org/obo/UBERON_0001155             primary tumor
## 5      http://purl.obolibrary.org/obo/UBERON_0001155             primary tumor
## 6      http://purl.obolibrary.org/obo/UBERON_0001155             primary tumor
##   Factor.Value.Ontology.Term.biopsy.site. Analysed
## 1    http://www.ebi.ac.uk/efo/EFO_0000616      Yes
## 2    http://www.ebi.ac.uk/efo/EFO_0000616      Yes
## 3    http://www.ebi.ac.uk/efo/EFO_0000616      Yes
## 4    http://www.ebi.ac.uk/efo/EFO_0000616      Yes
## 5    http://www.ebi.ac.uk/efo/EFO_0000616      Yes
## 6    http://www.ebi.ac.uk/efo/EFO_0000616      Yes
rownames(sampleData) <- sampleData$Run
keep <- c("Sample.Characteristic.biopsy.site.", "Sample.Characteristic.individual.")
sampleData <- sampleData[,keep]
colnames(sampleData) <- c("tissueType", "individualID")
sampleData$individualID <- factor(sampleData$individualID)
head(sampleData)
##              tissueType individualID
## SRR975551 primary tumor        AMC_2
## SRR975552 primary tumor        AMC_3
## SRR975553 primary tumor        AMC_5
## SRR975554 primary tumor        AMC_6
## SRR975555 primary tumor        AMC_7
## SRR975556 primary tumor        AMC_8
# Put the columns of the count data in the same order as rows names of the sample mapping, then make sure it worked
rawCounts <- rawCounts[,unique(rownames(sampleData))]
all(colnames(rawCounts) == rownames(sampleData))
## [1] TRUE
# rename the tissue types
rename_tissues <- function(x){
  x <- switch(as.character(x), "normal"="normal-looking surrounding colonic epithelium", "primary tumor"="primary colorectal cancer",  "colorectal cancer metastatic in the liver"="metastatic colorectal cancer to the liver")
  return(x)
}
sampleData$tissueType <- unlist(lapply(sampleData$tissueType, rename_tissues))

# Order the tissue types so that it is sensible and make sure the control sample is first: normal sample -> primary tumor -> metastatic tumor
sampleData$tissueType <- factor(sampleData$tissueType, levels=c("normal-looking surrounding colonic epithelium", "primary colorectal cancer", "metastatic colorectal cancer to the liver"))

# Create the DEseq2DataSet object
deseq2Data <- DESeqDataSetFromMatrix(countData=rawCounts, colData=sampleData, design= ~ individualID + tissueType)
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
# Perform pre-filtering of the data
dim(deseq2Data)
## [1] 65217    54
deseq2Data <- deseq2Data[rowSums(counts(deseq2Data)) > 5, ]

# Register the number of cores to use
library(BiocParallel)
register(MulticoreParam(4))
## Warning in MulticoreParam(4): MulticoreParam() not supported on Windows, use
## SnowParam()
# do one of the following three options:
# 1. Run pipeline for differential expression steps (if you set up parallel processing, set parallel = TRUE here)
deseq2Data <- DESeq(deseq2Data)
## estimating size factors
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## final dispersion estimates
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## fitting model and testing
# 2. Load the R environment with this object from the web (optional)
# load(url("http://genomedata.org/gen-viz-workshop/intro_to_deseq2/tutorial/deseq2Data_v1.RData"))

# 3. Download the .Rdata file and load directly(optional)
# load("deseq2Data_v1.RData")

# Extract differential expression results
# For "tissueType" perform primary vs normal comparison
deseq2Results <- results(deseq2Data, contrast=c("tissueType", "primary colorectal cancer", "normal-looking surrounding colonic epithelium"))

# View summary of results
summary(deseq2Results)
## 
## out of 35179 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4816, 14%
## LFC < 0 (down)     : 5718, 16%
## outliers [1]       : 0, 0%
## low counts [2]     : 10231, 29%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Using DEseq2 built in method
plotMA(deseq2Results)

# Load libraries
# install.packages(c("ggplot2", "scales", "viridis"))
library(ggplot2)
library(scales) # needed for oob parameter
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
library(viridis)
## Loading required package: viridisLite
## 
## Attaching package: 'viridis'
## The following object is masked from 'package:scales':
## 
##     viridis_pal
# Coerce to a data frame
deseq2ResDF <- as.data.frame(deseq2Results)

# Examine this data frame
head(deseq2ResDF)
##                   baseMean log2FoldChange     lfcSE       stat       pvalue
## ENSG00000000003 1824.88077      0.2319793 0.1754965  1.3218460 1.862194e-01
## ENSG00000000005   10.85195     -0.3581575 0.4171716 -0.8585377 3.905956e-01
## ENSG00000000419  725.17340      0.7034028 0.1319274  5.3317414 9.727542e-08
## ENSG00000000457  311.35784     -0.2259470 0.1094811 -2.0637992 3.903676e-02
## ENSG00000000460  126.36193      1.0474994 0.2050559  5.1083599 3.249672e-07
## ENSG00000000938  163.15327      0.3837251 0.2277006  1.6852175 9.194662e-02
##                         padj
## ENSG00000000003 3.163423e-01
## ENSG00000000005 5.420883e-01
## ENSG00000000419 3.812710e-06
## ENSG00000000457 9.389795e-02
## ENSG00000000460 9.537978e-06
## ENSG00000000938 1.836724e-01
# Set a boolean column for significance
deseq2ResDF$significant <- ifelse(deseq2ResDF$padj < .1, "Significant", NA)

# Plot the results similar to DEseq2
ggplot(deseq2ResDF, aes(baseMean, log2FoldChange, colour=significant)) + geom_point(size=1) + scale_y_continuous(limits=c(-3, 3), oob=squish) + scale_x_log10() + geom_hline(yintercept = 0, colour="tomato1", size=2) + labs(x="mean of normalized counts", y="log fold change") + scale_colour_manual(name="q-value", values=("Significant"="red"), na.value="grey50") + theme_bw()

# Let's add some more detail
ggplot(deseq2ResDF, aes(baseMean, log2FoldChange, colour=padj)) + geom_point(size=1) + scale_y_continuous(limits=c(-3, 3), oob=squish) + scale_x_log10() + geom_hline(yintercept = 0, colour="darkorchid4", size=1, linetype="longdash") + labs(x="mean of normalized counts", y="log fold change") + scale_colour_viridis(direction=-1, trans='sqrt') + theme_bw() + geom_density_2d(colour="black", size=0.5)

# Extract counts for the gene otop2
otop2Counts <- plotCounts(deseq2Data, gene="ENSG00000183034", intgroup=c("tissueType", "individualID"), returnData=TRUE)

# Plot the data using ggplot2
colourPallette <- c("#7145cd","#bbcfc4","#90de4a","#cd46c1","#77dd8e","#592b79","#d7c847","#6378c9","#619a3c","#d44473","#63cfb6","#dd5d36","#5db2ce","#8d3b28","#b1a4cb","#af8439","#c679c0","#4e703f","#753148","#cac88e","#352b48","#cd8d88","#463d25","#556f73")
ggplot(otop2Counts, aes(x=tissueType, y=count, colour=individualID, group=individualID)) + geom_point() + geom_line() + theme_bw() + theme(axis.text.x=element_text(angle=15, hjust=1)) + scale_colour_manual(values=colourPallette) + guides(colour=guide_legend(ncol=3)) + ggtitle("OTOP2")

deseq2ResDF["ENSG00000183034",]
##                 baseMean log2FoldChange    lfcSE      stat       pvalue
## ENSG00000183034 375.7911      -7.337968 0.748847 -9.799022 1.136808e-22
##                         padj significant
## ENSG00000183034 4.051584e-19 Significant
rawCounts["ENSG00000183034",]
## SRR975551 SRR975552 SRR975553 SRR975554 SRR975555 SRR975556 SRR975557 SRR975558 
##         2         6       250         4        19        11         0         7 
## SRR975559 SRR975560 SRR975561 SRR975562 SRR975563 SRR975564 SRR975565 SRR975566 
##         0         8       159         0       235         1         1         1 
## SRR975567 SRR975568 SRR975569 SRR975570 SRR975571 SRR975572 SRR975573 SRR975574 
##        84         0      1281       620       335       798      1058       505 
## SRR975575 SRR975576 SRR975577 SRR975578 SRR975579 SRR975580 SRR975581 SRR975582 
##       861      1321      1336       736       517      1418      1337      1013 
## SRR975583 SRR975584 SRR975585 SRR975586 SRR975587 SRR975588 SRR975589 SRR975590 
##      1397       613      1251       721         2      1368         1         1 
## SRR975591 SRR975592 SRR975593 SRR975594 SRR975595 SRR975596 SRR975597 SRR975598 
##         1         1         0         0         0         2         0         2 
## SRR975599 SRR975600 SRR975601 SRR975602 SRR975603 SRR975604 
##         2         2         0         0         1         3
normals=row.names(sampleData[sampleData[,"tissueType"]=="normal-looking surrounding colonic epithelium",])
primaries=row.names(sampleData[sampleData[,"tissueType"]=="primary colorectal cancer",])
rawCounts["ENSG00000183034",normals]
## SRR975569 SRR975570 SRR975571 SRR975572 SRR975573 SRR975574 SRR975575 SRR975576 
##      1281       620       335       798      1058       505       861      1321 
## SRR975577 SRR975578 SRR975579 SRR975580 SRR975581 SRR975582 SRR975583 SRR975584 
##      1336       736       517      1418      1337      1013      1397       613 
## SRR975585 SRR975586 SRR975587 
##      1251       721         2
rawCounts["ENSG00000183034",primaries]
## SRR975551 SRR975552 SRR975553 SRR975554 SRR975555 SRR975556 SRR975557 SRR975558 
##         2         6       250         4        19        11         0         7 
## SRR975559 SRR975560 SRR975561 SRR975562 SRR975563 SRR975564 SRR975565 SRR975566 
##         0         8       159         0       235         1         1         1 
## SRR975567 SRR975568 
##        84         0
# Transform count data using the variance stablilizing transform
deseq2VST <- vst(deseq2Data)

# Convert the DESeq transformed object to a data frame
deseq2VST <- assay(deseq2VST)
deseq2VST <- as.data.frame(deseq2VST)
deseq2VST$Gene <- rownames(deseq2VST)
head(deseq2VST)
##                 SRR975551 SRR975552 SRR975553 SRR975554 SRR975555 SRR975556
## ENSG00000000003 11.923929  9.968500 10.588833 11.791039 10.058138 10.737563
## ENSG00000000005  5.625681  2.799376  4.866074  5.031436  4.065530  3.717037
## ENSG00000000419 10.686895  9.059366  9.056915 10.226302 10.243933  8.802204
## ENSG00000000457  8.199791  8.879902  8.706264  8.013961  8.158745  8.654508
## ENSG00000000460  8.141111  8.024470  6.392902  7.942364  7.214920  5.820591
## ENSG00000000938  5.725709  6.301239  7.421013  6.916156  6.736584  6.794418
##                 SRR975557 SRR975558 SRR975559 SRR975560 SRR975561 SRR975562
## ENSG00000000003  8.267878  9.835585 10.817518 10.757317 10.763156 11.706322
## ENSG00000000005  5.236297  2.998960  3.269549  3.755656  4.395245  4.523032
## ENSG00000000419  8.656613  8.750360  9.781350  9.360215  9.272733 10.189392
## ENSG00000000457  7.943242  8.303669  7.271484  7.835688  8.427880  8.216450
## ENSG00000000460  5.998354  7.066791  7.287183  6.212864  6.075822  7.468416
## ENSG00000000938  8.451694  7.504600  6.799352  6.802135  7.005928  6.592145
##                 SRR975563 SRR975564 SRR975565 SRR975566 SRR975567 SRR975568
## ENSG00000000003 10.210433  8.859864 11.858910 12.088638  9.905801 11.127106
## ENSG00000000005  3.003668  3.321234  4.730903  4.034155  3.285318  3.461712
## ENSG00000000419  8.797326  9.331147 10.183658  9.994004  9.258376  9.988913
## ENSG00000000457  8.591742  7.821325  8.637253  7.741407  8.525071  8.271511
## ENSG00000000460  6.015813  7.574696  7.752903  7.624403  7.669804  7.350325
## ENSG00000000938  6.439217  8.434081  6.341108  8.126546  7.282308  6.572592
##                 SRR975569 SRR975570 SRR975571 SRR975572 SRR975573 SRR975574
## ENSG00000000003 11.059529 10.356867 10.569298 10.854676 10.205960 10.701622
## ENSG00000000005  4.432915  3.955506  4.438338  4.507882  5.160314  4.410729
## ENSG00000000419  9.141181  9.058456  8.975535  8.715784  8.814977  8.631387
## ENSG00000000457  8.834970  8.759218  8.522509  8.588946  8.609350  8.564093
## ENSG00000000460  6.571444  6.334997  5.896374  6.141287  6.375558  6.347129
## ENSG00000000938  6.502814  6.229869  7.230902  6.437108  6.548687  7.203174
##                 SRR975575 SRR975576 SRR975577 SRR975578 SRR975579 SRR975580
## ENSG00000000003  9.963238 10.749029 10.276631 10.471313  9.142663 11.172871
## ENSG00000000005  3.720561  3.666685  4.724335  4.485812  5.000971  4.723688
## ENSG00000000419  8.821720  8.687815  8.551434  8.601738  8.612026  9.399138
## ENSG00000000457  8.376800  8.373983  8.422896  8.448369  8.201763  8.153977
## ENSG00000000460  6.381203  5.658967  6.012253  5.255633  5.297264  6.914239
## ENSG00000000938  6.640545  6.586080  6.522322  6.743217  6.752428  6.463241
##                 SRR975581 SRR975582 SRR975583 SRR975584 SRR975585 SRR975586
## ENSG00000000003  9.761890 10.113720 10.870166 10.124489 10.086779 10.862657
## ENSG00000000005  3.829705  4.766055  3.382458  4.289463  3.829410  3.121770
## ENSG00000000419  8.454632  8.867350  8.916874  8.913281  8.780319  9.150469
## ENSG00000000457  8.395799  8.500433  8.775569  8.488346  8.645863  7.892788
## ENSG00000000460  6.235882  6.020642  6.634603  6.461639  6.091997  6.355340
## ENSG00000000938  6.638795  6.619980  6.438706  6.876938  6.387855  6.322406
##                 SRR975587 SRR975588 SRR975589 SRR975590 SRR975591 SRR975592
## ENSG00000000003 11.267575 10.333011 10.187163 11.868828 10.220378 11.040559
## ENSG00000000005  3.610149  3.968818  3.509957  3.933814  3.199553  4.427753
## ENSG00000000419  9.951607  9.100348  9.642677 10.218513  9.672814 10.484407
## ENSG00000000457  7.971886  8.718730  8.809136  8.352412  8.103841  8.088880
## ENSG00000000460  7.662030  6.399539  8.381879  7.965092  7.027259  8.221922
## ENSG00000000938  7.449577  6.287749  9.469536  7.177886  8.842797  8.336400
##                 SRR975593 SRR975594 SRR975595 SRR975596 SRR975597 SRR975598
## ENSG00000000003 10.634407 11.034236 11.521405 10.657266 10.812194 12.409812
## ENSG00000000005  3.827427  2.232901  4.782965  3.849417  4.232950  5.459134
## ENSG00000000419  9.747850  9.040705  9.822536 10.013052 10.150926 10.137215
## ENSG00000000457  8.254051  8.552997  8.055002  7.277367  8.129814  8.568186
## ENSG00000000460  6.848925  6.761790  7.092787  6.746588  7.579064  8.057261
## ENSG00000000938  8.519288  8.141869  6.560104  7.224172  7.373415  6.175141
##                 SRR975599 SRR975600 SRR975601 SRR975602 SRR975603 SRR975604
## ENSG00000000003 10.424530  9.227771 11.185892 11.654602 10.240971 10.592443
## ENSG00000000005  5.139221  2.848146  4.502146  2.232901  3.005591  3.666156
## ENSG00000000419  9.598121  9.043083  9.774641 10.376102  9.810639  9.826985
## ENSG00000000457  7.561362  7.849440  8.247018  7.826785  7.959503  8.023970
## ENSG00000000460  7.202222  7.105346  7.404384  7.784218  7.701531  6.340575
## ENSG00000000938  7.775770  8.999707  6.718028  6.568981  8.710157  8.881872
##                            Gene
## ENSG00000000003 ENSG00000000003
## ENSG00000000005 ENSG00000000005
## ENSG00000000419 ENSG00000000419
## ENSG00000000457 ENSG00000000457
## ENSG00000000460 ENSG00000000460
## ENSG00000000938 ENSG00000000938
# Keep only the significantly differentiated genes where the fold-change was at least 3
sigGenes <- rownames(deseq2ResDF[deseq2ResDF$padj <= .05 & abs(deseq2ResDF$log2FoldChange) > 3,])
deseq2VST <- deseq2VST[deseq2VST$Gene %in% sigGenes,]

# Convert the VST counts to long format for ggplot2
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# First compare wide vs long version
deseq2VST_wide <- deseq2VST
deseq2VST_long <- melt(deseq2VST, id.vars=c("Gene"))

head(deseq2VST_wide)
##                 SRR975551 SRR975552 SRR975553 SRR975554 SRR975555 SRR975556
## ENSG00000007306 10.568338  9.159074 16.233798 10.131600 13.751881 12.519868
## ENSG00000012504  3.231426  3.345376  8.403741  3.176825  4.193719  6.522882
## ENSG00000015413 11.169901 10.657855  5.582772 14.553933 11.907716  6.104372
## ENSG00000016602  8.153040  6.466539 13.864836  8.013961 10.864525  8.870742
## ENSG00000029559  3.101820  4.379893  2.232901  4.131232  4.364544  2.232901
## ENSG00000034971  2.232901  2.799376  6.955727  2.232901  2.232901  2.232901
##                 SRR975557 SRR975558 SRR975559 SRR975560 SRR975561 SRR975562
## ENSG00000007306  5.667957 12.776169  6.144317 14.304986 14.984041 12.098811
## ENSG00000012504  3.022599  4.199691  3.085199  7.112992  2.974334  3.765197
## ENSG00000015413  5.344300  4.457055 12.167876  9.521065 10.161442 13.350185
## ENSG00000016602  4.607561  9.224555  4.510159 12.528673 12.408709  4.523032
## ENSG00000029559  2.232901  6.411503  5.578436  5.112705  2.232901  4.585088
## ENSG00000034971  7.993170  2.232901  2.839882  3.632837  5.593554  2.882770
##                 SRR975563 SRR975564 SRR975565 SRR975566 SRR975567 SRR975568
## ENSG00000007306 15.114977  8.077518  8.483470  6.816881 12.947413 10.863179
## ENSG00000012504  3.885169  3.849195  4.578502  3.238968  3.573172  2.861074
## ENSG00000015413  4.468793 11.161337 11.322352 12.570130 11.191026 11.039218
## ENSG00000016602 11.258701  5.310833  4.056402  5.555016 11.548776  5.635144
## ENSG00000029559  2.232901  5.165728  3.429873  4.034155  6.057155  2.861074
## ENSG00000034971  3.538801  2.232901  2.232901  2.232901  3.986470  2.232901
##                 SRR975569 SRR975570 SRR975571 SRR975572 SRR975573 SRR975574
## ENSG00000007306 15.563131 14.308692 16.204152 15.055571 16.165601 15.575148
## ENSG00000012504  4.975840  4.778122  8.374255  4.507882  8.322641  7.727500
## ENSG00000015413  4.432915  4.430717  5.742498  4.263200  5.537905  5.759378
## ENSG00000016602 13.012940 14.815772 13.859241 12.312736 15.981762 14.202857
## ENSG00000029559  2.232901  2.232901  2.232901  2.232901  2.232901  2.232901
## ENSG00000034971  6.571444  6.743818  6.933769  7.268226  6.030424  4.212182
##                 SRR975575 SRR975576 SRR975577 SRR975578 SRR975579 SRR975580
## ENSG00000007306 15.174126 14.422544 15.775346 15.907044 13.868314 15.399874
## ENSG00000012504  4.169669  5.861471  7.743603  9.306858  3.917586  5.845152
## ENSG00000015413  4.499098  4.710961  6.133733 10.081967  3.710734 12.799715
## ENSG00000016602 13.467160 12.697889 14.008965 13.577174 12.061149 13.960487
## ENSG00000029559  2.232901  2.232901  2.232901  2.232901  2.232901  2.841968
## ENSG00000034971  8.060166  6.586080  6.209339  6.170983  7.994167  5.237540
##                 SRR975581 SRR975582 SRR975583 SRR975584 SRR975585 SRR975586
## ENSG00000007306 15.050135 15.274470 15.535940 15.791451 15.071779 14.159704
## ENSG00000012504  3.493410  7.973717  4.136058  7.691774  4.840240  6.322406
## ENSG00000015413  4.859370  6.146305  4.136058  5.788260  5.135739  5.070622
## ENSG00000016602 12.517156 14.375398 14.550607 15.092849 13.345953 13.398398
## ENSG00000029559  2.232901  2.886038  2.232901  2.232901  2.862579  2.232901
## ENSG00000034971  7.352685  6.484078  6.588078  3.423110  7.103513  4.271017
##                 SRR975587 SRR975588 SRR975589 SRR975590 SRR975591 SRR975592
## ENSG00000007306  9.224802 14.013551 10.556329  7.744511  8.333508  9.773894
## ENSG00000012504  9.150519  5.666463  6.734259  7.444022  8.733186  8.008271
## ENSG00000015413  8.053613  3.818527  5.592327 14.584635  9.594582 10.591239
## ENSG00000016602  5.308877 14.645248  5.500416  3.933814  4.573319  3.406627
## ENSG00000029559  3.224530  2.232901  7.966007  4.266310  6.076565  5.439764
## ENSG00000034971  2.232901  6.018477  2.886867  2.232901  2.797879  3.073968
##                 SRR975593 SRR975594 SRR975595 SRR975596 SRR975597 SRR975598
## ENSG00000007306  6.706770  5.528820 10.605091  8.851557 11.164379 12.652243
## ENSG00000012504  6.519254 10.965734  7.328385  4.423801  6.777223  6.882624
## ENSG00000015413 12.481088  5.157102 12.790684 13.036171 12.108500 13.100530
## ENSG00000016602  4.086850  4.028977  4.228051  3.739617  4.788758  4.546585
## ENSG00000029559  7.502999  2.232901  3.695395  7.862908  3.387194  2.851402
## ENSG00000034971  2.760253  2.232901  2.232901  2.232901  2.910991  2.232901
##                 SRR975599 SRR975600 SRR975601 SRR975602 SRR975603 SRR975604
## ENSG00000007306 10.865647  7.897514  7.389139  6.984942  6.288510  7.637623
## ENSG00000012504  6.437264  7.620326  8.989993  4.231262  8.527824  8.521452
## ENSG00000015413 13.421117 10.612505  7.218498 13.316369 10.100135 11.060091
## ENSG00000016602  4.675336  5.325398  4.817130  5.751074  5.158338  4.846432
## ENSG00000029559  3.486933  5.045923  3.451599  6.825973  7.203106  4.361161
## ENSG00000034971  2.232901  2.232901  2.232901  2.232901  2.782500  2.232901
##                            Gene
## ENSG00000007306 ENSG00000007306
## ENSG00000012504 ENSG00000012504
## ENSG00000015413 ENSG00000015413
## ENSG00000016602 ENSG00000016602
## ENSG00000029559 ENSG00000029559
## ENSG00000034971 ENSG00000034971
head(deseq2VST_long)
##              Gene  variable     value
## 1 ENSG00000007306 SRR975551 10.568338
## 2 ENSG00000012504 SRR975551  3.231426
## 3 ENSG00000015413 SRR975551 11.169901
## 4 ENSG00000016602 SRR975551  8.153040
## 5 ENSG00000029559 SRR975551  3.101820
## 6 ENSG00000034971 SRR975551  2.232901
# Now overwrite our original data frame with the long format
deseq2VST <- melt(deseq2VST, id.vars=c("Gene"))

# Make a heatmap
heatmap <- ggplot(deseq2VST, aes(x=variable, y=Gene, fill=value)) + geom_raster() + scale_fill_viridis(trans="sqrt") + theme(axis.text.x=element_text(angle=65, hjust=1), axis.text.y=element_blank(), axis.ticks.y=element_blank())
heatmap

# Convert the significant genes back to a matrix for clustering
deseq2VSTMatrix <- dcast(deseq2VST, Gene ~ variable)
rownames(deseq2VSTMatrix) <- deseq2VSTMatrix$Gene
deseq2VSTMatrix$Gene <- NULL

# Compute a distance calculation on both dimensions of the matrix
distanceGene <- dist(deseq2VSTMatrix)
distanceSample <- dist(t(deseq2VSTMatrix))

# Cluster based on the distance calculations
clusterGene <- hclust(distanceGene, method="average")
clusterSample <- hclust(distanceSample, method="average")

# Construct a dendogram for samples
# install.packages("ggdendro")
library(ggdendro)
## Warning: package 'ggdendro' was built under R version 4.2.3
sampleModel <- as.dendrogram(clusterSample)
sampleDendrogramData <- segment(dendro_data(sampleModel, type = "rectangle"))
sampleDendrogram <- ggplot(sampleDendrogramData) + geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + theme_dendro()

# Re-factor samples for ggplot2
deseq2VST$variable <- factor(deseq2VST$variable, levels=clusterSample$labels[clusterSample$order])

# Construct the heatmap. note that at this point we have only clustered the samples NOT the genes
heatmap <- ggplot(deseq2VST, aes(x=variable, y=Gene, fill=value)) + geom_raster() + scale_fill_viridis(trans="sqrt") + theme(axis.text.x=element_text(angle=65, hjust=1), axis.text.y=element_blank(), axis.ticks.y=element_blank())
heatmap

 # Combine the dendrogram and the heatmap
#install.packages("gridExtra")
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:BiocGenerics':
## 
##     combine
grid.arrange(sampleDendrogram, heatmap, ncol=1, heights=c(1,5))

# Load in libraries necessary for modifying plots
#install.packages("gtable")
library(gtable)
library(grid)

# Modify the ggplot objects
sampleDendrogram_1 <- sampleDendrogram + scale_x_continuous(expand=c(.0085, .0085)) + scale_y_continuous(expand=c(0, 0))
heatmap_1 <- heatmap + scale_x_discrete(expand=c(0, 0)) + scale_y_discrete(expand=c(0, 0))

# Convert both grid based objects to grobs
sampleDendrogramGrob <- ggplotGrob(sampleDendrogram_1)
heatmapGrob <- ggplotGrob(heatmap_1)

# Check the widths of each grob
sampleDendrogramGrob$widths
## [1] 5.5points            0cm                  0cm                 
## [4] sum(0cm, 2.75points) 1null                0cm                 
## [7] 0cm                  0points              5.5points
heatmapGrob$widths
##  [1] 5.5points            0cm                  1grobwidth          
##  [4] sum(0cm, 2.75points) 1null                0cm                 
##  [7] 0cm                  11points             1.53346700913242cm  
## [10] 0points              5.5points
# Add in the missing columns
sampleDendrogramGrob <- gtable_add_cols(sampleDendrogramGrob, heatmapGrob$widths[7], 6)
sampleDendrogramGrob <- gtable_add_cols(sampleDendrogramGrob, heatmapGrob$widths[8], 7)

# Make sure every width between the two grobs is the same
maxWidth <- unit.pmax(sampleDendrogramGrob$widths, heatmapGrob$widths)
sampleDendrogramGrob$widths <- as.list(maxWidth)
heatmapGrob$widths <- as.list(maxWidth)

# Arrange the grobs into a plot
finalGrob <- arrangeGrob(sampleDendrogramGrob, heatmapGrob, ncol=1, heights=c(2,5))

# Draw the plot
grid.draw(finalGrob)

# Re-order the sample data to match the clustering we did
sampleData_v2$Run <- factor(sampleData_v2$Run, levels=clusterSample$labels[clusterSample$order])

# Construct a plot to show the clinical data
colours <- c("#743B8B", "#8B743B", "#8B3B52")
sampleClinical <- ggplot(sampleData_v2, aes(x=Run, y=1, fill=Sample.Characteristic.biopsy.site.)) + geom_tile() + scale_x_discrete(expand=c(0, 0)) + scale_y_discrete(expand=c(0, 0)) + scale_fill_manual(name="Tissue", values=colours) + theme_void()

# Convert the clinical plot to a grob
sampleClinicalGrob <- ggplotGrob(sampleClinical)

# Make sure every width between all grobs is the same
maxWidth <- unit.pmax(sampleDendrogramGrob$widths, heatmapGrob$widths, sampleClinicalGrob$widths)
sampleDendrogramGrob$widths <- as.list(maxWidth)
heatmapGrob$widths <- as.list(maxWidth)
sampleClinicalGrob$widths <- as.list(maxWidth)

# Arrange and output the final plot
finalGrob <- arrangeGrob(sampleDendrogramGrob, sampleClinicalGrob, heatmapGrob, ncol=1, heights=c(2,1,5))
grid.draw(finalGrob)