https://genviz.org/module-04-expression/0004/02/01/DifferentialExpression/ SQ NOTE: can run ALL
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
##
## windows
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ dplyr::slice() masks IRanges::slice()
# Read in the raw read counts
rawCounts <- read.delim("http://genomedata.org/gen-viz-workshop/intro_to_deseq2/tutorial/E-GEOD-50760-raw-counts.tsv")
head(rawCounts)
## Gene.ID Gene.Name SRR975551 SRR975552 SRR975553 SRR975554 SRR975555
## 1 ENSG00000000003 TSPAN6 6617 1352 1492 3390 1464
## 2 ENSG00000000005 TNMD 69 1 20 23 12
## 3 ENSG00000000419 DPM1 2798 714 510 1140 1667
## 4 ENSG00000000457 SCYL3 486 629 398 239 383
## 5 ENSG00000000460 C1orf112 466 342 73 227 193
## 6 ENSG00000000938 FGR 75 95 158 107 135
## SRR975556 SRR975557 SRR975558 SRR975559 SRR975560 SRR975561 SRR975562
## 1 1251 207 1333 2126 1799 1362 3435
## 2 4 20 2 3 6 10 15
## 3 322 273 621 1031 677 480 1194
## 4 290 164 452 172 229 264 297
## 5 35 38 184 174 68 46 173
## 6 75 236 254 121 107 94 90
## SRR975563 SRR975564 SRR975565 SRR975566 SRR975567 SRR975568 SRR975569
## 1 1711 487 4326 3652 1090 2460 2893
## 2 2 3 21 7 3 4 18
## 3 634 679 1347 849 692 1112 756
## 4 548 232 454 172 412 331 609
## 5 81 194 241 158 223 170 117
## 6 113 360 84 227 168 95 111
## SRR975570 SRR975571 SRR975572 SRR975573 SRR975574 SRR975575 SRR975576
## 1 691 2271 2187 999 1283 906 1359
## 2 4 20 17 23 10 5 4
## 3 278 743 488 376 300 406 320
## 4 225 539 446 325 286 296 256
## 5 38 76 73 63 56 68 33
## 6 35 212 92 72 107 83 69
## SRR975577 SRR975578 SRR975579 SRR975580 SRR975581 SRR975582 SRR975583
## 1 1309 1008 310 2704 674 1121 1776
## 2 19 10 13 21 5 19 3
## 3 389 271 213 783 268 467 452
## 4 355 243 159 324 257 360 409
## 5 59 21 17 131 52 57 86
## 6 88 70 55 93 71 91 74
## SRR975584 SRR975585 SRR975586 SRR975587 SRR975588 SRR975589 SRR975590
## 1 981 1185 2012 3190 1837 1177 3055
## 2 10 7 2 6 11 4 6
## 3 419 473 607 1274 774 804 968
## 4 310 430 248 314 591 447 260
## 5 70 65 79 251 108 330 197
## 6 96 82 77 215 99 712 111
## SRR975591 SRR975592 SRR975593 SRR975594 SRR975595 SRR975596 SRR975597
## 1 1621 1276 2488 1009 2786 1717 1691
## 2 3 8 10 0 18 7 10
## 3 1105 866 1339 250 852 1095 1066
## 4 364 160 466 177 244 156 256
## 5 166 176 167 48 121 105 172
## 6 616 191 563 132 81 150 148
## SRR975598 SRR975599 SRR975600 SRR975601 SRR975602 SRR975603 SRR975604
## 1 6192 722 680 2608 2436 1739 2134
## 2 40 14 1 16 0 2 7
## 3 1273 405 597 974 1000 1287 1250
## 4 422 95 255 331 165 347 349
## 5 293 73 148 180 160 288 100
## 6 72 111 579 108 65 593 643
# Read in the sample mappings
sampleData <- read.delim("http://genomedata.org/gen-viz-workshop/intro_to_deseq2/tutorial/E-GEOD-50760-experiment-design.tsv")
head(sampleData)
## Run Sample.Characteristic.biopsy.site.
## 1 SRR975551 primary tumor
## 2 SRR975552 primary tumor
## 3 SRR975553 primary tumor
## 4 SRR975554 primary tumor
## 5 SRR975555 primary tumor
## 6 SRR975556 primary tumor
## Sample.Characteristic.Ontology.Term.biopsy.site.
## 1 http://www.ebi.ac.uk/efo/EFO_0000616
## 2 http://www.ebi.ac.uk/efo/EFO_0000616
## 3 http://www.ebi.ac.uk/efo/EFO_0000616
## 4 http://www.ebi.ac.uk/efo/EFO_0000616
## 5 http://www.ebi.ac.uk/efo/EFO_0000616
## 6 http://www.ebi.ac.uk/efo/EFO_0000616
## Sample.Characteristic.disease. Sample.Characteristic.Ontology.Term.disease.
## 1 colorectal cancer http://www.ebi.ac.uk/efo/EFO_0005842
## 2 colorectal cancer http://www.ebi.ac.uk/efo/EFO_0005842
## 3 colorectal cancer http://www.ebi.ac.uk/efo/EFO_0005842
## 4 colorectal cancer http://www.ebi.ac.uk/efo/EFO_0005842
## 5 colorectal cancer http://www.ebi.ac.uk/efo/EFO_0005842
## 6 colorectal cancer http://www.ebi.ac.uk/efo/EFO_0005842
## Sample.Characteristic.disease.staging.
## 1 Stage IV Colorectal Cancer
## 2 Stage IV Colorectal Cancer
## 3 Stage IV Colorectal Cancer
## 4 Stage IV Colorectal Cancer
## 5 Stage IV Colorectal Cancer
## 6 Stage IV Colorectal Cancer
## Sample.Characteristic.Ontology.Term.disease.staging.
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
## Sample.Characteristic.individual.
## 1 AMC_2
## 2 AMC_3
## 3 AMC_5
## 4 AMC_6
## 5 AMC_7
## 6 AMC_8
## Sample.Characteristic.Ontology.Term.individual.
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
## Sample.Characteristic.organism. Sample.Characteristic.Ontology.Term.organism.
## 1 Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 2 Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 3 Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 4 Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 5 Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 6 Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## Sample.Characteristic.organism.part.
## 1 colon
## 2 colon
## 3 colon
## 4 colon
## 5 colon
## 6 colon
## Sample.Characteristic.Ontology.Term.organism.part. Factor.Value.biopsy.site.
## 1 http://purl.obolibrary.org/obo/UBERON_0001155 primary tumor
## 2 http://purl.obolibrary.org/obo/UBERON_0001155 primary tumor
## 3 http://purl.obolibrary.org/obo/UBERON_0001155 primary tumor
## 4 http://purl.obolibrary.org/obo/UBERON_0001155 primary tumor
## 5 http://purl.obolibrary.org/obo/UBERON_0001155 primary tumor
## 6 http://purl.obolibrary.org/obo/UBERON_0001155 primary tumor
## Factor.Value.Ontology.Term.biopsy.site. Analysed
## 1 http://www.ebi.ac.uk/efo/EFO_0000616 Yes
## 2 http://www.ebi.ac.uk/efo/EFO_0000616 Yes
## 3 http://www.ebi.ac.uk/efo/EFO_0000616 Yes
## 4 http://www.ebi.ac.uk/efo/EFO_0000616 Yes
## 5 http://www.ebi.ac.uk/efo/EFO_0000616 Yes
## 6 http://www.ebi.ac.uk/efo/EFO_0000616 Yes
# Also save a copy for later
sampleData_v2 <- sampleData
# Convert count data to a matrix of appropriate form that DEseq2 can read
geneID <- rawCounts$Gene.ID
sampleIndex <- grepl("SRR\\d+", colnames(rawCounts))
rawCounts <- as.matrix(rawCounts[,sampleIndex])
rownames(rawCounts) <- geneID
head(rawCounts)
## SRR975551 SRR975552 SRR975553 SRR975554 SRR975555 SRR975556
## ENSG00000000003 6617 1352 1492 3390 1464 1251
## ENSG00000000005 69 1 20 23 12 4
## ENSG00000000419 2798 714 510 1140 1667 322
## ENSG00000000457 486 629 398 239 383 290
## ENSG00000000460 466 342 73 227 193 35
## ENSG00000000938 75 95 158 107 135 75
## SRR975557 SRR975558 SRR975559 SRR975560 SRR975561 SRR975562
## ENSG00000000003 207 1333 2126 1799 1362 3435
## ENSG00000000005 20 2 3 6 10 15
## ENSG00000000419 273 621 1031 677 480 1194
## ENSG00000000457 164 452 172 229 264 297
## ENSG00000000460 38 184 174 68 46 173
## ENSG00000000938 236 254 121 107 94 90
## SRR975563 SRR975564 SRR975565 SRR975566 SRR975567 SRR975568
## ENSG00000000003 1711 487 4326 3652 1090 2460
## ENSG00000000005 2 3 21 7 3 4
## ENSG00000000419 634 679 1347 849 692 1112
## ENSG00000000457 548 232 454 172 412 331
## ENSG00000000460 81 194 241 158 223 170
## ENSG00000000938 113 360 84 227 168 95
## SRR975569 SRR975570 SRR975571 SRR975572 SRR975573 SRR975574
## ENSG00000000003 2893 691 2271 2187 999 1283
## ENSG00000000005 18 4 20 17 23 10
## ENSG00000000419 756 278 743 488 376 300
## ENSG00000000457 609 225 539 446 325 286
## ENSG00000000460 117 38 76 73 63 56
## ENSG00000000938 111 35 212 92 72 107
## SRR975575 SRR975576 SRR975577 SRR975578 SRR975579 SRR975580
## ENSG00000000003 906 1359 1309 1008 310 2704
## ENSG00000000005 5 4 19 10 13 21
## ENSG00000000419 406 320 389 271 213 783
## ENSG00000000457 296 256 355 243 159 324
## ENSG00000000460 68 33 59 21 17 131
## ENSG00000000938 83 69 88 70 55 93
## SRR975581 SRR975582 SRR975583 SRR975584 SRR975585 SRR975586
## ENSG00000000003 674 1121 1776 981 1185 2012
## ENSG00000000005 5 19 3 10 7 2
## ENSG00000000419 268 467 452 419 473 607
## ENSG00000000457 257 360 409 310 430 248
## ENSG00000000460 52 57 86 70 65 79
## ENSG00000000938 71 91 74 96 82 77
## SRR975587 SRR975588 SRR975589 SRR975590 SRR975591 SRR975592
## ENSG00000000003 3190 1837 1177 3055 1621 1276
## ENSG00000000005 6 11 4 6 3 8
## ENSG00000000419 1274 774 804 968 1105 866
## ENSG00000000457 314 591 447 260 364 160
## ENSG00000000460 251 108 330 197 166 176
## ENSG00000000938 215 99 712 111 616 191
## SRR975593 SRR975594 SRR975595 SRR975596 SRR975597 SRR975598
## ENSG00000000003 2488 1009 2786 1717 1691 6192
## ENSG00000000005 10 0 18 7 10 40
## ENSG00000000419 1339 250 852 1095 1066 1273
## ENSG00000000457 466 177 244 156 256 422
## ENSG00000000460 167 48 121 105 172 293
## ENSG00000000938 563 132 81 150 148 72
## SRR975599 SRR975600 SRR975601 SRR975602 SRR975603 SRR975604
## ENSG00000000003 722 680 2608 2436 1739 2134
## ENSG00000000005 14 1 16 0 2 7
## ENSG00000000419 405 597 974 1000 1287 1250
## ENSG00000000457 95 255 331 165 347 349
## ENSG00000000460 73 148 180 160 288 100
## ENSG00000000938 111 579 108 65 593 643
# Convert sample variable mappings to an appropriate form that DESeq2 can read
head(sampleData)
## Run Sample.Characteristic.biopsy.site.
## 1 SRR975551 primary tumor
## 2 SRR975552 primary tumor
## 3 SRR975553 primary tumor
## 4 SRR975554 primary tumor
## 5 SRR975555 primary tumor
## 6 SRR975556 primary tumor
## Sample.Characteristic.Ontology.Term.biopsy.site.
## 1 http://www.ebi.ac.uk/efo/EFO_0000616
## 2 http://www.ebi.ac.uk/efo/EFO_0000616
## 3 http://www.ebi.ac.uk/efo/EFO_0000616
## 4 http://www.ebi.ac.uk/efo/EFO_0000616
## 5 http://www.ebi.ac.uk/efo/EFO_0000616
## 6 http://www.ebi.ac.uk/efo/EFO_0000616
## Sample.Characteristic.disease. Sample.Characteristic.Ontology.Term.disease.
## 1 colorectal cancer http://www.ebi.ac.uk/efo/EFO_0005842
## 2 colorectal cancer http://www.ebi.ac.uk/efo/EFO_0005842
## 3 colorectal cancer http://www.ebi.ac.uk/efo/EFO_0005842
## 4 colorectal cancer http://www.ebi.ac.uk/efo/EFO_0005842
## 5 colorectal cancer http://www.ebi.ac.uk/efo/EFO_0005842
## 6 colorectal cancer http://www.ebi.ac.uk/efo/EFO_0005842
## Sample.Characteristic.disease.staging.
## 1 Stage IV Colorectal Cancer
## 2 Stage IV Colorectal Cancer
## 3 Stage IV Colorectal Cancer
## 4 Stage IV Colorectal Cancer
## 5 Stage IV Colorectal Cancer
## 6 Stage IV Colorectal Cancer
## Sample.Characteristic.Ontology.Term.disease.staging.
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
## Sample.Characteristic.individual.
## 1 AMC_2
## 2 AMC_3
## 3 AMC_5
## 4 AMC_6
## 5 AMC_7
## 6 AMC_8
## Sample.Characteristic.Ontology.Term.individual.
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
## Sample.Characteristic.organism. Sample.Characteristic.Ontology.Term.organism.
## 1 Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 2 Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 3 Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 4 Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 5 Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## 6 Homo sapiens http://purl.obolibrary.org/obo/NCBITaxon_9606
## Sample.Characteristic.organism.part.
## 1 colon
## 2 colon
## 3 colon
## 4 colon
## 5 colon
## 6 colon
## Sample.Characteristic.Ontology.Term.organism.part. Factor.Value.biopsy.site.
## 1 http://purl.obolibrary.org/obo/UBERON_0001155 primary tumor
## 2 http://purl.obolibrary.org/obo/UBERON_0001155 primary tumor
## 3 http://purl.obolibrary.org/obo/UBERON_0001155 primary tumor
## 4 http://purl.obolibrary.org/obo/UBERON_0001155 primary tumor
## 5 http://purl.obolibrary.org/obo/UBERON_0001155 primary tumor
## 6 http://purl.obolibrary.org/obo/UBERON_0001155 primary tumor
## Factor.Value.Ontology.Term.biopsy.site. Analysed
## 1 http://www.ebi.ac.uk/efo/EFO_0000616 Yes
## 2 http://www.ebi.ac.uk/efo/EFO_0000616 Yes
## 3 http://www.ebi.ac.uk/efo/EFO_0000616 Yes
## 4 http://www.ebi.ac.uk/efo/EFO_0000616 Yes
## 5 http://www.ebi.ac.uk/efo/EFO_0000616 Yes
## 6 http://www.ebi.ac.uk/efo/EFO_0000616 Yes
rownames(sampleData) <- sampleData$Run
keep <- c("Sample.Characteristic.biopsy.site.", "Sample.Characteristic.individual.")
sampleData <- sampleData[,keep]
colnames(sampleData) <- c("tissueType", "individualID")
sampleData$individualID <- factor(sampleData$individualID)
head(sampleData)
## tissueType individualID
## SRR975551 primary tumor AMC_2
## SRR975552 primary tumor AMC_3
## SRR975553 primary tumor AMC_5
## SRR975554 primary tumor AMC_6
## SRR975555 primary tumor AMC_7
## SRR975556 primary tumor AMC_8
# Put the columns of the count data in the same order as rows names of the sample mapping, then make sure it worked
rawCounts <- rawCounts[,unique(rownames(sampleData))]
all(colnames(rawCounts) == rownames(sampleData))
## [1] TRUE
# rename the tissue types
rename_tissues <- function(x){
x <- switch(as.character(x), "normal"="normal-looking surrounding colonic epithelium", "primary tumor"="primary colorectal cancer", "colorectal cancer metastatic in the liver"="metastatic colorectal cancer to the liver")
return(x)
}
sampleData$tissueType <- unlist(lapply(sampleData$tissueType, rename_tissues))
# Order the tissue types so that it is sensible and make sure the control sample is first: normal sample -> primary tumor -> metastatic tumor
sampleData$tissueType <- factor(sampleData$tissueType, levels=c("normal-looking surrounding colonic epithelium", "primary colorectal cancer", "metastatic colorectal cancer to the liver"))
# Create the DEseq2DataSet object
deseq2Data <- DESeqDataSetFromMatrix(countData=rawCounts, colData=sampleData, design= ~ individualID + tissueType)
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
# Perform pre-filtering of the data
dim(deseq2Data)
## [1] 65217 54
deseq2Data <- deseq2Data[rowSums(counts(deseq2Data)) > 5, ]
# Register the number of cores to use
library(BiocParallel)
register(MulticoreParam(4))
## Warning in MulticoreParam(4): MulticoreParam() not supported on Windows, use
## SnowParam()
# do one of the following three options:
# 1. Run pipeline for differential expression steps (if you set up parallel processing, set parallel = TRUE here)
deseq2Data <- DESeq(deseq2Data)
## estimating size factors
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## final dispersion estimates
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## fitting model and testing
# 2. Load the R environment with this object from the web (optional)
# load(url("http://genomedata.org/gen-viz-workshop/intro_to_deseq2/tutorial/deseq2Data_v1.RData"))
# 3. Download the .Rdata file and load directly(optional)
# load("deseq2Data_v1.RData")
# Extract differential expression results
# For "tissueType" perform primary vs normal comparison
deseq2Results <- results(deseq2Data, contrast=c("tissueType", "primary colorectal cancer", "normal-looking surrounding colonic epithelium"))
# View summary of results
summary(deseq2Results)
##
## out of 35179 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 4816, 14%
## LFC < 0 (down) : 5718, 16%
## outliers [1] : 0, 0%
## low counts [2] : 10231, 29%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Using DEseq2 built in method
plotMA(deseq2Results)
# Load libraries
# install.packages(c("ggplot2", "scales", "viridis"))
library(ggplot2)
library(scales) # needed for oob parameter
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(viridis)
## Loading required package: viridisLite
##
## Attaching package: 'viridis'
## The following object is masked from 'package:scales':
##
## viridis_pal
# Coerce to a data frame
deseq2ResDF <- as.data.frame(deseq2Results)
# Examine this data frame
head(deseq2ResDF)
## baseMean log2FoldChange lfcSE stat pvalue
## ENSG00000000003 1824.88077 0.2319793 0.1754965 1.3218460 1.862194e-01
## ENSG00000000005 10.85195 -0.3581575 0.4171716 -0.8585377 3.905956e-01
## ENSG00000000419 725.17340 0.7034028 0.1319274 5.3317414 9.727542e-08
## ENSG00000000457 311.35784 -0.2259470 0.1094811 -2.0637992 3.903676e-02
## ENSG00000000460 126.36193 1.0474994 0.2050559 5.1083599 3.249672e-07
## ENSG00000000938 163.15327 0.3837251 0.2277006 1.6852175 9.194662e-02
## padj
## ENSG00000000003 3.163423e-01
## ENSG00000000005 5.420883e-01
## ENSG00000000419 3.812710e-06
## ENSG00000000457 9.389795e-02
## ENSG00000000460 9.537978e-06
## ENSG00000000938 1.836724e-01
# Set a boolean column for significance
deseq2ResDF$significant <- ifelse(deseq2ResDF$padj < .1, "Significant", NA)
# Plot the results similar to DEseq2
ggplot(deseq2ResDF, aes(baseMean, log2FoldChange, colour=significant)) + geom_point(size=1) + scale_y_continuous(limits=c(-3, 3), oob=squish) + scale_x_log10() + geom_hline(yintercept = 0, colour="tomato1", size=2) + labs(x="mean of normalized counts", y="log fold change") + scale_colour_manual(name="q-value", values=("Significant"="red"), na.value="grey50") + theme_bw()
# Let's add some more detail
ggplot(deseq2ResDF, aes(baseMean, log2FoldChange, colour=padj)) + geom_point(size=1) + scale_y_continuous(limits=c(-3, 3), oob=squish) + scale_x_log10() + geom_hline(yintercept = 0, colour="darkorchid4", size=1, linetype="longdash") + labs(x="mean of normalized counts", y="log fold change") + scale_colour_viridis(direction=-1, trans='sqrt') + theme_bw() + geom_density_2d(colour="black", size=0.5)
# Extract counts for the gene otop2
otop2Counts <- plotCounts(deseq2Data, gene="ENSG00000183034", intgroup=c("tissueType", "individualID"), returnData=TRUE)
# Plot the data using ggplot2
colourPallette <- c("#7145cd","#bbcfc4","#90de4a","#cd46c1","#77dd8e","#592b79","#d7c847","#6378c9","#619a3c","#d44473","#63cfb6","#dd5d36","#5db2ce","#8d3b28","#b1a4cb","#af8439","#c679c0","#4e703f","#753148","#cac88e","#352b48","#cd8d88","#463d25","#556f73")
ggplot(otop2Counts, aes(x=tissueType, y=count, colour=individualID, group=individualID)) + geom_point() + geom_line() + theme_bw() + theme(axis.text.x=element_text(angle=15, hjust=1)) + scale_colour_manual(values=colourPallette) + guides(colour=guide_legend(ncol=3)) + ggtitle("OTOP2")
deseq2ResDF["ENSG00000183034",]
## baseMean log2FoldChange lfcSE stat pvalue
## ENSG00000183034 375.7911 -7.337968 0.748847 -9.799022 1.136808e-22
## padj significant
## ENSG00000183034 4.051584e-19 Significant
rawCounts["ENSG00000183034",]
## SRR975551 SRR975552 SRR975553 SRR975554 SRR975555 SRR975556 SRR975557 SRR975558
## 2 6 250 4 19 11 0 7
## SRR975559 SRR975560 SRR975561 SRR975562 SRR975563 SRR975564 SRR975565 SRR975566
## 0 8 159 0 235 1 1 1
## SRR975567 SRR975568 SRR975569 SRR975570 SRR975571 SRR975572 SRR975573 SRR975574
## 84 0 1281 620 335 798 1058 505
## SRR975575 SRR975576 SRR975577 SRR975578 SRR975579 SRR975580 SRR975581 SRR975582
## 861 1321 1336 736 517 1418 1337 1013
## SRR975583 SRR975584 SRR975585 SRR975586 SRR975587 SRR975588 SRR975589 SRR975590
## 1397 613 1251 721 2 1368 1 1
## SRR975591 SRR975592 SRR975593 SRR975594 SRR975595 SRR975596 SRR975597 SRR975598
## 1 1 0 0 0 2 0 2
## SRR975599 SRR975600 SRR975601 SRR975602 SRR975603 SRR975604
## 2 2 0 0 1 3
normals=row.names(sampleData[sampleData[,"tissueType"]=="normal-looking surrounding colonic epithelium",])
primaries=row.names(sampleData[sampleData[,"tissueType"]=="primary colorectal cancer",])
rawCounts["ENSG00000183034",normals]
## SRR975569 SRR975570 SRR975571 SRR975572 SRR975573 SRR975574 SRR975575 SRR975576
## 1281 620 335 798 1058 505 861 1321
## SRR975577 SRR975578 SRR975579 SRR975580 SRR975581 SRR975582 SRR975583 SRR975584
## 1336 736 517 1418 1337 1013 1397 613
## SRR975585 SRR975586 SRR975587
## 1251 721 2
rawCounts["ENSG00000183034",primaries]
## SRR975551 SRR975552 SRR975553 SRR975554 SRR975555 SRR975556 SRR975557 SRR975558
## 2 6 250 4 19 11 0 7
## SRR975559 SRR975560 SRR975561 SRR975562 SRR975563 SRR975564 SRR975565 SRR975566
## 0 8 159 0 235 1 1 1
## SRR975567 SRR975568
## 84 0
# Transform count data using the variance stablilizing transform
deseq2VST <- vst(deseq2Data)
# Convert the DESeq transformed object to a data frame
deseq2VST <- assay(deseq2VST)
deseq2VST <- as.data.frame(deseq2VST)
deseq2VST$Gene <- rownames(deseq2VST)
head(deseq2VST)
## SRR975551 SRR975552 SRR975553 SRR975554 SRR975555 SRR975556
## ENSG00000000003 11.923929 9.968500 10.588833 11.791039 10.058138 10.737563
## ENSG00000000005 5.625681 2.799376 4.866074 5.031436 4.065530 3.717037
## ENSG00000000419 10.686895 9.059366 9.056915 10.226302 10.243933 8.802204
## ENSG00000000457 8.199791 8.879902 8.706264 8.013961 8.158745 8.654508
## ENSG00000000460 8.141111 8.024470 6.392902 7.942364 7.214920 5.820591
## ENSG00000000938 5.725709 6.301239 7.421013 6.916156 6.736584 6.794418
## SRR975557 SRR975558 SRR975559 SRR975560 SRR975561 SRR975562
## ENSG00000000003 8.267878 9.835585 10.817518 10.757317 10.763156 11.706322
## ENSG00000000005 5.236297 2.998960 3.269549 3.755656 4.395245 4.523032
## ENSG00000000419 8.656613 8.750360 9.781350 9.360215 9.272733 10.189392
## ENSG00000000457 7.943242 8.303669 7.271484 7.835688 8.427880 8.216450
## ENSG00000000460 5.998354 7.066791 7.287183 6.212864 6.075822 7.468416
## ENSG00000000938 8.451694 7.504600 6.799352 6.802135 7.005928 6.592145
## SRR975563 SRR975564 SRR975565 SRR975566 SRR975567 SRR975568
## ENSG00000000003 10.210433 8.859864 11.858910 12.088638 9.905801 11.127106
## ENSG00000000005 3.003668 3.321234 4.730903 4.034155 3.285318 3.461712
## ENSG00000000419 8.797326 9.331147 10.183658 9.994004 9.258376 9.988913
## ENSG00000000457 8.591742 7.821325 8.637253 7.741407 8.525071 8.271511
## ENSG00000000460 6.015813 7.574696 7.752903 7.624403 7.669804 7.350325
## ENSG00000000938 6.439217 8.434081 6.341108 8.126546 7.282308 6.572592
## SRR975569 SRR975570 SRR975571 SRR975572 SRR975573 SRR975574
## ENSG00000000003 11.059529 10.356867 10.569298 10.854676 10.205960 10.701622
## ENSG00000000005 4.432915 3.955506 4.438338 4.507882 5.160314 4.410729
## ENSG00000000419 9.141181 9.058456 8.975535 8.715784 8.814977 8.631387
## ENSG00000000457 8.834970 8.759218 8.522509 8.588946 8.609350 8.564093
## ENSG00000000460 6.571444 6.334997 5.896374 6.141287 6.375558 6.347129
## ENSG00000000938 6.502814 6.229869 7.230902 6.437108 6.548687 7.203174
## SRR975575 SRR975576 SRR975577 SRR975578 SRR975579 SRR975580
## ENSG00000000003 9.963238 10.749029 10.276631 10.471313 9.142663 11.172871
## ENSG00000000005 3.720561 3.666685 4.724335 4.485812 5.000971 4.723688
## ENSG00000000419 8.821720 8.687815 8.551434 8.601738 8.612026 9.399138
## ENSG00000000457 8.376800 8.373983 8.422896 8.448369 8.201763 8.153977
## ENSG00000000460 6.381203 5.658967 6.012253 5.255633 5.297264 6.914239
## ENSG00000000938 6.640545 6.586080 6.522322 6.743217 6.752428 6.463241
## SRR975581 SRR975582 SRR975583 SRR975584 SRR975585 SRR975586
## ENSG00000000003 9.761890 10.113720 10.870166 10.124489 10.086779 10.862657
## ENSG00000000005 3.829705 4.766055 3.382458 4.289463 3.829410 3.121770
## ENSG00000000419 8.454632 8.867350 8.916874 8.913281 8.780319 9.150469
## ENSG00000000457 8.395799 8.500433 8.775569 8.488346 8.645863 7.892788
## ENSG00000000460 6.235882 6.020642 6.634603 6.461639 6.091997 6.355340
## ENSG00000000938 6.638795 6.619980 6.438706 6.876938 6.387855 6.322406
## SRR975587 SRR975588 SRR975589 SRR975590 SRR975591 SRR975592
## ENSG00000000003 11.267575 10.333011 10.187163 11.868828 10.220378 11.040559
## ENSG00000000005 3.610149 3.968818 3.509957 3.933814 3.199553 4.427753
## ENSG00000000419 9.951607 9.100348 9.642677 10.218513 9.672814 10.484407
## ENSG00000000457 7.971886 8.718730 8.809136 8.352412 8.103841 8.088880
## ENSG00000000460 7.662030 6.399539 8.381879 7.965092 7.027259 8.221922
## ENSG00000000938 7.449577 6.287749 9.469536 7.177886 8.842797 8.336400
## SRR975593 SRR975594 SRR975595 SRR975596 SRR975597 SRR975598
## ENSG00000000003 10.634407 11.034236 11.521405 10.657266 10.812194 12.409812
## ENSG00000000005 3.827427 2.232901 4.782965 3.849417 4.232950 5.459134
## ENSG00000000419 9.747850 9.040705 9.822536 10.013052 10.150926 10.137215
## ENSG00000000457 8.254051 8.552997 8.055002 7.277367 8.129814 8.568186
## ENSG00000000460 6.848925 6.761790 7.092787 6.746588 7.579064 8.057261
## ENSG00000000938 8.519288 8.141869 6.560104 7.224172 7.373415 6.175141
## SRR975599 SRR975600 SRR975601 SRR975602 SRR975603 SRR975604
## ENSG00000000003 10.424530 9.227771 11.185892 11.654602 10.240971 10.592443
## ENSG00000000005 5.139221 2.848146 4.502146 2.232901 3.005591 3.666156
## ENSG00000000419 9.598121 9.043083 9.774641 10.376102 9.810639 9.826985
## ENSG00000000457 7.561362 7.849440 8.247018 7.826785 7.959503 8.023970
## ENSG00000000460 7.202222 7.105346 7.404384 7.784218 7.701531 6.340575
## ENSG00000000938 7.775770 8.999707 6.718028 6.568981 8.710157 8.881872
## Gene
## ENSG00000000003 ENSG00000000003
## ENSG00000000005 ENSG00000000005
## ENSG00000000419 ENSG00000000419
## ENSG00000000457 ENSG00000000457
## ENSG00000000460 ENSG00000000460
## ENSG00000000938 ENSG00000000938
# Keep only the significantly differentiated genes where the fold-change was at least 3
sigGenes <- rownames(deseq2ResDF[deseq2ResDF$padj <= .05 & abs(deseq2ResDF$log2FoldChange) > 3,])
deseq2VST <- deseq2VST[deseq2VST$Gene %in% sigGenes,]
# Convert the VST counts to long format for ggplot2
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# First compare wide vs long version
deseq2VST_wide <- deseq2VST
deseq2VST_long <- melt(deseq2VST, id.vars=c("Gene"))
head(deseq2VST_wide)
## SRR975551 SRR975552 SRR975553 SRR975554 SRR975555 SRR975556
## ENSG00000007306 10.568338 9.159074 16.233798 10.131600 13.751881 12.519868
## ENSG00000012504 3.231426 3.345376 8.403741 3.176825 4.193719 6.522882
## ENSG00000015413 11.169901 10.657855 5.582772 14.553933 11.907716 6.104372
## ENSG00000016602 8.153040 6.466539 13.864836 8.013961 10.864525 8.870742
## ENSG00000029559 3.101820 4.379893 2.232901 4.131232 4.364544 2.232901
## ENSG00000034971 2.232901 2.799376 6.955727 2.232901 2.232901 2.232901
## SRR975557 SRR975558 SRR975559 SRR975560 SRR975561 SRR975562
## ENSG00000007306 5.667957 12.776169 6.144317 14.304986 14.984041 12.098811
## ENSG00000012504 3.022599 4.199691 3.085199 7.112992 2.974334 3.765197
## ENSG00000015413 5.344300 4.457055 12.167876 9.521065 10.161442 13.350185
## ENSG00000016602 4.607561 9.224555 4.510159 12.528673 12.408709 4.523032
## ENSG00000029559 2.232901 6.411503 5.578436 5.112705 2.232901 4.585088
## ENSG00000034971 7.993170 2.232901 2.839882 3.632837 5.593554 2.882770
## SRR975563 SRR975564 SRR975565 SRR975566 SRR975567 SRR975568
## ENSG00000007306 15.114977 8.077518 8.483470 6.816881 12.947413 10.863179
## ENSG00000012504 3.885169 3.849195 4.578502 3.238968 3.573172 2.861074
## ENSG00000015413 4.468793 11.161337 11.322352 12.570130 11.191026 11.039218
## ENSG00000016602 11.258701 5.310833 4.056402 5.555016 11.548776 5.635144
## ENSG00000029559 2.232901 5.165728 3.429873 4.034155 6.057155 2.861074
## ENSG00000034971 3.538801 2.232901 2.232901 2.232901 3.986470 2.232901
## SRR975569 SRR975570 SRR975571 SRR975572 SRR975573 SRR975574
## ENSG00000007306 15.563131 14.308692 16.204152 15.055571 16.165601 15.575148
## ENSG00000012504 4.975840 4.778122 8.374255 4.507882 8.322641 7.727500
## ENSG00000015413 4.432915 4.430717 5.742498 4.263200 5.537905 5.759378
## ENSG00000016602 13.012940 14.815772 13.859241 12.312736 15.981762 14.202857
## ENSG00000029559 2.232901 2.232901 2.232901 2.232901 2.232901 2.232901
## ENSG00000034971 6.571444 6.743818 6.933769 7.268226 6.030424 4.212182
## SRR975575 SRR975576 SRR975577 SRR975578 SRR975579 SRR975580
## ENSG00000007306 15.174126 14.422544 15.775346 15.907044 13.868314 15.399874
## ENSG00000012504 4.169669 5.861471 7.743603 9.306858 3.917586 5.845152
## ENSG00000015413 4.499098 4.710961 6.133733 10.081967 3.710734 12.799715
## ENSG00000016602 13.467160 12.697889 14.008965 13.577174 12.061149 13.960487
## ENSG00000029559 2.232901 2.232901 2.232901 2.232901 2.232901 2.841968
## ENSG00000034971 8.060166 6.586080 6.209339 6.170983 7.994167 5.237540
## SRR975581 SRR975582 SRR975583 SRR975584 SRR975585 SRR975586
## ENSG00000007306 15.050135 15.274470 15.535940 15.791451 15.071779 14.159704
## ENSG00000012504 3.493410 7.973717 4.136058 7.691774 4.840240 6.322406
## ENSG00000015413 4.859370 6.146305 4.136058 5.788260 5.135739 5.070622
## ENSG00000016602 12.517156 14.375398 14.550607 15.092849 13.345953 13.398398
## ENSG00000029559 2.232901 2.886038 2.232901 2.232901 2.862579 2.232901
## ENSG00000034971 7.352685 6.484078 6.588078 3.423110 7.103513 4.271017
## SRR975587 SRR975588 SRR975589 SRR975590 SRR975591 SRR975592
## ENSG00000007306 9.224802 14.013551 10.556329 7.744511 8.333508 9.773894
## ENSG00000012504 9.150519 5.666463 6.734259 7.444022 8.733186 8.008271
## ENSG00000015413 8.053613 3.818527 5.592327 14.584635 9.594582 10.591239
## ENSG00000016602 5.308877 14.645248 5.500416 3.933814 4.573319 3.406627
## ENSG00000029559 3.224530 2.232901 7.966007 4.266310 6.076565 5.439764
## ENSG00000034971 2.232901 6.018477 2.886867 2.232901 2.797879 3.073968
## SRR975593 SRR975594 SRR975595 SRR975596 SRR975597 SRR975598
## ENSG00000007306 6.706770 5.528820 10.605091 8.851557 11.164379 12.652243
## ENSG00000012504 6.519254 10.965734 7.328385 4.423801 6.777223 6.882624
## ENSG00000015413 12.481088 5.157102 12.790684 13.036171 12.108500 13.100530
## ENSG00000016602 4.086850 4.028977 4.228051 3.739617 4.788758 4.546585
## ENSG00000029559 7.502999 2.232901 3.695395 7.862908 3.387194 2.851402
## ENSG00000034971 2.760253 2.232901 2.232901 2.232901 2.910991 2.232901
## SRR975599 SRR975600 SRR975601 SRR975602 SRR975603 SRR975604
## ENSG00000007306 10.865647 7.897514 7.389139 6.984942 6.288510 7.637623
## ENSG00000012504 6.437264 7.620326 8.989993 4.231262 8.527824 8.521452
## ENSG00000015413 13.421117 10.612505 7.218498 13.316369 10.100135 11.060091
## ENSG00000016602 4.675336 5.325398 4.817130 5.751074 5.158338 4.846432
## ENSG00000029559 3.486933 5.045923 3.451599 6.825973 7.203106 4.361161
## ENSG00000034971 2.232901 2.232901 2.232901 2.232901 2.782500 2.232901
## Gene
## ENSG00000007306 ENSG00000007306
## ENSG00000012504 ENSG00000012504
## ENSG00000015413 ENSG00000015413
## ENSG00000016602 ENSG00000016602
## ENSG00000029559 ENSG00000029559
## ENSG00000034971 ENSG00000034971
head(deseq2VST_long)
## Gene variable value
## 1 ENSG00000007306 SRR975551 10.568338
## 2 ENSG00000012504 SRR975551 3.231426
## 3 ENSG00000015413 SRR975551 11.169901
## 4 ENSG00000016602 SRR975551 8.153040
## 5 ENSG00000029559 SRR975551 3.101820
## 6 ENSG00000034971 SRR975551 2.232901
# Now overwrite our original data frame with the long format
deseq2VST <- melt(deseq2VST, id.vars=c("Gene"))
# Make a heatmap
heatmap <- ggplot(deseq2VST, aes(x=variable, y=Gene, fill=value)) + geom_raster() + scale_fill_viridis(trans="sqrt") + theme(axis.text.x=element_text(angle=65, hjust=1), axis.text.y=element_blank(), axis.ticks.y=element_blank())
heatmap
# Convert the significant genes back to a matrix for clustering
deseq2VSTMatrix <- dcast(deseq2VST, Gene ~ variable)
rownames(deseq2VSTMatrix) <- deseq2VSTMatrix$Gene
deseq2VSTMatrix$Gene <- NULL
# Compute a distance calculation on both dimensions of the matrix
distanceGene <- dist(deseq2VSTMatrix)
distanceSample <- dist(t(deseq2VSTMatrix))
# Cluster based on the distance calculations
clusterGene <- hclust(distanceGene, method="average")
clusterSample <- hclust(distanceSample, method="average")
# Construct a dendogram for samples
# install.packages("ggdendro")
library(ggdendro)
## Warning: package 'ggdendro' was built under R version 4.2.3
sampleModel <- as.dendrogram(clusterSample)
sampleDendrogramData <- segment(dendro_data(sampleModel, type = "rectangle"))
sampleDendrogram <- ggplot(sampleDendrogramData) + geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + theme_dendro()
# Re-factor samples for ggplot2
deseq2VST$variable <- factor(deseq2VST$variable, levels=clusterSample$labels[clusterSample$order])
# Construct the heatmap. note that at this point we have only clustered the samples NOT the genes
heatmap <- ggplot(deseq2VST, aes(x=variable, y=Gene, fill=value)) + geom_raster() + scale_fill_viridis(trans="sqrt") + theme(axis.text.x=element_text(angle=65, hjust=1), axis.text.y=element_blank(), axis.ticks.y=element_blank())
heatmap
# Combine the dendrogram and the heatmap
#install.packages("gridExtra")
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:Biobase':
##
## combine
## The following object is masked from 'package:BiocGenerics':
##
## combine
grid.arrange(sampleDendrogram, heatmap, ncol=1, heights=c(1,5))
# Load in libraries necessary for modifying plots
#install.packages("gtable")
library(gtable)
library(grid)
# Modify the ggplot objects
sampleDendrogram_1 <- sampleDendrogram + scale_x_continuous(expand=c(.0085, .0085)) + scale_y_continuous(expand=c(0, 0))
heatmap_1 <- heatmap + scale_x_discrete(expand=c(0, 0)) + scale_y_discrete(expand=c(0, 0))
# Convert both grid based objects to grobs
sampleDendrogramGrob <- ggplotGrob(sampleDendrogram_1)
heatmapGrob <- ggplotGrob(heatmap_1)
# Check the widths of each grob
sampleDendrogramGrob$widths
## [1] 5.5points 0cm 0cm
## [4] sum(0cm, 2.75points) 1null 0cm
## [7] 0cm 0points 5.5points
heatmapGrob$widths
## [1] 5.5points 0cm 1grobwidth
## [4] sum(0cm, 2.75points) 1null 0cm
## [7] 0cm 11points 1.53346700913242cm
## [10] 0points 5.5points
# Add in the missing columns
sampleDendrogramGrob <- gtable_add_cols(sampleDendrogramGrob, heatmapGrob$widths[7], 6)
sampleDendrogramGrob <- gtable_add_cols(sampleDendrogramGrob, heatmapGrob$widths[8], 7)
# Make sure every width between the two grobs is the same
maxWidth <- unit.pmax(sampleDendrogramGrob$widths, heatmapGrob$widths)
sampleDendrogramGrob$widths <- as.list(maxWidth)
heatmapGrob$widths <- as.list(maxWidth)
# Arrange the grobs into a plot
finalGrob <- arrangeGrob(sampleDendrogramGrob, heatmapGrob, ncol=1, heights=c(2,5))
# Draw the plot
grid.draw(finalGrob)
# Re-order the sample data to match the clustering we did
sampleData_v2$Run <- factor(sampleData_v2$Run, levels=clusterSample$labels[clusterSample$order])
# Construct a plot to show the clinical data
colours <- c("#743B8B", "#8B743B", "#8B3B52")
sampleClinical <- ggplot(sampleData_v2, aes(x=Run, y=1, fill=Sample.Characteristic.biopsy.site.)) + geom_tile() + scale_x_discrete(expand=c(0, 0)) + scale_y_discrete(expand=c(0, 0)) + scale_fill_manual(name="Tissue", values=colours) + theme_void()
# Convert the clinical plot to a grob
sampleClinicalGrob <- ggplotGrob(sampleClinical)
# Make sure every width between all grobs is the same
maxWidth <- unit.pmax(sampleDendrogramGrob$widths, heatmapGrob$widths, sampleClinicalGrob$widths)
sampleDendrogramGrob$widths <- as.list(maxWidth)
heatmapGrob$widths <- as.list(maxWidth)
sampleClinicalGrob$widths <- as.list(maxWidth)
# Arrange and output the final plot
finalGrob <- arrangeGrob(sampleDendrogramGrob, sampleClinicalGrob, heatmapGrob, ncol=1, heights=c(2,1,5))
grid.draw(finalGrob)