class 10

Loading data and exploration

load("~/Dropbox/Uni/Master/HT_Course/Module_10_practiceDay/NSCLC.Rda")
summary(meta)
##   dead     days to determined death status    histology   tumor stage
##  no : 51   281    :  3                     adeno   :106   1a:40      
##  yes:145   556    :  2                     large   : 24   1b:90      
##            6      :  2                     squamous: 66   2a: 6      
##            912    :  2                                    2b:29      
##            1000   :  1                                    3a:21      
##            1007   :  1                                    3b: 6      
##            (Other):185                                    4 : 4      
##  performance status corresponding to who criteria      age     
##  0:105                                            70     : 14  
##  1: 75                                            66     : 12  
##  2: 12                                            67     : 11  
##  3:  4                                            62     :  9  
##                                                   65     :  8  
##                                                   51     :  7  
##                                                   (Other):135  
##     gender    adjuvant treatment     recurrence 
##  female: 89   no       :71       no       : 47  
##  male  :107   not known:96       not known:100  
##               yes      :29       yes      : 49  
##                                                 
##                                                 
##                                                 
##                                                 
##  days to recurrence / to last visit
##  not known:100                     
##  2086     :  2                     
##  239      :  2                     
##  245      :  2                     
##  54       :  2                     
##  103      :  1                     
##  (Other)  : 87

important

–> factor –> numeric –> will convert the level not the values

qualitativ vs qualitativ


plot(as.numeric(as.character(meta$"days to determined death status")), as.numeric(as.character(meta$age)))

plot of chunk unnamed-chunk-2

categorical vs qualitativ

boxplot(as.numeric(as.character(meta$age)) ~ meta$gender)

plot of chunk unnamed-chunk-3

qqnorm(as.numeric(as.character(meta$age)))

plot of chunk unnamed-chunk-4

hist(expr, breaks = 100)

plot of chunk unnamed-chunk-5

–> genes are normally distributed

are the levles from patient to patient comparable?

boxplot(expr)

plot of chunk unnamed-chunk-6

lets do clustering

deuc <- dist(t(expr), method = "euclidean")
hc <- hclust(deuc, method = "ward")
source("ftp://129.187.44.58/share/chen/R_Functions/plothclust.R")
plothclust(hc, col = meta$histology)
legend("topright", legend = unique(meta$histology), pch = 15, col = unique(as.numeric(meta$histology)))

plot of chunk unnamed-chunk-7

table(meta$histology)
## 
##    adeno    large squamous 
##      106       24       66

spearman

dsp <- 1 - cor(expr, method = "spearman")
hc_cor <- hclust(as.dist(dsp), method = "ward")
source("ftp://129.187.44.58/share/chen/R_Functions/plothclust.R")
plothclust(hc_cor, col = meta$histology)
legend("topright", legend = unique(meta$histology), pch = 15, col = unique(as.numeric(meta$histology)))

plot of chunk unnamed-chunk-8

table(meta$histology)
## 
##    adeno    large squamous 
##      106       24       66
layout(matrix(1:2, 2, 1))

plothclust(hc, col = meta$histology, main = "euclidean")
plothclust(hc_cor, col = meta$histology, main = "spearman")
legend("topright", legend = unique(meta$histology), pch = 15, col = unique(as.numeric(meta$histology)))

plot of chunk unnamed-chunk-9

PCA on all the patients

expr_scale <- scale(t(expr), center = TRUE, scale = TRUE)
pca <- prcomp(expr_scale, center = FALSE, scale. = FALSE)

plot(pca$x[, 1:2], col = meta$histology, pch = 20)

plot of chunk unnamed-chunk-10

barplot((pca$sdev^2)[1:10]/sum(pca$sdev^2))

plot of chunk unnamed-chunk-10

plot(pca$x[, 1:2], col = meta$gender, pch = 20)

plot of chunk unnamed-chunk-11

plot(pca$x[, 1:2], col = meta$"tumor stage", pch = 20)

plot of chunk unnamed-chunk-12

plot(pca$x[, 1:2], col = meta$"adjuvant treatment", pch = 20)
legend("topright", legend = unique(meta$"adjuvant treatment"), pch = 15, col = unique(as.numeric(meta$"adjuvant treatment")))

plot of chunk unnamed-chunk-13

adjuvant treatment looks better –> but with a legend not very useful in a biological way

comparision between adeno and squamous

## rownames(meta[which(meta$histology =='squamous'),])
identical(rownames(meta), colnames(expr))
## [1] TRUE
ade_squ <- expr[, meta$histology %in% c("adeno", "squamous")]

#### exclude the large suff for colouring
fac_2subtype <- meta$histology[meta$histology %in% c("adeno", "squamous")]
fac_2subtype <- factor(fac_2subtype)

ade_squ_scale <- scale(t(ade_squ), center = TRUE, scale = TRUE)
ade_squ_pca <- prcomp(ade_squ_scale, center = F, scale. = F)
plot(ade_squ_pca$x, col = fac_2subtype, pch = 2)

plot of chunk unnamed-chunk-14

barplot((ade_squ_pca$sdev^2)[1:5]/sum(ade_squ_pca$sdev^2))

plot of chunk unnamed-chunk-14

Differential expression


library(genefilter)
pval <- rowttests(ade_squ, fac_2subtype)

fdr <- p.adjust(pval$p.value, method = "fdr")
table(fdr < 0.01)
## 
## FALSE  TRUE 
##  4664  5235
#### those are genes (rotation) layout(matrix(1:2),1,2)
#### plot(ade_squ_pca$rotation[,1:2], pch=20) temp_mat <- scale(t(ade_squ[fdr <
#### 0.000001,])) pp <- prcomp(temp_mat, center= F, scale. =F)
#### plot(pp$rotation[,1:2 ], pch=20)

heatmap


# install.packages('gplots')
library("gplots")
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
##     lowess
heatmat <- ade_squ[fdr < 1e-18, ]
heatmap.2(heatmat, trace = "none", ColSideColors = as.character(as.numeric(fac_2subtype)))

plot of chunk unnamed-chunk-16

We use David for exact Fishertest/enrichnment …

http://david.abcc.ncifcrf.gov/

deg <- rownames(ade_squ)[fdr < 1e-05]
write.table(deg, file = "Desktop/forDavid.txt", col.names = F, row.names = F, 
    quote = F, sep = "\t")
## Warning: cannot open file 'Desktop/forDavid.txt': No such file or
## directory
## Error: cannot open the connection

Ensembl

deg_short <- deg[1:100]
library("biomaRt")
ensembl = useMart("ensembl", dataset = "hsapiens_gene_ensembl")

lf <- listFilters(ensembl)
lf[grep("affy", lf$name, ignore.case = F), ]
##                           name
## 23         with_affy_primeview
## 71           with_affy_hc_g110
## 72          with_affy_hg_focus
## 73          with_affy_hg_u133a
## 74          with_affy_hg_u133b
## 75        with_affy_hg_u133a_2
## 76    with_affy_hg_u133_plus_2
## 77           with_affy_hg_u95a
## 78         with_affy_hg_u95av2
## 79           with_affy_hg_u95b
## 80           with_affy_hg_u95c
## 81           with_affy_hg_u95d
## 82           with_affy_hg_u95e
## 83          with_affy_u133_x3p
## 84          with_affy_hugenefl
## 85  with_affy_hugene_1_0_st_v1
## 86  with_affy_hugene_2_0_st_v1
## 87    with_affy_huex_1_0_st_v2
## 161               affy_hc_g110
## 162              affy_hg_focus
## 163               affy_hg_u95a
## 164             affy_hg_u95av2
## 165               affy_hg_u95b
## 166               affy_hg_u95c
## 167               affy_hg_u95d
## 168               affy_hg_u95e
## 169            affy_hg_u133a_2
## 170              affy_hg_u133a
## 171              affy_hg_u133b
## 172        affy_hg_u133_plus_2
## 173              affy_hugenefl
## 174      affy_hugene_1_0_st_v1
## 175      affy_hugene_2_0_st_v1
## 176        affy_huex_1_0_st_v2
## 177             affy_primeview
## 178              affy_u133_x3p
##                                                    description
## 23                  with Affymetrix Microarray primeview ID(s)
## 71           with Affymetrix Microarray hc g110 probeset ID(s)
## 72          with Affymetrix Microarray hg Focus probeset ID(s)
## 73          with Affymetrix Microarray hg u133a probeset ID(s)
## 74          with Affymetrix Microarray hg u133b probeset ID(s)
## 75        with Affymetrix Microarray hg u133a 2 probeset ID(s)
## 76    with Affymetrix Microarray hg u133 plus 2 probeset ID(s)
## 77           with Affymetrix Microarray hg u95a probeset ID(s)
## 78         with Affymetrix Microarray hg u95av2 ID(s) probeset
## 79           with Affymetrix Microarray hg u95b probeset ID(s)
## 80           with Affymetrix Microarray hg u95c probeset ID(s)
## 81           with Affymetrix Microarray hg u95d probeset ID(s)
## 82           with Affymetrix Microarray hg u95e probeset ID(s)
## 83          with Affymetrix Microarray u133 x3p probeset ID(s)
## 84          with Affymetrix Microarray HuGeneFL probeset ID(s)
## 85  with Affymetrix Microarray hugene 1 0 st v1 probeset ID(s)
## 86           with Affymetrix Microarray hugene 2 0 st v1 ID(s)
## 87    with Affymetrix Microarray huex 1 0 st v2 probeset ID(s)
## 161                Affy hc g110 probeset ID(s) [e.g. 113_i_at]
## 162              Affy hg focus probeset ID(s) [e.g. 201612_at]
## 163                Affy hg u95a probeset ID(s) [e.g. 32647_at]
## 164              Affy hg u95av2 probeset ID(s) [e.g. 32647_at]
## 165                Affy hg u95b probeset ID(s) [e.g. 53925_at]
## 166              Affy hg u95c probeset ID(s) [e.g. 61056_r_at]
## 167                Affy hg u95d probeset ID(s) [e.g. 79632_at]
## 168                Affy hg u95e probeset ID(s) [e.g. 79965_at]
## 169          Affy hg u133a 2 probeset ID(s) [e.g. 200874_s_at]
## 170            Affy hg u133a probeset ID(s) [e.g. 200874_s_at]
## 171              Affy hg u133b probeset ID(s) [e.g. 227057_at]
## 172        Affy hg u133 plus 2 probeset ID(s) [e.g. 241843_at]
## 173           Affy HuGene FL probeset ID(s) [e.g. M58525_s_at]
## 174        Affy HuGene 1_0 st v1 probeset ID(s) [e.g. 8016215]
## 175       Affy HuGene 2_0 st v1 probeset ID(s) [e.g. 16942487]
## 176          Affy HuEx 1_0 st v2 probeset ID(s) [e.g. 4033465]
## 177   Affymetrix Microarray Primeview ID(s) [e.g. 11763890_at]
## 178  Affy u133 x3p probeset ID(s) [e.g. Hs2.205326.1.A1_3p_at]

la <- listAttributes(ensembl)
la[grep("symbol", la$name, ignore.case = F), ]
##           name description
## 57 hgnc_symbol HGNC symbol

annotgene <- getBM(attributes = c("affy_hg_u133a", "hgnc_symbol", "go_id"), 
    filters = "affy_hg_u133a", values = deg_short, mart = ensembl)