load("~/Dropbox/Uni/Master/HT_Course/Module_10_practiceDay/NSCLC.Rda")
summary(meta)
## dead days to determined death status histology tumor stage
## no : 51 281 : 3 adeno :106 1a:40
## yes:145 556 : 2 large : 24 1b:90
## 6 : 2 squamous: 66 2a: 6
## 912 : 2 2b:29
## 1000 : 1 3a:21
## 1007 : 1 3b: 6
## (Other):185 4 : 4
## performance status corresponding to who criteria age
## 0:105 70 : 14
## 1: 75 66 : 12
## 2: 12 67 : 11
## 3: 4 62 : 9
## 65 : 8
## 51 : 7
## (Other):135
## gender adjuvant treatment recurrence
## female: 89 no :71 no : 47
## male :107 not known:96 not known:100
## yes :29 yes : 49
##
##
##
##
## days to recurrence / to last visit
## not known:100
## 2086 : 2
## 239 : 2
## 245 : 2
## 54 : 2
## 103 : 1
## (Other) : 87
–> factor –> numeric –> will convert the level not the values
plot(as.numeric(as.character(meta$"days to determined death status")), as.numeric(as.character(meta$age)))
boxplot(as.numeric(as.character(meta$age)) ~ meta$gender)
qqnorm(as.numeric(as.character(meta$age)))
hist(expr, breaks = 100)
–> genes are normally distributed
boxplot(expr)
deuc <- dist(t(expr), method = "euclidean")
hc <- hclust(deuc, method = "ward")
source("ftp://129.187.44.58/share/chen/R_Functions/plothclust.R")
plothclust(hc, col = meta$histology)
legend("topright", legend = unique(meta$histology), pch = 15, col = unique(as.numeric(meta$histology)))
table(meta$histology)
##
## adeno large squamous
## 106 24 66
dsp <- 1 - cor(expr, method = "spearman")
hc_cor <- hclust(as.dist(dsp), method = "ward")
source("ftp://129.187.44.58/share/chen/R_Functions/plothclust.R")
plothclust(hc_cor, col = meta$histology)
legend("topright", legend = unique(meta$histology), pch = 15, col = unique(as.numeric(meta$histology)))
table(meta$histology)
##
## adeno large squamous
## 106 24 66
layout(matrix(1:2, 2, 1))
plothclust(hc, col = meta$histology, main = "euclidean")
plothclust(hc_cor, col = meta$histology, main = "spearman")
legend("topright", legend = unique(meta$histology), pch = 15, col = unique(as.numeric(meta$histology)))
expr_scale <- scale(t(expr), center = TRUE, scale = TRUE)
pca <- prcomp(expr_scale, center = FALSE, scale. = FALSE)
plot(pca$x[, 1:2], col = meta$histology, pch = 20)
barplot((pca$sdev^2)[1:10]/sum(pca$sdev^2))
plot(pca$x[, 1:2], col = meta$gender, pch = 20)
plot(pca$x[, 1:2], col = meta$"tumor stage", pch = 20)
plot(pca$x[, 1:2], col = meta$"adjuvant treatment", pch = 20)
legend("topright", legend = unique(meta$"adjuvant treatment"), pch = 15, col = unique(as.numeric(meta$"adjuvant treatment")))
adjuvant treatment looks better –> but with a legend not very useful in a biological way
## rownames(meta[which(meta$histology =='squamous'),])
identical(rownames(meta), colnames(expr))
## [1] TRUE
ade_squ <- expr[, meta$histology %in% c("adeno", "squamous")]
#### exclude the large suff for colouring
fac_2subtype <- meta$histology[meta$histology %in% c("adeno", "squamous")]
fac_2subtype <- factor(fac_2subtype)
ade_squ_scale <- scale(t(ade_squ), center = TRUE, scale = TRUE)
ade_squ_pca <- prcomp(ade_squ_scale, center = F, scale. = F)
plot(ade_squ_pca$x, col = fac_2subtype, pch = 2)
barplot((ade_squ_pca$sdev^2)[1:5]/sum(ade_squ_pca$sdev^2))
library(genefilter)
pval <- rowttests(ade_squ, fac_2subtype)
fdr <- p.adjust(pval$p.value, method = "fdr")
table(fdr < 0.01)
##
## FALSE TRUE
## 4664 5235
#### those are genes (rotation) layout(matrix(1:2),1,2)
#### plot(ade_squ_pca$rotation[,1:2], pch=20) temp_mat <- scale(t(ade_squ[fdr <
#### 0.000001,])) pp <- prcomp(temp_mat, center= F, scale. =F)
#### plot(pp$rotation[,1:2 ], pch=20)
# install.packages('gplots')
library("gplots")
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
heatmat <- ade_squ[fdr < 1e-18, ]
heatmap.2(heatmat, trace = "none", ColSideColors = as.character(as.numeric(fac_2subtype)))
http://david.abcc.ncifcrf.gov/
deg <- rownames(ade_squ)[fdr < 1e-05]
write.table(deg, file = "Desktop/forDavid.txt", col.names = F, row.names = F,
quote = F, sep = "\t")
## Warning: cannot open file 'Desktop/forDavid.txt': No such file or
## directory
## Error: cannot open the connection
deg_short <- deg[1:100]
library("biomaRt")
ensembl = useMart("ensembl", dataset = "hsapiens_gene_ensembl")
lf <- listFilters(ensembl)
lf[grep("affy", lf$name, ignore.case = F), ]
## name
## 23 with_affy_primeview
## 71 with_affy_hc_g110
## 72 with_affy_hg_focus
## 73 with_affy_hg_u133a
## 74 with_affy_hg_u133b
## 75 with_affy_hg_u133a_2
## 76 with_affy_hg_u133_plus_2
## 77 with_affy_hg_u95a
## 78 with_affy_hg_u95av2
## 79 with_affy_hg_u95b
## 80 with_affy_hg_u95c
## 81 with_affy_hg_u95d
## 82 with_affy_hg_u95e
## 83 with_affy_u133_x3p
## 84 with_affy_hugenefl
## 85 with_affy_hugene_1_0_st_v1
## 86 with_affy_hugene_2_0_st_v1
## 87 with_affy_huex_1_0_st_v2
## 161 affy_hc_g110
## 162 affy_hg_focus
## 163 affy_hg_u95a
## 164 affy_hg_u95av2
## 165 affy_hg_u95b
## 166 affy_hg_u95c
## 167 affy_hg_u95d
## 168 affy_hg_u95e
## 169 affy_hg_u133a_2
## 170 affy_hg_u133a
## 171 affy_hg_u133b
## 172 affy_hg_u133_plus_2
## 173 affy_hugenefl
## 174 affy_hugene_1_0_st_v1
## 175 affy_hugene_2_0_st_v1
## 176 affy_huex_1_0_st_v2
## 177 affy_primeview
## 178 affy_u133_x3p
## description
## 23 with Affymetrix Microarray primeview ID(s)
## 71 with Affymetrix Microarray hc g110 probeset ID(s)
## 72 with Affymetrix Microarray hg Focus probeset ID(s)
## 73 with Affymetrix Microarray hg u133a probeset ID(s)
## 74 with Affymetrix Microarray hg u133b probeset ID(s)
## 75 with Affymetrix Microarray hg u133a 2 probeset ID(s)
## 76 with Affymetrix Microarray hg u133 plus 2 probeset ID(s)
## 77 with Affymetrix Microarray hg u95a probeset ID(s)
## 78 with Affymetrix Microarray hg u95av2 ID(s) probeset
## 79 with Affymetrix Microarray hg u95b probeset ID(s)
## 80 with Affymetrix Microarray hg u95c probeset ID(s)
## 81 with Affymetrix Microarray hg u95d probeset ID(s)
## 82 with Affymetrix Microarray hg u95e probeset ID(s)
## 83 with Affymetrix Microarray u133 x3p probeset ID(s)
## 84 with Affymetrix Microarray HuGeneFL probeset ID(s)
## 85 with Affymetrix Microarray hugene 1 0 st v1 probeset ID(s)
## 86 with Affymetrix Microarray hugene 2 0 st v1 ID(s)
## 87 with Affymetrix Microarray huex 1 0 st v2 probeset ID(s)
## 161 Affy hc g110 probeset ID(s) [e.g. 113_i_at]
## 162 Affy hg focus probeset ID(s) [e.g. 201612_at]
## 163 Affy hg u95a probeset ID(s) [e.g. 32647_at]
## 164 Affy hg u95av2 probeset ID(s) [e.g. 32647_at]
## 165 Affy hg u95b probeset ID(s) [e.g. 53925_at]
## 166 Affy hg u95c probeset ID(s) [e.g. 61056_r_at]
## 167 Affy hg u95d probeset ID(s) [e.g. 79632_at]
## 168 Affy hg u95e probeset ID(s) [e.g. 79965_at]
## 169 Affy hg u133a 2 probeset ID(s) [e.g. 200874_s_at]
## 170 Affy hg u133a probeset ID(s) [e.g. 200874_s_at]
## 171 Affy hg u133b probeset ID(s) [e.g. 227057_at]
## 172 Affy hg u133 plus 2 probeset ID(s) [e.g. 241843_at]
## 173 Affy HuGene FL probeset ID(s) [e.g. M58525_s_at]
## 174 Affy HuGene 1_0 st v1 probeset ID(s) [e.g. 8016215]
## 175 Affy HuGene 2_0 st v1 probeset ID(s) [e.g. 16942487]
## 176 Affy HuEx 1_0 st v2 probeset ID(s) [e.g. 4033465]
## 177 Affymetrix Microarray Primeview ID(s) [e.g. 11763890_at]
## 178 Affy u133 x3p probeset ID(s) [e.g. Hs2.205326.1.A1_3p_at]
la <- listAttributes(ensembl)
la[grep("symbol", la$name, ignore.case = F), ]
## name description
## 57 hgnc_symbol HGNC symbol
annotgene <- getBM(attributes = c("affy_hg_u133a", "hgnc_symbol", "go_id"),
filters = "affy_hg_u133a", values = deg_short, mart = ensembl)