HPV positive vs HPV negative analysis for Head and Neck cancer dataset - GSE6791
Tumor vs Normal analysis for Head and Neck cancer dataset - GSE6791 under different conditions: ** Tumor (HPV positive & negative) vs Normal (HPVnegative); HPV Positive Tumor vs Normal (HPV negative); HPV Negative Tumor vs Normal (HPVnegative); Tumor vs Normal - tonsil samples only **
Load Required Packages
# if a new package from bioconductor has to be installed. write following two commands
# source("https://bioconductor.org/biocLite.R")
# biocLite("package name")
library(GEOquery)
## Loading required package: Biobase
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
##
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
##
## The following object is masked from 'package:stats':
##
## xtabs
##
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, as.vector, cbind,
## colnames, do.call, duplicated, eval, evalq, Filter, Find, get,
## intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unlist, unsplit
##
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Setting options('download.file.method.GEOquery'='auto')
library(limma)
##
## Attaching package: 'limma'
##
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
#install.packages("rafalib")
library("rafalib")
Download input data
gset6791 <- getGEO("GSE6791", GSEMatrix =TRUE)
## ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE6nnn/GSE6791/matrix/
## Found 1 file(s)
## GSE6791_series_matrix.txt.gz
## File stored at:
## /var/folders/nt/jmcs3y6d4d3ghsv7j3yzpfrh0000gn/T//RtmpWbkVVH/GPL570.soft
#setwd("/Users/shruti/Dropbox/SHRUTIM/Microarray/Microarray_R_Scripts/HNSCC/TumorVSControl/GSE6791/")
#save(gset6791,file="./final/gset6791.rda")
#load("/Users/shruti/Dropbox/SHRUTIM/Microarray/Microarray_R_Scripts/HNSCC/TumorVSControl/GSE6791/final/gset6791.rda")
Data Munging
GPLid <- levels((gset6791[[1]])$platform_id)
if (length(gset6791) > 1)
{
idx <- grep(GPLid, attr(gset6791, "names"))
} else
{
idx <- 1
}
gset6791 <- gset6791[[idx]]
## look at the pattern in which the sample types are arranged.
#colnames(pData(gset6791))
#pData(gset6791)[1:2,]
#pData(gset6791)[1:2,c(1,7,10:14,19,24,25)]
#apply(pData(gset6791)[,c(10:12,14,24)],2,table)
# since HPV information is not available through gset, downlaod its meta data
dfl6791 <- getGSEDataTables("GSE6791")
#save(dfl6791,file="./final/dfl6791.rda")
#load("./final/dfl6791.rda")
sample_info <- dfl6791[c(1:12),]
sample_info <- as.data.frame(sample_info,stringsAsFactors=F)
sample_info <- as.data.frame(apply(sample_info,2,function(i) {sub("\\s$","",i)}))
rownames(sample_info) <- sample_info$Accession.number
colnames(sample_info) <- sapply(colnames(sample_info),function(i) {sub("\\.$","",i)},USE.NAMES=F)
# select on samples for Head and Neck tissue samples
#pData(gset6791)[1:2,1:2]
#sample_info[1:2,]
HNnormal_gsmids <- rownames(subset(sample_info,Case=="HN normal",))
HNcancer_gsmids <- rownames(subset(sample_info,Case=="HN cancer",))
gset6791_HN_tissue <- gset6791[,c(HNnormal_gsmids,HNcancer_gsmids)]
# log2 transform
ex6791_HN_tissue <- exprs(gset6791_HN_tissue)
qx <- as.numeric(quantile(ex6791_HN_tissue, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=T))
LogC <- (qx[5] > 100) ||
(qx[6]-qx[1] > 50 && qx[2] > 0) ||
(qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)
if (LogC) { ex6791_HN_tissue[which(ex6791_HN_tissue <= 0)] <- NaN
exprs(gset6791_HN_tissue) <- log2(ex6791_HN_tissue) }
sample_info_HN_tissue <- sample_info[c(HNnormal_gsmids,HNcancer_gsmids),]
#apply(sample_info_HN_tissue[,c(2:8)],2,table)
sample_info_HN_tissue$HPV <- sub(pattern="\\s?[(]\\d+[)]", replacement="", x=sample_info_HN_tissue$HPV)
cancer_status <- sample_info_HN_tissue$Case
cancer_status <- sub(pattern="HN ", replacement="", x=cancer_status,ignore.case=T)
cancer_status_factors <- factor(cancer_status, levels=c("normal","cancer"))
HPV_status_factors <- factor(sample_info_HN_tissue$HPV, levels=c("-","+"))
cancer_HPV_status <- factor(paste(cancer_status_factors,HPV_status_factors,sep=""),levels=c("normal-","cancer-","cancer+"))
sex <- factor(sample_info_HN_tissue$Gender)
tissue <- factor(sample_info_HN_tissue$Anatomical.sites)
cancer_tissue_status <- factor(paste(cancer_status_factors,tissue,sep=" "))
Exploratory Data Analysis for Tumor vs Normal
dist_matrix_HN_tissue_6791 <- dist(t(ex6791_HN_tissue)) # very important to take transpose "t"
mds_HN_tissue_6791 <- cmdscale(dist_matrix_HN_tissue_6791)
mypar(1,1)
# cancer_status
plot(mds_HN_tissue_6791[,1],mds_HN_tissue_6791[,2],bg=as.numeric(cancer_status_factors),pch=21,xlab="First dimension",ylab="Second dimension",cex=2,main="MultiDimension Scaling Plot: cancer status")
legend("bottomleft",levels(cancer_status_factors),col=seq(along=levels(cancer_status_factors)),pch=15,cex=1)
text(mds_HN_tissue_6791[,1],mds_HN_tissue_6791[,2],labels=rownames(mds_HN_tissue_6791),cex=0.6)
#identify(mds_HN_tissue_6791)
sample_info_HN_tissue[c(4,55,2,19),c(2,4,6)]
## Case HPV Anatomical.sites
## GSM155718 HN normal - Tonsil
## GSM155713 HN cancer + Tonsil
## GSM155716 HN normal - Uvula and palate
## GSM155677 HN cancer + Oropharynx
# cancer and HPV (+ or -)status
plot(mds_HN_tissue_6791[,1],mds_HN_tissue_6791[,2],bg=as.numeric(cancer_HPV_status),pch=21,xlab="First dimension",ylab="Second dimension",cex=2,main="MultiDimension Scaling Plot: cancer & HPV status")
legend("bottomleft",levels(cancer_HPV_status),col=seq(along=levels(cancer_HPV_status)),pch=15,cex=1)
# HPV (+ or -)status
plot(mds_HN_tissue_6791[,1],mds_HN_tissue_6791[,2],bg=as.numeric(HPV_status_factors),pch=21,xlab="First dimension",ylab="Second dimension",cex=2,main="MultiDimension Scaling Plot: HPV status")
legend("bottomleft",levels(HPV_status_factors),col=seq(along=levels(HPV_status_factors)),pch=15,cex=1)
# sex
plot(mds_HN_tissue_6791[,1],mds_HN_tissue_6791[,2],bg=as.numeric(sex),pch=21,xlab="First dimension",ylab="Second dimension",cex=2,main="MultiDimension Scaling Plot: sex")
legend("bottomleft",levels(sex),col=seq(along=levels(sex)),pch=15,cex=0.7)
#anatomical location
plot(mds_HN_tissue_6791[,1],mds_HN_tissue_6791[,2],bg=as.numeric(tissue),pch=21,xlab="First dimension",ylab="Second dimension",cex=2,main="MultiDimension Scaling Plot: anatomical location")
legend("bottomleft",levels(tissue),col=seq(along=levels(tissue)),pch=15,cex=0.5)
# represent cancer and normal samples with different symbols and use colors for tissue
plot(mds_HN_tissue_6791[,1],mds_HN_tissue_6791[,2],col=as.numeric(tissue),pch=as.numeric(cancer_status_factors),xlab="First dimension",ylab="Second dimension",cex=3,main="MultiDimension Scaling Plot: anatomical location & cancer status")
legend("topleft",levels(tissue),col=seq(along=levels(tissue)),pch=15,cex=0.7)
legend("bottomleft",levels(cancer_status_factors),pch=seq(along=levels(cancer_status_factors)),cex=1)
hierarchial_cluster_tumor_normal <- hclust(dist_matrix_HN_tissue_6791)
plot(hierarchial_cluster_tumor_normal,cex=0.8,main="Hierarchical clustering of samples")
plot(hierarchial_cluster_tumor_normal,cex=0.8,main="Hierarchical clustering of samples",label=tissue)
Exploratory Data Analysis for HPVpositive vs HPVnegative
HNC_HPV_neg_gsmids <- rownames(subset(sample_info_HN_tissue,Case=="HN cancer" & HPV=="-",Case))
HNC_HPV_pos_gsmids <- rownames(subset(sample_info_HN_tissue,Case=="HN cancer" & HPV=="+",Case))
gset6791_HNCancer <- gset6791[,c(HNC_HPV_neg_gsmids,HNC_HPV_pos_gsmids)]
ex6791_HNCancer <- ex6791_HN_tissue[,c(HNC_HPV_neg_gsmids,HNC_HPV_pos_gsmids)]
sample_info_HNCancer <- sample_info_HN_tissue[c(HNC_HPV_neg_gsmids,HNC_HPV_pos_gsmids),]
setdiff(rownames(sample_info_HNCancer),colnames(ex6791_HNCancer))
## character(0)
HNCancer_HPV_status <- factor(sample_info_HNCancer$HPV, levels=c("-","+")) ## order of levels is important
HNCancer_tissue <- factor(sample_info_HNCancer$Anatomical.sites)
HPV_tissue <- factor(paste(HNCancer_tissue,HNCancer_HPV_status,sep=""))
dist_matrix_HNCancer <- dist(t(ex6791_HNCancer)) # very important to take transpose "t"
mds_HNCancer <- cmdscale(dist_matrix_HNCancer)
plot(mds_HNCancer[,1],mds_HNCancer[,2],bg=as.numeric(HNCancer_HPV_status),pch=21,xlab="First dimension",ylab="Second dimension",cex=2,main="MultiDimension Scaling Plot: HPV status")
legend("bottomleft",levels(HNCancer_HPV_status),col=seq(along=levels(HNCancer_HPV_status)),pch=15,cex=1)
plot(mds_HNCancer[,1],mds_HNCancer[,2],bg=as.numeric(HNCancer_tissue),pch=21,xlab="First dimension",ylab="Second dimension",cex=2,main="MultiDimension Scaling Plot: anatomical location")
legend("bottomright",levels(HNCancer_tissue),col=seq(along=levels(HNCancer_tissue)),pch=15,cex=0.8)
# represent hpv +ve and -ve samples with different symbols and use colors for tissue
plot(mds_HNCancer[,1],mds_HNCancer[,2],col=as.numeric(HNCancer_tissue),pch=as.numeric(HNCancer_HPV_status),xlab="First dimension",ylab="Second dimension",cex=3,main="MultiDimension Scaling Plot: anatomical location & HPV status")
legend("bottomright",levels(HNCancer_tissue),col=seq(along=levels(HNCancer_tissue)),pch=15,cex=0.7)
legend("bottomleft",levels(HNCancer_HPV_status),pch=seq(along=levels(HNCancer_HPV_status)),cex=1)
Exploratory Data Analysis for HPVpositive Tumor vs Normal (HPVnegative)
gset6791_tumorHPVpositive_normal <- gset6791[,c(HNnormal_gsmids,HNC_HPV_pos_gsmids)]
sample_info_tumorHPVpositive_normal <- sample_info_HN_tissue[c(HNnormal_gsmids,HNC_HPV_pos_gsmids),]
cancer_status_tumorHPVpositive_normal <- factor(sample_info_tumorHPVpositive_normal$Case, levels=c("HN normal","HN cancer"))
ex6791_tumorHPVpositive_normal <- ex6791_HN_tissue[,c(HNnormal_gsmids,HNC_HPV_pos_gsmids)]
dist_matrix_tumorHPVpositive_normal <- dist(t(ex6791_tumorHPVpositive_normal)) # very important to take transpose "t"
mds_tumorHPVpositive_normal <- cmdscale(dist_matrix_tumorHPVpositive_normal)
#Cancer status
plot(mds_tumorHPVpositive_normal[,1],mds_tumorHPVpositive_normal[,2],bg=as.numeric(cancer_status_tumorHPVpositive_normal),pch=21,xlab="First dimension",ylab="Second dimension",cex=2,main="MultiDimension Scaling Plot: Cancer status")
legend("bottomleft",levels(cancer_status_tumorHPVpositive_normal),col=seq(along=levels(cancer_status_tumorHPVpositive_normal)),pch=15,cex=0.8)
# to identify which samples may not represent "pure normal" samples, add sample names to plot
text(mds_tumorHPVpositive_normal[,1],mds_tumorHPVpositive_normal[,2],labels=rownames(mds_tumorHPVpositive_normal),cex=0.5)
#identify(mds_tumorHPVpositive_normal)
sample_info_tumorHPVpositive_normal[c(28,4,29,2,3,17,27),c(2,4,6)]
## Case HPV Anatomical.sites
## GSM155711 HN cancer + Tonsil
## GSM155718 HN normal - Tonsil
## GSM155713 HN cancer + Tonsil
## GSM155716 HN normal - Uvula and palate
## GSM155717 HN normal - Tonsil
## GSM155677 HN cancer + Oropharynx
## GSM155709 HN cancer + Tongue
# represent cancer and normal samples with different symbols and use colors for tissue.
# See if GSM155718 is clustered with other cancer samples or is it clustered with other samples from same tissue.
plot(mds_tumorHPVpositive_normal[,1],mds_tumorHPVpositive_normal[,2],col=as.numeric(factor(sample_info_tumorHPVpositive_normal$Anatomical.sites)),pch=as.numeric(factor(sample_info_tumorHPVpositive_normal$Case)),xlab="First dimension",ylab="Second dimension",cex=3,main="MultiDimension Scaling Plot: Cancer & Tissue status")
legend("topleft",levels(factor(sample_info_tumorHPVpositive_normal$Anatomical.sites)),col=seq(along=levels(factor(sample_info_tumorHPVpositive_normal$Anatomical.sites))),pch=15,cex=0.7)
legend("bottomleft",levels(factor(sample_info_tumorHPVpositive_normal$Case)),pch=seq(along=levels(factor(sample_info_tumorHPVpositive_normal$Case))),cex=1)
Exploratory Data Analysis for HPVnegative Tumor vs Normal (HPVnegative)
gset6791_tumorHPVnegative_normal <- gset6791[,c(HNnormal_gsmids,HNC_HPV_neg_gsmids)]
sample_info_tumorHPVnegative_normal <- sample_info_HN_tissue[c(HNnormal_gsmids,HNC_HPV_neg_gsmids),]
cancer_status_tumorHPVnegative_normal <- factor(sample_info_tumorHPVnegative_normal$Case, levels=c("HN normal","HN cancer"))
ex6791_tumorHPVnegative_normal <- ex6791_HN_tissue[,c(HNnormal_gsmids,HNC_HPV_neg_gsmids)]
dist_matrix_tumorHPVnegative_normal <- dist(t(ex6791_tumorHPVnegative_normal)) # very important to take transpose "t"
mds_tumorHPVnegative_normal <- cmdscale(dist_matrix_tumorHPVnegative_normal)
# Cancer status
plot(mds_tumorHPVnegative_normal[,1],mds_tumorHPVnegative_normal[,2],bg=as.numeric(cancer_status_tumorHPVnegative_normal),pch=21,xlab="First dimension",ylab="Second dimension",cex=2,main="MultiDimension Scaling Plot: Cancer status")
legend("bottomleft",levels(cancer_status_tumorHPVnegative_normal),col=seq(along=levels(cancer_status_tumorHPVnegative_normal)),pch=15,cex=0.8)
# to identify which samples may not represent "pure normal" samples, add sample names to plot
text(mds_tumorHPVnegative_normal[,1],mds_tumorHPVnegative_normal[,2],labels=rownames(mds_tumorHPVnegative_normal),cex=0.6)
#identify(mds_tumorHPVnegative_normal)
sample_info_tumorHPVnegative_normal[c(3,18,40,15,38,4,17,16,2,15,19),c(2,4,6)]
## Case HPV Anatomical.sites
## GSM155717 HN normal - Tonsil
## GSM155680 HN cancer - Tonsil
## GSM155712 HN cancer - Hypopharynx
## GSM155674 HN cancer - Oral Cavity
## GSM155708 HN cancer - Tonsil
## GSM155718 HN normal - Tonsil
## GSM155679 HN cancer - ND
## GSM155673 HN cancer - Oral Cavity
## GSM155716 HN normal - Uvula and palate
## GSM155674.1 HN cancer - Oral Cavity
## GSM155681 HN cancer - Tonsil
# represent cancer and normal samples with different symbols and use colors for tissue.
# See if GSM155718 & GSM155717 are clustered with other cancer samples or is it clustered with other samples from same tissue.
plot(mds_tumorHPVnegative_normal[,1],mds_tumorHPVnegative_normal[,2],col=as.numeric(factor(sample_info_tumorHPVnegative_normal$Anatomical.sites)),pch=as.numeric(factor(sample_info_tumorHPVnegative_normal$Case)),xlab="First dimension",ylab="Second dimension",cex=3,main="MultiDimension Scaling Plot: Cancer & Tissue status")
legend("topleft",levels(factor(sample_info_tumorHPVnegative_normal$Anatomical.sites)),col=seq(along=levels(factor(sample_info_tumorHPVnegative_normal$Anatomical.sites))),pch=15,cex=0.7)
legend("bottomleft",levels(factor(sample_info_tumorHPVnegative_normal$Case)),pch=seq(along=levels(factor(sample_info_tumorHPVnegative_normal$Case))),cex=0.8)
Exploratory Data Analysis for Tumor vs Normal : Tonsil Tissue only
tonsil_normal_gsmids <- rownames( subset(sample_info_HN_tissue,Anatomical.sites=="Tonsil" & Case=="HN normal",) )
tonsil_cancer_gsmids <- rownames( subset(sample_info_HN_tissue,Anatomical.sites=="Tonsil" & Case=="HN cancer",) )
gset6791_tonsil_tumor_normal <- gset6791[,c(tonsil_normal_gsmids,tonsil_cancer_gsmids)]
sample_info_tonsil_tumor_normal <- subset(sample_info_HN_tissue,Anatomical.sites=="Tonsil")
cancer_status_tonsil_tumor_normal <- factor(sample_info_tonsil_tumor_normal$Case, levels=c("HN normal","HN cancer"))
ex6791_tonsil_tumor_normal <- ex6791_HN_tissue[,c(tonsil_normal_gsmids,tonsil_cancer_gsmids)]
dist_matrix_tonsil_tumor_normal <- dist(t(ex6791_tonsil_tumor_normal)) # very important to take transpose "t"
mds_tonsil_tumor_normal <- cmdscale(dist_matrix_tonsil_tumor_normal)
plot(mds_tonsil_tumor_normal[,1],mds_tonsil_tumor_normal[,2],bg=as.numeric(cancer_status_tonsil_tumor_normal),pch=21,xlab="First dimension",ylab="Second dimension",cex=2,main="MultiDimension Scaling Plot: Tonsil Samples, Cancer status")
legend("topleft",levels(cancer_status_tonsil_tumor_normal),col=seq(along=levels(cancer_status_tonsil_tumor_normal)),pch=15,cex=1)
text(mds_tonsil_tumor_normal[,1],mds_tonsil_tumor_normal[,2],labels=rownames(mds_tonsil_tumor_normal),cex=0.8)
# dendrogram to see which samples are close or far
hierarchial_cluster_tonsil_tumor_normal <- hclust(dist_matrix_tonsil_tumor_normal)
plot(hierarchial_cluster_tonsil_tumor_normal,cex=0.8,main="Hierarchical clustering of samples",label=cancer_status_tonsil_tumor_normal)
plot(hierarchial_cluster_tonsil_tumor_normal,cex=0.8,main="Hierarchical clustering of samples")
#sample_info_tonsil_tumor_normal[,c(2:9)]
tonsil_HPV_status <- factor(sample_info_tonsil_tumor_normal$HPV, levels=c("-","+"))
tonsil_cancer_HPV_status <- factor(paste(cancer_status_tonsil_tumor_normal,tonsil_HPV_status,sep=""),levels=c("HN normal-","HN cancer-","HN cancer+"))
plot(mds_tonsil_tumor_normal[,1],mds_tonsil_tumor_normal[,2],bg=as.numeric(tonsil_cancer_HPV_status),pch=21,xlab="First dimension",ylab="Second dimension",cex=2,main="MultiDimension Scaling Plot: Tonsil Samples, Cancer & HPV status")
legend("topleft",levels(tonsil_cancer_HPV_status),col=seq(along=levels(tonsil_cancer_HPV_status)),pch=15,cex=1)
text(mds_tonsil_tumor_normal[,1],mds_tonsil_tumor_normal[,2],labels=rownames(mds_tonsil_tumor_normal),cex=0.8)
plot(hierarchial_cluster_tonsil_tumor_normal,cex=0.8,main="Hierarchical clustering of samples",label=tonsil_cancer_HPV_status)
#tonsil_cancer_HPV_status
tonsil_cancer_HPVpositive_gsmids <- rownames( subset(sample_info_HN_tissue,Anatomical.sites=="Tonsil" & Case=="HN cancer" & HPV=="+",) )
tonsil_cancer_HPVnegative_gsmids <- rownames( subset(sample_info_HN_tissue,Anatomical.sites=="Tonsil" & Case=="HN cancer" & HPV=="-",) )
tonsil_cancer_HPVpositive_status <- factor(tonsil_cancer_HPV_status[c(1:4,8:10)],levels=c("HN normal-","HN cancer+"))
tonsil_cancer_HPVnegative_status <- factor(tonsil_cancer_HPV_status[c(1:4,5:7)],levels=c("HN normal-","HN cancer-"))
RunLimma <- function(gset,groups){
design_matrix <- model.matrix(~0+groups)
colnames(design_matrix) <- c("CONTROL","TREATMENT")
# to check order is correct
# head(design_matrix)
## fit linear model for each gene given a series of arrays
fit <- lmFit(gset, design_matrix)
## tell which levels should be compared
colnames(design_matrix)
cont.matrix <- makeContrasts(TREATMENT-CONTROL, levels=design_matrix)
## given a linear model fit to microarray data, compute estimated coefficients & standard errors for given set of contratsts
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2, 0.01)
## to get the no.of genes in each array
nrow_gset <- length(featureNames(gset))
tT <- topTable(fit2, adjust="fdr", sort.by="logFC",number=nrow_gset)
colnames(tT)
tT <- subset(tT, select=c("ID","adj.P.Val","P.Value","logFC","t","B","AveExpr","ENTREZ_GENE_ID","Gene.Symbol","Gene.Title"))
return(tT)
}
#function to remove rows which are not associated with any gene
RemoveRows <- function(MatrixName,ColumnName){ #ColumnName is name of column containing gene id
# remove rows that contain "NA" in any column
complete <- complete.cases(MatrixName)
Matrix_complete <- MatrixName[complete,]
# remove rows that contain "" in column for gene id
nonempty_gene_ids <- Matrix_complete[,ColumnName]!=""
Matrix2 <- Matrix_complete[nonempty_gene_ids,]
# if GeneID contains more than 1 id, remove additional ones
if (length(grep("/",Matrix2[,ColumnName])>0))
{
GeneIds <- unlist(lapply(Matrix2[,ColumnName],function(x) {
sub("[ ]/.+","",x)
}))
Matrix2[,ColumnName] <- GeneIds
}
return(Matrix2)
}
HPVpositive vs HPVnegative
gse6791_HPV_positiveVsnegative <- RunLimma(gset6791_HNCancer,HNCancer_HPV_status)
gse6791_HPV_positiveVsnegativeDEG <- RemoveRows(gse6791_HPV_positiveVsnegative,"ENTREZ_GENE_ID")
# volcano plot
plot(gse6791_HPV_positiveVsnegativeDEG$logFC,-log10(gse6791_HPV_positiveVsnegativeDEG$adj.P.Val), xlab="Effect size",ylab="- log (base 10) adj.p-values",main="HPVpositive vs HPVnegative")
gse6791_HPV_positiveVsnegativeDEGFinal <- subset(gse6791_HPV_positiveVsnegativeDEG,adj.P.Val<0.05 & abs(logFC)>0.585,)
gse6791_HPV_positiveVsnegative_up <- unique(subset(gse6791_HPV_positiveVsnegativeDEGFinal,logFC>0.585,)$ENTREZ_GENE_ID)
gse6791_HPV_positiveVsnegative_down <- unique(subset(gse6791_HPV_positiveVsnegativeDEGFinal,logFC<(-0.585),)$ENTREZ_GENE_ID)
length(gse6791_HPV_positiveVsnegative_up)
## [1] 261
length(gse6791_HPV_positiveVsnegative_down)
## [1] 55
# save(gse6791_HPV_positiveVsnegativeDEGFinal, file="./rough/HPVpositiveVsNegative/gse6791_HPV_positiveVsnegativeDEGFinal.rda")
# write.table(gse6791_HPV_positiveVsnegativeDEGFinal, file="./rough/HPVpositiveVsNegative/gse6791_HPV_positiveVsnegativeDEGFinal.txt", row.names=F, sep="\t")
Tumor vs Normal
gse6791_cancerVsnormal <- RunLimma(gset6791_HN_tissue,cancer_status_factors)
gse6791_cancerVsnormalDEG <- RemoveRows(gse6791_cancerVsnormal,"ENTREZ_GENE_ID")
# volcano plot
plot(gse6791_cancerVsnormalDEG$logFC,-log10(gse6791_cancerVsnormalDEG$adj.P.Val), xlab="Effect size",ylab="- log (base 10) adj.p-values",main="Tumor vs Normal")
gse6791_cancerVsnormalDEGFinal <- subset(gse6791_cancerVsnormalDEG,adj.P.Val<0.05 & abs(logFC)>0.585,)
length(unique(subset(gse6791_cancerVsnormalDEGFinal,logFC>0.585,)$ENTREZ_GENE_ID))
## [1] 2461
length(unique(subset(gse6791_cancerVsnormalDEGFinal,logFC<(-0.585),)$ENTREZ_GENE_ID))
## [1] 762
HPVpositive Tumor vs Normal
gse6791_tumorHPVpositiveVsnormal <- RunLimma(gset6791_tumorHPVpositive_normal,cancer_status_tumorHPVpositive_normal)
gse6791_tumorHPVpositiveVsnormalDEG <- RemoveRows(gse6791_tumorHPVpositiveVsnormal,"ENTREZ_GENE_ID")
# volcano plot
plot(gse6791_tumorHPVpositiveVsnormalDEG$logFC,-log10(gse6791_tumorHPVpositiveVsnormalDEG$adj.P.Val), xlab="Effect size",ylab="- log (base 10) adj.p-values",main="HPVpositive Tumor vs Normal")
gse6791_tumorHPVpositiveVsnormalDEGFinal <- subset(gse6791_tumorHPVpositiveVsnormalDEG,adj.P.Val<0.05 & abs(logFC)>0.585,)
gse6791_tumorHPVpositiveVsnormal_up <- unique(subset(gse6791_tumorHPVpositiveVsnormalDEGFinal,logFC>0.585,)$ENTREZ_GENE_ID)
gse6791_tumorHPVpositiveVsnormal_down <- unique(subset(gse6791_tumorHPVpositiveVsnormalDEGFinal,logFC<(-0.585),)$ENTREZ_GENE_ID)
length(gse6791_tumorHPVpositiveVsnormal_up)
## [1] 2818
length(gse6791_tumorHPVpositiveVsnormal_down)
## [1] 946
# save(gse6791_tumorHPVpositiveVsnormalDEGFinal, file="./rough/tumorHPVpositiveVsnormal/gse6791_tumorHPVpositiveVsnormalDEGFinal.rda")
# write.table(gse6791_tumorHPVpositiveVsnormalDEGFinal, file="./rough/tumorHPVpositiveVsnormal/gse6791_tumorHPVpositiveVsnormalDEGFinal.txt", row.names=F, sep="\t")
HPVnegative Tumor vs Normal
gse6791_tumorHPVnegativeVsnormal <- RunLimma(gset6791_tumorHPVnegative_normal,cancer_status_tumorHPVnegative_normal)
gse6791_tumorHPVnegativeVsnormalDEG <- RemoveRows(gse6791_tumorHPVnegativeVsnormal,"ENTREZ_GENE_ID")
# volcano plot
plot(gse6791_tumorHPVnegativeVsnormalDEG$logFC,-log10(gse6791_tumorHPVnegativeVsnormalDEG$adj.P.Val), xlab="Effect size",ylab="- log (base 10) adj.p-values",main="HPVnegative Tumor vs Normal")
gse6791_tumorHPVnegativeVsnormalDEGFinal <- subset(gse6791_tumorHPVnegativeVsnormalDEG,adj.P.Val<0.05 & abs(logFC)>0.585,)
gse6791_tumorHPVnegativeVsnormal_up <- unique(subset(gse6791_tumorHPVnegativeVsnormalDEGFinal,logFC>0.585,)$ENTREZ_GENE_ID)
gse6791_tumorHPVnegativeVsnormal_down <- unique(subset(gse6791_tumorHPVnegativeVsnormalDEGFinal,logFC<(-0.585),)$ENTREZ_GENE_ID)
length(gse6791_tumorHPVnegativeVsnormal_up)
## [1] 2236
length(gse6791_tumorHPVnegativeVsnormal_down)
## [1] 803
# save(gse6791_tumorHPVnegativeVsnormalDEGFinal, file="./rough/tumorHPVnegativeVsnormal/gse6791_tumorHPVnegativeVsnormalDEGFinal.rda")
# write.table(gse6791_tumorHPVnegativeVsnormalDEGFinal, file="./rough/tumorHPVnegativeVsnormal/gse6791_tumorHPVnegativeVsnormalDEGFinal.txt", row.names=F, sep="\t")
Tumor vs Normal
gse6791_tonsil_cancerVsnormal <- RunLimma(gset6791_tonsil_tumor_normal,cancer_status_tonsil_tumor_normal)
gse6791_tonsil_cancerVsnormalDEG <- RemoveRows(gse6791_tonsil_cancerVsnormal,"ENTREZ_GENE_ID")
# volcano plot
plot(gse6791_tonsil_cancerVsnormalDEG$logFC,-log10(gse6791_tonsil_cancerVsnormalDEG$adj.P.Val), xlab="Effect size",ylab="- log (base 10) adj.p-values",main="Tumor vs Nnormal - Tonsil Samples")
gse6791_tonsil_cancerVsnormalDEGFinal <- subset(gse6791_tonsil_cancerVsnormalDEG,adj.P.Val<0.05 & logFC>0.585,)
length(unique(subset(gse6791_tonsil_cancerVsnormalDEGFinal,logFC>0.585,)$ENTREZ_GENE_ID))
## [1] 0
length(unique(subset(gse6791_tonsil_cancerVsnormalDEGFinal,logFC<(-0.585),)$ENTREZ_GENE_ID))
## [1] 0
HeatMaps
library(RColorBrewer)
library(gplots)
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
# color gradient to represent the expression values of gene
hmcol <- colorRampPalette(brewer.pal(9,"GnBu"))(100)
HPVpositive vs HPVnegative Heatmap
HPVpositiveVsnegative_DEG_ids <- rownames(gse6791_HPV_positiveVsnegativeDEGFinal)
# colors to represent different levels represented by columns - here: normal and cancer
HPVpositiveVsnegative_cols <- palette(brewer.pal(8,"Dark2"))[as.numeric(HNCancer_HPV_status)]
#HNCancer_HPV_status
#HPVpositiveVsnegative_cols
heatmap.2(ex6791_HNCancer[HPVpositiveVsnegative_DEG_ids,], labCol=HNCancer_HPV_status,trace="none",ColSideColors=HPVpositiveVsnegative_cols,col=hmcol,margin=c(6,6),cexCol=1.5)
# colors to represent different levels represented by columns - here: tissue and hpv status combined
HPVpositiveVsnegative_cols2 <- palette(brewer.pal(12,"Set3"))[as.numeric(HPV_tissue)]
heatmap.2(ex6791_HNCancer[HPVpositiveVsnegative_DEG_ids,], labCol=HPV_tissue,trace="none",ColSideColors=HPVpositiveVsnegative_cols2,col=hmcol,margin=c(10,6),cexCol=1.2)
HPVpositive vs Normal Heatmap
#DEG_ids <- gse6791_tumorHPVpositiveVsnormalDEGFinal$ID # Dnt do this, it gives wrong results
tumorHPVpositiveVsnormal_DEG_ids <- rownames(gse6791_tumorHPVpositiveVsnormalDEGFinal)
tumorHPVpositiveVsnormal_disease_status <- sample_info_tumorHPVpositive_normal$Case
tumorHPVpositiveVsnormal_disease_status <- gsub("HN ","",tumorHPVpositiveVsnormal_disease_status)
tumorHPVpositiveVsnormal_tissue_status <- sample_info_tumorHPVpositive_normal$Anatomical.sites
tumorHPVpositiveVsnormal_disease_tissue_status <- paste(tumorHPVpositiveVsnormal_disease_status,tumorHPVpositiveVsnormal_tissue_status,sep="-")
#table(tumorHPVpositiveVsnormal_disease_tissue_status)
# colors to represent different levels represented by columns - here: normal and cancer
tumorHPVpositiveVsnormal_cols <- palette(brewer.pal(8,"Dark2"))[as.fumeric(tumorHPVpositiveVsnormal_disease_status)]
#tumorHPVpositiveVsnormal_disease_status
#tumorHPVpositiveVsnormal_cols
# dendrogram of samples using top 1000 DEG (Differentially Expressed Genes)
heatmap.2(ex6791_tumorHPVpositive_normal[tumorHPVpositiveVsnormal_DEG_ids,][1:1000,], labCol=tumorHPVpositiveVsnormal_disease_status,trace="none",ColSideColors=tumorHPVpositiveVsnormal_cols,col=hmcol,margin=c(6,6),cexCol=1.5)
# colors to represent different levels represented by columns - here: tissue and cancer status combined
tumorHPVpositiveVsnormal_cols2 <- palette(brewer.pal(12,"Set3"))[as.fumeric(tumorHPVpositiveVsnormal_disease_tissue_status)]
heatmap.2(ex6791_tumorHPVpositive_normal[tumorHPVpositiveVsnormal_DEG_ids,][1:1000,], labCol=tumorHPVpositiveVsnormal_disease_tissue_status,trace="none",ColSideColors=tumorHPVpositiveVsnormal_cols2,col=hmcol,margin=c(12,6),cexCol=1.2)
HPVnegative vs normal Heatmap
tumorHPVnegativeVsnormal_DEG_ids <- rownames(gse6791_tumorHPVnegativeVsnormalDEGFinal)
tumorHPVnegativeVsnormal_disease_status <- sample_info_tumorHPVnegative_normal$Case
tumorHPVnegativeVsnormal_disease_status <- gsub("HN ","",tumorHPVnegativeVsnormal_disease_status)
tumorHPVnegativeVsnormal_tissue_status <- sample_info_tumorHPVnegative_normal$Anatomical.sites
tumorHPVnegativeVsnormal_disease_tissue_status <- paste(tumorHPVnegativeVsnormal_disease_status,tumorHPVnegativeVsnormal_tissue_status,sep="-")
#table(tumorHPVnegativeVsnormal_disease_tissue_status)
# colors to represent different levels represented by columns - here: normal and cancer
tumorHPVnegativeVsnormal_cols <- palette(brewer.pal(8,"Dark2"))[as.fumeric(tumorHPVnegativeVsnormal_disease_status)]
#tumorHPVnegativeVsnormal_cols
#tumorHPVnegativeVsnormal_disease_status
heatmap.2(ex6791_tumorHPVnegative_normal[tumorHPVnegativeVsnormal_DEG_ids,][1:1000,], labCol=tumorHPVnegativeVsnormal_disease_status,trace="none",ColSideColors=tumorHPVnegativeVsnormal_cols,col=hmcol,margin=c(6,6),cexCol=1.5)
# colors to represent different levels represented by columns - here: tissue and cancer status combined
tumorHPVnegativeVsnormal_cols2 <- palette(brewer.pal(12,"Set3"))[as.fumeric(tumorHPVnegativeVsnormal_disease_tissue_status)]
heatmap.2(ex6791_tumorHPVnegative_normal[tumorHPVnegativeVsnormal_DEG_ids,][1:1000,], labCol=tumorHPVnegativeVsnormal_disease_tissue_status,trace="none",ColSideColors=tumorHPVnegativeVsnormal_cols2,col=hmcol,margin=c(12,6),cexCol=1.2)
Gene Signature Based Drug Repurposing: Connectivity Map /LINCS analysis
library("biomaRt")
## Warning: package 'biomaRt' was built under R version 3.2.2
## Creating a generic function for 'nchar' from package 'base' in package 'S4Vectors'
## Warning in .recacheSubclasses(def@className, def, doSubclasses, env):
## undefined subclass "externalRefMethod" of class "expressionORfunction";
## definition not updated
## Warning in .recacheSubclasses(def@className, def, doSubclasses, env):
## undefined subclass "externalRefMethod" of class "functionORNULL";
## definition not updated
listMarts(host="www.ensembl.org")
## biomart version
## 1 ENSEMBL_MART_ENSEMBL Ensembl Genes 83
## 2 ENSEMBL_MART_SNP Ensembl Variation 83
## 3 ENSEMBL_MART_FUNCGEN Ensembl Regulation 83
## 4 ENSEMBL_MART_VEGA Vega 63
## 5 pride PRIDE (EBI UK)
database <- useMart("ENSEMBL_MART_ENSEMBL",dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
listDatasets(database)[grep("sapiens",listDatasets(database)$description,),]
## dataset description version
## 32 hsapiens_gene_ensembl Homo sapiens genes (GRCh38.p5) GRCh38.p5
## Filters (one or more) that should be used in the query.
filters <- listFilters(database)
filters[grep("entrez",filters$description,ignore.case=T),]
## name
## 28 with_entrezgene
## 29 with_entrezgene_transcript_name
## 85 entrezgene
## 86 entrezgene_transcript_name
## description
## 28 with EntrezGene ID(s)
## 29 with EntrezGene Transcript Name(s)
## 85 EntrezGene ID(s) [e.g. 115286]
## 86 EntrezGene transcript name ID(s) [e.g. CTD-2350J17.1-002]
## attribites are values that you are interested in to retrieve
attributes <- listAttributes(database)
grep("hg.?u.?133.?a",attributes$description,ignore.case=T)
## [1] 105 106
grep("entrez",attributes$description,ignore.case=T)
## [1] 58 59
attributes[c(58,59,105,106),]
## name description
## 58 entrezgene EntrezGene ID
## 59 entrezgene_transcript_name EntrezGene transcript name ID
## 105 affy_hg_u133a_2 Affy HG U133A_2 probeset
## 106 affy_hg_u133a Affy HG U133A probeset
GenerateCmapInputList <- function(MatrixName,ColumnName){ #ColumnName is name of column containing gene id
# selecting columns
CmapInput <- MatrixName[,c(ColumnName,"logFC","adj.P.Val")]
CmapInput <- CmapInput[order(-abs(CmapInput$logFC)),]
DEG_EntrezGeneID <- as.character(unique(CmapInput[,ColumnName]))
DEG_probesetIDs <- getBM(attributes=c('entrezgene','affy_hg_u133a'), filters = 'entrezgene', values = DEG_EntrezGeneID, mart = database, uniqueRows=T)
DEG_probesetIDs <- subset(DEG_probesetIDs,affy_hg_u133a!="")
DEG_probesetIDs_FC <- merge(x=CmapInput[,c(ColumnName,"logFC")],y=DEG_probesetIDs,by.x=ColumnName,by.y="entrezgene",sort=F,all.y=F)
DEG_probesetIDs_FC <- DEG_probesetIDs_FC[!duplicated(DEG_probesetIDs_FC$affy_hg_u133a),]
DEG_probesetIDs_FC <- DEG_probesetIDs_FC[order(-abs(DEG_probesetIDs_FC$logFC)),]
return(DEG_probesetIDs_FC)
}
gse6791_tumorHPVpositiveVsnormal_CmapInput <- GenerateCmapInputList(gse6791_tumorHPVpositiveVsnormalDEGFinal,"ENTREZ_GENE_ID")
gse6791_tumorHPVpositiveVsnormal_CmapInput_Up <- subset(gse6791_tumorHPVpositiveVsnormal_CmapInput,logFC>0,)$affy_hg_u133a
gse6791_tumorHPVpositiveVsnormal_CmapInput_Down <- subset(gse6791_tumorHPVpositiveVsnormal_CmapInput,logFC<0,)$affy_hg_u133a
## count no.of up and Down DEGs to get idea about iterations you will like to run
length(gse6791_tumorHPVpositiveVsnormal_CmapInput_Up)
## [1] 3982
length(gse6791_tumorHPVpositiveVsnormal_CmapInput_Down)
## [1] 1149
# write.table(gse6791_tumorHPVpositiveVsnormal_CmapInput_Up[1:500],file="./rough/tumorHPVpositiveVsnormal/Cmap/gse6791_tumorHPVpositiveVsnormal_CmapInput_Up.grp",sep="\t",quote=F,col.names=F,row.names=F)
# write.table(gse6791_tumorHPVpositiveVsnormal_CmapInput_Down[1:500],file="./rough/tumorHPVpositiveVsnormal/Cmap/gse6791_tumorHPVpositiveVsnormal_CmapInput_Down.grp",sep="\t",quote=F,col.names=F,row.names=F)
gse6791_tumorHPVnegativeVsnormal_CmapInput <- GenerateCmapInputList(gse6791_tumorHPVnegativeVsnormalDEGFinal,"ENTREZ_GENE_ID")
gse6791_tumorHPVnegativeVsnormal_CmapInput_Up <- subset(gse6791_tumorHPVnegativeVsnormal_CmapInput,logFC>0,)$affy_hg_u133a
gse6791_tumorHPVnegativeVsnormal_CmapInput_Down <- subset(gse6791_tumorHPVnegativeVsnormal_CmapInput,logFC<0,)$affy_hg_u133a
## count no.of up and Down DEGs to get idea about iterations you will like to run
length(gse6791_tumorHPVnegativeVsnormal_CmapInput_Up)
## [1] 3278
length(gse6791_tumorHPVnegativeVsnormal_CmapInput_Down)
## [1] 999
# write.table(gse6791_tumorHPVnegativeVsnormal_CmapInput_Up[1:500],file="./rough/tumorHPVnegativeVsnormal/Cmap/gse6791_tumorHPVnegativeVsnormal_CmapInput_Up.grp",sep="\t",quote=F,col.names=F,row.names=F)
# write.table(gse6791_tumorHPVnegativeVsnormal_CmapInput_Down[1:500],file="./rough/tumorHPVnegativeVsnormal/Cmap/gse6791_tumorHPVnegativeVsnormal_CmapInput_Down.grp",sep="\t",quote=F,col.names=F,row.names=F)
gse6791_HPV_positiveVsnegative_CmapInput <- GenerateCmapInputList(gse6791_HPV_positiveVsnegativeDEGFinal,"ENTREZ_GENE_ID")
gse6791_HPV_positiveVsnegative_CmapInput_Up <- subset(gse6791_HPV_positiveVsnegative_CmapInput,logFC>0,)$affy_hg_u133a
gse6791_HPV_positiveVsnegative_CmapInput_Down <- subset(gse6791_HPV_positiveVsnegative_CmapInput,logFC<0,)$affy_hg_u133a
## count no.of up and Down DEGs to get idea about iterations you will like to run
length(gse6791_HPV_positiveVsnegative_CmapInput_Up)
## [1] 323
length(gse6791_HPV_positiveVsnegative_CmapInput_Down)
## [1] 95
# write.table(gse6791_HPV_positiveVsnegative_CmapInput_Up,file="./rough/HPVpositiveVsNegative/Cmap/gse6791_HPV_positiveVsnegative_CmapInput_Up.grp",sep="\t",quote=F,col.names=F,row.names=F)
# write.table(gse6791_HPV_positiveVsnegative_CmapInput_Down,file="./rough/HPVpositiveVsNegative/Cmap/gse6791_HPV_positiveVsnegative_CmapInput_Down.grp",sep="\t",quote=F,col.names=F,row.names=F)
VennIntersection <- function(inputsets,inputset_names)
{
l <- length(inputsets)
#total set
universe <- c()
for(i in 1:l)
{
universe <- unique(c(universe,inputsets[[i]]))
}
universe <- sort(universe)
# Generate a matrix, with the sets in columns and possible letters on rows
Counts <- matrix(0, nrow=length(universe), ncol=l)
# Populate the said matrix
for (i in 1:length(universe))
{
for (j in 1:l)
{
Counts[i,j] <- universe[i] %in% inputsets[[j]]
}
}
# Name the columns with the sample names
colnames(Counts) <- inputset_names
rownames(Counts) <- universe
# Specify the colors for the sets
cols<- c("red","green","blue","magenta","cyan","brown")
vennDiagram(vennCounts(Counts), circle.col=cols[1:l],cex=c(1,1,0.7),mar=c(4,0,1,0),xpd=T)
legend("bottom",inputset_names,fill=cols[1:l],inset=c(-2,0))
}
# Common up DEG between (i) tumorHPVpositiveVsnormal (ii) tumorHPVnegativeVsnormal (iii)HPVpositiveVsnegative
VennIntersection(list(gse6791_tumorHPVpositiveVsnormal_up,gse6791_tumorHPVnegativeVsnormal_up,gse6791_HPV_positiveVsnegative_up),c("HPV+ve Tumor VS normal","HPV-ve Tumor VS normal","HPV+ve Tumor VS HPV-ve Tumor"))
# common Down DEG
VennIntersection(list(gse6791_tumorHPVpositiveVsnormal_down,gse6791_tumorHPVnegativeVsnormal_down,gse6791_HPV_positiveVsnegative_down),c("HPV+ve Tumor VS normal","HPV-ve Tumor VS normal","HPV+ve Tumor VS HPV-ve Tumor"))
# Common up DEG between (i) tumorHPVpositiveVsnormal (ii) tumorHPVnegativeVsnormal
VennIntersection(list(gse6791_tumorHPVpositiveVsnormal_up,gse6791_tumorHPVnegativeVsnormal_up),c("HPV+ve Tumor VS normal","HPV-ve Tumor VS normal"))
VennIntersection(list(gse6791_tumorHPVpositiveVsnormal_down,gse6791_tumorHPVnegativeVsnormal_down),c("HPV+ve Tumor VS normal","HPV-ve Tumor VS normal"))