Caso de estudio 1: Comparación entre tres tipos de cáncer de mama

# Targets
targetsDF <- read.csv(file=file.path(getwd(),"/datos/targets.csv"), header = TRUE, sep = ",")

sampleNames <- as.character(targetsDF$ShortName)
sampleColor <- as.character(targetsDF$Colors)

# Creamos un objeto AnnotatedDataFrame
targets <- AnnotatedDataFrame(targetsDF)

# Leemos archivos .CEL
CELfiles <- targetsDF$fileName

invisible(capture.output({
  rawData <- read.celfiles(file.path(getwd(),"datos", CELfiles), phenoData=targets)
}))
## Cargando paquete requerido: pd.hg.u133a
## Attempting to obtain 'pd.hg.u133a' from BioConductor website.
## Checking to see if your internet connection works...
## installing the source package 'pd.hg.u133a'
## Cargando paquete requerido: pd.hg.u133a
## Platform design info loaded.
rawData
## ExpressionFeatureSet (storageMode: lockedEnvironment)
## assayData: 506944 features, 49 samples 
##   element names: exprs 
## protocolData
##   rowNames: 1 2 ... 49 (49 total)
##   varLabels: exprs dates
##   varMetadata: labelDescription channel
## phenoData
##   rowNames: 1 2 ... 49 (49 total)
##   varLabels: fileName grupos ShortName Colors
##   varMetadata: labelDescription channel
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation: pd.hg.u133a

Exploración y control de calidad

boxplot(rawData, which="all", las=2, cex.axis=0.6, names=sampleNames, col=sampleColor)

# Agrupación jerárquica
clust.euclid.average <- hclust(dist(t(exprs(rawData))), method="average")

plot(clust.euclid.average, labels=sampleNames, main="Agrupamiento jerárquico RawData", 
     cex=0.7, hang=-1)

plotPCA(exprs(rawData), labels=sampleNames, dataDesc="raw Data", colors=sampleColor)

rerun <- FALSE
if(rerun){
  arrayQualityMetrics(rawData, reporttitle="QC_RawData", force= TRUE)
}

Normalizacion

invisible(capture.output({
  eset <- rma(rawData)
}))

write.exprs(eset, file.path(getwd(), "NormData_cancermama.txt"))
eset
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 22283 features, 49 samples 
##   element names: exprs 
## protocolData
##   rowNames: 1 2 ... 49 (49 total)
##   varLabels: exprs dates
##   varMetadata: labelDescription channel
## phenoData
##   rowNames: 1 2 ... 49 (49 total)
##   varLabels: fileName grupos ShortName Colors
##   varMetadata: labelDescription channel
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation: pd.hg.u133a

Filtrado

annotation(eset) <- "hgu133a.db"

invisible(capture.output({
  eset_filtered <- nsFilter(eset, var.func = IQR,
                          var.cutoff=0.75, var.filter=TRUE,
                          require.entrez = TRUE,
                          filterByQuantile=TRUE)
}))
## 
## 
print(eset_filtered)
## $eset
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 3161 features, 49 samples 
##   element names: exprs 
## protocolData
##   rowNames: 1 2 ... 49 (49 total)
##   varLabels: exprs dates
##   varMetadata: labelDescription channel
## phenoData
##   rowNames: 1 2 ... 49 (49 total)
##   varLabels: fileName grupos ShortName Colors
##   varMetadata: labelDescription channel
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation: hgu133a.db 
## 
## $filter.log
## $filter.log$numDupsRemoved
## [1] 7551
## 
## $filter.log$numLowVar
## [1] 9482
## 
## $filter.log$numRemoved.ENTREZID
## [1] 2079
## 
## $filter.log$feature.exclude
## [1] 10
filteredEset <- eset_filtered$eset
filteredData <- exprs(filteredEset)
colnames(filteredData) <- pData(eset_filtered$eset)$ShortName

Selección de genes

treatment <- pData(filteredEset)$grupos
lev <- factor(treatment, levels = unique(treatment))

design <- model.matrix(~0+lev)
colnames(design) <- levels(lev)
rownames(design) <- sampleNames

head(design)
##         APO BAS LUMI
## 78_APO    1   0    0
## 83_APO    1   0    0
## 86_APO    1   0    0
## 87_APO    1   0    0
## 103_APO   1   0    0
## 110_APO   1   0    0

Matriz de contraste

cont.matrix <- makeContrasts(
  APO_vs_BAS = APO - BAS,
  APO_vs_LUMI = APO - LUMI,
  BAS_vs_LUMI = BAS - LUMI,
  levels = design
)

print(cont.matrix)
##       Contrasts
## Levels APO_vs_BAS APO_vs_LUMI BAS_vs_LUMI
##   APO           1           1           0
##   BAS          -1           0           1
##   LUMI          0          -1          -1
# Estimamos el modelo 
fit <- lmFit(filteredData, design)
fit.main <- contrasts.fit(fit, cont.matrix)
fit.main <- eBayes(fit.main)

topTab_APO_vs_BAS <- topTable(fit.main, number=nrow(fit.main), coef="APO_vs_BAS", 
                              adjust="fdr", p.value=0.05, lfc=3)

topTab_APO_vs_LUMI <- topTable(fit.main, number=nrow(fit.main), coef="APO_vs_LUMI", 
                               adjust="fdr", p.value=0.05, lfc=3)

topTab_BAS_vs_LUMI <- topTable(fit.main, number=nrow(fit.main), coef="BAS_vs_LUMI", 
                               adjust="fdr", p.value=0.05, lfc=3)


#topTab <- topTable(fit.main, number=nrow(fit.main), adjust="fdr", p.value=0.05, lfc=3)

#dim(topTab)

#head(topTab)

Anotación de los resultados

keytypes(hgu133a.db)
##  [1] "ACCNUM"       "ALIAS"        "ENSEMBL"      "ENSEMBLPROT"  "ENSEMBLTRANS"
##  [6] "ENTREZID"     "ENZYME"       "EVIDENCE"     "EVIDENCEALL"  "GENENAME"    
## [11] "GENETYPE"     "GO"           "GOALL"        "IPI"          "MAP"         
## [16] "OMIM"         "ONTOLOGY"     "ONTOLOGYALL"  "PATH"         "PFAM"        
## [21] "PMID"         "PROBEID"      "PROSITE"      "REFSEQ"       "SYMBOL"      
## [26] "UCSCKG"       "UNIPROT"
anotaciones <- AnnotationDbi::select(hgu133a.db, keys=rownames(filteredData), 
                                     columns=c("ENTREZID", "SYMBOL"))
## 'select()' returned 1:1 mapping between keys and columns
anotaciones_unique <- anotaciones[!duplicated(anotaciones$PROBEID), ]

head(anotaciones_unique)
annotatedTopTable <- function(topTab, anotPackage)
{
  topTab <- cbind(PROBEID=rownames(topTab), topTab)
  myProbes <- rownames(topTab)
  thePackage <- eval(parse(text = anotPackage))
  geneAnots <- select(thePackage, myProbes, c("SYMBOL", "ENTREZID", "GENENAME"))
  annotatedTopTab<- merge(x=geneAnots, y=topTab, by.x="PROBEID", by.y="PROBEID")
return(annotatedTopTab)
}
topAnnotated_APO_vs_BAS <- annotatedTopTable(topTab_APO_vs_BAS,
                                             anotPackage = "hgu133a.db")
## 'select()' returned 1:1 mapping between keys and columns
topAnnotated_APO_vs_LUMI <- annotatedTopTable(topTab_APO_vs_LUMI,
                                             anotPackage = "hgu133a.db")
## 'select()' returned 1:1 mapping between keys and columns
topAnnotated_BAS_vs_LUMI <- annotatedTopTable(topTab_BAS_vs_LUMI,
                                             anotPackage = "hgu133a.db")
## 'select()' returned 1:1 mapping between keys and columns
head(topAnnotated_APO_vs_BAS)
head(topAnnotated_APO_vs_LUMI)
head(topAnnotated_BAS_vs_LUMI)
geneSymbols <- select(hgu133a.db, rownames(fit.main), c("SYMBOL"))
## 'select()' returned 1:1 mapping between keys and columns
SYMBOLS<- geneSymbols$SYMBOL

volcanoplot(fit.main, coef=1, highlight=4, names=SYMBOLS, 
            main=paste("Differentially expressed genes", colnames(cont.matrix)[1], sep="\n"))
  abline(v=c(-1,1))

for (i in colnames(cont.matrix)){
  volcanoplot(fit.main, coef=i, highlight=4, names=SYMBOLS,
              main=paste("Differentially expressed genes",i, sep="\n"))
  abline(v=c(-1,1))
}

Caso de estudio 2: Efecto del programa génico termogénico durante la diferenciación de adipocitos

# Targets
targetsDF <- read.csv(file=file.path(getwd(),"/datos2/targets2.csv"), header = TRUE, sep = ",")

sampleNames <- as.character(targetsDF$ShortName)
sampleColor <- as.character(targetsDF$Colors)

# Creamos un objeto AnnotatedDataFrame
targets <- AnnotatedDataFrame(targetsDF)

# Leemos archivos .CEL
CELfiles <- targetsDF$fileName

invisible(capture.output({
  rawData <- read.celfiles(file.path(getwd(),"datos2", CELfiles), phenoData=targets)
}))
## Platform design info loaded.
rawData
## GeneFeatureSet (storageMode: lockedEnvironment)
## assayData: 1416100 features, 12 samples 
##   element names: exprs 
## protocolData
##   rowNames: 1 2 ... 12 (12 total)
##   varLabels: exprs dates
##   varMetadata: labelDescription channel
## phenoData
##   rowNames: 1 2 ... 12 (12 total)
##   varLabels: fileName grupos ShortName Colors
##   varMetadata: labelDescription channel
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation: pd.mogene.2.1.st

Normalizacion

invisible(capture.output({
  eset <- rma(rawData)
}))

write.exprs(eset, file.path(getwd(), "NormData_adipo.txt"))
eset
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 41345 features, 12 samples 
##   element names: exprs 
## protocolData
##   rowNames: 1 2 ... 12 (12 total)
##   varLabels: exprs dates
##   varMetadata: labelDescription channel
## phenoData
##   rowNames: 1 2 ... 12 (12 total)
##   varLabels: fileName grupos ShortName Colors
##   varMetadata: labelDescription channel
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation: pd.mogene.2.1.st

Filtrado

annotation(eset) <- "mogene21sttranscriptcluster.db"

invisible(capture.output({
  eset_filtered <- nsFilter(eset, var.func = IQR,
                          var.cutoff=0.75, var.filter=TRUE,
                          require.entrez = TRUE,
                          filterByQuantile = TRUE)
}))

print(eset_filtered)
## $eset
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 5834 features, 12 samples 
##   element names: exprs 
## protocolData
##   rowNames: 1 2 ... 12 (12 total)
##   varLabels: exprs dates
##   varMetadata: labelDescription channel
## phenoData
##   rowNames: 1 2 ... 12 (12 total)
##   varLabels: fileName grupos ShortName Colors
##   varMetadata: labelDescription channel
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation: mogene21sttranscriptcluster.db 
## 
## $filter.log
## $filter.log$numDupsRemoved
## [1] 650
## 
## $filter.log$numLowVar
## [1] 17503
## 
## $filter.log$numRemoved.ENTREZID
## [1] 17358
filteredEset <- eset_filtered$eset

Matriz de contraste

treatment <- pData(filteredEset)$grupos
lev <- factor(treatment, levels = unique(treatment))

design <- model.matrix(~0+lev)
colnames(design) <- levels(lev)
rownames(design) <- sampleNames

cont.matrix <- makeContrasts(
  KO_vs_WT_RT = KO_RT - WT_RT,
  KO_vs_WT_Cold = KO_Cold - WT_Cold,
  KORT_vs_KOCold = KO_RT - KO_Cold,
  levels = design
)

print(cont.matrix)
##          Contrasts
## Levels    KO_vs_WT_RT KO_vs_WT_Cold KORT_vs_KOCold
##   WT_RT            -1             0              0
##   KO_RT             1             0              1
##   WT_Cold           0            -1              0
##   KO_Cold           0             1             -1