Caso de estudio 1: Comparación entre tres tipos de cáncer de
mama
# Targets
targetsDF <- read.csv(file=file.path(getwd(),"/datos/targets.csv"), header = TRUE, sep = ",")
sampleNames <- as.character(targetsDF$ShortName)
sampleColor <- as.character(targetsDF$Colors)
# Creamos un objeto AnnotatedDataFrame
targets <- AnnotatedDataFrame(targetsDF)
# Leemos archivos .CEL
CELfiles <- targetsDF$fileName
invisible(capture.output({
rawData <- read.celfiles(file.path(getwd(),"datos", CELfiles), phenoData=targets)
}))
## Cargando paquete requerido: pd.hg.u133a
## Attempting to obtain 'pd.hg.u133a' from BioConductor website.
## Checking to see if your internet connection works...
## installing the source package 'pd.hg.u133a'
## Cargando paquete requerido: pd.hg.u133a
## Platform design info loaded.
rawData
## ExpressionFeatureSet (storageMode: lockedEnvironment)
## assayData: 506944 features, 49 samples
## element names: exprs
## protocolData
## rowNames: 1 2 ... 49 (49 total)
## varLabels: exprs dates
## varMetadata: labelDescription channel
## phenoData
## rowNames: 1 2 ... 49 (49 total)
## varLabels: fileName grupos ShortName Colors
## varMetadata: labelDescription channel
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation: pd.hg.u133a
Exploración y control de calidad
boxplot(rawData, which="all", las=2, cex.axis=0.6, names=sampleNames, col=sampleColor)

# Agrupación jerárquica
clust.euclid.average <- hclust(dist(t(exprs(rawData))), method="average")
plot(clust.euclid.average, labels=sampleNames, main="Agrupamiento jerárquico RawData",
cex=0.7, hang=-1)

plotPCA(exprs(rawData), labels=sampleNames, dataDesc="raw Data", colors=sampleColor)

rerun <- FALSE
if(rerun){
arrayQualityMetrics(rawData, reporttitle="QC_RawData", force= TRUE)
}
Normalizacion
invisible(capture.output({
eset <- rma(rawData)
}))
write.exprs(eset, file.path(getwd(), "NormData_cancermama.txt"))
eset
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 22283 features, 49 samples
## element names: exprs
## protocolData
## rowNames: 1 2 ... 49 (49 total)
## varLabels: exprs dates
## varMetadata: labelDescription channel
## phenoData
## rowNames: 1 2 ... 49 (49 total)
## varLabels: fileName grupos ShortName Colors
## varMetadata: labelDescription channel
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation: pd.hg.u133a
Filtrado
annotation(eset) <- "hgu133a.db"
invisible(capture.output({
eset_filtered <- nsFilter(eset, var.func = IQR,
var.cutoff=0.75, var.filter=TRUE,
require.entrez = TRUE,
filterByQuantile=TRUE)
}))
##
##
print(eset_filtered)
## $eset
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 3161 features, 49 samples
## element names: exprs
## protocolData
## rowNames: 1 2 ... 49 (49 total)
## varLabels: exprs dates
## varMetadata: labelDescription channel
## phenoData
## rowNames: 1 2 ... 49 (49 total)
## varLabels: fileName grupos ShortName Colors
## varMetadata: labelDescription channel
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation: hgu133a.db
##
## $filter.log
## $filter.log$numDupsRemoved
## [1] 7551
##
## $filter.log$numLowVar
## [1] 9482
##
## $filter.log$numRemoved.ENTREZID
## [1] 2079
##
## $filter.log$feature.exclude
## [1] 10
filteredEset <- eset_filtered$eset
filteredData <- exprs(filteredEset)
colnames(filteredData) <- pData(eset_filtered$eset)$ShortName
Selección de genes
treatment <- pData(filteredEset)$grupos
lev <- factor(treatment, levels = unique(treatment))
design <- model.matrix(~0+lev)
colnames(design) <- levels(lev)
rownames(design) <- sampleNames
head(design)
## APO BAS LUMI
## 78_APO 1 0 0
## 83_APO 1 0 0
## 86_APO 1 0 0
## 87_APO 1 0 0
## 103_APO 1 0 0
## 110_APO 1 0 0
Matriz de contraste
cont.matrix <- makeContrasts(
APO_vs_BAS = APO - BAS,
APO_vs_LUMI = APO - LUMI,
BAS_vs_LUMI = BAS - LUMI,
levels = design
)
print(cont.matrix)
## Contrasts
## Levels APO_vs_BAS APO_vs_LUMI BAS_vs_LUMI
## APO 1 1 0
## BAS -1 0 1
## LUMI 0 -1 -1
# Estimamos el modelo
fit <- lmFit(filteredData, design)
fit.main <- contrasts.fit(fit, cont.matrix)
fit.main <- eBayes(fit.main)
topTab_APO_vs_BAS <- topTable(fit.main, number=nrow(fit.main), coef="APO_vs_BAS",
adjust="fdr", p.value=0.05, lfc=3)
topTab_APO_vs_LUMI <- topTable(fit.main, number=nrow(fit.main), coef="APO_vs_LUMI",
adjust="fdr", p.value=0.05, lfc=3)
topTab_BAS_vs_LUMI <- topTable(fit.main, number=nrow(fit.main), coef="BAS_vs_LUMI",
adjust="fdr", p.value=0.05, lfc=3)
#topTab <- topTable(fit.main, number=nrow(fit.main), adjust="fdr", p.value=0.05, lfc=3)
#dim(topTab)
#head(topTab)
Anotación de los resultados
keytypes(hgu133a.db)
## [1] "ACCNUM" "ALIAS" "ENSEMBL" "ENSEMBLPROT" "ENSEMBLTRANS"
## [6] "ENTREZID" "ENZYME" "EVIDENCE" "EVIDENCEALL" "GENENAME"
## [11] "GENETYPE" "GO" "GOALL" "IPI" "MAP"
## [16] "OMIM" "ONTOLOGY" "ONTOLOGYALL" "PATH" "PFAM"
## [21] "PMID" "PROBEID" "PROSITE" "REFSEQ" "SYMBOL"
## [26] "UCSCKG" "UNIPROT"
anotaciones <- AnnotationDbi::select(hgu133a.db, keys=rownames(filteredData),
columns=c("ENTREZID", "SYMBOL"))
## 'select()' returned 1:1 mapping between keys and columns
anotaciones_unique <- anotaciones[!duplicated(anotaciones$PROBEID), ]
head(anotaciones_unique)
annotatedTopTable <- function(topTab, anotPackage)
{
topTab <- cbind(PROBEID=rownames(topTab), topTab)
myProbes <- rownames(topTab)
thePackage <- eval(parse(text = anotPackage))
geneAnots <- select(thePackage, myProbes, c("SYMBOL", "ENTREZID", "GENENAME"))
annotatedTopTab<- merge(x=geneAnots, y=topTab, by.x="PROBEID", by.y="PROBEID")
return(annotatedTopTab)
}
topAnnotated_APO_vs_BAS <- annotatedTopTable(topTab_APO_vs_BAS,
anotPackage = "hgu133a.db")
## 'select()' returned 1:1 mapping between keys and columns
topAnnotated_APO_vs_LUMI <- annotatedTopTable(topTab_APO_vs_LUMI,
anotPackage = "hgu133a.db")
## 'select()' returned 1:1 mapping between keys and columns
topAnnotated_BAS_vs_LUMI <- annotatedTopTable(topTab_BAS_vs_LUMI,
anotPackage = "hgu133a.db")
## 'select()' returned 1:1 mapping between keys and columns
head(topAnnotated_APO_vs_BAS)
head(topAnnotated_APO_vs_LUMI)
head(topAnnotated_BAS_vs_LUMI)
geneSymbols <- select(hgu133a.db, rownames(fit.main), c("SYMBOL"))
## 'select()' returned 1:1 mapping between keys and columns
SYMBOLS<- geneSymbols$SYMBOL
volcanoplot(fit.main, coef=1, highlight=4, names=SYMBOLS,
main=paste("Differentially expressed genes", colnames(cont.matrix)[1], sep="\n"))
abline(v=c(-1,1))

for (i in colnames(cont.matrix)){
volcanoplot(fit.main, coef=i, highlight=4, names=SYMBOLS,
main=paste("Differentially expressed genes",i, sep="\n"))
abline(v=c(-1,1))
}


Caso de estudio 2: Efecto del programa génico termogénico durante la
diferenciación de adipocitos
# Targets
targetsDF <- read.csv(file=file.path(getwd(),"/datos2/targets2.csv"), header = TRUE, sep = ",")
sampleNames <- as.character(targetsDF$ShortName)
sampleColor <- as.character(targetsDF$Colors)
# Creamos un objeto AnnotatedDataFrame
targets <- AnnotatedDataFrame(targetsDF)
# Leemos archivos .CEL
CELfiles <- targetsDF$fileName
invisible(capture.output({
rawData <- read.celfiles(file.path(getwd(),"datos2", CELfiles), phenoData=targets)
}))
## Platform design info loaded.
rawData
## GeneFeatureSet (storageMode: lockedEnvironment)
## assayData: 1416100 features, 12 samples
## element names: exprs
## protocolData
## rowNames: 1 2 ... 12 (12 total)
## varLabels: exprs dates
## varMetadata: labelDescription channel
## phenoData
## rowNames: 1 2 ... 12 (12 total)
## varLabels: fileName grupos ShortName Colors
## varMetadata: labelDescription channel
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation: pd.mogene.2.1.st
Normalizacion
invisible(capture.output({
eset <- rma(rawData)
}))
write.exprs(eset, file.path(getwd(), "NormData_adipo.txt"))
eset
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 41345 features, 12 samples
## element names: exprs
## protocolData
## rowNames: 1 2 ... 12 (12 total)
## varLabels: exprs dates
## varMetadata: labelDescription channel
## phenoData
## rowNames: 1 2 ... 12 (12 total)
## varLabels: fileName grupos ShortName Colors
## varMetadata: labelDescription channel
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation: pd.mogene.2.1.st
Filtrado
annotation(eset) <- "mogene21sttranscriptcluster.db"
invisible(capture.output({
eset_filtered <- nsFilter(eset, var.func = IQR,
var.cutoff=0.75, var.filter=TRUE,
require.entrez = TRUE,
filterByQuantile = TRUE)
}))
print(eset_filtered)
## $eset
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 5834 features, 12 samples
## element names: exprs
## protocolData
## rowNames: 1 2 ... 12 (12 total)
## varLabels: exprs dates
## varMetadata: labelDescription channel
## phenoData
## rowNames: 1 2 ... 12 (12 total)
## varLabels: fileName grupos ShortName Colors
## varMetadata: labelDescription channel
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation: mogene21sttranscriptcluster.db
##
## $filter.log
## $filter.log$numDupsRemoved
## [1] 650
##
## $filter.log$numLowVar
## [1] 17503
##
## $filter.log$numRemoved.ENTREZID
## [1] 17358
filteredEset <- eset_filtered$eset
Matriz de contraste
treatment <- pData(filteredEset)$grupos
lev <- factor(treatment, levels = unique(treatment))
design <- model.matrix(~0+lev)
colnames(design) <- levels(lev)
rownames(design) <- sampleNames
cont.matrix <- makeContrasts(
KO_vs_WT_RT = KO_RT - WT_RT,
KO_vs_WT_Cold = KO_Cold - WT_Cold,
KORT_vs_KOCold = KO_RT - KO_Cold,
levels = design
)
print(cont.matrix)
## Contrasts
## Levels KO_vs_WT_RT KO_vs_WT_Cold KORT_vs_KOCold
## WT_RT -1 0 0
## KO_RT 1 0 1
## WT_Cold 0 -1 0
## KO_Cold 0 1 -1