library(Biobase)
library(GEOquery)
# Collect the age at diagnosis of every sample in the discovery dataset.
#
# The earlier version preallocated N empty slots and then stored each age
# under the raw characteristics string (the "age.at.diagnosis: ..." text).
# Assigning to a list by a name it does not yet contain appends a new
# element instead of filling a preallocated slot, and samples that share the
# same age string overwrite one another. The result was N NULLs plus one
# entry per distinct age string rather than one entry per discovery sample.
# Keying the list by GSM accession keeps exactly one slot per sample.
N <- 443 # number of discovery samples expected
gsm_numbers <- 971957:972456
AgeVec <- vector("list", length(gsm_numbers))
names(AgeVec) <- paste0("GSM", gsm_numbers)
for (i in gsm_numbers) {
  GSMName <- paste0("GSM", i)
  GSM <- getGEO(GSMName, destdir = ".")
  characteristics <- Meta(GSM)$characteristics_ch1
  # Skip anything not flagged as discovery; isTRUE() also skips samples
  # whose first characteristic is missing instead of erroring in `if`.
  if (!isTRUE(characteristics[1] == "dataset: discovery")) {
    next
  }
  # Strip the "age.at.diagnosis: " label so only the value remains.
  # NOTE(review): assumes the age is always the third characteristic.
  Ps <- gsub("^.*?age.at.diagnosis: ", "", characteristics[3])
  AgeVec[[GSMName]] <- Ps
}
# Drop the slots belonging to samples that were skipped above.
AgeVec <- AgeVec[!vapply(AgeVec, is.null, logical(1))]
length(AgeVec) # should equal N (443)
# Same collection as above, but matching "discovery" with grepl() and
# restricted to the first three accessions for quick inspection.
#
# The earlier version showed only NULL when printing the first five
# elements. The grepl() test was not at fault: the list was preallocated
# with N NULL slots, and the ages were then appended after them under a new
# name (the raw characteristics string), so positions 1..N were never
# filled. Keying the list by GSM accession puts each age in its own slot.
N <- 443 # number of discovery samples expected in the full run
gsm_numbers <- 971957:971959
AgeVec <- vector("list", length(gsm_numbers))
names(AgeVec) <- paste0("GSM", gsm_numbers)
for (i in gsm_numbers) {
  GSMName <- paste0("GSM", i)
  GSM <- getGEO(GSMName, destdir = ".")
  characteristics <- Meta(GSM)$characteristics_ch1
  # grepl() returns FALSE for a missing value, so such samples are skipped.
  if (!grepl("discovery", characteristics[1])) {
    next
  }
  # Strip the "age.at.diagnosis: " label so only the value remains.
  # NOTE(review): assumes the age is always the third characteristic.
  Ps <- gsub("^.*?age.at.diagnosis: ", "", characteristics[3])
  AgeVec[[GSMName]] <- Ps
}
# Drop the slots belonging to samples that were skipped above.
AgeVec <- AgeVec[!vapply(AgeVec, is.null, logical(1))]
# head() does not pad with NULLs when fewer than five ages were collected.
head(AgeVec, 5)
# Convert the collected ages to numbers and summarise them.
#
# Flatten the list to a character vector first: unlist() discards NULL
# placeholders, which as.numeric() cannot coerce when applied to a list.
# noquote() only affects printing; it is kept so AgeVecnoquote still exists.
AgeVecnoquote <- noquote(as.character(unlist(AgeVec, use.names = FALSE)))
# Non-numeric entries become NA here (with a coercion warning).
AgeVecNumeric <- as.numeric(AgeVecnoquote)
# Ignore NAs
AgeVecNumeric <- AgeVecNumeric[!is.na(AgeVecNumeric)]
# Remove implausible values (such as the stray 972399) by value instead of
# by position: a fixed index range like 2:112 silently depends on the exact
# length and order of the vector and yields NAs or truncation otherwise.
# NOTE(review): 120 is an assumed upper bound for a human age in years.
max_plausible_age <- 120
AgeVecClean <- AgeVecNumeric[
  AgeVecNumeric >= 0 & AgeVecNumeric <= max_plausible_age
]
mean(AgeVecClean)
sd(AgeVecClean)
range(AgeVecClean)