# Build a `largo` x 3 character matrix of random letters and digits, with
# random row and column labels, used as input by the loops that follow.
library(stringi)
library(magrittr)

largo <- 30
# NOTE: the variable `c` shares its name with base::c(); calls such as
# c(...) still resolve to the function, so the name is kept as-is.
a <- sample(letters, largo, replace = TRUE)
b <- sample(1:10, largo, replace = TRUE)
c <- sample(c(1:10, letters[1:5]), largo, replace = TRUE)

# Random labels: 8-character row names and three 4-character column names.
my.rows <- stri_rand_strings(largo, 8, pattern = "[A-B0-9t-z]")
my.cols <- stri_rand_strings(3, 4, pattern = "[A-G]")

# `a` is character, so cbind() stores every cell (digits included) as text.
my.mat <- cbind(a, b, c) %>%
  set_rownames(my.rows) %>%
  set_colnames(my.cols)

# Accumulator for the numeric cells collected by the loop below.
mis_numbs <- NA
# Split the cells of `my.mat` into numbers and letters with two nested for
# loops. The matrix is visited row by row (all columns of row 1, then row 2...).
n.cells <- length(my.mat)
# Preallocate for the worst case (every cell in one category); the unused
# tail is trimmed after the loop, so an empty category ends up empty
# instead of holding a stray NA.
mis_numbs <- numeric(n.cells)
mis_letrs <- character(n.cells)
p1 <- 0 # how many numbers have been stored so far
p2 <- 0 # how many letters have been stored so far
for (i in seq_len(nrow(my.mat))) {
  for (j in seq_len(ncol(my.mat))) {
    my.item <- my.mat[i, j]
    # Convert once. NA is the expected signal for "not a number" here, so the
    # coercion warning that as.numeric() raises for letters is silenced.
    my.number <- suppressWarnings(as.numeric(my.item))
    if (!is.na(my.number)) {
      p1 <- p1 + 1
      mis_numbs[p1] <- my.number
      next
    }
    p2 <- p2 + 1
    mis_letrs[p2] <- my.item
  }
}
mis_numbs <- mis_numbs[seq_len(p1)]
mis_letrs <- mis_letrs[seq_len(p2)]

# The recorded output below lists both vectors, so both are printed.
mis_letrs
## [1] "k" "z" "d" "c" "b" "h" "z" "l" "g" "t" "g" "m" "b" "c" "o" "y" "a" "c" "z"
## [20] "k" "e" "o" "n" "a" "r" "n" "b" "s" "e" "b" "v" "a" "h" "r" "b" "a" "f" "z"
## [39] "b"
mis_numbs
## [1] 6 7 4 8 7 4 1 9 7 6 4 10 3 9 4 7 4 9 3 2 3 8 9 9 8
## [26] 3 10 9 8 3 3 2 8 10 4 5 7 6 2 5 1 4 5 4 10 1 10 10 8 6
## [51] 7
# Same split as the nested loops, but over the flattened matrix with a single
# for loop. c() drops the dimensions and returns the cells column by column,
# which is why the elements come out in a different order.
matriz.aplanada <- c(my.mat)
# Preallocate for the worst case and trim afterwards, so an empty category
# ends up empty instead of holding a stray NA.
mis_numbs <- numeric(length(matriz.aplanada))
mis_letrs <- character(length(matriz.aplanada))
p1 <- 0 # how many numbers have been stored so far
p2 <- 0 # how many letters have been stored so far
for (i in seq_along(matriz.aplanada)) {
  my.item <- matriz.aplanada[i]
  # Convert once. NA is the expected signal for "not a number" here, so the
  # coercion warning that as.numeric() raises for letters is silenced.
  my.number <- suppressWarnings(as.numeric(my.item))
  if (!is.na(my.number)) {
    p1 <- p1 + 1
    mis_numbs[p1] <- my.number
    next
  }
  p2 <- p2 + 1
  mis_letrs[p2] <- my.item
}
mis_numbs <- mis_numbs[seq_len(p1)]
mis_letrs <- mis_letrs[seq_len(p2)]

# The recorded output below lists both vectors, so both are printed.
mis_letrs
## [1] "k" "z" "d" "c" "b" "h" "z" "l" "g" "t" "g" "m" "c" "o" "y" "c" "z" "k" "o"
## [20] "n" "r" "n" "s" "e" "v" "h" "r" "a" "f" "z" "b" "a" "e" "a" "b" "b" "a" "b"
## [39] "b"
mis_numbs
## [1] 6 4 7 1 7 4 3 4 4 3 3 9 9 3 9 8 3 8 10 5 7 2 5 4 5
## [26] 4 1 10 8 7 7 8 4 9 6 10 9 7 9 2 8 8 10 3 2 4 6 1 10 10
## [51] 6
# Same split written with `repeat`: the loop has no condition of its own, so
# the exit test (`break`) has to be written explicitly inside the body.
mis_numbs <- NULL
mis_letrs <- NULL
# Flatten once, outside the loop, instead of on every iteration.
matriz.aplanada <- c(my.mat)
i <- 0
repeat {
  i <- i + 1
  if (i > length(matriz.aplanada)) {
    break
  }
  my.item <- matriz.aplanada[i]
  # Convert once. NA is the expected signal for "not a number" here, so the
  # coercion warning that as.numeric() raises for letters is silenced.
  my.number <- suppressWarnings(as.numeric(my.item))
  if (!is.na(my.number)) {
    # `x %<>% f(y)` is magrittr shorthand for `x <- f(x, y)`.
    mis_numbs %<>% c(my.number)
    next
  }
  mis_letrs %<>% c(my.item)
}

# The recorded output below lists both vectors, so both are printed.
mis_letrs
## [1] "k" "z" "d" "c" "b" "h" "z" "l" "g" "t" "g" "m" "c" "o" "y" "c" "z" "k" "o"
## [20] "n" "r" "n" "s" "e" "v" "h" "r" "a" "f" "z" "b" "a" "e" "a" "b" "b" "a" "b"
## [39] "b"
mis_numbs
## [1] 6 4 7 1 7 4 3 4 4 3 3 9 9 3 9 8 3 8 10 5 7 2 5 4 5
## [26] 4 1 10 8 7 7 8 4 9 6 10 9 7 9 2 8 8 10 3 2 4 6 1 10 10
## [51] 6
# Same split written with `while`: the loop runs for as long as the counter
# has not reached the last element of the flattened matrix.
mis_numbs <- NULL
mis_letrs <- NULL
matriz.aplanada <- c(my.mat)
i <- 0
while (i < length(matriz.aplanada)) {
  i <- i + 1
  my.item <- matriz.aplanada[i]
  # Convert once. NA is the expected signal for "not a number" here, so the
  # coercion warning that as.numeric() raises for letters is silenced.
  my.number <- suppressWarnings(as.numeric(my.item))
  if (is.na(my.number)) {
    # `x %<>% f(y)` is magrittr shorthand for `x <- f(x, y)`.
    mis_letrs %<>% c(my.item)
  } else {
    mis_numbs %<>% c(my.number)
  }
}

# The recorded output below lists both vectors, so both are printed.
mis_letrs
## [1] "k" "z" "d" "c" "b" "h" "z" "l" "g" "t" "g" "m" "c" "o" "y" "c" "z" "k" "o"
## [20] "n" "r" "n" "s" "e" "v" "h" "r" "a" "f" "z" "b" "a" "e" "a" "b" "b" "a" "b"
## [39] "b"
mis_numbs
## [1] 6 4 7 1 7 4 3 4 4 3 3 9 9 3 9 8 3 8 10 5 7 2 5 4 5
## [26] 4 1 10 8 7 7 8 4 9 6 10 9 7 9 2 8 8 10 3 2 4 6 1 10 10
## [51] 6
# Make sure GEOquery is available, then download and parse the SOFT file of
# each series listed in `codes`, keeping the parsed objects in `metadata.list`.
# NOTE(review): GEOquery is distributed through Bioconductor - confirm that
# install.packages() can reach it with the repositories configured here.
if (!("GEOquery" %in% installed.packages())) {
  install.packages("GEOquery")
}
library("GEOquery")
library(tidyverse)

codes <- c('GSE159378', 'nada', 'GSE159377', 'GSE154900', 'GSE151154', 'nada', 'GSE146754')
metadata.list <- list()
for (i in seq_along(codes)) {
  down.path <- NULL
  # try() reports a failed download and lets the loop carry on; in that case
  # the assignment never happens and down.path keeps its NULL value.
  try(down.path <- getGEOfile(codes[i]))
  if (is.null(down.path)) {
    next
  }
  soft <- getGEO(filename = down.path)
  # Stored at the position of its code, so skipped codes leave gaps.
  metadata.list[[i]] <- soft
}
# Console output recorded for this chunk (the R session reported in Spanish):
## Error in file.exists(destfile) : objeto 'destfile' no encontrado
## Error in download.file(myurl, destfile, mode = mode, quiet = TRUE, method = getOption("download.file.method.GEOquery")) :
## no fue posible abrir la URL 'https://ftp.ncbi.nlm.nih.gov/geo/series/GSE159nnn/GSE159377/soft/GSE159377_family.soft.gz'
## Error in file.exists(destfile) : objeto 'destfile' no encontrado
#metadata.list %>% glimpse
#metadata.list %>% length
# For every parsed series in `metadata.list`, collect its title and the names
# of its GSM samples (joined into one comma-separated string per series).
My.sample.codes <- NULL
Titles <- NULL
# seq_along() yields no iterations for an empty list, whereas 1:length()
# would try positions 1 and 0.
for (i in seq_along(metadata.list)) {
  extracted.metadata <- metadata.list[[i]]
  # Positions can be NULL (the download loop leaves gaps for codes it skipped).
  if (is.null(extracted.metadata)) {
    next
  }
  my.slots <- slotNames(extracted.metadata)
  for (j in seq_along(my.slots)) {
    # Fetch the slot once; it is inspected by name and indexed below.
    slot.content <- slot(extracted.metadata, my.slots[j])
    my.names <- names(slot.content)

    # A slot with an element named like "title"/"Title" provides the title.
    is.title <- str_detect(my.names, '(t|T)itle')
    if (any(is.title)) {
      my.title0 <- my.names[is.title]
      my.title <- slot.content[[my.title0]]
      Titles <- c(Titles, my.title)
    }

    # A slot whose element names contain "GSM" provides the sample codes.
    is.gsm <- str_detect(my.names, 'GSM.*')
    if (any(is.gsm)) {
      GSM0 <- my.names[is.gsm]
      GSM <- paste(GSM0, collapse = ',')
      My.sample.codes <- c(My.sample.codes, GSM)
    }
  }
}
cbind(My.sample.codes, Titles)
## My.sample.codes
## [1,] "GSM4827519,GSM4827520,GSM4827521,GSM4827522,GSM4827523,GSM4827524"
## [2,] "GSM4682311,GSM4682312"
## [3,] "GSM4567350,GSM4567351,GSM4567352,GSM4567353,GSM4567354,GSM4567355,GSM4567356,GSM4567357"
## [4,] "GSM4405425,GSM4405426,GSM4405427,GSM4405428,GSM4405429,GSM4405430"
## Titles
## [1,] "An enhancer cluster promotes NPCs formation from ESCs through maintaining HoxB activation by long-range chromatin interaction"
## [2,] "Widespread traces of lytic KSHV in primary effusion lymphoma at single-cell resolution"
## [3,] "Global analysis of kidney gene expression by RNA-Seq of aged CTRP1-deficient mice on high-fat diet"
## [4,] "Smc3 regulates B-cell transit through germinal centers and restricts their malignant transformation (RNA-seq)"
# Download the supplementary archive of GSE43255, unpack it into the folder
# it was downloaded to, and decompress every .gz file found there.
library(GEOquery)
GSE43255.supp <- getGEOSuppFiles(
  GEO = 'GSE43255',
  makeDirectory = TRUE,
  fetch_files = TRUE
)
# The row names of the returned table are used as the path of the tar file.
full.path.raw_tar <- row.names(GSE43255.supp)
# Keep everything in that path that precedes "GSE<digits>?RAW", i.e. the
# containing folder. str_match() returns it as a character matrix.
gse.folder <- str_match(full.path.raw_tar, '.*(?=GSE\\d+.RAW.*)')
untar(tarfile = full.path.raw_tar, exdir = gse.folder)

my.gz <- list.files(gse.folder) %>%
  str_extract_all('.*gz') %>%
  unlist()
# paste0() would turn an empty name vector into the bare folder path, so the
# "no .gz files" case is handled explicitly.
gz.paths <- if (length(my.gz) > 0) paste0(gse.folder, my.gz) else character(0)
for (i in seq_along(gz.paths)) {
  # Keep the compressed originals and replace any earlier extraction.
  gunzip(gz.paths[i], remove = FALSE, overwrite = TRUE)
}
# Read every .gpr file in `gse.folder` into a data frame, stored in
# `my.gpr.DFs` under the GSM sample id found in the file path.
# '.*gpr' matches "x.gpr" and also the leading part of "x.gpr.gz", so the same
# name can be extracted twice (presumably the archive members are *.gpr.gz -
# TODO confirm); unique() makes sure each file is read only once.
gpr.names <- list.files(gse.folder) %>%
  str_extract_all('.*gpr') %>%
  unlist() %>%
  unique()
# paste0() would turn an empty name vector into the bare folder path, so the
# "no .gpr files" case is handled explicitly.
all.raw.files <- if (length(gpr.names) > 0) {
  paste0(gse.folder, gpr.names)
} else {
  character(0)
}

my.gpr.DFs <- list()
for (i in seq_along(all.raw.files)) {
  line <- 0L
  input <- 'start'
  a.file <- all.raw.files[i]
  sample.id <- str_extract(a.file, 'GSM\\d+')
  # Read the file one line at a time until the line containing
  # "Block"/"block" is reached; that line is used below as the header row.
  while (!str_detect(input, '(B|b)lock')) {
    line <- line + 1L
    input <- read_lines(a.file, skip = line - 1L, n_max = 1L)
    # Nothing left to read: fail with a clear message rather than with the
    # "argument is of length zero" error the while() condition would raise.
    if (length(input) == 0L) {
      stop("No line containing 'Block' found in ", a.file, call. = FALSE)
    }
  }
  # Skip the lines before the header row and parse the rest as a table.
  my.gpr.DFs[[sample.id]] <- as.data.frame(
    read_delim(a.file, delim = '\t', skip = line - 1L)
  )
}
# Footnote: FONDECYT Postdoctoral Fellow, Universidad de Chile, deepen.data@gmail.com
# Footnote: Pregrado (undergraduate), Universidad de Chile