# Extract the unique "Accession" entries from a GEO DataSets search export
# (gds_result.txt) and write them to a dated CSV in the same directory.
# Search that produced the export:
# https://www.ncbi.nlm.nih.gov/gds/?term=TCDD
#
# Lines starting with "##" are console output recorded from an earlier run;
# the exact counts depend on the export that was downloaded.

# NOTE(review): rm(list = ls()) wipes every object in the calling environment.
# It is kept so the script's side effects stay unchanged, but consider
# removing it and running the script in a fresh R session instead.
rm(list = ls())

# Input data ----
dir_path <- "C:\\Users\\xut2\\Desktop\\GEO\\"
dir_path_name <- list.files(
  path = dir_path,
  pattern = ".*",
  full.names = TRUE,
  recursive = TRUE
)
dir_path_name
## [1] "C:\\Users\\xut2\\Desktop\\GEO\\gds_result.txt"
## [2] "C:\\Users\\xut2\\Desktop\\GEO\\GSE_file_GEO.R"
## [3] "C:\\Users\\xut2\\Desktop\\GEO\\GSE_file_GEO.spin.R"
## [4] "C:\\Users\\xut2\\Desktop\\GEO\\GSE_file_GEO.spin.Rmd"

# Locate the export. fixed = TRUE makes the "." in the file name literal
# instead of a regex wildcard.
gds_file <- grep("gds_result.txt", dir_path_name, value = TRUE, fixed = TRUE)
# read.delim() needs exactly one path; fail with a readable message otherwise.
if (length(gds_file) != 1L) {
  stop(
    "Expected exactly one file matching 'gds_result.txt' under ", dir_path,
    " but found ", length(gds_file), ".",
    call. = FALSE
  )
}

# NOTE(review): quote = "\t" is unusual (tab is also the field separator);
# presumably it is meant to stop ' and " being treated as quote characters.
# quote = "" is the conventional way to do that -- confirm before changing,
# since it may alter how the file is split into columns.
data_1 <- read.delim(
  gds_file,
  header = FALSE,
  stringsAsFactors = FALSE,
  quote = "\t"
)
dim(data_1)
## [1] 33465 3
# View(data_1)

# Keep only the rows whose third column mentions "Accession".
data_2 <- data_1[grep("Accession", data_1$V3), ]
dim(data_2)
## [1] 4612 3
# View(data_2)

# Drop fully duplicated rows, then inspect the first few.
data_2 <- unique(data_2)
head(data_2)
## V1 V2 V3
## 2 Series Accession: GSE206948
## 20 Series Accession: GSE206294
## 28 Series Accession: GSE203302
## 36 Series Accession: GSE205502
## 44 Series Accession: GSE195728
## 52 Series Accession: GSE193758

# From here on data_2 is a character vector of the distinct V3 values.
data_2 <- unique(data_2$V3)
length(data_2)
## [1] 4611

# Output ----
# Written as <dir_path><YYYY-MM-DD>-GSE_list.csv; write.csv() on a vector
# produces a single column named "x".
write.csv(
  data_2,
  paste0(dir_path, Sys.Date(), "-", "GSE_list.csv"),
  row.names = FALSE,
  na = ""
)