#https://www.ncbi.nlm.nih.gov/gds/?term=TCDD
############################### input data
# Purpose: read a GEO DataSets search export (gds_result.txt), keep the rows
# whose third column contains "Accession", and write the unique values of that
# column to a dated CSV in the same directory.
#
# The former `rm(list = ls())` was removed on purpose: it wipes the caller's
# workspace when the script is sourced, and nothing below depends on a clean
# environment because every object used here is created by this script.

dir_path <- "C:\\Users\\xut2\\Desktop\\GEO\\"

# Recursive listing of everything under dir_path (full paths).
dir_path_name <- list.files(
  path = dir_path,
  pattern = ".*",
  full.names = TRUE,
  recursive = TRUE
)
dir_path_name
## Recorded output of a previous run:
## [1] "C:\\Users\\xut2\\Desktop\\GEO\\gds_result.txt"
## [2] "C:\\Users\\xut2\\Desktop\\GEO\\GSE_file_GEO.R"
## [3] "C:\\Users\\xut2\\Desktop\\GEO\\GSE_file_GEO.spin.R"
## [4] "C:\\Users\\xut2\\Desktop\\GEO\\GSE_file_GEO.spin.Rmd"

# Select the export by exact file name. The previous regex match on the full
# path ("." matches any character, and directory names were searched too) could
# return zero or several paths, which made read.delim() fail with an unrelated
# connection error.
gds_file <- dir_path_name[basename(dir_path_name) == "gds_result.txt"]
if (length(gds_file) != 1L) {
  stop(
    "Expected exactly one 'gds_result.txt' under ", dir_path,
    " but found ", length(gds_file),
    if (length(gds_file) > 0L) paste0(": ", paste(gds_file, collapse = ", ")),
    call. = FALSE
  )
}

# NOTE(review): quote = "\t" is kept exactly as in the original so parsing is
# unchanged; it looks like the intent may have been to disable quoting
# (quote = "") -- confirm before changing, since it affects row/column layout.
data_1 <- read.delim(
  gds_file,
  header = FALSE,
  stringsAsFactors = FALSE,
  quote = "\t"
)
dim(data_1)
## [1] 33465     3   (previous run; depends on the export)
# View(data_1)

# Without a V3 column `data_1$V3` would be NULL and the script would silently
# write an empty list, so fail loudly instead.
if (!"V3" %in% names(data_1)) {
  stop(
    "'", gds_file, "' was parsed into ", ncol(data_1),
    " column(s); a third column (V3) is required.",
    call. = FALSE
  )
}

# Keep only the rows whose third column mentions "Accession".
data_2 <- data_1[grepl("Accession", data_1$V3, fixed = TRUE), ]
dim(data_2)
## [1] 4612    3   (previous run)
# View(data_2)

data_2 <- unique(data_2)
head(data_2)
##        V1 V2                   V3
## 2  Series    Accession: GSE206948
## 20 Series    Accession: GSE206294
## 28 Series    Accession: GSE203302
## 36 Series    Accession: GSE205502
## 44 Series    Accession: GSE195728
## 52 Series    Accession: GSE193758

# Collapse to the distinct V3 strings; from here on data_2 is a character
# vector (values still carry the "Accession: " prefix).
data_2 <- unique(data_2$V3)
length(data_2)
## [1] 4611   (previous run)

# Output file: <dir_path><YYYY-MM-DD>-GSE_list.csv. dir_path already ends with
# a path separator, so plain concatenation yields a valid path.
out_file <- paste0(dir_path, Sys.Date(), "-", "GSE_list.csv")
write.csv(data_2, out_file, row.names = FALSE, na = "")