# NOTE: the global environment is deliberately NOT wiped here.
# `rm(list = ls())` only deletes user objects (attached packages and options
# are left as they are), so it does not give a clean session, and it destroys
# the workspace of anyone who source()s this file. Restart R to get a fresh
# session instead. Every object used below is created by this script itself.
###############################input data 
library(GEOquery)
## 载入需要的程辑包:Biobase
## 载入需要的程辑包:BiocGenerics
## 
## 载入程辑包:'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
# Retrieve the GEO series GSE101464 via GEOquery.
gse_1 <- getGEO("GSE101464")
## Found 1 file(s)
## GSE101464_series_matrix.txt.gz
# Inspect the result: its class, how many elements it holds, the first
# element itself, and the element names.
class(gse_1)
length(gse_1)
gse_1[[1]]
names(gse_1)
## [1] "list"
## [1] 1
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 13493 features, 4 samples 
##   element names: exprs 
## protocolData: none
## phenoData
##   sampleNames: GSM2703969 GSM2703970 GSM2703971 GSM2703972
##   varLabels: title geo_accession ... tissue:ch1 (37 total)
##   varMetadata: labelDescription
## featureData
##   featureNames: TC01000001.hg.1 TC01000002.hg.1 ... TC05003432.hg.1
##     (13493 total)
##   fvarLabels: ID probeset_id ... SPOT_ID (15 total)
##   fvarMetadata: Column Description labelDescription
## experimentData: use 'experimentData(object)'
## Annotation: GPL17586
## [1] "GSE101464_series_matrix.txt.gz"
#View(gse_1)
# Continue with the first element of the downloaded list.
gse_2 <- gse_1[[1]]
##################################
# Extract the three components of that object (nothing is printed here).
sample_1 <- pData(gse_2)      # sample information
gene_1 <- fData(gse_2)        # gene annotation
expression_1 <- exprs(gse_2)  # expression data
# Report the class of each extracted object.
class(sample_1)
class(gene_1)
class(expression_1)
## [1] "data.frame"
## [1] "data.frame"
## [1] "matrix" "array"
# Dimensions (rows, columns) of each extracted object.
dim(sample_1)
dim(gene_1)
dim(expression_1)
## [1]  4 37
## [1] 13493    15
## [1] 13493     4
# Preview the first rows of the sample information (head()'s default of 6).
head(sample_1, n = 6L)
##                                       title geo_accession                status
## GSM2703969 SK-N-SH at DMSO, biological rep1    GSM2703969 Public on Jul 12 2020
## GSM2703970 SK-N-SH at DMSO, biological rep2    GSM2703970 Public on Jul 12 2020
## GSM2703971 SK-N-SH at TCDD, biological rep1    GSM2703971 Public on Jul 12 2020
## GSM2703972 SK-N-SH at TCDD, biological rep2    GSM2703972 Public on Jul 12 2020
##            submission_date last_update_date type channel_count
## GSM2703969     Jul 14 2017      Jul 12 2020  RNA             1
## GSM2703970     Jul 14 2017      Jul 12 2020  RNA             1
## GSM2703971     Jul 14 2017      Jul 12 2020  RNA             1
## GSM2703972     Jul 14 2017      Jul 12 2020  RNA             1
##                                          source_name_ch1 organism_ch1
## GSM2703969 human neuroblastoma cell line, exposure, 24 h Homo sapiens
## GSM2703970 human neuroblastoma cell line, exposure, 24 h Homo sapiens
## GSM2703971 human neuroblastoma cell line, exposure, 24 h Homo sapiens
## GSM2703972 human neuroblastoma cell line, exposure, 24 h Homo sapiens
##            characteristics_ch1 characteristics_ch1.1
## GSM2703969       tissue: brain    cell line: SK-N-SH
## GSM2703970       tissue: brain    cell line: SK-N-SH
## GSM2703971       tissue: brain    cell line: SK-N-SH
## GSM2703972       tissue: brain    cell line: SK-N-SH
##                                                                                                                                                                                                                                                                                                                                           treatment_protocol_ch1
## GSM2703969 Cells were seeded in culture dish at appropriate density for 24-36 hr. At 70% confluency, cells were exposed to TCDD. TCDD was purchased from Wellington Laboratories Inc. (Ontario, Canada) and dissolved in dimethyl sulfoxide (DMSO). TCDD was employed at the low concentration of 10-10 M. DMSO was present at 0.1% or lower for all treatments.
## GSM2703970 Cells were seeded in culture dish at appropriate density for 24-36 hr. At 70% confluency, cells were exposed to TCDD. TCDD was purchased from Wellington Laboratories Inc. (Ontario, Canada) and dissolved in dimethyl sulfoxide (DMSO). TCDD was employed at the low concentration of 10-10 M. DMSO was present at 0.1% or lower for all treatments.
## GSM2703971 Cells were seeded in culture dish at appropriate density for 24-36 hr. At 70% confluency, cells were exposed to TCDD. TCDD was purchased from Wellington Laboratories Inc. (Ontario, Canada) and dissolved in dimethyl sulfoxide (DMSO). TCDD was employed at the low concentration of 10-10 M. DMSO was present at 0.1% or lower for all treatments.
## GSM2703972 Cells were seeded in culture dish at appropriate density for 24-36 hr. At 70% confluency, cells were exposed to TCDD. TCDD was purchased from Wellington Laboratories Inc. (Ontario, Canada) and dissolved in dimethyl sulfoxide (DMSO). TCDD was employed at the low concentration of 10-10 M. DMSO was present at 0.1% or lower for all treatments.
##                                                                                                                                                                                                                                                                                                                                                         growth_protocol_ch1
## GSM2703969 SK-N-SH cells were routinely grown in in Dulbecco's modified Eagle's medium (DMEM, DMEM, Gibco, Life Technologies) supplemented with 10% Fetal Bovine Serum Australia Source (FBS, Corning) and the mix of 100 U/ml penicillin with 100 μg/ml streptomycin (1% P/S; DMEM, Gibco, Life Technologies). Cells were grown at 37 °C in a 5% CO2 humidified incubator.
## GSM2703970 SK-N-SH cells were routinely grown in in Dulbecco's modified Eagle's medium (DMEM, DMEM, Gibco, Life Technologies) supplemented with 10% Fetal Bovine Serum Australia Source (FBS, Corning) and the mix of 100 U/ml penicillin with 100 μg/ml streptomycin (1% P/S; DMEM, Gibco, Life Technologies). Cells were grown at 37 °C in a 5% CO2 humidified incubator.
## GSM2703971 SK-N-SH cells were routinely grown in in Dulbecco's modified Eagle's medium (DMEM, DMEM, Gibco, Life Technologies) supplemented with 10% Fetal Bovine Serum Australia Source (FBS, Corning) and the mix of 100 U/ml penicillin with 100 μg/ml streptomycin (1% P/S; DMEM, Gibco, Life Technologies). Cells were grown at 37 °C in a 5% CO2 humidified incubator.
## GSM2703972 SK-N-SH cells were routinely grown in in Dulbecco's modified Eagle's medium (DMEM, DMEM, Gibco, Life Technologies) supplemented with 10% Fetal Bovine Serum Australia Source (FBS, Corning) and the mix of 100 U/ml penicillin with 100 μg/ml streptomycin (1% P/S; DMEM, Gibco, Life Technologies). Cells were grown at 37 °C in a 5% CO2 humidified incubator.
##            molecule_ch1
## GSM2703969    total RNA
## GSM2703970    total RNA
## GSM2703971    total RNA
## GSM2703972    total RNA
##                                                                                                                                                             extract_protocol_ch1
## GSM2703969 Total RNA was extracted from cell samples using Trizol/Chloroform method, and then purified with magnetic beads of Agencourt Ampure (Beckman Coulter, Brea, CA, USA).
## GSM2703970 Total RNA was extracted from cell samples using Trizol/Chloroform method, and then purified with magnetic beads of Agencourt Ampure (Beckman Coulter, Brea, CA, USA).
## GSM2703971 Total RNA was extracted from cell samples using Trizol/Chloroform method, and then purified with magnetic beads of Agencourt Ampure (Beckman Coulter, Brea, CA, USA).
## GSM2703972 Total RNA was extracted from cell samples using Trizol/Chloroform method, and then purified with magnetic beads of Agencourt Ampure (Beckman Coulter, Brea, CA, USA).
##            label_ch1
## GSM2703969    biotin
## GSM2703970    biotin
## GSM2703971    biotin
## GSM2703972    biotin
##                                                                                                                              label_protocol_ch1
## GSM2703969 fter fragmentation of 2nd-cycle single-stranded cDNA, sample was labeled with biotin by terminal deoxynucleotidyl transferase (TdT).
## GSM2703970 fter fragmentation of 2nd-cycle single-stranded cDNA, sample was labeled with biotin by terminal deoxynucleotidyl transferase (TdT).
## GSM2703971 fter fragmentation of 2nd-cycle single-stranded cDNA, sample was labeled with biotin by terminal deoxynucleotidyl transferase (TdT).
## GSM2703972 fter fragmentation of 2nd-cycle single-stranded cDNA, sample was labeled with biotin by terminal deoxynucleotidyl transferase (TdT).
##            taxid_ch1
## GSM2703969      9606
## GSM2703970      9606
## GSM2703971      9606
## GSM2703972      9606
##                                                                                                                                              hyb_protocol
## GSM2703969 Then sample was hybridized to a GeneChip® Human Transcriptome Array 2.0 (HTA 2.0, Affymetrix) with 44699 annotated genes for 16-18 hr at 45°C.
## GSM2703970 Then sample was hybridized to a GeneChip® Human Transcriptome Array 2.0 (HTA 2.0, Affymetrix) with 44699 annotated genes for 16-18 hr at 45°C.
## GSM2703971 Then sample was hybridized to a GeneChip® Human Transcriptome Array 2.0 (HTA 2.0, Affymetrix) with 44699 annotated genes for 16-18 hr at 45°C.
## GSM2703972 Then sample was hybridized to a GeneChip® Human Transcriptome Array 2.0 (HTA 2.0, Affymetrix) with 44699 annotated genes for 16-18 hr at 45°C.
##                                                                           scan_protocol
## GSM2703969 GeneChips were scanned using the Hewlett-Packard GeneArray Scanner G3000 7G.
## GSM2703970 GeneChips were scanned using the Hewlett-Packard GeneArray Scanner G3000 7G.
## GSM2703971 GeneChips were scanned using the Hewlett-Packard GeneArray Scanner G3000 7G.
## GSM2703972 GeneChips were scanned using the Hewlett-Packard GeneArray Scanner G3000 7G.
##                                             description
## GSM2703969 Expression profiling of TCDD-treated SK-N-SH
## GSM2703970 Expression profiling of TCDD-treated SK-N-SH
## GSM2703971 Expression profiling of TCDD-treated SK-N-SH
## GSM2703972 Expression profiling of TCDD-treated SK-N-SH
##                                                                                                                                                                                               data_processing
## GSM2703969 The data were analyzed with RMA using Affymetrix default analysis settings and global scaling as normalization method. The trimmed mean target intensity of each array was arbitrarily set to 100.
## GSM2703970 The data were analyzed with RMA using Affymetrix default analysis settings and global scaling as normalization method. The trimmed mean target intensity of each array was arbitrarily set to 100.
## GSM2703971 The data were analyzed with RMA using Affymetrix default analysis settings and global scaling as normalization method. The trimmed mean target intensity of each array was arbitrarily set to 100.
## GSM2703972 The data were analyzed with RMA using Affymetrix default analysis settings and global scaling as normalization method. The trimmed mean target intensity of each array was arbitrarily set to 100.
##            platform_id contact_name      contact_email   contact_phone
## GSM2703969    GPL17586     Tuan,,Xu xutuan2012@163.com +86-01-62842865
## GSM2703970    GPL17586     Tuan,,Xu xutuan2012@163.com +86-01-62842865
## GSM2703971    GPL17586     Tuan,,Xu xutuan2012@163.com +86-01-62842865
## GSM2703972    GPL17586     Tuan,,Xu xutuan2012@163.com +86-01-62842865
##                                        contact_department
## GSM2703969 Research Center for Eco-Environmental Sciences
## GSM2703970 Research Center for Eco-Environmental Sciences
## GSM2703971 Research Center for Eco-Environmental Sciences
## GSM2703972 Research Center for Eco-Environmental Sciences
##                      contact_institute    contact_address contact_city
## GSM2703969 Chinese Academy of Sciences 18 Shuangqing Road      Beijing
## GSM2703970 Chinese Academy of Sciences 18 Shuangqing Road      Beijing
## GSM2703971 Chinese Academy of Sciences 18 Shuangqing Road      Beijing
## GSM2703972 Chinese Academy of Sciences 18 Shuangqing Road      Beijing
##            contact_state contact_zip/postal_code contact_country
## GSM2703969       Beijing                  100085           China
## GSM2703970       Beijing                  100085           China
## GSM2703971       Beijing                  100085           China
## GSM2703972       Beijing                  100085           China
##                                                                                              supplementary_file
## GSM2703969 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2703nnn/GSM2703969/suppl/GSM2703969_DMSO1N_HTA-2_0_.CEL.gz
## GSM2703970 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2703nnn/GSM2703970/suppl/GSM2703970_DMSO2N_HTA-2_0_.CEL.gz
## GSM2703971 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2703nnn/GSM2703971/suppl/GSM2703971_TCDD1N_HTA-2_0_.CEL.gz
## GSM2703972 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2703nnn/GSM2703972/suppl/GSM2703972_TCDD2N_HTA-2_0_.CEL.gz
##            data_row_count cell line:ch1 tissue:ch1
## GSM2703969          13493       SK-N-SH      brain
## GSM2703970          13493       SK-N-SH      brain
## GSM2703971          13493       SK-N-SH      brain
## GSM2703972          13493       SK-N-SH      brain
# Preview the first two rows of the gene annotation.
head(gene_1, n = 2)
##                              ID     probeset_id seqname strand start  stop
## TC01000001.hg.1 TC01000001.hg.1 TC01000001.hg.1    chr1      + 11869 14409
## TC01000002.hg.1 TC01000002.hg.1 TC01000002.hg.1    chr1      + 29554 31109
##                 total_probes
## TC01000001.hg.1           49
## TC01000002.hg.1           60
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         gene_assignment
## TC01000001.hg.1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          NR_046018 // DDX11L1 // DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1 // 1p36.33 // 100287102 /// ENST00000456328 // DDX11L5 // DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 5 // 9p24.3 // 100287596 /// ENST00000456328 // DDX11L1 // DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1 // 1p36.33 // 100287102
## TC01000002.hg.1 ENST00000408384 // MIR1302-11 // microRNA 1302-11 // --- // 100422919 /// ENST00000408384 // MIR1302-10 // microRNA 1302-10 // --- // 100422834 /// ENST00000408384 // MIR1302-9 // microRNA 1302-9 // --- // 100422831 /// ENST00000408384 // MIR1302-2 // microRNA 1302-2 // --- // 100302278 /// ENST00000469289 // MIR1302-11 // microRNA 1302-11 // --- // 100422919 /// ENST00000469289 // MIR1302-10 // microRNA 1302-10 // --- // 100422834 /// ENST00000469289 // MIR1302-9 // microRNA 1302-9 // --- // 100422831 /// ENST00000469289 // MIR1302-2 // microRNA 1302-2 // --- // 100302278 /// ENST00000473358 // MIR1302-11 // microRNA 1302-11 // --- // 100422919 /// ENST00000473358 // MIR1302-10 // microRNA 1302-10 // --- // 100422834 /// ENST00000473358 // MIR1302-9 // microRNA 1302-9 // --- // 100422831 /// ENST00000473358 // MIR1302-2 // microRNA 1302-2 // --- // 100302278 /// OTTHUMT00000002841 // OTTHUMG00000000959 // NULL // --- // --- /// OTTHUMT00000002841 // RP11-34P13.3 // NULL // --- // --- /// OTTHUMT00000002840 // OTTHUMG00000000959 // NULL // --- // --- /// OTTHUMT00000002840 // RP11-34P13.3 // NULL // --- // ---
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     mrna_assignment
## TC01000001.hg.1                                                                                                                                                                                                                                                                                               NR_046018 // RefSeq // Homo sapiens DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1 (DDX11L1), non-coding RNA. // chr1 // 100 // 100 // 0 // --- // 0 /// ENST00000456328 // ENSEMBL // cdna:known chromosome:GRCh37:1:11869:14409:1 gene:ENSG00000223972 gene_biotype:pseudogene transcript_biotype:processed_transcript // chr1 // 100 // 100 // 0 // --- // 0 /// uc001aaa.3 // UCSC Genes // --- // chr1 // 100 // 100 // 0 // --- // 0 /// uc010nxq.1 // UCSC Genes // --- // chr1 // 100 // 100 // 0 // --- // 0 /// uc010nxr.1 // UCSC Genes // --- // chr1 // 100 // 100 // 0 // --- // 0
## TC01000002.hg.1 ENST00000408384 // ENSEMBL // ncrna:miRNA chromosome:GRCh37:1:30366:30503:1 gene:ENSG00000221311 gene_biotype:miRNA transcript_biotype:miRNA // chr1 // 100 // 100 // 0 // --- // 0 /// ENST00000469289 // ENSEMBL // havana:lincRNA chromosome:GRCh37:1:30267:31109:1 gene:ENSG00000243485 gene_biotype:lincRNA transcript_biotype:lincRNA // chr1 // 100 // 100 // 0 // --- // 0 /// ENST00000473358 // ENSEMBL // havana:lincRNA chromosome:GRCh37:1:29554:31097:1 gene:ENSG00000243485 gene_biotype:lincRNA transcript_biotype:lincRNA // chr1 // 100 // 100 // 0 // --- // 0 /// OTTHUMT00000002841 // Havana transcript // cdna:all chromosome:VEGA52:1:30267:31109:1 Gene:OTTHUMG00000000959 // chr1 // 100 // 100 // 0 // --- // 0 /// OTTHUMT00000002840 // Havana transcript // cdna:all chromosome:VEGA52:1:29554:31097:1 Gene:OTTHUMG00000000959 // chr1 // 100 // 100 // 0 // --- // 0
##                                                                                                                                                                                                                                                                                       swissprot
## TC01000001.hg.1 NR_046018 // B7ZGX0 /// NR_046018 // B7ZGX2 /// NR_046018 // B7ZGX7 /// NR_046018 // B7ZGX8 /// ENST00000456328 // B7ZGX0 /// ENST00000456328 // B7ZGX2 /// ENST00000456328 // B7ZGX3 /// ENST00000456328 // B7ZGX7 /// ENST00000456328 // B7ZGX8 /// ENST00000456328 // Q6ZU42
## TC01000002.hg.1                                                                                                                                                                                                                                                                             ---
##                                                                                                                                                                                                                                                                                                               unigene
## TC01000001.hg.1                                                                                NR_046018 // Hs.714157 // testis| normal| adult /// ENST00000456328 // Hs.719844 // brain| testis| normal /// ENST00000456328 // Hs.714157 // testis| normal| adult /// ENST00000456328 // Hs.618434 // testis| normal
## TC01000002.hg.1 ENST00000469289 // Hs.622486 // eye| normal| adult /// ENST00000469289 // Hs.729632 // testis| normal /// ENST00000469289 // Hs.742718 // testis /// ENST00000473358 // Hs.622486 // eye| normal| adult /// ENST00000473358 // Hs.729632 // testis| normal /// ENST00000473358 // Hs.742718 // testis
##                 category locus type notes             SPOT_ID
## TC01000001.hg.1     main     Coding   --- chr1(+):11869-14409
## TC01000002.hg.1     main     Coding   --- chr1(+):29554-31109
# Preview the first two rows of the expression data.
head(expression_1, n = 2)
##                 GSM2703969 GSM2703970 GSM2703971 GSM2703972
## TC01000001.hg.1    8.93745    8.88139      8.884    8.89484
## TC01000002.hg.1    8.63661    8.66471      8.407    8.47635
## exprs() returns the expression levels as a matrix; summary() then shows the distribution of each sample.
## For visualisation and statistical analysis, we inspect the data to discover what scale they are presented in. The methods we will use assume the data are on a log2 scale, typically in the range of 0 to 16.
## In the summary output below, all values lie between about 3.4 and 14.9, i.e. within that range, so no log2 transformation is needed here (values beyond 16 would indicate that one is required). A boxplot can also be generated to see whether the data have been normalised. If so, the distributions of each sample should be highly similar.
# Per-sample summary statistics of the expression values, used to judge the
# scale the data are on.
summary(object = exprs(gse_2))
##    GSM2703969       GSM2703970       GSM2703971       GSM2703972    
##  Min.   : 3.468   Min.   : 3.439   Min.   : 3.517   Min.   : 3.541  
##  1st Qu.: 7.086   1st Qu.: 7.163   1st Qu.: 7.063   1st Qu.: 7.056  
##  Median : 8.190   Median : 8.219   Median : 8.200   Median : 8.189  
##  Mean   : 8.139   Mean   : 8.154   Mean   : 8.167   Mean   : 8.157  
##  3rd Qu.: 9.216   3rd Qu.: 9.168   3rd Qu.: 9.272   3rd Qu.: 9.259  
##  Max.   :14.839   Max.   :14.841   Max.   :14.870   Max.   :14.872
#exprs(gse_2) <- log2(exprs(gse_2))
#boxplot(exprs(gse_2),outline=FALSE)
# Boxplot of the expression values, one box per sample, with outlying points
# drawn (outline = TRUE). `TRUE` is written out because `T` is an ordinary
# variable that can be reassigned, whereas `TRUE` is a reserved word.
boxplot(expression_1, outline = TRUE)
#View(sample_1)
# Place the gene annotation columns and the expression columns side by side
# (rows are combined by position), then preview the first two rows.
full_output <- cbind(gene_1, expression_1)
head(full_output, n = 2)
##                              ID     probeset_id seqname strand start  stop
## TC01000001.hg.1 TC01000001.hg.1 TC01000001.hg.1    chr1      + 11869 14409
## TC01000002.hg.1 TC01000002.hg.1 TC01000002.hg.1    chr1      + 29554 31109
##                 total_probes
## TC01000001.hg.1           49
## TC01000002.hg.1           60
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         gene_assignment
## TC01000001.hg.1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          NR_046018 // DDX11L1 // DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1 // 1p36.33 // 100287102 /// ENST00000456328 // DDX11L5 // DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 5 // 9p24.3 // 100287596 /// ENST00000456328 // DDX11L1 // DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1 // 1p36.33 // 100287102
## TC01000002.hg.1 ENST00000408384 // MIR1302-11 // microRNA 1302-11 // --- // 100422919 /// ENST00000408384 // MIR1302-10 // microRNA 1302-10 // --- // 100422834 /// ENST00000408384 // MIR1302-9 // microRNA 1302-9 // --- // 100422831 /// ENST00000408384 // MIR1302-2 // microRNA 1302-2 // --- // 100302278 /// ENST00000469289 // MIR1302-11 // microRNA 1302-11 // --- // 100422919 /// ENST00000469289 // MIR1302-10 // microRNA 1302-10 // --- // 100422834 /// ENST00000469289 // MIR1302-9 // microRNA 1302-9 // --- // 100422831 /// ENST00000469289 // MIR1302-2 // microRNA 1302-2 // --- // 100302278 /// ENST00000473358 // MIR1302-11 // microRNA 1302-11 // --- // 100422919 /// ENST00000473358 // MIR1302-10 // microRNA 1302-10 // --- // 100422834 /// ENST00000473358 // MIR1302-9 // microRNA 1302-9 // --- // 100422831 /// ENST00000473358 // MIR1302-2 // microRNA 1302-2 // --- // 100302278 /// OTTHUMT00000002841 // OTTHUMG00000000959 // NULL // --- // --- /// OTTHUMT00000002841 // RP11-34P13.3 // NULL // --- // --- /// OTTHUMT00000002840 // OTTHUMG00000000959 // NULL // --- // --- /// OTTHUMT00000002840 // RP11-34P13.3 // NULL // --- // ---
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     mrna_assignment
## TC01000001.hg.1                                                                                                                                                                                                                                                                                               NR_046018 // RefSeq // Homo sapiens DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1 (DDX11L1), non-coding RNA. // chr1 // 100 // 100 // 0 // --- // 0 /// ENST00000456328 // ENSEMBL // cdna:known chromosome:GRCh37:1:11869:14409:1 gene:ENSG00000223972 gene_biotype:pseudogene transcript_biotype:processed_transcript // chr1 // 100 // 100 // 0 // --- // 0 /// uc001aaa.3 // UCSC Genes // --- // chr1 // 100 // 100 // 0 // --- // 0 /// uc010nxq.1 // UCSC Genes // --- // chr1 // 100 // 100 // 0 // --- // 0 /// uc010nxr.1 // UCSC Genes // --- // chr1 // 100 // 100 // 0 // --- // 0
## TC01000002.hg.1 ENST00000408384 // ENSEMBL // ncrna:miRNA chromosome:GRCh37:1:30366:30503:1 gene:ENSG00000221311 gene_biotype:miRNA transcript_biotype:miRNA // chr1 // 100 // 100 // 0 // --- // 0 /// ENST00000469289 // ENSEMBL // havana:lincRNA chromosome:GRCh37:1:30267:31109:1 gene:ENSG00000243485 gene_biotype:lincRNA transcript_biotype:lincRNA // chr1 // 100 // 100 // 0 // --- // 0 /// ENST00000473358 // ENSEMBL // havana:lincRNA chromosome:GRCh37:1:29554:31097:1 gene:ENSG00000243485 gene_biotype:lincRNA transcript_biotype:lincRNA // chr1 // 100 // 100 // 0 // --- // 0 /// OTTHUMT00000002841 // Havana transcript // cdna:all chromosome:VEGA52:1:30267:31109:1 Gene:OTTHUMG00000000959 // chr1 // 100 // 100 // 0 // --- // 0 /// OTTHUMT00000002840 // Havana transcript // cdna:all chromosome:VEGA52:1:29554:31097:1 Gene:OTTHUMG00000000959 // chr1 // 100 // 100 // 0 // --- // 0
##                                                                                                                                                                                                                                                                                       swissprot
## TC01000001.hg.1 NR_046018 // B7ZGX0 /// NR_046018 // B7ZGX2 /// NR_046018 // B7ZGX7 /// NR_046018 // B7ZGX8 /// ENST00000456328 // B7ZGX0 /// ENST00000456328 // B7ZGX2 /// ENST00000456328 // B7ZGX3 /// ENST00000456328 // B7ZGX7 /// ENST00000456328 // B7ZGX8 /// ENST00000456328 // Q6ZU42
## TC01000002.hg.1                                                                                                                                                                                                                                                                             ---
##                                                                                                                                                                                                                                                                                                               unigene
## TC01000001.hg.1                                                                                NR_046018 // Hs.714157 // testis| normal| adult /// ENST00000456328 // Hs.719844 // brain| testis| normal /// ENST00000456328 // Hs.714157 // testis| normal| adult /// ENST00000456328 // Hs.618434 // testis| normal
## TC01000002.hg.1 ENST00000469289 // Hs.622486 // eye| normal| adult /// ENST00000469289 // Hs.729632 // testis| normal /// ENST00000469289 // Hs.742718 // testis /// ENST00000473358 // Hs.622486 // eye| normal| adult /// ENST00000473358 // Hs.729632 // testis| normal /// ENST00000473358 // Hs.742718 // testis
##                 category locus type notes             SPOT_ID GSM2703969
## TC01000001.hg.1     main     Coding   --- chr1(+):11869-14409    8.93745
## TC01000002.hg.1     main     Coding   --- chr1(+):29554-31109    8.63661
##                 GSM2703970 GSM2703971 GSM2703972
## TC01000001.hg.1    8.88139      8.884    8.89484
## TC01000002.hg.1    8.66471      8.407    8.47635
#View(head(full_output))
# Save the combined table as "<current date>-geo.csv" (relative path, so it
# lands in the working directory). Row names are not written and missing
# values become empty fields.
write.csv(
  full_output,
  file = paste0(Sys.Date(), "-", "geo.csv"),
  row.names = FALSE,
  na = ""
)
#################
#Sample clustering and Principal Components Analysis
library(pheatmap)

## use = "complete.obs" drops every row (feature) that has a missing value in
## any sample before the sample-by-sample correlations are computed, so
## missing data points do not propagate into the result. The option is spelled
## out in full instead of relying on partial matching of the abbreviation "c".
corMatrix <- cor(expression_1, use = "complete.obs")
# Draw the sample correlation matrix as a clustered heatmap.
pheatmap(corMatrix)

##https://kasperdanielhansen.github.io/genbioconductor/html/GEOquery.html
#https://sbc.shef.ac.uk/geo_tutorial/tutorial.nb.html