Load data
# Load both saved .RData files into the current environment. The objects
# printed afterwards (TCGA_BLCA, test_tcga_blca_se) presumably come from
# these files.
rdata_files <- c(
  "~/Downloads/drive-download-20190403T144049Z-001/TCGA_BLCA.RData",
  "~/Downloads/drive-download-20190403T144049Z-001/test_tcga_blca_se.RData"
)
for (rdata_file in rdata_files) {
  load(rdata_file)
}
# Older object; its metadata reports "Data Release 12.0 - June 13, 2018".
TCGA_BLCA
## class: RangedSummarizedExperiment
## dim: 56830 433
## metadata(1): data_release
## assays(1): HTSeq - Counts
## rownames(56830): ENSG00000000003 ENSG00000000005 ...
## ENSG00000281912 ENSG00000281920
## rowData names(3): ensembl_gene_id external_gene_name
## original_ensembl_gene_id
## colnames(433): TCGA-XF-A8HD-01A-11R-A36F-07
## TCGA-DK-AA6W-01A-12R-A39I-07 ... TCGA-GD-A76B-01A-11R-A32O-07
## TCGA-ZF-AA4T-01A-11R-A38B-07
## colData names(229): sample patient ... subtype_Fusion.in.TNFRSF21
## subtype_Fusion.in.ASIP
# Newer object; its metadata reports "Data Release 15.0 - February 20, 2019".
test_tcga_blca_se
## class: RangedSummarizedExperiment
## dim: 56925 433
## metadata(1): data_release
## assays(1): HTSeq - Counts
## rownames(56925): ENSG00000000003 ENSG00000000005 ...
## ENSG00000281912 ENSG00000281920
## rowData names(3): ensembl_gene_id external_gene_name
## original_ensembl_gene_id
## colnames(433): TCGA-GD-A2C5-01A-12R-A180-07
## TCGA-BT-A42F-01A-11R-A23W-07 ... TCGA-GU-A766-01A-11R-A32O-07
## TCGA-GU-A42R-01A-11R-A23N-07
## colData names(230): sample patient ... subtype_Fusion.in.TNFRSF21
## subtype_Fusion.in.ASIP
# Show the metadata of each object; the output lists a data_release entry.
metadata(TCGA_BLCA)
## $data_release
## [1] "Data Release 12.0 - June 13, 2018"
metadata(test_tcga_blca_se)
## $data_release
## [1] "Data Release 15.0 - February 20, 2019"
Compare objects
# Dimensions of each object: the row counts differ, the column counts match.
dim(TCGA_BLCA)
## [1] 56830 433
dim(test_tcga_blca_se)
## [1] 56925 433
# Check that both objects expose the same assay names. identical() compares the
# two name vectors as a whole, so a differing number of assays yields FALSE
# instead of a silently recycled element-wise comparison.
identical(names(assays(TCGA_BLCA)), names(assays(test_tcga_blca_se)))
## [1] TRUE
# Tabulate how many row names (gene IDs) of the old assay matrix also occur
# among the row names of the new assay matrix.
old_assay_ids <- rownames(assay(TCGA_BLCA))
new_assay_ids <- rownames(assay(test_tcga_blca_se))
plyr::count(old_assay_ids %in% new_assay_ids)
## x freq
## 1 FALSE 114
## 2 TRUE 56716
Compare counts for the first 10 genes
rownames(assay(TCGA_BLCA))[1] == rownames(assay(test_tcga_blca_se))[1]
## [1] TRUE
# The two objects list their columns in different orders, so compute once the
# positions that put the old object's columns in the new object's order.
old_col_order <- match(colnames(test_tcga_blca_se), colnames(TCGA_BLCA))
old_counts <- assay(TCGA_BLCA)
new_counts <- assay(test_tcga_blca_se)

# Among the first 10 rows, keep those holding the same gene ID in both objects,
# then tabulate, gene by gene, whether the counts agree across all columns.
same_gene_rows <- which(rownames(old_counts)[1:10] == rownames(new_counts)[1:10])
for (gene_row in same_gene_rows) {
  is_same_count <- old_counts[gene_row, old_col_order] == new_counts[gene_row, ]
  print(plyr::count(is_same_count))
}
## x freq
## 1 TRUE 433
## x freq
## 1 TRUE 433
## x freq
## 1 TRUE 433
## x freq
## 1 TRUE 433
## x freq
## 1 TRUE 433
## x freq
## 1 TRUE 433
## x freq
## 1 TRUE 433
## x freq
## 1 TRUE 433
## x freq
## 1 TRUE 433
## x freq
## 1 TRUE 433
Compare genes
# All genes in the new data were already in the old one: no row name of the
# new object is missing from the old object's row names.
new_ids <- rownames(test_tcga_blca_se)
which(!(new_ids %in% rownames(TCGA_BLCA)))
## integer(0)
# ...but some genes were removed: list the old row names that no longer
# appear in the new object.
old_ids <- rownames(TCGA_BLCA)
old_ids[!(old_ids %in% rownames(test_tcga_blca_se))]
## [1] "ENSG00000167945" "ENSG00000176075" "ENSG00000178193"
## [4] "ENSG00000186354" "ENSG00000189393" "ENSG00000198384"
## [7] "ENSG00000203386" "ENSG00000203819" "ENSG00000203886"
## [10] "ENSG00000204187" "ENSG00000204717" "ENSG00000205662"
## [13] "ENSG00000205663" "ENSG00000205664" "ENSG00000206483"
## [16] "ENSG00000212290" "ENSG00000214215" "ENSG00000215365"
## [19] "ENSG00000221972" "ENSG00000223414" "ENSG00000223535"
## [22] "ENSG00000223929" "ENSG00000224710" "ENSG00000225541"
## [25] "ENSG00000225704" "ENSG00000225860" "ENSG00000226434"
## [28] "ENSG00000226738" "ENSG00000226785" "ENSG00000227603"
## [31] "ENSG00000228614" "ENSG00000228651" "ENSG00000229477"
## [34] "ENSG00000230439" "ENSG00000231200" "ENSG00000231429"
## [37] "ENSG00000231435" "ENSG00000231656" "ENSG00000231842"
## [40] "ENSG00000233265" "ENSG00000233895" "ENSG00000234165"
## [43] "ENSG00000234449" "ENSG00000235475" "ENSG00000235825"
## [46] "ENSG00000236082" "ENSG00000236392" "ENSG00000236660"
## [49] "ENSG00000237122" "ENSG00000237534" "ENSG00000238033"
## [52] "ENSG00000241737" "ENSG00000242349" "ENSG00000243012"
## [55] "ENSG00000247732" "ENSG00000248478" "ENSG00000248686"
## [58] "ENSG00000249734" "ENSG00000251085" "ENSG00000251628"
## [61] "ENSG00000253248" "ENSG00000253371" "ENSG00000253426"
## [64] "ENSG00000253839" "ENSG00000254018" "ENSG00000254235"
## [67] "ENSG00000254267" "ENSG00000254281" "ENSG00000254597"
## [70] "ENSG00000254671" "ENSG00000254869" "ENSG00000254981"
## [73] "ENSG00000254998" "ENSG00000256304" "ENSG00000258297"
## [76] "ENSG00000260114" "ENSG00000260940" "ENSG00000260977"
## [79] "ENSG00000261013" "ENSG00000261176" "ENSG00000261643"
## [82] "ENSG00000262251" "ENSG00000262888" "ENSG00000263553"
## [85] "ENSG00000264242" "ENSG00000264263" "ENSG00000265114"
## [88] "ENSG00000266411" "ENSG00000267747" "ENSG00000268036"
## [91] "ENSG00000268439" "ENSG00000269916" "ENSG00000270028"
## [94] "ENSG00000270058" "ENSG00000271020" "ENSG00000271324"
## [97] "ENSG00000271762" "ENSG00000271840" "ENSG00000271941"
## [100] "ENSG00000272486" "ENSG00000272780" "ENSG00000272993"
## [103] "ENSG00000273071" "ENSG00000273237" "ENSG00000273478"
## [106] "ENSG00000274457" "ENSG00000275142" "ENSG00000275611"
## [109] "ENSG00000276911" "ENSG00000277420" "ENSG00000277539"
## [112] "ENSG00000278937" "ENSG00000279535" "ENSG00000281508"
# e.g. see the ID history of the first removed gene: https://www.ensembl.org/Homo_sapiens/Gene/Idhistory?g=ENSG00000167945