## try http:// if https:// URLs are not supported
if (!requireNamespace("BiocManager", quietly=TRUE))
install.packages("BiocManager")
library(GDCRNATools)##
##
## Registered S3 method overwritten by 'enrichplot':
## method from
## fortify.enrichResult DOSE
## ##############################################################################
## Pathview is an open source software package distributed under GNU General
## Public License version 3 (GPLv3). Details of GPLv3 is available at
## http://www.gnu.org/licenses/gpl-3.0.html. Particullary, users are required to
## formally cite the original Pathview paper (not just mention it) in publications
## or products. For details, do citation("pathview") within R.
##
## The pathview downloads and uses KEGG data. Non-academic uses may require a KEGG
## license agreement (details at http://www.kegg.jp/kegg/legal.html).
## ##############################################################################
## TCGA-3X-AAV9-01 TCGA-3X-AAVA-01 TCGA-3X-AAVB-01
## ENSG00000003989 1520 960 2177
## ENSG00000004799 7659 957 2295
## ENSG00000005812 2246 1698 2454
## ENSG00000007908 214 39 30
## ENSG00000011105 5246 1664 4028
## TCGA-3X-AAVC-01 TCGA-3X-AAVE-01
## ENSG00000003989 454 1211
## ENSG00000004799 9393 908
## ENSG00000005812 2377 1561
## ENSG00000007908 237 51
## ENSG00000011105 1310 1971
## [1] 1000 45
## TCGA-3X-AAV9-01 TCGA-3X-AAVA-01 TCGA-3X-AAVB-01
## hsa-let-7a-5p 165141 132094 210259
## hsa-let-7a-3p 204 169 298
## hsa-let-7a-2-3p 30 26 50
## hsa-let-7b-5p 63905 38917 138022
## hsa-let-7b-3p 50 60 138
## TCGA-3X-AAVC-01 TCGA-3X-AAVE-01
## hsa-let-7a-5p 84663 117897
## hsa-let-7a-3p 127 149
## hsa-let-7a-2-3p 1 39
## hsa-let-7b-5p 55664 101545
## hsa-let-7b-3p 65 125
####### Normalization of RNAseq data #######
rnaExpr <- gdcVoomNormalization(counts = rnaCounts, filter = FALSE)
rnaExpr[1:5,1:5]## TCGA-3X-AAV9-01 TCGA-3X-AAVA-01 TCGA-3X-AAVB-01
## ENSG00000003989 10.435492 10.007138 10.990261
## ENSG00000004799 12.768196 10.002625 11.066397
## ENSG00000005812 10.998625 10.829542 11.163017
## ENSG00000007908 7.609995 5.403277 4.832541
## ENSG00000011105 12.222301 10.800370 11.877831
## TCGA-3X-AAVC-01 TCGA-3X-AAVE-01
## ENSG00000003989 8.859192 10.094094
## ENSG00000004799 13.228502 9.678858
## ENSG00000005812 11.246285 10.460233
## ENSG00000007908 7.922839 5.538016
## ENSG00000011105 10.386957 10.796594
####### Normalization of miRNAs data #######
mirExpr <- gdcVoomNormalization(counts = mirCounts, filter = FALSE)
mirExpr[1:5,1:5]## TCGA-3X-AAV9-01 TCGA-3X-AAVA-01 TCGA-3X-AAVB-01
## hsa-let-7a-5p 14.781661 14.850358 15.066842
## hsa-let-7a-3p 5.124275 5.244289 5.606618
## hsa-let-7a-2-3p 2.379055 2.567068 3.043242
## hsa-let-7b-5p 13.411970 13.087279 14.459575
## hsa-let-7b-3p 3.106529 3.758011 4.498773
## TCGA-3X-AAVC-01 TCGA-3X-AAVE-01
## hsa-let-7a-5p 13.937813 14.653520
## hsa-let-7a-3p 4.562713 5.030348
## hsa-let-7a-2-3p -1.846678 3.110128
## hsa-let-7b-5p 13.332830 14.438114
## hsa-let-7b-3p 3.601783 4.777890
####### Parse and filter RNAseq metadata #######
metaMatrix.RNA <- gdcParseMetadata(project.id = 'TCGA-CHOL',
data.type = 'RNAseq',
write.meta = FALSE)
metaMatrix.RNA[1:5,1:5]## # A tibble: 5 x 5
## file_name file_id patient sample submitter_id
## <chr> <chr> <chr> <chr> <chr>
## 1 725eaa94-5221-4c22-bce~ 85bc7f81-51fb-44~ TCGA-3X~ TCGA-3X~ TCGA-3X-AAV9~
## 2 b6a2c03a-c8ad-41e9-8a1~ 42b8d463-6209-4e~ TCGA-3X~ TCGA-3X~ TCGA-3X-AAVA~
## 3 c2765336-c804-4fd2-b45~ 6e2031e9-df75-48~ TCGA-3X~ TCGA-3X~ TCGA-3X-AAVB~
## 4 8b20cba8-9fd5-4d56-bd0~ 19e8fd21-f6c8-49~ TCGA-3X~ TCGA-3X~ TCGA-3X-AAVC~
## 5 4082f7d5-5656-476a-9aa~ 1ace0df3-9837-46~ TCGA-3X~ TCGA-3X~ TCGA-3X-AAVE~
## [1] "file_name" "file_id"
## [3] "patient" "sample"
## [5] "submitter_id" "entity_submitter_id"
## [7] "sample_type" "gender"
## [9] "age_at_diagnosis" "tumor_stage"
## [11] "tumor_grade" "days_to_death"
## [13] "days_to_last_follow_up" "vital_status"
## [15] "project_id"
## Removed 0 samples
## Removed 0 samples
## # A tibble: 5 x 5
## file_name file_id patient sample submitter_id
## <chr> <chr> <chr> <chr> <chr>
## 1 725eaa94-5221-4c22-bce~ 85bc7f81-51fb-44~ TCGA-3X~ TCGA-3X~ TCGA-3X-AAV9~
## 2 b6a2c03a-c8ad-41e9-8a1~ 42b8d463-6209-4e~ TCGA-3X~ TCGA-3X~ TCGA-3X-AAVA~
## 3 c2765336-c804-4fd2-b45~ 6e2031e9-df75-48~ TCGA-3X~ TCGA-3X~ TCGA-3X-AAVB~
## 4 8b20cba8-9fd5-4d56-bd0~ 19e8fd21-f6c8-49~ TCGA-3X~ TCGA-3X~ TCGA-3X-AAVC~
## 5 4082f7d5-5656-476a-9aa~ 1ace0df3-9837-46~ TCGA-3X~ TCGA-3X~ TCGA-3X-AAVE~
##
## PrimaryTumor SolidTissueNormal
## 36 9
DEGAll <- gdcDEAnalysis(counts = rnaCounts, # rnacounts数据
group = metaMatrix.RNA$sample_type, # 分组
comparison = 'PrimaryTumor-SolidTissueNormal', # 比较
method = 'limma')
## 展示所有基因
DEGAll[1:5,1:5]## # A tibble: 5 x 5
## symbol group logFC AveExpr t
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 NR1I3 protein_coding -6.92 7.02 -17.3
## 2 ETFRF1 protein_coding -2.49 9.52 -16.1
## 3 SOX5 protein_coding -4.87 6.23 -15.0
## 4 ABCA8 protein_coding -5.65 7.52 -14.9
## 5 ISOC1 protein_coding -2.37 10.5 -14.6
## [1] 565 8
##
## IG long_non_coding ncRNA protein_coding
## 2 101 8 403
## pseudogene TEC TR
## 45 4 2
## [1] -10.855558 5.919461
## [1] 283 8
ceOutput <- gdcCEAnalysis(lnc = rownames(deLNC), # lncRNA
pc = rownames(dePC), # mRNA
lnc.targets = 'starBase',
pc.targets = 'starBase',
rna.expr = rnaExpr,
mir.expr = mirExpr)## Step 1/3: Hypergenometric test done !
## Step 2/3: Correlation analysis done !
## Step 3/3: Regulation pattern analysis done !
## [1] 453 13
## # A tibble: 5 x 13
## lncRNAs Genes Counts listTotal popHits popTotal foldEnrichment
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 ENSG00~ ENSG~ 2 2 95 277 2.91578947368~
## 2 ENSG00~ ENSG~ 2 2 24 277 11.5416666666~
## 3 ENSG00~ ENSG~ 2 2 8 277 34.625
## 4 ENSG00~ ENSG~ 2 2 20 277 13.85
## 5 ENSG00~ ENSG~ 2 2 28 277 9.89285714285~
## # ... with 6 more variables: hyperPValue <chr>, miRNAs <chr>, cor <dbl>,
## # corPValue <dbl>, regSim <dbl>, sppc <dbl>
ceOutput2 <- ceOutput[ceOutput$hyperPValue<0.01 ## hyperPvalue少选
& ceOutput$corPValue<0.01 & ceOutput$regSim != 0,] ## corPvalue筛选
## 导出edges
edges <- gdcExportNetwork(ceNetwork = ceOutput2, net = 'edges')
str(edges)## 'data.frame': 452 obs. of 3 variables:
## $ fromNode : chr "ENSG00000234456" "ENSG00000234456" "ENSG00000234741" "ENSG00000255717" ...
## $ toNode : chr "hsa-miR-374b-5p" "hsa-miR-374a-5p" "hsa-miR-137" "hsa-miR-377-3p" ...
## $ altNode1Name: chr "MAGI2-AS3" "MAGI2-AS3" "GAS5" "SNHG1" ...
## # A tibble: 6 x 3
## fromNode toNode altNode1Name
## <chr> <chr> <chr>
## 1 ENSG00000234456 hsa-miR-374b-5p MAGI2-AS3
## 2 ENSG00000234456 hsa-miR-374a-5p MAGI2-AS3
## 3 ENSG00000234741 hsa-miR-137 GAS5
## 4 ENSG00000255717 hsa-miR-377-3p SNHG1
## 5 ENSG00000255717 hsa-miR-421 SNHG1
## 6 ENSG00000255717 hsa-miR-326 SNHG1
## 'data.frame': 187 obs. of 4 variables:
## $ gene : Factor w/ 187 levels "ENSG00000003989",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ symbol : Factor w/ 187 levels "ABCC5","AC015813.1",..: 162 130 31 154 131 129 124 118 113 15 ...
## $ type : Factor w/ 3 levels "lnc","mir","pc": 3 3 3 3 3 3 3 3 3 3 ...
## $ numInteractions: num 2 5 3 3 3 2 2 2 2 1 ...
## # A tibble: 6 x 4
## gene symbol type numInteractions
## <fct> <fct> <fct> <dbl>
## 1 ENSG00000003989 SLC7A2 pc 2
## 2 ENSG00000004799 PDK4 pc 5
## 3 ENSG00000021826 CPS1 pc 3
## 4 ENSG00000047634 SCML1 pc 3
## 5 ENSG00000049246 PER3 pc 3
## 6 ENSG00000065320 NTN1 pc 2