query.exp <- GDCquery(
project = "TCGA-HNSC",
data.category = "Transcriptome Profiling",
experimental.strategy = 'RNA-Seq',
data.type = "Gene Expression Quantification",
workflow.type = "STAR - Counts",
sample.type = "Primary Tumor"
)
GDCdownload(
query = query.exp,
files.per.chunk = 100
)
hnsc.exp <- GDCprepare(
query = query.exp
)IL6/IL6R expression in HNSC
Import TCGA data
This report focuses on the correlation between IL6 and IL6R expression on the EMT, MAPK and JAK-STAT pathways in the TCGA database.
RNA-Seq data
Clinical data
# get subtype information
infomation.subtype <- TCGAquery_subtype(tumor = "HNSC")
# get clinical data
information.clinical <- GDCquery_clinic(project = "TCGA-HNSC", type = "clinical")
# Load metadata
information.clinical.all <- read.csv("GDCdata/TCGA.HNSC.sampleMap-HNSC_clinicalMatrix.tsv", sep = "\t", header = T)
information.clinical.all <- information.clinical.all[grepl("-01", information.clinical.all$sampleID), ]
# Subset information.clinical.all to get only hpv_status_by_p16_testing == Negative
# information.clinical.all <- information.clinical.all[information.clinical.all$hpv_status_by_p16_testing == "Negative", ]
clinical_hnscc_data.all <- information.clinical.all |>
inner_join(information.clinical, by = "bcr_patient_barcode") |>
dplyr::select(c(submitter_id, gender.x, age_at_index, tissue_or_organ_of_origin)) |>
dplyr::mutate(sex = ifelse(gender.x == "MALE", "M", "F")) |>
dplyr::mutate(age_group_60 = ifelse(age_at_index >= 60, "Y", "N")) |>
dplyr::select(c(submitter_id, sex, age_group_60, tissue_or_organ_of_origin)) |>
na.omit()Analyse the expression of IL6 and IL6R
The objective of this section is to identify samples exhibiting high co-expression of IL6 and IL6R.
Please note that it has been chosen not to differentiate between HPV-positive and HPV-negative patients in this analysis. This choice was based on the limited number of samples for that information was available (n = 74).
Extract and normalize counts
# Counts
hnsc_matrix <- assay(hnsc.exp, "unstranded")
# Gene and sample metadata
gene_metadata <- as.data.frame(rowData(hnsc.exp))
coldata <- as.data.frame(colData(hnsc.exp))# VST counts
dds <- DESeqDataSetFromMatrix(countData = hnsc_matrix,
colData = coldata,
design = ~ 1)
# Removing genes with sum total of 10 reads across all samples
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
vsd <- vst(dds, blind=FALSE)
hnsc_matrix_vst <- assay(vsd)Distribution of IL6 and IL6R across the HNSC samples
Firstly, the IL6 and IL6R high and low groups are set as distinct groups.
hnsc_il6 <- hnsc_matrix_vst |>
as.data.frame() |>
rownames_to_column(var = 'gene_id') |>
gather(key = 'case_id', value = 'counts', -gene_id) |>
left_join(gene_metadata, by = "gene_id") |>
filter(gene_name == "IL6")
# Z-score the counts columns
hnsc_il6$z_counts <- scale(hnsc_il6$counts)
hnsc_il6 <- hnsc_il6 |>
relocate(z_counts, .after = counts)
hnsc_il6r <- hnsc_matrix_vst |>
as.data.frame() |>
rownames_to_column(var = 'gene_id') |>
gather(key = 'case_id', value = 'counts', -gene_id) |>
left_join(gene_metadata, by = "gene_id") |>
filter(gene_name == "IL6R")
hnsc_il6r$z_counts <- scale(hnsc_il6r$counts)
hnsc_il6r <- hnsc_il6r |>
relocate(z_counts, .after = counts)
# head(hnsc_il6)
# head(hnsc_il6r)# Plot IL6 and IL6R
p1 <- ggplot(hnsc_il6, aes(x = z_counts)) +
geom_boxplot(fill = "blue", alpha = 0.5) +
geom_point(aes(y = 0), color = "red", size = 2) +
theme_minimal() +
theme(axis.text.y = element_blank()) +
labs(title = "IL6 distribution in HNSC samples", x = "Z-Normalize VST Counts", y = "")
p2 <- ggplot(hnsc_il6r, aes(x = z_counts)) +
geom_boxplot(fill = "blue", alpha = 0.5) +
geom_point(aes(y = 0), color = "red", size = 2) +
theme_minimal() +
theme(axis.text.y = element_blank()) +
labs(title = "IL6R distribution in HNSC samples", x = "Z-Normalize VST Counts", y = "")
p1 / p2Creation of IL6/IL6R ‘Low’ and ‘High’ groups
Then, the samples that are common to the IL6 and IL6R ‘High’ groups are classified as belonging to the IL6/IL6R ‘High’ group, and the same process is applied to the samples that are common to the IL6 and IL6R ‘Low’ groups.
# Create IL6 'Low' and 'High' groups based on Z-counts if -1 = Low and +1 = High and NA else
hnsc_il6$il6_group <- ifelse(hnsc_il6$z_counts < -0.5, "Low", ifelse(hnsc_il6$z_counts > 0.5, "High", "Medium"))
hnsc_il6r$il6r_group <- ifelse(hnsc_il6r$z_counts < -0.5, "Low", ifelse(hnsc_il6r$z_counts > 0.5, "High", "Medium"))
# Merge IL6 and IL6R data and create a High_High and Low_Low group
hnsc_il6_il6r <- hnsc_il6 |>
inner_join(hnsc_il6r, by = "case_id") |>
mutate(group = ifelse(il6_group == "High" & il6r_group == "High", "High",
ifelse(il6_group == "Low" & il6r_group == "Low", "Low", "Other")))
table(hnsc_il6_il6r$group)
High Low Other
57 61 401
DESeq2 analysis for High and Low IL6/IL6R groups
An analysis of the genes differentially expressed between samples from the ‘High’ and ‘Low’ IL6/IL6R groups was then performed.
Prepare the data for DESeq2 analysis
hnsc_il6_il6r_subset <- hnsc_il6_il6r |>
dplyr::select(c(case_id, group))
# Add the column group to coldata bind by case_id in hnsc_il6_il6r and barcode in coldata
coldata <- coldata |>
inner_join(hnsc_il6_il6r_subset, by = c("barcode" = "case_id"))# Keep only the samples with group High and Low in coldata
coldata_subset <- coldata[coldata$group %in% c("High", "Low"), ]
# Same for hnsc_matrix
hnsc_matrix_subset <- hnsc_matrix[, coldata_subset$barcode]hnsc.exp <- hnsc.exp[, hnsc.exp$patient %in% coldata_subset$patient]
hnsc.exp$group <- coldata$group[match(hnsc.exp$patient,
coldata$patient)]DESeq2 analysis
dds <- DESeqDataSet(hnsc.exp,
design = ~ group)
# Removing genes with sum total of 10 reads across all samples
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
dds <- DESeq(dds)
resultsNames(dds)
vsd <- vst(dds, blind=FALSE)
hnsc_matrix_vst <- assay(vsd)res <- results(dds, name = "group_Low_vs_High")
res_df <- as.data.frame(res)
res_df$gene_id <- rownames(res_df)
# Multiply Log2FoldChange by -1 to get the High vs Low
res_df$log2FoldChange <- -res_df$log2FoldChange
gene_metadata <- gene_metadata |>
select(c(gene_id, gene_name))
res_df <- res_df |>
inner_join(gene_metadata, by = c("gene_id" = "gene_id")) |>
select(c(gene_name, gene_id, baseMean, log2FoldChange, lfcSE, stat, pvalue, padj))Significant DEGs genes between IL6/IL6R High and Low groups
# Subset significant genes based on the p-value
res_df_significant <- res_df |>
filter(padj < 0.05) |>
# filter(padj < 0.05, abs(log2FoldChange) >= 0.5) |>
arrange(padj)
res_df_significant |>
dplyr::select(-baseMean, -lfcSE, -stat, -pvalue, -gene_id) |>
dplyr::rename(gene_name = 'gene_name') |>
relocate(gene_name, .before = 1L) |>
filter(padj < 0.05, abs(log2FoldChange) >= 0.5 ) |>
arrange(padj) |>
slice_head(n = 30) |>
gt() |>
tab_style(
style = cell_text(
font = c("Comic Sansa", "Menloa", default_fonts())
), locations = cells_body(columns = everything())
) |>
fmt_number(columns = c(log2FoldChange),
decimals = 3) |>
fmt_scientific(
columns = padj,
decimals = 2) |>
data_color(columns = padj, reverse = TRUE,
palette = "viridis") |>
data_color(columns = log2FoldChange,
palette = colorRampPalette(c("blue", "white", "red"))(n = 100)) |>
gt_theme_guardian()| gene_name | log2FoldChange | padj |
|---|---|---|
| IL6 | 4.457 | 3.96 × 10−138 |
| IL6R | 2.084 | 7.69 × 10−74 |
| ZFR2 | −6.102 | 6.80 × 10−41 |
| TAF7L | −5.162 | 1.52 × 10−34 |
| ABCA17P | −5.019 | 8.16 × 10−34 |
| NEFH | −5.263 | 4.03 × 10−33 |
| CSF2 | 3.905 | 1.83 × 10−29 |
| NR4A3 | 2.767 | 3.86 × 10−27 |
| SMC1B | −4.303 | 2.51 × 10−24 |
| SERPINE1 | 2.800 | 5.95 × 10−24 |
| SYCP2 | −3.701 | 5.71 × 10−23 |
| MXD3 | −1.840 | 5.71 × 10−23 |
| MACF1 | 1.275 | 6.80 × 10−23 |
| IL24 | 3.741 | 1.34 × 10−22 |
| DDX25 | −3.992 | 1.42 × 10−22 |
| FPR2 | 3.295 | 2.28 × 10−22 |
| KLHL35 | −3.104 | 3.06 × 10−22 |
| SYCE2 | −2.615 | 3.06 × 10−22 |
| PET100 | −1.722 | 4.47 × 10−22 |
| TMPO-AS1 | −1.516 | 4.96 × 10−22 |
| HAS1 | 3.116 | 1.01 × 10−21 |
| WDR83 | −1.076 | 2.44 × 10−21 |
| UPB1 | −3.998 | 2.48 × 10−21 |
| YBX2 | −3.485 | 3.45 × 10−21 |
| TMSB15A | −3.414 | 3.45 × 10−21 |
| SOCS3 | 1.556 | 3.45 × 10−21 |
| EPHA6 | −4.256 | 4.32 × 10−21 |
| KEL | −3.905 | 4.32 × 10−21 |
| IL11 | 2.551 | 9.16 × 10−21 |
| COMMD3 | −1.276 | 4.07 × 10−20 |
As expected, the top differentially expressed genes were IL6 and IL6R. These results will then be used as a base for the analysis of the enrichment pathways between the two groups.
GSEA Analysis between IL6/IL6R High and Low groups
GSEA is used to highlight the pathways that are over-represented between the IL6/IL6R High and Low groups. The GSEA method is based on an ordered list of genes and a database of pathways.
The first step in this analysis is to analyse the Hallmark Gene Set, which summarises and represents specific well-defined biological states or processes and displays coherent expression, including the EMT pathway. The analysis will then be reproduced on the KEGG Gene Set, which contains canonical pathways derived from the KEGG pathway database, including the MAPK/ERK and JAK/STAT pathways.
Prepare the data for GSEA analysis
Ranking
# is there duplicate
any(duplicated(res_df$gene_name))
# which one ?
dup_gene_names <- res_df %>%
dplyr::filter(duplicated(gene_name)) %>%
dplyr::pull(gene_name)
# filter duplicated
res_df_filter <- res_df %>%
# Sort so that the highest absolute values of the log2 fold change are at the top
dplyr::arrange(dplyr::desc(abs(log2FoldChange))) %>%
# Filter out the duplicated rows using `dplyr::distinct()`
dplyr::distinct(gene_name, .keep_all = TRUE)
any(duplicated(res_df_filter$gene_name))# Ranking based on the p-value and the log2FoldChange
low_vs_high <- res_df_filter |>
mutate(sign_p_value = -log10(pvalue) * sign(log2FoldChange)) |>
arrange(desc(sign_p_value)) res_gsea <- low_vs_high |>
dplyr::select(gene_name, sign_p_value)
# res_gsearanks <- deframe(res_gsea)
head(ranks, 20)GSEA analysis using Hallmark gene set
Pathways using HALLMARK gene set
pathways.hallmark <- gmtPathways("gsea/h.all.v2023.2.Hs.entrez.gmt.txt")
# pathways.hallmark |>
# head() |>
# lapply(head)fgseaRes <- fgsea(pathways=pathways.hallmark, stats=ranks)Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.01% of the list).
The order of those tied genes will be arbitrary, which may produce unexpected results.
fgseaResTidy <- fgseaRes |>
as_tibble() |>
arrange(desc(NES))ggplot(fgseaResTidy, aes(reorder(pathway, NES), NES)) +
geom_col(aes(fill=padj<0.05)) +
coord_flip() +
labs(x="Pathway", y="Normalized Enrichment Score",
title="Hallmark pathways NES from GSEA - IL6/IL6R High vs Low groups") +
theme_minimal()Enrichment analysis using the Hallmark Gene set indicates that the pathway most over-represented in the IL6/IL6R High group corresponds to the EMT pathway, supporting the correlation between IL6/IL6R expression and the pathway’s activation.
EMT Pathway
plotEnrichment(pathways.hallmark[["HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION"]],
ranks) + labs(title="HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION")GSEA analysis using KEGG gene set
Pathways using KEGG gene set
pathways.kegg <- gmtPathways("gsea/c2.cp.kegg_legacy.v2023.2.Hs.symbols.gmt.txt")
# pathways.kegg |>
# head() |>
# lapply(head)fgseaRes <- fgsea(pathways=pathways.kegg, stats=ranks)Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.01% of the list).
The order of those tied genes will be arbitrary, which may produce unexpected results.
fgseaResTidy <- fgseaRes |>
as_tibble() |>
arrange(desc(NES))fgseaResTidy |>
select(c(pathway, NES, padj, size, leadingEdge)) |>
slice_head(n = 30) |>
gt() |>
tab_style(
style = cell_text(
font = c("Comic Sansa", "Menloa", default_fonts())
), locations = cells_body(columns = everything())
) |>
# increase the width of the leadingEdge column
cols_width(
pathway ~ px(70)
) |>
fmt_number(columns = c(NES),
decimals = 3) |>
fmt_scientific(
columns = padj,
decimals = 2) |>
data_color(columns = padj, reverse = TRUE,
palette = "viridis") |>
data_color(columns = NES,
palette = colorRampPalette(c("white", "red"))(n = 100)) |>
tab_options(container.width = 2000, container.height = 500) |>
gt_theme_guardian()| pathway | NES | padj | size | leadingEdge |
|---|---|---|---|---|
| KEGG_FOCAL_ADHESION | 2.740 | 5.77 × 10−26 | 198 | COL4A1, LAMC1, LAMC2, FLNA, ITGA5, RAPGEF1, ITGB1, CAV1, ITGA6, LAMA3, ITGB3, THBS1, SHC1, PDGFB, COL4A2, MET, VWF, TNC, IGF1R, ACTN1, PIK3CD, LAMB3, HGF, EGFR, AKT1, KDR, PXN, MAPK1, FLT1, VEGFC, VCL, ITGB4, RELN, COL4A6, TLN1, LAMB1, COL5A3, ITGA3, ROCK2, PDGFRB, RAP1B, BCAR1, CRKL, DIAPH1, FLT4, ROCK1, LAMA4, CAV2, COL6A3, FLNB, PDGFC, ARHGAP5, COL5A2, PRKCA, MYLK, VAV2, MAP2K1, COL2A1, ITGA4, ACTN4, ITGA8, JUN, ITGAV, IGF1, COL6A1, ITGA1, PAK2, MYLK3, CRK, TNXB, VASP, LAMA5, ZYX, SOS2, SPP1, PTK2, ITGA2, ITGA7, MYL12A, CAPN2, THBS2, DOCK1, PIK3CG, BIRC2, TLN2, PDGFA, LAMA2, PIK3R5, XIAP, COL5A1, COL3A1, PARVA, ACTB, FN1, FYN, PDGFRA, ITGB6, COL6A2, MYL12B, RAC2, ARHGAP35, PPP1R12A, FLNC, CTNNB1, PTEN, PRKCB, PARVB, SRC, TNN, PDPK1, ITGA9, VTN, PGF, LAMB2, SOS1, RASGRF1, SHC3, LAMC3, PDGFD, COL1A1, COL1A2, EGF |
| KEGG_JAK_STAT_SIGNALING_PATHWAY | 2.662 | 1.81 × 10−17 | 147 | IL6, IL6R, CSF2, IL24, SOCS3, IL11, SPRY4, OSM, CBL, LIF, PIK3CD, OSMR, CSF3, AKT1, JAK1, IL7R, SPRY1, IL10, CLCF1, LEPR, CSF3R, SPRED2, PTPN11, IFNK, SOCS5, MYC, IL13RA1, IL20, CBLB, EP300, IL6ST, STAM, SPRED1, IFNLR1, SOS2, CREBBP, IL5RA, PIK3CG, SOCS4, CNTF, PIK3R5 |
| KEGG_HEMATOPOIETIC_CELL_LINEAGE | 2.649 | 6.18 × 10−12 | 84 | IL6, IL6R, CSF2, IL11, ITGA5, ITGA6, ITGB3, IL1B, ANPEP, CSF3, IL7R, CD44, CSF3R, ITGA3, MME, TNF, CR1, IL1R1, ITGA4, IL1A, ITGA1, CD34, ITGA2, IL5RA |
| KEGG_ECM_RECEPTOR_INTERACTION | 2.549 | 4.03 × 10−9 | 83 | COL4A1, LAMC1, LAMC2, ITGA5, ITGB1, ITGA6, LAMA3, ITGB3, THBS1, COL4A2, VWF, TNC, LAMB3, CD44, ITGB4, RELN, COL4A6, LAMB1, COL5A3, ITGA3, HSPG2, LAMA4, COL6A3, COL5A2, COL2A1, ITGA4, ITGA8, ITGAV, AGRN, COL6A1, ITGA1, TNXB, LAMA5, SPP1, ITGA2, ITGA7, THBS2, LAMA2, COL5A1, COL3A1, FN1, ITGB6, COL6A2, SDC1, SDC4, GP5, TNN, ITGA9, VTN, LAMB2, LAMC3, COL1A1, COL1A2, ITGB8 |
| KEGG_CYTOKINE_CYTOKINE_RECEPTOR_INTERACTION | 2.544 | 3.12 × 10−25 | 254 | IL6, IL6R, CSF2, IL24, IL11, CXCL8, INHBA, CXCL5, PDGFB, TGFBR1, OSM, LIF, MET, IL1B, TGFBR2, OSMR, CXCR1, HGF, CSF3, EGFR, CCL11, IL7R, KDR, FLT1, IL10, CXCL3, CLCF1, VEGFC, LEPR, CXCL2, ACVR1, TNFRSF11B, TGFB2, CSF3R, BMPR2, CCL24, TGFB1, PDGFRB, INHBE, IFNK, TNF, IL1RAP, FLT4, CXCL12, CCL7, PDGFC, IL13RA1, IL20, CCR3, IL1R1, IL6ST, ACVRL1, BMP2, IL1A, CCL21, CCL8, IFNLR1, CCR1, TNFRSF12A, CCL23, PPBP, CCL3L3, TNFRSF10C, IL17RA, IL5RA, BMPR1B, PDGFA, CNTF, TNFSF14, CCR4, CCR2, CXCR2, PLEKHO2, CCR8, TNFRSF1A, RELT, CSF1R, TNFRSF21, IL10RA, PDGFRA, EPO, CCL13, IL22, CCL2, CXCL1, TNFSF8, CXCL14, CCL18, IL9R, TNFRSF1B, CCL22 |
| KEGG_CHEMOKINE_SIGNALING_PATHWAY | 2.478 | 7.76 × 10−16 | 187 | CXCL8, CXCL5, SHC1, PIK3CD, CXCR1, AKT1, CCL11, ADCY7, PXN, MAPK1, CXCL3, CXCL2, ROCK2, CCL24, RAP1B, BCAR1, CRKL, ADCY3, NFKB1, RELA, ROCK1, CXCL12, CCL7, GNB1, CCR3, VAV2, NRAS, MAP2K1, GRK5, CCL21, CCL8, CCR1, CRK, GNG12, SOS2, CCL23, PPBP, CCL3L3, PTK2, PREX1, PIK3CG, ADCY9, CHUK, GNAI1, PLCB1, PIK3R5, CCR4, CCR2, GNAI2, CXCR2, GNB5, CCR8, ELMO1, DOCK2, GRK3, CCL13, TIAM1, WASL, GNG11, RAC2, CCL2, IKBKG, STAT5B, CXCL1, CXCL14, ADCY4, CCL18, CCL22, FOXO3, PRKCB, STAT3, GNAI3, PTK2B, STAT1, SOS1, XCR1, CCL19, TIAM2, SHC3, PF4, PLCB3, GNGT1, CXCL11, PRKCZ, FGR, CCL3, CCL14, HCK, PRKACB, GSK3B, CCL16, NFKBIA, CCR7, CXCL6, CCL4, PIK3R1, GRK2, CDC42, CXCR5, PIK3CA |
| KEGG_NOD_LIKE_RECEPTOR_SIGNALING_PATHWAY | 2.445 | 2.88 × 10−5 | 62 | IL6, CXCL8, IL1B, NLRP3, CCL11, MAPK1, CXCL2, TNFAIP3, NFKB1, TNF, MEFV, RELA, NAIP, NOD1, CCL7, CCL8, TAB2, CHUK, BIRC2, HSP90AB1, RIPK2, XIAP, NLRC4, HSP90AA1, CCL13, CCL2, IKBKG, CXCL1 |
| KEGG_GRAFT_VERSUS_HOST_DISEASE | 2.444 | 1.50 × 10−4 | 37 | IL6, IL1B, TNF, IL1A, CD80, CD86, CD28, HLA-G |
| KEGG_HYPERTROPHIC_CARDIOMYOPATHY_HCM | 2.400 | 1.47 × 10−6 | 83 | IL6, ITGA5, ITGB1, ITGA6, ITGB3, ITGB4, TGFB2, ITGA3, TGFB1, TNF, TPM3, ITGA4, ITGA8, ITGAV, IGF1, ITGA1, TPM4, ACE, SGCB, ITGA2, PRKAB2, ITGA7, ATP2A2, LAMA2, SGCA, DES, CACNA1C, MYH6, ACTB, SGCD, SLC8A1, ITGB6, MYH7, PRKAG3, PRKAA1, ACTC1, TPM1, ITGA9, LMNA, CACNG6 |
| KEGG_REGULATION_OF_ACTIN_CYTOSKELETON | 2.356 | 8.52 × 10−13 | 212 | GNA12, ITGA5, ITGB1, ITGA6, ITGB3, PDGFB, MYH9, ACTN1, IQGAP1, PIK3CD, SSH1, MSN, FGF7, EGFR, PXN, MAPK1, VCL, ITGB4, FGF5, SLC9A1, ENAH, CYFIP1, SSH2, ITGA3, ROCK2, RDX, F2R, PDGFRB, BCAR1, LIMK1, CRKL, DIAPH1, ARHGEF12, NCKAP1, ROCK1, PDGFC, MYLK, VAV2, NRAS, MAP2K1, ITGA4, ACTN4, ITGA8, PIP5K1A, ITGAV, PIP4K2A, ITGA1, PAK2, MYLK3, CRK, ARHGEF7, GNG12, SOS2, CHRM5, PTK2, ITGA2, ITGA7, MYL12A, DOCK1, RRAS2, GNA13, APC, PIK3CG, PDGFA, PIK3R5, FGF10, DIAPH3, ACTB, PIP4K2C, FN1, ARHGEF6, BDKRB2, WASF2, PDGFRA, TIAM1, WASL, ITGB6, MYL12B, RAC2, IQGAP2, ARHGAP35, PPP1R12A, PIP5K1B, MYH10, LIMK2, CFL2, FGF16, ITGAX, ITGA9, SOS1, FGF11, NCKAP1L, TIAM2, PDGFD, FGF13, EGF, ITGB8, EZR, ITGB2, MRAS, PPP1CA, ITGB7, PFN2, FGF21, CFL1, MYL2, CD14, PIKFYVE, BAIAP2, PIP5K1C, PIK3R1, PIP4K2B, ITGB5, CDC42, PIK3CA, FGF18, ACTG1, ARPC1B, BDKRB1, SSH3 |
| KEGG_VIRAL_MYOCARDITIS | 2.350 | 8.52 × 10−5 | 68 | ABL2, CAV1, MYH9, EIF4G3, ABL1, CD80, EIF4G2, SGCB, LAMA2, SGCA, MYH11, MYH1, MYH3, CD86, MYH6, ACTB, CD55, SGCD, CD28, EIF4G1, FYN, ICAM1, HLA-G, RAC2, MYH7, MYH10, MYH2, ITGB2, HLA-C, HLA-E, MYH8, ACTG1 |
| KEGG_INTESTINAL_IMMUNE_NETWORK_FOR_IGA_PRODUCTION | 2.338 | 3.20 × 10−3 | 45 | IL6, IL10, TGFB1, CXCL12, ITGA4, CD80, CD86, CD28, PIGR |
| KEGG_TOLL_LIKE_RECEPTOR_SIGNALING_PATHWAY | 2.313 | 5.84 × 10−7 | 95 | IL6, CXCL8, IL1B, PIK3CD, LBP, AKT1, MAPK1, NFKB1, TNF, RELA, MAP2K3, TLR4, TRAF3, MAP2K1, JUN, CD80, TAB2, SPP1, FOS, TICAM1, TLR8, FADD, PIK3CG, CHUK, PIK3R5, MYD88, MAP2K4, CD86 |
| KEGG_PATHWAYS_IN_CANCER | 2.275 | 2.79 × 10−17 | 325 | IL6, COL4A1, LAMC1, CXCL8, LAMC2, ITGB1, ITGA6, LAMA3, PDGFB, TGFBR1, ETS1, CDK6, COL4A2, CBL, MET, IGF1R, PIK3CD, TGFBR2, LAMB3, HGF, FGF7, EGFR, AKT1, JAK1, MAPK1, SLC2A1, VEGFC, COL4A6, FGF5, LAMB1, EPAS1, TGFB2, CSF3R, HIF1A, ITGA3, TGFB1, TGFA, PDGFRB, CRKL, NFKB1, RET, TPM3, RELA, PPARD, LAMA4, MYC, RB1, MMP1, RARA, CBLB, EP300, ABL1, PRKCA, TRAF3, PLCG2, WNT9A, NRAS, RALBP1, MTOR, MAP2K1, PTGS2, BMP2, MMP2, MMP9, WNT7B, JUN, ITGAV, IGF1, NTRK1, RALB, WNT7A, STK4, CRK, GLI3, LAMA5, SOS2, CREBBP, PTK2, ITGA2, FOS, CTNNA3, FADD, APC, PIK3CG, CHUK, EGLN1, BIRC2, PDGFA, LAMA2, HSP90AB1, PIK3R5, WNT5A, XIAP, RXRG, GLI2, FGF10, SMAD3, FN1, CSF1R, CTBP2, HSP90AA1, PDGFRA, DCC, RAC2, DVL1, WNT9B, IKBKG, PML, STAT5B, BCR, CTNNB1, GLI1, ARNT, FOXO1, PTEN, PRKCB, STAT3, SUFU, FGF16, E2F3, STAT1, NCOA4, FZD4, TRAF1, CDKN1A, WNT2, PGF, FZD1, LAMB2, SOS1, FGF11 |
| KEGG_ADHERENS_JUNCTION | 2.252 | 6.84 × 10−4 | 73 | TGFBR1, MET, IGF1R, ACTN1, IQGAP1, TGFBR2, EGFR, MAPK1, VCL, SNAI2, FARP2, SNAI1, PTPN1, EP300, YES1, ACTN4, PTPRB, CREBBP, CSNK2A2, CTNNA3, FER, SMAD3, TJP1, ACTB, FYN, WASF2, WASL, RAC2, CTNNB1, LMO7, SRC, AFDN, PTPRJ, PTPRF, NECTIN1, PTPRM, CSNK2A1, BAIAP2, CDC42, INSR, ACTG1, WASF3, ACTN2 |
| KEGG_LEISHMANIA_INFECTION | 2.243 | 1.09 × 10−3 | 70 | ITGB1, IL1B, JAK1, MAPK1, IL10, FCGR3B, TGFB2, TGFB1, NFKB1, TNF, RELA, CR1, FCGR2A, TLR4, PTGS2, ITGA4, JUN, IL1A, TAB2, NCF2, FOS, MYD88, IRAK1, C3, PRKCB, MAPK12, STAT1, TLR2, ITGB2, TGFB3, MAPK14, NFKBIA |
| KEGG_RENAL_CELL_CARCINOMA | 2.235 | 1.20 × 10−3 | 70 | RAPGEF1, PDGFB, ETS1, MET, PIK3CD, HGF, AKT1, MAPK1, SLC2A1, VEGFC, EPAS1, TGFB2, HIF1A, TGFB1, TGFA, PTPN11, RAP1B, CRKL, EP300, NRAS, MAP2K1, JUN, PAK2, CRK, SOS2, CREBBP, PIK3CG, EGLN1, PIK3R5 |
| KEGG_ERBB_SIGNALING_PATHWAY | 2.230 | 6.19 × 10−5 | 87 | ABL2, SHC1, CBL, PIK3CD, CAMK2D, EGFR, AKT1, MAPK1, HBEGF, TGFA, CRKL, NRG1, MYC, CBLB, EREG, ABL1, PRKCA, PLCG2, NRAS, MTOR, MAP2K1, AREG, JUN, PAK2, CRK, SOS2, PTK2, PIK3CG, PIK3R5, MAP2K4, GAB1, CAMK2A, STAT5B, PRKCB, SRC, CDKN1A, SOS1, SHC3, EGF |
| KEGG_CELL_ADHESION_MOLECULES_CAMS | 2.170 | 1.81 × 10−5 | 130 | ITGB1, ITGA6, CLDN19, CDH5, PECAM1, SELE, L1CAM, CD276, PDCD1LG2, PVR, CNTNAP1, ESAM, JAM3, ITGA4, ITGA8, SELP, NFASC, ITGAV, MPZL1, CADM3, CD80, CLDN6, GLG1, CD34, VCAN, JAM2, CDH3, PTPRC, CLDN18, CD86, ICAM3, CD28, NRCAM, NLGN4X, ICAM1, NCAM1, HLA-G, SELL, SDC1, SDC4, ICOS, ITGA9, SELPLG, CDH4, SIGLEC1, PTPRF, ITGB8, NECTIN1, ITGB2, CDH15, ITGB7, SDC3, SPN, PTPRM, NEGR1, CD4, HLA-C, MPZ, HLA-E |
| KEGG_ARRHYTHMOGENIC_RIGHT_VENTRICULAR_CARDIOMYOPATHY_ARVC | 2.159 | 1.61 × 10−3 | 74 | ITGA5, ITGB1, ITGA6, ITGB3, ACTN1, ITGB4, ITGA3, ITGA4, ACTN4, ITGA8, ITGAV, ITGA1, DSG2, SGCB, ITGA2, ITGA7, CTNNA3, ATP2A2, DSP, PKP2, LAMA2, SGCA, DES, CACNA1C, ACTB, SGCD, SLC8A1, GJA1, ITGB6, CTNNB1, DSC2, ITGA9, LMNA, CACNG6, CACNB1, ITGB8 |
| KEGG_CYTOSOLIC_DNA_SENSING_PATHWAY | 2.156 | 3.00 × 10−2 | 46 | IL6, IL1B, POLR3A, NFKB1, ADAR, RELA, CHUK, IL33, POLR3H, IKBKG, MAVS |
| KEGG_DILATED_CARDIOMYOPATHY | 2.142 | 2.09 × 10−4 | 90 | ITGA5, ITGB1, ITGA6, ITGB3, ADCY7, ITGB4, TGFB2, ITGA3, TGFB1, ADCY3, TNF, TPM3, ITGA4, ITGA8, ITGAV, IGF1, ITGA1, TPM4, SGCB, ITGA2, ITGA7, ATP2A2, ADCY9, LAMA2, SGCA, DES, CACNA1C, MYH6, ACTB, SGCD, SLC8A1, ITGB6, MYH7, ADCY4, ACTC1, TPM1, PLN, ITGA9, LMNA, CACNG6, CACNB1, ITGB8, ITGB7, MYL2, TGFB3, PRKACB, CACNA1S |
| KEGG_T_CELL_RECEPTOR_SIGNALING_PATHWAY | 2.128 | 1.01 × 10−4 | 108 | CSF2, CBL, PIK3CD, AKT1, MAPK1, IL10, NFKB1, PPP3CA, TNF, NFAT5, RELA, CBLB, VAV2, NRAS, MAP2K1, JUN, NFATC2, PAK2, PRKCQ, SOS2, FOS, PIK3CG, CHUK, BCL10, LCP2, PIK3R5, CARD11, PTPRC, NFATC3, CD28, FYN, PPP3CB, IKBKG, NFATC1, MAPK12, PDPK1, ICOS, SOS1 |
| KEGG_TGF_BETA_SIGNALING_PATHWAY | 2.120 | 4.53 × 10−6 | 86 | INHBA, THBS1, TGFBR1, LTBP1, TGFBR2, MAPK1, ACVR1, TGFB2, ZFYVE9, ROCK2, BMPR2, TGFB1, INHBE, TNF, ROCK1, MYC, SMAD7, SMURF1, EP300, ACVRL1, BMP2, CREBBP, BMP6, THBS2, BMPR1B, PPP2R1B, SMURF2, BMP5, NOG, SMAD3, DCN |
| KEGG_PRION_DISEASES | 2.103 | 2.92 × 10−2 | 35 | IL6, LAMC1, IL1B, MAPK1, HSPA5, MAP2K1, PRNP, IL1A, EGR1, C6, FYN, NCAM1 |
| KEGG_LEUKOCYTE_TRANSENDOTHELIAL_MIGRATION | 2.079 | 1.50 × 10−4 | 114 | ITGB1, CLDN19, CDH5, PECAM1, ACTN1, PIK3CD, MSN, PXN, VCL, ROCK2, PTPN11, RAP1B, BCAR1, ROCK1, CXCL12, ARHGAP5, PRKCA, PLCG2, VAV2, ESAM, MMP2, JAM3, ITGA4, ACTN4, MMP9, CLDN6, VASP, NCF2, PTK2, MYL12A, RHOH, CTNNA3, PIK3CG, JAM2, GNAI1, PIK3R5, GNAI2, CLDN18, ACTB, CYBB, ICAM1, MYL12B, RAC2, ARHGAP35, CTNNB1, RAPGEF4, PRKCB, MAPK12, GNAI3, PTK2B, AFDN |
| KEGG_NATURAL_KILLER_CELL_MEDIATED_CYTOTOXICITY | 2.066 | 2.07 × 10−4 | 125 | CSF2, SHC1, PIK3CD, MAPK1, FCGR3B, PTPN11, PPP3CA, TNF, NFAT5, PRKCA, PLCG2, VAV2, NRAS, MAP2K1, NFATC2, SOS2, TNFRSF10C, PIK3CG, LCP2, PIK3R5, NFATC3, MICB, FYN, ICAM1, PPP3CB, HLA-G, RAC2, PRKCB, NFATC1, PTK2B, SOS1, ULBP3, SHC3, TNFRSF10B, TNFRSF10A |
| KEGG_B_CELL_RECEPTOR_SIGNALING_PATHWAY | 2.032 | 1.36 × 10−2 | 75 | PIK3CD, AKT1, MAPK1, NFKB1, PPP3CA, NFAT5, RELA, PLCG2, VAV2, NRAS, MAP2K1, JUN, NFATC2, SOS2, FOS, RASGRP3, PIK3CG, CHUK, BCL10, PIK3R5, CARD11, NFATC3, LILRB3, PPP3CB, RAC2, IKBKG, PIK3AP1, PRKCB, NFATC1, DAPP1, SOS1, CD79A, GSK3B, NFKBIA, BTK, MALT1, PIK3R1, PIK3CA |
| KEGG_MAPK_SIGNALING_PATHWAY | 2.032 | 6.47 × 10−10 | 267 | GNA12, FLNA, PDGFB, TGFBR1, IL1B, TGFBR2, MAP4K4, FGF7, EGFR, RASGRP4, AKT1, MAPK1, MAP3K20, FGF5, NR4A1, TGFB2, RAPGEF2, TGFB1, PDGFRB, RAP1B, CRKL, NFKB1, PPP3CA, TNF, RELA, MYC, FLNB, TAOK1, MAP2K3, PRKCA, IL1R1, NRAS, MAP2K1, DUSP3, JUN, PLA2G2A, RPS6KA2, NF1, ATF2, IL1A, NTRK1, NFATC2, DUSP16, MAP3K2, MEF2C, PAK2, STK4, CRK, DUSP6, TAB2, TAOK3, GNG12, SOS2, HSPA8, CACNA1G, FOS, RPS6KA3, RRAS2, RASGRP3, STK3, CHUK, RPS6KA4, SRF, PDGFA, DUSP5, RPS6KA1, MAP2K4, DUSP7, PLA2G4A, DUSP1, MAP3K3, CACNA1C, FGF10, TNFRSF1A, RASGRF2, PDGFRA, MAP3K11, PPP3CB, RAC2, RASA1, IKBKG, FLNC, DUSP10, MAP3K12, PRKCB, MAPK12, FGF16, PTPN5, PLA2G4E, CACNG6, SOS1, FGF11, RASGRF1, CACNB1, FGF13, MAP4K3, MAPKAPK3, EGF, DUSP14, RASA2, MAP4K2, HSPA6, MRAS, FGF21, PTPN7, RPS6KA6, CD14, GADD45B, TGFB3, PRKACB, CACNA1S, MAPK14, GADD45A, CACNA1H |
| KEGG_COMPLEMENT_AND_COAGULATION_CASCADES | 2.032 | 1.94 × 10−2 | 69 | SERPINE1, MASP1, VWF, PLAUR, C5AR1, C4B, F2R, PLAU, C4A, A2M, C1S, CR1, F13A1, C4BPA, C1R, F3, C6, CD55, BDKRB2, C3AR1, SERPING1, C3, CFH, THBD, PROC, CD59 |
plotEnrichment(pathways.kegg[["KEGG_JAK_STAT_SIGNALING_PATHWAY"]],
ranks) + labs(title="KEGG_JAK_STAT_SIGNALING_PATHWAY")plotEnrichment(pathways.kegg[["KEGG_MAPK_SIGNALING_PATHWAY"]],
ranks) + labs(title="KEGG_MAPK_SIGNALING_PATHWAY")Analysis of the database reveals that the MAPK and JAK_STAT pathways are also over-represented in the IL6/IL6R High group compared to the Low group. This over-representation is the result of several genes (cf. Leading Edge Subset) and cannot be accounted for by the IL6/IL6R genes only.
Survival curve of IL6/IL6R High and Low groups
clinical_hnscc_data <- GDCquery_clinic("TCGA-HNSC")
# subset only present in coldata_subset$patient
clinical_hnscc_data <- clinical_hnscc_data[clinical_hnscc_data$submitter_id %in% clinical_hnscc_data.all$submitter_id, ]
any(colnames(clinical_hnscc_data) %in% c("vital_status", "days_to_last_follow_up", "days_to_death"))
which(colnames(clinical_hnscc_data) %in% c("vital_status", "days_to_last_follow_up", "days_to_death"))
# clinical_hnscc_data[,c(9,43,49)]table(clinical_hnscc_data$vital_status)
Alive Dead
303 224
clinical_hnscc_data$deceased <- ifelse(clinical_hnscc_data$vital_status == "Alive", FALSE, TRUE)
clinical_hnscc_data$overall_survival <- ifelse(clinical_hnscc_data$vital_status == "Alive",
clinical_hnscc_data$days_to_last_follow_up,
clinical_hnscc_data$days_to_death)output_hnscc <- getResults(query.exp)
cases_rec <- output_hnscc |>
dplyr::select(-submitter_id) |>
dplyr::rename(submitter_id = cases.submitter_id) |>
dplyr::select(c(cases, submitter_id)) |>
inner_join(clinical_hnscc_data, by = 'submitter_id')
# cases_rec
# any(colnames(cases_rec) %in% c("vital_status", "days_to_last_follow_up", "days_to_death"))
# which(colnames(cases_rec) %in% c("vital_status", "days_to_last_follow_up", "days_to_death"))
# cases_rec[,c(10,44,50)]
# table(cases_rec$vital_status)
cases_rec$deceased <- ifelse(cases_rec$vital_status == "Alive", FALSE, TRUE)
cases_rec$overall_survival <- ifelse(cases_rec$vital_status == "Alive",
cases_rec$days_to_last_follow_up,
cases_rec$days_to_death)survival_data <- coldata |>
select(c(barcode, group))
survival_il6_il6r <- merge(survival_data, cases_rec, by.x = 'barcode', by.y = 'cases') |>
# convert days to year
mutate(overall_survival = overall_survival*0.00273973) |>
filter(group %in% c("High", "Low"))No significant difference in survival of IL6/IL6R ‘High’ compared to ‘Low’
# fitting survival curve
fit <- survfit(Surv(overall_survival, deceased) ~ group,
data = survival_il6_il6r
)
fitCall: survfit(formula = Surv(overall_survival, deceased) ~ group, data = survival_il6_il6r)
n events median 0.95LCL 0.95UCL
group=High 57 22 4.58 2.51 NA
group=Low 61 16 5.04 4.36 NA
survival <- ggsurvplot(fit,
data = survival_il6_il6r,
pval = T,
risk.table = T,
conf.int = F,
ncensor.plot = F,
xlim = c(0,8),
break.time.by = 1
)
survivalThis abscence of significant differences can be explained by several factors, such as the treatment approaches, which can be different from one patient to another. A differentiation of patients according to these methods would lead to a bias due to the insufficient number of patients per treatment method.