IL6/IL6R expression in HNSC

Author

Hugues Escoffier

Import TCGA data

This report focuses on the correlation between IL6 and IL6R expression on the EMT, MAPK and JAK-STAT pathways in the TCGA database.

RNA-Seq data

query.exp <- GDCquery(
    project = "TCGA-HNSC", 
    data.category = "Transcriptome Profiling",
    experimental.strategy = 'RNA-Seq',
    data.type = "Gene Expression Quantification", 
    workflow.type = "STAR - Counts",
    sample.type = "Primary Tumor"
)

GDCdownload(
    query = query.exp,
    files.per.chunk = 100
)

hnsc.exp <- GDCprepare(
    query = query.exp
)

Clinical data

# get subtype information
infomation.subtype <- TCGAquery_subtype(tumor = "HNSC")

# get clinical data
information.clinical <- GDCquery_clinic(project = "TCGA-HNSC", type = "clinical") 

# Load metadata
information.clinical.all <- read.csv("GDCdata/TCGA.HNSC.sampleMap-HNSC_clinicalMatrix.tsv", sep = "\t", header = T)
information.clinical.all <- information.clinical.all[grepl("-01", information.clinical.all$sampleID), ]

# Subset information.clinical.all to get only hpv_status_by_p16_testing == Negative
# information.clinical.all <- information.clinical.all[information.clinical.all$hpv_status_by_p16_testing == "Negative", ]

clinical_hnscc_data.all <- information.clinical.all |>
  inner_join(information.clinical, by = "bcr_patient_barcode") |>
  dplyr::select(c(submitter_id, gender.x, age_at_index, tissue_or_organ_of_origin)) |>
  dplyr::mutate(sex = ifelse(gender.x == "MALE", "M", "F")) |>
  dplyr::mutate(age_group_60 = ifelse(age_at_index >= 60, "Y", "N")) |>
  dplyr::select(c(submitter_id, sex, age_group_60, tissue_or_organ_of_origin)) |>
  na.omit()

Analyse the expression of IL6 and IL6R

The objective of this section is to identify samples exhibiting high co-expression of IL6 and IL6R.

Please note that it has been chosen not to differentiate between HPV-positive and HPV-negative patients in this analysis. This choice was based on the limited number of samples for that information was available (n = 74).

Extract and normalize counts

# Counts
hnsc_matrix <- assay(hnsc.exp, "unstranded")

# Gene and sample metadata
gene_metadata <- as.data.frame(rowData(hnsc.exp))
coldata <- as.data.frame(colData(hnsc.exp))

# VST counts 
dds <- DESeqDataSetFromMatrix(countData = hnsc_matrix,
                              colData = coldata,
                              design = ~ 1)

# Removing genes with sum total of 10 reads across all samples
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]

vsd <- vst(dds, blind=FALSE)
hnsc_matrix_vst <- assay(vsd)

Distribution of IL6 and IL6R across the HNSC samples

Firstly, the IL6 and IL6R high and low groups are set as distinct groups.

hnsc_il6 <- hnsc_matrix_vst |>
  as.data.frame() |>
  rownames_to_column(var = 'gene_id') |>
  gather(key = 'case_id', value = 'counts', -gene_id) |>
  left_join(gene_metadata, by = "gene_id") |>
  filter(gene_name == "IL6")

# Z-score the counts columns 
hnsc_il6$z_counts <- scale(hnsc_il6$counts) 

hnsc_il6 <- hnsc_il6 |>
  relocate(z_counts, .after = counts)

hnsc_il6r <- hnsc_matrix_vst |>
  as.data.frame() |>
  rownames_to_column(var = 'gene_id') |>
  gather(key = 'case_id', value = 'counts', -gene_id) |>
  left_join(gene_metadata, by = "gene_id") |>
  filter(gene_name == "IL6R")

hnsc_il6r$z_counts <- scale(hnsc_il6r$counts)

hnsc_il6r <- hnsc_il6r |>
  relocate(z_counts, .after = counts)

# head(hnsc_il6)
# head(hnsc_il6r)

# Plot IL6 and IL6R 
p1 <- ggplot(hnsc_il6, aes(x = z_counts)) +
  geom_boxplot(fill = "blue", alpha = 0.5) +
  geom_point(aes(y = 0), color = "red", size = 2) +
  theme_minimal() +
  theme(axis.text.y = element_blank()) +
  labs(title = "IL6 distribution in HNSC samples", x = "Z-Normalize VST Counts", y = "")

p2 <- ggplot(hnsc_il6r, aes(x = z_counts)) +
  geom_boxplot(fill = "blue", alpha = 0.5) +
  geom_point(aes(y = 0), color = "red", size = 2) +
  theme_minimal() +
  theme(axis.text.y = element_blank()) +
  labs(title = "IL6R distribution in HNSC samples", x = "Z-Normalize VST Counts", y = "")

p1 / p2

Creation of IL6/IL6R ‘Low’ and ‘High’ groups

Then, the samples that are common to the IL6 and IL6R ‘High’ groups are classified as belonging to the IL6/IL6R ‘High’ group, and the same process is applied to the samples that are common to the IL6 and IL6R ‘Low’ groups.

# Create IL6 'Low' and 'High' groups based on Z-counts if -1 = Low and +1 = High and NA else 
hnsc_il6$il6_group <- ifelse(hnsc_il6$z_counts < -0.5, "Low", ifelse(hnsc_il6$z_counts > 0.5, "High", "Medium"))
hnsc_il6r$il6r_group <- ifelse(hnsc_il6r$z_counts < -0.5, "Low", ifelse(hnsc_il6r$z_counts > 0.5, "High", "Medium"))

# Merge IL6 and IL6R data and create a High_High and Low_Low group
hnsc_il6_il6r <- hnsc_il6 |>
  inner_join(hnsc_il6r, by = "case_id") |>
  mutate(group = ifelse(il6_group == "High" & il6r_group == "High", "High",
                        ifelse(il6_group == "Low" & il6r_group == "Low", "Low", "Other")))

table(hnsc_il6_il6r$group)


 High   Low Other 
   57    61   401

DESeq2 analysis for High and Low IL6/IL6R groups

An analysis of the genes differentially expressed between samples from the ‘High’ and ‘Low’ IL6/IL6R groups was then performed.

Prepare the data for DESeq2 analysis

hnsc_il6_il6r_subset <- hnsc_il6_il6r |>
  dplyr::select(c(case_id, group))

# Add the column group to coldata bind by case_id in hnsc_il6_il6r and barcode in coldata 
coldata <- coldata |>
  inner_join(hnsc_il6_il6r_subset, by = c("barcode" = "case_id"))

# Keep only the samples with group High and Low in coldata
coldata_subset <- coldata[coldata$group %in% c("High", "Low"), ]
# Same for hnsc_matrix
hnsc_matrix_subset <- hnsc_matrix[, coldata_subset$barcode]

hnsc.exp <- hnsc.exp[, hnsc.exp$patient %in% coldata_subset$patient]
hnsc.exp$group <- coldata$group[match(hnsc.exp$patient, 
                                             coldata$patient)]

DESeq2 analysis

dds <- DESeqDataSet(hnsc.exp,
                    design = ~ group)

# Removing genes with sum total of 10 reads across all samples
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]

dds <- DESeq(dds)

resultsNames(dds)

vsd <- vst(dds, blind=FALSE)
hnsc_matrix_vst <- assay(vsd)

res <- results(dds, name = "group_Low_vs_High")
res_df <- as.data.frame(res)
res_df$gene_id <- rownames(res_df)

# Multiply Log2FoldChange by -1 to get the High vs Low
res_df$log2FoldChange <- -res_df$log2FoldChange

gene_metadata <- gene_metadata |>
  select(c(gene_id, gene_name))

res_df <- res_df |>
  inner_join(gene_metadata, by = c("gene_id" = "gene_id")) |>
  select(c(gene_name, gene_id, baseMean, log2FoldChange, lfcSE, stat, pvalue, padj))

Significant DEGs genes between IL6/IL6R High and Low groups

# Subset significant genes based on the p-value 
res_df_significant <- res_df |>
  filter(padj < 0.05) |>
  # filter(padj < 0.05, abs(log2FoldChange) >= 0.5) |>
  arrange(padj)

res_df_significant |>
  dplyr::select(-baseMean, -lfcSE, -stat, -pvalue, -gene_id) |>
  dplyr::rename(gene_name = 'gene_name') |>
  relocate(gene_name, .before = 1L) |>
  filter(padj < 0.05, abs(log2FoldChange) >= 0.5 ) |> 
  arrange(padj) |> 
  slice_head(n = 30) |>
  gt() |>
   tab_style(
    style = cell_text(
      font = c("Comic Sansa", "Menloa", default_fonts())
    ), locations = cells_body(columns = everything())
  ) |>
  fmt_number(columns = c(log2FoldChange),
             decimals = 3) |>
  fmt_scientific(
    columns = padj,
    decimals = 2) |>
  data_color(columns = padj, reverse = TRUE,
             palette = "viridis") |>
   data_color(columns = log2FoldChange,
             palette = colorRampPalette(c("blue", "white", "red"))(n = 100)) |> 
  gt_theme_guardian()

gene_name	log2FoldChange	padj
IL6	4.457	3.96 × 10⁻¹³⁸
IL6R	2.084	7.69 × 10⁻⁷⁴
ZFR2	−6.102	6.80 × 10⁻⁴¹
TAF7L	−5.162	1.52 × 10⁻³⁴
ABCA17P	−5.019	8.16 × 10⁻³⁴
NEFH	−5.263	4.03 × 10⁻³³
CSF2	3.905	1.83 × 10⁻²⁹
NR4A3	2.767	3.86 × 10⁻²⁷
SMC1B	−4.303	2.51 × 10⁻²⁴
SERPINE1	2.800	5.95 × 10⁻²⁴
SYCP2	−3.701	5.71 × 10⁻²³
MXD3	−1.840	5.71 × 10⁻²³
MACF1	1.275	6.80 × 10⁻²³
IL24	3.741	1.34 × 10⁻²²
DDX25	−3.992	1.42 × 10⁻²²
FPR2	3.295	2.28 × 10⁻²²
KLHL35	−3.104	3.06 × 10⁻²²
SYCE2	−2.615	3.06 × 10⁻²²
PET100	−1.722	4.47 × 10⁻²²
TMPO-AS1	−1.516	4.96 × 10⁻²²
HAS1	3.116	1.01 × 10⁻²¹
WDR83	−1.076	2.44 × 10⁻²¹
UPB1	−3.998	2.48 × 10⁻²¹
YBX2	−3.485	3.45 × 10⁻²¹
TMSB15A	−3.414	3.45 × 10⁻²¹
SOCS3	1.556	3.45 × 10⁻²¹
EPHA6	−4.256	4.32 × 10⁻²¹
KEL	−3.905	4.32 × 10⁻²¹
IL11	2.551	9.16 × 10⁻²¹
COMMD3	−1.276	4.07 × 10⁻²⁰

As expected, the top differentially expressed genes were IL6 and IL6R. These results will then be used as a base for the analysis of the enrichment pathways between the two groups.

GSEA Analysis between IL6/IL6R High and Low groups

GSEA is used to highlight the pathways that are over-represented between the IL6/IL6R High and Low groups. The GSEA method is based on an ordered list of genes and a database of pathways.

The first step in this analysis is to analyse the Hallmark Gene Set, which summarises and represents specific well-defined biological states or processes and displays coherent expression, including the EMT pathway. The analysis will then be reproduced on the KEGG Gene Set, which contains canonical pathways derived from the KEGG pathway database, including the MAPK/ERK and JAK/STAT pathways.

Prepare the data for GSEA analysis

Ranking

# is there duplicate
any(duplicated(res_df$gene_name))

# which one ? 
dup_gene_names <- res_df %>%
  dplyr::filter(duplicated(gene_name)) %>%
  dplyr::pull(gene_name)

# filter duplicated 
res_df_filter <- res_df %>%
  # Sort so that the highest absolute values of the log2 fold change are at the top
  dplyr::arrange(dplyr::desc(abs(log2FoldChange))) %>%
  # Filter out the duplicated rows using `dplyr::distinct()`
  dplyr::distinct(gene_name, .keep_all = TRUE)

any(duplicated(res_df_filter$gene_name))

# Ranking based on the p-value and the log2FoldChange
low_vs_high <- res_df_filter |> 
  mutate(sign_p_value = -log10(pvalue) * sign(log2FoldChange)) |>
  arrange(desc(sign_p_value))

res_gsea <- low_vs_high |> 
  dplyr::select(gene_name, sign_p_value)

# res_gsea

ranks <- deframe(res_gsea)
head(ranks, 20)

GSEA analysis using Hallmark gene set

Pathways using HALLMARK gene set

pathways.hallmark <- gmtPathways("gsea/h.all.v2023.2.Hs.entrez.gmt.txt")

# pathways.hallmark |> 
#   head() |>  
#   lapply(head)

fgseaRes <- fgsea(pathways=pathways.hallmark, stats=ranks)

Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.01% of the list).
The order of those tied genes will be arbitrary, which may produce unexpected results.

fgseaResTidy <- fgseaRes |> 
  as_tibble() |> 
  arrange(desc(NES))

ggplot(fgseaResTidy, aes(reorder(pathway, NES), NES)) +
  geom_col(aes(fill=padj<0.05)) +
  coord_flip() +
  labs(x="Pathway", y="Normalized Enrichment Score",
       title="Hallmark pathways NES from GSEA - IL6/IL6R High vs Low groups") + 
  theme_minimal()

Enrichment analysis using the Hallmark Gene set indicates that the pathway most over-represented in the IL6/IL6R High group corresponds to the EMT pathway, supporting the correlation between IL6/IL6R expression and the pathway’s activation.

EMT Pathway

plotEnrichment(pathways.hallmark[["HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION"]],
               ranks) + labs(title="HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION")

GSEA analysis using KEGG gene set

Pathways using KEGG gene set

pathways.kegg <- gmtPathways("gsea/c2.cp.kegg_legacy.v2023.2.Hs.symbols.gmt.txt")

# pathways.kegg |> 
#   head() |>  
#   lapply(head)

fgseaRes <- fgsea(pathways=pathways.kegg, stats=ranks)

Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.01% of the list).
The order of those tied genes will be arbitrary, which may produce unexpected results.

fgseaResTidy <- fgseaRes |> 
  as_tibble() |> 
  arrange(desc(NES))

fgseaResTidy |> 
  select(c(pathway, NES, padj, size, leadingEdge)) |>
  slice_head(n = 30) |> 
  gt() |>
   tab_style(
    style = cell_text(
      font = c("Comic Sansa", "Menloa", default_fonts())
    ), locations = cells_body(columns = everything())
  ) |>
  # increase the width of the leadingEdge column
  cols_width(
    pathway ~ px(70)
  ) |>
  fmt_number(columns = c(NES),
             decimals = 3) |>
  fmt_scientific(
    columns = padj,
    decimals = 2) |>
  data_color(columns = padj, reverse = TRUE,
             palette = "viridis") |>
  data_color(columns = NES,
             palette = colorRampPalette(c("white", "red"))(n = 100)) |> 
  tab_options(container.width = 2000, container.height = 500) |>
  gt_theme_guardian()

pathway	NES	padj	size	leadingEdge
KEGG_FOCAL_ADHESION	2.740	5.77 × 10⁻²⁶	198	COL4A1, LAMC1, LAMC2, FLNA, ITGA5, RAPGEF1, ITGB1, CAV1, ITGA6, LAMA3, ITGB3, THBS1, SHC1, PDGFB, COL4A2, MET, VWF, TNC, IGF1R, ACTN1, PIK3CD, LAMB3, HGF, EGFR, AKT1, KDR, PXN, MAPK1, FLT1, VEGFC, VCL, ITGB4, RELN, COL4A6, TLN1, LAMB1, COL5A3, ITGA3, ROCK2, PDGFRB, RAP1B, BCAR1, CRKL, DIAPH1, FLT4, ROCK1, LAMA4, CAV2, COL6A3, FLNB, PDGFC, ARHGAP5, COL5A2, PRKCA, MYLK, VAV2, MAP2K1, COL2A1, ITGA4, ACTN4, ITGA8, JUN, ITGAV, IGF1, COL6A1, ITGA1, PAK2, MYLK3, CRK, TNXB, VASP, LAMA5, ZYX, SOS2, SPP1, PTK2, ITGA2, ITGA7, MYL12A, CAPN2, THBS2, DOCK1, PIK3CG, BIRC2, TLN2, PDGFA, LAMA2, PIK3R5, XIAP, COL5A1, COL3A1, PARVA, ACTB, FN1, FYN, PDGFRA, ITGB6, COL6A2, MYL12B, RAC2, ARHGAP35, PPP1R12A, FLNC, CTNNB1, PTEN, PRKCB, PARVB, SRC, TNN, PDPK1, ITGA9, VTN, PGF, LAMB2, SOS1, RASGRF1, SHC3, LAMC3, PDGFD, COL1A1, COL1A2, EGF
KEGG_JAK_STAT_SIGNALING_PATHWAY	2.662	1.81 × 10⁻¹⁷	147	IL6, IL6R, CSF2, IL24, SOCS3, IL11, SPRY4, OSM, CBL, LIF, PIK3CD, OSMR, CSF3, AKT1, JAK1, IL7R, SPRY1, IL10, CLCF1, LEPR, CSF3R, SPRED2, PTPN11, IFNK, SOCS5, MYC, IL13RA1, IL20, CBLB, EP300, IL6ST, STAM, SPRED1, IFNLR1, SOS2, CREBBP, IL5RA, PIK3CG, SOCS4, CNTF, PIK3R5
KEGG_HEMATOPOIETIC_CELL_LINEAGE	2.649	6.18 × 10⁻¹²	84	IL6, IL6R, CSF2, IL11, ITGA5, ITGA6, ITGB3, IL1B, ANPEP, CSF3, IL7R, CD44, CSF3R, ITGA3, MME, TNF, CR1, IL1R1, ITGA4, IL1A, ITGA1, CD34, ITGA2, IL5RA
KEGG_ECM_RECEPTOR_INTERACTION	2.549	4.03 × 10⁻⁹	83	COL4A1, LAMC1, LAMC2, ITGA5, ITGB1, ITGA6, LAMA3, ITGB3, THBS1, COL4A2, VWF, TNC, LAMB3, CD44, ITGB4, RELN, COL4A6, LAMB1, COL5A3, ITGA3, HSPG2, LAMA4, COL6A3, COL5A2, COL2A1, ITGA4, ITGA8, ITGAV, AGRN, COL6A1, ITGA1, TNXB, LAMA5, SPP1, ITGA2, ITGA7, THBS2, LAMA2, COL5A1, COL3A1, FN1, ITGB6, COL6A2, SDC1, SDC4, GP5, TNN, ITGA9, VTN, LAMB2, LAMC3, COL1A1, COL1A2, ITGB8
KEGG_CYTOKINE_CYTOKINE_RECEPTOR_INTERACTION	2.544	3.12 × 10⁻²⁵	254	IL6, IL6R, CSF2, IL24, IL11, CXCL8, INHBA, CXCL5, PDGFB, TGFBR1, OSM, LIF, MET, IL1B, TGFBR2, OSMR, CXCR1, HGF, CSF3, EGFR, CCL11, IL7R, KDR, FLT1, IL10, CXCL3, CLCF1, VEGFC, LEPR, CXCL2, ACVR1, TNFRSF11B, TGFB2, CSF3R, BMPR2, CCL24, TGFB1, PDGFRB, INHBE, IFNK, TNF, IL1RAP, FLT4, CXCL12, CCL7, PDGFC, IL13RA1, IL20, CCR3, IL1R1, IL6ST, ACVRL1, BMP2, IL1A, CCL21, CCL8, IFNLR1, CCR1, TNFRSF12A, CCL23, PPBP, CCL3L3, TNFRSF10C, IL17RA, IL5RA, BMPR1B, PDGFA, CNTF, TNFSF14, CCR4, CCR2, CXCR2, PLEKHO2, CCR8, TNFRSF1A, RELT, CSF1R, TNFRSF21, IL10RA, PDGFRA, EPO, CCL13, IL22, CCL2, CXCL1, TNFSF8, CXCL14, CCL18, IL9R, TNFRSF1B, CCL22
KEGG_CHEMOKINE_SIGNALING_PATHWAY	2.478	7.76 × 10⁻¹⁶	187	CXCL8, CXCL5, SHC1, PIK3CD, CXCR1, AKT1, CCL11, ADCY7, PXN, MAPK1, CXCL3, CXCL2, ROCK2, CCL24, RAP1B, BCAR1, CRKL, ADCY3, NFKB1, RELA, ROCK1, CXCL12, CCL7, GNB1, CCR3, VAV2, NRAS, MAP2K1, GRK5, CCL21, CCL8, CCR1, CRK, GNG12, SOS2, CCL23, PPBP, CCL3L3, PTK2, PREX1, PIK3CG, ADCY9, CHUK, GNAI1, PLCB1, PIK3R5, CCR4, CCR2, GNAI2, CXCR2, GNB5, CCR8, ELMO1, DOCK2, GRK3, CCL13, TIAM1, WASL, GNG11, RAC2, CCL2, IKBKG, STAT5B, CXCL1, CXCL14, ADCY4, CCL18, CCL22, FOXO3, PRKCB, STAT3, GNAI3, PTK2B, STAT1, SOS1, XCR1, CCL19, TIAM2, SHC3, PF4, PLCB3, GNGT1, CXCL11, PRKCZ, FGR, CCL3, CCL14, HCK, PRKACB, GSK3B, CCL16, NFKBIA, CCR7, CXCL6, CCL4, PIK3R1, GRK2, CDC42, CXCR5, PIK3CA
KEGG_NOD_LIKE_RECEPTOR_SIGNALING_PATHWAY	2.445	2.88 × 10⁻⁵	62	IL6, CXCL8, IL1B, NLRP3, CCL11, MAPK1, CXCL2, TNFAIP3, NFKB1, TNF, MEFV, RELA, NAIP, NOD1, CCL7, CCL8, TAB2, CHUK, BIRC2, HSP90AB1, RIPK2, XIAP, NLRC4, HSP90AA1, CCL13, CCL2, IKBKG, CXCL1
KEGG_GRAFT_VERSUS_HOST_DISEASE	2.444	1.50 × 10⁻⁴	37	IL6, IL1B, TNF, IL1A, CD80, CD86, CD28, HLA-G
KEGG_HYPERTROPHIC_CARDIOMYOPATHY_HCM	2.400	1.47 × 10⁻⁶	83	IL6, ITGA5, ITGB1, ITGA6, ITGB3, ITGB4, TGFB2, ITGA3, TGFB1, TNF, TPM3, ITGA4, ITGA8, ITGAV, IGF1, ITGA1, TPM4, ACE, SGCB, ITGA2, PRKAB2, ITGA7, ATP2A2, LAMA2, SGCA, DES, CACNA1C, MYH6, ACTB, SGCD, SLC8A1, ITGB6, MYH7, PRKAG3, PRKAA1, ACTC1, TPM1, ITGA9, LMNA, CACNG6
KEGG_REGULATION_OF_ACTIN_CYTOSKELETON	2.356	8.52 × 10⁻¹³	212	GNA12, ITGA5, ITGB1, ITGA6, ITGB3, PDGFB, MYH9, ACTN1, IQGAP1, PIK3CD, SSH1, MSN, FGF7, EGFR, PXN, MAPK1, VCL, ITGB4, FGF5, SLC9A1, ENAH, CYFIP1, SSH2, ITGA3, ROCK2, RDX, F2R, PDGFRB, BCAR1, LIMK1, CRKL, DIAPH1, ARHGEF12, NCKAP1, ROCK1, PDGFC, MYLK, VAV2, NRAS, MAP2K1, ITGA4, ACTN4, ITGA8, PIP5K1A, ITGAV, PIP4K2A, ITGA1, PAK2, MYLK3, CRK, ARHGEF7, GNG12, SOS2, CHRM5, PTK2, ITGA2, ITGA7, MYL12A, DOCK1, RRAS2, GNA13, APC, PIK3CG, PDGFA, PIK3R5, FGF10, DIAPH3, ACTB, PIP4K2C, FN1, ARHGEF6, BDKRB2, WASF2, PDGFRA, TIAM1, WASL, ITGB6, MYL12B, RAC2, IQGAP2, ARHGAP35, PPP1R12A, PIP5K1B, MYH10, LIMK2, CFL2, FGF16, ITGAX, ITGA9, SOS1, FGF11, NCKAP1L, TIAM2, PDGFD, FGF13, EGF, ITGB8, EZR, ITGB2, MRAS, PPP1CA, ITGB7, PFN2, FGF21, CFL1, MYL2, CD14, PIKFYVE, BAIAP2, PIP5K1C, PIK3R1, PIP4K2B, ITGB5, CDC42, PIK3CA, FGF18, ACTG1, ARPC1B, BDKRB1, SSH3
KEGG_VIRAL_MYOCARDITIS	2.350	8.52 × 10⁻⁵	68	ABL2, CAV1, MYH9, EIF4G3, ABL1, CD80, EIF4G2, SGCB, LAMA2, SGCA, MYH11, MYH1, MYH3, CD86, MYH6, ACTB, CD55, SGCD, CD28, EIF4G1, FYN, ICAM1, HLA-G, RAC2, MYH7, MYH10, MYH2, ITGB2, HLA-C, HLA-E, MYH8, ACTG1
KEGG_INTESTINAL_IMMUNE_NETWORK_FOR_IGA_PRODUCTION	2.338	3.20 × 10⁻³	45	IL6, IL10, TGFB1, CXCL12, ITGA4, CD80, CD86, CD28, PIGR
KEGG_TOLL_LIKE_RECEPTOR_SIGNALING_PATHWAY	2.313	5.84 × 10⁻⁷	95	IL6, CXCL8, IL1B, PIK3CD, LBP, AKT1, MAPK1, NFKB1, TNF, RELA, MAP2K3, TLR4, TRAF3, MAP2K1, JUN, CD80, TAB2, SPP1, FOS, TICAM1, TLR8, FADD, PIK3CG, CHUK, PIK3R5, MYD88, MAP2K4, CD86
KEGG_PATHWAYS_IN_CANCER	2.275	2.79 × 10⁻¹⁷	325	IL6, COL4A1, LAMC1, CXCL8, LAMC2, ITGB1, ITGA6, LAMA3, PDGFB, TGFBR1, ETS1, CDK6, COL4A2, CBL, MET, IGF1R, PIK3CD, TGFBR2, LAMB3, HGF, FGF7, EGFR, AKT1, JAK1, MAPK1, SLC2A1, VEGFC, COL4A6, FGF5, LAMB1, EPAS1, TGFB2, CSF3R, HIF1A, ITGA3, TGFB1, TGFA, PDGFRB, CRKL, NFKB1, RET, TPM3, RELA, PPARD, LAMA4, MYC, RB1, MMP1, RARA, CBLB, EP300, ABL1, PRKCA, TRAF3, PLCG2, WNT9A, NRAS, RALBP1, MTOR, MAP2K1, PTGS2, BMP2, MMP2, MMP9, WNT7B, JUN, ITGAV, IGF1, NTRK1, RALB, WNT7A, STK4, CRK, GLI3, LAMA5, SOS2, CREBBP, PTK2, ITGA2, FOS, CTNNA3, FADD, APC, PIK3CG, CHUK, EGLN1, BIRC2, PDGFA, LAMA2, HSP90AB1, PIK3R5, WNT5A, XIAP, RXRG, GLI2, FGF10, SMAD3, FN1, CSF1R, CTBP2, HSP90AA1, PDGFRA, DCC, RAC2, DVL1, WNT9B, IKBKG, PML, STAT5B, BCR, CTNNB1, GLI1, ARNT, FOXO1, PTEN, PRKCB, STAT3, SUFU, FGF16, E2F3, STAT1, NCOA4, FZD4, TRAF1, CDKN1A, WNT2, PGF, FZD1, LAMB2, SOS1, FGF11
KEGG_ADHERENS_JUNCTION	2.252	6.84 × 10⁻⁴	73	TGFBR1, MET, IGF1R, ACTN1, IQGAP1, TGFBR2, EGFR, MAPK1, VCL, SNAI2, FARP2, SNAI1, PTPN1, EP300, YES1, ACTN4, PTPRB, CREBBP, CSNK2A2, CTNNA3, FER, SMAD3, TJP1, ACTB, FYN, WASF2, WASL, RAC2, CTNNB1, LMO7, SRC, AFDN, PTPRJ, PTPRF, NECTIN1, PTPRM, CSNK2A1, BAIAP2, CDC42, INSR, ACTG1, WASF3, ACTN2
KEGG_LEISHMANIA_INFECTION	2.243	1.09 × 10⁻³	70	ITGB1, IL1B, JAK1, MAPK1, IL10, FCGR3B, TGFB2, TGFB1, NFKB1, TNF, RELA, CR1, FCGR2A, TLR4, PTGS2, ITGA4, JUN, IL1A, TAB2, NCF2, FOS, MYD88, IRAK1, C3, PRKCB, MAPK12, STAT1, TLR2, ITGB2, TGFB3, MAPK14, NFKBIA
KEGG_RENAL_CELL_CARCINOMA	2.235	1.20 × 10⁻³	70	RAPGEF1, PDGFB, ETS1, MET, PIK3CD, HGF, AKT1, MAPK1, SLC2A1, VEGFC, EPAS1, TGFB2, HIF1A, TGFB1, TGFA, PTPN11, RAP1B, CRKL, EP300, NRAS, MAP2K1, JUN, PAK2, CRK, SOS2, CREBBP, PIK3CG, EGLN1, PIK3R5
KEGG_ERBB_SIGNALING_PATHWAY	2.230	6.19 × 10⁻⁵	87	ABL2, SHC1, CBL, PIK3CD, CAMK2D, EGFR, AKT1, MAPK1, HBEGF, TGFA, CRKL, NRG1, MYC, CBLB, EREG, ABL1, PRKCA, PLCG2, NRAS, MTOR, MAP2K1, AREG, JUN, PAK2, CRK, SOS2, PTK2, PIK3CG, PIK3R5, MAP2K4, GAB1, CAMK2A, STAT5B, PRKCB, SRC, CDKN1A, SOS1, SHC3, EGF
KEGG_CELL_ADHESION_MOLECULES_CAMS	2.170	1.81 × 10⁻⁵	130	ITGB1, ITGA6, CLDN19, CDH5, PECAM1, SELE, L1CAM, CD276, PDCD1LG2, PVR, CNTNAP1, ESAM, JAM3, ITGA4, ITGA8, SELP, NFASC, ITGAV, MPZL1, CADM3, CD80, CLDN6, GLG1, CD34, VCAN, JAM2, CDH3, PTPRC, CLDN18, CD86, ICAM3, CD28, NRCAM, NLGN4X, ICAM1, NCAM1, HLA-G, SELL, SDC1, SDC4, ICOS, ITGA9, SELPLG, CDH4, SIGLEC1, PTPRF, ITGB8, NECTIN1, ITGB2, CDH15, ITGB7, SDC3, SPN, PTPRM, NEGR1, CD4, HLA-C, MPZ, HLA-E
KEGG_ARRHYTHMOGENIC_RIGHT_VENTRICULAR_CARDIOMYOPATHY_ARVC	2.159	1.61 × 10⁻³	74	ITGA5, ITGB1, ITGA6, ITGB3, ACTN1, ITGB4, ITGA3, ITGA4, ACTN4, ITGA8, ITGAV, ITGA1, DSG2, SGCB, ITGA2, ITGA7, CTNNA3, ATP2A2, DSP, PKP2, LAMA2, SGCA, DES, CACNA1C, ACTB, SGCD, SLC8A1, GJA1, ITGB6, CTNNB1, DSC2, ITGA9, LMNA, CACNG6, CACNB1, ITGB8
KEGG_CYTOSOLIC_DNA_SENSING_PATHWAY	2.156	3.00 × 10⁻²	46	IL6, IL1B, POLR3A, NFKB1, ADAR, RELA, CHUK, IL33, POLR3H, IKBKG, MAVS
KEGG_DILATED_CARDIOMYOPATHY	2.142	2.09 × 10⁻⁴	90	ITGA5, ITGB1, ITGA6, ITGB3, ADCY7, ITGB4, TGFB2, ITGA3, TGFB1, ADCY3, TNF, TPM3, ITGA4, ITGA8, ITGAV, IGF1, ITGA1, TPM4, SGCB, ITGA2, ITGA7, ATP2A2, ADCY9, LAMA2, SGCA, DES, CACNA1C, MYH6, ACTB, SGCD, SLC8A1, ITGB6, MYH7, ADCY4, ACTC1, TPM1, PLN, ITGA9, LMNA, CACNG6, CACNB1, ITGB8, ITGB7, MYL2, TGFB3, PRKACB, CACNA1S
KEGG_T_CELL_RECEPTOR_SIGNALING_PATHWAY	2.128	1.01 × 10⁻⁴	108	CSF2, CBL, PIK3CD, AKT1, MAPK1, IL10, NFKB1, PPP3CA, TNF, NFAT5, RELA, CBLB, VAV2, NRAS, MAP2K1, JUN, NFATC2, PAK2, PRKCQ, SOS2, FOS, PIK3CG, CHUK, BCL10, LCP2, PIK3R5, CARD11, PTPRC, NFATC3, CD28, FYN, PPP3CB, IKBKG, NFATC1, MAPK12, PDPK1, ICOS, SOS1
KEGG_TGF_BETA_SIGNALING_PATHWAY	2.120	4.53 × 10⁻⁶	86	INHBA, THBS1, TGFBR1, LTBP1, TGFBR2, MAPK1, ACVR1, TGFB2, ZFYVE9, ROCK2, BMPR2, TGFB1, INHBE, TNF, ROCK1, MYC, SMAD7, SMURF1, EP300, ACVRL1, BMP2, CREBBP, BMP6, THBS2, BMPR1B, PPP2R1B, SMURF2, BMP5, NOG, SMAD3, DCN
KEGG_PRION_DISEASES	2.103	2.92 × 10⁻²	35	IL6, LAMC1, IL1B, MAPK1, HSPA5, MAP2K1, PRNP, IL1A, EGR1, C6, FYN, NCAM1
KEGG_LEUKOCYTE_TRANSENDOTHELIAL_MIGRATION	2.079	1.50 × 10⁻⁴	114	ITGB1, CLDN19, CDH5, PECAM1, ACTN1, PIK3CD, MSN, PXN, VCL, ROCK2, PTPN11, RAP1B, BCAR1, ROCK1, CXCL12, ARHGAP5, PRKCA, PLCG2, VAV2, ESAM, MMP2, JAM3, ITGA4, ACTN4, MMP9, CLDN6, VASP, NCF2, PTK2, MYL12A, RHOH, CTNNA3, PIK3CG, JAM2, GNAI1, PIK3R5, GNAI2, CLDN18, ACTB, CYBB, ICAM1, MYL12B, RAC2, ARHGAP35, CTNNB1, RAPGEF4, PRKCB, MAPK12, GNAI3, PTK2B, AFDN
KEGG_NATURAL_KILLER_CELL_MEDIATED_CYTOTOXICITY	2.066	2.07 × 10⁻⁴	125	CSF2, SHC1, PIK3CD, MAPK1, FCGR3B, PTPN11, PPP3CA, TNF, NFAT5, PRKCA, PLCG2, VAV2, NRAS, MAP2K1, NFATC2, SOS2, TNFRSF10C, PIK3CG, LCP2, PIK3R5, NFATC3, MICB, FYN, ICAM1, PPP3CB, HLA-G, RAC2, PRKCB, NFATC1, PTK2B, SOS1, ULBP3, SHC3, TNFRSF10B, TNFRSF10A
KEGG_B_CELL_RECEPTOR_SIGNALING_PATHWAY	2.032	1.36 × 10⁻²	75	PIK3CD, AKT1, MAPK1, NFKB1, PPP3CA, NFAT5, RELA, PLCG2, VAV2, NRAS, MAP2K1, JUN, NFATC2, SOS2, FOS, RASGRP3, PIK3CG, CHUK, BCL10, PIK3R5, CARD11, NFATC3, LILRB3, PPP3CB, RAC2, IKBKG, PIK3AP1, PRKCB, NFATC1, DAPP1, SOS1, CD79A, GSK3B, NFKBIA, BTK, MALT1, PIK3R1, PIK3CA
KEGG_MAPK_SIGNALING_PATHWAY	2.032	6.47 × 10⁻¹⁰	267	GNA12, FLNA, PDGFB, TGFBR1, IL1B, TGFBR2, MAP4K4, FGF7, EGFR, RASGRP4, AKT1, MAPK1, MAP3K20, FGF5, NR4A1, TGFB2, RAPGEF2, TGFB1, PDGFRB, RAP1B, CRKL, NFKB1, PPP3CA, TNF, RELA, MYC, FLNB, TAOK1, MAP2K3, PRKCA, IL1R1, NRAS, MAP2K1, DUSP3, JUN, PLA2G2A, RPS6KA2, NF1, ATF2, IL1A, NTRK1, NFATC2, DUSP16, MAP3K2, MEF2C, PAK2, STK4, CRK, DUSP6, TAB2, TAOK3, GNG12, SOS2, HSPA8, CACNA1G, FOS, RPS6KA3, RRAS2, RASGRP3, STK3, CHUK, RPS6KA4, SRF, PDGFA, DUSP5, RPS6KA1, MAP2K4, DUSP7, PLA2G4A, DUSP1, MAP3K3, CACNA1C, FGF10, TNFRSF1A, RASGRF2, PDGFRA, MAP3K11, PPP3CB, RAC2, RASA1, IKBKG, FLNC, DUSP10, MAP3K12, PRKCB, MAPK12, FGF16, PTPN5, PLA2G4E, CACNG6, SOS1, FGF11, RASGRF1, CACNB1, FGF13, MAP4K3, MAPKAPK3, EGF, DUSP14, RASA2, MAP4K2, HSPA6, MRAS, FGF21, PTPN7, RPS6KA6, CD14, GADD45B, TGFB3, PRKACB, CACNA1S, MAPK14, GADD45A, CACNA1H
KEGG_COMPLEMENT_AND_COAGULATION_CASCADES	2.032	1.94 × 10⁻²	69	SERPINE1, MASP1, VWF, PLAUR, C5AR1, C4B, F2R, PLAU, C4A, A2M, C1S, CR1, F13A1, C4BPA, C1R, F3, C6, CD55, BDKRB2, C3AR1, SERPING1, C3, CFH, THBD, PROC, CD59

plotEnrichment(pathways.kegg[["KEGG_JAK_STAT_SIGNALING_PATHWAY"]],
               ranks) + labs(title="KEGG_JAK_STAT_SIGNALING_PATHWAY")

plotEnrichment(pathways.kegg[["KEGG_MAPK_SIGNALING_PATHWAY"]],
               ranks) + labs(title="KEGG_MAPK_SIGNALING_PATHWAY")

Analysis of the database reveals that the MAPK and JAK_STAT pathways are also over-represented in the IL6/IL6R High group compared to the Low group. This over-representation is the result of several genes (cf. Leading Edge Subset) and cannot be accounted for by the IL6/IL6R genes only.

Survival curve of IL6/IL6R High and Low groups

clinical_hnscc_data <- GDCquery_clinic("TCGA-HNSC")

# subset only present in coldata_subset$patient 
clinical_hnscc_data <- clinical_hnscc_data[clinical_hnscc_data$submitter_id %in% clinical_hnscc_data.all$submitter_id, ]

any(colnames(clinical_hnscc_data) %in% c("vital_status", "days_to_last_follow_up", "days_to_death"))
which(colnames(clinical_hnscc_data) %in% c("vital_status", "days_to_last_follow_up", "days_to_death"))
# clinical_hnscc_data[,c(9,43,49)]

table(clinical_hnscc_data$vital_status)


Alive  Dead 
  303   224

clinical_hnscc_data$deceased <- ifelse(clinical_hnscc_data$vital_status == "Alive", FALSE, TRUE)

clinical_hnscc_data$overall_survival <- ifelse(clinical_hnscc_data$vital_status == "Alive",
                                               clinical_hnscc_data$days_to_last_follow_up,
                                               clinical_hnscc_data$days_to_death)

output_hnscc <- getResults(query.exp)

cases_rec <- output_hnscc |>
  dplyr::select(-submitter_id) |>
  dplyr::rename(submitter_id = cases.submitter_id) |>
  dplyr::select(c(cases, submitter_id)) |> 
  inner_join(clinical_hnscc_data, by = 'submitter_id')

# cases_rec

# any(colnames(cases_rec) %in% c("vital_status", "days_to_last_follow_up", "days_to_death"))
# which(colnames(cases_rec) %in% c("vital_status", "days_to_last_follow_up", "days_to_death"))
# cases_rec[,c(10,44,50)]

# table(cases_rec$vital_status)
cases_rec$deceased <- ifelse(cases_rec$vital_status == "Alive", FALSE, TRUE)

cases_rec$overall_survival <- ifelse(cases_rec$vital_status == "Alive",
                                     cases_rec$days_to_last_follow_up,
                                     cases_rec$days_to_death)

survival_data <- coldata |>
  select(c(barcode, group))

survival_il6_il6r <- merge(survival_data, cases_rec, by.x = 'barcode', by.y = 'cases') |>
  # convert days to year
  mutate(overall_survival = overall_survival*0.00273973) |>
  filter(group %in% c("High", "Low"))

No significant difference in survival of IL6/IL6R ‘High’ compared to ‘Low’

# fitting survival curve 
fit <- survfit(Surv(overall_survival, deceased) ~ group, 
               data = survival_il6_il6r
               )
fit

Call: survfit(formula = Surv(overall_survival, deceased) ~ group, data = survival_il6_il6r)

            n events median 0.95LCL 0.95UCL
group=High 57     22   4.58    2.51      NA
group=Low  61     16   5.04    4.36      NA

survival <- ggsurvplot(fit,
                       data = survival_il6_il6r,
                       pval = T,
                       risk.table = T,
                       conf.int = F, 
                       ncensor.plot = F,
                       xlim = c(0,8), 
                       break.time.by = 1
                       ) 
survival

This abscence of significant differences can be explained by several factors, such as the treatment approaches, which can be different from one patient to another. A differentiation of patients according to these methods would lead to a bias due to the insufficient number of patients per treatment method.