数据读取

# Switch to the project directory; the relative paths below resolve against it.
setwd("~/Documents/coploit/home")

# Load the protein and RNA-seq tables and convert them to plain data frames.
# NOTE(review): the file names suggest the values are already scaled - confirm.
protein <- as.data.frame(fread("protein/HCC_protein_after_scale.xls"))
rna <- as.data.frame(fread("RNAseq/HCC_TPM_after_scale.xls"))

# Genes of interest: CCDC134 plus the genes it is compared against.
fe <- c("CCDC134", "GPX4", "SAT1", "GCLM", "CP", "FTH1")

查看目标基因

# Preview the first two columns of the protein table for the genes in `fe`.
# The recorded output below lists five genes; SAT1 does not appear in it.
protein[which(protein$genename %in% fe), seq_len(2)]
##      genename  U00000764
## 885        CP  1.4429643
## 999      FTH1 -0.1695630
## 1983     GPX4 -0.2764138
## 2156     GCLM -0.6360676
## 5707  CCDC134  0.2907613
# Same preview for the RNA table; the recorded output lists all six genes.
rna[which(rna$genename %in% fe), seq_len(2)]
##       genename  U00000991
## 303         CP -0.2710977
## 7952      SAT1  0.1195445
## 12135     FTH1 -0.1190031
## 14448     GCLM  0.1095749
## 17188     GPX4 -1.6308617
## 21898  CCDC134  0.2194031

数据整理

# Protein: keep only the genes in `fe`, label rows by gene name, drop the
# first column, and transpose so that each gene becomes a column.
# NOTE(review): dropping column 1 assumes `genename` is the first column.
protein1 <- protein[protein$genename %in% fe, ]
rownames(protein1) <- protein1$genename
protein1 <- t(protein1[, -1])

# RNA: identical reshaping.
rna1 <- rna[rna$genename %in% fe, ]
rownames(rna1) <- rna1$genename
rna1 <- t(rna1[, -1])

CCDC134 与 Fe 基因(RNA)相关性分析

# Pearson correlation between CCDC134 and every other gene column of rna1
# (RNA level). rna1 is indexed with one column per gene.
ccdc134_rna <- rna1[, "CCDC134"]
fe_genes <- setdiff(colnames(rna1), "CCDC134")

# vapply() with an explicit FUN.VALUE keeps the result a numeric matrix with
# rows "cor" and "p" and one column per gene; sapply() could silently change
# its return type (e.g. for an empty gene set). unname() stops c() from
# gluing the estimate's own name onto the label (the old "cor.cor" row).
cor_stats <- vapply(
  fe_genes,
  function(gene) {
    test <- cor.test(ccdc134_rna, rna1[, gene], method = "pearson")
    c(cor = unname(test$estimate), p = test$p.value)
  },
  FUN.VALUE = c(cor = 0, p = 0)
)

# One row per gene. The Correlation vector carries the gene names, which
# data.frame() also uses as row names.
cor_df <- data.frame(
  Gene = fe_genes,
  Correlation = cor_stats["cor", ],
  P.value = cor_stats["p", ]
)

# Nominal significance at P < 0.05; no multiple-testing correction is applied.
cor_df$Significant <- cor_df$P.value < 0.05
knitr::kable(cor_df, digits = 3)
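|      | Gene | Correlation | P.value | Significant |
|:-----|:-----|------------:|--------:|:------------|
| CP   | CP   |       0.052 |   0.601 | FALSE       |
| SAT1 | SAT1 |      -0.110 |   0.269 | FALSE       |
| FTH1 | FTH1 |       0.339 |   0.000 | TRUE        |
| GCLM | GCLM |       0.439 |   0.000 | TRUE        |
| GPX4 | GPX4 |       0.171 |   0.085 | FALSE       |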

柱状图展示相关性(RNA)

# Bar chart of the RNA-level correlations. Bars are red when Significant is
# TRUE and grey otherwise; each bar is labelled with its P value, placed above
# non-negative bars and below negative ones.
ggplot(cor_df, aes(x = Gene, y = Correlation, fill = Significant)) +
  geom_col(width = 0.6) +
  geom_text(
    aes(label = sprintf("P=%.3g", P.value)),
    vjust = ifelse(cor_df$Correlation >= 0, -0.5, 1.5),
    size = 3.5
  ) +
  scale_fill_manual(values = c("FALSE" = "grey70", "TRUE" = "red")) +
  # Fixed y range of [-0.5, 0.5].
  scale_y_continuous(limits = c(-0.5, 0.5)) +
  labs(
    title = "Correlation of CCDC134 with Fe genes (RNA)",
    x = "Fe Gene",
    y = "Pearson Correlation"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

散点图及拟合线(RNA)

# One scatter plot per gene in fe_genes: CCDC134 on x, the gene on y, with a
# linear fit and the Pearson r / P value in the subtitle (RNA level).
for (gene in fe_genes) {
  # Pair CCDC134 with the current gene, one row per row of rna1.
  df <- data.frame(CCDC134 = rna1[, "CCDC134"], FeGene = rna1[, gene])

  # Pearson test; r and P are rounded to 3 significant digits for display.
  test <- cor.test(df$CCDC134, df$FeGene, method = "pearson")
  corval <- signif(test$estimate, 3)
  pval <- signif(test$p.value, 3)

  gg <- ggplot(df, aes(x = CCDC134, y = FeGene)) +
    geom_point(size = 2, color = "steelblue") +
    # Linear-model fit line drawn without a confidence band.
    geom_smooth(method = "lm", se = FALSE, color = "red") +
    labs(
      title = paste0("CCDC134 vs ", gene, " (RNA)"),
      subtitle = sprintf("Pearson r = %s, P = %s", corval, pval),
      x = "CCDC134 (RNA)",
      y = paste0(gene, " (RNA)")
    ) +
    theme_minimal()

  # Explicit print() is required for ggplot objects inside a loop.
  print(gg)
}

CCDC134 与 Fe 基因(Protein)相关性分析

# Pearson correlation between CCDC134 and every other gene column of protein1
# (protein level). protein1 is indexed with one column per gene.
ccdc134_protein <- protein1[, "CCDC134"]
fe_genes_protein <- setdiff(colnames(protein1), "CCDC134")

# vapply() with an explicit FUN.VALUE keeps the result a numeric matrix with
# rows "cor" and "p" and one column per gene; sapply() could silently change
# its return type (e.g. for an empty gene set). unname() stops c() from
# gluing the estimate's own name onto the label (the old "cor.cor" row).
cor_stats_protein <- vapply(
  fe_genes_protein,
  function(gene) {
    test <- cor.test(ccdc134_protein, protein1[, gene], method = "pearson")
    c(cor = unname(test$estimate), p = test$p.value)
  },
  FUN.VALUE = c(cor = 0, p = 0)
)

# One row per gene. The Correlation vector carries the gene names, which
# data.frame() also uses as row names.
cor_df_protein <- data.frame(
  Gene = fe_genes_protein,
  Correlation = cor_stats_protein["cor", ],
  P.value = cor_stats_protein["p", ]
)

# Nominal significance at P < 0.05; no multiple-testing correction is applied.
cor_df_protein$Significant <- cor_df_protein$P.value < 0.05
knitr::kable(cor_df_protein, digits = 3)
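|      | Gene | Correlation | P.value | Significant |
|:-----|:-----|------------:|--------:|:------------|
| CP   | CP   |       0.447 |   0.008 | TRUE        |
| FTH1 | FTH1 |      -0.064 |   0.719 | FALSE       |
| GPX4 | GPX4 |      -0.226 |   0.198 | FALSE       |
| GCLM | GCLM |      -0.022 |   0.902 | FALSE       |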

柱状图展示相关性(Protein)

# Bar chart of the protein-level correlations. Bars are red when Significant
# is TRUE and grey otherwise; each bar is labelled with its P value, placed
# above non-negative bars and below negative ones.
ggplot(cor_df_protein, aes(x = Gene, y = Correlation, fill = Significant)) +
  geom_col(width = 0.6) +
  geom_text(
    aes(label = sprintf("P=%.3g", P.value)),
    vjust = ifelse(cor_df_protein$Correlation >= 0, -0.5, 1.5),
    size = 3.5
  ) +
  scale_fill_manual(values = c("FALSE" = "grey70", "TRUE" = "red")) +
  # Fixed y range of [-0.5, 0.5].
  scale_y_continuous(limits = c(-0.5, 0.5)) +
  labs(
    title = "Correlation of CCDC134 with Fe genes (Protein)",
    x = "Fe Gene",
    y = "Pearson Correlation"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

散点图及拟合线(Protein)

# One scatter plot per gene in fe_genes_protein: CCDC134 on x, the gene on y,
# with a linear fit and the Pearson r / P value in the subtitle (protein level).
for (gene in fe_genes_protein) {
  # Pair CCDC134 with the current gene, one row per row of protein1.
  df <- data.frame(CCDC134 = protein1[, "CCDC134"], FeGene = protein1[, gene])

  # Pearson test; r and P are rounded to 3 significant digits for display.
  test <- cor.test(df$CCDC134, df$FeGene, method = "pearson")
  corval <- signif(test$estimate, 3)
  pval <- signif(test$p.value, 3)

  gg <- ggplot(df, aes(x = CCDC134, y = FeGene)) +
    geom_point(size = 2, color = "steelblue") +
    # Linear-model fit line drawn without a confidence band.
    geom_smooth(method = "lm", se = FALSE, color = "red") +
    labs(
      title = paste0("CCDC134 vs ", gene, " (Protein)"),
      subtitle = sprintf("Pearson r = %s, P = %s", corval, pval),
      x = "CCDC134 (Protein)",
      y = paste0(gene, " (Protein)")
    ) +
    theme_minimal()

  # Explicit print() is required for ggplot objects inside a loop.
  print(gg)
}