数据读取

# Work from the project root; the data paths used in this script are relative
# to this directory.
setwd("~/Documents/coploit/home")
# Earlier inputs (already-scaled tables), kept commented out for reference:
#protein <- fread("protein/HCC_protein_after_scale.xls") %>% as.data.frame()
#rna <- fread("RNAseq/HCC_TPM_after_scale.xls") %>% as.data.frame()

# RNA: load the TPM expression table.
all_TPM <- fread("SynologyDrive/RNAseq/All.TPM.exp.xls")

# Drop columns 208-219 by position.
# NOTE(review): presumably trailing non-sample columns -- confirm against the
# file header, since a positional cut silently breaks if the layout changes.
all_TPM <- all_TPM[,-c(208:219)]
# Peek at the last retained columns (202-207) for the first four rows.
all_TPM[1:4,202:207]
##    U00000828 U00000728 U00000880 U00000780 U00000821 U00000721
##        <num>     <num>     <num>     <num>     <num>     <num>
## 1:         0         0         0         0         0         0
## 2:         0         0         0         0         0         0
## 3:         0         0         0         0         0         0
## 4:         0         0         0         0         0         0
# Switch from data.table to a plain data.frame, then count how many AccID
# values are repeated.
all_TPM <- as.data.frame(all_TPM)
table(duplicated(all_TPM[["AccID"]]))
## 
## FALSE  TRUE 
## 56638  1711
# Keep only the first row for every AccID and confirm no duplicates remain.
new_all_TPM <- dplyr::distinct(all_TPM, AccID, .keep_all = TRUE)
table(duplicated(new_all_TPM[["AccID"]]))
## 
## FALSE 
## 56638
# Move AccID out of the columns and into the row names.
new_all_TPM <- column_to_rownames(as_tibble(new_all_TPM), var = "AccID")

# Sample sheet for the RNA-seq data; it maps each sample ID to the
# corresponding hospital admission number (seqn).
id <- read.xlsx("SynologyDrive/RNAseq/肝癌RNA-seq样本编号.xlsx")

# Split the expression table by tissue: columns named in id$uid are tumor
# samples, columns named in id$pair are adjacent (para-tumor) samples.
# drop = FALSE keeps a data.frame even when only one column matches, so the
# row-name lookups further down keep working.
hcc_tpm <- new_all_TPM[, colnames(new_all_TPM) %in% id$uid, drop = FALSE]    # tumor tissue
para_tpm <- new_all_TPM[, colnames(new_all_TPM) %in% id$pair, drop = FALSE]  # adjacent tissue

# log2(TPM + 1): the +1 keeps zero-TPM entries finite.
hcc_rna <- log2(hcc_tpm + 1)
para_rna <- log2(para_tpm + 1)

# Protein: load the protein table as a plain data.frame.
all_protein <- fread("SynologyDrive/protein/dia_after_immpute_onehalf.xls") %>% as.data.frame()
# Use the V1 column as row names, then drop the first column.
# NOTE(review): assumes V1 is the first column (the row identifiers) -- confirm.
row.names(all_protein) <- all_protein$V1
all_protein <- all_protein[, -1, drop = FALSE]

# Sample sheet for the protein data; it maps each sample ID to the
# corresponding hospital admission number (seqn).
id2 <- read.xlsx("SynologyDrive/protein/dia_sample_id.xlsx")
# table(id2$group)
# Split by the group column of the sample sheet. TRUE/FALSE are spelled out
# (F can be reassigned), and both subsets use drop = FALSE so a single
# matching sample still yields a data.frame.
hcc_pro <- all_protein[, colnames(all_protein) %in% subset(id2, group == "hcc")$id, drop = FALSE]    # tumor tissue
para_pro <- all_protein[, colnames(all_protein) %in% subset(id2, group == "para")$id, drop = FALSE]  # adjacent tissue


##
# Gene panel used throughout: CCDC134 plus the genes it is correlated against.
fe <- c(
  "CCDC134",
  "GPX4", "SAT1", "GCLM", "CP", "FTH1"
)

1. (肿瘤组织)查看目标基因

# Spot-check: panel genes in the first two tumor samples, protein then RNA.
hcc_pro[rownames(hcc_pro) %in% fe, seq_len(2)]
##              1875      1877
## CP      2402.3438 1673.9227
## FTH1     451.8825  470.6070
## GPX4     966.6021  477.3249
## GCLM     439.0198  356.1182
## CCDC134  304.5132  450.2108
hcc_rna[rownames(hcc_rna) %in% fe, seq_len(2)]
##         U00000991 U00000990
## CP       7.960560  8.676991
## SAT1     9.017533  9.800277
## FTH1     9.366623  7.880857
## GCLM     5.481211  5.323010
## GPX4     7.759533  8.638239
## CCDC134  2.564475  1.460593

1. (肿瘤组织)数据整理

# Keep only the panel genes and transpose, giving samples in rows and genes
# in columns (tumor tissue).
protein1 <- t(hcc_pro[rownames(hcc_pro) %in% fe, ])
rna1 <- t(hcc_rna[rownames(hcc_rna) %in% fe, ])

CCDC134 与 Fe 基因(RNA)相关性分析

# Pearson correlation of CCDC134 with every other panel gene (tumor RNA).
# rna1 has samples in rows and genes in columns.
if (!("CCDC134" %in% colnames(rna1))) {
  stop("CCDC134 not found among the columns of rna1", call. = FALSE)
}
ccdc134_rna <- rna1[, "CCDC134"]
fe_genes <- setdiff(colnames(rna1), "CCDC134")
# vapply() always returns a 2-row numeric matrix (rows "cor" and "p"), even
# when fe_genes is empty, whereas sapply() would return a list in that case.
# unname() stops the estimate's own name from producing the label "cor.cor".
cor_stats <- vapply(
  fe_genes,
  function(gene) {
    test <- cor.test(ccdc134_rna, rna1[, gene], method = "pearson")
    c(cor = unname(test$estimate), p = test$p.value)
  },
  FUN.VALUE = c(cor = 0, p = 0)
)
cor_df <- data.frame(
  Gene = fe_genes,
  Correlation = cor_stats["cor", ],
  P.value = cor_stats["p", ]
)
# Nominal threshold on the raw P values (no multiple-testing adjustment).
cor_df$Significant <- cor_df$P.value < 0.05
# The row names repeat the Gene column; hide them so the rendered table does
# not print every gene symbol twice.
knitr::kable(cor_df, digits = 3, row.names = FALSE)
Gene Correlation P.value Significant
CP CP 0.052 0.601 FALSE
SAT1 SAT1 -0.110 0.269 FALSE
FTH1 FTH1 0.339 0.000 TRUE
GCLM GCLM 0.439 0.000 TRUE
GPX4 GPX4 0.171 0.085 FALSE

柱状图展示相关性(RNA)

# Bar chart of the CCDC134 correlations (tumor RNA). Bars flagged Significant
# are red, the rest grey; each bar carries its P value as a label.
ggplot(
  cor_df,
  aes(x = Gene, y = Correlation, fill = Significant, label = sprintf("P=%.3g", P.value))
) +
  geom_bar(stat = "identity", width = 0.6) +
  # Put the label above positive bars and below negative ones. vjust is
  # computed from the plotted data instead of reaching into the global cor_df.
  geom_text(aes(vjust = ifelse(Correlation >= 0, -0.5, 1.5)), size = 3.5) +
  scale_fill_manual(values = c("FALSE" = "grey70", "TRUE" = "red")) +
  # Pearson r is bounded by [-1, 1]. A narrower hard-coded range such as
  # (-0.5, 0.5) turns any stronger correlation into a missing value, so that
  # bar and its label are dropped from the plot.
  ylim(-1, 1) +
  theme_minimal() +
  labs(title = "Correlation of CCDC134 with Fe genes (RNA) in tumor", y = "Pearson Correlation", x = "Fe Gene") +
  theme(legend.position = "none")

散点图及拟合线(RNA)

# One scatter plot per partner gene (tumor RNA): CCDC134 on the x axis, the
# partner gene on the y axis, a straight-line fit, and Pearson r / P in the
# subtitle.
for (gene in fe_genes) {
  df <- data.frame(CCDC134 = rna1[, "CCDC134"], FeGene = rna1[, gene])
  test <- cor.test(df$CCDC134, df$FeGene, method = "pearson")
  # Three significant digits for the annotation.
  corval <- signif(test$estimate, 3)
  pval <- signif(test$p.value, 3)
  gg <- ggplot(df, aes(x = CCDC134, y = FeGene)) +
    geom_point(size = 2, color = "steelblue") +
    geom_smooth(method = "lm", se = FALSE, color = "red") +
    ggtitle(
      paste("CCDC134 vs", gene, "(RNA) in tumor"),
      subtitle = paste0("Pearson r = ", corval, ", P = ", pval)
    ) +
    xlab("CCDC134 (RNA)") +
    ylab(paste(gene, "(RNA)")) +
    theme_minimal()
  # Plots built inside a loop must be printed explicitly.
  print(gg)
}

CCDC134 与 Fe 基因(Protein)相关性分析

# Pearson correlation of CCDC134 with every other panel gene (tumor protein).
# protein1 has samples in rows and genes in columns.
if (!("CCDC134" %in% colnames(protein1))) {
  stop("CCDC134 not found among the columns of protein1", call. = FALSE)
}
ccdc134_protein <- protein1[, "CCDC134"]
fe_genes_protein <- setdiff(colnames(protein1), "CCDC134")
# vapply() always returns a 2-row numeric matrix (rows "cor" and "p"), even
# when fe_genes_protein is empty, whereas sapply() would return a list then.
# unname() stops the estimate's own name from producing the label "cor.cor".
cor_stats_protein <- vapply(
  fe_genes_protein,
  function(gene) {
    test <- cor.test(ccdc134_protein, protein1[, gene], method = "pearson")
    c(cor = unname(test$estimate), p = test$p.value)
  },
  FUN.VALUE = c(cor = 0, p = 0)
)
cor_df_protein <- data.frame(
  Gene = fe_genes_protein,
  Correlation = cor_stats_protein["cor", ],
  P.value = cor_stats_protein["p", ]
)
# Nominal threshold on the raw P values (no multiple-testing adjustment).
cor_df_protein$Significant <- cor_df_protein$P.value < 0.05
# The row names repeat the Gene column; hide them so the rendered table does
# not print every gene symbol twice.
knitr::kable(cor_df_protein, digits = 3, row.names = FALSE)
Gene Correlation P.value Significant
CP CP 0.447 0.008 TRUE
FTH1 FTH1 -0.064 0.719 FALSE
GPX4 GPX4 -0.226 0.198 FALSE
GCLM GCLM -0.022 0.902 FALSE

柱状图展示相关性(Protein)

# Bar chart of the CCDC134 correlations (tumor protein). Bars flagged
# Significant are red, the rest grey; each bar carries its P value as a label.
ggplot(
  cor_df_protein,
  aes(x = Gene, y = Correlation, fill = Significant, label = sprintf("P=%.3g", P.value))
) +
  geom_bar(stat = "identity", width = 0.6) +
  # Put the label above positive bars and below negative ones. vjust is
  # computed from the plotted data instead of the global cor_df_protein.
  geom_text(aes(vjust = ifelse(Correlation >= 0, -0.5, 1.5)), size = 3.5) +
  scale_fill_manual(values = c("FALSE" = "grey70", "TRUE" = "red")) +
  # Pearson r is bounded by [-1, 1]. A narrower hard-coded range such as
  # (-0.5, 0.5) turns any stronger correlation into a missing value, so that
  # bar and its label are dropped from the plot.
  ylim(-1, 1) +
  theme_minimal() +
  labs(title = "Correlation of CCDC134 with Fe genes (Protein) in tumor", y = "Pearson Correlation", x = "Fe Gene") +
  theme(legend.position = "none")

散点图及拟合线(Protein)

# One scatter plot per partner gene (tumor protein): CCDC134 on the x axis,
# the partner gene on the y axis, a straight-line fit, and Pearson r / P in
# the subtitle.
for (gene in fe_genes_protein) {
  df <- data.frame(CCDC134 = protein1[, "CCDC134"], FeGene = protein1[, gene])
  test <- cor.test(df$CCDC134, df$FeGene, method = "pearson")
  # Three significant digits for the annotation.
  corval <- signif(test$estimate, 3)
  pval <- signif(test$p.value, 3)
  gg <- ggplot(df, aes(x = CCDC134, y = FeGene)) +
    geom_point(size = 2, color = "steelblue") +
    geom_smooth(method = "lm", se = FALSE, color = "red") +
    ggtitle(
      paste("CCDC134 vs", gene, "(Protein) in tumor"),
      subtitle = paste0("Pearson r = ", corval, ", P = ", pval)
    ) +
    xlab("CCDC134 (Protein)") +
    ylab(paste(gene, "(Protein)")) +
    theme_minimal()
  # Plots built inside a loop must be printed explicitly.
  print(gg)
}

2. (癌旁组织)查看目标基因

# Spot-check: panel genes in the first two adjacent-tissue samples, protein
# then RNA.
para_pro[rownames(para_pro) %in% fe, seq_len(2)]
##               1876      1878
## CP      1013.98444 1787.4272
## FTH1     825.94214 1039.2148
## GPX4    2892.94434 1541.0046
## GCLM     363.01901  414.3443
## CCDC134   70.55958  150.4873
para_rna[rownames(para_rna) %in% fe, seq_len(2)]
##         U00001020 U00001019
## CP       9.145246  8.530697
## SAT1     9.850314  9.525502
## FTH1     8.130989  8.184558
## GCLM     4.892074  4.889641
## GPX4     7.755636  8.123001
## CCDC134  1.729456  1.175767

2. (癌旁组织)数据整理

# Keep only the panel genes and transpose, giving samples in rows and genes
# in columns (adjacent tissue). This overwrites the tumor versions of
# protein1 and rna1.
protein1 <- t(para_pro[rownames(para_pro) %in% fe, ])
rna1 <- t(para_rna[rownames(para_rna) %in% fe, ])

CCDC134 与 Fe 基因(RNA)相关性分析

# Pearson correlation of CCDC134 with every other panel gene (adjacent-tissue
# RNA). rna1 has samples in rows and genes in columns.
if (!("CCDC134" %in% colnames(rna1))) {
  stop("CCDC134 not found among the columns of rna1", call. = FALSE)
}
ccdc134_rna <- rna1[, "CCDC134"]
fe_genes <- setdiff(colnames(rna1), "CCDC134")
# vapply() always returns a 2-row numeric matrix (rows "cor" and "p"), even
# when fe_genes is empty, whereas sapply() would return a list in that case.
# unname() stops the estimate's own name from producing the label "cor.cor".
cor_stats <- vapply(
  fe_genes,
  function(gene) {
    test <- cor.test(ccdc134_rna, rna1[, gene], method = "pearson")
    c(cor = unname(test$estimate), p = test$p.value)
  },
  FUN.VALUE = c(cor = 0, p = 0)
)
cor_df <- data.frame(
  Gene = fe_genes,
  Correlation = cor_stats["cor", ],
  P.value = cor_stats["p", ]
)
# Nominal threshold on the raw P values (no multiple-testing adjustment).
cor_df$Significant <- cor_df$P.value < 0.05
# The row names repeat the Gene column; hide them so the rendered table does
# not print every gene symbol twice.
knitr::kable(cor_df, digits = 3, row.names = FALSE)
Gene Correlation P.value Significant
CP CP 0.231 0.019 TRUE
SAT1 SAT1 0.068 0.497 FALSE
FTH1 FTH1 0.576 0.000 TRUE
GCLM GCLM 0.419 0.000 TRUE
GPX4 GPX4 0.229 0.020 TRUE

柱状图展示相关性(RNA)

# Bar chart of the CCDC134 correlations (adjacent-tissue RNA). Bars flagged
# Significant are red, the rest grey; each bar carries its P value as a label.
ggplot(
  cor_df,
  aes(x = Gene, y = Correlation, fill = Significant, label = sprintf("P=%.3g", P.value))
) +
  geom_bar(stat = "identity", width = 0.6) +
  # Put the label above positive bars and below negative ones. vjust is
  # computed from the plotted data instead of reaching into the global cor_df.
  geom_text(aes(vjust = ifelse(Correlation >= 0, -0.5, 1.5)), size = 3.5) +
  scale_fill_manual(values = c("FALSE" = "grey70", "TRUE" = "red")) +
  # Pearson r is bounded by [-1, 1]. With the previous (-0.5, 0.5) range any
  # correlation stronger than 0.5 became a missing value, so that bar and its
  # label were dropped from the plot.
  ylim(-1, 1) +
  theme_minimal() +
  labs(title = "Correlation of CCDC134 with Fe genes (RNA) in para", y = "Pearson Correlation", x = "Fe Gene") +
  theme(legend.position = "none")

散点图及拟合线(RNA)

# One scatter plot per partner gene (adjacent-tissue RNA): CCDC134 on the x
# axis, the partner gene on the y axis, a straight-line fit, and Pearson r / P
# in the subtitle.
for (gene in fe_genes) {
  df <- data.frame(CCDC134 = rna1[, "CCDC134"], FeGene = rna1[, gene])
  test <- cor.test(df$CCDC134, df$FeGene, method = "pearson")
  # Three significant digits for the annotation.
  corval <- signif(test$estimate, 3)
  pval <- signif(test$p.value, 3)
  gg <- ggplot(df, aes(x = CCDC134, y = FeGene)) +
    geom_point(size = 2, color = "steelblue") +
    geom_smooth(method = "lm", se = FALSE, color = "red") +
    ggtitle(
      paste("CCDC134 vs", gene, "(RNA) in para"),
      subtitle = paste0("Pearson r = ", corval, ", P = ", pval)
    ) +
    xlab("CCDC134 (RNA)") +
    ylab(paste(gene, "(RNA)")) +
    theme_minimal()
  # Plots built inside a loop must be printed explicitly.
  print(gg)
}

CCDC134 与 Fe 基因(Protein)相关性分析

# Pearson correlation of CCDC134 with every other panel gene (adjacent-tissue
# protein). protein1 has samples in rows and genes in columns.
if (!("CCDC134" %in% colnames(protein1))) {
  stop("CCDC134 not found among the columns of protein1", call. = FALSE)
}
ccdc134_protein <- protein1[, "CCDC134"]
fe_genes_protein <- setdiff(colnames(protein1), "CCDC134")
# vapply() always returns a 2-row numeric matrix (rows "cor" and "p"), even
# when fe_genes_protein is empty, whereas sapply() would return a list then.
# unname() stops the estimate's own name from producing the label "cor.cor".
cor_stats_protein <- vapply(
  fe_genes_protein,
  function(gene) {
    test <- cor.test(ccdc134_protein, protein1[, gene], method = "pearson")
    c(cor = unname(test$estimate), p = test$p.value)
  },
  FUN.VALUE = c(cor = 0, p = 0)
)
cor_df_protein <- data.frame(
  Gene = fe_genes_protein,
  Correlation = cor_stats_protein["cor", ],
  P.value = cor_stats_protein["p", ]
)
# Nominal threshold on the raw P values (no multiple-testing adjustment).
cor_df_protein$Significant <- cor_df_protein$P.value < 0.05
# The row names repeat the Gene column; hide them so the rendered table does
# not print every gene symbol twice.
knitr::kable(cor_df_protein, digits = 3, row.names = FALSE)
Gene Correlation P.value Significant
CP CP -0.225 0.200 FALSE
FTH1 FTH1 -0.135 0.445 FALSE
GPX4 GPX4 -0.498 0.003 TRUE
GCLM GCLM 0.521 0.002 TRUE

柱状图展示相关性(Protein)

# Bar chart of the CCDC134 correlations (adjacent-tissue protein). Bars
# flagged Significant are red, the rest grey; each bar carries its P value as
# a label.
ggplot(
  cor_df_protein,
  aes(x = Gene, y = Correlation, fill = Significant, label = sprintf("P=%.3g", P.value))
) +
  geom_bar(stat = "identity", width = 0.6) +
  # Put the label above positive bars and below negative ones. vjust is
  # computed from the plotted data instead of the global cor_df_protein.
  geom_text(aes(vjust = ifelse(Correlation >= 0, -0.5, 1.5)), size = 3.5) +
  scale_fill_manual(values = c("FALSE" = "grey70", "TRUE" = "red")) +
  # Pearson r is bounded by [-1, 1]. With the previous (-0.5, 0.5) range any
  # correlation stronger than 0.5 became a missing value, so that bar and its
  # label were dropped from the plot.
  ylim(-1, 1) +
  theme_minimal() +
  labs(title = "Correlation of CCDC134 with Fe genes (Protein) in para", y = "Pearson Correlation", x = "Fe Gene") +
  theme(legend.position = "none")

散点图及拟合线(Protein)

# One scatter plot per partner gene (adjacent-tissue protein): CCDC134 on the
# x axis, the partner gene on the y axis, a straight-line fit, and Pearson
# r / P in the subtitle.
for (gene in fe_genes_protein) {
  df <- data.frame(CCDC134 = protein1[, "CCDC134"], FeGene = protein1[, gene])
  test <- cor.test(df$CCDC134, df$FeGene, method = "pearson")
  # Three significant digits for the annotation.
  corval <- signif(test$estimate, 3)
  pval <- signif(test$p.value, 3)
  gg <- ggplot(df, aes(x = CCDC134, y = FeGene)) +
    geom_point(size = 2, color = "steelblue") +
    geom_smooth(method = "lm", se = FALSE, color = "red") +
    ggtitle(
      paste("CCDC134 vs", gene, "(Protein) in para"),
      subtitle = paste0("Pearson r = ", corval, ", P = ", pval)
    ) +
    xlab("CCDC134 (Protein)") +
    ylab(paste(gene, "(Protein)")) +
    theme_minimal()
  # Plots built inside a loop must be printed explicitly.
  print(gg)
}