数据读取
# Make the project root the working directory so the relative paths used below resolve.
# NOTE(review): setwd() ties the script to one machine's directory layout; consider
# project-relative paths instead.
setwd("~/Documents/coploit/home")
# Alternative input files, currently disabled:
#protein <- fread("protein/HCC_protein_after_scale.xls") %>% as.data.frame()
#rna <- fread("RNAseq/HCC_TPM_after_scale.xls") %>% as.data.frame()
# RNA
all_TPM <- fread("SynologyDrive/RNAseq/All.TPM.exp.xls")
# Drop columns 208-219 by position.
# NOTE(review): presumably these are non-sample columns; the hard-coded indices will
# silently select the wrong columns if the input layout changes. Confirm against the file.
all_TPM <- all_TPM[,-c(208:219)]
# Print rows 1-4 of columns 202-207 as a quick visual check of the remaining table.
all_TPM[1:4,202:207]
## U00000828 U00000728 U00000880 U00000780 U00000821 U00000721
## <num> <num> <num> <num> <num> <num>
## 1: 0 0 0 0 0 0
## 2: 0 0 0 0 0 0
## 3: 0 0 0 0 0 0
## 4: 0 0 0 0 0 0
# Convert the table read by fread() to a plain data.frame, then tabulate how
# many AccID values are repeats of an earlier row.
all_TPM <- as.data.frame(all_TPM)
table(duplicated(all_TPM$AccID))
##
## FALSE TRUE
## 56638 1711
# Keep only the first row seen for each AccID (all other columns retained),
# then confirm that no duplicated AccID is left.
new_all_TPM <- dplyr::distinct(all_TPM, AccID, .keep_all = TRUE)
table(duplicated(new_all_TPM[["AccID"]]))
##
## FALSE
## 56638
# Move AccID into the row names so expression rows can be selected by identifier.
new_all_TPM <- new_all_TPM %>%
  as_tibble() %>%
  column_to_rownames(var = "AccID")

# Sample sheet; per the original note it also links sample IDs to the hospital
# admission number (seqn).
id <- read.xlsx("SynologyDrive/RNAseq/肝癌RNA-seq样本编号.xlsx")

# Split the expression table by sample ID. drop = FALSE keeps a data.frame (and
# therefore the row names used downstream) even if only one sample matches.
hcc_tpm <- new_all_TPM[, colnames(new_all_TPM) %in% id$uid, drop = FALSE] # tumor tissue
para_tpm <- new_all_TPM[, colnames(new_all_TPM) %in% id$pair, drop = FALSE] # adjacent non-tumor tissue

# log2(TPM + 1) transform; the +1 keeps zero TPM values finite.
hcc_rna <- log2(hcc_tpm + 1)
para_rna <- log2(para_tpm + 1)
# Protein
all_protein <- fread("SynologyDrive/protein/dia_after_immpute_onehalf.xls") %>%
  as.data.frame()
# The first column (V1) holds the row identifiers: promote it to row names and
# remove it from the data columns.
row.names(all_protein) <- all_protein$V1
all_protein <- all_protein[, -1, drop = FALSE]

# Sample sheet; per the original note it also links sample IDs to the hospital
# admission number (seqn). Its `group` column separates "hcc" from "para" samples.
id2 <- read.xlsx("SynologyDrive/protein/dia_sample_id.xlsx")
#table(id2$group)

# drop = FALSE on both subsets so a single matching sample still yields a
# data.frame with row names instead of a bare vector.
hcc_pro <- all_protein[, colnames(all_protein) %in% subset(id2, group == "hcc")$id, drop = FALSE] # tumor tissue
para_pro <- all_protein[, colnames(all_protein) %in% subset(id2, group == "para")$id, drop = FALSE] # adjacent non-tumor tissue
##
# Genes of interest: CCDC134 plus the Fe genes it is correlated against below.
fe <- c("CCDC134", "GPX4", "SAT1", "GCLM", "CP", "FTH1")
1. (肿瘤组织)查看目标基因
# Preview the target genes in the first two tumor protein samples.
hcc_pro[rownames(hcc_pro) %in% fe, seq_len(2)]
## 1875 1877
## CP 2402.3438 1673.9227
## FTH1 451.8825 470.6070
## GPX4 966.6021 477.3249
## GCLM 439.0198 356.1182
## CCDC134 304.5132 450.2108
# Preview the target genes in the first two tumor RNA samples.
hcc_rna[rownames(hcc_rna) %in% fe, seq_len(2)]
## U00000991 U00000990
## CP 7.960560 8.676991
## SAT1 9.017533 9.800277
## FTH1 9.366623 7.880857
## GCLM 5.481211 5.323010
## GPX4 7.759533 8.638239
## CCDC134 2.564475 1.460593
1. (in tumor) 数据整理
# Restrict the tumor tables to the target genes and transpose them so that
# samples are rows and genes are columns.
protein1 <- t(hcc_pro[rownames(hcc_pro) %in% fe, ])
rna1 <- t(hcc_rna[rownames(hcc_rna) %in% fe, ])
CCDC134 与 Fe 基因(RNA)相关性分析
# Pearson correlation between CCDC134 and every other gene column of rna1.
stopifnot("CCDC134" %in% colnames(rna1))
ccdc134_rna <- rna1[, "CCDC134"]
fe_genes <- setdiff(colnames(rna1), "CCDC134")

# One column per gene, rows "cor" and "p". cor.test() returns a *named*
# estimate, so it is unnamed here to avoid the accidental row name "cor.cor";
# vapply() fixes the result shape even when fe_genes is empty.
cor_stats <- vapply(
  fe_genes,
  function(gene) {
    test <- cor.test(ccdc134_rna, rna1[, gene], method = "pearson")
    c(cor = unname(test$estimate), p = test$p.value)
  },
  FUN.VALUE = c(cor = 0, p = 0)
)

cor_df <- data.frame(
  Gene = fe_genes,
  Correlation = cor_stats["cor", ],
  P.value = cor_stats["p", ]
)
# Flag correlations that are nominally significant (no multiple-testing correction).
cor_df$Significant <- cor_df$P.value < 0.05
knitr::kable(cor_df, digits = 3)
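|      | Gene | Correlation | P.value | Significant |
|:-----|:-----|------------:|--------:|:------------|
| CP   | CP   |       0.052 |   0.601 | FALSE       |
| SAT1 | SAT1 |      -0.110 |   0.269 | FALSE       |
| FTH1 | FTH1 |       0.339 |   0.000 | TRUE        |
| GCLM | GCLM |       0.439 |   0.000 | TRUE        |
| GPX4 | GPX4 |       0.171 |   0.085 | FALSE       |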
柱状图展示相关性(RNA)
# Bar chart of the CCDC134 correlations (RNA, tumor); red bars mark P < 0.05 and
# each bar is labelled with its P value.
# The y-range is derived from the data (never narrower than +/-0.5, with 20%
# headroom for the labels): fixed limits make ggplot2 drop any bar or label
# whose value falls outside them.
cor_ylim <- max(0.5, 1.2 * max(abs(cor_df$Correlation), na.rm = TRUE))
ggplot(
  cor_df,
  aes(x = Gene, y = Correlation, fill = Significant, label = sprintf("P=%.3g", P.value))
) +
  geom_col(width = 0.6) +
  # Put the label above positive bars and below negative ones.
  geom_text(vjust = ifelse(cor_df$Correlation >= 0, -0.5, 1.5), size = 3.5) +
  scale_fill_manual(values = c("FALSE" = "grey70", "TRUE" = "red")) +
  scale_y_continuous(limits = c(-cor_ylim, cor_ylim)) +
  theme_minimal() +
  labs(
    title = "Correlation of CCDC134 with Fe genes (RNA) in tumor",
    y = "Pearson Correlation",
    x = "Fe Gene"
  ) +
  theme(legend.position = "none")

散点图及拟合线(RNA)
# One scatter plot per Fe gene: CCDC134 (x) against the gene (y) in rna1, with a
# least-squares line and the Pearson r / P value (3 significant digits) as subtitle.
for (gene in fe_genes) {
  pair_df <- data.frame(CCDC134 = rna1[, "CCDC134"], FeGene = rna1[, gene])
  pearson <- cor.test(pair_df$CCDC134, pair_df$FeGene, method = "pearson")
  stat_line <- paste0(
    "Pearson r = ", signif(pearson$estimate, 3),
    ", P = ", signif(pearson$p.value, 3)
  )

  scatter <- ggplot(pair_df, aes(x = CCDC134, y = FeGene)) +
    geom_point(size = 2, color = "steelblue") +
    geom_smooth(method = "lm", se = FALSE, color = "red") +
    labs(
      title = paste("CCDC134 vs", gene, "(RNA) in tumor"),
      subtitle = stat_line,
      x = "CCDC134 (RNA)",
      y = paste(gene, "(RNA)")
    ) +
    theme_minimal()

  # Explicit print(): ggplot objects are not auto-printed inside a loop.
  print(scatter)
}





CCDC134 与 Fe 基因(Protein)相关性分析
# Pearson correlation between CCDC134 and every other gene column of protein1.
stopifnot("CCDC134" %in% colnames(protein1))
ccdc134_protein <- protein1[, "CCDC134"]
fe_genes_protein <- setdiff(colnames(protein1), "CCDC134")

# One column per gene, rows "cor" and "p". cor.test() returns a *named*
# estimate, so it is unnamed here to avoid the accidental row name "cor.cor";
# vapply() fixes the result shape even when fe_genes_protein is empty.
cor_stats_protein <- vapply(
  fe_genes_protein,
  function(gene) {
    test <- cor.test(ccdc134_protein, protein1[, gene], method = "pearson")
    c(cor = unname(test$estimate), p = test$p.value)
  },
  FUN.VALUE = c(cor = 0, p = 0)
)

cor_df_protein <- data.frame(
  Gene = fe_genes_protein,
  Correlation = cor_stats_protein["cor", ],
  P.value = cor_stats_protein["p", ]
)
# Flag correlations that are nominally significant (no multiple-testing correction).
cor_df_protein$Significant <- cor_df_protein$P.value < 0.05
knitr::kable(cor_df_protein, digits = 3)
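|      | Gene | Correlation | P.value | Significant |
|:-----|:-----|------------:|--------:|:------------|
| CP   | CP   |       0.447 |   0.008 | TRUE        |
| FTH1 | FTH1 |      -0.064 |   0.719 | FALSE       |
| GPX4 | GPX4 |      -0.226 |   0.198 | FALSE       |
| GCLM | GCLM |      -0.022 |   0.902 | FALSE       |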
柱状图展示相关性(Protein)
# Bar chart of the CCDC134 correlations (protein, tumor); red bars mark P < 0.05
# and each bar is labelled with its P value.
# The y-range is derived from the data (never narrower than +/-0.5, with 20%
# headroom for the labels): fixed limits make ggplot2 drop any bar or label
# whose value falls outside them.
cor_ylim_protein <- max(0.5, 1.2 * max(abs(cor_df_protein$Correlation), na.rm = TRUE))
ggplot(
  cor_df_protein,
  aes(x = Gene, y = Correlation, fill = Significant, label = sprintf("P=%.3g", P.value))
) +
  geom_col(width = 0.6) +
  # Put the label above positive bars and below negative ones.
  geom_text(vjust = ifelse(cor_df_protein$Correlation >= 0, -0.5, 1.5), size = 3.5) +
  scale_fill_manual(values = c("FALSE" = "grey70", "TRUE" = "red")) +
  scale_y_continuous(limits = c(-cor_ylim_protein, cor_ylim_protein)) +
  theme_minimal() +
  labs(
    title = "Correlation of CCDC134 with Fe genes (Protein) in tumor",
    y = "Pearson Correlation",
    x = "Fe Gene"
  ) +
  theme(legend.position = "none")

散点图及拟合线(Protein)
# One scatter plot per Fe gene: CCDC134 (x) against the gene (y) in protein1, with
# a least-squares line and the Pearson r / P value (3 significant digits) as subtitle.
for (gene in fe_genes_protein) {
  pair_df <- data.frame(CCDC134 = protein1[, "CCDC134"], FeGene = protein1[, gene])
  pearson <- cor.test(pair_df$CCDC134, pair_df$FeGene, method = "pearson")
  stat_line <- paste0(
    "Pearson r = ", signif(pearson$estimate, 3),
    ", P = ", signif(pearson$p.value, 3)
  )

  scatter <- ggplot(pair_df, aes(x = CCDC134, y = FeGene)) +
    geom_point(size = 2, color = "steelblue") +
    geom_smooth(method = "lm", se = FALSE, color = "red") +
    labs(
      title = paste("CCDC134 vs", gene, "(Protein) in tumor"),
      subtitle = stat_line,
      x = "CCDC134 (Protein)",
      y = paste(gene, "(Protein)")
    ) +
    theme_minimal()

  # Explicit print(): ggplot objects are not auto-printed inside a loop.
  print(scatter)
}




2. (癌旁组织)查看目标基因
# Preview the target genes in the first two adjacent-tissue protein samples.
para_pro[rownames(para_pro) %in% fe, seq_len(2)]
## 1876 1878
## CP 1013.98444 1787.4272
## FTH1 825.94214 1039.2148
## GPX4 2892.94434 1541.0046
## GCLM 363.01901 414.3443
## CCDC134 70.55958 150.4873
# Preview the target genes in the first two adjacent-tissue RNA samples.
para_rna[rownames(para_rna) %in% fe, seq_len(2)]
## U00001020 U00001019
## CP 9.145246 8.530697
## SAT1 9.850314 9.525502
## FTH1 8.130989 8.184558
## GCLM 4.892074 4.889641
## GPX4 7.755636 8.123001
## CCDC134 1.729456 1.175767
2. (in para) 数据整理
# Restrict the adjacent-tissue tables to the target genes and transpose them so
# that samples are rows and genes are columns. This reassigns protein1 / rna1,
# which held the tumor matrices earlier in the script.
protein1 <- t(para_pro[rownames(para_pro) %in% fe, ])
rna1 <- t(para_rna[rownames(para_rna) %in% fe, ])
CCDC134 与 Fe 基因(RNA)相关性分析
# Pearson correlation between CCDC134 and every other gene column of rna1
# (adjacent-tissue RNA at this point in the script).
stopifnot("CCDC134" %in% colnames(rna1))
ccdc134_rna <- rna1[, "CCDC134"]
fe_genes <- setdiff(colnames(rna1), "CCDC134")

# One column per gene, rows "cor" and "p". cor.test() returns a *named*
# estimate, so it is unnamed here to avoid the accidental row name "cor.cor";
# vapply() fixes the result shape even when fe_genes is empty.
cor_stats <- vapply(
  fe_genes,
  function(gene) {
    test <- cor.test(ccdc134_rna, rna1[, gene], method = "pearson")
    c(cor = unname(test$estimate), p = test$p.value)
  },
  FUN.VALUE = c(cor = 0, p = 0)
)

cor_df <- data.frame(
  Gene = fe_genes,
  Correlation = cor_stats["cor", ],
  P.value = cor_stats["p", ]
)
# Flag correlations that are nominally significant (no multiple-testing correction).
cor_df$Significant <- cor_df$P.value < 0.05
knitr::kable(cor_df, digits = 3)
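|      | Gene | Correlation | P.value | Significant |
|:-----|:-----|------------:|--------:|:------------|
| CP   | CP   |       0.231 |   0.019 | TRUE        |
| SAT1 | SAT1 |       0.068 |   0.497 | FALSE       |
| FTH1 | FTH1 |       0.576 |   0.000 | TRUE        |
| GCLM | GCLM |       0.419 |   0.000 | TRUE        |
| GPX4 | GPX4 |       0.229 |   0.020 | TRUE        |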
柱状图展示相关性(RNA)
# Bar chart of the CCDC134 correlations (RNA, adjacent tissue); red bars mark
# P < 0.05 and each bar is labelled with its P value.
# The y-range is derived from the data (never narrower than +/-0.5, with 20%
# headroom for the labels). A fixed ylim(-0.5, 0.5) made ggplot2 drop every bar
# and label with |r| > 0.5 (e.g. FTH1, r = 0.576 in the table above).
cor_ylim <- max(0.5, 1.2 * max(abs(cor_df$Correlation), na.rm = TRUE))
ggplot(
  cor_df,
  aes(x = Gene, y = Correlation, fill = Significant, label = sprintf("P=%.3g", P.value))
) +
  geom_col(width = 0.6) +
  # Put the label above positive bars and below negative ones.
  geom_text(vjust = ifelse(cor_df$Correlation >= 0, -0.5, 1.5), size = 3.5) +
  scale_fill_manual(values = c("FALSE" = "grey70", "TRUE" = "red")) +
  scale_y_continuous(limits = c(-cor_ylim, cor_ylim)) +
  theme_minimal() +
  labs(
    title = "Correlation of CCDC134 with Fe genes (RNA) in para",
    y = "Pearson Correlation",
    x = "Fe Gene"
  ) +
  theme(legend.position = "none")

散点图及拟合线(RNA)
# One scatter plot per Fe gene: CCDC134 (x) against the gene (y) in rna1, with a
# least-squares line and the Pearson r / P value (3 significant digits) as subtitle.
for (gene in fe_genes) {
  pair_df <- data.frame(CCDC134 = rna1[, "CCDC134"], FeGene = rna1[, gene])
  pearson <- cor.test(pair_df$CCDC134, pair_df$FeGene, method = "pearson")
  stat_line <- paste0(
    "Pearson r = ", signif(pearson$estimate, 3),
    ", P = ", signif(pearson$p.value, 3)
  )

  scatter <- ggplot(pair_df, aes(x = CCDC134, y = FeGene)) +
    geom_point(size = 2, color = "steelblue") +
    geom_smooth(method = "lm", se = FALSE, color = "red") +
    labs(
      title = paste("CCDC134 vs", gene, "(RNA) in para"),
      subtitle = stat_line,
      x = "CCDC134 (RNA)",
      y = paste(gene, "(RNA)")
    ) +
    theme_minimal()

  # Explicit print(): ggplot objects are not auto-printed inside a loop.
  print(scatter)
}





CCDC134 与 Fe 基因(Protein)相关性分析
# Pearson correlation between CCDC134 and every other gene column of protein1
# (adjacent-tissue protein at this point in the script).
stopifnot("CCDC134" %in% colnames(protein1))
ccdc134_protein <- protein1[, "CCDC134"]
fe_genes_protein <- setdiff(colnames(protein1), "CCDC134")

# One column per gene, rows "cor" and "p". cor.test() returns a *named*
# estimate, so it is unnamed here to avoid the accidental row name "cor.cor";
# vapply() fixes the result shape even when fe_genes_protein is empty.
cor_stats_protein <- vapply(
  fe_genes_protein,
  function(gene) {
    test <- cor.test(ccdc134_protein, protein1[, gene], method = "pearson")
    c(cor = unname(test$estimate), p = test$p.value)
  },
  FUN.VALUE = c(cor = 0, p = 0)
)

cor_df_protein <- data.frame(
  Gene = fe_genes_protein,
  Correlation = cor_stats_protein["cor", ],
  P.value = cor_stats_protein["p", ]
)
# Flag correlations that are nominally significant (no multiple-testing correction).
cor_df_protein$Significant <- cor_df_protein$P.value < 0.05
knitr::kable(cor_df_protein, digits = 3)
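|      | Gene | Correlation | P.value | Significant |
|:-----|:-----|------------:|--------:|:------------|
| CP   | CP   |      -0.225 |   0.200 | FALSE       |
| FTH1 | FTH1 |      -0.135 |   0.445 | FALSE       |
| GPX4 | GPX4 |      -0.498 |   0.003 | TRUE        |
| GCLM | GCLM |       0.521 |   0.002 | TRUE        |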
柱状图展示相关性(Protein)
# Bar chart of the CCDC134 correlations (protein, adjacent tissue); red bars mark
# P < 0.05 and each bar is labelled with its P value.
# The y-range is derived from the data (never narrower than +/-0.5, with 20%
# headroom for the labels). A fixed ylim(-0.5, 0.5) made ggplot2 drop every bar
# and label with |r| > 0.5 (e.g. GCLM, r = 0.521 in the table above).
cor_ylim_protein <- max(0.5, 1.2 * max(abs(cor_df_protein$Correlation), na.rm = TRUE))
ggplot(
  cor_df_protein,
  aes(x = Gene, y = Correlation, fill = Significant, label = sprintf("P=%.3g", P.value))
) +
  geom_col(width = 0.6) +
  # Put the label above positive bars and below negative ones.
  geom_text(vjust = ifelse(cor_df_protein$Correlation >= 0, -0.5, 1.5), size = 3.5) +
  scale_fill_manual(values = c("FALSE" = "grey70", "TRUE" = "red")) +
  scale_y_continuous(limits = c(-cor_ylim_protein, cor_ylim_protein)) +
  theme_minimal() +
  labs(
    title = "Correlation of CCDC134 with Fe genes (Protein) in para",
    y = "Pearson Correlation",
    x = "Fe Gene"
  ) +
  theme(legend.position = "none")

散点图及拟合线(Protein)
# One scatter plot per Fe gene: CCDC134 (x) against the gene (y) in protein1, with
# a least-squares line and the Pearson r / P value (3 significant digits) as subtitle.
for (gene in fe_genes_protein) {
  pair_df <- data.frame(CCDC134 = protein1[, "CCDC134"], FeGene = protein1[, gene])
  pearson <- cor.test(pair_df$CCDC134, pair_df$FeGene, method = "pearson")
  stat_line <- paste0(
    "Pearson r = ", signif(pearson$estimate, 3),
    ", P = ", signif(pearson$p.value, 3)
  )

  scatter <- ggplot(pair_df, aes(x = CCDC134, y = FeGene)) +
    geom_point(size = 2, color = "steelblue") +
    geom_smooth(method = "lm", se = FALSE, color = "red") +
    labs(
      title = paste("CCDC134 vs", gene, "(Protein) in para"),
      subtitle = stat_line,
      x = "CCDC134 (Protein)",
      y = paste(gene, "(Protein)")
    ) +
    theme_minimal()

  # Explicit print(): ggplot objects are not auto-printed inside a loop.
  print(scatter)
}



