library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(org.Mm.eg.db)
## Loading required package: AnnotationDbi
## Warning: package 'AnnotationDbi' was built under R version 4.3.2
## Loading required package: stats4
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.3.1
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Warning: package 'Biobase' was built under R version 4.3.1
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 4.3.1
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.3.2
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
##
library(org.Hs.eg.db)
##
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0.9000 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ IRanges::collapse() masks dplyr::collapse()
## ✖ Biobase::combine() masks BiocGenerics::combine(), dplyr::combine()
## ✖ IRanges::desc() masks dplyr::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ S4Vectors::first() masks dplyr::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks IRanges::reduce()
## ✖ S4Vectors::rename() masks dplyr::rename()
## ✖ lubridate::second() masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ AnnotationDbi::select() masks dplyr::select()
## ✖ IRanges::slice() masks dplyr::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ComplexHeatmap)
## Warning: package 'ComplexHeatmap' was built under R version 4.3.1
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.18.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(readxl)
library(readr)
library(STRINGdb)
## Warning: package 'STRINGdb' was built under R version 4.3.1
library(enrichR)
## Welcome to enrichR
## Checking connection ...
## Enrichr ... Connection is Live!
## FlyEnrichr ... Connection is Live!
## WormEnrichr ... Connection is Live!
## YeastEnrichr ... Connection is Live!
## FishEnrichr ... Connection is Live!
## OxEnrichr ... Connection is Live!
library(clusterProfiler)
## Warning: package 'clusterProfiler' was built under R version 4.3.1
##
## Registered S3 methods overwritten by 'treeio':
## method from
## MRCA.phylo tidytree
## MRCA.treedata tidytree
## Nnode.treedata tidytree
## Ntip.treedata tidytree
## ancestor.phylo tidytree
## ancestor.treedata tidytree
## child.phylo tidytree
## child.treedata tidytree
## full_join.phylo tidytree
## full_join.treedata tidytree
## groupClade.phylo tidytree
## groupClade.treedata tidytree
## groupOTU.phylo tidytree
## groupOTU.treedata tidytree
## inner_join.phylo tidytree
## inner_join.treedata tidytree
## is.rooted.treedata tidytree
## nodeid.phylo tidytree
## nodeid.treedata tidytree
## nodelab.phylo tidytree
## nodelab.treedata tidytree
## offspring.phylo tidytree
## offspring.treedata tidytree
## parent.phylo tidytree
## parent.treedata tidytree
## root.treedata tidytree
## rootnode.phylo tidytree
## sibling.phylo tidytree
## clusterProfiler v4.8.3 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
##
## Attaching package: 'clusterProfiler'
##
## The following object is masked from 'package:purrr':
##
## simplify
##
## The following object is masked from 'package:AnnotationDbi':
##
## select
##
## The following object is masked from 'package:IRanges':
##
## slice
##
## The following object is masked from 'package:S4Vectors':
##
## rename
##
## The following object is masked from 'package:stats':
##
## filter
library(igraph)
## Warning: package 'igraph' was built under R version 4.3.2
##
## Attaching package: 'igraph'
##
## The following object is masked from 'package:clusterProfiler':
##
## simplify
##
## The following objects are masked from 'package:lubridate':
##
## %--%, union
##
## The following objects are masked from 'package:purrr':
##
## compose, simplify
##
## The following object is masked from 'package:tidyr':
##
## crossing
##
## The following object is masked from 'package:tibble':
##
## as_data_frame
##
## The following object is masked from 'package:IRanges':
##
## union
##
## The following object is masked from 'package:S4Vectors':
##
## union
##
## The following objects are masked from 'package:BiocGenerics':
##
## normalize, path, union
##
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
##
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
##
## The following object is masked from 'package:base':
##
## union
library(ggraph)
## Warning: package 'ggraph' was built under R version 4.3.2
library(readxl)
# Load the AID-based STRING protein names and preview the first rows
JTC_AID_STRING_RESULTS_JAN2 <- readxl::read_excel(
  path = "~/Desktop/JTC.AID.STRING.RESULTS.JAN2.xlsx"
)
head(JTC_AID_STRING_RESULTS_JAN2, n = 6L)
## # A tibble: 6 × 3
## Cytokine.mediated.signaling.pa…¹ Actin.filament.bundl…² T-Cell.Receptor.Comp…³
## <chr> <chr> <chr>
## 1 CLEC16A CHMP4B CSK
## 2 PTPN2 FLOT1 UBASH3A
## 3 FHL1 TNPO2 ICAM3
## 4 STAT1 TNPO3 ITGAL
## 5 STAT2 KPNB1 RGS14
## 6 STAT3 SENP1 VAV3
## # ℹ abbreviated names: ¹​Cytokine.mediated.signaling.pathway.AID,
## # ²​Actin.filament.bundle.assembly.AID, ³​`T-Cell.Receptor.Complex.AID`
# Load the JTC 1845 significant-protein list and preview the first rows
JTC_Pull_1845 <- readxl::read_excel(
  path = "~/Desktop/JTC.Pull.1845.xlsx"
)
head(JTC_Pull_1845, n = 6L)
## # A tibble: 6 × 9
## Protein `F-Value` `FDR (BH)` `Control-All` `diff Control-All`
## <chr> <dbl> <chr> <chr> <dbl>
## 1 TurboID 894. 5.27792289666326e-09 4.10459374650874… -10.0
## 2 TAF1 11.8 1.12813261340936E-2 6.43706940440112… -3.61
## 3 UHRF1BP1L 39.8 3.3343890015458003E-4 8.79313785705215… -5.07
## 4 SHTN1 99.2 1.22502977927992e-05 1.64862039529698… -6.44
## 5 ILVBL 7.52 3.0580832955954099E-2 2.07321632991941… -4.50
## 6 DENND11 17.1 4.2821594007173098E-3 2.0209649861693E… -2.51
## # ℹ 4 more variables: diff.All.vs.Control <dbl>, p.value <chr>,
## # log2FoldChange <dbl>, expression <chr>
# Pull each pathway's protein list out of the STRING results sheet as a
# plain character vector (one spreadsheet column per pathway).
Cytokine.mediated.signaling.pathway.AID <- JTC_AID_STRING_RESULTS_JAN2$Cytokine.mediated.signaling.pathway.AID
Actin.filament.bundle.assembly.AID <- JTC_AID_STRING_RESULTS_JAN2$Actin.filament.bundle.assembly.AID
T.Cell.Receptor.Complex.AID <- JTC_AID_STRING_RESULTS_JAN2$`T-Cell.Receptor.Complex.AID`
## check protein names
Cytokine.mediated.signaling.pathway.AID
## [1] "CLEC16A" "PTPN2" "FHL1" "STAT1" "STAT2" "STAT3" "STAT5A"
## [8] "SMARCA4" "IRAK1" "FGFR1OP" "PPP2R2A" "PPP2R3A" "PPP2R3C" "MINK1"
## [15] "LPP" "BCL2" "CDK6" "CDC37" "CASP8" "PRKCB" "ITPR3"
## [22] "ATF2A2" "ATP1B3" "STIM1" "RUNX1" "CCR4" NA NA
## [29] NA
# The cytokine column ends in NA entries (see the printout above). Drop them
# by value instead of by a hard-coded length, so the vector stays correct if
# the spreadsheet gains or loses rows, and so no NA can leak into the later
# `%in%` subsetting (where an NA in the set would match NA proteins).
Cytokine.mediated.signaling.pathway.AID <- Cytokine.mediated.signaling.pathway.AID[!is.na(Cytokine.mediated.signaling.pathway.AID)]
Cytokine.mediated.signaling.pathway.AID
## [1] "CLEC16A" "PTPN2" "FHL1" "STAT1" "STAT2" "STAT3" "STAT5A"
## [8] "SMARCA4" "IRAK1" "FGFR1OP" "PPP2R2A" "PPP2R3A" "PPP2R3C" "MINK1"
## [15] "LPP" "BCL2" "CDK6" "CDC37" "CASP8" "PRKCB" "ITPR3"
## [22] "ATF2A2" "ATP1B3" "STIM1" "RUNX1" "CCR4"
Actin.filament.bundle.assembly.AID
## [1] "CHMP4B" "FLOT1" "TNPO2" "TNPO3" "KPNB1" "SENP1"
## [7] "NUP210" "AKAP11" "FNBP1" "ACTR2" "ARHGAP4" "EPS15L1"
## [13] "LCP1" "EXOC2" "FLNB" "ITGA4" "ARHGAP15" "DOCK10"
## [19] "LPXN" "NFKBIE" "ARRB2" "PTK2B" "IQGAP1" "CIT"
## [25] "SWAP70" "NCK2" "ABL2" "TAOK3" "CSNK2B"
T.Cell.Receptor.Complex.AID
## [1] "CSK" "UBASH3A" "ICAM3" "ITGAL" "RGS14" "VAV3" "SKAP1"
## [8] "TRAT1" "ITK" "CD3G" "CD3E" "CD247" "ZAP70" "SHC1"
## [15] "LCP2" "GRAP2" "PTPN11" "PTPN22" "LAT" "THEMIS" "KIR3DL3"
## [22] "HLA-A" "HLA-G" "TAP1" "PSMB9" "PSMD14" "PSMD5" "PSMD1"
## [29] "UBLCP1"
# Keep the JTC_Pull_1845 rows (including the log FC columns) whose Protein
# appears in the cytokine-mediated signaling pathway list
Cytokine.mediated.signaling.pathway.AID.data <- JTC_Pull_1845[
  JTC_Pull_1845$Protein %in% Cytokine.mediated.signaling.pathway.AID,
]
Cytokine.mediated.signaling.pathway.AID.data
## # A tibble: 19 × 9
## Protein `F-Value` `FDR (BH)` `Control-All` `diff Control-All`
## <chr> <dbl> <chr> <chr> <dbl>
## 1 PRKCB 21.0 2.4210941751688802E-3 1.00384534839752E… -4.51
## 2 BCL2 12.7 9.3025077796056697E-3 5.145144326176300… -2.78
## 3 PTPN2 8.24 2.5289084549813899E-2 1.66528277490606E… -3.98
## 4 STAT3 56.1 1.0012171592612E-4 2.08343516221231e… -4.90
## 5 STAT1 22.9 1.88696578587398E-3 7.398138903776600… -4.78
## 6 SMARCA4 34.4 5.3625596100866004E-4 1.5789411220675E-4 -4.10
## 7 IRAK1 124. 5.21858471940925e-06 5.82549130482479e… -3.18
## 8 STAT2 175. 1.49877063458406e-06 1.16590481103389e… -6.13
## 9 ATP1B3 10.6 1.4356128233778E-2 8.543320185795549… -3.85
## 10 PPP2R2A 17.9 3.77660790006001E-3 1.725370049896590… -4.35
## 11 CDK6 25.3 1.41012990608174E-3 5.161152242796199… -2.84
## 12 RUNX1 18.5 3.47115669368638E-3 1.563438137412999… -3.22
## 13 STIM1 104. 1.03319990614549e-05 1.34223558402409e… -9.24
## 14 FHL1 68.3 4.95570260286022e-05 8.87281424007051e… -5.56
## 15 CASP8 5.81 4.9897315475137503E-2 3.660300932948629… -2.22
## 16 CDC37 85.9 2.13064872424559e-05 3.18292728688281e… -6.10
## 17 CLEC16A 32.8 6.2799900639022002E-4 1.902805235231300… -2.90
## 18 MINK1 489. 3.35662674527538e-08 8.04755777182624e… -8.59
## 19 LPP 266. 3.08721629305423e-07 1.56101951807488e… -7.73
## # ℹ 4 more variables: diff.All.vs.Control <dbl>, p.value <chr>,
## # log2FoldChange <dbl>, expression <chr>
# Keep the JTC_Pull_1845 rows whose Protein appears in the actin filament
# bundle assembly list
Actin.filament.bundle.assembly.AID.data <- JTC_Pull_1845[
  JTC_Pull_1845$Protein %in% Actin.filament.bundle.assembly.AID,
]
Actin.filament.bundle.assembly.AID.data
## # A tibble: 29 × 9
## Protein `F-Value` `FDR (BH)` `Control-All` `diff Control-All`
## <chr> <dbl> <chr> <chr> <dbl>
## 1 NFKBIE 6.02 4.6960617105448597E-2 3.406516954557049… -2.66
## 2 CIT 14.8 6.3014634161585601E-3 3.230388480162189… -2.74
## 3 TNPO2 8.09 2.6225631430651701E-2 1.744295206006E-2 -3.64
## 4 NCK2 87.7 1.96684405812182e-05 2.88409260953686e… -5.98
## 5 LPXN 81.6 2.54873504008281e-05 3.99667642901746e… -8.19
## 6 FLNB 88.9 1.86958220085285e-05 2.71238491216691e… -7.39
## 7 FLOT1 36.5 4.4146029866052E-4 1.244880266545600… -3.42
## 8 ITGA4 783. 7.45988995855363e-09 7.8891481573759e-… -7.75
## 9 LCP1 50.2 1.4983169548236E-4 3.35678944371217e… -5.86
## 10 ARRB2 29.1 9.1935911154051003E-4 3.050226846145700… -4.42
## # ℹ 19 more rows
## # ℹ 4 more variables: diff.All.vs.Control <dbl>, p.value <chr>,
## # log2FoldChange <dbl>, expression <chr>
# Keep the JTC_Pull_1845 rows whose Protein appears in the T-cell receptor
# complex list
T.Cell.Receptor.Complex.AID.data <- JTC_Pull_1845[
  JTC_Pull_1845$Protein %in% T.Cell.Receptor.Complex.AID,
]
T.Cell.Receptor.Complex.AID.data
## # A tibble: 27 × 9
## Protein `F-Value` `FDR (BH)` `Control-All` `diff Control-All`
## <chr> <dbl> <chr> <chr> <dbl>
## 1 PSMD14 16.7 4.6051251106252796E-3 2.199569675553419… -3.82
## 2 RGS14 67.0 5.34065415334716e-05 9.63852013672922e… -5.80
## 3 GRAP2 22.7 1.9224501345493101E-3 7.597080267219300… -4.32
## 4 HLA-A 35.8 4.7082280047660999E-4 1.345992836835100… -5.49
## 5 CD3E 21.3 2.3309625358376301E-3 9.610347288945400… -4.92
## 6 CD3G 10.6 1.4356128233778E-2 8.542446711050460… -3.21
## 7 HLA-G 6.82 3.6935183674587702E-2 2.59882330872537E… -2.28
## 8 ITGAL 150. 2.58658093779827e-06 2.39442342744453e… -7.10
## 9 CD247 288. 2.32764420227164e-07 1.05925465447601e… -8.32
## 10 PSMB9 7.39 3.1512876979558103E-2 2.158466602917220… -2.59
## # ℹ 17 more rows
## # ℹ 4 more variables: diff.All.vs.Control <dbl>, p.value <chr>,
## # log2FoldChange <dbl>, expression <chr>
library(readxl)
# ---- T-cell receptor complex: mock interaction network ----
# Read the per-protein table for this pathway and preview it.
T_Cell_Receptor_Complex_AID <- read_excel("T.Cell.Receptor.Complex.AID.xlsx")
head(T_Cell_Receptor_Complex_AID)
## # A tibble: 6 × 6
## Protein diff.All.vs.Control p.value log2FoldChange expression Risk.Frequency
## <chr> <dbl> <chr> <dbl> <chr> <chr>
## 1 PSMD14 3.82 2.199569… 3.82 Upregulat… 8.76651999999…
## 2 RGS14 5.80 9.638520… 5.80 Upregulat… 0.34150000000…
## 3 GRAP2 4.32 7.597080… 4.32 Upregulat… NR
## 4 HLA-A 5.49 1.345992… 5.49 Upregulat… 6.09999999999…
## 5 CD3E 4.92 9.610347… 4.92 Upregulat… NR
## 6 CD3G 3.21 8.542446… 3.21 Upregulat… NR
data <- T_Cell_Receptor_Complex_AID
# Convert Risk.Frequency to numeric. The column is read as text and uses "NR"
# for entries without a number (presumably "not reported" - confirm), so map
# "NR" to NA before converting.
data$Risk.Frequency <- as.numeric(ifelse(data$Risk.Frequency == "NR", NA, data$Risk.Frequency))
## Warning: NAs introduced by coercion
# Mock STRING-like connections: every ordered pair of distinct proteins, from
# which at most 100 are sampled so the plot stays readable.
# You can replace this with actual STRING interaction data.
proteins <- unique(data$Protein)
mock_edges <- expand.grid(protein1 = proteins, protein2 = proteins, stringsAsFactors = FALSE)
mock_edges <- mock_edges[mock_edges$protein1 != mock_edges$protein2, ] # Remove self-loops
set.seed(42) # For reproducibility
# seq_len() instead of 1:nrow() so an empty edge table yields an empty sample
mock_edges <- mock_edges[sample(seq_len(nrow(mock_edges)), min(100, nrow(mock_edges))), ] # Subset for clarity
# Build the graph with an explicit vertex table. Without it igraph orders
# vertices by first appearance in the sampled edge list, so overwriting
# V(graph)$name with `proteins` afterwards would relabel edge endpoints (and
# fail when a protein has no sampled edge). Passing `vertices` keeps names
# tied to the edges and retains proteins that ended up without an edge.
graph <- graph_from_data_frame(mock_edges, directed = FALSE, vertices = data.frame(name = proteins))
# Map attributes to nodes by protein name
V(graph)$size <- data$Risk.Frequency[match(V(graph)$name, data$Protein)] * 10 # Scale size for visualization
V(graph)$logFC <- data$log2FoldChange[match(V(graph)$name, data$Protein)] # For color scale
V(graph)$label <- V(graph)$name # Add labels for node names
# Handle missing values
V(graph)$size[is.na(V(graph)$size)] <- 5 # Default size for NA Risk.Frequency
V(graph)$logFC[is.na(V(graph)$logFC)] <- 0 # Default logFC for missing values
# Visualize using ggraph
ggraph(graph, layout = "fr") +
  geom_edge_link(color = "gray70", alpha = 0.7) + # Edges
  geom_node_point(aes(size = size, color = logFC)) + # Nodes
  geom_node_text(aes(label = label), repel = TRUE, size = 6) + # Protein names
  scale_color_gradient2(low = "blue", mid = "white", high = "darkgreen", midpoint = 0, name = "Log2 FC") + # Log2FC color scale
  theme_void() +
  labs(title = "T-CellReceptor Complex",
       size = "Risk Frequency") +
  theme(legend.position = "right")

library(readr)
#wd <- '/Users/usri/Desktop/JTC.membrane.analysis/JTC.Pull.AID.Stringanalysis/jan2/'
# ---- Cytokine-mediated signaling pathway: mock interaction network ----
# Read the per-protein table for this pathway from CSV. Note this reassigns
# Cytokine.mediated.signaling.pathway.AID.data with the CSV contents.
Cytokine.mediated.signaling.pathway.AID.data <- read_csv("Cytokine.mediated.signaling.pathway.AID.data.csv")
## Rows: 19 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Protein, expression, Risk.Frequency
## dbl (5): F-Value, FDR (BH), diff.All.vs.Control, p.value, log2FoldChange
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(Cytokine.mediated.signaling.pathway.AID.data)
## # A tibble: 6 × 8
## Protein `F-Value` `FDR (BH)` diff.All.vs.Control p.value log2FoldChange
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 PRKCB 21.0 0.00242 4.51 0.00100 4.51
## 2 BCL2 12.7 0.00930 2.78 0.00515 2.78
## 3 PTPN2 8.24 0.0253 3.98 0.0167 3.98
## 4 STAT3 56.1 0.000100 4.90 0.0000208 4.90
## 5 STAT1 22.9 0.00189 4.78 0.000740 4.78
## 6 SMARCA4 34.4 0.000536 4.10 0.000158 4.10
## # ℹ 2 more variables: expression <chr>, Risk.Frequency <chr>
data <- Cytokine.mediated.signaling.pathway.AID.data
# Convert Risk.Frequency to numeric. The column is read as text and uses "NR"
# for entries without a number (presumably "not reported" - confirm), so map
# "NR" to NA before converting.
data$Risk.Frequency <- as.numeric(ifelse(data$Risk.Frequency == "NR", NA, data$Risk.Frequency))
# Mock STRING-like connections: every ordered pair of distinct proteins, from
# which at most 100 are sampled so the plot stays readable.
# You can replace this with actual STRING interaction data.
proteins <- unique(data$Protein)
mock_edges <- expand.grid(protein1 = proteins, protein2 = proteins, stringsAsFactors = FALSE)
mock_edges <- mock_edges[mock_edges$protein1 != mock_edges$protein2, ] # Remove self-loops
set.seed(42) # For reproducibility
# seq_len() instead of 1:nrow() so an empty edge table yields an empty sample
mock_edges <- mock_edges[sample(seq_len(nrow(mock_edges)), min(100, nrow(mock_edges))), ] # Subset for clarity
# Build the graph with an explicit vertex table. Without it igraph orders
# vertices by first appearance in the sampled edge list, so overwriting
# V(graph)$name with `proteins` afterwards would relabel edge endpoints (and
# fail when a protein has no sampled edge). Passing `vertices` keeps names
# tied to the edges and retains proteins that ended up without an edge.
graph <- graph_from_data_frame(mock_edges, directed = FALSE, vertices = data.frame(name = proteins))
# Map attributes to nodes by protein name
V(graph)$size <- data$Risk.Frequency[match(V(graph)$name, data$Protein)] * 10 # Scale size for visualization
V(graph)$logFC <- data$log2FoldChange[match(V(graph)$name, data$Protein)] # For color scale
V(graph)$label <- V(graph)$name # Add labels for node names
# Handle missing values
V(graph)$size[is.na(V(graph)$size)] <- 5 # Default size for NA Risk.Frequency
V(graph)$logFC[is.na(V(graph)$logFC)] <- 0 # Default logFC for missing values
# Visualize using ggraph
ggraph(graph, layout = "fr") +
  geom_edge_link(color = "gray70", alpha = 0.7) + # Edges
  geom_node_point(aes(size = size, color = logFC)) + # Nodes
  geom_node_text(aes(label = label), repel = TRUE, size = 6) + # Protein names
  scale_color_gradient2(low = "blue", mid = "white", high = "brown", midpoint = 0, name = "Log2 FC") + # Log2FC color scale
  theme_void() +
  labs(title = "Cytokine.mediated.signaling.pathway",
       size = "Risk Frequency") +
  theme(legend.position = "right")

library(readxl)
# ---- Actin filament bundle assembly: mock interaction network ----
# Read the per-protein table for this pathway and preview it.
Actin.filament.bundle.assembly <- read_excel("Actin.filament.bundle.assembly.AID.xlsx")
head(Actin.filament.bundle.assembly)
## # A tibble: 6 × 6
## Protein diff.All.vs.Control p.value log2FoldChange expression Risk.Frequency
## <chr> <dbl> <chr> <dbl> <chr> <chr>
## 1 NFKBIE 2.66 3.406516… 2.66 Upregulat… 0.215
## 2 CIT 2.74 3.230388… 2.74 Upregulat… 7.34199999999…
## 3 TNPO2 3.64 1.744295… 3.64 Upregulat… NR
## 4 NCK2 5.98 2.884092… 5.98 Upregulat… NR
## 5 LPXN 8.19 3.996676… 8.19 Upregulat… 0.76200000000…
## 6 FLNB 7.39 2.712384… 7.39 Upregulat… NR
data <- Actin.filament.bundle.assembly
# Convert Risk.Frequency to numeric. The column is read as text and uses "NR"
# for entries without a number (presumably "not reported" - confirm), so map
# "NR" to NA before converting.
data$Risk.Frequency <- as.numeric(ifelse(data$Risk.Frequency == "NR", NA, data$Risk.Frequency))
# Mock STRING-like connections: every ordered pair of distinct proteins, from
# which at most 100 are sampled so the plot stays readable.
# You can replace this with actual STRING interaction data.
proteins <- unique(data$Protein)
mock_edges <- expand.grid(protein1 = proteins, protein2 = proteins, stringsAsFactors = FALSE)
mock_edges <- mock_edges[mock_edges$protein1 != mock_edges$protein2, ] # Remove self-loops
set.seed(42) # For reproducibility
# seq_len() instead of 1:nrow() so an empty edge table yields an empty sample
mock_edges <- mock_edges[sample(seq_len(nrow(mock_edges)), min(100, nrow(mock_edges))), ] # Subset for clarity
# Build the graph with an explicit vertex table. Without it igraph orders
# vertices by first appearance in the sampled edge list, so overwriting
# V(graph)$name with `proteins` afterwards would relabel edge endpoints (and
# fail when a protein has no sampled edge). Passing `vertices` keeps names
# tied to the edges and retains proteins that ended up without an edge.
graph <- graph_from_data_frame(mock_edges, directed = FALSE, vertices = data.frame(name = proteins))
# Map attributes to nodes by protein name
V(graph)$size <- data$Risk.Frequency[match(V(graph)$name, data$Protein)] * 10 # Scale size for visualization
V(graph)$logFC <- data$log2FoldChange[match(V(graph)$name, data$Protein)] # For color scale
V(graph)$label <- V(graph)$name # Add labels for node names
# Handle missing values
V(graph)$size[is.na(V(graph)$size)] <- 5 # Default size for NA Risk.Frequency
V(graph)$logFC[is.na(V(graph)$logFC)] <- 0 # Default logFC for missing values
# Visualize using ggraph
ggraph(graph, layout = "fr") +
  geom_edge_link(color = "gray70", alpha = 0.7) + # Edges
  geom_node_point(aes(size = size, color = logFC)) + # Nodes
  geom_node_text(aes(label = label), repel = TRUE, size = 6) + # Protein names
  scale_color_gradient2(low = "blue", mid = "white", high = "navy", midpoint = 0, name = "Log2 FC") + # Log2FC color scale
  theme_void() +
  # Title corrected: this plot shows the actin data, not the cytokine pathway
  labs(title = "Actin.filament.bundle.assembly",
       size = "Risk Frequency") +
  theme(legend.position = "right")
