library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(org.Mm.eg.db)
## Loading required package: AnnotationDbi
## Warning: package 'AnnotationDbi' was built under R version 4.3.2
## Loading required package: stats4
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.3.1
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Warning: package 'Biobase' was built under R version 4.3.1
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 4.3.1
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.3.2
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
library(org.Hs.eg.db)
## 
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0.9000     ✔ readr     2.1.5     
## ✔ ggplot2   3.5.1          ✔ stringr   1.5.1     
## ✔ lubridate 1.9.3          ✔ tibble    3.2.1     
## ✔ purrr     1.0.2          ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%()   masks IRanges::%within%()
## ✖ IRanges::collapse()     masks dplyr::collapse()
## ✖ Biobase::combine()      masks BiocGenerics::combine(), dplyr::combine()
## ✖ IRanges::desc()         masks dplyr::desc()
## ✖ tidyr::expand()         masks S4Vectors::expand()
## ✖ dplyr::filter()         masks stats::filter()
## ✖ S4Vectors::first()      masks dplyr::first()
## ✖ dplyr::lag()            masks stats::lag()
## ✖ ggplot2::Position()     masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()         masks IRanges::reduce()
## ✖ S4Vectors::rename()     masks dplyr::rename()
## ✖ lubridate::second()     masks S4Vectors::second()
## ✖ lubridate::second<-()   masks S4Vectors::second<-()
## ✖ AnnotationDbi::select() masks dplyr::select()
## ✖ IRanges::slice()        masks dplyr::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ComplexHeatmap)
## Warning: package 'ComplexHeatmap' was built under R version 4.3.1
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.18.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(readxl)
library(readr)
library(STRINGdb)
## Warning: package 'STRINGdb' was built under R version 4.3.1
library(enrichR)
## Welcome to enrichR
## Checking connection ... 
## Enrichr ... Connection is Live!
## FlyEnrichr ... Connection is Live!
## WormEnrichr ... Connection is Live!
## YeastEnrichr ... Connection is Live!
## FishEnrichr ... Connection is Live!
## OxEnrichr ... Connection is Live!
library(clusterProfiler)
## Warning: package 'clusterProfiler' was built under R version 4.3.1
## 
## Registered S3 methods overwritten by 'treeio':
##   method              from    
##   MRCA.phylo          tidytree
##   MRCA.treedata       tidytree
##   Nnode.treedata      tidytree
##   Ntip.treedata       tidytree
##   ancestor.phylo      tidytree
##   ancestor.treedata   tidytree
##   child.phylo         tidytree
##   child.treedata      tidytree
##   full_join.phylo     tidytree
##   full_join.treedata  tidytree
##   groupClade.phylo    tidytree
##   groupClade.treedata tidytree
##   groupOTU.phylo      tidytree
##   groupOTU.treedata   tidytree
##   inner_join.phylo    tidytree
##   inner_join.treedata tidytree
##   is.rooted.treedata  tidytree
##   nodeid.phylo        tidytree
##   nodeid.treedata     tidytree
##   nodelab.phylo       tidytree
##   nodelab.treedata    tidytree
##   offspring.phylo     tidytree
##   offspring.treedata  tidytree
##   parent.phylo        tidytree
##   parent.treedata     tidytree
##   root.treedata       tidytree
##   rootnode.phylo      tidytree
##   sibling.phylo       tidytree
## clusterProfiler v4.8.3  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
## 
## Attaching package: 'clusterProfiler'
## 
## The following object is masked from 'package:purrr':
## 
##     simplify
## 
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## 
## The following object is masked from 'package:IRanges':
## 
##     slice
## 
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## 
## The following object is masked from 'package:stats':
## 
##     filter
library(igraph)
## Warning: package 'igraph' was built under R version 4.3.2
## 
## Attaching package: 'igraph'
## 
## The following object is masked from 'package:clusterProfiler':
## 
##     simplify
## 
## The following objects are masked from 'package:lubridate':
## 
##     %--%, union
## 
## The following objects are masked from 'package:purrr':
## 
##     compose, simplify
## 
## The following object is masked from 'package:tidyr':
## 
##     crossing
## 
## The following object is masked from 'package:tibble':
## 
##     as_data_frame
## 
## The following object is masked from 'package:IRanges':
## 
##     union
## 
## The following object is masked from 'package:S4Vectors':
## 
##     union
## 
## The following objects are masked from 'package:BiocGenerics':
## 
##     normalize, path, union
## 
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## 
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## 
## The following object is masked from 'package:base':
## 
##     union
library(ggraph)
## Warning: package 'ggraph' was built under R version 4.3.2
library(readxl)
# Read the AID-based STRING protein-name lists (one column per pathway)
JTC_AID_STRING_RESULTS_JAN2 <- readxl::read_excel(path = "~/Desktop/JTC.AID.STRING.RESULTS.JAN2.xlsx")
head(JTC_AID_STRING_RESULTS_JAN2, n = 6L)
## # A tibble: 6 × 3
##   Cytokine.mediated.signaling.pa…¹ Actin.filament.bundl…² T-Cell.Receptor.Comp…³
##   <chr>                            <chr>                  <chr>                 
## 1 CLEC16A                          CHMP4B                 CSK                   
## 2 PTPN2                            FLOT1                  UBASH3A               
## 3 FHL1                             TNPO2                  ICAM3                 
## 4 STAT1                            TNPO3                  ITGAL                 
## 5 STAT2                            KPNB1                  RGS14                 
## 6 STAT3                            SENP1                  VAV3                  
## # ℹ abbreviated names: ¹​Cytokine.mediated.signaling.pathway.AID,
## #   ²​Actin.filament.bundle.assembly.AID, ³​`T-Cell.Receptor.Complex.AID`
# Read the JTC pull-down table (the "1845" significant-protein list)
JTC_Pull_1845 <- readxl::read_excel(path = "~/Desktop/JTC.Pull.1845.xlsx")
head(JTC_Pull_1845, n = 6L)
## # A tibble: 6 × 9
##   Protein   `F-Value` `FDR (BH)`            `Control-All`     `diff Control-All`
##   <chr>         <dbl> <chr>                 <chr>                          <dbl>
## 1 TurboID      894.   5.27792289666326e-09  4.10459374650874…             -10.0 
## 2 TAF1          11.8  1.12813261340936E-2   6.43706940440112…              -3.61
## 3 UHRF1BP1L     39.8  3.3343890015458003E-4 8.79313785705215…              -5.07
## 4 SHTN1         99.2  1.22502977927992e-05  1.64862039529698…              -6.44
## 5 ILVBL          7.52 3.0580832955954099E-2 2.07321632991941…              -4.50
## 6 DENND11       17.1  4.2821594007173098E-3 2.0209649861693E…              -2.51
## # ℹ 4 more variables: diff.All.vs.Control <dbl>, p.value <chr>,
## #   log2FoldChange <dbl>, expression <chr>
# Extract each pathway's column of protein names from the STRING results
# as a plain character vector.
Cytokine.mediated.signaling.pathway.AID <- JTC_AID_STRING_RESULTS_JAN2[["Cytokine.mediated.signaling.pathway.AID"]]
Actin.filament.bundle.assembly.AID <- JTC_AID_STRING_RESULTS_JAN2[["Actin.filament.bundle.assembly.AID"]]
T.Cell.Receptor.Complex.AID <- JTC_AID_STRING_RESULTS_JAN2[["T-Cell.Receptor.Complex.AID"]]
# Inspect the cytokine protein names
Cytokine.mediated.signaling.pathway.AID
##  [1] "CLEC16A" "PTPN2"   "FHL1"    "STAT1"   "STAT2"   "STAT3"   "STAT5A" 
##  [8] "SMARCA4" "IRAK1"   "FGFR1OP" "PPP2R2A" "PPP2R3A" "PPP2R3C" "MINK1"  
## [15] "LPP"     "BCL2"    "CDK6"    "CDC37"   "CASP8"   "PRKCB"   "ITPR3"  
## [22] "ATF2A2"  "ATP1B3"  "STIM1"   "RUNX1"   "CCR4"    NA        NA       
## [29] NA
# Drop missing entries from the cytokine protein list. The column carries NA
# values after the last real name (presumably padding because this spreadsheet
# column is shorter than the others). Filtering on is.na() instead of keeping a
# hard-coded first 26 elements stays correct if the sheet gains or loses names.
Cytokine.mediated.signaling.pathway.AID <- Cytokine.mediated.signaling.pathway.AID[!is.na(Cytokine.mediated.signaling.pathway.AID)]
Cytokine.mediated.signaling.pathway.AID
##  [1] "CLEC16A" "PTPN2"   "FHL1"    "STAT1"   "STAT2"   "STAT3"   "STAT5A" 
##  [8] "SMARCA4" "IRAK1"   "FGFR1OP" "PPP2R2A" "PPP2R3A" "PPP2R3C" "MINK1"  
## [15] "LPP"     "BCL2"    "CDK6"    "CDC37"   "CASP8"   "PRKCB"   "ITPR3"  
## [22] "ATF2A2"  "ATP1B3"  "STIM1"   "RUNX1"   "CCR4"
# Show the actin filament bundle assembly protein names
print(Actin.filament.bundle.assembly.AID)
##  [1] "CHMP4B"   "FLOT1"    "TNPO2"    "TNPO3"    "KPNB1"    "SENP1"   
##  [7] "NUP210"   "AKAP11"   "FNBP1"    "ACTR2"    "ARHGAP4"  "EPS15L1" 
## [13] "LCP1"     "EXOC2"    "FLNB"     "ITGA4"    "ARHGAP15" "DOCK10"  
## [19] "LPXN"     "NFKBIE"   "ARRB2"    "PTK2B"    "IQGAP1"   "CIT"     
## [25] "SWAP70"   "NCK2"     "ABL2"     "TAOK3"    "CSNK2B"
# Show the T-cell receptor complex protein names
print(T.Cell.Receptor.Complex.AID)
##  [1] "CSK"     "UBASH3A" "ICAM3"   "ITGAL"   "RGS14"   "VAV3"    "SKAP1"  
##  [8] "TRAT1"   "ITK"     "CD3G"    "CD3E"    "CD247"   "ZAP70"   "SHC1"   
## [15] "LCP2"    "GRAP2"   "PTPN11"  "PTPN22"  "LAT"     "THEMIS"  "KIR3DL3"
## [22] "HLA-A"   "HLA-G"   "TAP1"    "PSMB9"   "PSMD14"  "PSMD5"   "PSMD1"  
## [29] "UBLCP1"
# Keep the JTC_Pull_1845 rows whose Protein is in the cytokine list, so the
# log FC values for those proteins are available.
# NOTE(review): 19 rows come back for 26 listed names, so some names have no
# match in JTC_Pull_1845 -- confirm this is expected (e.g. naming differences).
Cytokine.mediated.signaling.pathway.AID.data <- JTC_Pull_1845[JTC_Pull_1845$Protein %in% Cytokine.mediated.signaling.pathway.AID, ]
Cytokine.mediated.signaling.pathway.AID.data
## # A tibble: 19 × 9
##    Protein `F-Value` `FDR (BH)`            `Control-All`      `diff Control-All`
##    <chr>       <dbl> <chr>                 <chr>                           <dbl>
##  1 PRKCB       21.0  2.4210941751688802E-3 1.00384534839752E…              -4.51
##  2 BCL2        12.7  9.3025077796056697E-3 5.145144326176300…              -2.78
##  3 PTPN2        8.24 2.5289084549813899E-2 1.66528277490606E…              -3.98
##  4 STAT3       56.1  1.0012171592612E-4    2.08343516221231e…              -4.90
##  5 STAT1       22.9  1.88696578587398E-3   7.398138903776600…              -4.78
##  6 SMARCA4     34.4  5.3625596100866004E-4 1.5789411220675E-4              -4.10
##  7 IRAK1      124.   5.21858471940925e-06  5.82549130482479e…              -3.18
##  8 STAT2      175.   1.49877063458406e-06  1.16590481103389e…              -6.13
##  9 ATP1B3      10.6  1.4356128233778E-2    8.543320185795549…              -3.85
## 10 PPP2R2A     17.9  3.77660790006001E-3   1.725370049896590…              -4.35
## 11 CDK6        25.3  1.41012990608174E-3   5.161152242796199…              -2.84
## 12 RUNX1       18.5  3.47115669368638E-3   1.563438137412999…              -3.22
## 13 STIM1      104.   1.03319990614549e-05  1.34223558402409e…              -9.24
## 14 FHL1        68.3  4.95570260286022e-05  8.87281424007051e…              -5.56
## 15 CASP8        5.81 4.9897315475137503E-2 3.660300932948629…              -2.22
## 16 CDC37       85.9  2.13064872424559e-05  3.18292728688281e…              -6.10
## 17 CLEC16A     32.8  6.2799900639022002E-4 1.902805235231300…              -2.90
## 18 MINK1      489.   3.35662674527538e-08  8.04755777182624e…              -8.59
## 19 LPP        266.   3.08721629305423e-07  1.56101951807488e…              -7.73
## # ℹ 4 more variables: diff.All.vs.Control <dbl>, p.value <chr>,
## #   log2FoldChange <dbl>, expression <chr>
# Keep the JTC_Pull_1845 rows whose Protein is in the actin filament bundle
# assembly list.
Actin.filament.bundle.assembly.AID.data <- JTC_Pull_1845[JTC_Pull_1845$Protein %in% Actin.filament.bundle.assembly.AID, ]
Actin.filament.bundle.assembly.AID.data
## # A tibble: 29 × 9
##    Protein `F-Value` `FDR (BH)`            `Control-All`      `diff Control-All`
##    <chr>       <dbl> <chr>                 <chr>                           <dbl>
##  1 NFKBIE       6.02 4.6960617105448597E-2 3.406516954557049…              -2.66
##  2 CIT         14.8  6.3014634161585601E-3 3.230388480162189…              -2.74
##  3 TNPO2        8.09 2.6225631430651701E-2 1.744295206006E-2               -3.64
##  4 NCK2        87.7  1.96684405812182e-05  2.88409260953686e…              -5.98
##  5 LPXN        81.6  2.54873504008281e-05  3.99667642901746e…              -8.19
##  6 FLNB        88.9  1.86958220085285e-05  2.71238491216691e…              -7.39
##  7 FLOT1       36.5  4.4146029866052E-4    1.244880266545600…              -3.42
##  8 ITGA4      783.   7.45988995855363e-09  7.8891481573759e-…              -7.75
##  9 LCP1        50.2  1.4983169548236E-4    3.35678944371217e…              -5.86
## 10 ARRB2       29.1  9.1935911154051003E-4 3.050226846145700…              -4.42
## # ℹ 19 more rows
## # ℹ 4 more variables: diff.All.vs.Control <dbl>, p.value <chr>,
## #   log2FoldChange <dbl>, expression <chr>
# Keep the JTC_Pull_1845 rows whose Protein is in the T-cell receptor complex
# list.
# NOTE(review): 27 rows come back for 29 listed names -- confirm the unmatched
# names are expected.
T.Cell.Receptor.Complex.AID.data <- JTC_Pull_1845[JTC_Pull_1845$Protein %in% T.Cell.Receptor.Complex.AID, ]
T.Cell.Receptor.Complex.AID.data
## # A tibble: 27 × 9
##    Protein `F-Value` `FDR (BH)`            `Control-All`      `diff Control-All`
##    <chr>       <dbl> <chr>                 <chr>                           <dbl>
##  1 PSMD14      16.7  4.6051251106252796E-3 2.199569675553419…              -3.82
##  2 RGS14       67.0  5.34065415334716e-05  9.63852013672922e…              -5.80
##  3 GRAP2       22.7  1.9224501345493101E-3 7.597080267219300…              -4.32
##  4 HLA-A       35.8  4.7082280047660999E-4 1.345992836835100…              -5.49
##  5 CD3E        21.3  2.3309625358376301E-3 9.610347288945400…              -4.92
##  6 CD3G        10.6  1.4356128233778E-2    8.542446711050460…              -3.21
##  7 HLA-G        6.82 3.6935183674587702E-2 2.59882330872537E…              -2.28
##  8 ITGAL      150.   2.58658093779827e-06  2.39442342744453e…              -7.10
##  9 CD247      288.   2.32764420227164e-07  1.05925465447601e…              -8.32
## 10 PSMB9        7.39 3.1512876979558103E-2 2.158466602917220…              -2.59
## # ℹ 17 more rows
## # ℹ 4 more variables: diff.All.vs.Control <dbl>, p.value <chr>,
## #   log2FoldChange <dbl>, expression <chr>
library(readxl)
# Read the T-cell receptor complex table (path is relative to the working
# directory) and preview its first rows.
T_Cell_Receptor_Complex_AID <- readxl::read_excel(path = "T.Cell.Receptor.Complex.AID.xlsx")
head(T_Cell_Receptor_Complex_AID, n = 6L)
## # A tibble: 6 × 6
##   Protein diff.All.vs.Control p.value   log2FoldChange expression Risk.Frequency
##   <chr>                 <dbl> <chr>              <dbl> <chr>      <chr>         
## 1 PSMD14                 3.82 2.199569…           3.82 Upregulat… 8.76651999999…
## 2 RGS14                  5.80 9.638520…           5.80 Upregulat… 0.34150000000…
## 3 GRAP2                  4.32 7.597080…           4.32 Upregulat… NR            
## 4 HLA-A                  5.49 1.345992…           5.49 Upregulat… 6.09999999999…
## 5 CD3E                   4.92 9.610347…           4.92 Upregulat… NR            
## 6 CD3G                   3.21 8.542446…           3.21 Upregulat… NR
# Draw a mock interaction network for the T-cell receptor complex proteins.
# Node size encodes Risk.Frequency (scaled x10) and node colour encodes
# log2FoldChange. The edges are random placeholders, NOT real STRING
# interactions.
data <- T_Cell_Receptor_Complex_AID

# Convert Risk.Frequency to numeric; "NR" and any other non-numeric entry
# become NA (hence the coercion warning below).
data$Risk.Frequency <- as.numeric(ifelse(data$Risk.Frequency == "NR", NA, data$Risk.Frequency))
## Warning: NAs introduced by coercion

# Create mock STRING-like connections: every ordered protein pair except
# self-loops, thinned to at most 100 randomly chosen edges.
# You can replace this with actual STRING interaction data
proteins <- unique(data$Protein)
mock_edges <- expand.grid(proteins, proteins, stringsAsFactors = FALSE)
mock_edges <- mock_edges[mock_edges$Var1 != mock_edges$Var2, ]  # Remove self-loops
set.seed(42)  # For reproducibility
mock_edges <- mock_edges[sample(seq_len(nrow(mock_edges)), min(100, nrow(mock_edges))), ]  # Subset for clarity

# Prepare graph edges
colnames(mock_edges) <- c("protein1", "protein2")
# Pass the vertex table explicitly so the graph contains every protein (even
# one that drew no edge) under its own name. Without it the vertices are
# ordered by first appearance in the edge list, and overwriting
# V(graph)$name with `proteins` afterwards would attach edges to the wrong
# proteins (or fail when the lengths differ).
graph <- graph_from_data_frame(
    mock_edges,
    directed = FALSE,
    vertices = data.frame(name = proteins, stringsAsFactors = FALSE)
)

# Map attributes to nodes (match() picks the first row for each protein)
V(graph)$size <- data$Risk.Frequency[match(V(graph)$name, data$Protein)] * 10  # Scale size for visualization
V(graph)$logFC <- data$log2FoldChange[match(V(graph)$name, data$Protein)]  # For color scale
V(graph)$label <- V(graph)$name  # Add labels for node names

# Handle missing values
V(graph)$size[is.na(V(graph)$size)] <- 5  # Default size for NA Risk.Frequency
V(graph)$logFC[is.na(V(graph)$logFC)] <- 0  # Default logFC for missing values

# Visualize using ggraph
ggraph(graph, layout = "fr") + 
    geom_edge_link(color = "gray70", alpha = 0.7) +  # Edges
    geom_node_point(aes(size = size, color = logFC)) +  # Nodes
    geom_node_text(aes(label = label), repel = TRUE, size = 6) +  # Protein names
    scale_color_gradient2(low = "blue", mid = "white", high = "darkgreen", midpoint = 0,  name = "Log2 FC") +  # Log2FC color scale
    theme_void() + 
    labs(title = "T-CellReceptor Complex",
         size = "Risk Frequency") +
    theme(legend.position = "right")

library(readr)
#wd <- '/Users/usri/Desktop/JTC.membrane.analysis/JTC.Pull.AID.Stringanalysis/jan2/'
# Reload the cytokine table from CSV (path is relative to the working
# directory); this replaces the object of the same name built earlier from
# JTC_Pull_1845.
Cytokine.mediated.signaling.pathway.AID.data <- readr::read_csv(file = "Cytokine.mediated.signaling.pathway.AID.data.csv")
## Rows: 19 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Protein, expression, Risk.Frequency
## dbl (5): F-Value, FDR (BH), diff.All.vs.Control, p.value, log2FoldChange
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the reloaded cytokine table
head(Cytokine.mediated.signaling.pathway.AID.data, n = 6L)
## # A tibble: 6 × 8
##   Protein `F-Value` `FDR (BH)` diff.All.vs.Control   p.value log2FoldChange
##   <chr>       <dbl>      <dbl>               <dbl>     <dbl>          <dbl>
## 1 PRKCB       21.0    0.00242                 4.51 0.00100             4.51
## 2 BCL2        12.7    0.00930                 2.78 0.00515             2.78
## 3 PTPN2        8.24   0.0253                  3.98 0.0167              3.98
## 4 STAT3       56.1    0.000100                4.90 0.0000208           4.90
## 5 STAT1       22.9    0.00189                 4.78 0.000740            4.78
## 6 SMARCA4     34.4    0.000536                4.10 0.000158            4.10
## # ℹ 2 more variables: expression <chr>, Risk.Frequency <chr>
# Draw a mock interaction network for the cytokine-mediated signaling pathway
# proteins. Node size encodes Risk.Frequency (scaled x10) and node colour
# encodes log2FoldChange. The edges are random placeholders, NOT real STRING
# interactions.
data <- Cytokine.mediated.signaling.pathway.AID.data

# Convert Risk.Frequency to numeric (handle non-numeric values like "NR")
data$Risk.Frequency <- as.numeric(ifelse(data$Risk.Frequency == "NR", NA, data$Risk.Frequency))

# Create mock STRING-like connections: every ordered protein pair except
# self-loops, thinned to at most 100 randomly chosen edges.
# You can replace this with actual STRING interaction data
proteins <- unique(data$Protein)
mock_edges <- expand.grid(proteins, proteins, stringsAsFactors = FALSE)
mock_edges <- mock_edges[mock_edges$Var1 != mock_edges$Var2, ]  # Remove self-loops
set.seed(42)  # For reproducibility
mock_edges <- mock_edges[sample(seq_len(nrow(mock_edges)), min(100, nrow(mock_edges))), ]  # Subset for clarity

# Prepare graph edges
colnames(mock_edges) <- c("protein1", "protein2")
# Pass the vertex table explicitly so the graph contains every protein (even
# one that drew no edge) under its own name. Without it the vertices are
# ordered by first appearance in the edge list, and overwriting
# V(graph)$name with `proteins` afterwards would attach edges to the wrong
# proteins (or fail when the lengths differ).
graph <- graph_from_data_frame(
    mock_edges,
    directed = FALSE,
    vertices = data.frame(name = proteins, stringsAsFactors = FALSE)
)

# Map attributes to nodes (match() picks the first row for each protein)
V(graph)$size <- data$Risk.Frequency[match(V(graph)$name, data$Protein)] * 10  # Scale size for visualization
V(graph)$logFC <- data$log2FoldChange[match(V(graph)$name, data$Protein)]  # For color scale
V(graph)$label <- V(graph)$name  # Add labels for node names

# Handle missing values
V(graph)$size[is.na(V(graph)$size)] <- 5  # Default size for NA Risk.Frequency
V(graph)$logFC[is.na(V(graph)$logFC)] <- 0  # Default logFC for missing values

# Visualize using ggraph
ggraph(graph, layout = "fr") + 
    geom_edge_link(color = "gray70", alpha = 0.7) +  # Edges
    geom_node_point(aes(size = size, color = logFC)) +  # Nodes
    geom_node_text(aes(label = label), repel = TRUE, size = 6) +  # Protein names
    scale_color_gradient2(low = "blue", mid = "white", high = "brown", midpoint = 0,  name = "Log2 FC") +  # Log2FC color scale
    theme_void() + 
    labs(title = "Cytokine.mediated.signaling.pathway",
         size = "Risk Frequency") +
    theme(legend.position = "right")

library(readxl)
# Read the actin filament bundle assembly table (path is relative to the
# working directory) and preview its first rows.
Actin.filament.bundle.assembly <- readxl::read_excel(path = "Actin.filament.bundle.assembly.AID.xlsx")
head(Actin.filament.bundle.assembly, n = 6L)
## # A tibble: 6 × 6
##   Protein diff.All.vs.Control p.value   log2FoldChange expression Risk.Frequency
##   <chr>                 <dbl> <chr>              <dbl> <chr>      <chr>         
## 1 NFKBIE                 2.66 3.406516…           2.66 Upregulat… 0.215         
## 2 CIT                    2.74 3.230388…           2.74 Upregulat… 7.34199999999…
## 3 TNPO2                  3.64 1.744295…           3.64 Upregulat… NR            
## 4 NCK2                   5.98 2.884092…           5.98 Upregulat… NR            
## 5 LPXN                   8.19 3.996676…           8.19 Upregulat… 0.76200000000…
## 6 FLNB                   7.39 2.712384…           7.39 Upregulat… NR
# Draw a mock interaction network for the actin filament bundle assembly
# proteins. Node size encodes Risk.Frequency (scaled x10) and node colour
# encodes log2FoldChange. The edges are random placeholders, NOT real STRING
# interactions.
data <- Actin.filament.bundle.assembly

# Convert Risk.Frequency to numeric (handle non-numeric values like "NR")
data$Risk.Frequency <- as.numeric(ifelse(data$Risk.Frequency == "NR", NA, data$Risk.Frequency))

# Create mock STRING-like connections: every ordered protein pair except
# self-loops, thinned to at most 100 randomly chosen edges.
# You can replace this with actual STRING interaction data
proteins <- unique(data$Protein)
mock_edges <- expand.grid(proteins, proteins, stringsAsFactors = FALSE)
mock_edges <- mock_edges[mock_edges$Var1 != mock_edges$Var2, ]  # Remove self-loops
set.seed(42)  # For reproducibility
mock_edges <- mock_edges[sample(seq_len(nrow(mock_edges)), min(100, nrow(mock_edges))), ]  # Subset for clarity

# Prepare graph edges
colnames(mock_edges) <- c("protein1", "protein2")
# Pass the vertex table explicitly so the graph contains every protein (even
# one that drew no edge) under its own name. Without it the vertices are
# ordered by first appearance in the edge list, and overwriting
# V(graph)$name with `proteins` afterwards would attach edges to the wrong
# proteins (or fail when the lengths differ).
graph <- graph_from_data_frame(
    mock_edges,
    directed = FALSE,
    vertices = data.frame(name = proteins, stringsAsFactors = FALSE)
)

# Map attributes to nodes (match() picks the first row for each protein)
V(graph)$size <- data$Risk.Frequency[match(V(graph)$name, data$Protein)] * 10  # Scale size for visualization
V(graph)$logFC <- data$log2FoldChange[match(V(graph)$name, data$Protein)]  # For color scale
V(graph)$label <- V(graph)$name  # Add labels for node names

# Handle missing values
V(graph)$size[is.na(V(graph)$size)] <- 5  # Default size for NA Risk.Frequency
V(graph)$logFC[is.na(V(graph)$logFC)] <- 0  # Default logFC for missing values

# Visualize using ggraph
# NOTE(review): low = "blue" and high = "navy" are both blues, so the sign of
# the fold change is hard to read from colour alone -- confirm this is intended.
ggraph(graph, layout = "fr") + 
    geom_edge_link(color = "gray70", alpha = 0.7) +  # Edges
    geom_node_point(aes(size = size, color = logFC)) +  # Nodes
    geom_node_text(aes(label = label), repel = TRUE, size = 6) +  # Protein names
    scale_color_gradient2(low = "blue", mid = "white", high = "navy", midpoint = 0,  name = "Log2 FC") +  # Log2FC color scale
    theme_void() + 
    # Title names this figure's own pathway (it previously repeated the
    # cytokine pathway title).
    labs(title = "Actin.filament.bundle.assembly",
         size = "Risk Frequency") +
    theme(legend.position = "right")