PPI network

Build the PPI network using STRING. The dataset was obtained from the STRING website:

https://string-db.org/cgi/network?taskId=bKElIqhE3Ntr&sessionId=bQYS56KsX3jK

# Load libraries (they must already be installed)
library(STRINGdb)
library(igraph)

## Warning: package 'igraph' was built under R version 4.4.3

## 
## Attaching package: 'igraph'

## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum

## The following object is masked from 'package:base':
## 
##     union

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:igraph':
## 
##     as_data_frame, groups, union

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(httr)

## Warning: package 'httr' was built under R version 4.4.3

# Check the current working directory (the relative file path used to import
# the TSV file is resolved against it)
getwd()

## [1] "C:/Users/Owner/Desktop"

# Read the STRING interaction table from the working directory.
# read.delim() already assumes a header row and tab-separated fields, so
# those two arguments do not need to be spelled out.
data_ppi <- read.delim(file = "string_interactions_short.tsv")

# Explore the data: str() shows the structure (column names and types),
# head() and tail() show the first and last six rows, and dim() gives the
# number of rows and columns.
str(data_ppi)

## 'data.frame':    55 obs. of  13 variables:
##  $ X.node1                              : chr  "CDC45" "CDC45" "CDC45" "CDC45" ...
##  $ node2                                : chr  "MCM5" "POLE2" "GINS2" "TOPBP1" ...
##  $ node1_string_id                      : chr  "9606.ENSP00000405726" "9606.ENSP00000405726" "9606.ENSP00000405726" "9606.ENSP00000405726" ...
##  $ node2_string_id                      : chr  "9606.ENSP00000216122" "9606.ENSP00000216367" "9606.ENSP00000253462" "9606.ENSP00000260810" ...
##  $ neighborhood_on_chromosome           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ gene_fusion                          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ phylogenetic_cooccurrence            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ homology                             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ coexpression                         : num  0.644 0.604 0.795 0.211 0.821 0.392 0.792 0.824 0.314 0.591 ...
##  $ experimentally_determined_interaction: num  0.995 0.986 0.996 0.984 0.994 0.996 0.997 0.997 0.997 0.997 ...
##  $ database_annotated                   : num  0.54 0.5 0.54 0 0.54 0.54 0.54 0.54 0.54 0.54 ...
##  $ automated_textmining                 : num  0.975 0.75 0.841 0.994 0.906 0.931 0.944 0.701 0.995 0.961 ...
##  $ combined_score                       : num  0.999 0.999 0.999 0.999 0.999 0.999 0.999 0.999 0.999 0.999 ...

# Preview the first six rows
head(data_ppi)

##   X.node1  node2      node1_string_id      node2_string_id
## 1   CDC45   MCM5 9606.ENSP00000405726 9606.ENSP00000216122
## 2   CDC45  POLE2 9606.ENSP00000405726 9606.ENSP00000216367
## 3   CDC45  GINS2 9606.ENSP00000405726 9606.ENSP00000253462
## 4   CDC45 TOPBP1 9606.ENSP00000405726 9606.ENSP00000260810
## 5   CDC45   MCM4 9606.ENSP00000405726 9606.ENSP00000262105
## 6   CDC45  GINS1 9606.ENSP00000405726 9606.ENSP00000262460
##   neighborhood_on_chromosome gene_fusion phylogenetic_cooccurrence homology
## 1                          0           0                         0        0
## 2                          0           0                         0        0
## 3                          0           0                         0        0
## 4                          0           0                         0        0
## 5                          0           0                         0        0
## 6                          0           0                         0        0
##   coexpression experimentally_determined_interaction database_annotated
## 1        0.644                                 0.995               0.54
## 2        0.604                                 0.986               0.50
## 3        0.795                                 0.996               0.54
## 4        0.211                                 0.984               0.00
## 5        0.821                                 0.994               0.54
## 6        0.392                                 0.996               0.54
##   automated_textmining combined_score
## 1                0.975          0.999
## 2                0.750          0.999
## 3                0.841          0.999
## 4                0.994          0.999
## 5                0.906          0.999
## 6                0.931          0.999

# Preview the last six rows
tail(data_ppi)

##    X.node1  node2      node1_string_id      node2_string_id
## 50    MCM6  POLE2 9606.ENSP00000264156 9606.ENSP00000216367
## 51    MCM6 TOPBP1 9606.ENSP00000264156 9606.ENSP00000260810
## 52    MCM6   MCM7 9606.ENSP00000264156 9606.ENSP00000307288
## 53    MCM7  POLE2 9606.ENSP00000307288 9606.ENSP00000216367
## 54    MCM7 TOPBP1 9606.ENSP00000307288 9606.ENSP00000260810
## 55   POLE2 TOPBP1 9606.ENSP00000216367 9606.ENSP00000260810
##    neighborhood_on_chromosome gene_fusion phylogenetic_cooccurrence homology
## 50                          0           0                         0    0.000
## 51                          0           0                         0    0.000
## 52                          0           0                         0    0.777
## 53                          0           0                         0    0.000
## 54                          0           0                         0    0.000
## 55                          0           0                         0    0.000
##    coexpression experimentally_determined_interaction database_annotated
## 50        0.530                                 0.950                0.5
## 51        0.257                                 0.000                0.0
## 52        0.973                                 0.997                0.9
## 53        0.460                                 0.896                0.5
## 54        0.193                                 0.000                0.0
## 55        0.233                                 0.387                0.0
##    automated_textmining combined_score
## 50                0.644          0.995
## 51                0.642          0.722
## 52                0.990          0.999
## 53                0.435          0.982
## 54                0.497          0.577
## 55                0.424          0.705

# Number of rows and columns
dim(data_ppi)

## [1] 55 13

# The data frame has 55 observations (rows) and 13 variables (columns).

# Rename the columns used for the network (%>% and rename() come from dplyr).
data_ppi <- data_ppi %>%
  rename(
    protein_A = X.node1,
    protein_B = node2,
    interaction_score = combined_score
  )

# Keep only the two protein columns and the score. The subset is taken AFTER
# the rename so that the graph's edge attribute is called "interaction_score".
# Previously the subset was taken before the rename and kept the name
# "combined_score", which left E(network_graph)$interaction_score NULL (and
# NULL * 5 is numeric(0)), so the scores never reached the plot.
ppi_network <- data_ppi[, c("protein_A", "protein_B", "interaction_score")]

# Build an undirected PPI graph: the first two columns are the edge endpoints
# (each protein is a vertex); the remaining column becomes an edge attribute.
network_graph <- graph_from_data_frame(ppi_network, directed = FALSE)

# Fail loudly if the score attribute is missing instead of plotting without it.
stopifnot(is.numeric(E(network_graph)$interaction_score))

# Plot. Edge width is the interaction score multiplied by 5; the scores lie
# between 0 and 1, so the factor only scales the line widths to make the
# differences visible.
plot(network_graph,
     vertex.label.cex = 0.8,
     vertex.color = "red",
     edge.width = E(network_graph)$interaction_score * 5,
     main = "PPI Network")

References: https://www.biostars.org/ https://string-db.org/help/getting_started/

PPI network

2025-04-17