Build the PPI network using STRING. The dataset was obtained from the STRING website:
https://string-db.org/cgi/network?taskId=bKElIqhE3Ntr&sessionId=bQYS56KsX3jK
# Load libraries (install them previously)
library(STRINGdb)
library(igraph)
## Warning: package 'igraph' was built under R version 4.4.3
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:igraph':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(httr)
## Warning: package 'httr' was built under R version 4.4.3
## Check the current working directory: the relative file name passed to
## read.delim() below is resolved against it
getwd()
## [1] "C:/Users/Owner/Desktop"
# Read the STRING export (tab-separated, first line is the header) from the
# current working directory
data_ppi <- read.delim(
  file = "string_interactions_short.tsv",
  sep = "\t",
  header = TRUE
)
# Explore the data: str() shows the structure, head()/tail() the first/last
# 6 rows, dim() the number of rows and columns
str(object = data_ppi)
## 'data.frame': 55 obs. of 13 variables:
## $ X.node1 : chr "CDC45" "CDC45" "CDC45" "CDC45" ...
## $ node2 : chr "MCM5" "POLE2" "GINS2" "TOPBP1" ...
## $ node1_string_id : chr "9606.ENSP00000405726" "9606.ENSP00000405726" "9606.ENSP00000405726" "9606.ENSP00000405726" ...
## $ node2_string_id : chr "9606.ENSP00000216122" "9606.ENSP00000216367" "9606.ENSP00000253462" "9606.ENSP00000260810" ...
## $ neighborhood_on_chromosome : int 0 0 0 0 0 0 0 0 0 0 ...
## $ gene_fusion : int 0 0 0 0 0 0 0 0 0 0 ...
## $ phylogenetic_cooccurrence : num 0 0 0 0 0 0 0 0 0 0 ...
## $ homology : num 0 0 0 0 0 0 0 0 0 0 ...
## $ coexpression : num 0.644 0.604 0.795 0.211 0.821 0.392 0.792 0.824 0.314 0.591 ...
## $ experimentally_determined_interaction: num 0.995 0.986 0.996 0.984 0.994 0.996 0.997 0.997 0.997 0.997 ...
## $ database_annotated : num 0.54 0.5 0.54 0 0.54 0.54 0.54 0.54 0.54 0.54 ...
## $ automated_textmining : num 0.975 0.75 0.841 0.994 0.906 0.931 0.944 0.701 0.995 0.961 ...
## $ combined_score : num 0.999 0.999 0.999 0.999 0.999 0.999 0.999 0.999 0.999 0.999 ...
# Preview the first six rows of the interaction table
head(data_ppi, n = 6L)
## X.node1 node2 node1_string_id node2_string_id
## 1 CDC45 MCM5 9606.ENSP00000405726 9606.ENSP00000216122
## 2 CDC45 POLE2 9606.ENSP00000405726 9606.ENSP00000216367
## 3 CDC45 GINS2 9606.ENSP00000405726 9606.ENSP00000253462
## 4 CDC45 TOPBP1 9606.ENSP00000405726 9606.ENSP00000260810
## 5 CDC45 MCM4 9606.ENSP00000405726 9606.ENSP00000262105
## 6 CDC45 GINS1 9606.ENSP00000405726 9606.ENSP00000262460
## neighborhood_on_chromosome gene_fusion phylogenetic_cooccurrence homology
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## coexpression experimentally_determined_interaction database_annotated
## 1 0.644 0.995 0.54
## 2 0.604 0.986 0.50
## 3 0.795 0.996 0.54
## 4 0.211 0.984 0.00
## 5 0.821 0.994 0.54
## 6 0.392 0.996 0.54
## automated_textmining combined_score
## 1 0.975 0.999
## 2 0.750 0.999
## 3 0.841 0.999
## 4 0.994 0.999
## 5 0.906 0.999
## 6 0.931 0.999
# Preview the last six rows of the interaction table
tail(data_ppi, n = 6L)
## X.node1 node2 node1_string_id node2_string_id
## 50 MCM6 POLE2 9606.ENSP00000264156 9606.ENSP00000216367
## 51 MCM6 TOPBP1 9606.ENSP00000264156 9606.ENSP00000260810
## 52 MCM6 MCM7 9606.ENSP00000264156 9606.ENSP00000307288
## 53 MCM7 POLE2 9606.ENSP00000307288 9606.ENSP00000216367
## 54 MCM7 TOPBP1 9606.ENSP00000307288 9606.ENSP00000260810
## 55 POLE2 TOPBP1 9606.ENSP00000216367 9606.ENSP00000260810
## neighborhood_on_chromosome gene_fusion phylogenetic_cooccurrence homology
## 50 0 0 0 0.000
## 51 0 0 0 0.000
## 52 0 0 0 0.777
## 53 0 0 0 0.000
## 54 0 0 0 0.000
## 55 0 0 0 0.000
## coexpression experimentally_determined_interaction database_annotated
## 50 0.530 0.950 0.5
## 51 0.257 0.000 0.0
## 52 0.973 0.997 0.9
## 53 0.460 0.896 0.5
## 54 0.193 0.000 0.0
## 55 0.233 0.387 0.0
## automated_textmining combined_score
## 50 0.644 0.995
## 51 0.642 0.722
## 52 0.990 0.999
## 53 0.435 0.982
## 54 0.497 0.577
## 55 0.424 0.705
# Number of rows and columns of the imported table
dim(data_ppi)
## [1] 55 13
# The data.frame has 55 obs (rows) and 13 variables (columns)
# Rename the columns of interest first (%>% and rename() come from dplyr), so
# that the edge table built below -- and therefore the graph -- carries the
# new names. Previously the rename was applied to data_ppi only, while the
# graph was built from a subset that still had `combined_score`, so
# E(network_graph)$interaction_score was NULL and the edge widths were never
# scaled by the score.
data_ppi <- data_ppi %>%
  rename(
    protein_A = X.node1,
    protein_B = node2,
    interaction_score = combined_score
  )
# Filter columns: keep the two interactor columns and the combined score
ppi_network <- data_ppi[, c("protein_A", "protein_B", "interaction_score")]
# Construct the undirected PPI network from the edge table: the first two
# columns give the endpoints of each edge (each node is a protein) and the
# remaining column is stored as the edge attribute `interaction_score`
network_graph <- graph_from_data_frame(ppi_network, directed = FALSE)
# Plot
plot(network_graph,
  vertex.label.cex = 0.8,
  vertex.color = "red",
  edge.width = E(network_graph)$interaction_score * 5,
  main = "PPI Network"
)
# Note --> edge.width = E(network_graph)$interaction_score * 5:
# the score is read from the edge attribute; multiplying by 5 scales the width
References: https://www.biostars.org/ https://string-db.org/help/getting_started/