# Importation des libraries R
library(MASS)
library(FactoMineR)
library(cluster)
library(ade4)
##
## Attaching package: 'ade4'
## The following object is masked from 'package:FactoMineR':
##
## reconst
library(plotrix)
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(grid)
library(gridExtra)
library(fpc)
library(pvclust)
library(Rtsne)
library(scatterplot3d)
library(plotly)
# Load the data files
# PhysChem descriptors
setwd("/Users/sperandio/Desktop")

# All three inputs are semicolon-separated tables with a header row,
# resolved relative to the working directory set above.
read_pocket_table <- function(path) {
  read.table(path, sep = ";", header = TRUE)
}

iPPI_pocket <- read_pocket_table("iPPI_crystal_pocket_all.csv")
PPI_pocket <- read_pocket_table("iPPI_interface_pocket_all.csv")
# NOTE(review): this file name has no underscore before "all", unlike the
# two above -- confirm it matches the file on disk.
noniPPI_pocket <- read_pocket_table("noniPPI_crystal_pocketall.csv")
#################
#################
# 1) Characterise the chemical space with t-SNE

# Every colour shares the same saturation and value; only the hue and,
# for the first three, the opacity differ.
family_colour <- function(hue, alpha = 1) {
  hsv(h = hue, s = 0.85, v = 0.85, alpha = alpha)
}

col1 <- family_colour(0.1, alpha = 0.9)
col2 <- family_colour(0.15, alpha = 0.3)
col3 <- family_colour(0.20, alpha = 0.5)
col11 <- family_colour(0.25)
col22 <- family_colour(0.30)
col33 <- family_colour(0.35)
col111 <- family_colour(0.40)
col222 <- family_colour(0.45)
col1111 <- family_colour(0.50)
col2222 <- family_colour(0.55)
col3333 <- family_colour(0.60)
col4444 <- family_colour(0.65)
col5555 <- family_colour(0.70)
col6666 <- family_colour(0.75)
col7777 <- family_colour(0.80)
col8888 <- family_colour(0.85)
col9999 <- family_colour(0.90)

# Named palette: legend label -> colour. The letter prefixes give the
# labels a fixed alphabetical order.
cols <- c(
  "A:BCL2" = col1,
  "B:Brd" = col2,
  "C:IL2" = col3,
  "D:K_Ras" = col11,
  "E:LEDGF" = col22,
  "F:MDM2" = col33,
  "G:XIAP" = col111,
  "H:ZIPA" = col222,
  "I:ICAM" = col1111,
  "J:Keap1" = col2222,
  "K:Max" = col3333,
  "L:Menin" = col4444,
  "M:gp120" = col5555,
  "N:ENZYME" = col6666,
  "O:ION" = col7777,
  "P:kinase" = col8888,
  "Q:nuclear" = col9999
)
# Alternative palettes kept for reference:
#cols <- c("1:iPPIDB"= col11, "2:eDrugs" = col2)
#cols <- c("1:pocket" = col2)
# Tag each source table with the point shape used for it in the plots
# (the values are later passed through scale_shape_identity()).
iPPI_pocket <- data.frame(iPPI_pocket, shape = "21")
PPI_pocket <- data.frame(PPI_pocket, shape = "23")
noniPPI_pocket <- data.frame(noniPPI_pocket, shape = "24")

# Stack the three tables into one data set.
ALL <- do.call(rbind, list(iPPI_pocket, PPI_pocket, noniPPI_pocket))

# Puts the columns of ALL on the search path. The attached copy is a
# snapshot: later changes to ALL are not reflected in it.
attach(ALL)
## The following object is masked from package:MASS:
##
## npr1
# Legend label for every pocket family (raw `Famille` value -> label).
# The order of this table is the legend order and the row order of ALL.
famille_labels <- c(
  BCL2 = "A:BCL2", Brd = "B:Brd", IL2 = "C:IL2", K_Ras = "D:K_Ras",
  LEDGF = "E:LEDGF", MDM2 = "F:MDM2", XIAP = "G:XIAP", ZipA = "H:ZIPA",
  ICAM = "I:ICAM", Keap1 = "J:Keap1", Max = "K:Max", Menin = "L:Menin",
  gp120 = "M:gp120", enzyme = "N:ENZYME", ion_channel = "O:ION",
  kinase = "P:kinase", nuclear = "Q:nuclear"
)

# Use ALL$Famille explicitly rather than a bare `Famille` resolved through
# the search path, so the code always reads the current data frame.
famille_raw <- as.character(ALL$Famille)
famille_rank <- match(famille_raw, names(famille_labels))
if (anyNA(famille_rank)) {
  stop(
    "Unknown value(s) in ALL$Famille: ",
    paste(unique(famille_raw[is.na(famille_rank)]), collapse = ", "),
    call. = FALSE
  )
}

# Sort rows by legend order. Sorting on the raw column only gives this
# order when it is a factor whose levels happen to be in legend order;
# a character column (the read.table default since R 4.0) sorts
# alphabetically and would not match the labels below.
ALL <- ALL[order(famille_rank), ]

# One label per row, looked up from that row's own family, so labels and
# rows stay aligned whatever the row order is.
ordre <- unname(famille_labels[as.character(ALL$Famille)])
# Descriptor columns used for the embedding.
# NOTE(review): the 3:109 range is hard-coded; presumably columns 1-2 are
# identifiers and the trailing columns are annotations -- confirm against
# the input files.
descriptors <- ALL[, 3:109]

# Keep only columns with a non-zero variance. isTRUE() treats an NA
# variance (all-NA column, or a single non-NA value) as "no variance"
# instead of letting an NA into the column index, which would fail.
has_variance <- vapply(
  descriptors,
  function(v) isTRUE(var(v, na.rm = TRUE) != 0),
  logical(1)
)

# drop = FALSE keeps a data frame even if a single column survives.
ZERO_ALL <- descriptors[, has_variance, drop = FALSE]

# Centre and scale every remaining descriptor.
SC_MALL <- scale(ZERO_ALL)
#DSC_MALL = dist(SC_MALL)
# 3-D t-SNE embedding of the scaled descriptor matrix.
# t-SNE starts from a random initial configuration; fixing the seed makes
# the embedding (and the plot built from it) reproducible between runs.
set.seed(42)
tsne_out <- Rtsne(
  SC_MALL,
  dims = 3,
  perplexity = 5,
  theta = 0.0, # 0 requests exact t-SNE rather than the Barnes-Hut approximation
  is_distance = FALSE,
  pca = TRUE,
  pca_scale = TRUE,
  pca_center = TRUE,
  max_iter = 2000,
  eta = 200,
  verbose = TRUE
)
## Read the 159 x 50 data matrix successfully!
## Using no_dims = 3, perplexity = 5.000000, and theta = 0.000000
## Computing input similarities...
## Normalizing input...
## Symmetrizing...
## Done in 0.01 seconds!
## Learning embedding...
## Iteration 50: error is 72.487791 (50 iterations in 0.03 seconds)
## Iteration 100: error is 71.938681 (50 iterations in 0.03 seconds)
## Iteration 150: error is 74.287321 (50 iterations in 0.03 seconds)
## Iteration 200: error is 71.803254 (50 iterations in 0.03 seconds)
## Iteration 250: error is 71.596861 (50 iterations in 0.03 seconds)
## Iteration 300: error is 2.103622 (50 iterations in 0.03 seconds)
## Iteration 350: error is 0.872602 (50 iterations in 0.03 seconds)
## Iteration 400: error is 0.622334 (50 iterations in 0.03 seconds)
## Iteration 450: error is 0.596746 (50 iterations in 0.03 seconds)
## Iteration 500: error is 0.586230 (50 iterations in 0.03 seconds)
## Iteration 550: error is 0.579734 (50 iterations in 0.03 seconds)
## Iteration 600: error is 0.573849 (50 iterations in 0.03 seconds)
## Iteration 650: error is 0.566394 (50 iterations in 0.03 seconds)
## Iteration 700: error is 0.561597 (50 iterations in 0.03 seconds)
## Iteration 750: error is 0.559183 (50 iterations in 0.03 seconds)
## Iteration 800: error is 0.557340 (50 iterations in 0.03 seconds)
## Iteration 850: error is 0.556170 (50 iterations in 0.03 seconds)
## Iteration 900: error is 0.555137 (50 iterations in 0.03 seconds)
## Iteration 950: error is 0.554202 (50 iterations in 0.03 seconds)
## Iteration 1000: error is 0.553354 (50 iterations in 0.03 seconds)
## Iteration 1050: error is 0.552579 (50 iterations in 0.03 seconds)
## Iteration 1100: error is 0.551863 (50 iterations in 0.03 seconds)
## Iteration 1150: error is 0.551217 (50 iterations in 0.03 seconds)
## Iteration 1200: error is 0.550595 (50 iterations in 0.03 seconds)
## Iteration 1250: error is 0.550011 (50 iterations in 0.03 seconds)
## Iteration 1300: error is 0.549561 (50 iterations in 0.03 seconds)
## Iteration 1350: error is 0.549158 (50 iterations in 0.03 seconds)
## Iteration 1400: error is 0.548783 (50 iterations in 0.03 seconds)
## Iteration 1450: error is 0.548438 (50 iterations in 0.03 seconds)
## Iteration 1500: error is 0.548142 (50 iterations in 0.03 seconds)
## Iteration 1550: error is 0.547879 (50 iterations in 0.03 seconds)
## Iteration 1600: error is 0.547634 (50 iterations in 0.03 seconds)
## Iteration 1650: error is 0.547410 (50 iterations in 0.03 seconds)
## Iteration 1700: error is 0.547204 (50 iterations in 0.03 seconds)
## Iteration 1750: error is 0.547011 (50 iterations in 0.03 seconds)
## Iteration 1800: error is 0.546824 (50 iterations in 0.03 seconds)
## Iteration 1850: error is 0.546648 (50 iterations in 0.03 seconds)
## Iteration 1900: error is 0.546481 (50 iterations in 0.03 seconds)
## Iteration 1950: error is 0.546327 (50 iterations in 0.03 seconds)
## Iteration 2000: error is 0.546183 (50 iterations in 0.03 seconds)
## Fitting performed in 1.10 seconds.
# Coordinates of the 3-D embedding, one row per pocket.
tsne_out_DF <- data.frame(
  x = tsne_out$Y[, 1],
  y = tsne_out$Y[, 2],
  z = tsne_out$Y[, 3]
)

# `ordre` is matched to the embedding purely by position, so fail early
# if the two ever get out of step.
stopifnot(length(ordre) == nrow(tsne_out_DF))

# Interactive 3-D scatter coloured by family label. The trace type and
# mode are the ones plotly reports choosing when they are omitted;
# stating them avoids relying on that guess.
plot_ly(
  tsne_out_DF,
  x = ~x, y = ~y, z = ~z,
  color = ~ordre,
  type = "scatter3d",
  mode = "markers"
)
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
# 2-D t-SNE embedding of the scaled descriptor matrix (same settings as
# the 3-D run, only `dims` differs).
# t-SNE starts from a random initial configuration; fixing the seed makes
# the embedding (and the plot built from it) reproducible between runs.
set.seed(42)
tsne_out <- Rtsne(
  SC_MALL,
  dims = 2,
  perplexity = 5,
  theta = 0.0, # 0 requests exact t-SNE rather than the Barnes-Hut approximation
  is_distance = FALSE,
  pca = TRUE,
  pca_scale = TRUE,
  pca_center = TRUE,
  max_iter = 2000,
  eta = 200,
  verbose = TRUE
)
## Read the 159 x 50 data matrix successfully!
## Using no_dims = 2, perplexity = 5.000000, and theta = 0.000000
## Computing input similarities...
## Normalizing input...
## Symmetrizing...
## Done in 0.01 seconds!
## Learning embedding...
## Iteration 50: error is 74.951336 (50 iterations in 0.02 seconds)
## Iteration 100: error is 73.677399 (50 iterations in 0.02 seconds)
## Iteration 150: error is 76.401000 (50 iterations in 0.02 seconds)
## Iteration 200: error is 73.304131 (50 iterations in 0.02 seconds)
## Iteration 250: error is 72.264808 (50 iterations in 0.02 seconds)
## Iteration 300: error is 2.501968 (50 iterations in 0.02 seconds)
## Iteration 350: error is 1.574982 (50 iterations in 0.02 seconds)
## Iteration 400: error is 1.167289 (50 iterations in 0.02 seconds)
## Iteration 450: error is 0.959475 (50 iterations in 0.02 seconds)
## Iteration 500: error is 0.902684 (50 iterations in 0.02 seconds)
## Iteration 550: error is 0.855160 (50 iterations in 0.02 seconds)
## Iteration 600: error is 0.773345 (50 iterations in 0.02 seconds)
## Iteration 650: error is 0.743385 (50 iterations in 0.02 seconds)
## Iteration 700: error is 0.694993 (50 iterations in 0.02 seconds)
## Iteration 750: error is 0.674890 (50 iterations in 0.02 seconds)
## Iteration 800: error is 0.667943 (50 iterations in 0.02 seconds)
## Iteration 850: error is 0.660752 (50 iterations in 0.02 seconds)
## Iteration 900: error is 0.654485 (50 iterations in 0.02 seconds)
## Iteration 950: error is 0.650070 (50 iterations in 0.02 seconds)
## Iteration 1000: error is 0.648100 (50 iterations in 0.02 seconds)
## Iteration 1050: error is 0.646935 (50 iterations in 0.02 seconds)
## Iteration 1100: error is 0.646009 (50 iterations in 0.02 seconds)
## Iteration 1150: error is 0.645210 (50 iterations in 0.02 seconds)
## Iteration 1200: error is 0.644484 (50 iterations in 0.02 seconds)
## Iteration 1250: error is 0.643842 (50 iterations in 0.02 seconds)
## Iteration 1300: error is 0.643249 (50 iterations in 0.02 seconds)
## Iteration 1350: error is 0.642704 (50 iterations in 0.02 seconds)
## Iteration 1400: error is 0.642178 (50 iterations in 0.02 seconds)
## Iteration 1450: error is 0.641657 (50 iterations in 0.02 seconds)
## Iteration 1500: error is 0.641155 (50 iterations in 0.02 seconds)
## Iteration 1550: error is 0.640673 (50 iterations in 0.02 seconds)
## Iteration 1600: error is 0.640222 (50 iterations in 0.02 seconds)
## Iteration 1650: error is 0.639809 (50 iterations in 0.02 seconds)
## Iteration 1700: error is 0.639404 (50 iterations in 0.02 seconds)
## Iteration 1750: error is 0.638991 (50 iterations in 0.02 seconds)
## Iteration 1800: error is 0.638601 (50 iterations in 0.02 seconds)
## Iteration 1850: error is 0.638275 (50 iterations in 0.02 seconds)
## Iteration 1900: error is 0.637950 (50 iterations in 0.02 seconds)
## Iteration 1950: error is 0.637641 (50 iterations in 0.02 seconds)
## Iteration 2000: error is 0.637335 (50 iterations in 0.02 seconds)
## Fitting performed in 0.90 seconds.
# Coordinates of the 2-D embedding, one row per pocket.
embedding <- tsne_out$Y
tsne_out_DF <- data.frame(x = embedding[, 1], y = embedding[, 2])

# Points are filled by family label (`ordre`) and shaped by source table.
# ALL and `ordre` are matched to the embedding rows by position.
family_points <- geom_point(
  aes(fill = factor(ordre)),
  size = 5,
  alpha = 0.6,
  colour = "#ffffff80"
)

g <- ggplot(
  tsne_out_DF,
  aes(x, y, group = interaction(ALL$Famille, ALL$Name), shape = factor(ALL$shape))
) +
  family_points +
  theme(plot.margin = unit(c(0.5, 5, .5, 1), "cm")) +
  ggtitle("tSNE") +
  scale_fill_manual(name = "Datasets", values = cols) +
  scale_shape_identity()

# Convert the static ggplot into an interactive plotly widget.
ggplotly(g)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`