PRINCIPLE

The R language was primarily designed as a language for data manipulation, modeling and visualization, and many of its data structures reflect this view.

When R is started, a workspace is created, and that workspace is where the user creates and manipulates variables. This workspace is an environment, and an environment is a set of bindings of names, or symbols, to values. The top-level workspace can be accessed through its name, which is .GlobalEnv. Assignment of a value to a variable is generally done with either the = (equals) character or a special symbol that is the concatenation of less-than and minus, <-. Assignment creates a binding between a symbol and a value in a particular environment.

An R package typically consists of a coherent collection of functions and data structures that are suitable for addressing a particular problem.

Here we use the following packages:
1. bio3d
2. ggbiplot
3. Peptides

ASSIGNMENT

PROJECT 1

# Download the PDB file of one structure.
#
# protein: PDB identifier (e.g. "4PGU"). ".pdb" is appended to it to build
#          both the remote file name and the local destination file name.
# Returns the value of download.file().
download.pdb <- function(protein) {
  pdb_name <- paste0(protein, ".pdb")
  remote <- paste0("http://www.rcsb.org/pdb/files/", pdb_name)
  download.file(remote, destfile = pdb_name)
}
# PDB identifiers to fetch: 4PGU (membrane protein) and 3ZKV (cytosolic
# protein).
proteins <- list("4PGU", "3ZKV")
# library() stops immediately when plyr is not installed; require() would only
# return FALSE and defer the failure to the l_ply() call below.
library(plyr)
# Download every structure; l_ply() is used because the results are discarded.
l_ply(proteins, download.pdb)
library("bio3d")
## Warning: package 'bio3d' was built under R version 3.4.4
library("Peptides")
## Warning: package 'Peptides' was built under R version 3.4.4
# Membrane protein (PDB 4PGU)
# The file name uses the same case as the file written by download.pdb
# ("4PGU.pdb"), so the read also works on case-sensitive file systems.
mem.pgu <- read.pdb("4PGU.pdb")
##    PDB has ALT records, taking A only, rm.alt=TRUE
seq.pgu <- pdbseq(mem.pgu)

# Thermostability value of the membrane protein: mean of the non-zero
# per-residue aliphatic index values.
seq.pgu.i <- aIndex(seq.pgu)
ik <- which(seq.pgu.i == 0.0)
# Guard the negative index: x[-integer(0)] selects nothing, which would turn
# the mean into NaN when no residue has a zero index.
seq.pgu.in <- if (length(ik) > 0) seq.pgu.i[-ik] else seq.pgu.i
therm.pgu <- mean(seq.pgu.in)
(therm.pgu)
## [1] 305.9574
# Protein interaction: mean per-residue Boman index.
seq.pgu.pp <- boman(seq.pgu)
bin.pot <- mean(seq.pgu.pp)
bin.pot
## [1] -0.6791787
# bin.pot is below 2.48 (the Boman-index threshold for high binding
# potential), which implies the protein has lower affinity to bind with
# another protein.


# Cytosolic protein (PDB 3ZKV)
# The file name uses the same case as the file written by download.pdb
# ("3ZKV.pdb"), so the read also works on case-sensitive file systems.
cyto.zkv <- read.pdb("3ZKV.pdb")
seq.zkv <- pdbseq(cyto.zkv)
# Amino acid descriptors for the residues of the cytosolic protein, followed
# by a per-descriptor summary.
seq.zkv.d <- aaDescriptors(seq.zkv)
summary(seq.zkv.d)
##      PP1.1             PP2.1             PP3.1              KF1.1        
##  Min.   :-1.0000   Min.   :-1.0000   Min.   :-1.00000   Min.   :-1.5600  
##  1st Qu.:-0.9000   1st Qu.:-0.6400   1st Qu.:-0.24000   1st Qu.:-1.0400  
##  Median :-0.5500   Median :-0.4000   Median :-0.08000   Median :-0.4100  
##  Mean   :-0.1225   Mean   :-0.2631   Mean   :-0.01286   Mean   :-0.2437  
##  3rd Qu.: 0.6700   3rd Qu.: 0.0300   3rd Qu.: 0.37000   3rd Qu.: 0.3000  
##  Max.   : 1.0000   Max.   : 1.0000   Max.   : 1.00000   Max.   : 2.0600  
##      KF2.1             KF3.1              KF4.1         
##  Min.   :-1.9600   Min.   :-1.61000   Min.   :-1.57000  
##  1st Qu.:-0.7100   1st Qu.:-0.42000   1st Qu.:-0.77000  
##  Median :-0.0700   Median :-0.23000   Median :-0.27000  
##  Mean   :-0.1287   Mean   : 0.05529   Mean   : 0.02849  
##  3rd Qu.: 0.2400   3rd Qu.: 0.80000   3rd Qu.: 0.81000  
##  Max.   : 2.1000   Max.   : 2.04000   Max.   : 1.87000  
##      KF5.1              KF6.1             KF7.1          
##  Min.   :-1.70000   Min.   :-2.0500   Min.   :-1.890000  
##  1st Qu.:-0.71000   1st Qu.:-0.8100   1st Qu.:-0.830000  
##  Median :-0.21000   Median : 0.0300   Median : 0.240000  
##  Mean   :-0.07866   Mean   :-0.2244   Mean   : 0.001294  
##  3rd Qu.: 0.50000   3rd Qu.: 0.4000   3rd Qu.: 0.920000  
##  Max.   : 2.00000   Max.   : 2.4100   Max.   : 1.520000  
##      KF8.1             KF9.1              KF10.1             Z1.1        
##  Min.   :-2.3000   Min.   :-2.30000   Min.   :-2.3300   Min.   :-4.3600  
##  1st Qu.:-0.7600   1st Qu.:-0.48000   1st Qu.:-0.4400   1st Qu.:-2.8500  
##  Median :-0.0800   Median : 0.21000   Median : 0.1900   Median : 0.7500  
##  Mean   :-0.1106   Mean   : 0.04092   Mean   :-0.0193   Mean   :-0.2018  
##  3rd Qu.: 0.4700   3rd Qu.: 0.66000   3rd Qu.: 0.7000   3rd Qu.: 2.3900  
##  Max.   : 2.3600   Max.   : 1.71000   Max.   : 1.6300   Max.   : 3.9800  
##       Z2.1              Z3.1             Z4.1              Z5.1        
##  Min.   :-4.0600   Min.   :-3.500   Min.   :-3.0400   Min.   :-2.6500  
##  1st Qu.:-1.7300   1st Qu.:-1.490   1st Qu.:-1.3400   1st Qu.:-0.3800  
##  Median :-1.0700   Median :-0.110   Median :-0.7200   Median : 0.2600  
##  Mean   :-0.4083   Mean   :-0.309   Mean   :-0.3544   Mean   : 0.1833  
##  3rd Qu.: 0.8900   3rd Qu.: 0.600   3rd Qu.: 0.5400   3rd Qu.: 0.8400  
##  Max.   : 3.9400   Max.   : 3.750   Max.   : 3.9000   Max.   : 2.0000  
##       F1.1              F2.1              F3.1               F4.1        
##  Min.   :-1.3870   Min.   :-2.2190   Min.   :-1.65600   Min.   :-2.0800  
##  1st Qu.:-1.2290   1st Qu.:-0.4500   1st Qu.:-0.61000   1st Qu.:-0.3280  
##  Median :-0.2050   Median : 0.4610   Median :-0.02400   Median : 0.3330  
##  Mean   :-0.1001   Mean   : 0.2093   Mean   :-0.02822   Mean   : 0.3088  
##  3rd Qu.: 0.9970   3rd Qu.: 0.8210   3rd Qu.: 0.70300   3rd Qu.: 1.0260  
##  Max.   : 1.5240   Max.   : 1.3880   Max.   : 2.06900   Max.   : 1.9040  
##       F5.1               F6.1               T1.1             T2.1         
##  Min.   :-1.11500   Min.   :-1.76200   Min.   :-10.61   Min.   :-3.54000  
##  1st Qu.:-0.19600   1st Qu.:-0.60000   1st Qu.: -5.97   1st Qu.:-0.94000  
##  Median : 0.00100   Median :-0.06800   Median : -4.38   Median :-0.28000  
##  Mean   : 0.08623   Mean   : 0.02142   Mean   : -4.29   Mean   : 0.04866  
##  3rd Qu.: 0.11700   3rd Qu.: 0.50200   3rd Qu.: -3.00   3rd Qu.: 0.98000  
##  Max.   : 3.84700   Max.   : 2.72800   Max.   :  5.73   Max.   : 3.89000  
##       T3.1               T4.1              T5.1            VHSE1.1        
##  Min.   :-2.34000   Min.   :-1.9600   Min.   :-0.7900   Min.   :-1.47000  
##  1st Qu.:-0.49000   1st Qu.:-0.3900   1st Qu.:-0.2100   1st Qu.:-0.96000  
##  Median : 0.01000   Median : 0.3100   Median : 0.3300   Median : 0.15000  
##  Mean   :-0.05346   Mean   : 0.2676   Mean   : 0.4768   Mean   : 0.04167  
##  3rd Qu.: 0.63000   3rd Qu.: 1.1000   3rd Qu.: 0.9500   3rd Qu.: 1.01000  
##  Max.   : 1.39000   Max.   : 1.6400   Max.   : 3.2500   Max.   : 1.52000  
##     VHSE2.1            VHSE3.1            VHSE4.1       
##  Min.   :-1.67000   Min.   :-2.63000   Min.   :-1.9100  
##  1st Qu.:-0.86000   1st Qu.:-0.50000   1st Qu.:-0.9200  
##  Median :-0.14000   Median : 0.10000   Median :-0.1600  
##  Mean   :-0.08737   Mean   :-0.04296   Mean   :-0.2499  
##  3rd Qu.: 0.40000   3rd Qu.: 0.37000   3rd Qu.: 0.3600  
##  Max.   : 2.06000   Max.   : 1.79000   Max.   : 2.2800  
##     VHSE5.1            VHSE6.1           VHSE7.1            VHSE8.1       
##  Min.   :-2.68000   Min.   :-1.6100   Min.   :-1.61000   Min.   :-1.3400  
##  1st Qu.:-0.06000   1st Qu.:-1.3700   1st Qu.:-0.64000   1st Qu.:-0.5200  
##  Median : 0.22000   Median :-0.0100   Median :-0.16000   Median :-0.1300  
##  Mean   :-0.03978   Mean   :-0.2049   Mean   : 0.04066   Mean   :-0.0169  
##  3rd Qu.: 0.30000   3rd Qu.: 0.4200   3rd Qu.: 0.73000   3rd Qu.: 0.1300  
##  Max.   : 1.64000   Max.   : 1.4700   Max.   : 2.01000   Max.   : 3.5600  
##    ProtFP1.1         ProtFP2.1         ProtFP3.1         ProtFP4.1      
##  Min.   :-6.6100   Min.   :-8.7200   Min.   :-3.5900   Min.   :-4.5800  
##  1st Qu.:-4.5700   1st Qu.:-2.5500   1st Qu.:-2.2900   1st Qu.:-1.1200  
##  Median :-0.1000   Median :-1.3300   Median :-0.8300   Median : 0.7100  
##  Mean   : 0.1791   Mean   :-0.3101   Mean   :-0.6521   Mean   : 0.2796  
##  3rd Qu.: 5.1100   3rd Qu.: 2.2000   3rd Qu.: 0.8900   3rd Qu.: 1.1100  
##  Max.   : 7.3300   Max.   : 6.6000   Max.   : 4.1800   Max.   : 3.0000  
##    ProtFP5.1         ProtFP6.1          ProtFP7.1       
##  Min.   :-3.2200   Min.   :-3.54000   Min.   :-2.95000  
##  1st Qu.:-1.2300   1st Qu.:-0.55000   1st Qu.:-0.45000  
##  Median :-0.3100   Median : 0.08000   Median : 0.00000  
##  Mean   :-0.1774   Mean   : 0.05045   Mean   : 0.04758  
##  3rd Qu.: 0.9900   3rd Qu.: 0.76000   3rd Qu.: 0.74000  
##  Max.   : 3.2700   Max.   : 2.91000   Max.   : 1.99000  
##    ProtFP8.1             ST1.1             ST2.1            ST3.1        
##  Min.   :-2.790000   Min.   :-1.8440   Min.   :-1.010   Min.   :-0.9170  
##  1st Qu.:-0.510000   1st Qu.:-1.1330   1st Qu.:-0.791   1st Qu.:-0.6270  
##  Median : 0.300000   Median :-0.8260   Median :-0.379   Median :-0.1930  
##  Mean   : 0.008729   Mean   :-0.8091   Mean   :-0.258   Mean   :-0.1694  
##  3rd Qu.: 0.870000   3rd Qu.:-0.6290   3rd Qu.: 0.228   3rd Qu.: 0.0380  
##  Max.   : 1.650000   Max.   : 0.8530   Max.   : 0.731   Max.   : 1.1000  
##      ST4.1              ST5.1              ST6.1         
##  Min.   :-1.16300   Min.   :-0.93700   Min.   :-3.31700  
##  1st Qu.:-0.21400   1st Qu.:-0.56100   1st Qu.:-0.77500  
##  Median :-0.06500   Median :-0.25300   Median : 0.27300  
##  Mean   :-0.05008   Mean   :-0.07831   Mean   :-0.06242  
##  3rd Qu.:-0.04900   3rd Qu.: 0.53800   3rd Qu.: 1.01100  
##  Max.   : 0.85900   Max.   : 1.12000   Max.   : 1.09100  
##      ST7.1             ST8.1            BLOSUM1.1           BLOSUM2.1     
##  Min.   :-1.0990   Min.   :-0.89400   Min.   :-1.620000   Min.   :-1.230  
##  1st Qu.:-0.1470   1st Qu.:-0.71700   1st Qu.:-1.140000   1st Qu.:-0.860  
##  Median : 0.1660   Median :-0.07500   Median : 0.190000   Median :-0.450  
##  Mean   : 0.1141   Mean   : 0.06613   Mean   :-0.003723   Mean   :-0.265  
##  3rd Qu.: 0.2480   3rd Qu.: 0.36700   3rd Qu.: 1.090000   3rd Qu.: 0.220  
##  Max.   : 1.2560   Max.   : 2.52200   Max.   : 1.550000   Max.   : 2.280  
##    BLOSUM3.1         BLOSUM4.1           BLOSUM5.1        
##  Min.   :-0.9700   Min.   :-1.610000   Min.   :-1.240000  
##  1st Qu.:-0.8600   1st Qu.:-0.360000   1st Qu.:-0.550000  
##  Median :-0.6300   Median : 0.000000   Median : 0.030000  
##  Mean   :-0.2491   Mean   :-0.007583   Mean   : 0.009381  
##  3rd Qu.: 0.3200   3rd Qu.: 0.380000   3rd Qu.: 0.240000  
##  Max.   : 1.7300   Max.   : 1.550000   Max.   : 1.830000  
##    BLOSUM6.1          BLOSUM7.1          BLOSUM8.1       
##  Min.   :-2.02000   Min.   :-1.62000   Min.   :-1.96000  
##  1st Qu.: 0.01000   1st Qu.:-0.30000   1st Qu.:-0.08000  
##  Median : 0.20000   Median : 0.01000   Median : 0.15000  
##  Mean   : 0.09162   Mean   :-0.02557   Mean   :-0.06479  
##  3rd Qu.: 0.34000   3rd Qu.: 0.21000   3rd Qu.: 0.20000  
##  Max.   : 1.19000   Max.   : 1.21000   Max.   : 0.87000  
##    BLOSUM9.1          BLOSUM10.1         MSWHIM1.1         MSWHIM2.1      
##  Min.   :-1.20000   Min.   :-1.29000   Min.   :-1.0000   Min.   :-1.0000  
##  1st Qu.:-0.22000   1st Qu.:-0.25000   1st Qu.:-0.7400   1st Qu.: 0.2000  
##  Median : 0.01000   Median : 0.06000   Median :-0.5800   Median : 0.6700  
##  Mean   : 0.09893   Mean   : 0.02646   Mean   :-0.3291   Mean   : 0.4594  
##  3rd Qu.: 0.43000   3rd Qu.: 0.33000   3rd Qu.: 0.1400   3rd Qu.: 0.8300  
##  Max.   : 1.36000   Max.   : 0.99000   Max.   : 1.0000   Max.   : 1.0000  
##    MSWHIM3.1      
##  Min.   :-1.0000  
##  1st Qu.:-0.6600  
##  Median :-0.3200  
##  Mean   :-0.3532  
##  3rd Qu.:-0.1600  
##  Max.   : 1.0000
# Thermostability value of the cytosolic protein: mean of the non-zero
# per-residue aliphatic index values.
seq.zkv.i <- aIndex(seq.zkv)
jk <- which(seq.zkv.i == 0.0)
# Guard the negative index: x[-integer(0)] selects nothing, which would turn
# the mean into NaN when no residue has a zero index.
seq.zkv.in <- if (length(jk) > 0) seq.zkv.i[-jk] else seq.zkv.i
therm.zkv <- mean(seq.zkv.in)
(therm.zkv)
## [1] 306.4413
# Protein interaction: mean per-residue Boman index.
seq.zkv.pp <- boman(seq.zkv)
bin.pot1 <- mean(seq.zkv.pp)
(bin.pot1)
## [1] 1.220504
# bin.pot1 is below 2.48 (the Boman-index threshold for high binding
# potential), which implies the protein has lower affinity to bind with
# another protein.
#Principal components
# Principal component analysis of the physicochemical properties of a protein.
#
# seq: character vector of amino-acid residues (the script passes the result
#      of pdbseq()).
# ...: optional further arguments forwarded to prcomp(). prcomp() defaults to
#      scale. = FALSE, so columns on larger numeric scales contribute more
#      variance; pass scale. = TRUE to standardise the columns first.
# Returns the "prcomp" object computed from the numeric property matrix built
# by protana1() (columns CHARGE, P-P, AMPIPHILLICITY, HYDRPHOBICITY,
# ISOELECTRIC, MOL WEIGHT).
protanapca <- function(seq, ...) {
  # The property matrix is built in one place (protana1); the labelled
  # character table the old body also assembled was never used and is gone.
  prcomp(protana1(seq), ...)
}
# Table of physicochemical properties, one row per element of `seq`.
#
# seq: character vector of amino-acid residues (the script passes the result
#      of pdbseq()).
# Returns a character matrix whose first column "AA" holds `seq` and whose
# remaining columns are the properties computed by protana1() (CHARGE, P-P,
# AMPIPHILLICITY, HYDRPHOBICITY, ISOELECTRIC, MOL WEIGHT). Binding the
# character column to the numeric matrix coerces every value to character.
protana <- function(seq) {
  props <- protana1(seq)
  tab <- cbind(seq, props)
  colnames(tab) <- c("AA", colnames(props))
  tab
}
# Function for the matrix: residue column "AA" followed by the property
# columns.
#
# seq: character vector of amino-acid residues.
# Returns exactly what protana() returns; the previous body was a line-by-line
# copy of protana(), so it now delegates instead of duplicating the code.
prot <- function(seq) {
  protana(seq)
}
# Numeric matrix of physicochemical properties, one row per element of `seq`.
#
# seq: character vector of amino-acid residues (the script passes the result
#      of pdbseq()).
# Returns a numeric matrix with the columns CHARGE, P-P, AMPIPHILLICITY,
# HYDRPHOBICITY, ISOELECTRIC and MOL WEIGHT.
#
# The Peptides functions are called through their namespace, so the function
# no longer attaches packages to the search path as a side effect (bio3d was
# attached but never used here) and no local variable shadows pI() or mw().
protana1 <- function(seq) {
  props <- cbind(
    Peptides::charge(seq, pH = 7, pKscale = "EMBOSS"),
    Peptides::boman(seq),
    Peptides::hmoment(seq, angle = 100, window = 11),
    Peptides::hydrophobicity(seq, scale = "KyteDoolittle"),
    Peptides::pI(seq, pKscale = "EMBOSS"),
    Peptides::mw(seq, monoisotopic = FALSE)
  )
  colnames(props) <- c(
    "CHARGE", "P-P", "AMPIPHILLICITY", "HYDRPHOBICITY", "ISOELECTRIC",
    "MOL WEIGHT"
  )
  props
}

# Membrane protein (PDB 4PGU): property tables and PCA biplot.
# The file name uses the same case as the file written by download.pdb
# ("4PGU.pdb"), so the read also works on case-sensitive file systems.
mem.pgu <- read.pdb("4PGU.pdb")
##    PDB has ALT records, taking A only, rm.alt=TRUE
seq.pgu <- pdbseq(mem.pgu)
pgu.a <- protana(seq.pgu)
pca.pgu <- protanapca(seq.pgu)
# Single-panel plotting layout for the biplot below.
par(mfrow = c(1, 1))
pgu.m <- prot(seq.pgu)

biplot(pca.pgu, main = "4PGU")

# Cytosolic protein (PDB 3ZKV): property tables and PCA biplot.
# The file name uses the same case as the file written by download.pdb
# ("3ZKV.pdb"), so the read also works on case-sensitive file systems.
cyto.zkv <- read.pdb("3ZKV.pdb")
seq.zkv <- pdbseq(cyto.zkv)
zkv.a <- protana(seq.zkv)
pca.zkv <- protanapca(seq.zkv)
zkv.m <- prot(seq.zkv)
# Numeric property matrix (without the residue column).
zpca <- protana1(seq.zkv)


# PCA biplot
biplot(pca.zkv, main = "3ZKV")