PROBLEM 1

SPECTRAL ANALYSIS:

#setwd("/Source files")
# Read the absorbance spectrum: column `lam` is plotted as Lambda, `absorb`
# as Absorbance.
p<-read.csv("p.spectra.csv")
x<-p$lam
y<-p$absorb
par(mfrow=c(1,1))
plot(x,y,col="red",xlab ="Lambda",ylab ="Absorbance",cex=0.3,main ="Wavelength Vs Absorbance")

# Restrict the search for the peak to the open window 500 < lambda < 700.
m<-which(x>500&x<700)
length(m)
## [1] 1036
wv<-x[m]
AA<-y[m]
length(wv)
## [1] 1036
max(AA)
## [1] 0.386
max(wv)
## [1] 699.95
plot(wv,AA,col="blue",xlab ="Lambda",cex=0.5,ylab ="Absorbance",main ="Wavelength Vs Absorbance")
# Locate the absorbance maximum and the wavelength at which it occurs.
# which.max() returns a single index (the first maximum), so `l` is a scalar.
z<-which.max(AA)
maxi<-AA[z]
l<-wv[z]
wv[z]
## [1] 618.08
# Annotate the plot from the computed peak instead of a hard-coded 618.08, so
# the label and marker stay correct if the data or the window changes.
# pos=2 puts the label to the left of the peak, with the tildes pointing at it.
text(l,maxi,paste0("wvmax=",l,"~~~~~"),pos=2,col ="red")
abline(v=l,col="red")

summary(l)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   618.1   618.1   618.1   618.1   618.1   618.1

PROBLEM 2:

DISTRICT WISE ANALYSIS OF THALASSEMIC AND NON-THALASSEMIC PATIENTS-

# Nadia district: overall and non-"normal" means of HB, HbA0 and HbF.
a.nad<-read.csv("nadia.csv")
x<-a.nad$MOLECULAR
# Row indices whose MOLECULAR label is anything other than "normal".
y<-which(x!="normal")
nalpha=length(y)
n<-length(x)
# Fraction of rows that are not labelled "normal".
p<-nalpha/n
A<-a.nad$HB
B<-a.nad$HbA0
C<-a.nad$HbF
# Means over every row of the file (used later for the district bar plots).
mean.HB.nad<-mean(A)
mean.HbA0.nad<-mean(B)
mean.HbF.nad<-mean(C)
# Means restricted to the non-"normal" rows.
mean(a.nad$HB[y])
## [1] 13.05333
mean(a.nad$HbA0[y])
## [1] 87.02
mean(a.nad$HbF[y])
## [1] 0.3533333
# S is a 1 x 3 matrix, so summary() repeats each mean for every statistic.
S<-cbind(mean.HB.nad,mean.HbA0.nad,mean.HbF.nad)
summary(S)
##   mean.HB.nad    mean.HbA0.nad    mean.HbF.nad   
##  Min.   :12.95   Min.   :87.01   Min.   :0.3702  
##  1st Qu.:12.95   1st Qu.:87.01   1st Qu.:0.3702  
##  Median :12.95   Median :87.01   Median :0.3702  
##  Mean   :12.95   Mean   :87.01   Mean   :0.3702  
##  3rd Qu.:12.95   3rd Qu.:87.01   3rd Qu.:0.3702  
##  Max.   :12.95   Max.   :87.01   Max.   :0.3702
# Birbhum district: overall and non-"normal" means of HB, HbA0 and HbF.
a.bir<-read.csv("birbhum.csv")
m<-a.bir$MOLECULAR
# Row indices whose MOLECULAR label is anything other than "normal".
s<-which(m!="normal")
nalpha=length(s)
n<-length(m)
# Fraction of rows that are not labelled "normal".
p<-nalpha/n
D<-a.bir$HB
E<-a.bir$HbA0
H<-a.bir$HbF
# Means over every row of the file (used later for the district bar plots).
mean.HB.bir<-mean(D)
mean.HbA0.bir<-mean(E)
mean.HbF.bir<-mean(H)
# Means restricted to the non-"normal" rows.
mean(a.bir$HB[s])
## [1] 12.82069
mean(a.bir$HbA0[s])
## [1] 86.51724
mean(a.bir$HbF[s])
## [1] 0.3172414
# U is a 1 x 3 matrix, so summary() repeats each mean for every statistic.
U<-cbind(mean.HB.bir,mean.HbA0.bir,mean.HbF.bir)
summary(U)
##   mean.HB.bir    mean.HbA0.bir    mean.HbF.bir  
##  Min.   :12.83   Min.   :86.49   Min.   :0.336  
##  1st Qu.:12.83   1st Qu.:86.49   1st Qu.:0.336  
##  Median :12.83   Median :86.49   Median :0.336  
##  Mean   :12.83   Mean   :86.49   Mean   :0.336  
##  3rd Qu.:12.83   3rd Qu.:86.49   3rd Qu.:0.336  
##  Max.   :12.83   Max.   :86.49   Max.   :0.336
# Bankura district: overall and non-"normal" means of HB, HbA0 and HbF.
a.ban<-read.csv("bankura.csv")
z<-a.ban$MOLECULAR
# Select the non-"normal" rows of THIS district. The earlier version filtered
# `x` (the Nadia labels) and then averaged columns of `a.bir`, so the three
# subgroup means it printed did not describe Bankura at all.
x<-which(z!="normal")
nalpha<-length(x)
n<-length(z)
# Fraction of Bankura rows that are not labelled "normal".
a<-nalpha/n
i<-a.ban$HB
j<-a.ban$HbA0
k<-a.ban$HbF
# Means over every row of the file (used later for the district bar plots).
mean.HB.ban<-mean(i)
mean.HbA0.ban<-mean(j)
mean.HbF.ban<-mean(k)
# Means restricted to the non-"normal" Bankura rows. The previously recorded
# outputs were produced by the wrong data and have been removed; re-run to
# regenerate them.
mean(a.ban$HB[x])
mean(a.ban$HbA0[x])
mean(a.ban$HbF[x])
# w is a 1 x 3 matrix, so summary() repeats each mean for every statistic.
w<-cbind(mean.HB.ban,mean.HbA0.ban,mean.HbF.ban)
summary(w)
##   mean.HB.ban    mean.HbA0.ban    mean.HbF.ban   
##  Min.   :12.47   Min.   :87.72   Min.   :0.3583  
##  1st Qu.:12.47   1st Qu.:87.72   1st Qu.:0.3583  
##  Median :12.47   Median :87.72   Median :0.3583  
##  Mean   :12.47   Mean   :87.72   Mean   :0.3583  
##  3rd Qu.:12.47   3rd Qu.:87.72   3rd Qu.:0.3583  
##  Max.   :12.47   Max.   :87.72   Max.   :0.3583
# Jalpaiguri district: overall and non-"normal" means of HB, HbA0 and HbF.
a.jal<-read.csv("jalpaiguri.csv")
j<-a.jal$MOLECULAR
# Row indices whose MOLECULAR label is anything other than "normal".
k<-which(j!="normal")
nalpha=length(k)
n<-length(j)
# Fraction of rows that are not labelled "normal".
p<-nalpha/n
a<-a.jal$HB
b<-a.jal$HbA0
# NOTE(review): this assignment masks base::c as a data object; calls such as
# c(...) still resolve to the function, but the name is easy to misread.
c<-a.jal$HbF
# Means over every row of the file (used later for the district bar plots).
mean.HB.jal<-mean(a)
mean.HbA0.jal<-mean(b)
mean.HbF.jal<-mean(c)
# Means restricted to the non-"normal" rows.
mean(a.jal$HB[k])
## [1] 12.55
mean(a.jal$HbA0[k])
## [1] 87.68571
mean(a.jal$HbF[k])
## [1] 0.2142857
# P is a 1 x 3 matrix, so summary() repeats each mean for every statistic.
P<-cbind(mean.HB.jal,mean.HbA0.jal,mean.HbF.jal)
summary(P)
##   mean.HB.jal    mean.HbA0.jal    mean.HbF.jal   
##  Min.   :12.48   Min.   :87.78   Min.   :0.2158  
##  1st Qu.:12.48   1st Qu.:87.78   1st Qu.:0.2158  
##  Median :12.48   Median :87.78   Median :0.2158  
##  Mean   :12.48   Mean   :87.78   Mean   :0.2158  
##  3rd Qu.:12.48   3rd Qu.:87.78   3rd Qu.:0.2158  
##  Max.   :12.48   Max.   :87.78   Max.   :0.2158
# Murshidabad district: overall and non-"normal" means of HB, HbA0 and HbF.
a.mur<-read.csv("murshidabad.csv")
s<-a.mur$MOLECULAR
# Row indices whose MOLECULAR label is anything other than "normal".
# NOTE(review): `t` masks base::t as a data object from here on.
t<-which(s!="normal")
nalpha=length(t)
n<-length(s)
# Fraction of rows that are not labelled "normal".
p<-nalpha/n
r<-a.mur$HB
o<-a.mur$HbA0
l<-a.mur$HbF
# Means over every row of the file (used later for the district bar plots).
mean.HB.mur<-mean(r)
mean.HbA0.mur<-mean(o)
mean.HbF.mur<-mean(l)
# Means restricted to the non-"normal" rows.
mean(a.mur$HB[t])
## [1] 12.37143
mean(a.mur$HbA0[t])
## [1] 87.45714
mean(a.mur$HbF[t])
## [1] 0.2642857
# Q is a 1 x 3 matrix, so summary() repeats each mean for every statistic.
Q<-cbind(mean.HB.mur,mean.HbA0.mur,mean.HbF.mur)
summary(Q)
##   mean.HB.mur    mean.HbA0.mur    mean.HbF.mur 
##  Min.   :12.37   Min.   :87.29   Min.   :0.25  
##  1st Qu.:12.37   1st Qu.:87.29   1st Qu.:0.25  
##  Median :12.37   Median :87.29   Median :0.25  
##  Mean   :12.37   Mean   :87.29   Mean   :0.25  
##  3rd Qu.:12.37   3rd Qu.:87.29   3rd Qu.:0.25  
##  Max.   :12.37   Max.   :87.29   Max.   :0.25
# Bar plots of the per-district overall means, one chart per parameter.
# Each object is a 1 x 5 matrix whose columns carry the short district labels.
district.labels<-c("N","bir","ban","J","M")
HB<-matrix(
  c(mean.HB.nad,mean.HB.bir,mean.HB.ban,mean.HB.jal,mean.HB.mur),
  nrow=1,dimnames=list(NULL,district.labels)
)
HbA0<-matrix(
  c(mean.HbA0.nad,mean.HbA0.bir,mean.HbA0.ban,mean.HbA0.jal,mean.HbA0.mur),
  nrow=1,dimnames=list(NULL,district.labels)
)
HbF<-matrix(
  c(mean.HbF.nad,mean.HbF.bir,mean.HbF.ban,mean.HbF.jal,mean.HbF.mur),
  nrow=1,dimnames=list(NULL,district.labels)
)
par(mfrow=c(1,1))
barplot(HB)

barplot(HbA0)

barplot(HbF)

PROBLEM 3:

BOX PLOT ANALYSIS OF SEVERAL DISTRICTS-

# Box plots of each blood parameter, one box per district.
nad<-read.csv("nadia.csv")
bir<-read.csv("birbhum.csv")
ban<-read.csv("bankura.csv")
jal<-read.csv("jalpaiguri.csv")
mur<-read.csv("murshidabad.csv")

# The districts have different numbers of rows. cbind() recycled the shorter
# columns to the longest length (hence the "not a multiple of vector length"
# warnings), which put duplicated observations into the boxes. A named list
# keeps every district at its true length; boxplot() draws one box per element
# and uses the element names as axis labels.
district.values<-function(measure) {
  list(
    N=nad[[measure]],
    bir=bir[[measure]],
    ban=ban[[measure]],
    J=jal[[measure]],
    M=mur[[measure]]
  )
}

par(mfrow=c(1,1))
boxHB<-district.values("HB")
boxplot(boxHB,ylab="HB",xlab="district",main="boxplot")

boxMCV<-district.values("MCV")
boxplot(boxMCV,ylab="MCV",xlab="district",main="boxplot")

boxMCH<-district.values("MCH")
boxplot(boxMCH,ylab="MCH",xlab="district",main="boxplot")

boxHbA0<-district.values("HbA0")
boxplot(boxHbA0,ylab="HbA0",xlab="district",main="boxplot")

boxHbA2<-district.values("HbA2")
boxplot(boxHbA2,ylab="HbA2",xlab="district",main="boxplot")

boxHbF<-district.values("HbF")
boxplot(boxHbF,ylab="HbF",xlab="district",main="boxplot")

PROBLEM 4:

MASTER FILE-

# Build one master table holding the six blood parameters, the molecular
# label and the district name for every row of the five district files.
nad<-read.csv("nadia.csv")
bir<-read.csv("birbhum.csv")
ban<-read.csv("bankura.csv")
jal<-read.csv("jalpaiguri.csv")
mur<-read.csv("murshidabad.csv")

# One district as a data frame. The earlier version cbind()-ed the numeric
# columns with the character district vector, which coerces the whole matrix
# to text; data.frame() then turned every column into a factor, so summary()
# listed level counts and the box plot drew factor codes instead of values.
# A data frame keeps each column's own type.
district.frame<-function(d,district) {
  data.frame(
    HB=d$HB,MCV=d$MCV,MCH=d$MCH,HbA0=d$HbA0,HbA2=d$HbA2,HbF=d$HbF,
    mol=as.character(d$MOLECULAR),
    distn=rep(district,nrow(d)),
    stringsAsFactors=FALSE
  )
}

f.n<-district.frame(nad,"nadia")
f.bm<-district.frame(bir,"birbhum")
f.bn<-district.frame(ban,"bankura")
f.j<-district.frame(jal,"jalpaiguri")
f.m<-district.frame(mur,"murshidabad")

# Stack the districts; all frames share the same column names and order.
Master<-rbind(f.n,f.bm,f.bn,f.j,f.m)
master1<-data.frame(Master)
mst<-as.matrix(master1)
# The summary previously recorded here described the factor-coerced table and
# has been removed; re-run to regenerate it for the numeric columns.
summary(master1)
## BOX plot:

# Numeric matrix of the six parameters; boxplot() draws one box per column.
box<-as.matrix(master1[,c("HB","MCV","MCH","HbA0","HbA2","HbF")])
boxplot(box,main="Boxplot")

# Read one district CSV and split it into measurements and labels.
#
# csvfile: path of a CSV file with a header row.
# Returns a list with two elements:
#   NUM - matrix with columns hb, mcv, mch, hba0, hba2, hbf taken from the
#         HB, MCV, MCH, HbA0, HbA2 and HbF columns of the file;
#   MUT - the MOLECULAR column of the file.
csvxtract<-function(csvfile) {
  records<-read.csv(csvfile,header=TRUE)
  list(
    NUM=cbind(
      hb=records$HB,
      mcv=records$MCV,
      mch=records$MCH,
      hba0=records$HbA0,
      hba2=records$HbA2,
      hbf=records$HbF
    ),
    MUT=records$MOLECULAR
  )
}

nad<-csvxtract("nadia.csv")
summary(nad)
##     Length Class  Mode   
## NUM 282    -none- numeric
## MUT  47    factor numeric
bir<-csvxtract("birbhum.csv")
summary(bir)
##     Length Class  Mode   
## NUM 300    -none- numeric
## MUT  50    factor numeric
ban<-csvxtract("bankura.csv")
summary(ban)
##     Length Class  Mode   
## NUM 144    -none- numeric
## MUT  24    factor numeric
jal<-csvxtract("jalpaiguri.csv")
summary(jal)
##     Length Class  Mode   
## NUM 114    -none- numeric
## MUT  19    factor numeric
mur<-csvxtract("murshidabad.csv")
summary(mur)
##     Length Class  Mode   
## NUM 120    -none- numeric
## MUT  20    factor numeric
# Stack the numeric measurements of all five districts into one matrix.
mastcsv<-rbind(nad$NUM,bir$NUM,ban$NUM,jal$NUM,mur$NUM)
colnames(mastcsv)<-c("Hb","MCV","MCH","HbA0","HbA2","HbF")
# Collect the molecular labels as text. The earlier version concatenated the
# factors with c() and decoded their integer codes (1 = mutation, 2 = normal).
# That only works if every file has exactly those two levels in that order,
# and it breaks on R >= 4.0, where read.csv() returns character columns, and
# on R >= 4.1, where c() on factors returns a factor.
mastgrp<-unlist(lapply(list(nad,bir,ban,jal,mur),function(d) as.character(d$MUT)))

# Two-level class factor: "normal" versus any other label, the same rule the
# district analysis uses (MOLECULAR != "normal").
gclass<-factor(
  ifelse(mastgrp=="normal","normal","alphamutation"),
  levels=c("normal","alphamutation")
)
summary(gclass)
##        normal alphamutation 
##            71            89
mast.pca<-prcomp(mastcsv,scale=TRUE)
summary(mast.pca)
## Importance of components:
##                           PC1    PC2    PC3    PC4    PC5     PC6
## Standard deviation     1.4068 1.1502 1.0408 0.9169 0.8177 0.32420
## Proportion of Variance 0.3299 0.2205 0.1806 0.1401 0.1114 0.01752
## Cumulative Proportion  0.3299 0.5504 0.7309 0.8710 0.9825 1.00000

Bi-plot, Screeplot & Bar-plot:

# Base-graphics views of the PCA fitted above (mast.pca, from prcomp()).
biplot(mast.pca, main="Biplot")

par(mfrow=c(1,1))
# Variance of each principal component.
screeplot(mast.pca, main ="Screeplot")

# Bar chart of the rotation matrix (the variable loadings of each component).
barplot(mast.pca$rotation, main ="Barplot")

## ggbiplot :
  
library(ggbiplot)
## Loading required package: ggplot2
## Loading required package: plyr
## Loading required package: scales
## Loading required package: grid

# PCA biplot with observations grouped by gclass, an ellipse per group and a
# correlation circle; the legend is untitled and laid out horizontally on top.
g<-ggbiplot(mast.pca,obs.scale =1,var.scale =1,groups = gclass,ellipse =TRUE,circle =TRUE)
g<-g +scale_color_discrete(name='')
g<-g +theme(legend.direction ='horizontal',legend.position ='top')
print(g)

PROTEIN PROBLEMS:

library(bio3d)
## Warning: package 'bio3d' was built under R version 3.4.4
pdb<-read.pdb("1b7t.pdb")
pdb
## 
##  Call:  read.pdb(file = "1b7t.pdb")
## 
##    Total Models#: 1
##      Total Atoms#: 8383,  XYZs#: 25149  Chains#: 3  (values: A Y Z)
## 
##      Protein Atoms#: 8249  (residues/Calpha atoms#: 1057)
##      Nucleic acid Atoms#: 0  (residues/phosphate atoms#: 0)
## 
##      Non-protein/nucleic Atoms#: 134  (residues: 108)
##      Non-protein/nucleic resid values: [ ADP (1), CA (1), HOH (104), MG (2) ]
## 
##    Protein sequence:
##       FSDPDFQYLAVDAFDGKKNCWVPDEKEGFASAEIQSSKGDEITVKIVADSSTRTVKKDDI
##       QSMNPPKFEKLEDMANMTYLNEASVLYNLRSRYTSGLIYTYSGLFCIAVNPYRRLPIYTD
##       SVIAKYRGKRKTEIPPHLFSVADNAYQNMVTDRENQSCLITGESGAGKTENTKKVIMYLA
##       KVACAEGSLEDQIIQANPVLEAYGNAKTTRNNNSSRFGKFIRIHF...<cut>...GPYP
## 
## + attr: atom, xyz, seqres, helix, sheet,
##         calpha, remark, call
# Backbone torsion angles of the structure read above.
tor<-torsion.pdb(pdb)
om<-tor$omega
phi1<-tor$phi
psi1<-tor$psi

# Quick look at the phi/psi values of the first 70 entries.
s1<-c(phi1[1:70],psi1[1:70])
summary(s1)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
## -176.004  -88.690  -46.056   -6.572  113.657  179.408        1
# phi and psi with the undefined (NA) angles removed. A logical mask is used
# because v[-which(is.na(v))] returns an EMPTY vector when v has no NA at all
# (negative indexing with integer(0) selects nothing).
q<-phi1[!is.na(phi1)]
s<-psi1[!is.na(psi1)]

par(mfrow=c(1,1))
plot(phi1,psi1,pch=19,xlim = c(-180,180),ylim = c(-180,180),col="green",cex=0.5,main = "RAMACHANDRAN PLOT")

# NOTE(review): the x axis here is the residue index, so xlim = c(0,180) shows
# only the first 180 omega values - confirm that this is intended.
plot(om,pch=19,xlim = c(0,180),ylim = c(-180,180),col="blue",cex=0.5,main = "OMEGA PLOT")

summary(pdb$atom)
##      type               eleno         elety               alt           
##  Length:8383        Min.   :   1   Length:8383        Length:8383       
##  Class :character   1st Qu.:2096   Class :character   Class :character  
##  Mode  :character   Median :4192   Mode  :character   Mode  :character  
##                     Mean   :4192                                        
##                     3rd Qu.:6288                                        
##                     Max.   :8386                                        
##     resid              chain               resno           insert         
##  Length:8383        Length:8383        Min.   :   2.0   Length:8383       
##  Class :character   Class :character   1st Qu.: 101.0   Class :character  
##  Mode  :character   Mode  :character   Median : 279.0   Mode  :character  
##                                        Mean   : 340.5                     
##                                        3rd Qu.: 557.0                     
##                                        Max.   :1079.0                     
##        x                 y                 z                 o    
##  Min.   :-33.793   Min.   :-30.572   Min.   :-45.648   Min.   :1  
##  1st Qu.: -2.071   1st Qu.: -4.072   1st Qu.: -8.161   1st Qu.:1  
##  Median :  9.854   Median : 10.482   Median : 16.458   Median :1  
##  Mean   : 16.301   Mean   : 10.594   Mean   : 22.093   Mean   :1  
##  3rd Qu.: 28.107   3rd Qu.: 23.730   3rd Qu.: 53.596   3rd Qu.:1  
##  Max.   : 88.836   Max.   : 56.518   Max.   :100.859   Max.   :1  
##        b             segid              elesy              charge         
##  Min.   :  2.00   Length:8383        Length:8383        Length:8383       
##  1st Qu.: 41.01   Class :character   Class :character   Class :character  
##  Median : 54.78   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 55.84                                                           
##  3rd Qu.: 69.98                                                           
##  Max.   :100.00
# Coordinates of the C-alpha atoms. The structure also contains a calcium ion
# (non-protein resid "CA" in the summary above), whose atom name is presumably
# also "CA", so filtering on elety=="CA" alone would pick up the ion as well.
# pdb$calpha is the logical C-alpha flag that read.pdb() stores per atom row
# (see the `calpha` attribute listed in the printed pdb object).
r<-pdb$atom[pdb$calpha,c("x","y","z")]


# The same file was previously read a second time into pdb1; reuse the object
# already in memory so both plots are guaranteed to show the same atoms.
pdb1<-pdb
Bxyz<-pdb1$atom[pdb1$calpha,c("x","y","z")]
# Pairwise scatter plots of the x, y and z coordinates.
plot(Bxyz,cex=0.3)

library(scatterplot3d)
## Warning: package 'scatterplot3d' was built under R version 3.4.4
# 3-D trace of the C-alpha positions.
scatterplot3d(r,highlight.3d = TRUE,col.axis = "blue",col.grid = "green",main = "scatterplot3d-1",pch = 20)

## DATA QUIZ ### PROBLEM- Reading .csv files through function:

# Load a district CSV (with header) and return its contents in two parts:
#   NUM - numeric matrix, columns hb, mcv, mch, hba0, hba2, hbf, built from
#         the HB, MCV, MCH, HbA0, HbA2 and HbF columns;
#   MUT - the MOLECULAR column.
# csvfile: path of the CSV file to read.
csvxtract<-function(csvfile) {
  sheet<-read.csv(csvfile,header = TRUE)
  numeric.part<-cbind(
    hb=sheet$HB,mcv=sheet$MCV,mch=sheet$MCH,
    hba0=sheet$HbA0,hba2=sheet$HbA2,hbf=sheet$HbF
  )
  list(NUM=numeric.part,MUT=sheet$MOLECULAR)
}

nad<-csvxtract("nadia.csv")
summary(nad)
##     Length Class  Mode   
## NUM 282    -none- numeric
## MUT  47    factor numeric
bir<-csvxtract("birbhum.csv")
summary(bir)
##     Length Class  Mode   
## NUM 300    -none- numeric
## MUT  50    factor numeric
ban<-csvxtract("bankura.csv")
summary(ban)
##     Length Class  Mode   
## NUM 144    -none- numeric
## MUT  24    factor numeric
jal<-csvxtract("jalpaiguri.csv")
summary(jal)
##     Length Class  Mode   
## NUM 114    -none- numeric
## MUT  19    factor numeric
mur<-csvxtract("murshidabad.csv")
summary(mur)
##     Length Class  Mode   
## NUM 120    -none- numeric
## MUT  20    factor numeric
# Stack the numeric measurements of all five districts into one matrix.
mastcsv<-rbind(nad$NUM,bir$NUM,ban$NUM,jal$NUM,mur$NUM)
colnames(mastcsv)<-c("Hb","MCV","MCH","HbA0","HbA2","HbF")
# Collect the molecular labels as text rather than decoding factor integer
# codes: the codes depend on each file's own level set, and c() on factors or
# character columns no longer yields them on R >= 4.0 / 4.1.
mastgrp<-unlist(lapply(list(nad,bir,ban,jal,mur),function(d) as.character(d$MUT)))

# Two-level class factor: "normal" versus any other label.
gclass<-factor(
  ifelse(mastgrp=="normal","normal","alphamutation"),
  levels = c("normal","alphamutation")
)
summary(gclass)
##        normal alphamutation 
##            71            89

PROBLEM- Reading .csv files through TEMP and comparison between different classes:

# Compare "normal" and "alpha" rows across the five districts.
#
# Defects of the earlier version that this rewrite removes:
#   * files came from list.files(pattern = "*.csv"), so bind.data[[k]] depended
#     on whatever CSVs were in the working directory (the recorded output shows
#     elements with 11, 9 and 18 columns, i.e. not district files);
#   * the murshidabad and nadia sections re-read element 3, so three
#     "districts" held identical numbers;
#   * every HbF value was read from the MCH column;
#   * one alpha HbA2 vector was subset with the normal-row index;
#   * cbind() of unequal-length vectors recycled values into the scatter plots.
# The outputs recorded from that version were wrong and have been removed.

# Explicit key -> file mapping; the keys are the prefixes used in column names.
district.files<-c(
  ban="bankura.csv",
  bir="birbhum.csv",
  jal="jalpaiguri.csv",
  mur="murshidabad.csv",
  nad="nadia.csv"
)
# Named list of data frames, one per district (still indexable by position).
bind.data<-lapply(district.files,read.csv)

# Short measure name -> column name in the CSV files.
measure.cols<-c(hb="HB",mcv="MCV",mch="MCH",hba0="HbA0",hba2="HbA2",hbf="HbF")

# Matrix of the six measures for the rows of one district whose MOLECULAR
# label equals `label`. Columns are named <key>.<measure><suffix>, e.g.
# "ban.hb" or "ban.hb.a". A missing column raises an error instead of
# silently producing an empty table.
class.table<-function(key,d,label,suffix) {
  rows<-which(d[["MOLECULAR"]]==label)
  tab<-as.matrix(d[rows,unname(measure.cols),drop=FALSE])
  dimnames(tab)<-list(NULL,paste0(key,".",names(measure.cols),suffix))
  tab
}

normal.tables<-Map(class.table,names(bind.data),bind.data,
                   MoreArgs=list(label="normal",suffix=""))
alpha.tables<-Map(class.table,names(bind.data),bind.data,
                  MoreArgs=list(label="alpha",suffix=".a"))

# Per-district summaries, normal rows first and alpha rows second.
for (key in names(bind.data)) {
  print(summary(normal.tables[[key]]))
  print(summary(alpha.tables[[key]]))
}

# All values of one measure pooled over the districts. Concatenating (rather
# than cbind-ing) keeps every observation exactly once.
pooled<-function(tables,measure) {
  col<-match(measure,names(measure.cols))
  unlist(lapply(tables,function(tab) tab[,col]),use.names=FALSE)
}

# Scatter plot of two measures: normal rows as red "n", alpha rows as blue "a".
# The axis limits cover both classes so that no alpha point is clipped.
scatter.by.class<-function(xm,ym,title) {
  nx<-pooled(normal.tables,xm)
  ny<-pooled(normal.tables,ym)
  ax<-pooled(alpha.tables,xm)
  ay<-pooled(alpha.tables,ym)
  plot(nx,ny,
       xlim=range(c(nx,ax),na.rm=TRUE),ylim=range(c(ny,ay),na.rm=TRUE),
       xlab=measure.cols[[xm]],ylab=measure.cols[[ym]],
       cex=0.8,pch="n",main=title,col="red")
  points(ax,ay,pch="a",col="blue")
}

par(mfrow=c(2,2))
scatter.by.class("mch","mcv","MCV Vs MCH")
scatter.by.class("hba0","hbf","HbF Vs HbA0")
scatter.by.class("hbf","mcv","MCV Vs HbF")
scatter.by.class("hba0","hba2","HbA2 Vs HbA0")

QUIZ 2:

PROBLEM 1: Given a sequence of numbers d, write a script to generate the moving average with no NA values:

# Moving average of p over windows of s consecutive values.
p<-c(2,6,3,10,26,35,8,56,31,74,46,12,17,39,22,89,42,5,13,70)
l<-length(p)
s<-3
# Only the l - s + 1 complete windows are evaluated, so no NA is ever produced
# and nothing has to be filtered afterwards. The earlier loop ran past the end
# of p, always summed exactly three terms whatever s was, and stored the
# results as a list of scalars.
n.windows<-l-s+1
if (n.windows>0) {
  f<-vapply(seq_len(n.windows),function(i) mean(p[i:(i+s-1)]),numeric(1))
} else {
  f<-numeric(0)
}
f
# f is a numeric vector of 18 averages (same values as the earlier list):
#   3.666667  6.333333 13 23.66667 23 33 31.66667 53.66667 50.33333
#  44 25 22.66667 26 50 51 45.33333 20 29.33333

PROBLEM 2: Generated X and Y variables with some NA elements. Write a program to find (where P and Q are X and Y with the NA elements removed): (a) mean of P, (b) mean of Q, (c) plot of Q against P

# Means of X and Y ignoring NA, and a scatter plot of the paired values.
s<-read.csv("Problem3.csv")
A<-s$X
B<-s$Y
# Drop NAs with a logical mask: A[-which(is.na(A))] would return an EMPTY
# vector if A happened to contain no NA at all.
P<-A[!is.na(A)]
P_M<-mean(P)
P_M
## [1] 72.88636
Q<-B[!is.na(B)]
Q_M<-mean(Q)
Q_M
## [1] 22.20909
# Plot only rows where both X and Y are present. P and Q were filtered
# independently, so plot(P,Q) either failed on unequal lengths or paired
# values that came from different rows.
paired<-!is.na(A)&!is.na(B)
plot(A[paired],B[paired],xlab="P",ylab="Q",col="blue")

PROBLEM 4: Suppose you have a spectrum with more than one peak; write a program to find all the peaks:

# Multi-peak spectrum: plot the full curve, then inspect three hand-picked
# wavelength windows and report the largest value found in each.
p<-read.csv("problem4.csv")
x<-p$wavelen
y<-p$Tmean
par(mfrow=c(1,1))
plot(x,y,col="red",xlab = "Lambda",ylab = "Absorbance",cex=0.6,main = "Wavelength Vs Absorbance")

# Window 1: 365 < wavelength < 395 (open interval).
m<-which(x>365 & x<395)
length(m)
## [1] 29
wv1<-x[m]
AA1<-y[m]
length(wv1)
## [1] 29
# Height of the tallest point in the window.
max(AA1)
## [1] 2520060
# NOTE(review): max(wv1) is the largest wavelength in the window (its upper
# edge), not the wavelength of the peak; wv1[which.max(AA1)] would give the
# peak position. The same applies to windows 2 and 3 below.
max(wv1)
## [1] 394
plot(wv1,AA1,col="blue",xlab = "Lambda",ylab = "Absorbance",cex=0.6,main = "Wavelength Vs Absorbance")

# Window 2: 400 < wavelength < 455.
n<-which(x>400&x<455)
length(n)
## [1] 54
wv2<-x[n]
AA2<-y[n]
length(wv2)
## [1] 54
max(AA2)
## [1] 865840.4
max(wv2)
## [1] 454
plot(wv2,AA2,col="purple",xlab = "Lambda",ylab = "Absorbance",cex=0.6,main = "Wavelength Vs Absorbance")

# Window 3: 495 < wavelength < 565. This reuses the name `p`, so the data
# frame read at the top is no longer reachable after this line.
p<-which(x>495&x<565)
length(p)
## [1] 69
wv3<-x[p]
AA3<-y[p]
length(wv3)
## [1] 69
max(AA3)
## [1] 559970.1
max(wv3)
## [1] 564
plot(wv3,AA3,col="green",xlab = "Lambda",ylab = "Absorbance",cex=0.6,main = "Wavelength Vs Absorbance")

PROBLEM 5:Program to generate a random sequence of amino acids:

# One-letter codes of the 20 amino acids, held as a single string.
m <- "YWVSPTMFKLGHICQERDNA"
print(m)
## [1] "YWVSPTMFKLGHICQERDNA"
## [1] "YWVSPTMFKLGHICQERDNA"
# Split into individual letters, then print 20 random permutations,
# one per line with the letters separated by single spaces.
X <- strsplit(m, "")[[1]]
for (i in seq_len(20)) {
  cat(paste(sample(X), collapse = " "), "\n", sep = "")
}
## R Q I F C L H W P Y M G V A N S E T K D
## V P H L S G Y R E D M F W Q K I T N C A
## S R L N H M I C G Y W Q E K F A V P D T
## R L Q M F S G N P Y W A D K H V T C E I
## E M F P N R D T I G L V A Y S Q W C K H
## H M L A C D K I W R N V E P F Q G T Y S
## G L D N I A Q S K F R E Y T M P C V W H
## E R T L N D P C Q G V F H I M W S A Y K
## F R N W K E Q V G A H C L Y P M T S D I
## A Q I P E V C Y W S K R F G N M T L H D
## D C I M E S N W Q A G K R T V F L P H Y
## D Q G M Y I S A N T W F L R K E V P C H
## A Q Y F N E L I K T S H W M D P G C R V
## Q R L Y I S T E C K D G H W M N F A V P
## T E W S M I Q F K V L G D Y P N C H A R
## W P V I K F A L C R Y M Q H E T D S G N
## N D V L K Q T F R Y H E S P C A G M I W
## T V W Q D E N F P M K R C I A Y G S H L
## A P K C H M Y D S T W V R F E L N G Q I
## H C L W T E P G I M V N F R S K Y D Q A
# One more random permutation of the letters in X; because the result is not
# assigned it is auto-printed as a character vector (output differs per run).
sample(X)
##  [1] "P" "G" "H" "C" "I" "D" "T" "Q" "R" "Y" "N" "V" "E" "W" "L" "M" "K"
## [18] "A" "F" "S"

OVERLAYED RAMACHANDRAN PLOT:

library(bio3d)
# Structure 1: fetch 4h6z and compute its backbone torsion angles.
pdb1 <- read.pdb("4h6z")
##   Note: Accessing on-line PDB file
tor1 <- torsion.pdb(pdb1)
phi1 <- tor1$phi
psi1 <- tor1$psi

# Quick look at the first 70 phi and psi values pooled together.
s1 <- c(phi1[1:70], psi1[1:70])
summary(s1)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max.      NA's 
## -176.7810 -107.9999  -37.9770    0.6643  126.9249  173.8367         1
# NA-free copies of phi and psi. Logical masks are used because
# x[-which(is.na(x))] returns an empty vector when x contains no NA.
a1 <- is.na(phi1)
p1 <- which(a1)
q1 <- phi1[!a1]
ij1 <- is.na(psi1)
ijj1 <- which(ij1)
s1 <- psi1[!ij1]  # note: replaces the pooled vector assigned to s1 above

ss1 <- c(phi1[1:9], psi1[1:9])
par(mfrow = c(1, 1))


# Structure 2: fetch 2VAS and compute its backbone torsion angles.
pdb2 <- read.pdb("2VAS")
##   Note: Accessing on-line PDB file
tor2 <- torsion.pdb(pdb2)
phi2 <- tor2$phi
psi2 <- tor2$psi

# Quick look at the first 70 phi and psi values pooled together.
s2 <- c(phi2[1:70], psi2[1:70])
summary(s2)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
## -174.099  -97.015  -50.892   -8.585  124.697  175.501        1
# NA-free copies of phi and psi. Logical masks are used because
# x[-which(is.na(x))] returns an empty vector when x contains no NA.
a2 <- is.na(phi2)
p2 <- which(a2)
q2 <- phi2[!a2]
ij2 <- is.na(psi2)
ijj2 <- which(ij2)
s2 <- psi2[!ij2]  # note: replaces the pooled vector assigned to s2 above
ss2 <- c(phi2[1:9], psi2[1:9])


# Structure 3: fetch 1WDC and compute its backbone torsion angles.
pdb3 <- read.pdb("1WDC")
##   Note: Accessing on-line PDB file
tor3 <- torsion.pdb(pdb3)
phi3 <- tor3$phi
psi3 <- tor3$psi
# Quick look at the first 70 phi and psi values pooled together.
s3 <- c(phi3[1:70], psi3[1:70])
summary(s3)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -128.20  -65.11  -52.59  -47.23  -39.48  163.37       1
# NA-free copies of phi and psi. Logical masks are used because
# x[-which(is.na(x))] returns an empty vector when x contains no NA.
a3 <- is.na(phi3)
p3 <- which(a3)
q3 <- phi3[!a3]
ij3 <- is.na(psi3)
ijj3 <- which(ij3)
s3 <- psi3[!ij3]  # note: replaces the pooled vector assigned to s3 above
ss3 <- c(phi3[1:9], psi3[1:9])


# Structure 4.
# NOTE(review): this loads "1WDC" again, the same entry used for structure 3,
# so the fourth data set duplicates the third - confirm the intended PDB ID.
pdb4 <- read.pdb("1WDC")
##   Note: Accessing on-line PDB file
## Warning in get.pdb(file, path = tempdir(), verbose = FALSE): /var/folders/
## t7/t6964h0d35938mcmcfhp1ngw0000gn/T//RtmpVLFqRL/1WDC.pdb exists. Skipping
## download
tor4 <- torsion.pdb(pdb4)
phi4 <- tor4$phi
psi4 <- tor4$psi
# Quick look at the first 70 phi and psi values pooled together.
s4 <- c(phi4[1:70], psi4[1:70])
summary(s4)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -128.20  -65.11  -52.59  -47.23  -39.48  163.37       1
# NA-free copies of phi and psi. Logical masks are used because
# x[-which(is.na(x))] returns an empty vector when x contains no NA.
a4 <- is.na(phi4)
p4 <- which(a4)
q4 <- phi4[!a4]
ij4 <- is.na(psi4)
ijj4 <- which(ij4)
# Filter psi4 with its own NA mask (previously psi3 was indexed here, mixing
# structure 3's angles with structure 4's NA positions).
s4 <- psi4[!ij4]
ss4 <- c(phi4[1:9], psi4[1:9])

# Overlaid Ramachandran plot: structure 1 sets up the axes, the others are
# added on top. Axis limits belong to plot() only; passing xlim/ylim to
# points() has no effect and raises "not a graphical parameter" warnings.
plot(phi1, psi1, pch = 19, xlim = c(-180, 180), ylim = c(-180, 180),
     col = "red", cex = 0.6)
abline(h = 0, v = 0)  # axes through the origin split the four quadrants
points(phi2, psi2, pch = 20, col = "blue", cex = 0.5)
points(phi3, psi3, pch = 18, col = "green", cex = 0.3)
points(phi4, psi4, pch = 20, col = "yellow", cex = 0.4)

T-test FOR ASPIRIN:

# Load the aspirin trial data; the code below uses the columns `condition`
# and `change`.
z <- read.table("aspirin.txt", header = TRUE)
# View() opens a spreadsheet window; only call it in an interactive session so
# the script also runs under Rscript / knitting.
if (interactive()) {
  View(z)
}

# Reciprocal of the number of placebo rows.
con <- z$condition
d <- which(con == "placebo")
pla <- con[d]
lp <- length(pla)
p <- 1 / lp
p
## [1] 0.0625
# Counts based on the signed change, relative to its mean.
g <- z$change
length(which(g > mean(g)))
## [1] 17
which(g > mean(g) & z$condition == "placebo")
##  [1]  2  4  9 10 14 16 17 18 20 22 23 24 25 26 30
l <- which(g > mean(g) & z$condition == "placebo")
length(l)
## [1] 15
# Work with the magnitude of the change from here on; abs() replaces the
# square-then-square-root round trip.
z$change <- abs(z$change)
z$change
##  [1] 3.2 2.1 6.1 2.0 5.3 3.9 3.1 6.5 2.5 0.9 3.4 5.6 7.1 0.0 4.8 1.9 2.2
## [18] 1.7 5.5 1.1 4.5 1.5 1.9 2.4 2.1 1.1 2.2 3.3 4.5 2.0
m <- mean(z$change)
m
## [1] 3.146667
# Split each arm into rows above ("+") and below ("-") the overall mean
# magnitude. Rows exactly equal to the mean fall in neither group.
ik <- which(z$change > m & z$condition == "placebo")
length(ik)
## [1] 1
jk <- which(z$change < m & z$condition == "placebo")
length(jk)
## [1] 15
ik1 <- which(z$change > m & z$condition == "aspirin")
length(ik1)
## [1] 12
jk1 <- which(z$change < m & z$condition == "aspirin")
length(jk1)
## [1] 2
# Assemble the 2 x 2 contingency table: rows are treatment arms, columns are
# the counts above ("+") and below ("-") the mean magnitude of change.
placebo <- matrix(c(length(ik), length(jk)), nrow = 1)
print(placebo)
##      [,1] [,2]
## [1,]    1   15
colnames(placebo) <- c("+", "-")
print(placebo)
##      +  -
## [1,] 1 15
aspirin <- matrix(c(length(ik1), length(jk1)), nrow = 1)
data.file <- rbind(placebo, aspirin)
print(data.file)
##       +  -
## [1,]  1 15
## [2,] 12  2
rownames(data.file) <- c("placebo", "aspirin")
print(data.file)
##          +  -
## placebo  1 15
## aspirin 12  2
# Pearson chi-square statistic for the 2 x 2 table `data.file`, computed by
# hand. Expected count for a cell = (row total * column total) / grand total.
# The grand total is taken from the table itself rather than hard-coded as 30,
# so the calculation stays correct if the data change.
# Equivalent built-in: chisq.test(data.file, correct = FALSE)$statistic
n.total <- sum(data.file)
expected <- outer(rowSums(data.file), colSums(data.file)) / n.total
chivalue <- sum((data.file - expected)^2 / expected)
chivalue
## [1] 19.20087
# With 1 degree of freedom the 5% critical value is 3.841; the statistic is far
# larger, so the null hypothesis of independence is rejected in favour of the
# alternative hypothesis.

CHI-SQUARE VALUE FOR TWO DIFFERENT PROTEINS:

library(bio3d)
# Torsion angles of 1akr.
a <- read.pdb("1akr.pdb")
t.a <- torsion.pdb(a)
phi.a <- t.a$phi
b <- is.na(phi.a)
b1 <- which(b)
phi.ak <- phi.a[!b]  # logical mask: safe even when there is no NA
psi.a <- t.a$psi
e <- is.na(psi.a)
e1 <- which(e)
psi.ak <- psi.a[!e]
par(mfrow = c(1, 1))
# Plot the unfiltered vectors so phi[i] is paired with psi[i] of the same
# residue (plot() skips incomplete pairs). phi.ak and psi.ak had their NAs
# removed at different positions, so plotting them against each other would
# pair angles from different residues.
plot(phi.a, psi.a, xlim = c(-180, 180), ylim = c(-180, 180), main = "1akr",
     xlab = "phi.ak", ylab = "psi.ak")
abline(h = 0, v = 0)

# Count angles falling inside fixed windows (degrees).
# NOTE(review): the alpha/beta labels follow the variable names; the window
# limits differ from those used for 3ua0 - confirm they are intended.
# These scalar thresholds reuse the names phi1..phi4, replacing any earlier
# objects with those names.
phi1 <- -50.0
phi2 <- -100.0
ik <- which(phi.ak < phi1 & phi.ak > phi2)
phial.ak <- length(ik)
phi3 <- -40.0
phi4 <- -150.0
ij <- which(phi.ak < phi3 & phi.ak > phi4)
phib.ak <- length(ij)
psi5 <- -80.0
psi6 <- 10.0
il <- which(psi.ak < psi6 & psi.ak > psi5)
psial.ak <- length(il)
psi7 <- 110.0
psi8 <- 160.0
im <- which(psi.ak > psi7 & psi.ak < psi8)
psib.ak <- length(im)
r <- cbind(phial.ak, psial.ak, phib.ak, psib.ak)
rownames(r) <- c("1akr")
(r)
##      phial.ak psial.ak phib.ak psib.ak
## 1akr       90       78     131      50
# Torsion angles of 3ua0.
ua <- read.pdb("3ua0.pdb")
t.ua <- torsion.pdb(ua)
phi.ua <- t.ua$phi
x <- is.na(phi.ua)
x1 <- which(x)
phi.ua <- phi.ua[!x]  # logical mask: safe even when there is no NA
psi.ua <- t.ua$psi
s <- is.na(psi.ua)
s1 <- which(s)
psi.ua <- psi.ua[!s]
# Plot the unfiltered vectors so phi[i] is paired with psi[i] of the same
# residue (plot() skips incomplete pairs); the NA-filtered copies are shifted
# relative to each other.
plot(t.ua$phi, t.ua$psi, xlim = c(-180, 180), ylim = c(-180, 180),
     main = "3ua0", xlab = "phi.ua", ylab = "psi.ua")
abline(h = 0, v = 0)

# Count angles falling inside fixed windows (degrees).
# NOTE(review): these window limits differ from the ones used for 1akr -
# confirm that is intended before comparing the two proteins.
phi9 <- -50.0
phi10 <- -110.0
ik1 <- which(phi.ua < phi9 & phi.ua > phi10)
phial.ua <- length(ik1)
phi11 <- -50.0
phi12 <- -150.0
# Count over this protein's own phi angles (phi.ua). The previous version
# indexed phi.ak, i.e. the other protein, so phib.ua was not a 3ua0 statistic.
ij1 <- which(phi.ua < phi11 & phi.ua > phi12)
phib.ua <- length(ij1)
psi13 <- -50.0
psi14 <- 10.0
il1 <- which(psi.ua < psi14 & psi.ua > psi13)
psial.ua <- length(il1)
psi15 <- 100.0
psi16 <- 160.0
im1 <- which(psi.ua > psi15 & psi.ua < psi16)
psib.ua <- length(im1)
r1 <- cbind(phial.ua, psial.ua, phib.ua, psib.ua)
rownames(r1) <- c("3ua0")
(r1)
## Recorded before the phi.ua fix (phib.ua = 129 was counted from 1akr's
## angles); re-run to refresh this value and any statistics derived from it.
##      phial.ua psial.ua phib.ua psib.ua
## 3ua0       60       15     129      90
# Stack the two one-row count matrices into a 2 x 4 table (one row per
# protein). rbind() keeps the column names of the first argument, which is why
# the first printout still shows the "*.ak" names.
main.pro<-rbind(r,r1)
main.pro
##      phial.ak psial.ak phib.ak psib.ak
## 1akr       90       78     131      50
## 3ua0       60       15     129      90
# Replace the column names with protein-neutral labels.
colnames(main.pro)<-c("Phi-alpha","Psi-alpha","Phi-beta","Psi-beta")
main.pro
##      Phi-alpha Psi-alpha Phi-beta Psi-beta
## 1akr        90        78      131       50
## 3ua0        60        15      129       90
# Pearson chi-square test on the 2 x 4 table of counts (df = 3 in the output).
# NOTE(review): the four counts per protein come from overlapping angle
# windows on the same residues, so the cells are presumably not independent
# categories - confirm the test is appropriate.
chisq.test(main.pro)
## 
##  Pearson's Chi-squared test
## 
## data:  main.pro
## X-squared = 55.825, df = 3, p-value = 4.577e-12
# As the output shows, this is a ONE-sample t-test: all eight counts are
# pooled into a single sample (df = 7) and their mean is tested against 0.
# NOTE(review): that does not compare the two proteins - confirm the intent.
t.test(main.pro)
## 
##  One Sample t-test
## 
## data:  main.pro
## t = 5.8065, df = 7, p-value = 0.0006591
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##   47.64353 113.10647
## sample estimates:
## mean of x 
##    80.375

T-TEST PROBLEMS:

x <- c(0.593, 0.142, 0.329, 0.691, 0.231, 0.793, 0.519, 0.392, 0.418)

#' One-sample t statistic.
#'
#' Computes (mean(dataset) - mu) / (sd(dataset) / sqrt(n)).
#'
#' @param dataset Numeric vector of observations.
#' @param mu Hypothesised population mean. Defaults to 0.3, the value that
#'   used to be hard-coded inside the function, so existing calls are
#'   unaffected.
#' @return The t statistic (a single number); it has length(dataset) - 1
#'   degrees of freedom.
ttest <- function(dataset, mu = 0.3) {
  std.err <- sd(dataset) / sqrt(length(dataset))
  (mean(dataset) - mu) / std.err
}
tt <- ttest(x)
tt
## [1] 2.205059
mean(x)
## [1] 0.4564444
## INTERPRETATION OF PROBLEM 
##The reference mean level of Salmonella in ice cream is 0.3 MPN/g. The hypotheses of interest are: H0 (null hypothesis): mean = 0.3; H1 (alternative hypothesis): mean > 0.3, tested at the 95% confidence level. If the p-value is less than 0.05, the data support the alternative hypothesis. The degrees of freedom are n - 1 = 8, where n = 9. From the reference t-table, our t-value of 2.205059 lies between the one-tailed critical values for p = 0.05 (1.860) and p = 0.025 (2.306), so the p-value is between 0.025 and 0.05. Because this is less than 0.05, the alternative hypothesis is supported.
##Thus, the mean Salmonella level in the given batch of ice cream is higher than 0.3 MPN/g.

##T-TEST PROBLEM 2

###Suppose there is a debate about whether football players are fitter than ballet dancers. We may use a two-sample t-test to compare the fitness results of the two groups.

# Two-sample t statistic with a pooled (equal-variance) standard error.
#   d1, d2: numeric vectors holding the two independent samples.
# Returns (mean(d1) - mean(d2)) / sqrt(sp^2 * (1/n1 + 1/n2)), where sp^2 is
# the pooled variance on n1 + n2 - 2 degrees of freedom.
tt <- function(d1, d2) {
  n1 <- length(d1)
  n2 <- length(d2)
  ss1 <- sd(d1)^2 * (n1 - 1)
  ss2 <- sd(d2)^2 * (n2 - 1)
  pooled.var <- (ss1 + ss2) / (n1 + n2 - 2)
  std.err <- sqrt(pooled.var * (1 / n1 + 1 / n2))
  (mean(d1) - mean(d2)) / std.err
}

# Fitness scores of the two groups.
ballet <- c(89.2, 78.2, 89.3, 88.3, 87.3, 90.1, 95.2, 94.3, 78.3, 89.3)
football <- c(79.3, 78.3, 85.3, 79.3, 88.9, 91.2, 87.2, 89.2, 93.3, 79.9)
ttt <- tt(ballet, football)
print(ttt)
## [1] 1.094723
##INTERPRETATION OF PROBLEM 2

##The t-table does not give the precise probability of every t-value, but it can be used for hypothesis testing. According to the given reference index, our value of 1.0947 is below 1.372, the critical value for p = 0.10. So at the 95% confidence level the p-value is greater than 0.05, which supports the null hypothesis. This means that we cannot distinguish between the fitness levels of football players and ballet dancers.


## T-TEST PROBLEM 3 CHOLESTEROL

#' Two-sample t statistic, pooled-variance or paired.
#'
#' @param d3,d4 Numeric vectors holding the two samples.
#' @param paired If FALSE (the default, matching the previous behaviour) the
#'   samples are treated as independent and the pooled-variance statistic on
#'   n3 + n4 - 2 degrees of freedom is returned. If TRUE the samples are
#'   treated as matched pairs and the statistic of the differences d3 - d4 is
#'   returned (length(d3) - 1 degrees of freedom).
#' @return The t statistic (a single number).
tt1 <- function(d3, d4, paired = FALSE) {
  if (paired) {
    if (length(d3) != length(d4)) {
      stop("paired samples must have the same length", call. = FALSE)
    }
    delta <- d3 - d4
    return(mean(delta) / (sd(delta) / sqrt(length(delta))))
  }
  n3 <- length(d3)
  n4 <- length(d4)
  spsq <- (sd(d3)^2 * (n3 - 1) + sd(d4)^2 * (n4 - 1)) / (n3 + n4 - 2)
  (mean(d3) - mean(d4)) / sqrt(spsq * ((1 / n3) + (1 / n4)))
}

# Cholesterol readings entered row by row: each row is one subject's
# (Before, After) pair.
data <- c(220, 200, 240, 210, 225, 210, 180, 170, 210, 220,
          190, 180, 195, 190, 200, 190, 210, 220, 240, 210)
M <- matrix(data, nrow = 10, byrow = TRUE)
colnames(M) <- c("Before", "After")
(M)
##       Before After
##  [1,]    220   200
##  [2,]    240   210
##  [3,]    225   210
##  [4,]    180   170
##  [5,]    210   220
##  [6,]    190   180
##  [7,]    195   190
##  [8,]    200   190
##  [9,]    210   220
## [10,]    240   210
# Pooled (independent-samples) statistic, as originally computed.
ttt1 <- tt1(M[, 1], M[, 2])
ttt1
## [1] 1.310597
# Each row holds two readings from the same subject, so the paired statistic
# is the appropriate one for a before/after design; it removes the
# between-subject variation that inflates the pooled standard error.
ttt1.paired <- tt1(M[, 1], M[, 2], paired = TRUE)
ttt1.paired
## INTERPRETATION OF PROBLEM 3
  
##The t-value is greater than the critical value so the alternative hypothesis is true in this case.Hence there is a difference in cholesterol level before and after the exercise and diet program.

DIFFERENT PROTEIN ANALYSIS BY T-TEST:

# Split a sequence string into single characters and pass them to
# Peptides::charge() at pH 7, returning whatever charge() returns for that
# character vector.
#   seq: character string of one-letter residue codes.
seq2charge <- function(seq) {
  library("Peptides")
  residues <- unlist(strsplit(seq, ""))
  charge(residues, pH = 7)
}

# Charge at pH 7 for each of the 20 residues; computed once (the call used to
# be repeated a few lines further down with the same argument).
ch <- seq2charge("ARNDCEQGHILKMFPSTWYV")
## Warning: package 'Peptides' was built under R version 3.4.4
p <- read.csv("amino.csv")
aa <- p$amino1
# NOTE(review): assumes the rows of amino.csv are in the same order as the
# letters of "ARNDCEQGHILKMFPSTWYV" - confirm against the file.
df <- data.frame(aa, ch)

# Character-matrix copy of the table, kept because later calls pass it along.
dff <- as.matrix(df)

h <- dff[, 2]
# Take the numeric charges straight from `ch`. Going through as.matrix() turns
# them into formatted text first, which rounds them to the printed precision
# before as.double() converts them back.
H <- as.double(ch)
names(H) <- aa
barplot(H)

# Split every element of `seq` into its individual characters.
# Returns (invisibly) a list with one character vector per input element.
vecseq <- function(seq) {
  invisible(strsplit(seq, ""))
}

# Look up the value stored for residue `ami` in the lookup table.
#   ami: residue code to look up.
#   dff: accepted for call compatibility but not used; the lookup reads the
#        global data frame `df` (columns `aa` and `ch`).
# Returns the `ch` entries of the rows whose `aa` equals `ami`
# (length zero when there is no match).
hy <- function(ami, dff) {
  hits <- which(df$aa == ami)
  df$ch[hits]
}

library(bio3d)

# Read the 4q21 structure and extract its residue sequence.
# Note: `seq` shadows the name of base::seq() as a variable from here on.
pdb <- read.pdb(file = "4q21.pdb")

seq <- pdbseq(pdb)
#' Map a residue sequence to per-residue values via hy().
#'
#' @param seq Either a character vector with one residue code per element or
#'   a single string of one-letter codes; both are flattened to one code per
#'   position.
#' @param dff Lookup-table argument, passed through to hy() unchanged.
#' @return Numeric vector with exactly one value per residue.
#'
#' The result is built in a freshly allocated vector. It used to be written
#' into a copy of the global `H`, so a sequence shorter than whatever `H`
#' currently held came back padded with stale values from the earlier run.
seq2hydro <- function(seq, dff) {
  residues <- unlist(vecseq(seq), use.names = FALSE)
  n.res <- length(residues)
  values <- numeric(n.res)
  for (i in seq_len(n.res)) {
    # hy() returns a zero-length result for an unknown code, which makes this
    # assignment fail loudly rather than silently inventing a value.
    values[i] <- hy(residues[i], dff)
  }
  values
}
# Per-residue values for the 4q21 sequence, drawn as a line profile.
# Note: this replaces the 20-element named vector previously stored in H.
H <- seq2hydro(seq, dff)

plot(H, ylab = "Charge", xlab = "Residue No", type = "l")

#' Moving average over complete windows.
#'
#' @param HH Numeric vector.
#' @param w Window width (number of consecutive values averaged).
#' @return Numeric vector of length(HH) - w + 1 window means, where element i
#'   is mean(HH[i:(i + w - 1)]); numeric(0) when HH is shorter than w.
#'
#' Two defects of the previous version are fixed:
#' * `i:i+w-1` parses as `(i:i) + w - 1`, a single index, so every "mean" was
#'   just the element HH[i + w - 1]; the window is now `i:(i + w - 1)`.
#' * the result no longer starts with an extra copy of HH[1], which was only
#'   the seed used to grow the vector and not a window mean.
movav <- function(HH, w) {
  n.win <- length(HH) - w + 1
  if (n.win < 1) {
    return(numeric(0))
  }
  vapply(
    seq_len(n.win),
    function(i) mean(HH[i:(i + w - 1)]),
    numeric(1)
  )
}

# Smooth the per-residue profile with a window of 7 and plot it.
dolittle <- movav(H, 7)
plot(dolittle, ylab = "Charge", xlab = "Residue No", type = "l")

# Read two structures and extract their residue sequences.
z1 <- read.pdb("2mqu.pdb")
seq1 <- pdbseq(z1)
z2 <- read.pdb("2oau.pdb")
seq2 <- pdbseq(z2)

# Per-residue profiles of both proteins.
hyseq1 <- seq2hydro(seq1, dff)
hyseq2 <- seq2hydro(seq2, dff)

# Welch (unequal-variance) two-sample t-test, two-sided, at the 95% level.
T1 <- t.test(hyseq1, hyseq2,
             alternative = "two.sided", mu = 0,
             paired = FALSE, var.equal = FALSE,
             conf.level = 0.95)
print(T1)
## 
##  Welch Two Sample t-test
## 
## data:  hyseq1 and hyseq2
## t = -0.55562, df = 186.96, p-value = 0.5791
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1086704  0.0609082
## sample estimates:
##   mean of x   mean of y 
## -0.01009846  0.01378264

PROJECT FOR A MEMBRANE AND A CYTOSOLIC PROTEIN:

library(bio3d)
library(Peptides)

# Membrane protein: read 2rpb and extract its residue sequence.
mem.rpb <- read.pdb(file = "2rpb.pdb")
seq.rpb <- pdbseq(mem.rpb)

# Amino-acid descriptors for the membrane protein's residues, summarised
# column by column.
seq.rpb.d <- aaDescriptors(seq.rpb)
summary(seq.rpb.d)
##      PP1.1              PP2.1             PP3.1              KF1.1        
##  Min.   :-1.00000   Min.   :-1.0000   Min.   :-1.00000   Min.   :-1.5600  
##  1st Qu.:-0.94000   1st Qu.:-0.6400   1st Qu.:-0.24000   1st Qu.:-0.7400  
##  Median : 0.06000   Median :-0.4300   Median :-0.14000   Median :-0.4700  
##  Mean   :-0.08292   Mean   :-0.3142   Mean   :-0.07743   Mean   :-0.1777  
##  3rd Qu.: 0.80000   3rd Qu.: 0.0300   3rd Qu.: 0.31000   3rd Qu.: 0.5800  
##  Max.   : 1.00000   Max.   : 1.0000   Max.   : 1.00000   Max.   : 2.0600  
##      KF2.1             KF3.1             KF4.1             KF5.1        
##  Min.   :-1.9600   Min.   :-1.6100   Min.   :-1.5700   Min.   :-1.7000  
##  1st Qu.:-0.7100   1st Qu.:-0.9700   1st Qu.:-0.7300   1st Qu.:-0.9200  
##  Median :-0.1600   Median :-0.1200   Median :-0.1600   Median :-0.5400  
##  Mean   :-0.1574   Mean   : 0.2178   Mean   : 0.1736   Mean   :-0.1805  
##  3rd Qu.: 0.1900   3rd Qu.: 1.7900   3rd Qu.: 0.8100   3rd Qu.: 0.5000  
##  Max.   : 2.1000   Max.   : 2.0400   Max.   : 1.8700   Max.   : 2.0000  
##      KF6.1             KF7.1             KF8.1              KF9.1         
##  Min.   :-2.0500   Min.   :-1.8900   Min.   :-2.30000   Min.   :-2.30000  
##  1st Qu.:-0.8100   1st Qu.:-1.0700   1st Qu.:-0.39000   1st Qu.:-0.46000  
##  Median : 0.0300   Median :-0.2000   Median : 0.06000   Median : 0.21000  
##  Mean   :-0.2591   Mean   :-0.2119   Mean   : 0.01071   Mean   : 0.05965  
##  3rd Qu.: 0.3700   3rd Qu.: 0.8400   3rd Qu.: 0.47000   3rd Qu.: 0.66000  
##  Max.   : 2.4100   Max.   : 1.5200   Max.   : 2.36000   Max.   : 1.71000  
##      KF10.1              Z1.1               Z2.1              Z3.1        
##  Min.   :-2.33000   Min.   :-4.36000   Min.   :-4.0600   Min.   :-3.5000  
##  1st Qu.:-0.28000   1st Qu.:-2.59000   1st Qu.:-2.1800   1st Qu.:-1.5400  
##  Median : 0.53000   Median : 0.75000   Median :-0.2200   Median :-1.1200  
##  Mean   : 0.02956   Mean   : 0.07885   Mean   :-0.4954   Mean   :-0.5196  
##  3rd Qu.: 0.70000   3rd Qu.: 3.11000   3rd Qu.: 0.9300   3rd Qu.: 0.6000  
##  Max.   : 1.63000   Max.   : 3.98000   Max.   : 3.9400   Max.   : 3.7500  
##       Z4.1              Z5.1              F1.1              F2.1        
##  Min.   :-3.0400   Min.   :-2.6500   Min.   :-1.3870   Min.   :-2.2190  
##  1st Qu.:-1.3900   1st Qu.:-0.1700   1st Qu.:-1.3320   1st Qu.:-0.4440  
##  Median :-0.8400   Median : 0.2600   Median :-0.8800   Median : 0.5360  
##  Mean   :-0.6002   Mean   : 0.2838   Mean   :-0.4078   Mean   : 0.1471  
##  3rd Qu.: 0.0400   3rd Qu.: 0.7500   3rd Qu.: 0.2070   3rd Qu.: 0.5720  
##  Max.   : 3.9000   Max.   : 2.0000   Max.   : 1.5240   Max.   : 1.3880  
##       F3.1               F4.1              F5.1              F6.1         
##  Min.   :-1.65600   Min.   :-2.0800   Min.   :-1.1150   Min.   :-1.76200  
##  1st Qu.:-0.58400   1st Qu.:-0.1750   1st Qu.:-0.2290   1st Qu.:-1.10800  
##  Median : 0.02900   Median : 0.3330   Median :-0.1690   Median : 0.16900  
##  Mean   :-0.04194   Mean   : 0.3839   Mean   : 0.0538   Mean   : 0.05325  
##  3rd Qu.: 0.51600   3rd Qu.: 1.0260   3rd Qu.: 0.0630   3rd Qu.: 1.03800  
##  Max.   : 2.06900   Max.   : 1.9040   Max.   : 3.8470   Max.   : 2.72800  
##       T1.1              T2.1              T3.1               T4.1        
##  Min.   :-10.610   Min.   :-3.5400   Min.   :-2.34000   Min.   :-1.9600  
##  1st Qu.: -5.870   1st Qu.:-0.9400   1st Qu.:-0.49000   1st Qu.:-0.3900  
##  Median : -4.620   Median :-0.2800   Median : 0.28000   Median : 0.3100  
##  Mean   : -4.377   Mean   : 0.1591   Mean   : 0.09106   Mean   : 0.3085  
##  3rd Qu.: -3.030   3rd Qu.: 0.7500   3rd Qu.: 0.63000   3rd Qu.: 1.1000  
##  Max.   :  5.730   Max.   : 3.8900   Max.   : 1.39000   Max.   : 1.6400  
##       T5.1           VHSE1.1            VHSE2.1            VHSE3.1        
##  Min.   :-0.790   Min.   :-1.47000   Min.   :-1.67000   Min.   :-2.63000  
##  1st Qu.:-0.210   1st Qu.:-1.15000   1st Qu.:-0.86000   1st Qu.:-0.41000  
##  Median : 0.430   Median :-0.20000   Median :-0.14000   Median :-0.17000  
##  Mean   : 0.511   Mean   :-0.07301   Mean   :-0.02735   Mean   :-0.06425  
##  3rd Qu.: 0.950   3rd Qu.: 0.76000   3rd Qu.: 0.67000   3rd Qu.: 0.36000  
##  Max.   : 3.250   Max.   : 1.52000   Max.   : 2.06000   Max.   : 1.79000  
##     VHSE4.1           VHSE5.1           VHSE6.1          VHSE7.1       
##  Min.   :-1.9100   Min.   :-2.6800   Min.   :-1.610   Min.   :-1.6100  
##  1st Qu.:-1.8000   1st Qu.:-0.5300   1st Qu.:-1.400   1st Qu.:-0.2400  
##  Median :-0.0100   Median : 0.2200   Median :-0.170   Median : 0.0300  
##  Mean   :-0.3459   Mean   :-0.1935   Mean   :-0.255   Mean   : 0.1885  
##  3rd Qu.: 0.3600   3rd Qu.: 0.3000   3rd Qu.: 0.670   3rd Qu.: 0.7300  
##  Max.   : 2.2800   Max.   : 1.6400   Max.   : 1.470   Max.   : 2.0100  
##     VHSE8.1          ProtFP1.1         ProtFP2.1         ProtFP3.1      
##  Min.   :-1.3400   Min.   :-6.6100   Min.   :-8.7200   Min.   :-3.5900  
##  1st Qu.:-0.4100   1st Qu.:-4.9900   1st Qu.:-2.5500   1st Qu.:-2.4900  
##  Median :-0.0300   Median :-2.0000   Median :-1.3300   Median :-1.7100  
##  Mean   : 0.1059   Mean   :-0.2753   Mean   :-0.2528   Mean   :-0.9828  
##  3rd Qu.: 0.3900   3rd Qu.: 5.0400   3rd Qu.: 2.2000   3rd Qu.: 0.7000  
##  Max.   : 3.5600   Max.   : 7.3300   Max.   : 6.6000   Max.   : 4.1800  
##    ProtFP4.1         ProtFP5.1         ProtFP6.1           ProtFP7.1      
##  Min.   :-4.5800   Min.   :-3.2200   Min.   :-3.540000   Min.   :-2.9500  
##  1st Qu.:-1.2700   1st Qu.:-0.4900   1st Qu.:-0.650000   1st Qu.:-0.0800  
##  Median : 1.0200   Median : 0.0600   Median : 0.080000   Median : 0.1000  
##  Mean   : 0.1008   Mean   :-0.1801   Mean   : 0.006637   Mean   : 0.4133  
##  3rd Qu.: 1.3800   3rd Qu.: 0.4800   3rd Qu.: 0.760000   3rd Qu.: 1.7900  
##  Max.   : 3.0000   Max.   : 3.2700   Max.   : 2.910000   Max.   : 1.9900  
##    ProtFP8.1            ST1.1             ST2.1             ST3.1        
##  Min.   :-2.79000   Min.   :-1.8440   Min.   :-1.0100   Min.   :-0.9170  
##  1st Qu.:-0.51000   1st Qu.:-1.1330   1st Qu.:-0.8930   1st Qu.:-0.6270  
##  Median :-0.23000   Median :-0.8880   Median :-0.3790   Median :-0.3250  
##  Mean   :-0.04823   Mean   :-0.8357   Mean   :-0.3141   Mean   :-0.2672  
##  3rd Qu.: 0.87000   3rd Qu.:-0.6290   3rd Qu.:-0.0180   3rd Qu.:-0.0130  
##  Max.   : 1.65000   Max.   : 0.8530   Max.   : 0.7310   Max.   : 1.1000  
##      ST4.1              ST5.1              ST6.1         
##  Min.   :-1.16300   Min.   :-0.93700   Min.   :-3.31700  
##  1st Qu.:-0.21400   1st Qu.:-0.56100   1st Qu.:-0.17500  
##  Median :-0.06600   Median :-0.38700   Median : 0.10100  
##  Mean   :-0.04624   Mean   :-0.04304   Mean   :-0.03138  
##  3rd Qu.: 0.23700   3rd Qu.: 0.54900   3rd Qu.: 0.57000  
##  Max.   : 0.85900   Max.   : 1.12000   Max.   : 1.09100  
##      ST7.1              ST8.1            BLOSUM1.1         BLOSUM2.1      
##  Min.   :-1.09900   Min.   :-0.89400   Min.   :-1.6200   Min.   :-1.2300  
##  1st Qu.:-0.14700   1st Qu.:-0.31100   1st Qu.:-1.1300   1st Qu.:-1.1300  
##  Median :-0.02000   Median :-0.07500   Median : 0.7200   Median :-0.4500  
##  Mean   : 0.09591   Mean   : 0.05606   Mean   : 0.1139   Mean   :-0.3535  
##  3rd Qu.: 0.17500   3rd Qu.: 0.36700   3rd Qu.: 1.1400   3rd Qu.: 0.1900  
##  Max.   : 1.25600   Max.   : 2.52200   Max.   : 1.5500   Max.   : 2.2800  
##    BLOSUM3.1         BLOSUM4.1           BLOSUM5.1      
##  Min.   :-0.9700   Min.   :-1.610000   Min.   :-1.2400  
##  1st Qu.:-0.8000   1st Qu.:-0.360000   1st Qu.:-0.6000  
##  Median :-0.6300   Median : 0.060000   Median :-0.2800  
##  Mean   :-0.2843   Mean   :-0.000708   Mean   :-0.1652  
##  3rd Qu.: 0.3200   3rd Qu.: 0.380000   3rd Qu.: 0.2400  
##  Max.   : 1.7300   Max.   : 1.550000   Max.   : 1.8300  
##    BLOSUM6.1          BLOSUM7.1         BLOSUM8.1       
##  Min.   :-2.02000   Min.   :-1.6200   Min.   :-1.96000  
##  1st Qu.: 0.01000   1st Qu.:-0.3000   1st Qu.:-0.05000  
##  Median : 0.16000   Median : 0.0100   Median : 0.15000  
##  Mean   : 0.08858   Mean   :-0.1111   Mean   : 0.02894  
##  3rd Qu.: 0.28000   3rd Qu.: 0.2100   3rd Qu.: 0.20000  
##  Max.   : 1.19000   Max.   : 1.2100   Max.   : 0.87000  
##    BLOSUM9.1          BLOSUM10.1         MSWHIM1.1         MSWHIM2.1      
##  Min.   :-1.20000   Min.   :-1.29000   Min.   :-1.0000   Min.   :-1.0000  
##  1st Qu.:-0.22000   1st Qu.:-0.28000   1st Qu.:-0.9100   1st Qu.: 0.0800  
##  Median : 0.01000   Median : 0.06000   Median :-0.5100   Median : 0.6700  
##  Mean   : 0.06761   Mean   : 0.05982   Mean   :-0.3746   Mean   : 0.3335  
##  3rd Qu.: 0.25000   3rd Qu.: 0.33000   3rd Qu.: 0.1100   3rd Qu.: 0.7900  
##  Max.   : 1.36000   Max.   : 0.99000   Max.   : 1.0000   Max.   : 1.0000  
##    MSWHIM3.1     
##  Min.   :-1.000  
##  1st Qu.:-0.660  
##  Median :-0.580  
##  Mean   :-0.355  
##  3rd Qu.:-0.160  
##  Max.   : 1.000
# Thermostability value of the membrane protein: mean of the non-zero
# aIndex() values.
# NOTE(review): aIndex() is applied to seq.rpb as given; if that is a vector
# of single residues, each value is a per-residue index and dropping the zeros
# is not the aliphatic index of the whole chain - confirm the intended method.
seq.rpb.i <- aIndex(seq.rpb)
ik <- which(seq.rpb.i == 0.0)
# Keep everything except the zero entries. setdiff() is used because x[-ik]
# would return an empty vector if there were no zeros (ik empty).
seq.rpb.in <- seq.rpb.i[setdiff(seq_along(seq.rpb.i), ik)]
therm.rpb <- mean(seq.rpb.in)
(therm.rpb)
## [1] 310
# Protein interaction: mean Boman index over the sequence.
seq.rpb.pp <- boman(seq.rpb)
bin.pot <- sum(seq.rpb.pp) / length(seq.rpb)
bin.pot
## [1] 1.849735
# The Peptides documentation gives 2.48 as the Boman-index value above which a
# protein has high binding potential; bin.pot is below it, which implies a
# lower affinity to bind other proteins.


#Cytosolic protein:

# Cytosolic protein: read 1cjy and extract its residue sequence.
cyto.cjy <- read.pdb(file = "1cjy.pdb")
seq.cjy <- pdbseq(cyto.cjy)

# Amino-acid descriptors for the cytosolic protein's residues, summarised
# column by column.
seq.cjy.d <- aaDescriptors(seq.cjy)
summary(seq.cjy.d)
##      PP1.1            PP2.1             PP3.1             KF1.1         
##  Min.   :-1.000   Min.   :-1.0000   Min.   :-1.0000   Min.   :-1.56000  
##  1st Qu.:-0.900   1st Qu.:-0.6400   1st Qu.:-0.2400   1st Qu.:-1.04000  
##  Median : 0.060   Median :-0.4000   Median :-0.0800   Median :-0.21000  
##  Mean   :-0.106   Mean   :-0.2787   Mean   :-0.0129   Mean   :-0.07562  
##  3rd Qu.: 0.670   3rd Qu.: 0.0300   3rd Qu.: 0.3700   3rd Qu.: 0.81000  
##  Max.   : 1.000   Max.   : 1.0000   Max.   : 1.0000   Max.   : 2.06000  
##      KF2.1             KF3.1              KF4.1         
##  Min.   :-1.9600   Min.   :-1.61000   Min.   :-1.57000  
##  1st Qu.:-0.7100   1st Qu.:-0.42000   1st Qu.:-0.75000  
##  Median :-0.0700   Median :-0.23000   Median :-0.16000  
##  Mean   :-0.1178   Mean   :-0.03326   Mean   : 0.05175  
##  3rd Qu.: 0.2400   3rd Qu.: 0.45000   3rd Qu.: 0.81000  
##  Max.   : 2.1000   Max.   : 2.04000   Max.   : 1.87000  
##      KF5.1               KF6.1             KF7.1          
##  Min.   :-1.700000   Min.   :-2.0500   Min.   :-1.89e+00  
##  1st Qu.:-0.550000   1st Qu.:-0.8100   1st Qu.:-8.30e-01  
##  Median :-0.100000   Median :-0.4300   Median : 2.40e-01  
##  Mean   : 0.003753   Mean   :-0.3056   Mean   : 4.01e-05  
##  3rd Qu.: 0.500000   3rd Qu.: 0.3700   3rd Qu.: 9.20e-01  
##  Max.   : 2.000000   Max.   : 2.4100   Max.   : 1.52e+00  
##      KF8.1             KF9.1              KF10.1        
##  Min.   :-2.3000   Min.   :-2.30000   Min.   :-2.33000  
##  1st Qu.:-0.7600   1st Qu.:-0.48000   1st Qu.:-0.28000  
##  Median : 0.0600   Median :-0.03000   Median : 0.19000  
##  Mean   :-0.0187   Mean   : 0.03362   Mean   : 0.04022  
##  3rd Qu.: 0.4700   3rd Qu.: 0.74000   3rd Qu.: 0.65000  
##  Max.   : 2.3600   Max.   : 1.71000   Max.   : 1.63000  
##       Z1.1               Z2.1              Z3.1              Z4.1        
##  Min.   :-4.36000   Min.   :-4.0600   Min.   :-3.5000   Min.   :-3.0400  
##  1st Qu.:-2.85000   1st Qu.:-1.7300   1st Qu.:-1.4900   1st Qu.:-1.3900  
##  Median : 0.75000   Median : 0.2600   Median : 0.2600   Median :-0.8200  
##  Mean   :-0.06876   Mean   :-0.3111   Mean   :-0.1939   Mean   :-0.3595  
##  3rd Qu.: 2.39000   3rd Qu.: 0.9300   3rd Qu.: 1.0600   3rd Qu.: 0.6200  
##  Max.   : 3.98000   Max.   : 3.9400   Max.   : 3.7500   Max.   : 3.9000  
##       Z5.1              F1.1              F2.1               F3.1         
##  Min.   :-2.6500   Min.   :-1.3870   Min.   :-2.21900   Min.   :-1.65600  
##  1st Qu.:-0.3800   1st Qu.:-1.2980   1st Qu.:-0.84700   1st Qu.:-0.58400  
##  Median : 0.2600   Median :-0.4070   Median : 0.37800   Median :-0.02400  
##  Mean   : 0.2265   Mean   :-0.2098   Mean   : 0.04858   Mean   :-0.05281  
##  3rd Qu.: 0.7500   3rd Qu.: 0.8860   3rd Qu.: 0.82100   3rd Qu.: 0.60950  
##  Max.   : 2.0000   Max.   : 1.5240   Max.   : 1.38800   Max.   : 2.06900  
##       F4.1              F5.1              F6.1               T1.1        
##  Min.   :-2.0800   Min.   :-1.1150   Min.   :-1.76200   Min.   :-10.610  
##  1st Qu.:-0.1750   1st Qu.:-0.2290   1st Qu.:-0.60500   1st Qu.: -5.870  
##  Median : 0.3330   Median : 0.0070   Median :-0.06800   Median : -4.380  
##  Mean   : 0.3209   Mean   : 0.1453   Mean   :-0.06492   Mean   : -4.263  
##  3rd Qu.: 1.0260   3rd Qu.: 0.1170   3rd Qu.: 0.50200   3rd Qu.: -3.000  
##  Max.   : 1.9040   Max.   : 3.8470   Max.   : 2.72800   Max.   :  5.730  
##       T2.1              T3.1              T4.1              T5.1        
##  Min.   :-3.5400   Min.   :-2.3400   Min.   :-1.9600   Min.   :-0.7900  
##  1st Qu.:-0.9400   1st Qu.:-0.5300   1st Qu.:-0.4000   1st Qu.:-0.2100  
##  Median :-0.4700   Median :-0.0700   Median :-0.1700   Median : 0.3300  
##  Mean   :-0.0336   Mean   :-0.0654   Mean   : 0.1583   Mean   : 0.4958  
##  3rd Qu.: 0.8650   3rd Qu.: 0.6300   3rd Qu.: 1.1000   3rd Qu.: 0.9500  
##  Max.   : 3.8900   Max.   : 1.3900   Max.   : 1.6400   Max.   : 3.2500  
##     VHSE1.1              VHSE2.1            VHSE3.1       
##  Min.   :-1.4700000   Min.   :-1.67000   Min.   :-2.6300  
##  1st Qu.:-0.9900000   1st Qu.:-0.86000   1st Qu.:-0.5000  
##  Median :-0.2000000   Median : 0.00000   Median : 0.1000  
##  Mean   :-0.0008821   Mean   :-0.06416   Mean   :-0.0881  
##  3rd Qu.: 1.0100000   3rd Qu.: 0.61000   3rd Qu.: 0.4300  
##  Max.   : 1.5200000   Max.   : 2.06000   Max.   : 1.7900  
##     VHSE4.1           VHSE5.1            VHSE6.1           VHSE7.1       
##  Min.   :-1.9100   Min.   :-2.68000   Min.   :-1.6100   Min.   :-1.6100  
##  1st Qu.:-0.8000   1st Qu.:-0.32000   1st Qu.:-1.3400   1st Qu.:-0.6400  
##  Median :-0.0100   Median : 0.22000   Median :-0.0100   Median : 0.0300  
##  Mean   :-0.1212   Mean   :-0.09397   Mean   :-0.1817   Mean   : 0.1073  
##  3rd Qu.: 0.5250   3rd Qu.: 0.25000   3rd Qu.: 0.6700   3rd Qu.: 0.9100  
##  Max.   : 2.2800   Max.   : 1.64000   Max.   : 1.4700   Max.   : 2.0100  
##     VHSE8.1           ProtFP1.1         ProtFP2.1         ProtFP3.1      
##  Min.   :-1.34000   Min.   :-6.6100   Min.   :-8.7200   Min.   :-3.5900  
##  1st Qu.:-0.52000   1st Qu.:-4.9900   1st Qu.:-2.5500   1st Qu.:-2.2900  
##  Median :-0.03000   Median :-2.0000   Median :-1.3300   Median :-0.7000  
##  Mean   : 0.05954   Mean   :-0.3529   Mean   :-0.3545   Mean   :-0.3858  
##  3rd Qu.: 0.13000   3rd Qu.: 5.1100   3rd Qu.: 2.1400   3rd Qu.: 0.8900  
##  Max.   : 3.56000   Max.   : 7.3300   Max.   : 6.6000   Max.   : 4.1800  
##    ProtFP4.1         ProtFP5.1         ProtFP6.1          ProtFP7.1       
##  Min.   :-4.5800   Min.   :-3.2200   Min.   :-3.54000   Min.   :-2.95000  
##  1st Qu.:-1.1200   1st Qu.:-1.2300   1st Qu.:-0.65000   1st Qu.:-0.45000  
##  Median : 0.6300   Median :-0.3100   Median :-0.28000   Median : 0.10000  
##  Mean   : 0.1688   Mean   :-0.3168   Mean   :-0.01734   Mean   : 0.05531  
##  3rd Qu.: 1.1100   3rd Qu.: 0.9900   3rd Qu.: 1.04000   3rd Qu.: 0.74000  
##  Max.   : 3.0000   Max.   : 3.2700   Max.   : 2.91000   Max.   : 1.99000  
##    ProtFP8.1            ST1.1             ST2.1             ST3.1        
##  Min.   :-2.79000   Min.   :-1.8440   Min.   :-1.0100   Min.   :-0.9170  
##  1st Qu.:-0.51000   1st Qu.:-1.1330   1st Qu.:-0.4070   1st Qu.:-0.6270  
##  Median : 0.30000   Median :-0.8260   Median :-0.3110   Median :-0.1840  
##  Mean   : 0.01309   Mean   :-0.8093   Mean   :-0.2293   Mean   :-0.1428  
##  3rd Qu.: 0.87000   3rd Qu.:-0.6290   3rd Qu.: 0.0240   3rd Qu.: 0.0790  
##  Max.   : 1.65000   Max.   : 0.8530   Max.   : 0.7310   Max.   : 1.1000  
##      ST4.1             ST5.1              ST6.1            ST7.1         
##  Min.   :-1.1630   Min.   :-0.93700   Min.   :-3.317   Min.   :-1.09900  
##  1st Qu.:-0.2200   1st Qu.:-0.62500   1st Qu.:-0.775   1st Qu.:-0.14700  
##  Median :-0.0650   Median :-0.38700   Median : 0.164   Median : 0.02100  
##  Mean   :-0.0595   Mean   :-0.09555   Mean   :-0.123   Mean   : 0.08658  
##  3rd Qu.:-0.0490   3rd Qu.: 0.54900   3rd Qu.: 1.011   3rd Qu.: 0.17500  
##  Max.   : 0.8590   Max.   : 1.12000   Max.   : 1.091   Max.   : 1.25600  
##      ST8.1          BLOSUM1.1         BLOSUM2.1         BLOSUM3.1      
##  Min.   :-0.894   Min.   :-1.6200   Min.   :-1.2300   Min.   :-0.9700  
##  1st Qu.:-0.311   1st Qu.:-1.1350   1st Qu.:-0.8600   1st Qu.:-0.8000  
##  Median :-0.001   Median : 0.7200   Median :-0.4500   Median :-0.5800  
##  Mean   : 0.115   Mean   : 0.1026   Mean   :-0.1869   Mean   :-0.2003  
##  3rd Qu.: 0.367   3rd Qu.: 1.1400   3rd Qu.: 0.2200   3rd Qu.: 0.3500  
##  Max.   : 2.522   Max.   : 1.5500   Max.   : 2.2800   Max.   : 1.7300  
##    BLOSUM4.1          BLOSUM5.1          BLOSUM6.1      
##  Min.   :-1.61000   Min.   :-1.24000   Min.   :-2.0200  
##  1st Qu.:-0.36000   1st Qu.:-0.55000   1st Qu.: 0.0100  
##  Median : 0.00000   Median : 0.02000   Median : 0.2000  
##  Mean   : 0.00575   Mean   :-0.09447   Mean   : 0.1072  
##  3rd Qu.: 0.38000   3rd Qu.: 0.24000   3rd Qu.: 0.3700  
##  Max.   : 1.55000   Max.   : 1.83000   Max.   : 1.1900  
##    BLOSUM7.1           BLOSUM8.1          BLOSUM9.1      
##  Min.   :-1.620000   Min.   :-1.96000   Min.   :-1.2000  
##  1st Qu.:-0.130000   1st Qu.:-0.35000   1st Qu.:-0.3500  
##  Median : 0.010000   Median : 0.13000   Median : 0.1500  
##  Mean   : 0.007506   Mean   :-0.07026   Mean   : 0.0719  
##  3rd Qu.: 0.210000   3rd Qu.: 0.20000   3rd Qu.: 0.4300  
##  Max.   : 1.210000   Max.   : 0.87000   Max.   : 1.3600  
##    BLOSUM10.1         MSWHIM1.1         MSWHIM2.1         MSWHIM3.1      
##  Min.   :-1.29000   Min.   :-1.0000   Min.   :-1.0000   Min.   :-1.0000  
##  1st Qu.:-0.28000   1st Qu.:-0.7400   1st Qu.: 0.0800   1st Qu.:-0.7500  
##  Median :-0.02000   Median :-0.5100   Median : 0.6700   Median :-0.3400  
##  Mean   :-0.06097   Mean   :-0.2829   Mean   : 0.4127   Mean   :-0.3768  
##  3rd Qu.: 0.30000   3rd Qu.: 0.1400   3rd Qu.: 0.8300   3rd Qu.:-0.1600  
##  Max.   : 0.99000   Max.   : 1.0000   Max.   : 1.0000   Max.   : 1.0000
# Thermostability value of the cytosolic protein: mean of the non-zero
# aIndex() values.
# NOTE(review): aIndex() is applied to seq.cjy as given; if that is a vector
# of single residues, each value is a per-residue index and dropping the zeros
# is not the aliphatic index of the whole chain - confirm the intended method.
seq.cjy.i <- aIndex(seq.cjy)
jk <- which(seq.cjy.i == 0.0)
# Keep everything except the zero entries. setdiff() is used because x[-jk]
# would return an empty vector if there were no zeros (jk empty).
seq.cjy.in <- seq.cjy.i[setdiff(seq_along(seq.cjy.i), jk)]
therm.cjy <- mean(seq.cjy.in)
(therm.cjy)
## [1] 312.5155
# Protein interaction: mean Boman index over the sequence.
seq.cjy.pp <- boman(seq.cjy)
bin.pot1 <- sum(seq.cjy.pp) / length(seq.cjy)
(bin.pot1)
## [1] 1.445413
# The Peptides documentation gives 2.48 as the Boman-index value above which a
# protein has high binding potential; bin.pot1 is below it, which implies a
# lower affinity to bind other proteins.
#Principal components
#A function for protein pca

protanapca<-function(seq, scale. = FALSE){
  # Principal component analysis of six per-element sequence properties.
  #
  # Args:
  #   seq:    sequence vector passed unchanged to charge(), boman(), hmoment(),
  #           hydrophobicity(), pI() and mw()
  #           (presumably the one-letter residues from pdbseq() -- confirm).
  #   scale.: forwarded to prcomp(); the default FALSE reproduces the original
  #           unscaled analysis. Set TRUE to give every property unit variance
  #           so that large-valued columns do not dominate the components.
  #
  # Returns:
  #   The "prcomp" object fitted on the six property columns.
  library("bio3d")
  library("Peptides")
  net_charge<-charge(seq, pH = 7, pKscale = "EMBOSS")
  boman_idx<-boman(seq)
  h_moment<-hmoment(seq, angle = 100, window = 11)
  hydrophob<-hydrophobicity(seq, scale = "KyteDoolittle")
  # Locals are deliberately not called pI/mw so they do not shadow the
  # functions being called.
  iso_point<-pI(seq,pKscale = "EMBOSS")
  mol_weight<-mw(seq, monoisotopic = FALSE)
  protana<-cbind(net_charge,boman_idx,h_moment,hydrophob,iso_point,mol_weight)
  # These labels become the variable names shown in biplot().
  colnames(protana)<-c("CHARGE","P-P","AMPIPHILLICITY","HYDRPHOBICITY","ISOELECTRIC","MOL WEIGHT")
  prcomp(protana, scale. = scale.)
}
#A function for protein analysis of different properties: 

protana<-function(seq){
  # Tabulate six properties for every element of `seq`.
  #
  # Returns a matrix whose first column "AA" holds `seq` itself, followed by
  # the columns CHARGE, P-P, AMPIPHILLICITY, HYDRPHOBICITY, ISOELECTRIC and
  # MOL WEIGHT.
  # NOTE(review): when `seq` is a character vector the final cbind() coerces
  # the whole matrix to character, so the property values come back as
  # strings -- convert with as.numeric() before doing arithmetic on them.
  library("bio3d")
  library("Peptides")
  property_labels <- c("CHARGE", "P-P", "AMPIPHILLICITY", "HYDRPHOBICITY",
                       "ISOELECTRIC", "MOL WEIGHT")
  net_charge <- charge(seq, pH = 7, pKscale = "EMBOSS")
  boman_idx <- boman(seq)
  h_moment <- hmoment(seq, angle = 100, window = 11)
  hydrophob <- hydrophobicity(seq, scale = "KyteDoolittle")
  iso_point <- pI(seq, pKscale = "EMBOSS")
  mol_weight <- mw(seq, monoisotopic = FALSE)
  # Property columns first, then the sequence is prepended as column "AA".
  properties <- cbind(net_charge, boman_idx, h_moment, hydrophob,
                      iso_point, mol_weight)
  colnames(properties) <- property_labels
  result <- cbind(seq, properties)
  colnames(result) <- c("AA", property_labels)
  result
}

#Membrane protein
# Read the 2RPB structure, extract its sequence, then build the property
# table and the PCA with the two helper functions defined above.
mem.rpb <- read.pdb("2rpb.pdb")
seq.rpb <- pdbseq(mem.rpb)
rpb.a   <- protana(seq.rpb)
pca.rpb <- protanapca(seq.rpb)
# Two panels side by side: 2RPB fills the first, 1CJY the second.
par(mfrow = c(1, 2))
biplot(pca.rpb, main = "2RPB")

#Cytosolic protein
# Same pipeline for the 1CJY structure.
cyto.cjy <- read.pdb("1cjy.pdb")
seq.cjy  <- pdbseq(cyto.cjy)
cjy.a    <- protana(seq.cjy)
pca.cjy  <- protanapca(seq.cjy)
biplot(pca.cjy, main = "1CJY")

#CONCLUSION
#The membrane and cytosolic proteins have equal thermal stability.
#The protein-protein interaction parameter indicates that, for proteins of equal thermal stability, the cytosolic protein has the higher binding potential.
#The PCA shows that, of the six parameters chosen for the analysis of these proteins, molecular weight is an outlier and has minimal relation to protein functionality, whereas protein-protein interaction and hydrophobicity are inversely related.
#The other four factors, such as charge, pI, amphiphilicity and polarity, are related factors for a protein irrespective of its origin.
#Hence a conclusive outcome of the above PCA is that, for proteins of different origins, hydrophobicity and protein-protein interaction are inversely related irrespective of origin.
#The statistical tools used are PCA, biplot and the self-made functions protanapca (which gives the PCA values of any protein sequence) and protana (which gives a matrix of all amino acids vs six parameters for any given protein).

PROJECT 2:

A webserver based bio3d application to explore molecular dynamics

1Z4A

1Z4A

nma fluctuations

nma fluctuations

overlapprotein

overlapprotein

PROJECT 3

REAL DATA ANALYSIS

# Load one CSV per district; each table is kept in its own data frame and
# processed district by district below.
ali    <- read.csv("Alipurduarr.csv")
bir    <- read.csv("Birbhumm.csv")
bur    <- read.csv("Burdwann.csv")
coo    <- read.csv("Coochbeharr.csv")
d.dina <- read.csv("D Dinajpur.csv")
e.med  <- read.csv("E Medinipurr.csv")
how    <- read.csv("Howrahh.csv")
jal    <- read.csv("Jalpaigurii.csv")
kol    <- read.csv("Kolkataa.csv")
mur    <- read.csv("Murshidabadd.csv")
npgs   <- read.csv("N 24 Pgs.csv")



# Alipurduar: sex as character plus a constant district label per record.
sex1 <- as.character(ali$SEX)
distn <- rep("Alipurduarr", times = nrow(ali))
# Bind the AGE, HB, MCV, MCH, HBA0 and HBA2 columns side by side.
alipur <- cbind(
  ali$AGE, ali$HB, ali$MCV,
  ali$MCH, ali$HBA0, ali$HBA2
)
# Appending the two character columns turns the combined matrix into character.
alipura1 <- cbind(alipur, distn, sex1)
summary(alipura1)
##        V1            V2            V3            V4            V5     
##  16     :143   11.3   : 43   72.3   : 13   22.9   : 26   218    : 29  
##  13     :134   11.6   : 40   70.5   : 12   21.7   : 22   228    : 27  
##  14     :120   11     : 39   71.3   : 12   21.8   : 20   225    : 26  
##  15     :115   10.7   : 35   71.8   : 11   20.2   : 19   226    : 26  
##  17     :115   10.8   : 35   72.9   : 11   20.9   : 19   217    : 25  
##  12     :111   12     : 35   (Other):993   22.7   : 19   221    : 24  
##  (Other):315   (Other):826   NA's   :  1   (Other):928   (Other):896  
##        V6              distn      sex1   
##  0      :143   Alipurduarr:1053    :  1  
##  2.8    :102                      f:  3  
##  2.6    : 98                      F:595  
##  2.7    : 97                      m:  1  
##  3      : 91                      M:453  
##  (Other):519                             
##  NA's   :  3
# Birbhum: sex as character plus a constant district label per record.
sex3 <- as.character(bir$SEX)
distn2 <- rep("Birbhumm", times = nrow(bir))
# Bind the AGE, HB, MCV, MCH, HBA0 and HBA2 columns side by side.
birbhum <- cbind(
  bir$AGE, bir$HB, bir$MCV,
  bir$MCH, bir$HBA0, bir$HBA2
)
# Appending the two character columns turns the combined matrix into character.
birbhuma1 <- cbind(birbhum, distn2, sex3)
summary(birbhuma1)
##        V1             V2             V3             V4      
##  1      :1121   11.4   :  60   69.7   :  22   20.5   :  29  
##  5      : 207   11.5   :  52   70.7   :  17   20.8   :  28  
##  4      : 203   11.6   :  38   77.6   :  16   21     :  28  
##  6      : 137   11.8   :  36   68.5   :  15   21.2   :  28  
##  7      :  96   11.9   :  36   69.2   :  15   19.9   :  27  
##  3      :  82   (Other): 728   (Other): 865   (Other): 810  
##  (Other): 225   NA's   :1121   NA's   :1121   NA's   :1121  
##        V5             V6            distn2     sex3     
##  87.9   :  61   2.9    : 140   Birbhumm:2071     :1124  
##  87.5   :  54   3      : 128                   F : 326  
##  87.6   :  54   3.2    : 121                   M : 618  
##  87.8   :  52   3.1    : 113                   M`:   3  
##  87     :  48   2.8    : 106                            
##  (Other): 681   (Other): 342                            
##  NA's   :1121   NA's   :1121
# Burdwan: sex as character plus a constant district label per record.
sex4 <- as.character(bur$SEX)
distn3 <- rep("BurdWann", times = nrow(bur))
# Bind the AGE, HB, MCV, MCH, HBA0 and HBA2 columns side by side.
burdwan <- cbind(
  bur$AGE, bur$HB, bur$MCV,
  bur$MCH, bur$HBA0, bur$HBA2
)
# Appending the two character columns turns the combined matrix into character.
burdwana1 <- cbind(burdwan, distn3, sex4)
summary(burdwana1)
##        V1            V2             V3             V4             V5      
##  14     :352   11     : 100   72.3   :  28   22     :  60   88     : 136  
##  13     :351   11.4   :  89   72.5   :  21   22.1   :  48   87.8   : 125  
##  15     :327   11.7   :  77   71     :  20   21.5   :  45   87.9   : 113  
##  12     :324   11.2   :  76   71.2   :  19   21.4   :  44   87.7   : 112  
##  11     :258   11.1   :  73   73.2   :  19   22.4   :  44   87.6   : 105  
##  (Other):646   11.5   :  72   76     :  19   20.7   :  40   (Other):1666  
##  NA's   :  1   (Other):1772   (Other):2133   (Other):1978   NA's   :   2  
##        V6           distn3     sex4    
##  2.6    :358   BurdWann:2259    :   1  
##  2.7    :352                   F: 521  
##  2.8    :311                   M:1737  
##  2.5    :288                           
##  2.9    :209                           
##  (Other):734                           
##  NA's   :  7
# Cooch Behar: sex as character plus a constant district label per record.
sex5 <- as.character(coo$SEX)
distn4 <- rep("Coochbeharr", times = nrow(coo))
# Bind the AGE, HB, MCV, MCH, HBA0 and HBA2 columns side by side.
cooch <- cbind(
  coo$AGE, coo$HB, coo$MCV,
  coo$MCH, coo$HBA0, coo$HBA2
)
# Appending the two character columns turns the combined matrix into character.
coocha1 <- cbind(cooch, distn4, sex5)
summary(coocha1)
##   V1          V2           V3           V4           V5           V6   
##  11:3   10.8   : 2   72.4   : 3   20.2   : 3   84.5   : 3   3.4    :5  
##  12:3   11.2   : 2   69.1   : 2   22.3   : 3   84.6   : 3   3.2    :4  
##  13:9   11.5   : 2   73.1   : 2   20.9   : 2   83.4   : 2   3.3    :3  
##  14:7   11.6   : 2   61.7   : 1   23.4   : 2   84.2   : 2   3.5    :3  
##  15:3   12.6   : 2   64.7   : 1   19     : 1   25.4   : 1   3.6    :3  
##  16:2   12.9   : 2   66.8   : 1   19.7   : 1   5      : 1   3      :2  
##         (Other):15   (Other):17   (Other):15   (Other):15   (Other):7  
##          distn4   sex5  
##  Coochbeharr:27   M:27  
##                         
##                         
##                         
##                         
##                         
## 
# Dakshin Dinajpur: sex as character plus a constant district label per record.
sex6 <- as.character(d.dina$SEX)
distn5 <- rep("D Dinajpur", times = nrow(d.dina))
# Bind the AGE, HB, MCV, MCH, HBA0 and HBA2 columns side by side.
dinaj <- cbind(
  d.dina$AGE, d.dina$HB, d.dina$MCV,
  d.dina$MCH, d.dina$HBA0, d.dina$HBA2
)
# Appending the two character columns turns the combined matrix into character.
dinaja1 <- cbind(dinaj, distn5, sex6)
summary(dinaja1)
##        V1            V2             V3             V4             V5      
##  13     :408   10.2   : 183   80.2   :  85   21.9   : 137   88.1   : 117  
##  14     :368   10.8   : 171   81.2   :  73   20.8   : 125   87.8   : 114  
##  12     :357   11.2   : 154   78.2   :  66   21.3   :  94   88     : 109  
##  15     :282   11.6   : 152   78.6   :  63   22.8   :  94   88.3   : 103  
##  11     :206   11.9   : 129   76.2   :  53   21.5   :  84   87.7   : 102  
##  16     :173   10.6   : 123   69.8   :  45   22.4   :  84   88.2   : 101  
##  (Other):296   (Other):1178   (Other):1705   (Other):1472   (Other):1444  
##        V6             distn5     sex6    
##  2.9    :333   D Dinajpur:2090   F: 694  
##  2.8    :330                     M:1396  
##  2.7    :279                             
##  3      :275                             
##  2.6    :177                             
##  (Other):695                             
##  NA's   :  1
# East Medinipur: sex as character plus a constant district label per record.
sex7 <- as.character(e.med$SEX)
distn6 <- rep("E medinipurr", times = nrow(e.med))
# Bind the AGE, HB, MCV, MCH, HBA0 and HBA2 columns side by side.
e.medini <- cbind(
  e.med$AGE, e.med$HB, e.med$MCV,
  e.med$MCH, e.med$HBA0, e.med$HBA2
)
# Appending the two character columns turns the combined matrix into character.
emedini1 <- cbind(e.medini, distn6, sex7)
summary(emedini1)
##        V1            V2            V3            V4            V5     
##  12     :153   12.8   : 32   65     : 13   22.1   : 24   82.7   : 37  
##  13     :135   12.7   : 29   65.5   : 12   22.6   : 22   82.2   : 35  
##  14     :116   12.6   : 28   70.6   : 11   21.8   : 19   82.4   : 30  
##  15     : 82   11.4   : 24   73.8   : 11   22.8   : 19   82.3   : 29  
##  11     : 80   12.9   : 24   63.7   : 10   22     : 18   82.5   : 29  
##  16     : 66   11.9   : 23   63.2   :  9   22.2   : 18   81.9   : 28  
##  (Other):120   (Other):592   (Other):686   (Other):632   (Other):564  
##        V6               distn6    sex7   
##  2.6    : 89   E medinipurr:752   F:123  
##  2.7    : 88                      M:629  
##  2.5    : 79                             
##  2.8    : 74                             
##  2.4    : 62                             
##  (Other):357                             
##  NA's   :  3
# Howrah: sex as character plus a constant district label per record.
sex8 <- as.character(how$SEX)
distn8 <- rep("Howarhh", times = nrow(how))
# Bind the AGE, HB, MCV, MCH, HBA0 and HBA2 columns side by side.
howrah <- cbind(
  how$AGE, how$HB, how$MCV,
  how$MCH, how$HBA0, how$HBA2
)
# Appending the two character columns turns the combined matrix into character.
howrah1 <- cbind(howrah, distn8, sex8)
summary(howrah1)
##   V1           V2           V3           V4           V5           V6   
##  11:10   12.4   : 4   71.2   : 2   26.4   : 3   87.2   : 4   2.6    :8  
##  12: 9   11.7   : 3   73.2   : 2   21.2   : 2   88     : 4   2.7    :7  
##  13: 4   13.4   : 3   88.1   : 2   21.3   : 2   87     : 3   2.9    :7  
##  14:12   10.6   : 2   62.9   : 1   22.1   : 2   87.4   : 3   3.1    :7  
##  15: 6   11     : 2   67.9   : 1   22.5   : 2   87.6   : 3   2.8    :4  
##  16: 2   11.2   : 2   68.9   : 1   22.6   : 2   88.2   : 3   2.5    :3  
##  17: 2   (Other):29   (Other):36   (Other):32   (Other):25   (Other):9  
##      distn8   sex8  
##  Howarhh:45   F: 8  
##               M:37  
##                     
##                     
##                     
##                     
## 
# Jalpaiguri: sex as character plus a constant district label per record.
sex9 <- as.character(jal$SEX)
distn9 <- rep("Jalpaiguri", times = nrow(jal))
# Bind the AGE, HB, MCV, MCH, HBA0 and HBA2 columns side by side.
jalpai <- cbind(
  jal$AGE, jal$HB, jal$MCV,
  jal$MCH, jal$HBA0, jal$HBA2
)
# Appending the two character columns turns the combined matrix into character.
jalpai1 <- cbind(jalpai, distn9, sex9)
summary(jalpai1)
##        V1            V2            V3            V4            V5     
##  8      : 99   11.4   : 35   70.5   : 10   22.5   : 18   85     : 39  
##  10     : 91   11.3   : 29   71.3   : 10   23     : 18   82     : 35  
##  6      : 61   12     : 27   71.4   : 10   22.7   : 17   83     : 32  
##  9      : 56   12.1   : 27   71.7   : 10   22.6   : 16   89     : 26  
##  5      : 54   11.8   : 26   71.1   :  8   23.1   : 16   84     : 24  
##  13     : 47   12.3   : 25   68.7   :  7   21.6   : 15   88     : 24  
##  (Other):257   (Other):496   (Other):610   (Other):565   (Other):485  
##        V6             distn9    sex9   
##  2.9    : 98   Jalpaiguri:665   F:264  
##  2.8    : 93                    M:401  
##  2.7    : 78                           
##  3      : 66                           
##  3.1    : 60                           
##  (Other):265                           
##  NA's   :  5
# Kolkata: sex as character plus a constant district label per record.
sex10 <- as.character(kol$SEX)
distn10 <- rep("kolkataa", times = nrow(kol))
# Bind the AGE, HB, MCV, MCH, HBA0 and HBA2 columns side by side.
kolkata <- cbind(
  kol$AGE, kol$HB, kol$MCV,
  kol$MCH, kol$HBA0, kol$HBA2
)
# Appending the two character columns turns the combined matrix into character.
kolkata1 <- cbind(kolkata, distn10, sex10)
summary(kolkata1)
##        V1           V2           V3           V4           V5    
##  20     :24   13.3   : 4   85.3   : 4   22.5   : 3   88.8   :10  
##  21     :18   13.5   : 4   71.1   : 2   26.4   : 3   88.2   : 8  
##  22     :17   14.6   : 4   71.3   : 2   26.7   : 3   88.9   : 8  
##  19     :12   14.7   : 4   71.5   : 2   27.1   : 3   88.5   : 7  
##  18     :10   10.9   : 3   76     : 2   27.6   : 3   88.6   : 7  
##  23     : 7   11.8   : 3   79.1   : 2   28.6   : 3   89     : 5  
##  (Other): 8   (Other):74   (Other):82   (Other):78   (Other):51  
##        V6         distn10   sex10 
##  2.6    :17   kolkataa:96   F:21  
##  2.4    :13                 M:75  
##  2.5    :12                       
##  2.7    :12                       
##  2.3    :10                       
##  2.9    : 7                       
##  (Other):25
# Murshidabad: sex as character plus a constant district label per record.
sex12 <- as.character(mur$SEX)
distn12 <- rep("Murshidabadd", times = nrow(mur))
# Bind the AGE, HB, MCV, MCH, HBA0 and HBA2 columns side by side.
murshi <- cbind(
  mur$AGE, mur$HB, mur$MCV,
  mur$MCH, mur$HBA0, mur$HBA2
)
# Appending the two character columns turns the combined matrix into character.
murshi1 <- cbind(murshi, distn12, sex12)
summary(murshi1)
##        V1            V2            V3            V4            V5     
##  13     : 49   11     : 15   80     : 10   24     : 11   86.8   : 15  
##  14     : 44   10.2   : 14   82.9   :  7   25.3   : 11   87     : 13  
##  15     : 44   10.7   : 12   81.6   :  6   25.9   :  8   87.1   : 13  
##  12     : 40   13.9   : 12   81.7   :  6   21.1   :  7   87.3   : 13  
##  16     : 39   10     : 11   82.5   :  6   25.1   :  7   87.5   : 13  
##  11     : 36   11.1   : 10   90.4   :  5   25.4   :  7   87.8   : 12  
##  (Other):108   (Other):286   (Other):320   (Other):309   (Other):281  
##        V6              distn12    sex12  
##  2.7    : 49   Murshidabadd:360   F: 81  
##  3      : 43                      M:279  
##  2.9    : 40                             
##  2.6    : 38                             
##  2.8    : 35                             
##  3.1    : 32                             
##  (Other):123
# North 24 Parganas: sex as character plus a constant district label per record.
sex13 <- as.character(npgs$SEX)
distn13 <- rep("N 24 Pgs", times = nrow(npgs))
# Bind the AGE, HB, MCV, MCH, HBA0 and HBA2 columns side by side.
Npgs <- cbind(
  npgs$AGE, npgs$HB, npgs$MCV,
  npgs$MCH, npgs$HBA0, npgs$HBA2
)
# Appending the two character columns turns the combined matrix into character.
npgs1 <- cbind(Npgs, distn13, sex13)
summary(npgs1)
##        V1           V2            V3            V4            V5     
##  13     :38   11.9   : 14   67.2   :  5   21.3   : 10   87.7   : 17  
##  11     :29   11.1   : 13   69.1   :  4   21.2   :  8   87.5   : 15  
##  12     :28   11.6   : 13   69.2   :  4   21.5   :  7   87.3   : 13  
##  10     :23   11.7   : 12   69.5   :  4   21.7   :  7   87.8   : 13  
##  8      :21   11     : 10   70.5   :  4   22.2   :  7   87.6   : 12  
##  9      :21   11.8   :  9   76.7   :  4   21.4   :  6   87.4   : 11  
##  (Other):62   (Other):151   (Other):197   (Other):177   (Other):141  
##        V6         distn13    sex13  
##  2.7    :51   N 24 Pgs:222   F: 28  
##  2.8    :39                  M:194  
##  2.6    :28                         
##  2.5    :21                         
##  2.9    :20                         
##  3      :16                         
##  (Other):47
# Stack the per-district character tables (measurements + district + sex);
# the column names come from the first table, i.e. V1..V6, distn, sex1.
master<-rbind(alipura1,birbhuma1,burdwana1,coocha1,dinaja1,emedini1,howrah1,jalpai1,kolkata1,murshi1,npgs1)
master<-data.frame(master)
master.f<-na.omit(master)
# Stack the measurement-only matrices in the SAME district order as `master`
# (Birbhum before Burdwan) so that row i of master1.f and row i of master.f
# describe the same record; the grouping variable for the PCA plot is taken
# from master.f. This assumes na.omit() drops the same rows from both tables,
# i.e. the district and sex columns contain no NA of their own.
master1<-rbind(alipur,birbhum,burdwan,cooch,dinaj,e.medini,howrah,jalpai,kolkata,murshi,Npgs)
# Labels follow the order in which the columns were bound for every district:
# AGE, HB, MCV, MCH, HBA0, HBA2.
colnames(master1)<-c("age","hb","mcv","mch","hba0","hba2")
master1.f<-na.omit(master1)
# Scale to unit variance so no single measurement dominates the components.
pca<-prcomp(master1.f,scale.= TRUE)
biplot(pca)

# Grouping variable for the PCA plot: the sex column of the NA-free master
# table. The column is called "sex1" (the name carried over from the first
# district table), so it is referenced by its full name instead of relying on
# `$` partial matching of "sex".
g_class<- master.f$sex1
library("ggbiplot")
# Biplot coloured by sex, with group ellipses and the correlation circle.
g<-ggbiplot(pca,obs.scale = 0.3,var.scale = 0.3,groups = g_class,ellipse = TRUE,circle = TRUE,pc.biplot = TRUE)
# Restrict both axes to [-5, 5]; points outside that window are dropped from
# the plot (reported as a "Removed ... rows" warning).
g<-g+xlim(-5,5)
g<-g+ylim(-5,5)
g<-g+theme(legend.direction = 'horizontal',legend.position = 'top')
print(g)
## Warning: Removed 18 rows containing missing values (geom_point).