#setwd("/Source files")
# Absorbance spectrum: plot the whole scan, zoom into the 500-700 window and
# mark the wavelength at which the absorbance in that window is highest.
p <- read.csv("p.spectra.csv")
x <- p$lam
y <- p$absorb
par(mfrow = c(1, 1))
plot(x, y, col = "red", xlab = "Lambda", ylab = "Absorbance", cex = 0.3,
     main = "Wavelength Vs Absorbance")
# Indices of the readings strictly between 500 and 700.
m <- which(x > 500 & x < 700)
length(m)
## [1] 1036
wv <- x[m]
AA <- y[m]
length(wv)
## [1] 1036
max(AA)
## [1] 0.386
max(wv)
## [1] 699.95
plot(wv, AA, col = "blue", xlab = "Lambda", cex = 0.5, ylab = "Absorbance",
     main = "Wavelength Vs Absorbance")
maxi <- max(AA)
# which.max() locates the (first) peak without comparing doubles with `==`.
z <- which.max(AA)
l <- wv[z]
wv[z]
## [1] 618.08
# The label and the marker line are derived from the computed peak, so they
# stay correct when the input file changes (they used to be hard-coded).
text(l, maxi, paste0("wvmax=", l, "~~~~~"), pos = 2, col = "red")
abline(v = l, col = "red")
summary(l)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 618.1 618.1 618.1 618.1 618.1 618.1
# Nadia: fraction of rows whose MOLECULAR value is not "normal", plus the
# mean HB, HbA0 and HbF over all rows and over the non-normal rows.
a.nad <- read.csv("nadia.csv")
x <- a.nad$MOLECULAR
n <- length(x)
y <- which(x != "normal")
nalpha <- length(y)
p <- nalpha / n
A <- a.nad$HB
mean.HB.nad <- mean(A)
B <- a.nad$HbA0
mean.HbA0.nad <- mean(B)
C <- a.nad$HbF
mean.HbF.nad <- mean(C)
# Means restricted to the non-normal rows (printed, not stored).
mean(A[y])
## [1] 13.05333
mean(B[y])
## [1] 87.02
mean(C[y])
## [1] 0.3533333
S <- cbind(mean.HB.nad, mean.HbA0.nad, mean.HbF.nad)
summary(S)
## mean.HB.nad mean.HbA0.nad mean.HbF.nad
## Min. :12.95 Min. :87.01 Min. :0.3702
## 1st Qu.:12.95 1st Qu.:87.01 1st Qu.:0.3702
## Median :12.95 Median :87.01 Median :0.3702
## Mean :12.95 Mean :87.01 Mean :0.3702
## 3rd Qu.:12.95 3rd Qu.:87.01 3rd Qu.:0.3702
## Max. :12.95 Max. :87.01 Max. :0.3702
# Birbhum: fraction of rows whose MOLECULAR value is not "normal", plus the
# mean HB, HbA0 and HbF over all rows and over the non-normal rows.
a.bir <- read.csv("birbhum.csv")
m <- a.bir$MOLECULAR
n <- length(m)
s <- which(m != "normal")
nalpha <- length(s)
p <- nalpha / n
D <- a.bir$HB
mean.HB.bir <- mean(D)
E <- a.bir$HbA0
mean.HbA0.bir <- mean(E)
H <- a.bir$HbF
mean.HbF.bir <- mean(H)
# Means restricted to the non-normal rows (printed, not stored).
mean(D[s])
## [1] 12.82069
mean(E[s])
## [1] 86.51724
mean(H[s])
## [1] 0.3172414
U <- cbind(mean.HB.bir, mean.HbA0.bir, mean.HbF.bir)
summary(U)
## mean.HB.bir mean.HbA0.bir mean.HbF.bir
## Min. :12.83 Min. :86.49 Min. :0.336
## 1st Qu.:12.83 1st Qu.:86.49 1st Qu.:0.336
## Median :12.83 Median :86.49 Median :0.336
## Mean :12.83 Mean :86.49 Mean :0.336
## 3rd Qu.:12.83 3rd Qu.:86.49 3rd Qu.:0.336
## Max. :12.83 Max. :86.49 Max. :0.336
# Bankura: fraction of rows whose MOLECULAR value is not "normal", plus the
# mean HB, HbA0 and HbF over all rows and over the non-normal rows.
a.ban <- read.csv("bankura.csv")
z <- a.ban$MOLECULAR
# Filter Bankura's own labels (the previous code filtered a stale `x` left
# over from another district).
x <- which(z != "normal")
nalpha <- length(x)
n <- length(z)
a <- nalpha / n
i <- a.ban$HB
j <- a.ban$HbA0
k <- a.ban$HbF
mean.HB.ban <- mean(i)
mean.HbA0.ban <- mean(j)
mean.HbF.ban <- mean(k)
# Subgroup means now come from the Bankura table (they were taken from the
# Birbhum table before); the previously recorded outputs no longer apply.
mean(a.ban$HB[x])
mean(a.ban$HbA0[x])
mean(a.ban$HbF[x])
w <- cbind(mean.HB.ban, mean.HbA0.ban, mean.HbF.ban)
summary(w)
## mean.HB.ban mean.HbA0.ban mean.HbF.ban
## Min. :12.47 Min. :87.72 Min. :0.3583
## 1st Qu.:12.47 1st Qu.:87.72 1st Qu.:0.3583
## Median :12.47 Median :87.72 Median :0.3583
## Mean :12.47 Mean :87.72 Mean :0.3583
## 3rd Qu.:12.47 3rd Qu.:87.72 3rd Qu.:0.3583
## Max. :12.47 Max. :87.72 Max. :0.3583
# Jalpaiguri: fraction of rows whose MOLECULAR value is not "normal", plus the
# mean HB, HbA0 and HbF over all rows and over the non-normal rows.
a.jal <- read.csv("jalpaiguri.csv")
j <- a.jal$MOLECULAR
n <- length(j)
k <- which(j != "normal")
nalpha <- length(k)
p <- nalpha / n
a <- a.jal$HB
mean.HB.jal <- mean(a)
b <- a.jal$HbA0
mean.HbA0.jal <- mean(b)
c <- a.jal$HbF
mean.HbF.jal <- mean(c)
# Means restricted to the non-normal rows (printed, not stored).
mean(a[k])
## [1] 12.55
mean(b[k])
## [1] 87.68571
mean(c[k])
## [1] 0.2142857
P <- cbind(mean.HB.jal, mean.HbA0.jal, mean.HbF.jal)
summary(P)
## mean.HB.jal mean.HbA0.jal mean.HbF.jal
## Min. :12.48 Min. :87.78 Min. :0.2158
## 1st Qu.:12.48 1st Qu.:87.78 1st Qu.:0.2158
## Median :12.48 Median :87.78 Median :0.2158
## Mean :12.48 Mean :87.78 Mean :0.2158
## 3rd Qu.:12.48 3rd Qu.:87.78 3rd Qu.:0.2158
## Max. :12.48 Max. :87.78 Max. :0.2158
# Murshidabad: fraction of rows whose MOLECULAR value is not "normal", plus
# the mean HB, HbA0 and HbF over all rows and over the non-normal rows.
a.mur <- read.csv("murshidabad.csv")
s <- a.mur$MOLECULAR
n <- length(s)
t <- which(s != "normal")
nalpha <- length(t)
p <- nalpha / n
r <- a.mur$HB
mean.HB.mur <- mean(r)
o <- a.mur$HbA0
mean.HbA0.mur <- mean(o)
l <- a.mur$HbF
mean.HbF.mur <- mean(l)
# Means restricted to the non-normal rows (printed, not stored).
mean(r[t])
## [1] 12.37143
mean(o[t])
## [1] 87.45714
mean(l[t])
## [1] 0.2642857
Q <- cbind(mean.HB.mur, mean.HbA0.mur, mean.HbF.mur)
summary(Q)
## mean.HB.mur mean.HbA0.mur mean.HbF.mur
## Min. :12.37 Min. :87.29 Min. :0.25
## 1st Qu.:12.37 1st Qu.:87.29 1st Qu.:0.25
## Median :12.37 Median :87.29 Median :0.25
## Mean :12.37 Mean :87.29 Mean :0.25
## 3rd Qu.:12.37 3rd Qu.:87.29 3rd Qu.:0.25
## Max. :12.37 Max. :87.29 Max. :0.25
# One-row matrices of the per-district means, one bar chart per measurement.
district_labels <- c("N", "bir", "ban", "J", "M")
HB <- matrix(
  c(mean.HB.nad, mean.HB.bir, mean.HB.ban, mean.HB.jal, mean.HB.mur),
  nrow = 1, dimnames = list(NULL, district_labels)
)
HbA0 <- matrix(
  c(mean.HbA0.nad, mean.HbA0.bir, mean.HbA0.ban, mean.HbA0.jal, mean.HbA0.mur),
  nrow = 1, dimnames = list(NULL, district_labels)
)
HbF <- matrix(
  c(mean.HbF.nad, mean.HbF.bir, mean.HbF.ban, mean.HbF.jal, mean.HbF.mur),
  nrow = 1, dimnames = list(NULL, district_labels)
)
par(mfrow = c(1, 1))
barplot(HB)
barplot(HbA0)
barplot(HbF)
# Box plots of each measurement, one box per district.
nad <- read.csv("nadia.csv")
bir <- read.csv("birbhum.csv")
ban <- read.csv("bankura.csv")
jal <- read.csv("jalpaiguri.csv")
mur <- read.csv("murshidabad.csv")
districts <- list(N = nad, bir = bir, ban = ban, J = jal, M = mur)

# Returns the named column of every district as a list of vectors.
# The districts have different numbers of rows; cbind() recycled the shorter
# columns (hence the old "not a multiple of vector length" warnings) and so
# duplicated observations inside the boxes. boxplot() accepts a list, which
# keeps every group at its true size.
column_by_district <- function(column) {
  lapply(districts, function(d) d[[column]])
}

par(mfrow = c(1, 1))
boxHB <- column_by_district("HB")
boxplot(boxHB, ylab = "HB", xlab = "district", main = "boxplot")
boxMCV <- column_by_district("MCV")
boxplot(boxMCV, ylab = "MCV", xlab = "district", main = "boxplot")
boxMCH <- column_by_district("MCH")
boxplot(boxMCH, ylab = "MCH", xlab = "district", main = "boxplot")
boxHbA0 <- column_by_district("HbA0")
boxplot(boxHbA0, ylab = "HbA0", xlab = "district", main = "boxplot")
boxHbA2 <- column_by_district("HbA2")
boxplot(boxHbA2, ylab = "HbA2", xlab = "district", main = "boxplot")
boxHbF <- column_by_district("HbF")
boxplot(boxHbF, ylab = "HbF", xlab = "district", main = "boxplot")
# Stack the five district tables into one master table with a district tag,
# then draw a box plot of the six numeric measurements.
nad <- read.csv("nadia.csv")
bir <- read.csv("birbhum.csv")
ban <- read.csv("bankura.csv")
jal <- read.csv("jalpaiguri.csv")
mur <- read.csv("murshidabad.csv")

# Output column name -> source column name.
master_cols <- c(HB = "HB", MCV = "MCV", MCH = "MCH", HbA0 = "HbA0",
                 HbA2 = "HbA2", HbF = "HbF", mol = "MOLECULAR")

# Selects the master columns of one district table and appends the district
# name. Working on data frames keeps the measurements numeric; the earlier
# cbind() with a character column coerced every value to text, so the table
# summary counted factor levels and the box plot drew factor codes.
tag_district <- function(d, district) {
  out <- d[, unname(master_cols), drop = FALSE]
  names(out) <- names(master_cols)
  out$distn <- rep(district, nrow(out))  # column name kept from the old table
  out
}

f.n <- tag_district(nad, "nadia")
f.bm <- tag_district(bir, "birbhum")
f.bn <- tag_district(ban, "bankura")
f.j <- tag_district(jal, "jalpaiguri")
f.m <- tag_district(mur, "murshidabad")
Master <- rbind(f.n, f.bm, f.bn, f.j, f.m)
master1 <- data.frame(Master)
mst <- as.matrix(master1)
# The summary now reports numeric quantiles per measurement; the factor-level
# counts recorded from the earlier run no longer apply.
summary(master1)
## BOX plot:
box <- as.matrix(master1[, c("HB", "MCV", "MCH", "HbA0", "HbA2", "HbF")])
boxplot(box, main = "Boxplot")
# Read one district CSV and split it into its numeric part and its labels.
#
# csvfile: path of a CSV file with a header row.
# Returns a list with two elements:
#   NUM - matrix with columns hb, mcv, mch, hba0, hba2, hbf taken from the
#         HB, MCV, MCH, HbA0, HbA2 and HbF columns of the file
#   MUT - the MOLECULAR column of the file
csvxtract <- function(csvfile) {
  records <- read.csv(csvfile, header = TRUE)
  measurements <- cbind(
    hb = records$HB,
    mcv = records$MCV,
    mch = records$MCH,
    hba0 = records$HbA0,
    hba2 = records$HbA2,
    hbf = records$HbF
  )
  list("NUM" = measurements, "MUT" = records$MOLECULAR)
}
# Load every district through csvxtract(), pool the measurements and run a
# principal component analysis on the pooled matrix.
nad <- csvxtract("nadia.csv")
summary(nad)
## Length Class Mode
## NUM 282 -none- numeric
## MUT 47 factor numeric
bir <- csvxtract("birbhum.csv")
summary(bir)
## Length Class Mode
## NUM 300 -none- numeric
## MUT 50 factor numeric
ban <- csvxtract("bankura.csv")
summary(ban)
## Length Class Mode
## NUM 144 -none- numeric
## MUT 24 factor numeric
jal <- csvxtract("jalpaiguri.csv")
summary(jal)
## Length Class Mode
## NUM 114 -none- numeric
## MUT 19 factor numeric
mur <- csvxtract("murshidabad.csv")
summary(mur)
## Length Class Mode
## NUM 120 -none- numeric
## MUT 20 factor numeric
mastcsv <- rbind(nad$NUM, bir$NUM, ban$NUM, jal$NUM, mur$NUM)
colnames(mastcsv) <- c("Hb", "MCV", "MCH", "HbA0", "HbA2", "HbF")
# Classify on the label text rather than on factor integer codes. The codes
# only appear when read.csv() yields factors and c() drops them to integers,
# which depends on the R version; with character labels the old recoding
# produced nothing but NA.
mastgrp <- unlist(lapply(list(nad$MUT, bir$MUT, ban$MUT, jal$MUT, mur$MUT),
                         as.character))
gclass <- factor(ifelse(mastgrp == "normal", "normal", "alphamutation"),
                 levels = c("normal", "alphamutation"))
summary(gclass)
## normal alphamutation
## 71 89
# `scale.` is the full argument name of prcomp(); `scale` relied on partial
# argument matching.
mast.pca <- prcomp(mastcsv, scale. = TRUE)
summary(mast.pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 1.4068 1.1502 1.0408 0.9169 0.8177 0.32420
## Proportion of Variance 0.3299 0.2205 0.1806 0.1401 0.1114 0.01752
## Cumulative Proportion 0.3299 0.5504 0.7309 0.8710 0.9825 1.00000
biplot(mast.pca, main = "Biplot")
par(mfrow = c(1, 1))
screeplot(mast.pca, main = "Screeplot")
barplot(mast.pca$rotation, main = "Barplot")
## ggbiplot :
library(ggbiplot)
## Loading required package: ggplot2
## Loading required package: plyr
## Loading required package: scales
## Loading required package: grid
# PCA biplot coloured by gclass, with group ellipses and a correlation
# circle; the legend is untitled and laid out horizontally above the panel.
g <- ggbiplot(mast.pca, obs.scale = 1, var.scale = 1, groups = gclass,
              ellipse = TRUE, circle = TRUE) +
  scale_color_discrete(name = '') +
  theme(legend.direction = 'horizontal', legend.position = 'top')
print(g)
library(bio3d)
## Warning: package 'bio3d' was built under R version 3.4.4
# Parse the local PDB file and print the structure overview.
pdb <- read.pdb(file = "1b7t.pdb")
print(pdb)
##
## Call: read.pdb(file = "1b7t.pdb")
##
## Total Models#: 1
## Total Atoms#: 8383, XYZs#: 25149 Chains#: 3 (values: A Y Z)
##
## Protein Atoms#: 8249 (residues/Calpha atoms#: 1057)
## Nucleic acid Atoms#: 0 (residues/phosphate atoms#: 0)
##
## Non-protein/nucleic Atoms#: 134 (residues: 108)
## Non-protein/nucleic resid values: [ ADP (1), CA (1), HOH (104), MG (2) ]
##
## Protein sequence:
## FSDPDFQYLAVDAFDGKKNCWVPDEKEGFASAEIQSSKGDEITVKIVADSSTRTVKKDDI
## QSMNPPKFEKLEDMANMTYLNEASVLYNLRSRYTSGLIYTYSGLFCIAVNPYRRLPIYTD
## SVIAKYRGKRKTEIPPHLFSVADNAYQNMVTDRENQSCLITGESGAGKTENTKKVIMYLA
## KVACAEGSLEDQIIQANPVLEAYGNAKTTRNNNSSRFGKFIRIHF...<cut>...GPYP
##
## + attr: atom, xyz, seqres, helix, sheet,
## calpha, remark, call
# Backbone torsion angles of the structure held in `pdb`.
tor <- torsion.pdb(pdb)
om <- tor$omega
phi1 <- tor$phi
psi1 <- tor$psi
s1 <- c(phi1[1:70], psi1[1:70])
summary(s1)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -176.004 -88.690 -46.056 -6.572 113.657 179.408 1
# Drop missing angles with a logical mask. The former x[-which(is.na(x))]
# form returns an EMPTY vector when there is no NA at all.
q <- phi1[!is.na(phi1)]
s <- psi1[!is.na(psi1)]
ss <- c(phi1[1:9], psi1[1:9])
par(mfrow = c(1, 1))
plot(phi1, psi1, pch = 19, xlim = c(-180, 180), ylim = c(-180, 180),
     col = "green", cex = 0.5, main = "RAMACHANDRAN PLOT")
ss <- c(phi1[1:500], psi1[1:500])
# The x axis is the position within `om`, not an angle, so it is no longer
# clipped to 0..180 (that limit hid every value after the 180th).
plot(om, pch = 19, ylim = c(-180, 180), col = "blue", cex = 0.5,
     xlab = "Index", ylab = "omega", main = "OMEGA PLOT")
summary(pdb$atom)
## type eleno elety alt
## Length:8383 Min. : 1 Length:8383 Length:8383
## Class :character 1st Qu.:2096 Class :character Class :character
## Mode :character Median :4192 Mode :character Mode :character
## Mean :4192
## 3rd Qu.:6288
## Max. :8386
## resid chain resno insert
## Length:8383 Length:8383 Min. : 2.0 Length:8383
## Class :character Class :character 1st Qu.: 101.0 Class :character
## Mode :character Mode :character Median : 279.0 Mode :character
## Mean : 340.5
## 3rd Qu.: 557.0
## Max. :1079.0
## x y z o
## Min. :-33.793 Min. :-30.572 Min. :-45.648 Min. :1
## 1st Qu.: -2.071 1st Qu.: -4.072 1st Qu.: -8.161 1st Qu.:1
## Median : 9.854 Median : 10.482 Median : 16.458 Median :1
## Mean : 16.301 Mean : 10.594 Mean : 22.093 Mean :1
## 3rd Qu.: 28.107 3rd Qu.: 23.730 3rd Qu.: 53.596 3rd Qu.:1
## Max. : 88.836 Max. : 56.518 Max. :100.859 Max. :1
## b segid elesy charge
## Min. : 2.00 Length:8383 Length:8383 Length:8383
## 1st Qu.: 41.01 Class :character Class :character Class :character
## Median : 54.78 Mode :character Mode :character Mode :character
## Mean : 55.84
## 3rd Qu.: 69.98
## Max. :100.00
# Coordinates of the C-alpha atoms, shown as a pairs plot and as a 3-D scatter.
# atom.select(..., "calpha") is used instead of elety == "CA": the structure
# overview above lists 1057 C-alpha atoms plus a "CA" hetero residue, whose
# atom name is also "CA", so a plain name match picks up one extra atom.
ca <- atom.select(pdb, "calpha")
xx <- ca$atom
r <- pdb$atom[xx, c("x", "y", "z")]
# The file was previously re-read into pdb1 only to select the same rows;
# reuse the object that is already in memory.
pdb1 <- pdb
B <- xx
Bxyz <- pdb1$atom[B, c("x", "y", "z")]
plot(Bxyz, cex = 0.3)
library(scatterplot3d)
## Warning: package 'scatterplot3d' was built under R version 3.4.4
scatterplot3d(r, highlight.3d = TRUE, col.axis = "blue", col.grid = "green",
              main = "scatterplot3d-1", pch = 20)
## DATA QUIZ ### PROBLEM- Reading .csv files through function:
# Read one district CSV and split it into its numeric part and its labels.
#
# csvfile: path of a CSV file with a header row.
# Returns a list with two elements:
#   NUM - matrix with columns hb, mcv, mch, hba0, hba2, hbf taken from the
#         HB, MCV, MCH, HbA0, HbA2 and HbF columns of the file
#   MUT - the MOLECULAR column of the file
csvxtract <- function(csvfile) {
  records <- read.csv(csvfile, header = TRUE)
  measurements <- cbind(
    hb = records$HB,
    mcv = records$MCV,
    mch = records$MCH,
    hba0 = records$HbA0,
    hba2 = records$HbA2,
    hbf = records$HbF
  )
  list("NUM" = measurements, "MUT" = records$MOLECULAR)
}
# Load every district through csvxtract() and build the pooled measurement
# matrix together with a two-level group factor.
nad <- csvxtract("nadia.csv")
summary(nad)
## Length Class Mode
## NUM 282 -none- numeric
## MUT 47 factor numeric
bir <- csvxtract("birbhum.csv")
summary(bir)
## Length Class Mode
## NUM 300 -none- numeric
## MUT 50 factor numeric
ban <- csvxtract("bankura.csv")
summary(ban)
## Length Class Mode
## NUM 144 -none- numeric
## MUT 24 factor numeric
jal <- csvxtract("jalpaiguri.csv")
summary(jal)
## Length Class Mode
## NUM 114 -none- numeric
## MUT 19 factor numeric
mur <- csvxtract("murshidabad.csv")
summary(mur)
## Length Class Mode
## NUM 120 -none- numeric
## MUT 20 factor numeric
mastcsv <- rbind(nad$NUM, bir$NUM, ban$NUM, jal$NUM, mur$NUM)
colnames(mastcsv) <- c("Hb", "MCV", "MCH", "HbA0", "HbA2", "HbF")
# Classify on the label text rather than on factor integer codes. The codes
# only appear when read.csv() yields factors and c() drops them to integers,
# which depends on the R version; with character labels the old recoding
# produced nothing but NA.
mastgrp <- unlist(lapply(list(nad$MUT, bir$MUT, ban$MUT, jal$MUT, mur$MUT),
                         as.character))
gclass <- factor(ifelse(mastgrp == "normal", "normal", "alphamutation"),
                 levels = c("normal", "alphamutation"))
summary(gclass)
## normal alphamutation
## 71 89
# Read the five district tables into a list, in the order the code below
# indexes them: [[1]] bankura, [[2]] birbhum, [[3]] jalpaiguri,
# [[4]] murshidabad, [[5]] nadia.
# The files are named explicitly. The earlier list.files(pattern = "*.csv")
# call treated a glob as a regular expression and took the first five matches
# in directory order, so unrelated CSV files in the folder were loaded (the
# recorded column counts 11/9/7/18/7 show the tables did not line up).
temp <- c("bankura.csv", "birbhum.csv", "jalpaiguri.csv",
          "murshidabad.csv", "nadia.csv")
a <- lapply(temp, read.csv)
names(a) <- sub("\\.csv$", "", temp)
# A plain named list; bind.data[[k]] still returns the k-th data frame.
bind.data <- a
# Column count per table as a quick sanity check.
vapply(bind.data, ncol, integer(1))
# Split every district table into its "normal" and "alpha" rows, summarise
# both groups and compare them on four pairs of measurements.
#
# This replaces ten copy-pasted sections that carried several defects:
#   * HbF was read from the "MCH" column in every section;
#   * the Murshidabad and Nadia sections both read bind.data[[3]];
#   * one alpha column was subset with the "normal" row index;
#   * cbind() of unequal-length columns recycled values before plotting;
#   * axis limits came from the normal group only, clipping alpha points.
# The summaries recorded from the earlier run were produced by that code and
# are therefore not reproduced here.

# Output column name -> source column name.
index_cols <- c(hb = "HB", mcv = "MCV", mch = "MCH",
                hba0 = "HbA0", hba2 = "HbA2", hbf = "HbF")

# Rows of `district` whose MOLECULAR value equals `label`, as a matrix with
# one column per entry of `index_cols`. A table lacking any required column
# yields a zero-row matrix and a warning instead of silently giving NULL.
district_subset <- function(district, label) {
  absent <- setdiff(c("MOLECULAR", unname(index_cols)), names(district))
  if (length(absent) > 0) {
    warning("missing column(s): ", paste(absent, collapse = ", "),
            call. = FALSE)
    return(matrix(numeric(0), nrow = 0, ncol = length(index_cols),
                  dimnames = list(NULL, names(index_cols))))
  }
  rows <- which(district[["MOLECULAR"]] == label)
  out <- as.matrix(district[rows, unname(index_cols), drop = FALSE])
  dimnames(out) <- list(NULL, names(index_cols))
  out
}

# Position of each district inside bind.data.
district_index <- c(bankura = 1, birbhum = 2, jalpaiguri = 3,
                    murshidabad = 4, nadia = 5)
normal.groups <- lapply(district_index, function(k) {
  district_subset(bind.data[[k]], "normal")
})
alpha.groups <- lapply(district_index, function(k) {
  district_subset(bind.data[[k]], "alpha")
})
for (district in names(district_index)) {
  cat(district, "normal\n")
  print(summary(normal.groups[[district]]))
  cat(district, "alpha\n")
  print(summary(alpha.groups[[district]]))
}

# Pool the districts row-wise so every observation appears exactly once.
normal.all <- do.call(rbind, normal.groups)
alpha.all <- do.call(rbind, alpha.groups)

# Scatter of two measurements: normal rows as red "n", alpha rows as blue "a".
# The empty frame is sized from both groups so no point falls outside it.
plot_groups <- function(xcol, ycol, title) {
  plot(c(normal.all[, xcol], alpha.all[, xcol]),
       c(normal.all[, ycol], alpha.all[, ycol]),
       type = "n", xlab = xcol, ylab = ycol, main = title)
  points(normal.all[, xcol], normal.all[, ycol],
         pch = "n", col = "red", cex = 0.8)
  points(alpha.all[, xcol], alpha.all[, ycol], pch = "a", col = "blue")
}

par(mfrow = c(2, 2))
plot_groups("mch", "mcv", "MCV Vs MCH")
plot_groups("hba0", "hbf", "HbF Vs HbA0")
plot_groups("hbf", "mcv", "MCV Vs HbF")
plot_groups("hba0", "hba2", "HbA2 Vs HbA0")
# Moving average of `p` over windows of `s` consecutive values.
p <- c(2, 6, 3, 10, 26, 35, 8, 56, 31, 74, 46, 12, 17, 39, 22, 89, 42, 5, 13, 70)
l <- length(p)
s <- 3
# embed(p, s) has one row per complete window, so only the l - s + 1 valid
# averages are produced. The earlier loop indexed past the end of `p`, filled
# a list with trailing NAs and stripped them afterwards.
f <- if (l >= s) rowMeans(embed(p, s)) else numeric(0)
f
# 18 values: 3.67 6.33 13.00 23.67 23.00 33.00 31.67 53.67 50.33 44.00
#            25.00 22.67 26.00 50.00 51.00 45.33 20.00 29.33
# Means of the X and Y columns ignoring missing values, and a scatter of Y
# against X.
s <- read.csv("Problem3.csv")
A <- s$X
B <- s$Y
# Logical masks instead of A[-which(is.na(A))]: the negative-index form
# returns an empty vector when a column has no NA at all.
P <- A[!is.na(A)]
P_M <- mean(P)
P_M
## [1] 72.88636
Q <- B[!is.na(B)]
Q_M <- mean(Q)
Q_M
## [1] 22.20909
# Plot only rows where both values are present, so each X stays paired with
# the Y of the same row even if the NAs sit in different rows.
paired <- !is.na(A) & !is.na(B)
plot(A[paired], B[paired], col = "blue", xlab = "P", ylab = "Q")
# Full spectrum plot followed by three zoomed wavelength windows.
p <- read.csv("problem4.csv")
x <- p$wavelen
y <- p$Tmean
par(mfrow = c(1, 1))
plot(x, y, col = "red", xlab = "Lambda", ylab = "Absorbance", cex = 0.6,
     main = "Wavelength Vs Absorbance")

# Prints the number of readings strictly between `lower` and `upper`, the
# largest y and the largest x among them, then plots that window in `colour`.
# Invisibly returns the selected indices and values.
# One helper replaces three pasted copies; the third copy also overwrote the
# data frame `p` with an index vector.
plot_window <- function(lower, upper, colour) {
  inside <- which(x > lower & x < upper)
  wv <- x[inside]
  AA <- y[inside]
  print(length(inside))
  print(length(wv))
  print(max(AA))
  print(max(wv))
  plot(wv, AA, col = colour, xlab = "Lambda", ylab = "Absorbance", cex = 0.6,
       main = "Wavelength Vs Absorbance")
  invisible(list(index = inside, wv = wv, AA = AA))
}

win1 <- plot_window(365, 395, "blue")
## [1] 29
## [1] 29
## [1] 2520060
## [1] 394
win2 <- plot_window(400, 455, "purple")
## [1] 54
## [1] 54
## [1] 865840.4
## [1] 454
win3 <- plot_window(495, 565, "green")
## [1] 69
## [1] 69
## [1] 559970.1
## [1] 564
# Print 20 random permutations of the letters of `m`, one per line.
m <- "YWVSPTMFKLGHICQERDNA"
m
## [1] "YWVSPTMFKLGHICQERDNA"
X <- unlist(strsplit(m, ""))
for (i in 1:20) {
  # Letters separated by single spaces, then a newline.
  cat(paste(sample(X), collapse = " "), "\n", sep = "")
}
## R Q I F C L H W P Y M G V A N S E T K D
## V P H L S G Y R E D M F W Q K I T N C A
## S R L N H M I C G Y W Q E K F A V P D T
## R L Q M F S G N P Y W A D K H V T C E I
## E M F P N R D T I G L V A Y S Q W C K H
## H M L A C D K I W R N V E P F Q G T Y S
## G L D N I A Q S K F R E Y T M P C V W H
## E R T L N D P C Q G V F H I M W S A Y K
## F R N W K E Q V G A H C L Y P M T S D I
## A Q I P E V C Y W S K R F G N M T L H D
## D C I M E S N W Q A G K R T V F L P H Y
## D Q G M Y I S A N T W F L R K E V P C H
## A Q Y F N E L I K T S H W M D P G C R V
## Q R L Y I S T E C K D G H W M N F A V P
## T E W S M I Q F K V L G D Y P N C H A R
## W P V I K F A L C R Y M Q H E T D S G N
## N D V L K Q T F R Y H E S P C A G M I W
## T V W Q D E N F P M K R C I A Y G S H L
## A P K C H M Y D S T W V R F E L N G Q I
## H C L W T E P G I M V N F R S K Y D Q A
# One more random permutation of the letters in X; the character vector is
# auto-printed rather than written with cat().
sample(X)
## [1] "P" "G" "H" "C" "I" "D" "T" "Q" "R" "Y" "N" "V" "E" "W" "L" "M" "K"
## [18] "A" "F" "S"
library(bio3d)
# Backbone torsion angles of four PDB entries, overlaid on one phi/psi plot.
# Missing angles are dropped with logical masks; the former
# x[-which(is.na(x))] form returns an empty vector when nothing is missing.
pdb1 <- read.pdb("4h6z")
## Note: Accessing on-line PDB file
tor1 <- torsion.pdb(pdb1)
phi1 <- tor1$phi
psi1 <- tor1$psi
s1 <- c(phi1[1:70], psi1[1:70])
summary(s1)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -176.7810 -107.9999 -37.9770 0.6643 126.9249 173.8367 1
q1 <- phi1[!is.na(phi1)]
s1 <- psi1[!is.na(psi1)]
ss1 <- c(phi1[1:9], psi1[1:9])
par(mfrow = c(1, 1))
pdb2 <- read.pdb("2VAS")
## Note: Accessing on-line PDB file
tor2 <- torsion.pdb(pdb2)
phi2 <- tor2$phi
psi2 <- tor2$psi
s2 <- c(phi2[1:70], psi2[1:70])
summary(s2)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -174.099 -97.015 -50.892 -8.585 124.697 175.501 1
q2 <- phi2[!is.na(phi2)]
s2 <- psi2[!is.na(psi2)]
ss2 <- c(phi2[1:9], psi2[1:9])
pdb3 <- read.pdb("1WDC")
## Note: Accessing on-line PDB file
tor3 <- torsion.pdb(pdb3)
phi3 <- tor3$phi
psi3 <- tor3$psi
s3 <- c(phi3[1:70], psi3[1:70])
summary(s3)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -128.20 -65.11 -52.59 -47.23 -39.48 163.37 1
q3 <- phi3[!is.na(phi3)]
s3 <- psi3[!is.na(psi3)]
ss3 <- c(phi3[1:9], psi3[1:9])
# NOTE(review): this is the same identifier as pdb3, so the fourth point set
# coincides with the third - confirm which entry was intended.
pdb4 <- read.pdb("1WDC")
## Note: Accessing on-line PDB file
tor4 <- torsion.pdb(pdb4)
phi4 <- tor4$phi
psi4 <- tor4$psi
s4 <- c(phi4[1:70], psi4[1:70])
summary(s4)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -128.20 -65.11 -52.59 -47.23 -39.48 163.37 1
q4 <- phi4[!is.na(phi4)]
# Taken from psi4 (was psi3, indexed with psi4's NA positions).
s4 <- psi4[!is.na(psi4)]
ss4 <- c(phi4[1:9], psi4[1:9])
plot(phi1, psi1, pch = 19, xlim = c(-180, 180), ylim = c(-180, 180),
     col = "red", cex = 0.6)
abline(h = 0, v = 0)
# points() draws into the existing frame; axis limits are set by plot() only,
# so xlim/ylim are not passed here.
points(phi2, psi2, pch = 20, col = "blue", cex = 0.5)
points(phi3, psi3, pch = 18, col = "green", cex = 0.3)
points(phi4, psi4, pch = 20, col = "yellow", cex = 0.4)
# Aspirin vs placebo: read the trial data and count, per condition, how many
# absolute changes lie above / below the overall mean absolute change.
z <- read.table("aspirin.txt", header = TRUE)
# Open the data viewer only in interactive sessions; View() is not usable
# when the script is run in batch mode.
if (interactive()) View(z)
con <- z$condition
d <- which(con == "placebo")
pla <- con[d]
lp <- length(pla)
# Reciprocal of the number of placebo rows.
p <- 1 / lp
p
## [1] 0.0625
g <- z$change
length(which(g > mean(g)))
## [1] 17
# Rows whose (signed) change is above the mean AND that belong to the placebo group.
l <- which(g > mean(g) & z$condition == "placebo")
l
## [1] 2 4 9 10 14 16 17 18 20 22 23 24 25 26 30
length(l)
## [1] 15
# Work with magnitudes from here on; abs() replaces the ((x)**2)**0.5 round trip.
z$change <- abs(z$change)
z$change
## [1] 3.2 2.1 6.1 2.0 5.3 3.9 3.1 6.5 2.5 0.9 3.4 5.6 7.1 0.0 4.8 1.9 2.2
## [18] 1.7 5.5 1.1 4.5 1.5 1.9 2.4 2.1 1.1 2.2 3.3 4.5 2.0
m <- mean(z$change)
m
## [1] 3.146667
# "+" = above the mean magnitude, "-" = below it (values exactly equal to the
# mean fall in neither group because both comparisons are strict).
ik <- which(z$change > m & z$condition == "placebo")
length(ik)
## [1] 1
jk <- which(z$change < m & z$condition == "placebo")
length(jk)
## [1] 15
ik1 <- which(z$change > m & z$condition == "aspirin")
length(ik1)
## [1] 12
jk1 <- which(z$change < m & z$condition == "aspirin")
length(jk1)
## [1] 2
# Build the 2 x 2 table (rows: placebo / aspirin, columns: "+" / "-") from the
# index vectors ik, jk, ik1, jk1 and compute Pearson's chi-square by hand.
placebo <- cbind(length(ik), length(jk))
placebo
## [,1] [,2]
## [1,] 1 15
colnames(placebo) <- c("+", "-")
placebo
## + -
## [1,] 1 15
aspirin <- cbind(length(ik1), length(jk1))
data.file <- rbind(placebo, aspirin)
data.file
## + -
## [1,] 1 15
## [2,] 12 2
rownames(data.file) <- c("placebo", "aspirin")
data.file
## + -
## placebo 1 15
## aspirin 12 2
# Total count taken from the table itself instead of the hard-coded 30, so the
# calculation stays correct if the data set changes.
n.total <- sum(data.file)
row.tot <- unname(rowSums(data.file))  # placebo, aspirin
col.tot <- unname(colSums(data.file))  # "+", "-"
# Cell probabilities under independence: (row share) * (column share).
pa.pos <- (row.tot[2] / n.total) * (col.tot[1] / n.total)
pp.pos <- (row.tot[1] / n.total) * (col.tot[1] / n.total)
pa.neg <- (row.tot[2] / n.total) * (col.tot[2] / n.total)
pp.neg <- (row.tot[1] / n.total) * (col.tot[2] / n.total)
# Expected counts.
pap <- pa.pos * n.total
pan <- pa.neg * n.total
ppp <- pp.pos * n.total
ppn <- pp.neg * n.total
# Per-cell contributions (observed - expected)^2 / expected.
e11 <- (data.file[1, 1] - ppp)^2 / ppp
e12 <- (data.file[1, 2] - ppn)^2 / ppn
e21 <- (data.file[2, 1] - pap)^2 / pap
e22 <- (data.file[2, 2] - pan)^2 / pan
# Pearson statistic without continuity correction; it can be cross-checked
# with chisq.test(data.file, correct = FALSE)$statistic.
chivalue <- sum(e11, e12, e22, e21)
chivalue
## [1] 19.20087
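# The statistic (19.2) far exceeds the 5% critical value of 3.841 for 1 degree of freedom, so the null hypothesis of independence is rejected in favour of the alternative hypothesis.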
library(bio3d)
# 1akr: compute backbone torsion angles, plot them, and count how many phi/psi
# values fall inside the angle windows used below for the "alpha" and "beta"
# regions.
a <- read.pdb("1akr.pdb")
t.a <- torsion.pdb(a)
phi.a <- t.a$phi
# NA-free copies for counting. A logical mask is used because
# `x[-which(is.na(x))]` returns an empty vector when no NA is present.
b <- is.na(phi.a)
b1 <- which(b)
phi.ak <- phi.a[!b]
psi.a <- t.a$psi
e <- is.na(psi.a)
e1 <- which(e)
psi.ak <- psi.a[!e]
par(mfrow = c(1, 1))
# Plot the UNFILTERED vectors: phi.ak and psi.ak are filtered independently,
# so their i-th elements need not belong to the same residue. plot() skips
# any pair that contains an NA, which keeps phi and psi paired per residue.
plot(phi.a, psi.a, xlim = c(-180, 180), ylim = c(-180, 180),
     xlab = "phi", ylab = "psi", main = "1akr")
abline(h = 0, v = 0)
# NOTE: phi1..phi4 are reused here as scalar window bounds (degrees).
# phi window for the alpha region: (-100, -50).
phi1 <- -50.0
phi2 <- -100.0
ik <- which(phi.ak < phi1 & phi.ak > phi2)
phial.ak <- length(ik)
# phi window for the beta region: (-150, -40).
phi3 <- -40.0
phi4 <- -150.0
ij <- which(phi.ak < phi3 & phi.ak > phi4)
phib.ak <- length(ij)
# psi window for the alpha region: (-80, 10).
psi5 <- -80.0
psi6 <- 10.0
il <- which(psi.ak < psi6 & psi.ak > psi5)
psial.ak <- length(il)
# psi window for the beta region: (110, 160).
psi7 <- 110.0
psi8 <- 160.0
im <- which(psi.ak > psi7 & psi.ak < psi8)
psib.ak <- length(im)
# One-row summary of the four counts.
r <- cbind(phial.ak, psial.ak, phib.ak, psib.ak)
rownames(r) <- c("1akr")
(r)
## phial.ak psial.ak phib.ak psib.ak
## 1akr 90 78 131 50
# 3ua0: compute backbone torsion angles, plot them, and count how many phi/psi
# values fall inside the angle windows used below for the "alpha" and "beta"
# regions.
# NOTE(review): the windows differ from those used for 1akr - confirm that the
# two proteins are meant to be compared with different region definitions.
ua <- read.pdb("3ua0.pdb")
t.ua <- torsion.pdb(ua)
phi.ua <- t.ua$phi
# NA-free copies for counting. A logical mask is used because
# `x[-which(is.na(x))]` returns an empty vector when no NA is present.
x <- is.na(phi.ua)
x1 <- which(x)
phi.ua <- phi.ua[!x]
psi.ua <- t.ua$psi
s <- is.na(psi.ua)
s1 <- which(s)
psi.ua <- psi.ua[!s]
# Plot the UNFILTERED vectors: phi.ua and psi.ua are filtered independently,
# so their i-th elements need not belong to the same residue. plot() skips
# any pair that contains an NA, which keeps phi and psi paired per residue.
plot(t.ua$phi, t.ua$psi, xlim = c(-180, 180), ylim = c(-180, 180),
     xlab = "phi", ylab = "psi", main = "3ua0")
abline(h = 0, v = 0)
# phi window for the alpha region: (-110, -50).
phi9 <- -50.0
phi10 <- -110.0
ik1 <- which(phi.ua < phi9 & phi.ua > phi10)
phial.ua <- length(ik1)
# phi window for the beta region: (-150, -50).
phi11 <- -50.0
phi12 <- -150.0
# Fixed copy-paste slip: this count used `phi.ak` (the 1akr angles).
ij1 <- which(phi.ua < phi11 & phi.ua > phi12)
phib.ua <- length(ij1)
# psi window for the alpha region: (-50, 10).
psi13 <- -50.0
psi14 <- 10.0
il1 <- which(psi.ua < psi14 & psi.ua > psi13)
psial.ua <- length(il1)
# psi window for the beta region: (100, 160).
psi15 <- 100.0
psi16 <- 160.0
im1 <- which(psi.ua > psi15 & psi.ua < psi16)
psib.ua <- length(im1)
# One-row summary of the four counts.
r1 <- cbind(phial.ua, psial.ua, phib.ua, psib.ua)
rownames(r1) <- c("3ua0")
(r1)
# NOTE: the output below was recorded before the phib.ua count was corrected
# to use phi.ua; phib.ua and every result derived from it must be regenerated.
## phial.ua psial.ua phib.ua psib.ua
## 3ua0 60 15 129 90
# Stack the per-protein count rows `r` and `r1` into one 2 x 4 table.
main.pro <- rbind(r, r1)
print(main.pro)
## phial.ak psial.ak phib.ak psib.ak
## 1akr 90 78 131 50
## 3ua0 60 15 129 90
# Give the columns protein-independent labels.
region.labels <- c("Phi-alpha", "Psi-alpha", "Phi-beta", "Psi-beta")
colnames(main.pro) <- region.labels
print(main.pro)
## Phi-alpha Psi-alpha Phi-beta Psi-beta
## 1akr 90 78 131 50
## 3ua0 60 15 129 90
# Pearson chi-square test on the 2 x 4 table of counts.
print(chisq.test(main.pro))
##
## Pearson's Chi-squared test
##
## data: main.pro
## X-squared = 55.825, df = 3, p-value = 4.577e-12
# NOTE(review): this is a one-sample t-test of the eight pooled counts against
# a mean of 0 (see the recorded output) - confirm that this is the intended test.
print(t.test(main.pro))
##
## One Sample t-test
##
## data: main.pro
## t = 5.8065, df = 7, p-value = 0.0006591
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 47.64353 113.10647
## sample estimates:
## mean of x
## 80.375
# Sample values for the one-sample t-test problem.
x <- c(0.593, 0.142, 0.329, 0.691, 0.231, 0.793, 0.519, 0.392, 0.418)
# One-sample t statistic: (mean - u) / (sd / sqrt(n)).
#   dataset: numeric vector of observations.
#   u:       hypothesised mean; defaults to 0.3, the value that was previously
#            hard-coded, so existing calls behave as before.
# Returns the t statistic as a single number. It can be cross-checked with
# t.test(dataset, mu = u)$statistic.
ttest <- function(dataset, u = 0.3) {
  std.error <- sd(dataset) / sqrt(length(dataset))
  (mean(dataset) - u) / std.error
}
tt <- ttest(x)
tt
## [1] 2.205059
mean(x)
## [1] 0.4564444
## INTERPRETATION OF PROBLEM
## The hypothesised mean level of Salmonella in all batches of ice cream is 0.3 MPN/g. The hypotheses of interest are H0 (null hypothesis): mean = 0.3 and H1 (alternative hypothesis): mean > 0.3, tested at the 95% confidence level. If the p-value is less than 0.05, the data support the alternative hypothesis. The degrees of freedom are n - 1 = 8, where n = 9. From the reference t table, our t-value of 2.205059 lies between the one-tailed critical values 1.860 (p = 0.05) and 2.306 (p = 0.025), so the one-tailed p-value is between 0.025 and 0.05. Because this is less than 0.05, the alternative hypothesis is supported.
## Thus, the mean Salmonella level in the given batch of ice cream is significantly higher than 0.3 MPN/g.
##T-TEST PROBLEM 2
### Suppose there is a debate about whether football players are fitter than ballet dancers. We can use a two-sample t-test to compare their fitness results.
# Pooled-variance two-sample t statistic for two independent samples.
#   d1, d2: numeric vectors.
# Returns (mean(d1) - mean(d2)) divided by the pooled standard error.
tt <- function(d1, d2) {
  size1 <- length(d1)
  size2 <- length(d2)
  # Pooled variance: weighted average of the two sample variances.
  pooled <- (sd(d1)^2 * (size1 - 1) + sd(d2)^2 * (size2 - 1)) / (size1 + size2 - 2)
  std.error <- sqrt(pooled * (1 / size1 + 1 / size2))
  (mean(d1) - mean(d2)) / std.error
}
# Fitness scores of the two groups.
ballet <- c(89.2, 78.2, 89.3, 88.3, 87.3, 90.1, 95.2, 94.3, 78.3, 89.3)
football <- c(79.3, 78.3, 85.3, 79.3, 88.9, 91.2, 87.2, 89.2, 93.3, 79.9)
ttt <- tt(d1 = ballet, d2 = football)
ttt
## [1] 1.094723
##INTERPRETATION OF PROBLEM 2
## Although the t table does not give the precise probability of every t value, it can still be used for hypothesis testing. With n1 + n2 - 2 = 18 degrees of freedom, our value of 1.0947 is below the tabulated critical value of 1.330 for p = 0.10, so the p-value is greater than 0.10 and therefore greater than 0.05. At the 95% confidence level we fail to reject the null hypothesis. This means that we cannot distinguish between the fitness levels of football players and ballet dancers.
## T-TEST PROBLEM 3 CHOLESTEROL
# Two-sample t statistic.
#   d3, d4: numeric vectors.
#   paired: FALSE (default, previous behaviour) gives the pooled-variance
#           statistic for independent samples; TRUE gives the paired
#           statistic mean(d) / (sd(d) / sqrt(n)) on the differences d3 - d4
#           and requires both vectors to have the same length.
# Returns the t statistic as a single number.
tt1 <- function(d3, d4, paired = FALSE) {
  if (paired) {
    stopifnot(length(d3) == length(d4))
    delta <- d3 - d4
    return(mean(delta) / (sd(delta) / sqrt(length(delta))))
  }
  n3 <- length(d3)
  n4 <- length(d4)
  # Pooled variance of the two samples.
  spsq <- (sd(d3)^2 * (n3 - 1) + sd(d4)^2 * (n4 - 1)) / (n3 + n4 - 2)
  (mean(d3) - mean(d4)) / sqrt(spsq * ((1 / n3) + (1 / n4)))
}
# Cholesterol readings entered row-wise as Before/After pairs.
data <- c(220,200,240,210,225,210,180,170,210,220,190,180,195,190,200,190,210,220,240,210)
M <- matrix(data, nrow = 10, byrow = TRUE)
colnames(M) <- c("Before", "After")
(M)
## Before After
## [1,] 220 200
## [2,] 240 210
## [3,] 225 210
## [4,] 180 170
## [5,] 210 220
## [6,] 190 180
## [7,] 195 190
## [8,] 200 190
## [9,] 210 220
## [10,] 240 210
# Each row of M is one Before/After pair, so the paired statistic is used.
# The pooled (unpaired) statistic for the same data is 1.310597.
ttt1 <- tt1(M[, 1], M[, 2], paired = TRUE)
ttt1
## [1] 2.501724
## INTERPRETATION OF PROBLEM 3
##The paired t-value (2.50, with 9 degrees of freedom) is greater than the two-tailed 5% critical value (2.262), so the alternative hypothesis is supported in this case. Hence there is a difference in cholesterol level before and after the exercise and diet program.
# Split the input string(s) into single characters and return the value of
# Peptides::charge() at pH 7 for each character.
#   seq: character string (or vector of strings) of one-letter residue codes.
seq2charge <- function(seq) {
  library("Peptides")
  residues <- unlist(strsplit(seq, ""))
  charge(residues, pH = 7)
}
# Charge at pH 7 for each letter of the string below (computed once; the
# identical second call was redundant).
ch <- seq2charge("ARNDCEQGHILKMFPSTWYV")
## Warning: package 'Peptides' was built under R version 3.4.4
p <- read.csv("amino.csv")
aa <- p$amino1
# Lookup table: one row per entry of `aa` with its charge.
df <- data.frame(aa, ch)
# Character-matrix copy of the table, kept because `dff` and `h` are
# script-level variables used as arguments further down.
dff <- as.matrix(df)
h <- dff[, 2]
# Take the numeric charges straight from the data frame. Going through `dff`
# converted them to formatted text first, which can round the values.
H <- as.double(df$ch)
names(H) <- aa
barplot(H)
# Split each input string into its individual characters.
# Returns (invisibly) a list with one character vector per input string.
vecseq <- function(seq) {
  invisible(strsplit(x = seq, split = ""))
}
# Look up the `ch` value(s) of the rows of the script-level table `df` whose
# `aa` entry equals `ami`.
#   ami: residue code to look up.
#   dff: accepted for call compatibility but not used; the lookup always
#        reads the global `df`.
# Returns a zero-length result when `ami` is not present in `df$aa`.
hy <- function(ami, dff) {
  hit <- df$aa == ami
  df$ch[which(hit)]
}
# Read the 4q21 structure file and extract its residue sequence with bio3d.
library("bio3d")
pdb <- read.pdb(file = "4q21.pdb")
seq <- pdbseq(pdb = pdb)
# Map every residue of `seq` to its table value via hy().
#   seq: character vector of one-letter residue codes (a single multi-letter
#        string is also accepted and split into letters).
#   dff: passed through to hy().
# Returns an unnamed numeric vector with exactly one value per residue; a
# residue that hy() cannot find yields NA instead of an error.
#
# The previous version filled a copy of the global vector `H`, so for a
# sequence shorter than `H` the result kept stale trailing values (and names)
# from whatever `H` held before. Outputs recorded in this script from calls to
# this function may therefore need to be regenerated.
seq2hydro <- function(seq, dff) {
  residues <- unlist(vecseq(seq), use.names = FALSE)
  vapply(
    residues,
    function(res) {
      value <- hy(res, dff)
      if (length(value) == 0) NA_real_ else as.numeric(value[1])
    },
    numeric(1),
    USE.NAMES = FALSE
  )
}
# Per-residue values for the 4q21 sequence, drawn as a line profile.
# NOTE: this overwrites the script-level vector `H`.
H <- seq2hydro(seq = seq, dff = dff)
plot(x = H, type = "l", xlab = "Residue No", ylab = "Charge")
# Moving average of `HH` over a sliding window of `w` consecutive values.
#   HH: numeric vector.
#   w:  window width (number of values per window).
# Returns a numeric vector of length(HH) - w + 1 window means, or numeric(0)
# when the window is wider than the data or `w` is less than 1.
#
# Fixes over the previous version:
#   * `i:i+w-1` parses as `(i:i) + w - 1`, i.e. the single index i + w - 1, so
#     each "window" held exactly one value; the range is now `i:(i + w - 1)`.
#   * the result no longer starts with a stray copy of HH[1].
#   * the result is built in one pass instead of being grown with c().
movav <- function(HH, w) {
  n.windows <- length(HH) - w + 1
  if (w < 1 || n.windows < 1) {
    return(numeric(0))
  }
  vapply(
    seq_len(n.windows),
    function(i) mean(HH[i:(i + w - 1)]),
    numeric(1)
  )
}
# Smooth the per-residue profile `H` with a window of 7 and plot it.
dolittle <- movav(HH = H, w = 7)
plot(x = dolittle, type = "l", xlab = "Residue No", ylab = "Charge")
# Read two more structures and convert their sequences with seq2hydro().
z1 <- read.pdb(file = "2mqu.pdb")
z2 <- read.pdb(file = "2oau.pdb")
seq1 <- pdbseq(pdb = z1)
seq2 <- pdbseq(pdb = z2)
hyseq1 <- seq2hydro(seq = seq1, dff = dff)
hyseq2 <- seq2hydro(seq = seq2, dff = dff)
# Welch two-sample t-test. The settings spelled out in the original call
# (two-sided, mu = 0, unpaired, unequal variances, 95% level) are t.test()'s
# defaults, so they are omitted here.
T1 <- t.test(x = hyseq1, y = hyseq2)
print(T1)
##
## Welch Two Sample t-test
##
## data: hyseq1 and hyseq2
## t = -0.55562, df = 186.96, p-value = 0.5791
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1086704 0.0609082
## sample estimates:
## mean of x mean of y
## -0.01009846 0.01378264
library("bio3d")
library("Peptides")
#Membrane protein
# Membrane protein: read 2rpb and extract its residue sequence.
mem.rpb <- read.pdb(file = "2rpb.pdb")
seq.rpb <- pdbseq(pdb = mem.rpb)
#Amino acid descriptors for the membrane protein compared with the reference 20 aa present
seq.rpb.d <- aaDescriptors(seq.rpb)
print(summary(seq.rpb.d))
## PP1.1 PP2.1 PP3.1 KF1.1
## Min. :-1.00000 Min. :-1.0000 Min. :-1.00000 Min. :-1.5600
## 1st Qu.:-0.94000 1st Qu.:-0.6400 1st Qu.:-0.24000 1st Qu.:-0.7400
## Median : 0.06000 Median :-0.4300 Median :-0.14000 Median :-0.4700
## Mean :-0.08292 Mean :-0.3142 Mean :-0.07743 Mean :-0.1777
## 3rd Qu.: 0.80000 3rd Qu.: 0.0300 3rd Qu.: 0.31000 3rd Qu.: 0.5800
## Max. : 1.00000 Max. : 1.0000 Max. : 1.00000 Max. : 2.0600
## KF2.1 KF3.1 KF4.1 KF5.1
## Min. :-1.9600 Min. :-1.6100 Min. :-1.5700 Min. :-1.7000
## 1st Qu.:-0.7100 1st Qu.:-0.9700 1st Qu.:-0.7300 1st Qu.:-0.9200
## Median :-0.1600 Median :-0.1200 Median :-0.1600 Median :-0.5400
## Mean :-0.1574 Mean : 0.2178 Mean : 0.1736 Mean :-0.1805
## 3rd Qu.: 0.1900 3rd Qu.: 1.7900 3rd Qu.: 0.8100 3rd Qu.: 0.5000
## Max. : 2.1000 Max. : 2.0400 Max. : 1.8700 Max. : 2.0000
## KF6.1 KF7.1 KF8.1 KF9.1
## Min. :-2.0500 Min. :-1.8900 Min. :-2.30000 Min. :-2.30000
## 1st Qu.:-0.8100 1st Qu.:-1.0700 1st Qu.:-0.39000 1st Qu.:-0.46000
## Median : 0.0300 Median :-0.2000 Median : 0.06000 Median : 0.21000
## Mean :-0.2591 Mean :-0.2119 Mean : 0.01071 Mean : 0.05965
## 3rd Qu.: 0.3700 3rd Qu.: 0.8400 3rd Qu.: 0.47000 3rd Qu.: 0.66000
## Max. : 2.4100 Max. : 1.5200 Max. : 2.36000 Max. : 1.71000
## KF10.1 Z1.1 Z2.1 Z3.1
## Min. :-2.33000 Min. :-4.36000 Min. :-4.0600 Min. :-3.5000
## 1st Qu.:-0.28000 1st Qu.:-2.59000 1st Qu.:-2.1800 1st Qu.:-1.5400
## Median : 0.53000 Median : 0.75000 Median :-0.2200 Median :-1.1200
## Mean : 0.02956 Mean : 0.07885 Mean :-0.4954 Mean :-0.5196
## 3rd Qu.: 0.70000 3rd Qu.: 3.11000 3rd Qu.: 0.9300 3rd Qu.: 0.6000
## Max. : 1.63000 Max. : 3.98000 Max. : 3.9400 Max. : 3.7500
## Z4.1 Z5.1 F1.1 F2.1
## Min. :-3.0400 Min. :-2.6500 Min. :-1.3870 Min. :-2.2190
## 1st Qu.:-1.3900 1st Qu.:-0.1700 1st Qu.:-1.3320 1st Qu.:-0.4440
## Median :-0.8400 Median : 0.2600 Median :-0.8800 Median : 0.5360
## Mean :-0.6002 Mean : 0.2838 Mean :-0.4078 Mean : 0.1471
## 3rd Qu.: 0.0400 3rd Qu.: 0.7500 3rd Qu.: 0.2070 3rd Qu.: 0.5720
## Max. : 3.9000 Max. : 2.0000 Max. : 1.5240 Max. : 1.3880
## F3.1 F4.1 F5.1 F6.1
## Min. :-1.65600 Min. :-2.0800 Min. :-1.1150 Min. :-1.76200
## 1st Qu.:-0.58400 1st Qu.:-0.1750 1st Qu.:-0.2290 1st Qu.:-1.10800
## Median : 0.02900 Median : 0.3330 Median :-0.1690 Median : 0.16900
## Mean :-0.04194 Mean : 0.3839 Mean : 0.0538 Mean : 0.05325
## 3rd Qu.: 0.51600 3rd Qu.: 1.0260 3rd Qu.: 0.0630 3rd Qu.: 1.03800
## Max. : 2.06900 Max. : 1.9040 Max. : 3.8470 Max. : 2.72800
## T1.1 T2.1 T3.1 T4.1
## Min. :-10.610 Min. :-3.5400 Min. :-2.34000 Min. :-1.9600
## 1st Qu.: -5.870 1st Qu.:-0.9400 1st Qu.:-0.49000 1st Qu.:-0.3900
## Median : -4.620 Median :-0.2800 Median : 0.28000 Median : 0.3100
## Mean : -4.377 Mean : 0.1591 Mean : 0.09106 Mean : 0.3085
## 3rd Qu.: -3.030 3rd Qu.: 0.7500 3rd Qu.: 0.63000 3rd Qu.: 1.1000
## Max. : 5.730 Max. : 3.8900 Max. : 1.39000 Max. : 1.6400
## T5.1 VHSE1.1 VHSE2.1 VHSE3.1
## Min. :-0.790 Min. :-1.47000 Min. :-1.67000 Min. :-2.63000
## 1st Qu.:-0.210 1st Qu.:-1.15000 1st Qu.:-0.86000 1st Qu.:-0.41000
## Median : 0.430 Median :-0.20000 Median :-0.14000 Median :-0.17000
## Mean : 0.511 Mean :-0.07301 Mean :-0.02735 Mean :-0.06425
## 3rd Qu.: 0.950 3rd Qu.: 0.76000 3rd Qu.: 0.67000 3rd Qu.: 0.36000
## Max. : 3.250 Max. : 1.52000 Max. : 2.06000 Max. : 1.79000
## VHSE4.1 VHSE5.1 VHSE6.1 VHSE7.1
## Min. :-1.9100 Min. :-2.6800 Min. :-1.610 Min. :-1.6100
## 1st Qu.:-1.8000 1st Qu.:-0.5300 1st Qu.:-1.400 1st Qu.:-0.2400
## Median :-0.0100 Median : 0.2200 Median :-0.170 Median : 0.0300
## Mean :-0.3459 Mean :-0.1935 Mean :-0.255 Mean : 0.1885
## 3rd Qu.: 0.3600 3rd Qu.: 0.3000 3rd Qu.: 0.670 3rd Qu.: 0.7300
## Max. : 2.2800 Max. : 1.6400 Max. : 1.470 Max. : 2.0100
## VHSE8.1 ProtFP1.1 ProtFP2.1 ProtFP3.1
## Min. :-1.3400 Min. :-6.6100 Min. :-8.7200 Min. :-3.5900
## 1st Qu.:-0.4100 1st Qu.:-4.9900 1st Qu.:-2.5500 1st Qu.:-2.4900
## Median :-0.0300 Median :-2.0000 Median :-1.3300 Median :-1.7100
## Mean : 0.1059 Mean :-0.2753 Mean :-0.2528 Mean :-0.9828
## 3rd Qu.: 0.3900 3rd Qu.: 5.0400 3rd Qu.: 2.2000 3rd Qu.: 0.7000
## Max. : 3.5600 Max. : 7.3300 Max. : 6.6000 Max. : 4.1800
## ProtFP4.1 ProtFP5.1 ProtFP6.1 ProtFP7.1
## Min. :-4.5800 Min. :-3.2200 Min. :-3.540000 Min. :-2.9500
## 1st Qu.:-1.2700 1st Qu.:-0.4900 1st Qu.:-0.650000 1st Qu.:-0.0800
## Median : 1.0200 Median : 0.0600 Median : 0.080000 Median : 0.1000
## Mean : 0.1008 Mean :-0.1801 Mean : 0.006637 Mean : 0.4133
## 3rd Qu.: 1.3800 3rd Qu.: 0.4800 3rd Qu.: 0.760000 3rd Qu.: 1.7900
## Max. : 3.0000 Max. : 3.2700 Max. : 2.910000 Max. : 1.9900
## ProtFP8.1 ST1.1 ST2.1 ST3.1
## Min. :-2.79000 Min. :-1.8440 Min. :-1.0100 Min. :-0.9170
## 1st Qu.:-0.51000 1st Qu.:-1.1330 1st Qu.:-0.8930 1st Qu.:-0.6270
## Median :-0.23000 Median :-0.8880 Median :-0.3790 Median :-0.3250
## Mean :-0.04823 Mean :-0.8357 Mean :-0.3141 Mean :-0.2672
## 3rd Qu.: 0.87000 3rd Qu.:-0.6290 3rd Qu.:-0.0180 3rd Qu.:-0.0130
## Max. : 1.65000 Max. : 0.8530 Max. : 0.7310 Max. : 1.1000
## ST4.1 ST5.1 ST6.1
## Min. :-1.16300 Min. :-0.93700 Min. :-3.31700
## 1st Qu.:-0.21400 1st Qu.:-0.56100 1st Qu.:-0.17500
## Median :-0.06600 Median :-0.38700 Median : 0.10100
## Mean :-0.04624 Mean :-0.04304 Mean :-0.03138
## 3rd Qu.: 0.23700 3rd Qu.: 0.54900 3rd Qu.: 0.57000
## Max. : 0.85900 Max. : 1.12000 Max. : 1.09100
## ST7.1 ST8.1 BLOSUM1.1 BLOSUM2.1
## Min. :-1.09900 Min. :-0.89400 Min. :-1.6200 Min. :-1.2300
## 1st Qu.:-0.14700 1st Qu.:-0.31100 1st Qu.:-1.1300 1st Qu.:-1.1300
## Median :-0.02000 Median :-0.07500 Median : 0.7200 Median :-0.4500
## Mean : 0.09591 Mean : 0.05606 Mean : 0.1139 Mean :-0.3535
## 3rd Qu.: 0.17500 3rd Qu.: 0.36700 3rd Qu.: 1.1400 3rd Qu.: 0.1900
## Max. : 1.25600 Max. : 2.52200 Max. : 1.5500 Max. : 2.2800
## BLOSUM3.1 BLOSUM4.1 BLOSUM5.1
## Min. :-0.9700 Min. :-1.610000 Min. :-1.2400
## 1st Qu.:-0.8000 1st Qu.:-0.360000 1st Qu.:-0.6000
## Median :-0.6300 Median : 0.060000 Median :-0.2800
## Mean :-0.2843 Mean :-0.000708 Mean :-0.1652
## 3rd Qu.: 0.3200 3rd Qu.: 0.380000 3rd Qu.: 0.2400
## Max. : 1.7300 Max. : 1.550000 Max. : 1.8300
## BLOSUM6.1 BLOSUM7.1 BLOSUM8.1
## Min. :-2.02000 Min. :-1.6200 Min. :-1.96000
## 1st Qu.: 0.01000 1st Qu.:-0.3000 1st Qu.:-0.05000
## Median : 0.16000 Median : 0.0100 Median : 0.15000
## Mean : 0.08858 Mean :-0.1111 Mean : 0.02894
## 3rd Qu.: 0.28000 3rd Qu.: 0.2100 3rd Qu.: 0.20000
## Max. : 1.19000 Max. : 1.2100 Max. : 0.87000
## BLOSUM9.1 BLOSUM10.1 MSWHIM1.1 MSWHIM2.1
## Min. :-1.20000 Min. :-1.29000 Min. :-1.0000 Min. :-1.0000
## 1st Qu.:-0.22000 1st Qu.:-0.28000 1st Qu.:-0.9100 1st Qu.: 0.0800
## Median : 0.01000 Median : 0.06000 Median :-0.5100 Median : 0.6700
## Mean : 0.06761 Mean : 0.05982 Mean :-0.3746 Mean : 0.3335
## 3rd Qu.: 0.25000 3rd Qu.: 0.33000 3rd Qu.: 0.1100 3rd Qu.: 0.7900
## Max. : 1.36000 Max. : 0.99000 Max. : 1.0000 Max. : 1.0000
## MSWHIM3.1
## Min. :-1.000
## 1st Qu.:-0.660
## Median :-0.580
## Mean :-0.355
## 3rd Qu.:-0.160
## Max. : 1.000
#Thermostability value of the membrane protein
# aIndex() is applied to the residue vector; the mean of its non-zero values
# is used as the thermostability figure.
seq.rpb.i <- aIndex(seq.rpb)
ik <- which(seq.rpb.i == 0.0)
# Drop the zero entries with a logical mask. The previous `x[-ik]` form
# returns an empty vector (and a NaN mean) when no entry is zero.
seq.rpb.in <- seq.rpb.i[!(seq.rpb.i %in% 0)]
therm.rpb <- sum(seq.rpb.in) / length(seq.rpb.in)
(therm.rpb)
## [1] 310
#Protein Interaction
# Mean of the boman() values over the residue vector.
seq.rpb.pp <- boman(seq.rpb)
bin.pot <- sum(seq.rpb.pp) / length(seq.rpb)
bin.pot
## [1] 1.849735
#bin.pot < 2.48 (the threshold the Peptides documentation gives for high binding potential), which implies the protein has a lower affinity to bind with another protein
#Cytosolic protein:
# Read 1cjy and extract its residue sequence.
cyto.cjy <- read.pdb(file = "1cjy.pdb")
seq.cjy <- pdbseq(pdb = cyto.cjy)
#Amino acid descriptors for the cytosolic protein compared with the reference 20 aa present
seq.cjy.d <- aaDescriptors(seq.cjy)
print(summary(seq.cjy.d))
## PP1.1 PP2.1 PP3.1 KF1.1
## Min. :-1.000 Min. :-1.0000 Min. :-1.0000 Min. :-1.56000
## 1st Qu.:-0.900 1st Qu.:-0.6400 1st Qu.:-0.2400 1st Qu.:-1.04000
## Median : 0.060 Median :-0.4000 Median :-0.0800 Median :-0.21000
## Mean :-0.106 Mean :-0.2787 Mean :-0.0129 Mean :-0.07562
## 3rd Qu.: 0.670 3rd Qu.: 0.0300 3rd Qu.: 0.3700 3rd Qu.: 0.81000
## Max. : 1.000 Max. : 1.0000 Max. : 1.0000 Max. : 2.06000
## KF2.1 KF3.1 KF4.1
## Min. :-1.9600 Min. :-1.61000 Min. :-1.57000
## 1st Qu.:-0.7100 1st Qu.:-0.42000 1st Qu.:-0.75000
## Median :-0.0700 Median :-0.23000 Median :-0.16000
## Mean :-0.1178 Mean :-0.03326 Mean : 0.05175
## 3rd Qu.: 0.2400 3rd Qu.: 0.45000 3rd Qu.: 0.81000
## Max. : 2.1000 Max. : 2.04000 Max. : 1.87000
## KF5.1 KF6.1 KF7.1
## Min. :-1.700000 Min. :-2.0500 Min. :-1.89e+00
## 1st Qu.:-0.550000 1st Qu.:-0.8100 1st Qu.:-8.30e-01
## Median :-0.100000 Median :-0.4300 Median : 2.40e-01
## Mean : 0.003753 Mean :-0.3056 Mean : 4.01e-05
## 3rd Qu.: 0.500000 3rd Qu.: 0.3700 3rd Qu.: 9.20e-01
## Max. : 2.000000 Max. : 2.4100 Max. : 1.52e+00
## KF8.1 KF9.1 KF10.1
## Min. :-2.3000 Min. :-2.30000 Min. :-2.33000
## 1st Qu.:-0.7600 1st Qu.:-0.48000 1st Qu.:-0.28000
## Median : 0.0600 Median :-0.03000 Median : 0.19000
## Mean :-0.0187 Mean : 0.03362 Mean : 0.04022
## 3rd Qu.: 0.4700 3rd Qu.: 0.74000 3rd Qu.: 0.65000
## Max. : 2.3600 Max. : 1.71000 Max. : 1.63000
## Z1.1 Z2.1 Z3.1 Z4.1
## Min. :-4.36000 Min. :-4.0600 Min. :-3.5000 Min. :-3.0400
## 1st Qu.:-2.85000 1st Qu.:-1.7300 1st Qu.:-1.4900 1st Qu.:-1.3900
## Median : 0.75000 Median : 0.2600 Median : 0.2600 Median :-0.8200
## Mean :-0.06876 Mean :-0.3111 Mean :-0.1939 Mean :-0.3595
## 3rd Qu.: 2.39000 3rd Qu.: 0.9300 3rd Qu.: 1.0600 3rd Qu.: 0.6200
## Max. : 3.98000 Max. : 3.9400 Max. : 3.7500 Max. : 3.9000
## Z5.1 F1.1 F2.1 F3.1
## Min. :-2.6500 Min. :-1.3870 Min. :-2.21900 Min. :-1.65600
## 1st Qu.:-0.3800 1st Qu.:-1.2980 1st Qu.:-0.84700 1st Qu.:-0.58400
## Median : 0.2600 Median :-0.4070 Median : 0.37800 Median :-0.02400
## Mean : 0.2265 Mean :-0.2098 Mean : 0.04858 Mean :-0.05281
## 3rd Qu.: 0.7500 3rd Qu.: 0.8860 3rd Qu.: 0.82100 3rd Qu.: 0.60950
## Max. : 2.0000 Max. : 1.5240 Max. : 1.38800 Max. : 2.06900
## F4.1 F5.1 F6.1 T1.1
## Min. :-2.0800 Min. :-1.1150 Min. :-1.76200 Min. :-10.610
## 1st Qu.:-0.1750 1st Qu.:-0.2290 1st Qu.:-0.60500 1st Qu.: -5.870
## Median : 0.3330 Median : 0.0070 Median :-0.06800 Median : -4.380
## Mean : 0.3209 Mean : 0.1453 Mean :-0.06492 Mean : -4.263
## 3rd Qu.: 1.0260 3rd Qu.: 0.1170 3rd Qu.: 0.50200 3rd Qu.: -3.000
## Max. : 1.9040 Max. : 3.8470 Max. : 2.72800 Max. : 5.730
## T2.1 T3.1 T4.1 T5.1
## Min. :-3.5400 Min. :-2.3400 Min. :-1.9600 Min. :-0.7900
## 1st Qu.:-0.9400 1st Qu.:-0.5300 1st Qu.:-0.4000 1st Qu.:-0.2100
## Median :-0.4700 Median :-0.0700 Median :-0.1700 Median : 0.3300
## Mean :-0.0336 Mean :-0.0654 Mean : 0.1583 Mean : 0.4958
## 3rd Qu.: 0.8650 3rd Qu.: 0.6300 3rd Qu.: 1.1000 3rd Qu.: 0.9500
## Max. : 3.8900 Max. : 1.3900 Max. : 1.6400 Max. : 3.2500
## VHSE1.1 VHSE2.1 VHSE3.1
## Min. :-1.4700000 Min. :-1.67000 Min. :-2.6300
## 1st Qu.:-0.9900000 1st Qu.:-0.86000 1st Qu.:-0.5000
## Median :-0.2000000 Median : 0.00000 Median : 0.1000
## Mean :-0.0008821 Mean :-0.06416 Mean :-0.0881
## 3rd Qu.: 1.0100000 3rd Qu.: 0.61000 3rd Qu.: 0.4300
## Max. : 1.5200000 Max. : 2.06000 Max. : 1.7900
## VHSE4.1 VHSE5.1 VHSE6.1 VHSE7.1
## Min. :-1.9100 Min. :-2.68000 Min. :-1.6100 Min. :-1.6100
## 1st Qu.:-0.8000 1st Qu.:-0.32000 1st Qu.:-1.3400 1st Qu.:-0.6400
## Median :-0.0100 Median : 0.22000 Median :-0.0100 Median : 0.0300
## Mean :-0.1212 Mean :-0.09397 Mean :-0.1817 Mean : 0.1073
## 3rd Qu.: 0.5250 3rd Qu.: 0.25000 3rd Qu.: 0.6700 3rd Qu.: 0.9100
## Max. : 2.2800 Max. : 1.64000 Max. : 1.4700 Max. : 2.0100
## VHSE8.1 ProtFP1.1 ProtFP2.1 ProtFP3.1
## Min. :-1.34000 Min. :-6.6100 Min. :-8.7200 Min. :-3.5900
## 1st Qu.:-0.52000 1st Qu.:-4.9900 1st Qu.:-2.5500 1st Qu.:-2.2900
## Median :-0.03000 Median :-2.0000 Median :-1.3300 Median :-0.7000
## Mean : 0.05954 Mean :-0.3529 Mean :-0.3545 Mean :-0.3858
## 3rd Qu.: 0.13000 3rd Qu.: 5.1100 3rd Qu.: 2.1400 3rd Qu.: 0.8900
## Max. : 3.56000 Max. : 7.3300 Max. : 6.6000 Max. : 4.1800
## ProtFP4.1 ProtFP5.1 ProtFP6.1 ProtFP7.1
## Min. :-4.5800 Min. :-3.2200 Min. :-3.54000 Min. :-2.95000
## 1st Qu.:-1.1200 1st Qu.:-1.2300 1st Qu.:-0.65000 1st Qu.:-0.45000
## Median : 0.6300 Median :-0.3100 Median :-0.28000 Median : 0.10000
## Mean : 0.1688 Mean :-0.3168 Mean :-0.01734 Mean : 0.05531
## 3rd Qu.: 1.1100 3rd Qu.: 0.9900 3rd Qu.: 1.04000 3rd Qu.: 0.74000
## Max. : 3.0000 Max. : 3.2700 Max. : 2.91000 Max. : 1.99000
## ProtFP8.1 ST1.1 ST2.1 ST3.1
## Min. :-2.79000 Min. :-1.8440 Min. :-1.0100 Min. :-0.9170
## 1st Qu.:-0.51000 1st Qu.:-1.1330 1st Qu.:-0.4070 1st Qu.:-0.6270
## Median : 0.30000 Median :-0.8260 Median :-0.3110 Median :-0.1840
## Mean : 0.01309 Mean :-0.8093 Mean :-0.2293 Mean :-0.1428
## 3rd Qu.: 0.87000 3rd Qu.:-0.6290 3rd Qu.: 0.0240 3rd Qu.: 0.0790
## Max. : 1.65000 Max. : 0.8530 Max. : 0.7310 Max. : 1.1000
## ST4.1 ST5.1 ST6.1 ST7.1
## Min. :-1.1630 Min. :-0.93700 Min. :-3.317 Min. :-1.09900
## 1st Qu.:-0.2200 1st Qu.:-0.62500 1st Qu.:-0.775 1st Qu.:-0.14700
## Median :-0.0650 Median :-0.38700 Median : 0.164 Median : 0.02100
## Mean :-0.0595 Mean :-0.09555 Mean :-0.123 Mean : 0.08658
## 3rd Qu.:-0.0490 3rd Qu.: 0.54900 3rd Qu.: 1.011 3rd Qu.: 0.17500
## Max. : 0.8590 Max. : 1.12000 Max. : 1.091 Max. : 1.25600
## ST8.1 BLOSUM1.1 BLOSUM2.1 BLOSUM3.1
## Min. :-0.894 Min. :-1.6200 Min. :-1.2300 Min. :-0.9700
## 1st Qu.:-0.311 1st Qu.:-1.1350 1st Qu.:-0.8600 1st Qu.:-0.8000
## Median :-0.001 Median : 0.7200 Median :-0.4500 Median :-0.5800
## Mean : 0.115 Mean : 0.1026 Mean :-0.1869 Mean :-0.2003
## 3rd Qu.: 0.367 3rd Qu.: 1.1400 3rd Qu.: 0.2200 3rd Qu.: 0.3500
## Max. : 2.522 Max. : 1.5500 Max. : 2.2800 Max. : 1.7300
## BLOSUM4.1 BLOSUM5.1 BLOSUM6.1
## Min. :-1.61000 Min. :-1.24000 Min. :-2.0200
## 1st Qu.:-0.36000 1st Qu.:-0.55000 1st Qu.: 0.0100
## Median : 0.00000 Median : 0.02000 Median : 0.2000
## Mean : 0.00575 Mean :-0.09447 Mean : 0.1072
## 3rd Qu.: 0.38000 3rd Qu.: 0.24000 3rd Qu.: 0.3700
## Max. : 1.55000 Max. : 1.83000 Max. : 1.1900
## BLOSUM7.1 BLOSUM8.1 BLOSUM9.1
## Min. :-1.620000 Min. :-1.96000 Min. :-1.2000
## 1st Qu.:-0.130000 1st Qu.:-0.35000 1st Qu.:-0.3500
## Median : 0.010000 Median : 0.13000 Median : 0.1500
## Mean : 0.007506 Mean :-0.07026 Mean : 0.0719
## 3rd Qu.: 0.210000 3rd Qu.: 0.20000 3rd Qu.: 0.4300
## Max. : 1.210000 Max. : 0.87000 Max. : 1.3600
## BLOSUM10.1 MSWHIM1.1 MSWHIM2.1 MSWHIM3.1
## Min. :-1.29000 Min. :-1.0000 Min. :-1.0000 Min. :-1.0000
## 1st Qu.:-0.28000 1st Qu.:-0.7400 1st Qu.: 0.0800 1st Qu.:-0.7500
## Median :-0.02000 Median :-0.5100 Median : 0.6700 Median :-0.3400
## Mean :-0.06097 Mean :-0.2829 Mean : 0.4127 Mean :-0.3768
## 3rd Qu.: 0.30000 3rd Qu.: 0.1400 3rd Qu.: 0.8300 3rd Qu.:-0.1600
## Max. : 0.99000 Max. : 1.0000 Max. : 1.0000 Max. : 1.0000
#Thermostability value of the cytosolic protein
# aIndex() is applied to the residue vector; the mean of its non-zero values
# is used as the thermostability figure.
seq.cjy.i <- aIndex(seq.cjy)
jk <- which(seq.cjy.i == 0.0)
# Drop the zero entries with a logical mask. The previous `x[-jk]` form
# returns an empty vector (and a NaN mean) when no entry is zero.
seq.cjy.in <- seq.cjy.i[!(seq.cjy.i %in% 0)]
therm.cjy <- sum(seq.cjy.in) / length(seq.cjy.in)
(therm.cjy)
## [1] 312.5155
#Protein Interaction
# Mean of the boman() values over the residue vector.
seq.cjy.pp <- boman(seq.cjy)
bin.pot1 <- sum(seq.cjy.pp) / length(seq.cjy)
(bin.pot1)
## [1] 1.445413
#bin.pot1 < 2.48 (the threshold the Peptides documentation gives for high binding potential), which implies the protein has a lower affinity to bind with another protein
#Principle components
#A function for protein pca
# Principal component analysis of six per-residue properties of a sequence.
#   seq: character vector of residue codes (e.g. the result of pdbseq()).
# Builds a six-column property matrix (charge, Boman index, hydrophobic
# moment, Kyte-Doolittle hydrophobicity, pI, molecular weight) and returns
# the prcomp() result computed on it.
protanapca <- function(seq) {
  library("bio3d")
  library("Peptides")
  props <- cbind(
    charge(seq, pH = 7, pKscale = "EMBOSS"),
    boman(seq),
    hmoment(seq, angle = 100, window = 11),
    hydrophobicity(seq, scale = "KyteDoolittle"),
    pI(seq, pKscale = "EMBOSS"),
    mw(seq, monoisotopic = FALSE)
  )
  colnames(props) <- c("CHARGE","P-P","AMPIPHILLICITY","HYDRPHOBICITY","ISOELECTRIC","MOL WEIGHT")
  prcomp(props)
}
#A function for protein analysis of different properties:
# Table of six per-residue properties of a sequence.
#   seq: character vector of residue codes (e.g. the result of pdbseq()).
# Returns a matrix whose first column "AA" is `seq` and whose remaining
# columns are charge, Boman index, hydrophobic moment, Kyte-Doolittle
# hydrophobicity, pI and molecular weight.
protana <- function(seq) {
  library("bio3d")
  library("Peptides")
  props <- cbind(
    charge(seq, pH = 7, pKscale = "EMBOSS"),
    boman(seq),
    hmoment(seq, angle = 100, window = 11),
    hydrophobicity(seq, scale = "KyteDoolittle"),
    pI(seq, pKscale = "EMBOSS"),
    mw(seq, monoisotopic = FALSE)
  )
  colnames(props) <- c("CHARGE","P-P","AMPIPHILLICITY","HYDRPHOBICITY","ISOELECTRIC","MOL WEIGHT")
  # Prepend the residues themselves as the first column.
  with.residue <- cbind(seq, props)
  colnames(with.residue) <- c("AA","CHARGE","P-P","AMPIPHILLICITY","HYDRPHOBICITY","ISOELECTRIC","MOL WEIGHT")
  return(with.residue)
}
#Membrane protein
# Property table and PCA for the membrane protein (2rpb).
mem.rpb <- read.pdb(file = "2rpb.pdb")
seq.rpb <- pdbseq(pdb = mem.rpb)
rpb.a <- protana(seq = seq.rpb)
pca.rpb <- protanapca(seq = seq.rpb)
# Two panels side by side: one biplot per protein.
par(mfrow = c(1, 2))
biplot(pca.rpb, main = "2RPB")
#Cytosolic protein
cyto.cjy <- read.pdb(file = "1cjy.pdb")
seq.cjy <- pdbseq(pdb = cyto.cjy)
cjy.a <- protana(seq = seq.cjy)
pca.cjy <- protanapca(seq = seq.cjy)
biplot(pca.cjy, main = "1CJY")
#CONCLUSION
#The membrane and cytosolic proteins have nearly equal thermal stability (310 vs 312.5)
#The protein-protein interaction parameter shows that, at nearly equal thermal stability, the membrane protein (1.85) has a higher binding potential than the cytosolic protein (1.45)
#The PCA shows that, of the six parameters chosen for the analysis of these proteins, molecular weight is an outlier and has the least relation to protein functionality, whereas protein-protein interaction and hydrophobicity are inversely related
#The other four factors, such as charge, pI, amphiphilicity and polarity, are related factors for a protein irrespective of its origin
#Hence a conclusive outcome of the above PCA is that, for proteins of different origins, hydrophobicity and protein-protein interaction are inversely related irrespective of origin
#The statistical tools used are PCA, biplot and the self-made functions protanapca (which gives the PCA values of any protein sequence) and protana (which gives a matrix of all amino acids vs six parameters for any given protein)
# Load one data frame per district; each name below becomes a script-level
# variable holding the contents of the corresponding CSV file.
district.files <- c(
  ali    = "Alipurduarr.csv",
  bir    = "Birbhumm.csv",
  bur    = "Burdwann.csv",
  coo    = "Coochbeharr.csv",
  d.dina = "D Dinajpur.csv",
  e.med  = "E Medinipurr.csv",
  how    = "Howrahh.csv",
  jal    = "Jalpaigurii.csv",
  kol    = "Kolkataa.csv",
  mur    = "Murshidabadd.csv",
  npgs   = "N 24 Pgs.csv"
)
for (nm in names(district.files)) {
  assign(nm, read.csv(district.files[[nm]]))
}
# Alipurduar: six measurement columns as a matrix, then the same matrix with
# a district label and the sex column appended (a character matrix).
sex1 <- as.character(ali$SEX)
distn <- rep("Alipurduarr", times = nrow(ali))
alipur <- cbind(ali$AGE, ali$HB, ali$MCV, ali$MCH, ali$HBA0, ali$HBA2)
alipura1 <- cbind(alipur, distn, sex1)
print(summary(alipura1))
## V1 V2 V3 V4 V5
## 16 :143 11.3 : 43 72.3 : 13 22.9 : 26 218 : 29
## 13 :134 11.6 : 40 70.5 : 12 21.7 : 22 228 : 27
## 14 :120 11 : 39 71.3 : 12 21.8 : 20 225 : 26
## 15 :115 10.7 : 35 71.8 : 11 20.2 : 19 226 : 26
## 17 :115 10.8 : 35 72.9 : 11 20.9 : 19 217 : 25
## 12 :111 12 : 35 (Other):993 22.7 : 19 221 : 24
## (Other):315 (Other):826 NA's : 1 (Other):928 (Other):896
## V6 distn sex1
## 0 :143 Alipurduarr:1053 : 1
## 2.8 :102 f: 3
## 2.6 : 98 F:595
## 2.7 : 97 m: 1
## 3 : 91 M:453
## (Other):519
## NA's : 3
# Birbhum: six measurement columns as a matrix, then the same matrix with
# a district label and the sex column appended (a character matrix).
sex3 <- as.character(bir$SEX)
distn2 <- rep("Birbhumm", times = nrow(bir))
birbhum <- cbind(bir$AGE, bir$HB, bir$MCV, bir$MCH, bir$HBA0, bir$HBA2)
birbhuma1 <- cbind(birbhum, distn2, sex3)
print(summary(birbhuma1))
## V1 V2 V3 V4
## 1 :1121 11.4 : 60 69.7 : 22 20.5 : 29
## 5 : 207 11.5 : 52 70.7 : 17 20.8 : 28
## 4 : 203 11.6 : 38 77.6 : 16 21 : 28
## 6 : 137 11.8 : 36 68.5 : 15 21.2 : 28
## 7 : 96 11.9 : 36 69.2 : 15 19.9 : 27
## 3 : 82 (Other): 728 (Other): 865 (Other): 810
## (Other): 225 NA's :1121 NA's :1121 NA's :1121
## V5 V6 distn2 sex3
## 87.9 : 61 2.9 : 140 Birbhumm:2071 :1124
## 87.5 : 54 3 : 128 F : 326
## 87.6 : 54 3.2 : 121 M : 618
## 87.8 : 52 3.1 : 113 M`: 3
## 87 : 48 2.8 : 106
## (Other): 681 (Other): 342
## NA's :1121 NA's :1121
# Burdwan: six measurement columns as a matrix, then the same matrix with
# a district label and the sex column appended (a character matrix).
sex4 <- as.character(bur$SEX)
distn3 <- rep("BurdWann", times = nrow(bur))
burdwan <- cbind(bur$AGE, bur$HB, bur$MCV, bur$MCH, bur$HBA0, bur$HBA2)
burdwana1 <- cbind(burdwan, distn3, sex4)
print(summary(burdwana1))
## V1 V2 V3 V4 V5
## 14 :352 11 : 100 72.3 : 28 22 : 60 88 : 136
## 13 :351 11.4 : 89 72.5 : 21 22.1 : 48 87.8 : 125
## 15 :327 11.7 : 77 71 : 20 21.5 : 45 87.9 : 113
## 12 :324 11.2 : 76 71.2 : 19 21.4 : 44 87.7 : 112
## 11 :258 11.1 : 73 73.2 : 19 22.4 : 44 87.6 : 105
## (Other):646 11.5 : 72 76 : 19 20.7 : 40 (Other):1666
## NA's : 1 (Other):1772 (Other):2133 (Other):1978 NA's : 2
## V6 distn3 sex4
## 2.6 :358 BurdWann:2259 : 1
## 2.7 :352 F: 521
## 2.8 :311 M:1737
## 2.5 :288
## 2.9 :209
## (Other):734
## NA's : 7
# Cooch Behar: six measurement columns as a matrix, then the same matrix with
# a district label and the sex column appended (a character matrix).
sex5 <- as.character(coo$SEX)
distn4 <- rep("Coochbeharr", times = nrow(coo))
cooch <- cbind(coo$AGE, coo$HB, coo$MCV, coo$MCH, coo$HBA0, coo$HBA2)
coocha1 <- cbind(cooch, distn4, sex5)
print(summary(coocha1))
## V1 V2 V3 V4 V5 V6
## 11:3 10.8 : 2 72.4 : 3 20.2 : 3 84.5 : 3 3.4 :5
## 12:3 11.2 : 2 69.1 : 2 22.3 : 3 84.6 : 3 3.2 :4
## 13:9 11.5 : 2 73.1 : 2 20.9 : 2 83.4 : 2 3.3 :3
## 14:7 11.6 : 2 61.7 : 1 23.4 : 2 84.2 : 2 3.5 :3
## 15:3 12.6 : 2 64.7 : 1 19 : 1 25.4 : 1 3.6 :3
## 16:2 12.9 : 2 66.8 : 1 19.7 : 1 5 : 1 3 :2
## (Other):15 (Other):17 (Other):15 (Other):15 (Other):7
## distn4 sex5
## Coochbeharr:27 M:27
##
##
##
##
##
##
# D Dinajpur: six measurement columns as a matrix, then the same matrix with
# a district label and the sex column appended (a character matrix).
sex6 <- as.character(d.dina$SEX)
distn5 <- rep("D Dinajpur", times = nrow(d.dina))
dinaj <- cbind(d.dina$AGE, d.dina$HB, d.dina$MCV, d.dina$MCH, d.dina$HBA0, d.dina$HBA2)
dinaja1 <- cbind(dinaj, distn5, sex6)
print(summary(dinaja1))
## V1 V2 V3 V4 V5
## 13 :408 10.2 : 183 80.2 : 85 21.9 : 137 88.1 : 117
## 14 :368 10.8 : 171 81.2 : 73 20.8 : 125 87.8 : 114
## 12 :357 11.2 : 154 78.2 : 66 21.3 : 94 88 : 109
## 15 :282 11.6 : 152 78.6 : 63 22.8 : 94 88.3 : 103
## 11 :206 11.9 : 129 76.2 : 53 21.5 : 84 87.7 : 102
## 16 :173 10.6 : 123 69.8 : 45 22.4 : 84 88.2 : 101
## (Other):296 (Other):1178 (Other):1705 (Other):1472 (Other):1444
## V6 distn5 sex6
## 2.9 :333 D Dinajpur:2090 F: 694
## 2.8 :330 M:1396
## 2.7 :279
## 3 :275
## 2.6 :177
## (Other):695
## NA's : 1
# E Medinipur: six measurement columns as a matrix, then the same matrix with
# a district label and the sex column appended (a character matrix).
sex7 <- as.character(e.med$SEX)
distn6 <- rep("E medinipurr", times = nrow(e.med))
e.medini <- cbind(e.med$AGE, e.med$HB, e.med$MCV, e.med$MCH, e.med$HBA0, e.med$HBA2)
emedini1 <- cbind(e.medini, distn6, sex7)
print(summary(emedini1))
## V1 V2 V3 V4 V5
## 12 :153 12.8 : 32 65 : 13 22.1 : 24 82.7 : 37
## 13 :135 12.7 : 29 65.5 : 12 22.6 : 22 82.2 : 35
## 14 :116 12.6 : 28 70.6 : 11 21.8 : 19 82.4 : 30
## 15 : 82 11.4 : 24 73.8 : 11 22.8 : 19 82.3 : 29
## 11 : 80 12.9 : 24 63.7 : 10 22 : 18 82.5 : 29
## 16 : 66 11.9 : 23 63.2 : 9 22.2 : 18 81.9 : 28
## (Other):120 (Other):592 (Other):686 (Other):632 (Other):564
## V6 distn6 sex7
## 2.6 : 89 E medinipurr:752 F:123
## 2.7 : 88 M:629
## 2.5 : 79
## 2.8 : 74
## 2.4 : 62
## (Other):357
## NA's : 3
# Howrah: six measurement columns as a matrix, then the same matrix with
# a district label and the sex column appended (a character matrix).
sex8 <- as.character(how$SEX)
distn8 <- rep("Howarhh", times = nrow(how))
howrah <- cbind(how$AGE, how$HB, how$MCV, how$MCH, how$HBA0, how$HBA2)
howrah1 <- cbind(howrah, distn8, sex8)
print(summary(howrah1))
## V1 V2 V3 V4 V5 V6
## 11:10 12.4 : 4 71.2 : 2 26.4 : 3 87.2 : 4 2.6 :8
## 12: 9 11.7 : 3 73.2 : 2 21.2 : 2 88 : 4 2.7 :7
## 13: 4 13.4 : 3 88.1 : 2 21.3 : 2 87 : 3 2.9 :7
## 14:12 10.6 : 2 62.9 : 1 22.1 : 2 87.4 : 3 3.1 :7
## 15: 6 11 : 2 67.9 : 1 22.5 : 2 87.6 : 3 2.8 :4
## 16: 2 11.2 : 2 68.9 : 1 22.6 : 2 88.2 : 3 2.5 :3
## 17: 2 (Other):29 (Other):36 (Other):32 (Other):25 (Other):9
## distn8 sex8
## Howarhh:45 F: 8
## M:37
##
##
##
##
##
# Jalpaiguri: six measurement columns as a matrix, then the same matrix with
# a district label and the sex column appended (a character matrix).
sex9 <- as.character(jal$SEX)
distn9 <- rep("Jalpaiguri", times = nrow(jal))
jalpai <- cbind(jal$AGE, jal$HB, jal$MCV, jal$MCH, jal$HBA0, jal$HBA2)
jalpai1 <- cbind(jalpai, distn9, sex9)
print(summary(jalpai1))
## V1 V2 V3 V4 V5
## 8 : 99 11.4 : 35 70.5 : 10 22.5 : 18 85 : 39
## 10 : 91 11.3 : 29 71.3 : 10 23 : 18 82 : 35
## 6 : 61 12 : 27 71.4 : 10 22.7 : 17 83 : 32
## 9 : 56 12.1 : 27 71.7 : 10 22.6 : 16 89 : 26
## 5 : 54 11.8 : 26 71.1 : 8 23.1 : 16 84 : 24
## 13 : 47 12.3 : 25 68.7 : 7 21.6 : 15 88 : 24
## (Other):257 (Other):496 (Other):610 (Other):565 (Other):485
## V6 distn9 sex9
## 2.9 : 98 Jalpaiguri:665 F:264
## 2.8 : 93 M:401
## 2.7 : 78
## 3 : 66
## 3.1 : 60
## (Other):265
## NA's : 5
# Kolkata: six measurement columns as a matrix, then the same matrix with
# a district label and the sex column appended (a character matrix).
sex10 <- as.character(kol$SEX)
distn10 <- rep("kolkataa", times = nrow(kol))
kolkata <- cbind(kol$AGE, kol$HB, kol$MCV, kol$MCH, kol$HBA0, kol$HBA2)
kolkata1 <- cbind(kolkata, distn10, sex10)
print(summary(kolkata1))
## V1 V2 V3 V4 V5
## 20 :24 13.3 : 4 85.3 : 4 22.5 : 3 88.8 :10
## 21 :18 13.5 : 4 71.1 : 2 26.4 : 3 88.2 : 8
## 22 :17 14.6 : 4 71.3 : 2 26.7 : 3 88.9 : 8
## 19 :12 14.7 : 4 71.5 : 2 27.1 : 3 88.5 : 7
## 18 :10 10.9 : 3 76 : 2 27.6 : 3 88.6 : 7
## 23 : 7 11.8 : 3 79.1 : 2 28.6 : 3 89 : 5
## (Other): 8 (Other):74 (Other):82 (Other):78 (Other):51
## V6 distn10 sex10
## 2.6 :17 kolkataa:96 F:21
## 2.4 :13 M:75
## 2.5 :12
## 2.7 :12
## 2.3 :10
## 2.9 : 7
## (Other):25
# Murshidabad: stack the six measurement columns into an unnamed matrix.
# Column order is AGE, HB, MCV, MCH, HBA0, HBA2.
murshi <- cbind(
  mur$AGE, mur$HB, mur$MCV,
  mur$MCH, mur$HBA0, mur$HBA2
)
# Per-row sex code and a constant district label, one entry per row of `mur`.
sex12 <- as.character(mur$SEX)
distn12 <- rep("Murshidabadd", times = nrow(mur))
# Binding character columns onto the matrix coerces every cell to character,
# which is why summary() tabulates each column as categories.
murshi1 <- cbind(murshi, distn12 = distn12, sex12 = sex12)
summary(murshi1)
## V1 V2 V3 V4 V5
## 13 : 49 11 : 15 80 : 10 24 : 11 86.8 : 15
## 14 : 44 10.2 : 14 82.9 : 7 25.3 : 11 87 : 13
## 15 : 44 10.7 : 12 81.6 : 6 25.9 : 8 87.1 : 13
## 12 : 40 13.9 : 12 81.7 : 6 21.1 : 7 87.3 : 13
## 16 : 39 10 : 11 82.5 : 6 25.1 : 7 87.5 : 13
## 11 : 36 11.1 : 10 90.4 : 5 25.4 : 7 87.8 : 12
## (Other):108 (Other):286 (Other):320 (Other):309 (Other):281
## V6 distn12 sex12
## 2.7 : 49 Murshidabadd:360 F: 81
## 3 : 43 M:279
## 2.9 : 40
## 2.6 : 38
## 2.8 : 35
## 3.1 : 32
## (Other):123
# North 24 Parganas: stack the six measurement columns into an unnamed matrix.
# Column order is AGE, HB, MCV, MCH, HBA0, HBA2.
Npgs <- cbind(
  npgs$AGE, npgs$HB, npgs$MCV,
  npgs$MCH, npgs$HBA0, npgs$HBA2
)
# Per-row sex code and a constant district label, one entry per row of `npgs`.
sex13 <- as.character(npgs$SEX)
distn13 <- rep("N 24 Pgs", times = nrow(npgs))
# Binding character columns onto the matrix coerces every cell to character,
# which is why summary() tabulates each column as categories.
npgs1 <- cbind(Npgs, distn13 = distn13, sex13 = sex13)
summary(npgs1)
## V1 V2 V3 V4 V5
## 13 :38 11.9 : 14 67.2 : 5 21.3 : 10 87.7 : 17
## 11 :29 11.1 : 13 69.1 : 4 21.2 : 8 87.5 : 15
## 12 :28 11.6 : 13 69.2 : 4 21.5 : 7 87.3 : 13
## 10 :23 11.7 : 12 69.5 : 4 21.7 : 7 87.8 : 13
## 8 :21 11 : 10 70.5 : 4 22.2 : 7 87.6 : 12
## 9 :21 11.8 : 9 76.7 : 4 21.4 : 6 87.4 : 11
## (Other):62 (Other):151 (Other):197 (Other):177 (Other):141
## V6 distn13 sex13
## 2.7 :51 N 24 Pgs:222 F: 28
## 2.8 :39 M:194
## 2.6 :28
## 2.5 :21
## 2.9 :20
## 3 :16
## (Other):47
# Pool the per-district matrices.
#   master  : measurements plus district label and sex (all character cells).
#   master1 : the six measurement columns only, used for the PCA.
# Both objects must stack the districts in the SAME order, because the sex
# column of `master` is used below to colour the PCA scores from `master1`.
master <- rbind(
  alipura1, birbhuma1, burdwana1, coocha1, dinaja1, emedini1,
  howrah1, jalpai1, kolkata1, murshi1, npgs1
)
master <- data.frame(master)
master.f <- na.omit(master)

# Fixed: birbhum/burdwan were swapped relative to `master`, which shifted the
# sex labels against the PCA rows for those two districts.
# NOTE(review): assumes alipur/birbhum/burdwan/... are the numeric parts of
# alipura1/birbhuma1/burdwana1/... - confirm against their definitions.
master1 <- rbind(
  alipur, birbhum, burdwan, cooch, dinaj, e.medini,
  howrah, jalpai, kolkata, murshi, Npgs
)
# Fixed: names now follow the cbind() order used to build each district
# matrix (AGE, HB, MCV, MCH, HBA0, HBA2); the previous labels put hba2/hba0
# on the MCV/MCH columns and vice versa.
colnames(master1) <- c("age", "hb", "mcv", "mch", "hba0", "hba2")
master1.f <- na.omit(master1)

# Scaled PCA on the complete measurement rows, with the base-graphics biplot.
pca <- prcomp(master1.f, scale = TRUE)
biplot(pca)

# Grouping variable for the PCA scores. Select the sex column explicitly
# instead of relying on `$` partial matching, and subset it with the same
# complete-case mask that defines the PCA rows so the two always line up.
sex_col <- grep("^sex", names(master))
stopifnot(length(sex_col) == 1L, nrow(master) == nrow(master1))
pca_rows <- complete.cases(master1)
g_class <- master[[sex_col]][pca_rows]

library("ggbiplot")
g <- ggbiplot(
  pca,
  obs.scale = 0.3, var.scale = 0.3,
  groups = g_class,
  ellipse = TRUE, circle = TRUE, pc.biplot = TRUE
)
# Clip both axes to [-5, 5]; points outside are dropped by ggplot2 with a
# warning.
g <- g + xlim(-5, 5)
g <- g + ylim(-5, 5)
g <- g + theme(legend.direction = 'horizontal', legend.position = 'top')
print(g)
## Warning: Removed 18 rows containing missing values (geom_point).