#setwd("E:/kanta")
Z<-read.csv("spectra.csv")
x<-Z$wavelength
y<-Z$absorb
mainlab=paste("wavelength vs absorbtion")
xlabel=paste("wavelength")
ylabel=paste("absorption")
plot(x,y,main = mainlab,col="red",cex=0.5,xlab = xlabel,ylab = ylabel)
jk<-which(x>450 & x<750)
length(jk)
## [1] 1557
wv<-x[jk]
aa<-y[jk]
plot(wv,aa,main = mainlab,col="red",cex=0.5,xlab = xlabel,ylab = ylabel)
mia<-max(aa)
mia
## [1] 0.386
miv<-which(aa==mia)
miv
## [1] 850
wvmax<-wv[850]
wvmax
## [1] 618.08
text(545,0.386,"wvmax= 618.08--->",cex = 0.7, col="blue")
lm(aa~wv)
##
## Call:
## lm(formula = aa ~ wv)
##
## Coefficients:
## (Intercept) wv
## 8.721e-02 2.193e-06
abline(h=0.386,v=618.08,col="black")
summary(wvmax)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 618.1 618.1 618.1 618.1 618.1 618.1
#setwd("E:/kanta")
z<-read.csv("Murshidabad.csv")
z.m<-z
summary(z.m)
## AGE SEX HPLC.NO HB RBC
## 13 YRS :4 F: 3 Min. : 13.00 Min. :12.10 Min. :3.200
## 14 YRS :4 M:17 1st Qu.: 48.25 1st Qu.:12.20 1st Qu.:3.900
## 15 YRS :2 Median :114.00 Median :12.40 Median :4.800
## 17 YRS :2 Mean :103.95 Mean :12.37 Mean :4.595
## 8 YRS :2 3rd Qu.:155.25 3rd Qu.:12.50 3rd Qu.:5.025
## 11 YRS :1 Max. :174.00 Max. :12.80 Max. :6.000
## (Other):5
## PCV MCV MCH MCHC
## Min. :27.90 Min. :62.00 Min. :16.10 Min. :25.40
## 1st Qu.:33.80 1st Qu.:68.47 1st Qu.:19.70 1st Qu.:28.40
## Median :36.50 Median :71.40 Median :20.60 Median :28.75
## Mean :35.32 Mean :70.77 Mean :20.57 Mean :28.68
## 3rd Qu.:37.17 3rd Qu.:73.38 3rd Qu.:21.30 3rd Qu.:29.60
## Max. :39.10 Max. :77.30 Max. :25.00 Max. :30.90
##
## RDW HbA0 HbA2 HbF
## Min. :11.60 Min. :84.30 Min. :1.600 Min. :0.10
## 1st Qu.:12.62 1st Qu.:86.60 1st Qu.:2.375 1st Qu.:0.20
## Median :13.65 Median :87.45 Median :2.500 Median :0.25
## Mean :14.23 Mean :87.29 Mean :2.400 Mean :0.25
## 3rd Qu.:15.47 3rd Qu.:88.33 3rd Qu.:2.600 3rd Qu.:0.30
## Max. :21.50 Max. :89.20 Max. :2.600 Max. :0.50
##
## MOLECULAR
## alpha mutation (3) :7
## alpha mutation (34):2
## alpha mutation (4) :1
## alpha mutation (6) :2
## alpha mutation (8) :2
## normal :6
##
#setwd("E:/kanta")
zx<-read.csv("Nadiatribes.csv")
zx.n<-zx
summary(zx.n)
## HPLC..No HB X RBC
## Min. : 8.00 Min. :12.10 Mode:logical Min. :4.510
## 1st Qu.: 52.50 1st Qu.:12.25 NA's:47 1st Qu.:4.965
## Median : 87.00 Median :12.70 Median :5.180
## Mean : 79.06 Mean :12.95 Mean :5.256
## 3rd Qu.:106.50 3rd Qu.:13.45 3rd Qu.:5.600
## Max. :121.00 Max. :14.90 Max. :6.030
## HCT MCV MCH MCHC
## Min. :31.60 Min. :61.50 Min. :17.60 Min. :28.60
## 1st Qu.:34.70 1st Qu.:68.80 1st Qu.:20.60 1st Qu.:29.90
## Median :36.80 Median :70.80 Median :21.40 Median :30.30
## Mean :37.01 Mean :70.45 Mean :21.41 Mean :30.38
## 3rd Qu.:39.25 3rd Qu.:72.30 3rd Qu.:22.10 3rd Qu.:30.80
## Max. :45.00 Max. :74.90 Max. :23.70 Max. :31.80
## RDW HBA0 HBA2 HBF
## Min. :13.70 Min. :86.00 Min. :1.600 Min. :0.1000
## 1st Qu.:15.05 1st Qu.:86.50 1st Qu.:2.600 1st Qu.:0.2000
## Median :16.00 Median :87.00 Median :2.700 Median :0.3000
## Mean :16.07 Mean :87.01 Mean :2.723 Mean :0.3702
## 3rd Qu.:16.80 3rd Qu.:87.35 3rd Qu.:2.900 3rd Qu.:0.5000
## Max. :18.80 Max. :88.90 Max. :3.200 Max. :0.9000
## MOLECULAR
## amut:15
## n :32
##
##
##
##
#setwd("E:/kanta")
xz<-read.csv("birbhum.csv")
xz.b<-xz
summary(xz.b)
## AGE SEX SCHOOL HPLC.TEST.NO. HB
## 12Y : 9 F:12 : 1 622A : 1 Min. :12.10
## 14Y : 5 M:38 N.A.T.HIGH SCHOOL:49 628A : 1 1st Qu.:12.30
## 14Y2M : 3 631A : 1 Median :12.75
## 15Y7M : 3 632A : 1 Mean :12.83
## 11Y10M : 2 636A : 1 3rd Qu.:13.20
## 12Y6M : 2 640A : 1 Max. :14.80
## (Other):26 (Other):44
## DIAGNOSIS HPLC.DATE RBC PCV
## Mode:logical 5.7.2016:10 Min. :4.920 Min. :37.50
## NA's:50 6.7.2016:31 1st Qu.:5.275 1st Qu.:40.42
## 7.7.2016: 6 Median :5.570 Median :41.45
## 8.7.2016: 3 Mean :5.604 Mean :41.71
## 3rd Qu.:5.950 3rd Qu.:43.15
## Max. :6.420 Max. :47.50
##
## MCV MCH MCHC RDW
## Min. :65.70 Min. :19.30 Min. :29.40 Min. :12.60
## 1st Qu.:72.33 1st Qu.:21.70 1st Qu.:30.10 1st Qu.:14.40
## Median :75.30 Median :23.20 Median :30.80 Median :15.20
## Mean :74.57 Mean :22.99 Mean :30.82 Mean :15.29
## 3rd Qu.:77.17 3rd Qu.:24.27 3rd Qu.:31.57 3rd Qu.:16.00
## Max. :79.30 Max. :26.00 Max. :32.90 Max. :18.90
##
## HbA0 HbA2 HbF MOLECULAR
## Min. :84.10 Min. :2.100 Min. :0.200 Alpha Mutation :29
## 1st Qu.:85.70 1st Qu.:2.600 1st Qu.:0.200 Normal :21
## Median :86.50 Median :2.800 Median :0.300
## Mean :86.49 Mean :2.764 Mean :0.336
## 3rd Qu.:87.28 3rd Qu.:2.900 3rd Qu.:0.400
## Max. :88.60 Max. :3.700 Max. :0.700
##
#setwd("E:/kanta")
mn<-read.csv("jalpaiguri.csv")
mn.j<-mn
summary(mn.j)
## SL.NO NAME SURNAME AGE SEX
## Min. : 2.00 Sahil : 2 Oraon :6 Min. :10.00 M:18
## 1st Qu.: 6.25 Ashik : 1 Oraon :6 1st Qu.:11.00
## Median :10.50 Atish : 1 Bhagat :1 Median :12.00
## Mean :10.50 Bipul : 1 Lama :1 Mean :12.22
## 3rd Qu.:14.75 Bishal : 1 Munda :1 3rd Qu.:13.75
## Max. :19.00 Jeet : 1 Shaibya :1 Max. :15.00
## (Other):11 (Other) :2
## School HPLC.NO HB HPLC.TEST.NO.
## Nathuahat :18 Min. : 3.00 Min. :12.10 1/Nathua : 1
## 1st Qu.:15.25 1st Qu.:12.22 17/Nathua: 1
## Median :21.00 Median :12.45 23/Nathua: 1
## Mean :37.61 Mean :12.49 31/Nathua: 1
## 3rd Qu.:69.25 3rd Qu.:12.68 33/Nathua: 1
## Max. :83.00 Max. :13.10 37/Nathua: 1
## (Other) :12
## HPLC.DATE RBC PCV MCV
## 16.03.16:5 Min. :4.550 Min. :34.10 Min. :66.70
## 17.03.16:6 1st Qu.:4.787 1st Qu.:35.62 1st Qu.:70.05
## 18.03.17:1 Median :5.055 Median :36.55 Median :71.50
## 18.03.19:1 Mean :5.121 Mean :36.92 Mean :72.32
## 18.03.20:1 3rd Qu.:5.492 3rd Qu.:37.50 3rd Qu.:74.67
## 18.03.26:3 Max. :5.940 Max. :41.20 Max. :78.50
## 31.03.16:1
## MCH MCHC RDW HbA0
## Min. :21.20 Min. :31.10 Min. :14.00 Min. :87.30
## 1st Qu.:22.60 1st Qu.:31.90 1st Qu.:15.62 1st Qu.:87.40
## Median :23.10 Median :32.30 Median :16.45 Median :87.55
## Mean :23.41 Mean :32.35 Mean :16.59 Mean :87.70
## 3rd Qu.:24.20 3rd Qu.:32.65 3rd Qu.:18.18 3rd Qu.:87.88
## Max. :26.60 Max. :33.90 Max. :18.90 Max. :88.50
##
## HbA2 HbF MOLICULAR.MUTATION
## Min. :2.700 Min. :0.0000 amut:14
## 1st Qu.:2.900 1st Qu.:0.0250 n : 4
## Median :2.950 Median :0.2000
## Mean :3.000 Mean :0.2278
## 3rd Qu.:3.175 3rd Qu.:0.3750
## Max. :3.300 Max. :0.7000
##
#setwd("E:/kanta")
abc<-read.csv("bankura.csv")
abc.b<-abc
summary(abc.b)
## NAME SURNAME AGE SEX RBC
## SRIJAN : 10 MURMU :167 Min. : 2.00 : 1 Min. :3.450
## BISWAJIT: 9 MANDI :120 1st Qu.:10.00 F:458 1st Qu.:4.800
## SONALI : 9 SOREN : 97 Median :13.00 M:692 Median :5.180
## MARSHAL : 8 TUDU : 85 Mean :12.79 Mean :5.176
## RUPALI : 8 HANSDA : 75 3rd Qu.:15.00 3rd Qu.:5.550
## Sagun : 7 HEMBRAM: 63 Max. :31.00 Max. :8.450
## (Other) :1100 (Other):544 NA's :1
## HB MCV MCH MCHC
## Min. : 1.30 Min. : 21.20 Min. :12.60 Min. : 7.90
## 1st Qu.:11.20 1st Qu.: 71.00 1st Qu.:21.40 1st Qu.:29.70
## Median :11.90 Median : 74.40 Median :22.60 Median :30.60
## Mean :11.99 Mean : 83.04 Mean :23.36 Mean :30.59
## 3rd Qu.:12.65 3rd Qu.: 80.25 3rd Qu.:25.04 3rd Qu.:31.50
## Max. :35.10 Max. :7608.00 Max. :72.20 Max. :45.50
##
## RDW HBA0 HBA2 HBF
## Min. : 1.20 Min. :54.30 Min. :0.000 Min. : 0.0000
## 1st Qu.:14.80 1st Qu.:86.50 1st Qu.:2.600 1st Qu.: 0.2000
## Median :15.90 Median :87.60 Median :2.800 Median : 0.3000
## Mean :16.18 Mean :86.94 Mean :2.942 Mean : 0.3266
## 3rd Qu.:17.10 3rd Qu.:88.20 3rd Qu.:3.000 3rd Qu.: 0.4000
## Max. :70.10 Max. :90.50 Max. :6.900 Max. :14.1000
##
## X X.1 X.2 X.3
## Mode:logical Mode:logical Mode:logical Mode:logical
## NA's:1151 NA's:1151 NA's:1151 NA's:1151
##
##
##
##
##
## X.4 X.5 X.6
## Mode:logical Mode:logical Min. :25.9
## NA's:1151 NA's:1151 1st Qu.:25.9
## Median :25.9
## Mean :25.9
## 3rd Qu.:25.9
## Max. :25.9
## NA's :1150
#setwd("E:/kanta")
l<-read.csv(file = "Nadiatribes.csv")
z<-l$MOLECULAR
total<-length(z)
pk<- which(z=="n")
m<-length(pk)
alpha<-nrow(l)-m
prb<-alpha/total
prb
## [1] 0.3191489
hb<-l$HB
mean(hb)
## [1] 12.95106
hbf<-l$HBF
mean(hbf)
## [1] 0.3702128
hba0<-l$HBA0
mean(hba0)
## [1] 87.01277
hba2<-l$HBA2
mean(hba2)
## [1] 2.723404
alphamut<-which(z=="amut")
mean(hb[alphamut])
## [1] 13.05333
mean(hbf[alphamut])
## [1] 0.3533333
mean(hba0[alphamut])
## [1] 87.02
mean(hba2[alphamut])
## [1] 2.8
q<-read.csv(file = "birbhum.csv")
p<-q$MOLICULAR.MUTATION
totalbir<-length(p)
pkbir<- which(p=="amut")
mbir<-length(pkbir)
prbbir<-mbir/totalbir
prbbir
## [1] NaN
hbbir<-q$HB
mean(hbbir)
## [1] 12.832
hbfbir<-q$HbF
mean(hbfbir)
## [1] 0.336
hba0bir<-q$HbA0
mean(hba0bir)
## [1] 86.488
hba2bir<-q$HbA2
mean(hba2bir)
## [1] 2.764
alphamutbir<-which(p=="amut")
mean(hbbir[alphamutbir])
## [1] NaN
mean(hbfbir[alphamutbir])
## [1] NaN
mean(hba0bir[alphamutbir])
## [1] NaN
mean(hba2bir[alphamutbir])
## [1] NaN
f<-read.csv(file = "Murshidabad.csv")
g<-f$MOLECULAR
totalmur<-length(g)
pkmur<- which(g=="amut")
mmur<-length(pkmur)
prbmur<-mmur/totalmur
prbmur
## [1] 0
hbmur<-f$HB
mean(hbmur)
## [1] 12.37
hbfmur<-f$HbF
mean(hbfmur)
## [1] 0.25
hba0mur<-f$HbA0
mean(hba0mur)
## [1] 87.29
hba2mur<-f$HbA2
alphamutmur<-which(g=="amut")
mean(hbmur[alphamutmur])
## [1] NaN
mean(hbfmur[alphamutmur])
## [1] NaN
mean(hba0mur[alphamutmur])
## [1] NaN
mean(hba2mur[alphamutmur])
## [1] NaN
t<-read.csv(file = "JALPAIGURI.csv")
y<-t$MOLICULAR.MUTATION
totaljalp<-length(y)
pkjalp<- which(y=="amut")
mjalp<-length(pkjalp)
prbjalp<-mjalp/totaljalp
prbjalp
## [1] 0.7777778
hbjalp<-t$HB
mean(hbjalp)
## [1] 12.48889
hbfjalp<-t$HbF
mean(hbfjalp)
## [1] 0.2277778
hba0jalp<-t$HbA0
mean(hba0jalp)
## [1] 87.7
hba2jalp<-t$HbA2
alphamutjalp<-which(y=="amut")
mean(hbjalp[alphamutjalp])
## [1] 12.55
mean(hbfjalp[alphamutjalp])
## [1] 0.2142857
mean(hba0jalp[alphamutjalp])
## [1] 87.68571
mean(hba2jalp[alphamutjalp])
## [1] 3.042857
HB<-cbind(mean(hb),mean(hbbir),mean(hbmur),mean(hbjalp))
colnames(HB)<-c("Nhb","Bhb","Mhb","Jhb")
d<-rbind(HB)
print(d)
## Nhb Bhb Mhb Jhb
## [1,] 12.95106 12.832 12.37 12.48889
par(mfrow=c(2,2))
barplot(HB,col = "yellow")
HBF<-cbind(mean(hbf),mean(hbfbir),mean(hbfmur),mean(hbfjalp))
colnames(HBF)<-c("Nhbf","Bhbf","Mhbf","Jhbf")
d<-rbind(HBF)
print(d)
## Nhbf Bhbf Mhbf Jhbf
## [1,] 0.3702128 0.336 0.25 0.2277778
par(mfrow=c(2,2))
barplot(HBF,col = "red")
HBa0<-cbind(mean(hba0),mean(hba0bir),mean(hba0mur),mean(hba0jalp))
colnames(HB)<-c("Nhba0","Bhba0","Mhba0","Jhba0")
d<-rbind(HB)
print(d)
## Nhba0 Bhba0 Mhba0 Jhba0
## [1,] 12.95106 12.832 12.37 12.48889
par(mfrow=c(2,2))
barplot(HB,col = "blue")
HBa2<-cbind(mean(hba2),mean(hba2bir),mean(hba2mur),mean(hba2jalp))
colnames(HBa2)<-c("Nhba2","Bhba2","Mhba2","Jhba2")
d<-rbind(HBa2)
print(d)
## Nhba2 Bhba2 Mhba2 Jhba2
## [1,] 2.723404 2.764 2.4 3
par(mfrow=c(2,2))
barplot(HBa2,col = "black")
bp.hb<-cbind(hb,hbmur,hbjalp,hbbir)
## Warning in cbind(hb, hbmur, hbjalp, hbbir): number of rows of result is not
## a multiple of vector length (arg 1)
colnames(bp.hb)<-c("N.HB","M.HB","J.HB","B.HB")
boxplot(bp.hb)
bp.hbf<-cbind(hbf,hbfmur,hbfjalp,hbfbir)
## Warning in cbind(hbf, hbfmur, hbfjalp, hbfbir): number of rows of result is
## not a multiple of vector length (arg 1)
colnames(bp.hbf)<-c("N.HBF","M.HBF","J.HBF","B.HBF")
boxplot(bp.hbf)
bp.hba0<-cbind(hba0,hba0mur,hba0jalp,hba0bir)
## Warning in cbind(hba0, hba0mur, hba0jalp, hba0bir): number of rows of
## result is not a multiple of vector length (arg 1)
colnames(bp.hba0)<-c("N.HBA0","M.HBA0","J.HBA0","B.HBA0")
boxplot(bp.hba0)
bp.hba2<-cbind(hba2,hba2mur,hba2jalp,hba2bir)
## Warning in cbind(hba2, hba2mur, hba2jalp, hba2bir): number of rows of
## result is not a multiple of vector length (arg 1)
colnames(bp.hba0)<-c("N.HBA2","M.HBA2","J.HBA2","B.HBA2")
boxplot(bp.hba2)
#setwd("E:/kanta")
n<-read.csv("Nadiatribes.csv")
b<-read.csv("birbhum.csv")
m<-read.csv("Murshidabad.csv")
j<-read.csv("JALPAIGURI.csv")
nad<-cbind(n$HB,n$MCV,n$MCH,n$HBA0,n$HBA2,n$HBF,n$MOLECULAR)
distn<-rep("Nadiatribes",nrow(n))
f.n<-cbind(nad,distn)
bir<-cbind(b$HB,b$MCV,b$MCH,b$HbA0,b$HbA2,b$HbF,b$MOLECULAR)
distb<-rep("birbhum",nrow(b))
f.b<-cbind(bir,distb)
mur<-cbind(m$HB,m$MCV,m$MCH,m$HbA0,m$HbA2,m$HbF,m$MOLECULAR)
distm<-rep("Murshidabad",nrow(m))
f.m<-cbind(mur,distm)
jal<-cbind(j$HB,j$MCV,j$MCH,j$HbA0,j$HbA2,j$HbF,j$MOLICULAR.MUTATION)
distj<-rep("JALPAIGURI",nrow(j))
f.j<-cbind(jal,distj)
master<-rbind(f.n,f.b,f.m,f.j)
master1<-data.frame(master)
summary(master1)
## V1 V2 V3 V4 V5
## 12.2 :21 69.3 : 4 20.6 : 6 87.5 :10 2.7 :22
## 12.1 :14 70.8 : 4 21.1 : 6 87.3 : 8 2.9 :20
## 12.3 :12 69.8 : 3 21.8 : 5 86.5 : 7 2.6 :17
## 12.4 :10 71.3 : 3 22.6 : 5 86.1 : 6 3 :17
## 12.5 :10 71.9 : 3 23.1 : 5 86.7 : 6 2.8 :16
## 12.9 : 9 72.3 : 3 24.2 : 5 87.4 : 6 2.5 :13
## (Other):59 (Other):115 (Other):103 (Other):92 (Other):30
## V6 V7 distn
## 0.2 :43 1:65 birbhum :50
## 0.3 :40 2:59 JALPAIGURI :18
## 0.4 :20 3: 1 Murshidabad:20
## 0.5 : 8 4: 2 Nadiatribes:47
## 0.6 : 6 5: 2
## 0.7 : 6 6: 6
## (Other):12
bp<-cbind(master1$V1,master1$V2,master1$V3,master1$V4,master1$V5,master1$V6)
colnames(bp)<-c("HB","MCV","MCH","HBA0","HBA2","HBF")
boxplot(bp,main="boxplot")
barplot(bp,main = "BARPLOT",col = "blue")
M<-master1
hb<-master1$V1
mch<-master1$V3
mcv<-master1$V2
hba0<-master1$V4
hba2<-master1$V5
hbf<-master1$V6
disease<-master1$V7
x<-cbind(hb,mch,mcv,hba0,hba2,hbf)
colnames(x)<-c("HB","MCH","MCV","HBA0","HBA2","HBF")
x.pca<-prcomp(x,scale.=TRUE)
summary(x.pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 1.4197 1.1461 1.0618 0.8851 0.8038 0.3376
## Proportion of Variance 0.3359 0.2189 0.1879 0.1306 0.1077 0.0190
## Cumulative Proportion 0.3359 0.5548 0.7427 0.8733 0.9810 1.0000
screeplot(x.pca)
library(ggbiplot)
## Loading required package: ggplot2
## Loading required package: plyr
## Loading required package: scales
## Loading required package: grid
g<-ggbiplot(x.pca,obs.scale=1,var.scale=1,groups=disease,ellipse=TRUE,circle=TRUE)
g <- g + scale_color_discrete(name = '')
g <- g + theme(legend.direction = 'horizontal', legend.position = 'top')
print(g)
#PCA ANALYSIS OF EACH DISTRICTS
## nadia
#setwd("E:/kanta")
k<-read.csv(file ="Nadiatribes.csv")
hb.nad<-k$HB
hba0.nad<-k$HBA0
hba2.nad<-k$HBA2
hbf.nad<-k$HBF
mcv.nad<-k$MCV
mch.nad<-k$MCH
mol.nad<-k$MOLECULAR
c<-cbind(hb.nad,hba0.nad,hba2.nad,hbf.nad,mcv.nad,mch.nad)
colnames(c)<-c("hb.nad","hba0.nad","hba2.nad","hbf.nad","mcv.nad","mch.nad")
n.pca<-prcomp(c,scale. =TRUE)
summary(n.pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 1.4175 1.2004 1.0570 0.8700 0.7829 0.25066
## Proportion of Variance 0.3349 0.2401 0.1862 0.1261 0.1022 0.01047
## Cumulative Proportion 0.3349 0.5750 0.7612 0.8874 0.9895 1.00000
screeplot(n.pca)
biplot(n.pca)
library(ggbiplot)
g<-ggbiplot(n.pca,obs.scale = 1,var.scale = 1,groups = mol.nad,ellipse = TRUE,circle = TRUE)
g<-g+ scale_color_discrete(name='')
g<-g+ theme(legend.direction='horizontal',legend.position='top')
print(g)
## birbhum
t<-read.csv(file ="birbhum.csv")
hb.bir<-t$HB
hba0.bir<-t$HbA0
hba2.bir<-t$HbA2
hbf.bir<-t$HbF
mcv.bir<-t$MCV
mch.bir<-t$MCH
mol.bir<-t$MOLICULAR.MUTATION
c1<-cbind(hb.bir,hba0.bir,hba2.bir,hbf.bir,mcv.bir,mch.bir)
colnames(c1)<-c("hb.bir","hba0.bir","hba2.bir","hbf.bir","mcv.bir","mch.bir")
b.pca<-prcomp(c1,scale. =TRUE)
summary(b.pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 1.5002 1.2399 1.0553 0.8125 0.61801 0.23700
## Proportion of Variance 0.3751 0.2562 0.1856 0.1100 0.06366 0.00936
## Cumulative Proportion 0.3751 0.6313 0.8169 0.9270 0.99064 1.00000
screeplot(b.pca)
biplot(b.pca)
library(ggbiplot)
g<-ggbiplot(b.pca,obs.scale = 1,var.scale = 1,groups = mol.bir,ellipse = TRUE,circle = TRUE)
g<-g+ scale_color_discrete(name='')
g<-g+ theme(legend.direction='horizontal',legend.position='top')
print(g)
## murshidabad
s<-read.csv(file ="birbhum.csv")
hb.mur<-s$HB
hba0.mur<-s$HbA0
hba2.mur<-s$HbA2
hbf.mur<-s$HbF
mcv.mur<-s$MCV
mch.mur<-s$MCH
mol.mur<-s$MOLECULAR
c2<-cbind(hb.mur,hba0.mur,hba2.mur,hbf.mur,mcv.mur,mch.mur)
colnames(c2)<-c("hb.mur","hba0.mur","hba2.mur","hbf.mur","mcv.mur","mch.mur")
m.pca<-prcomp(c2,scale. =TRUE)
summary(m.pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 1.5002 1.2399 1.0553 0.8125 0.61801 0.23700
## Proportion of Variance 0.3751 0.2562 0.1856 0.1100 0.06366 0.00936
## Cumulative Proportion 0.3751 0.6313 0.8169 0.9270 0.99064 1.00000
screeplot(m.pca)
biplot(m.pca)
library(ggbiplot)
g<-ggbiplot(m.pca,obs.scale = 1,var.scale = 1,groups = mol.mur,ellipse = TRUE,circle = TRUE)
g<-g+ scale_color_discrete(name='')
g<-g+ theme(legend.direction='horizontal',legend.position='top')
print(g)
## jalpaiguri
p<-read.csv(file ="JALPAIGURI.csv")
hb.jal<-p$HB
hba0.jal<-p$HbA0
hba2.jal<-p$HbA2
hbf.jal<-p$HbF
mcv.jal<-p$MCV
mch.jal<-p$MCH
mol.jal<-p$MOLICULAR.MUTATION
c3<-cbind(hb.jal,hba0.jal,hba2.jal,hbf.jal,mcv.jal,mch.jal)
colnames(c3)<-c("hb.jal","hba0.jal","hba2.jal","hbf.jal","mcv.jal","mch.jal")
j.pca<-prcomp(c3,scale. =TRUE)
summary(j.pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 1.5271 1.1994 1.0591 0.8235 0.59691 0.27108
## Proportion of Variance 0.3886 0.2397 0.1869 0.1130 0.05938 0.01225
## Cumulative Proportion 0.3886 0.6284 0.8153 0.9284 0.98775 1.00000
screeplot(j.pca)
biplot(j.pca)
library(ggbiplot)
g<-ggbiplot(j.pca,obs.scale = 1,var.scale = 1,groups = mol.jal,ellipse = TRUE,circle = TRUE)
g<-g+ scale_color_discrete(name='')
g<-g+ theme(legend.direction='horizontal',legend.position='top')
print(g)
#setwd("E:/kanta")
temp = list.files(pattern = "*.csv")
x<-list()
for(i in 1:5) {x[[i]]<-read.csv(temp[i])}
bind.data<-list()
for(i in 1:5){bind.data<-rbind(bind.data,x[i])}
(bind.data)
## [,1]
## [1,] List,16
## [2,] List,6
## [3,] List,20
## [4,] List,17
## [5,] List,16
library("bio3d")
## Warning: package 'bio3d' was built under R version 3.4.4
library("scatterplot3d")
## Warning: package 'scatterplot3d' was built under R version 3.4.4
#setwd("E:/kanta")
a<-read.csv("bond.csv")
a
d<-a$atom
f<-a$x
g<-a$y
h<-a$z
x.ab<-f[1]-f[2]
y.ab<-g[1]-g[2]
z.ab<-h[1]-h[2]
x.cb<-f[3]-f[2]
y.cb<-g[3]-g[2]
z.cb<-h[3]-h[2]
u<-c(x.ab,y.ab,z.ab)
v<-c(x.cb,y.cb,z.cb)
dotuv<-u%*%v
modu<-sqrt(u%*%v)
modv<-sqrt(v%*%v)
mod<-modu%*%modv
ct<-dotuv/mod
ct%*%180
## [,1]
## [1,] 127.2792
s<-read.pdb("1b20")
## Note: Accessing on-line PDB file
## PDB has ALT records, taking A only, rm.alt=TRUE
summary(s)
##
## Call: read.pdb(file = "1b20")
##
## Total Models#: 1
## Total Atoms#: 3084, XYZs#: 9252 Chains#: 3 (values: A B C)
##
## Protein Atoms#: 2595 (residues/Calpha atoms#: 326)
## Nucleic acid Atoms#: 0 (residues/phosphate atoms#: 0)
##
## Non-protein/nucleic Atoms#: 489 (residues: 489)
## Non-protein/nucleic resid values: [ HOH (486), ZN (3) ]
##
## + attr: atom, xyz, seqres, helix, sheet,
## calpha, remark, call
t<-torsion.pdb(s)
phi<-t$phi
psi<-t$psi
om<-t$omega
r1<-c(phi[1:10],psi[1:10])
(r1)
## [1] NA -84.15111 -92.28564 -145.01497 -97.56397 -58.21666
## [7] -69.95596 -63.55970 -71.80134 -60.26555 56.07003 125.44498
## [13] 99.79264 15.87531 166.25670 -49.03518 -39.92110 -45.61653
## [19] -40.67541 -44.24673
plot(om,pch=19)
plot(psi,phi,pch=19)
##SCATTER PLOT ###problem 3
summary(s$atom)
## type eleno elety alt
## Length:3084 Min. : 1.0 Length:3084 Length:3084
## Class :character 1st Qu.: 775.8 Class :character Class :character
## Mode :character Median :1551.5 Mode :character Mode :character
## Mean :1552.7
## 3rd Qu.:2330.2
## Max. :3105.0
## resid chain resno insert
## Length:3084 Length:3084 Min. : 2 Length:3084
## Class :character Class :character 1st Qu.: 34 Class :character
## Mode :character Mode :character Median : 71 Mode :character
## Mean :107
## 3rd Qu.:100
## Max. :567
## x y z o
## Min. :-23.172 Min. : 3.066 Min. :16.48 Min. :0.0000
## 1st Qu.: 4.653 1st Qu.:22.232 1st Qu.:37.97 1st Qu.:1.0000
## Median : 21.022 Median :33.679 Median :47.63 Median :1.0000
## Mean : 17.226 Mean :32.855 Mean :48.32 Mean :0.9827
## 3rd Qu.: 29.867 3rd Qu.:42.504 3rd Qu.:57.38 3rd Qu.:1.0000
## Max. : 47.164 Max. :63.576 Max. :83.82 Max. :1.0000
## b segid elesy charge
## Min. : 6.69 Length:3084 Length:3084 Length:3084
## 1st Qu.:11.66 Class :character Class :character Class :character
## Median :14.09 Mode :character Mode :character Mode :character
## Mean :16.91
## 3rd Qu.:20.00
## Max. :54.99
a<-s$atom[, c("x","y","z","elety")]
aa<-which(a$elety=="CA")
a.a<-s$atom[, c("x","y","z")]
r<-a.a[aa,]
summary(r)
## x y z
## Min. :-19.196 Min. : 8.307 Min. :19.96
## 1st Qu.: 3.939 1st Qu.:22.906 1st Qu.:38.28
## Median : 20.413 Median :33.884 Median :47.77
## Mean : 16.839 Mean :33.154 Mean :48.77
## 3rd Qu.: 29.245 3rd Qu.:42.310 3rd Qu.:57.91
## Max. : 45.868 Max. :59.271 Max. :80.55
scatterplot3d(r,highlight.3d = TRUE, col.axis = "red",col.grid = "lightblue", main = "scatterplot 3D",pch = 20)
ik<-which(s$atom$elety=="CA")
CAxyz<-s$atom[ik,c("x","y","z")]
plot(CAxyz)
##PROTEIN BLAST ###problem 4
blast<-blast.pdb(pdbseq(s))
## Searching ... please wait (updates every 5 seconds) RID = E8JYTV1E015
##
## Reporting 156 hits
top.hits<-plot.blast(blast, cutoff = 25)
## * Possible cutoff values: 133 -3
## Yielding Nhits: 103 156
##
## * Chosen cutoff value of: 25
## Yielding Nhits: 103
head(top.hits$hits)
## pdb.id acc group
## 1 "1B20_A" "1B20_A" "1"
## 2 "1B20_A" "1B20_A" "1"
## 3 "1B20_A" "1B20_A" "1"
## 4 "1B21_A" "1B21_A" "1"
## 5 "1B21_A" "1B21_A" "1"
## 6 "1B21_A" "1B21_A" "1"
a<-blast$raw
returned.pdbids<-a$subjectids
(returned.pdbids)
## [1] "1B20_A" "1B20_A" "1B20_A" "1B21_A" "1B21_A" "1B21_A" "1BRN_L"
## [8] "1BRN_L" "1BRN_L" "1FW7_A" "1FW7_A" "1FW7_A" "1BSA_A" "1BSA_A"
## [15] "1BSA_A" "1BSB_A" "1BSB_A" "1BSB_A" "1BSC_A" "1BSC_A" "1BSC_A"
## [22] "1BSD_A" "1BSD_A" "1BSD_A" "1BAO_A" "1BAO_A" "1BAO_A" "1BAN_A"
## [29] "1BAN_A" "1BAN_A" "1BSE_A" "1BSE_A" "1BSE_A" "1B2Z_A" "1B2Z_A"
## [36] "1B2Z_A" "1BNS_A" "1BNS_A" "1BNS_A" "1BNE_A" "1BNE_A" "1BNE_A"
## [43] "1B2S_A" "1B2S_A" "1B2S_A" "2ZA4_A" "2ZA4_A" "2ZA4_A" "1BRI_A"
## [50] "1BRI_A" "1BRI_A" "1BRJ_A" "1BRJ_A" "1BRJ_A" "1BRK_A" "1BRK_A"
## [57] "1BRK_A" "1BNF_A" "1BNF_A" "1BNF_A" "2C4B_A" "2C4B_A" "2C4B_A"
## [64] "1BRH_A" "1BRH_A" "1BRH_A" "1X1Y_A" "1X1Y_A" "1X1Y_A" "2F4Y_A"
## [71] "2F4Y_A" "2F4Y_A" "1B3S_A" "1B3S_A" "1B3S_A" "1RNB_A" "1RNB_A"
## [78] "1RNB_A" "1BNG_A" "1BNG_A" "1BNG_A" "1BRG_A" "1BRG_A" "1BRG_A"
## [85] "3DA7_A" "3DA7_A" "3DA7_A" "3DA7_A" "3Q3F_A" "3Q3F_A" "3Q3F_A"
## [92] "1BUJ_A" "1BUJ_A" "1BUJ_A" "2RBI_A" "2RBI_A" "2RBI_A" "4HAA_A"
## [99] "4HAA_A" "4HAA_A" "1GOY_A" "1GOY_A" "1GOY_A" "1MGR_A" "1MGR_A"
## [106] "1MGR_A" "4Q5N_A" "1T2I_A" "1T2I_A" "1T2I_A" "2I8L_A" "2E85_A"
## [113] "1PY3_A" "1PY3_A" "1PY3_A" "1UCK_A" "1UCK_A" "1UCK_A" "1UCL_A"
## [120] "1UCL_A" "1UCL_A" "4J5G_A" "4J5G_A" "4J5G_A" "1AY7_A" "1AY7_A"
## [127] "1AY7_A" "1UCI_A" "1UCI_A" "1UCI_A" "3A5E_A" "3A5E_A" "3A5E_A"
## [134] "4J5K_A" "4J5K_A" "4J5K_A" "1RGF_B" "1RGF_B" "1RGF_B" "4GHO_A"
## [141] "4GHO_A" "4GHO_A" "2QMI_A" "1SAR_A" "1I70_A" "1I8V_A" "1T2H_A"
## [148] "1UCJ_A" "1YNV_X" "1YNV_X" "1YNV_X" "4B6Z_A" "1BOX_A" "6AZI_A"
## [155] "6AZI_A" "6AZI_A"
p<-a$subjectids
p[10]
## [1] "1FW7_A"
p[40]
## [1] "1BNE_A"
p[100]
## [1] "4HAA_A"
plot(a$bitscore,a$evalue)
plot(a$evalue,type = "l")
plot(a$bitscore,type = "l")
#DATA QUIZ2 ##problem 5
y<-read.pdb("1ban")
## Note: Accessing on-line PDB file
summary(y)
##
## Call: read.pdb(file = "1ban")
##
## Total Models#: 1
## Total Atoms#: 2825, XYZs#: 8475 Chains#: 3 (values: A B C)
##
## Protein Atoms#: 2559 (residues/Calpha atoms#: 325)
## Nucleic acid Atoms#: 0 (residues/phosphate atoms#: 0)
##
## Non-protein/nucleic Atoms#: 266 (residues: 266)
## Non-protein/nucleic resid values: [ HOH (266) ]
##
## + attr: atom, xyz, seqres, helix, sheet,
## calpha, remark, call
t1<-torsion.pdb(y)
phi1<-t1$phi
psi1<-t1$psi
om1<-t1$omega
z<-read.pdb("1box")
## Note: Accessing on-line PDB file
## PDB has ALT records, taking A only, rm.alt=TRUE
summary(z)
##
## Call: read.pdb(file = "1box")
##
## Total Models#: 1
## Total Atoms#: 823, XYZs#: 2469 Chains#: 1 (values: A)
##
## Protein Atoms#: 728 (residues/Calpha atoms#: 95)
## Nucleic acid Atoms#: 0 (residues/phosphate atoms#: 0)
##
## Non-protein/nucleic Atoms#: 95 (residues: 95)
## Non-protein/nucleic resid values: [ HOH (95) ]
##
## + attr: atom, xyz, seqres, helix, sheet,
## calpha, remark, call
t2<-torsion.pdb(z)
phi2<-t2$phi
psi2<-t2$psi
x<-read.pdb("2rbi")
## Note: Accessing on-line PDB file
summary(x)
##
## Call: read.pdb(file = "2rbi")
##
## Total Models#: 1
## Total Atoms#: 1947, XYZs#: 5841 Chains#: 2 (values: A B)
##
## Protein Atoms#: 1718 (residues/Calpha atoms#: 216)
## Nucleic acid Atoms#: 0 (residues/phosphate atoms#: 0)
##
## Non-protein/nucleic Atoms#: 229 (residues: 229)
## Non-protein/nucleic resid values: [ HOH (229) ]
##
## + attr: atom, xyz, seqres, helix, sheet,
## calpha, remark, call
t3<-torsion.pdb(x)
phi3<-t3$phi
psi3<-t3$psi
plot(psi,phi,pch=19,col="blue",cex=0.5,text(-100,150,"1.b=1ZNI",col="blue"))
points(psi2,phi2,pch=19,col="green",cex=0.4,text(-25,150,"2.g=1SJU",col="green"))
points(psi1,phi1,pch=19,col="red",cex=0.4,text(45,15,"3.r=2INS",col = "red"))
points(psi3,phi3,pch=19,col="pink",cex=0.4,text(120,150,"4.bla=5BQQ",col = "black"))
#setwd("E:/kanta")
z<-read.csv("xyz.csv")
x1<-z$x
y1<-z$y
z1<-z$z
AB<-c(x1[2]-x1[1],y1[2]-y1[1],z1[2]-z1[1])
BC<-c(x1[3]-x1[2],y1[3]-y1[2],z1[3]-z1[2])
crs<-function(u,v){
c1<- u[2]*v[3]-v[2]*u[3]
c2<- u[3]*v[1]-v[3]*u[1]
c3<- u[1]*v[2]-v[1]*u[2]
return(c(c1,c2,c3))
}
K<-crs(AB,BC)
(K)
## [1] 2.46 4.90 -0.54
#setwd("E:/kanta")
w<-read.csv("book new.csv")
f<-w$x
f1<-is.na(f)
f2<-which(f1)
b<-f[-f2]
b
## [1] 4 4 74 7 4 4 96 4 5 5 5 98 46 8 9 1 5 6 5
mean(b)
## [1] 20.52632
g<-w$y
g1<-is.na(g)
g2<-which(g1)
a<-g[-g2]
a
## [1] 4 4 7 78 9 596 87 78 55 4 5 78 7 2 22 48 5
## [18] 7 7
mean(a)
## [1] 58.05263
plot(b,a)
##PROBLEM-4
library("readxl")
#setwd("E:/kanta")
p4<-read_xlsx("rti.xlsx")
R<-p4$Ramean
W<-p4$wavelen
wv1<-(401-320)
wv1
## [1] 81
wv2<-wv1+(501-401)
wv2
## [1] 181
wv3<-wv2+(601-501)
wv3
## [1] 281
peak1=0
peak2=0
peak3=0
for(i in 1:wv1){
peak1[i]<-R[i]
}
for(i in (wv1+1):wv2){
peak2[i-wv1]<-R[i]
}
for(i in (wv2+1):wv3){
peak3[i-wv2]<-R[i]
}
plot(W,R,cex=0.7,col="blue",main="PLOT R",xlab = "Wavelength",ylab = "Fluorescence intensity")
plot(peak1,cex=0.7,col="RED",main="PLOT R.1")
plot(peak2,cex=0.7,col="BLACK",main="PLOT R.2")
plot(peak3,cex=0.7,col="VIOLET",main="PLOT R.3")
all<-"ARNDCQEGHILKMFPSTWYV"
ss<-unlist(strsplit(all,""))
for(i in 1:20){cat(sample(ss))
cat("\n")}
## L Q M R Y K A H E G W F D I C P S T V N
## H T W G K I S Q C N D L F A P M V R Y E
## R N D I P L C W Y H K E T M G S V A F Q
## Y V K A R P C Q D T I W G F N H L M S E
## I K W P R G N C S M T H V D Y L F Q A E
## E P F T S I W M C K A D Q L G N H V Y R
## Q E G F D N A K M L I T S V Y H C P R W
## Y D S E W M N L K A V H G R P Q T F C I
## G H N P S T E A K V D Y I W C F M R L Q
## V N D Y G M R S F P W Q I A H L K E C T
## K C M S L D F E R V Q N I P G Y H A T W
## A L W D Q K H E V G P S R C N I Y T M F
## N I P F S T M L V W Y H G D R E K Q C A
## P Y H V I A W C D K F M S G E L R N Q T
## Y F V A E P R Q K L N H S C I T G W M D
## M T V C N W F D K I L S A Q P H R G Y E
## Q G H P Y K R E A S F V M C W T I L N D
## Y F R T K P Q V C A G L D H N W E S M I
## W R N A E V I F K Q H M T Y L C G P D S
## H L Q S V E P I G W C F R D N M T Y A K
#setwd("E:/kanta")
u<-read.csv("aspirincsv.csv")
a<-u$condition
#chi-square-analysis:
con<-u$condition
pla<-which(con=="placebo")
chg<-u$change
mn<-mean(chg)
h<-chg-mn
h[pla]
## [1] -0.05333333 1.04666667 1.14666667 0.64666667 2.24666667
## [6] 3.14666667 1.24666667 0.94666667 1.44666667 2.04666667
## [11] 1.64666667 1.24666667 0.74666667 1.04666667 2.04666667
## [16] 1.14666667
pm<-h[pla]
which(pm>mn)
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
pp<-which(pm>mn)
length(pp)
## [1] 16
h
## [1] -0.05333333 1.04666667 -2.95333333 1.14666667 -2.15333333
## [6] -0.75333333 0.04666667 -3.35333333 0.64666667 2.24666667
## [11] -0.25333333 -2.45333333 -3.95333333 3.14666667 -1.65333333
## [16] 1.24666667 0.94666667 1.44666667 -2.35333333 2.04666667
## [21] -1.35333333 1.64666667 1.24666667 0.74666667 1.04666667
## [26] 2.04666667 0.94666667 -0.15333333 -1.35333333 1.14666667
mean(h)
## [1] -1.258433e-16
k<-u$change
k
## [1] -3.2 -2.1 -6.1 -2.0 -5.3 -3.9 -3.1 -6.5 -2.5 -0.9 -3.4 -5.6 -7.1 0.0
## [15] -4.8 -1.9 -2.2 -1.7 -5.5 -1.1 -4.5 -1.5 -1.9 -2.4 -2.1 -1.1 -2.2 -3.3
## [29] -4.5 -2.0
mean(k)
## [1] -3.146667
h
## [1] -0.05333333 1.04666667 -2.95333333 1.14666667 -2.15333333
## [6] -0.75333333 0.04666667 -3.35333333 0.64666667 2.24666667
## [11] -0.25333333 -2.45333333 -3.95333333 3.14666667 -1.65333333
## [16] 1.24666667 0.94666667 1.44666667 -2.35333333 2.04666667
## [21] -1.35333333 1.64666667 1.24666667 0.74666667 1.04666667
## [26] 2.04666667 0.94666667 -0.15333333 -1.35333333 1.14666667
which(k>mean(k))
## [1] 2 4 7 9 10 14 16 17 18 20 22 23 24 25 26 27 30
length(which(k>mean(k)))
## [1] 17
which(k>mean(k)& u$condition=="placebo")
## [1] 2 4 9 10 14 16 17 18 20 22 23 24 25 26 30
l<-which(k>mean(k) & u$condition=="placebo")
length(l)
## [1] 15
u$change<-((u$change)**2)**0.5
u$change
## [1] 3.2 2.1 6.1 2.0 5.3 3.9 3.1 6.5 2.5 0.9 3.4 5.6 7.1 0.0 4.8 1.9 2.2
## [18] 1.7 5.5 1.1 4.5 1.5 1.9 2.4 2.1 1.1 2.2 3.3 4.5 2.0
m<-mean(u$change)
m
## [1] 3.146667
ik<-which(u$change>m &u$condition=="placebo")
length(ik)
## [1] 1
jk<-which(u$change<m & u$condition=="placebo")
length(jk)
## [1] 15
kk<-which(u$change>m & u$condition=="aspirin")
length(kk)
## [1] 12
pk<-which(u$change<m & u$condition=="aspirin")
length(pk)
## [1] 2
placebo<-c(length(ik),length(jk))
placebo
## [1] 1 15
placebo<-cbind(length(ik),length(jk))
placebo
## [,1] [,2]
## [1,] 1 15
colnames(placebo)<-c("+","-")
placebo
## + -
## [1,] 1 15
aspirin<-cbind(length(kk),length(pk))
data.file<-rbind(placebo,aspirin)
data.file
## + -
## [1,] 1 15
## [2,] 12 2
colnames(data.file)<-c("+","-")
data.file
## + -
## [1,] 1 15
## [2,] 12 2
rownames(data.file)<-c("placebo","aspirin")
colnames(data.file)<-c("+","-")
data.file
## + -
## placebo 1 15
## aspirin 12 2
#probability of value
pa.pos<-((data.file[2,2]+data.file[2,1])/30)*((data.file[1,1]+data.file[2,1])/30)
po.pos<-((data.file[1,1]+data.file[1,2])/30)*((data.file[1,1]+data.file[2,1])/30)
po.pos
## [1] 0.2311111
pi.pos<-((data.file[2,1]+data.file[2,2])/30)*((data.file[1,2]+data.file[2,2])/30)
pi.pos
## [1] 0.2644444
pp.pos<-((data.file[1,2]+data.file[1,1])/30)*((data.file[1,2]+data.file[2,2])/30)
pp.pos
## [1] 0.3022222
pap<-pa.pos*30
pop<-po.pos*30
pip<-pi.pos*30
ppp<-pp.pos*30
e11<-(data.file[1,1]-pop)^2/pop
e12<-(data.file[1,2]-ppp)^2/ppp
e21<-(data.file[2,1]-pip)^2/pip
e22<-(data.file[2,2]-pap)^2/pap
chivalue<-sum(e11,e12,e21,e22)
chivalue
## [1] 13.77101
library(bio3d)
a<-read.pdb("1wba")
## Note: Accessing on-line PDB file
t.a<-torsion.pdb(a)
phi.a<-t.a$phi
b<-is.na(phi.a)
b1<-which(b)
phi.ak<-phi.a[-b1]
psi.a<-t.a$psi
e<-is.na(psi.a)
e1<-which(e)
psi.ak<-psi.a[-e1]
par(mfrow=c(2,2))
plot(phi.ak,psi.ak,xlim = c(-180,180),ylim = c(-180,180),main = "1wba")
abline(h=c(0,0),v=c(0,0))
phi1<- -50.0
phi2<- -100.0
ik<-which(phi.ak<phi1 & phi.ak>phi2)
phial.ak<-length(ik)
phi3<- -40.0
phi4<- -150.0
ij<-which(phi.ak<phi3 & phi.ak>phi4)
phib.ak<-length(ij)
psi5<- -80.0
psi6<- 10.0
il<-which(psi.ak<psi6 & psi.ak>psi5)
psial.ak<-length(il)
psi7<- 110.0
psi8<- 160.0
im<-which(psi.ak>psi7 & psi.ak<psi8)
psib.ak<-length(im)
r<-cbind(phial.ak,psial.ak,phib.ak,psib.ak)
rownames(r)<-c("1wba")
(r)
## phial.ak psial.ak phib.ak psib.ak
## 1wba 94 28 154 104
u<-read.pdb("3ua0")
## Note: Accessing on-line PDB file
t.ua<-torsion.pdb(u)
phi.ua<-t.ua$phi
x<-is.na(phi.ua)
x1<-which(x)
phi.ua<-phi.ua[-x1]
psi.ua<-t.ua$psi
s<-is.na(psi.ua)
s1<-which(s)
psi.ua<-psi.ua[-s1]
plot(phi.ua,psi.ua,xlim = c(-180,180),ylim = c(-180,180),main = "3ua0")
abline(h=c(0,0),v=c(0,0))
phi9<- -50.0
phi10<- -110.0
ik1<-which(phi.ua<phi9 & phi.ua>phi10)
phial.ua<-length(ik1)
phi11<- -50.0
phi12<- -150.0
ij1<-which(phi.ak<phi11 & phi.ak>phi12)
phib.ua<-length(ij1)
psi13<- -50.0
psi14<- 10.0
il1<-which(psi.ua<psi14 & psi.ua>psi13)
psial.ua<-length(il1)
psi15<- 100.0
psi16<- 160.0
im1<-which(psi.ua>psi15 & psi.ua<psi16)
psib.ua<-length(im1)
r1<-cbind(phial.ua,psial.ua,phib.ua,psib.ua)
rownames(r1)<-c("3ua0")
(r1)
## phial.ua psial.ua phib.ua psib.ua
## 3ua0 60 15 154 90
main.pro<-rbind(r,r1)
main.pro
## phial.ak psial.ak phib.ak psib.ak
## 1wba 94 28 154 104
## 3ua0 60 15 154 90
colnames(main.pro)<-c("Phi-alpha","Psi-alpha","Phi-beta","Psi-beta")
main.pro
## Phi-alpha Psi-alpha Phi-beta Psi-beta
## 1wba 94 28 154 104
## 3ua0 60 15 154 90
chisq.test(main.pro)
##
## Pearson's Chi-squared test
##
## data: main.pro
## X-squared = 7.1784, df = 3, p-value = 0.06642
x<-c(0.593, 0.142, 0.329, 0.691, 0.231, 0.793, 0.519, 0.392, 0.418)
t.test<-function(dataset){
s<-sd(dataset)
d.mean<-mean(dataset)
n1<-length(dataset)
n<-n1**0.5
u<-0.3
t<-(d.mean)/(s/n)
return(t)
}
t.test<-t.test(x)
t.test
## [1] 6.43351
mean(x)
## [1] 0.4564444
#INTERPRETATION
##from the above ,it was found that the mean of the above data ,level of Salmonella (MPN/g) of randomly sampled batches of ice cream is 0.4564444.We have taken the mean value for the Null hypothesis as 0.3,ie the Salmonella level is equal to 0.3. As the mean value found here is greater than the null hyopthesis, it is not possible to accept the Null hypothesis,we have to accept the alternate hypothesis,ie the level of Salmonella is more than 0.3 MPN/g.
ballet<-c(89.2,78.2,89.3,88.3,87.3,90.1,95.2,94.3,78.3,89.3)
mean1<-mean(ballet)
sd.ball<-sd(ballet)
sd.ball
## [1] 5.690587
s1<-sd.ball^2
s1
## [1] 32.38278
l1<-length(ballet)
lb<-l1-1
lb
## [1] 9
FOOTBALL<-c(79.3,78.3,85.3,79.3,88.9,91.2,87.2,89.2,93.3,79.9)
mean2<-mean(FOOTBALL)
sd.foot<-sd(FOOTBALL)
sd.foot
## [1] 5.583995
s2<-sd.foot^2
s2
## [1] 31.181
l2<-length(FOOTBALL)
lf<-l2-1
lf
## [1] 9
L<-l1+l2-1
L
## [1] 19
sp<-(s1*lb + s2*lf)/L
sp
## [1] 30.10916
Sq<-(sp*(1/l1 + 1/l2))^0.5
Sq
## [1] 2.453942
MM<-mean1 - mean2
MM
## [1] 2.76
ttest2<-MM/Sq
ttest2
## [1] 1.124721
#Interpritation:
## the t table does not give you the precise probability of every t value,you can use it for hypothesis testing.according to the given reference index,our value is 1.124721 which is almost near to 1.372 haveing p value=0.10
### so at 95% confidence level,haveing value greater than 0.05 it indicate null hypothesis.
####This means that we can not distinguish between the fitness level between the ballet dancers and the football player.
data<-c(220, 200, 240, 210, 225 ,210, 180, 170 ,210 ,220 ,190 ,180 ,195 ,190 ,200, 190, 210, 220,240,210)
M<-matrix(data,nrow=10,byrow=TRUE)
colnames(M)<-c("Before","After")
(M)
## Before After
## [1,] 220 200
## [2,] 240 210
## [3,] 225 210
## [4,] 180 170
## [5,] 210 220
## [6,] 190 180
## [7,] 195 190
## [8,] 200 190
## [9,] 210 220
## [10,] 240 210
t.test(M[,1],M[,2],alternative="two.sided",mu=0,paired=TRUE,var.equal=FALSE,conf.level=0.95)
##
## Paired t-test
##
## data: M[, 1] and M[, 2]
## t = 2.5017, df = 9, p-value = 0.03377
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.053366 20.946634
## sample estimates:
## mean of the differences
## 11
library("bio3d")
library("Peptides")
## Warning: package 'Peptides' was built under R version 3.4.4
#membrane protein
#setwd("E:/kanta")
mem<-read.pdb("4nyk.pdb")
seq.mem<-pdbseq(mem)
#Amino acid descriptors for the membrane protein compared with the reference 20 aa present
#Thermostability value of the membrane protein
seq.mem.i<-aIndex(seq.mem)
ik<-which(seq.mem.i==0.0)
seq.mem.in<-seq.mem.i[-ik]
therm.mem<-sum(seq.mem.in)/length(seq.mem.in)
(therm.mem)
## [1] 302.2523
#Protein Interaction
seq.mem.pp<-boman(seq.mem)
bin.pot<-sum(seq.mem.pp)/length(seq.mem)
bin.pot
## [1] 1.41686
#binpot value = 1.41686, which is <2.68 which implies the protein has lower affinity to bind with another protein
#Cytosolic protein
cyto.ibv<-read.pdb("3ibv.pdb")
seq.ibv<-pdbseq(cyto.ibv)
#Amino acid descriptors for the cytosolic protein compared with the reference 20 aa present
#Thermostability value of the cytosolic protein
seq.ibv.i<-aIndex(seq.ibv)
jk<-which(seq.ibv.i==0.0)
seq.ibv.in<-seq.ibv.i[-jk]
therm.ibv<-sum(seq.ibv.in)/length(seq.ibv.in)
(therm.ibv)
## [1] 315.2815
#Protein Interaction
seq.ibv.pp<-boman(seq.ibv)
bin.pot1<-sum(seq.ibv.pp)/length(seq.ibv)
(bin.pot1)
## [1] 1.08492
#binpot<2.68 which implies the protein has lower affinity to bind with another protein
#Principle components
#A function for protein pca
protanapca<-function(seq)
{library("bio3d")
library("Peptides")
ch<-charge(seq, pH = 7, pKscale = "EMBOSS")
pp<-boman(seq)
ampI<-hmoment(seq, angle = 100, window = 11)
hy<-hydrophobicity(seq, scale = "KyteDoolittle")
pI<-pI(seq,pKscale = "EMBOSS")
mw<-mw(seq, monoisotopic = FALSE)
protana<-cbind(ch,pp,ampI,hy,pI,mw)
colnames(protana)<-c("CHARGE","P-P","AMPIPHILLICITY","HYDRPHOBICITY","ISOELECTRIC","MOL WEIGHT")
p<-cbind(seq,protana)
colnames(p)<-c("AA","CHARGE","P-P","AMPIPHILLICITY","HYDRPHOBICITY","ISOELECTRIC","MOL WEIGHT")
pr.p<-prcomp(protana)
return(pr.p)}
#A function for protein analysis of different properties
protana<-function(seq){
library("bio3d")
library("Peptides")
ch<-charge(seq, pH = 7, pKscale = "EMBOSS")
pp<-boman(seq)
ampI<-hmoment(seq, angle = 100, window = 11)
hy<-hydrophobicity(seq, scale = "KyteDoolittle")
pI<-pI(seq,pKscale = "EMBOSS")
mw<-mw(seq, monoisotopic = FALSE)
protana<-cbind(ch,pp,ampI,hy,pI,mw)
colnames(protana)<-c("CHARGE","P-P","AMPIPHILLICITY","HYDRPHOBICITY","ISOELECTRIC","MOL WEIGHT")
p<-cbind(seq,protana)
colnames(p)<-c("AA","CHARGE","P-P","AMPIPHILLICITY","HYDRPHOBICITY","ISOELECTRIC","MOL WEIGHT")
return(p)}
#Membrane protein
mem<-read.pdb("4nyk.pdb")
## PDB has ALT records, taking A only, rm.alt=TRUE
seq.mem<-pdbseq(mem)
mem.a<-protana(seq.mem)
pca.mem<-protanapca(seq.mem)
par(mfrow=c(1,1))
biplot(pca.mem,main="4NYK")
#Cytosolic protein
cyto.ibv<-read.pdb("3ibv.pdb")
seq.ibv<-pdbseq(cyto.ibv)
ibv.a<-protana(seq.ibv)
pca.ibv<-protanapca(seq.ibv)
pca.ibv
## Standard deviations (1, .., p=6):
## [1] 25.34836984 5.78031951 1.44318820 0.99829390 0.31081320 0.05188966
##
## Rotation (n x k) = (6 x 6):
## PC1 PC2 PC3 PC4
## CHARGE -0.001328563 0.006486049 -0.298910322 -0.084334098
## P-P -0.044389433 -0.862420827 -0.048271204 0.484420200
## AMPIPHILLICITY -0.008684262 0.008019359 -0.175336332 0.247985461
## HYDRPHOBICITY 0.028413760 0.502999758 -0.110877881 0.822395087
## ISOELECTRIC -0.009296199 -0.018256433 -0.930158122 -0.142804185
## MOL WEIGHT -0.998528235 0.052743568 0.009573086 0.001151903
## PC5 PC6
## CHARGE 0.142428225 0.9397929272
## P-P 0.130632948 0.0142087580
## AMPIPHILLICITY -0.946338281 0.1098385800
## HYDRPHOBICITY 0.239905690 -0.0012563071
## ISOELECTRIC 0.097323488 -0.3232974705
## MOL WEIGHT 0.008154187 0.0001367825
biplot(pca.ibv,main="3IBV")
#CONCLUSION
#The membrane and cytosolic protein have nearly equal thermal stability
#The protein protein interaction parameter concludes that the membrane protein has higher binding potential for a protein of equal thermal stability
#The pca shows out of the six parameters chosen for analysis of this protein molecular weight is an outlier and has minimum relation to protein functionality whereas protein-protein interaction and hydrophobicity are inversely related
#The other four factors such as charge,pI,Ampiphilicity and polarity are related factors for a protein irrespective of its origin
#Hence the a conclusive outcome for the above pca could be that for proteins of different origins is that hydrophobicity and protein protein interaction are inversely related irrespective of its origin
#The stastical tool used are pca,biplot and self made function protanapca(which gives the pca values of any protein sequence) and protana(which gives a matrix of all amino acid vs six parameters for any given protein)
Legand NMA FLUCTUATION
Legand protein structure
#setwd("E:/kanta")
ali<-read.csv("Alipurduarr.csv")
ban<-read.csv("Bankura.csv")
bir<-read.csv("Birbhummm.csv")
bur<-read.csv("Burdwann.csv")
mal<-read.csv("Malda.csv")
coo<-read.csv("Coochbeharr.csv")
d.dina<-read.csv("D Dinajpur.csv")
e.med<-read.csv("E Medinipurr.csv")
how<-read.csv("Howrahh.csv")
jal<-read.csv("Jalpaiguri.csv")
kol<-read.csv("Kolkataa.csv")
mur<-read.csv("Murshidabadd.csv")
npgs<-read.csv("N 24 Pgs.csv")
alipur<-cbind(ali$AGE,ali$HB,ali$MCV,ali$MCH,ali$HBA0,ali$HBA2,ali$HBF)
distn<-rep("Alipurduarr",nrow(ali))
sex1<-as.character(ali$SEX)
alipura1<-cbind(alipur,distn,sex1)
summary(alipura1)
## V1 V2 V3 V4 V5
## 16 :143 11.3 : 43 72.3 : 13 22.9 : 26 218 : 29
## 13 :134 11.6 : 40 70.5 : 12 21.7 : 22 228 : 27
## 14 :120 11 : 39 71.3 : 12 21.8 : 20 225 : 26
## 15 :115 10.7 : 35 71.8 : 11 20.2 : 19 226 : 26
## 17 :115 10.8 : 35 72.9 : 11 20.9 : 19 217 : 25
## 12 :111 12 : 35 (Other):993 22.7 : 19 221 : 24
## (Other):315 (Other):826 NA's : 1 (Other):928 (Other):896
## V6 V7 distn sex1
## 0 :143 0 :272 Alipurduarr:1053 : 1
## 2.8 :102 0.2 :193 f: 9
## 2.6 : 98 0.3 :149 F:589
## 2.7 : 97 0.4 : 78 m: 2
## 3 : 91 0.1 : 50 M:452
## (Other):519 (Other):310
## NA's : 3 NA's : 1
bankura<-cbind(ban$AGE,ban$HB,ban$HBF,ban$HBA2,ban$HBA0,ban$MCV,ban$MCH)
distn1<-rep("Bankura",nrow(ban))
sex<-as.character(ban$SEX)
bankura1<-cbind(bankura,distn1,sex)
summary(bankura1)
## V1 V2 V3 V4 V5
## 13 :138 12.1 : 53 0.2 :349 2.8 :174 88.3 : 61
## 12 :132 11.2 : 52 0.3 :266 2.9 :158 87.8 : 52
## 10 :101 12 : 52 0.4 :120 2.7 :149 88 : 52
## 8 : 91 11.9 : 50 0 :106 2.6 :128 87.9 : 50
## 15 : 86 11.3 : 48 0.1 : 90 3 :125 88.1 : 50
## (Other):602 11.7 : 47 0.5 : 78 3.1 : 88 87.5 : 48
## NA's : 1 (Other):849 (Other):142 (Other):329 (Other):838
## V6 V7 distn1 sex
## 72.7 : 17 21.6 : 31 Bankura:1151 : 1
## 71.8 : 14 22.4 : 30 F:458
## 70.7 : 13 22.3 : 27 M:692
## 72.5 : 13 21.7 : 26
## 70.6 : 11 22 : 26
## 71.9 : 11 22.1 : 26
## (Other):1072 (Other):985
birbhum<-cbind(bir$AGE,bir$HB,bir$HBF,bir$HBA0,bir$HBA2,bir$MCH,bir$MCV)
distn2<-rep("Birbhummm",nrow(bir))
sex3<-as.character(bir$SEX)
birbhuma1<-cbind(birbhum,distn2,sex3)
summary(birbhuma1)
## V1 V2 V3 V4
## 1 :1121 11.4 : 60 0.2 : 373 87.9 : 61
## 5 : 207 11.5 : 52 0.3 : 237 87.5 : 54
## 4 : 203 11.6 : 38 0.1 : 139 87.6 : 54
## 6 : 137 11.8 : 36 0.4 : 77 87.8 : 52
## 7 : 96 11.9 : 36 0.5 : 54 87 : 48
## 3 : 82 (Other): 728 (Other): 70 (Other): 681
## (Other): 225 NA's :1121 NA's :1121 NA's :1121
## V5 V6 V7 distn2 sex3
## 2.9 : 140 20.5 : 29 69.7 : 22 Birbhummm:2071 :1124
## 3 : 128 20.8 : 28 70.7 : 17 F : 326
## 3.2 : 121 21 : 28 77.6 : 16 M : 618
## 3.1 : 113 21.2 : 28 68.5 : 15 M`: 3
## 2.8 : 106 19.9 : 27 69.2 : 15
## (Other): 342 (Other): 810 (Other): 865
## NA's :1121 NA's :1121 NA's :1121
burdwan<-cbind(bur$AGE,bur$HB,bur$HBF,bur$HBA0,bur$HBA2,bur$MCV,bur$MCH)
distn3<-rep("Burdann",nrow(bur))
sex4<-as.character(bur$SEX)
burdwana1<-cbind(burdwan,distn3,sex4)
summary(burdwana1)
## V1 V2 V3 V4 V5
## 5 :352 11 : 100 0.2 :956 88 : 136 2.6 :358
## 4 :351 11.4 : 89 0.3 :430 87.8 : 125 2.7 :352
## 6 :327 11.7 : 77 0.1 :424 87.9 : 113 2.8 :311
## 3 :324 11.2 : 76 0.4 :215 87.7 : 112 2.5 :288
## 2 :258 11.1 : 73 0.5 : 92 87.6 : 105 2.9 :209
## 1 :188 11.5 : 72 (Other):139 (Other):1666 (Other):734
## (Other):459 (Other):1772 NA's : 3 NA's : 2 NA's : 7
## V6 V7 distn3 sex4
## 72.3 : 28 22 : 60 Burdann:2259 F : 22
## 72.5 : 21 22.1 : 48 Female: 499
## 71 : 20 21.5 : 45 M : 188
## 71.2 : 19 21.4 : 44 Male :1549
## 73.2 : 19 22.4 : 44 Sex : 1
## 76 : 19 20.7 : 40
## (Other):2133 (Other):1978
cooch<-cbind(coo$AGE,coo$HB,coo$HBF,coo$HBA0,coo$HBA2,coo$MCV,coo$MCH)
distn4<-rep("Coochbeharr",nrow(coo))
sex5<-as.character(coo$SEX)
coocha1<-cbind(cooch,distn4,sex5)
summary(coocha1)
## V1 V2 V3 V4 V5 V6
## 11:3 10.8 : 2 0 :13 84.5 : 3 3.4 :5 72.4 : 3
## 12:3 11.2 : 2 0.2 : 3 84.6 : 3 3.2 :4 69.1 : 2
## 13:9 11.5 : 2 0.4 : 3 83.4 : 2 3.3 :3 73.1 : 2
## 14:7 11.6 : 2 0.3 : 2 84.2 : 2 3.5 :3 61.7 : 1
## 15:3 12.6 : 2 0.1 : 1 25.4 : 1 3.6 :3 64.7 : 1
## 16:2 12.9 : 2 0.6 : 1 5 : 1 3 :2 66.8 : 1
## (Other):15 (Other): 4 (Other):15 (Other):7 (Other):17
## V7 distn4 sex5
## 20.2 : 3 Coochbeharr:27 M:27
## 22.3 : 3
## 20.9 : 2
## 23.4 : 2
## 19 : 1
## 19.7 : 1
## (Other):15
dinaj<-cbind(d.dina$AGE,d.dina$HBF,d.dina$HB,d.dina$HBA0,d.dina$HBA2,d.dina$MCV,d.dina$MCH)
distn5<-rep("D Dinajpur",nrow(d.dina))
sex6<-as.character(d.dina$SEX)
dinaja1<-cbind(dinaj,distn5,sex6)
summary(dinaja1)
## V1 V2 V3 V4 V5
## 13 :408 0.2 :772 10.2 : 183 88.1 : 117 2.9 :333
## 14 :368 0.3 :558 10.8 : 171 87.8 : 114 2.8 :330
## 12 :357 0.4 :223 11.2 : 154 88 : 109 2.7 :279
## 15 :282 0.1 :194 11.6 : 152 88.3 : 103 3 :275
## 11 :206 0.5 :119 11.9 : 129 87.7 : 102 2.6 :177
## 16 :173 0.6 : 72 10.6 : 123 88.2 : 101 (Other):695
## (Other):296 (Other):152 (Other):1178 (Other):1444 NA's : 1
## V6 V7 distn5 sex6
## 80.2 : 85 21.9 : 137 D Dinajpur:2090 F: 694
## 81.2 : 73 20.8 : 125 M:1396
## 78.2 : 66 21.3 : 94
## 78.6 : 63 22.8 : 94
## 76.2 : 53 21.5 : 84
## 69.8 : 45 22.4 : 84
## (Other):1705 (Other):1472
e.medini<-cbind(e.med$AGE,e.med$HB,e.med$HBF,e.med$HBA0,e.med$HBA2,e.med$MCV,e.med$MCH)
distn6<-rep("E medinipurr",nrow(e.med))
sex7<-as.character(e.med$SEX)
emedini1<-cbind(e.medini,distn6,sex7)
summary(emedini1)
## V1 V2 V3 V4 V5
## 12 :153 12.8 : 32 0.5 :204 82.7 : 37 2.6 : 89
## 13 :135 12.7 : 29 0.6 :134 82.2 : 35 2.7 : 88
## 14 :116 12.6 : 28 0.4 :117 82.4 : 30 2.5 : 79
## 15 : 82 11.4 : 24 0.2 : 82 82.3 : 29 2.8 : 74
## 11 : 80 12.9 : 24 0.3 : 73 82.5 : 29 2.4 : 62
## 16 : 66 11.9 : 23 0.7 : 47 81.9 : 28 (Other):357
## (Other):120 (Other):592 (Other): 95 (Other):564 NA's : 3
## V6 V7 distn6 sex7
## 65 : 13 22.1 : 24 E medinipurr:752 Female:123
## 65.5 : 12 22.6 : 22 Male :629
## 70.6 : 11 21.8 : 19
## 73.8 : 11 22.8 : 19
## 63.7 : 10 22 : 18
## 63.2 : 9 22.2 : 18
## (Other):686 (Other):632
howrah<-cbind(how$AGE,how$HB,how$MCV,how$MCH,how$HBA0,how$HBA2,how$HBF)
distn8<-rep("Howarhh",nrow(how))
sex8<-as.character(how$SEX)
howrah1<-cbind(howrah,distn8,sex8)
summary(howrah1)
## V1 V2 V3 V4 V5 V6
## 11:10 12.4 : 4 71.2 : 2 26.4 : 3 87.2 : 4 2.6 :8
## 12: 9 11.7 : 3 73.2 : 2 21.2 : 2 88 : 4 2.7 :7
## 13: 4 13.4 : 3 88.1 : 2 21.3 : 2 87 : 3 2.9 :7
## 14:12 10.6 : 2 62.9 : 1 22.1 : 2 87.4 : 3 3.1 :7
## 15: 6 11 : 2 67.9 : 1 22.5 : 2 87.6 : 3 2.8 :4
## 16: 2 11.2 : 2 68.9 : 1 22.6 : 2 88.2 : 3 2.5 :3
## 17: 2 (Other):29 (Other):36 (Other):32 (Other):25 (Other):9
## V7 distn8 sex8
## 0.1: 6 Howarhh:45 F: 8
## 0.2:16 M:37
## 0.3:15
## 0.4: 2
## 0.5: 2
## 0.6: 3
## 0.8: 1
kolkata<-cbind(kol$AGE,kol$HB,kol$MCV,kol$MCH,kol$HBA0,kol$HBA2,kol$HBF)
distn10<-rep("kolkataa",nrow(kol))
sex10<-as.character(kol$SEX)
kolkata1<-cbind(kolkata,distn10,sex10)
summary(kolkata1)
## V1 V2 V3 V4 V5
## 20 :24 13.3 : 4 85.3 : 4 22.5 : 3 88.8 :10
## 21 :18 13.5 : 4 71.1 : 2 26.4 : 3 88.2 : 8
## 22 :17 14.6 : 4 71.3 : 2 26.7 : 3 88.9 : 8
## 19 :12 14.7 : 4 71.5 : 2 27.1 : 3 88.5 : 7
## 18 :10 10.9 : 3 76 : 2 27.6 : 3 88.6 : 7
## 23 : 7 11.8 : 3 79.1 : 2 28.6 : 3 89 : 5
## (Other): 8 (Other):74 (Other):82 (Other):78 (Other):51
## V6 V7 distn10 sex10
## 2.6 :17 0.1 :42 kolkataa:96 F:21
## 2.4 :13 0.2 :30 M:75
## 2.5 :12 0.3 :11
## 2.7 :12 0.4 : 6
## 2.3 :10 0.5 : 2
## 2.9 : 7 0.6 : 1
## (Other):25 (Other): 4
malda<-cbind(mal$AGE,mal$HB,mal$MCV,mal$MCH,mal$HBA0,mal$HBA2,mal$HBF)
distn11<-rep("Malada",nrow(mal))
sex11<-as.character(mal$SEX)
malda1<-cbind(malda,distn11,sex11)
summary(malda1)
## V1 V2 V3 V4 V5
## 14 :156 11.7 : 71 81.6 : 50 25.1 : 63 86.9 : 36
## 12 :154 11 : 58 81.4 : 47 24.9 : 57 87.2 : 32
## 13 :146 11.9 : 58 80 : 44 25.6 : 54 87.4 : 25
## 15 :130 10.4 : 51 81.7 : 41 25.3 : 51 87.5 : 25
## 11 : 96 12 : 50 81 : 35 24.6 : 45 87.1 : 24
## (Other):450 10.7 : 49 81.9 : 33 24.7 : 42 81.7 : 23
## NA's : 1 (Other):796 (Other):883 (Other):821 (Other):968
## V6 V7 distn11 sex11
## 2.7 :145 0.3 :336 Malada:1133 F :224
## 2.6 :144 0.4 :231 Female:236
## 2.5 :143 0.2 :214 M :455
## 2.9 :102 0.5 :105 Male :218
## 2.8 :101 0.6 : 68
## 2.4 : 83 (Other):178
## (Other):415 NA's : 1
murshi<-cbind(mur$AGE,mur$HB,mur$MCV,mur$MCH,mur$HBA0,mur$HBA2,mur$HBF)
distn12<-rep("Murshidabadd",nrow(mur))
sex12<-as.character(mur$SEX)
murshi1<-cbind(murshi,distn12,sex12)
summary(murshi1)
## V1 V2 V3 V4 V5
## 13 : 49 11 : 15 80 : 10 24 : 11 86.8 : 15
## 14 : 44 10.2 : 14 82.9 : 7 25.3 : 11 87 : 13
## 15 : 44 10.7 : 12 81.6 : 6 25.9 : 8 87.1 : 13
## 12 : 40 13.9 : 12 81.7 : 6 21.1 : 7 87.3 : 13
## 16 : 39 10 : 11 82.5 : 6 25.1 : 7 87.5 : 13
## 11 : 36 11.1 : 10 90.4 : 5 25.4 : 7 87.8 : 12
## (Other):108 (Other):286 (Other):320 (Other):309 (Other):281
## V6 V7 distn12 sex12
## 2.7 : 49 0.3 :100 Murshidabadd:360 F: 81
## 3 : 43 0.2 : 97 M:279
## 2.9 : 40 0.4 : 54
## 2.6 : 38 0.5 : 27
## 2.8 : 35 0.9 : 14
## 3.1 : 32 (Other): 67
## (Other):123 NA's : 1
Npgs<-cbind(npgs$AGE,npgs$HB,npgs$MCV,npgs$MCH,npgs$HBA0,npgs$HBA2,npgs$HBF)
distn13<-rep("N 24 Pgs",nrow(npgs))
sex13<-as.character(npgs$SEX)
npgs1<-cbind(Npgs,distn13,sex13)
summary(npgs1)
## V1 V2 V3 V4 V5
## 13 :38 11.9 : 14 67.2 : 5 21.3 : 10 87.7 : 17
## 11 :29 11.1 : 13 69.1 : 4 21.2 : 8 87.5 : 15
## 12 :28 11.6 : 13 69.2 : 4 21.5 : 7 87.3 : 13
## 10 :23 11.7 : 12 69.5 : 4 21.7 : 7 87.8 : 13
## 8 :21 11 : 10 70.5 : 4 22.2 : 7 87.6 : 12
## 9 :21 11.8 : 9 76.7 : 4 21.4 : 6 87.4 : 11
## (Other):62 (Other):151 (Other):197 (Other):177 (Other):141
## V6 V7 distn13 sex13
## 2.7 :51 0.2 :82 N 24 Pgs:222 F : 25
## 2.8 :39 0.3 :41 Female: 3
## 2.6 :28 0.1 :38 M :194
## 2.5 :21 0.4 :19
## 2.9 :20 0.5 :17
## 3 :16 0.6 : 9
## (Other):47 (Other):16
master<-rbind(alipura1,bankura1,birbhuma1,burdwana1,coocha1,dinaja1,emedini1,howrah1,kolkata1,malda1,murshi1,npgs1)
master<-data.frame(master)
master.f<-na.omit(master)
master1<-rbind(alipur,burdwan,bankura,birbhum,cooch,dinaj,e.medini,howrah,kolkata,malda,murshi,Npgs)
colnames(master1)<-c("age","hb","hbf","hba2","hba0","mcv","mch")
master1.f<-na.omit(master1)
pca<-prcomp(master1.f,scale= TRUE)
biplot(pca)
g_class<- master.f$sex
library("ggbiplot")
g<-ggbiplot(pca,obs.scale = 0.3,var.scale = 0.3,groups = g_class,ellipse = TRUE,circle = TRUE,pc.biplot = TRUE)
g<-g+theme(legend.direction = 'horizontal',legend.position = 'top')
print(g)
library("ggbiplot")
g<-ggbiplot(pca,obs.scale = 0.3,var.scale = 0.3,groups = g_class,ellipse = TRUE,circle = TRUE,pc.biplot = TRUE)
g<-g+xlim(-2.5,2.5)
g<-g+ylim(-2.5,2.5)
g<-g+theme(legend.direction = 'horizontal',legend.position = 'top')
print(g)
## Warning: Removed 24 rows containing missing values (geom_point).
g_class1<-master.f$distn
library("ggbiplot")
g<-ggbiplot(pca,obs.scale = 0.3,var.scale = 0.3,groups = g_class1,ellipse = TRUE,circle = TRUE,pc.biplot = TRUE)
g<-g+xlim(-2.5,2.5)
g<-g+ylim(-2.5,2.5)
g<-g+theme(legend.direction = 'horizontal',legend.position = 'top')
print(g)
## Warning: Removed 24 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_path).