print(Sys.time())
## [1] "2023-01-12 18:56:25 CST"
Descriptive statistical analysis
年龄、BMI
path = "C:/Users/liao/Desktop/Analysis/data2.csv"
dat <- read.csv(path)
par(mfrow=c(1,2))
age = dat$年龄
hist(age,
main='年龄分布',
breaks = 30,
xlab='患者年龄',
ylab='',
xlim=range(18, 49),
ylim=range(0, 20))
abline(v=mean(na.omit(age)),lwd=1,col='red')
text(x=35, y=16,round(mean(na.omit(age)),2))
BMI = dat$BMI
hist(BMI,
main='BMI指数分布',
breaks = 25,
xlab='患者BMI',
ylab='',
xlim=range(10, 30),
ylim=range(0, 20))
abline(v=mean(na.omit(BMI)),lwd=1,col='red')
text(x=24, y=13,round(mean(na.omit(BMI)),2))

血糖、胰岛素、狼疮抗体
blood_glucose <- dat$血糖
data1<-data.frame(Sample<-c('normal','abnormal','missing'),
status<-c('normal','abnormal','missing'),
value<-c(length(which(blood_glucose == 1))/length(blood_glucose),
length(which(blood_glucose == 0))/length(blood_glucose),
sum(is.na(blood_glucose))/length(blood_glucose)))
p1 = ggplot(data1,mapping = aes(Sample,value,fill=status))+
geom_bar(stat='identity',position='dodge') +
geom_text(aes(label=paste0(round(value*length(blood_glucose)),' ','(',round(value,3),')'),y=value+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
labs(x = '',y = '',family='serif') +
theme(axis.title =element_text(size = 12),
axis.text =element_text(size = 12, color = 'black'))+
theme(axis.text.x = element_text(angle = 0, hjust = 0.3))+
theme(text=element_text(family='serif'))+
ggtitle('血糖')
insulin<-dat$胰岛素
data2<-data.frame(Sample1<-c('normal','abnormal','missing'),
status1<-c('normal','abnormal','missing'),
value1<-c(length(which(insulin == 1))/length(insulin),
length(which(insulin == 0))/length(insulin),
sum(is.na(insulin))/length(insulin)))
p2 = ggplot(data2,mapping = aes(Sample1,value1,fill=status1))+
geom_bar(stat='identity',position='dodge') +
geom_text(aes(label=paste0(round(value1*length(insulin)),' ','(',round(value1,3),')'),y=value1+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
labs(x = '',y = '',family='serif') +
theme(axis.title =element_text(size = 12),
axis.text =element_text(size = 12, color = 'black'))+
theme(axis.text.x = element_text(angle = 0, hjust = 0.3))+
theme(text=element_text(family='serif'))+
ggtitle('胰岛素',)
LAC<-dat$狼疮抗体
data4<-data.frame(Sample2<-c('normal','abnormal','missing'),
status2<-c('normal','abnormal','missing'),
value2<-c(length(which(LAC == 1))/length(LAC),
length(which(LAC == 0))/length(LAC),
sum(is.na(LAC))/length(LAC)))
p3 = ggplot(data4,mapping = aes(Sample2,value2,fill=status2))+
geom_bar(stat='identity',position='dodge') +
geom_text(aes(label=paste0(round(value2*length(LAC)),' ','(',round(value2,3),')'),y=value2+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
labs(x = '',y = '',family='serif') +
theme(axis.title =element_text(size = 12),
axis.text =element_text(size = 12, color = 'black'))+
theme(axis.text.x = element_text(angle = 0, hjust = 0.3))+
theme(text=element_text(family='serif'))+
ggtitle('狼疮抗体',)
ggarrange(p1,p2,p3,labels=c('A','B','C'),ncol=2,nrow=2)

免疫抗体
Ancl <- dat$抗核抗体
IgM <- dat$抗B2糖蛋白1抗体
IgG <- dat$抗心磷脂抗体
data4<-data.frame(Ancl,IgM,IgG)
a <- sum(complete.cases(data4))
data3<-data.frame(Sample3<-c('ANA正常,b2-GP1正常,ACA正常',
'ANA正常,b2-GP1正常,ACA异常',
'ANA正常,b2-GP1异常,ACA正常',
'ANA正常,b2-GP1异常,ACA异常',
'ANA异常,b2-GP1正常,ACA正常',
'ANA异常,b2-GP1正常,ACA异常',
'ANA异常,b2-GP1异常,ACA正常',
'ANA异常,b2-GP1异常,ACA异常'),
status3<-c('ANA正常,b2-GP1正常,ACA正常',
'ANA正常,b2-GP1正常,ACA异常',
'ANA正常,b2-GP1异常,ACA正常',
'ANA正常,b2-GP1异常,ACA异常',
'ANA异常,b2-GP1正常,ACA正常',
'ANA异常,b2-GP1正常,ACA异常',
'ANA异常,b2-GP1异常,ACA正常',
'ANA异常,b2-GP1异常,ACA异常'),
value3<-c(length(which(Ancl == 1 & IgM ==1 & IgG == 1))/a,
length(which(Ancl == 1 & IgM ==1 & IgG == 0))/a,
length(which(Ancl == 1 & IgM ==0 & IgG == 1))/a,
length(which(Ancl == 1 & IgM ==0 & IgG == 0))/a,
length(which(Ancl == 0 & IgM ==1 & IgG == 1))/a,
length(which(Ancl == 0 & IgM ==1 & IgG == 0))/a,
length(which(Ancl == 0 & IgM ==0 & IgG == 1))/a,
length(which(Ancl == 0 & IgM ==0 & IgG == 0))/a))
ggplot(data3,mapping = aes(Sample3,value3,fill=status3))+
geom_bar(stat='identity',position='dodge') +
geom_text(aes(label=paste0(round(value3*a),' ','(',round(value3,3),')'),y=value3+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
labs(x = ' ',y = '',family='serif') +
theme(axis.title =element_text(size = 12),
axis.text =element_text(size = 12, color = 'black'))+
theme(axis.text.x = element_text(angle = 90, hjust = 0.3, vjust = 0.5))+
theme(text=element_text(family='serif'))+
ggtitle('指标异常')

guides(fill='None')+ylim(0,1)
## NULL
单独统计三项指标
data5<-data.frame(Sample4<-c('normal','abnormal','missing'),
status4<-c('normal','abnormal','missing'),
value4<-c(length(which(Ancl == 1))/length(Ancl),
length(which(Ancl == 0))/length(Ancl),
sum(is.na(Ancl))/length(Ancl)))
p4 = ggplot(data5,mapping = aes(Sample4,value4,fill=status4))+
geom_bar(stat='identity',position='dodge') +
geom_text(aes(label=paste0(round(value4*length(Ancl)),' ','(',round(value4,3),')'),y=value4+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
labs(x = '',y = '',family='serif') +
theme(axis.title =element_text(size = 12),
axis.text =element_text(size = 12, color = 'black'))+
theme(axis.text.x = element_text(angle = 0, hjust = 0.3))+
theme(text=element_text(family='serif'))+
ggtitle('抗核抗体')
data6<-data.frame(Sample5<-c('normal','abnormal','missing'),
status5<-c('normal','abnormal','missing'),
value5<-c(length(which(IgM == 1))/length(IgM),
length(which(IgM == 0))/length(IgM),
sum(is.na(IgM))/length(IgM)))
p5 = ggplot(data6,mapping = aes(Sample5,value5,fill=status5))+
geom_bar(stat='identity',position='dodge') +
geom_text(aes(label=paste0(round(value5*length(IgM)),' ','(',round(value5,3),')'),y=value5+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
labs(x = '',y = '',family='serif') +
theme(axis.title =element_text(size = 12),
axis.text =element_text(size = 12, color = 'black'))+
theme(axis.text.x = element_text(angle = 0, hjust = 0.3))+
theme(text=element_text(family='serif'))+
ggtitle('抗B2糖蛋白1抗体')
data7<-data.frame(Sample6<-c('normal','abnormal','missing'),
status6<-c('normal','abnormal','missing'),
value6<-c(length(which(IgG == 1))/length(IgG),
length(which(IgG == 0))/length(IgG),
sum(is.na(IgG))/length(IgG)))
p6 = ggplot(data7,mapping = aes(Sample6,value6,fill=status6))+
geom_bar(stat='identity',position='dodge') +
geom_text(aes(label=paste0(round(value6*length(IgG)),' ','(',round(value6,3),')'),y=value6+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
labs(x = '',y = '',family='serif') +
theme(axis.title =element_text(size = 12),
axis.text =element_text(size = 12, color = 'black'))+
theme(axis.text.x = element_text(angle = 0, hjust = 0.3))+
theme(text=element_text(family='serif'))+
ggtitle('抗心磷脂抗体')
ggarrange(p4,p5,p6,labels=c('A','B','C'),ncol=2,nrow=2)

correlation
library(corrplot)
## corrplot 0.92 loaded
data8 <- data.frame(age,BMI,Ancl,IgM,IgG,blood_glucose,insulin,LAC)
data9 <- na.omit(data8)
corrplot(cor(data9))
