Analysis

Liao

2023-01-12

print(Sys.time())
## [1] "2023-01-12 18:56:25 CST"

Descriptive statistical analysis

年龄、BMI

path = "C:/Users/liao/Desktop/Analysis/data2.csv"
dat <- read.csv(path)
par(mfrow=c(1,2))
age = dat$年龄
hist(age, 
     main='年龄分布', 
     breaks = 30, 
     xlab='患者年龄', 
     ylab='', 
     xlim=range(18, 49), 
     ylim=range(0, 20))
abline(v=mean(na.omit(age)),lwd=1,col='red')
text(x=35, y=16,round(mean(na.omit(age)),2))

BMI = dat$BMI
hist(BMI, 
     main='BMI指数分布', 
     breaks = 25, 
     xlab='患者BMI', 
     ylab='', 
     xlim=range(10, 30), 
     ylim=range(0, 20))
abline(v=mean(na.omit(BMI)),lwd=1,col='red')
text(x=24, y=13,round(mean(na.omit(BMI)),2))

血糖、胰岛素、狼疮抗体

blood_glucose <- dat$血糖
data1<-data.frame(Sample<-c('normal','abnormal','missing'), 
                  status<-c('normal','abnormal','missing'),
                  value<-c(length(which(blood_glucose == 1))/length(blood_glucose),
                           length(which(blood_glucose == 0))/length(blood_glucose),
                           sum(is.na(blood_glucose))/length(blood_glucose)))
p1 = ggplot(data1,mapping = aes(Sample,value,fill=status))+
  geom_bar(stat='identity',position='dodge') +
  geom_text(aes(label=paste0(round(value*length(blood_glucose)),' ','(',round(value,3),')'),y=value+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
  labs(x = '',y = '',family='serif') +
  theme(axis.title =element_text(size = 12),
        axis.text =element_text(size = 12, color = 'black'))+
  theme(axis.text.x = element_text(angle = 0, hjust = 0.3))+
  theme(text=element_text(family='serif'))+
  ggtitle('血糖')

insulin<-dat$胰岛素
data2<-data.frame(Sample1<-c('normal','abnormal','missing'), 
                  status1<-c('normal','abnormal','missing'),
                  value1<-c(length(which(insulin == 1))/length(insulin),
                            length(which(insulin == 0))/length(insulin),
                            sum(is.na(insulin))/length(insulin)))
p2 = ggplot(data2,mapping = aes(Sample1,value1,fill=status1))+
  geom_bar(stat='identity',position='dodge') +
  geom_text(aes(label=paste0(round(value1*length(insulin)),' ','(',round(value1,3),')'),y=value1+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
  labs(x = '',y = '',family='serif') +
  theme(axis.title =element_text(size = 12),
        axis.text =element_text(size = 12, color = 'black'))+
  theme(axis.text.x = element_text(angle = 0, hjust = 0.3))+
  theme(text=element_text(family='serif'))+
  ggtitle('胰岛素',)

LAC<-dat$狼疮抗体
data4<-data.frame(Sample2<-c('normal','abnormal','missing'), 
                  status2<-c('normal','abnormal','missing'),
                  value2<-c(length(which(LAC == 1))/length(LAC),
                            length(which(LAC == 0))/length(LAC),
                            sum(is.na(LAC))/length(LAC)))
p3 = ggplot(data4,mapping = aes(Sample2,value2,fill=status2))+
  geom_bar(stat='identity',position='dodge') +
  geom_text(aes(label=paste0(round(value2*length(LAC)),' ','(',round(value2,3),')'),y=value2+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
  labs(x = '',y = '',family='serif') +
  theme(axis.title =element_text(size = 12),
        axis.text =element_text(size = 12, color = 'black'))+
  theme(axis.text.x = element_text(angle = 0, hjust = 0.3))+
  theme(text=element_text(family='serif'))+
  ggtitle('狼疮抗体',)

ggarrange(p1,p2,p3,labels=c('A','B','C'),ncol=2,nrow=2)

免疫抗体

Ancl <- dat$抗核抗体
IgM <- dat$抗B2糖蛋白1抗体
IgG <- dat$抗心磷脂抗体
data4<-data.frame(Ancl,IgM,IgG)
a <- sum(complete.cases(data4))
data3<-data.frame(Sample3<-c('ANA正常,b2-GP1正常,ACA正常',
                             'ANA正常,b2-GP1正常,ACA异常',
                             'ANA正常,b2-GP1异常,ACA正常',
                             'ANA正常,b2-GP1异常,ACA异常',
                             'ANA异常,b2-GP1正常,ACA正常',
                             'ANA异常,b2-GP1正常,ACA异常',
                             'ANA异常,b2-GP1异常,ACA正常',
                             'ANA异常,b2-GP1异常,ACA异常'), 
                  status3<-c('ANA正常,b2-GP1正常,ACA正常',
                             'ANA正常,b2-GP1正常,ACA异常',
                             'ANA正常,b2-GP1异常,ACA正常',
                             'ANA正常,b2-GP1异常,ACA异常',
                             'ANA异常,b2-GP1正常,ACA正常',
                             'ANA异常,b2-GP1正常,ACA异常',
                             'ANA异常,b2-GP1异常,ACA正常',
                             'ANA异常,b2-GP1异常,ACA异常'),
                  value3<-c(length(which(Ancl == 1 & IgM ==1 & IgG == 1))/a,
                            length(which(Ancl == 1 & IgM ==1 & IgG == 0))/a,
                            length(which(Ancl == 1 & IgM ==0 & IgG == 1))/a,
                            length(which(Ancl == 1 & IgM ==0 & IgG == 0))/a,
                            length(which(Ancl == 0 & IgM ==1 & IgG == 1))/a,
                            length(which(Ancl == 0 & IgM ==1 & IgG == 0))/a,
                            length(which(Ancl == 0 & IgM ==0 & IgG == 1))/a,
                            length(which(Ancl == 0 & IgM ==0 & IgG == 0))/a))
ggplot(data3,mapping = aes(Sample3,value3,fill=status3))+
  geom_bar(stat='identity',position='dodge') +
  geom_text(aes(label=paste0(round(value3*a),' ','(',round(value3,3),')'),y=value3+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
  labs(x = ' ',y = '',family='serif') +
  theme(axis.title =element_text(size = 12),
        axis.text =element_text(size = 12, color = 'black'))+
  theme(axis.text.x = element_text(angle = 90, hjust = 0.3, vjust = 0.5))+
  theme(text=element_text(family='serif'))+
  ggtitle('指标异常')

guides(fill='None')+ylim(0,1)
## NULL

单独统计三项指标

data5<-data.frame(Sample4<-c('normal','abnormal','missing'), 
                  status4<-c('normal','abnormal','missing'),
                  value4<-c(length(which(Ancl == 1))/length(Ancl),
                            length(which(Ancl == 0))/length(Ancl),
                            sum(is.na(Ancl))/length(Ancl)))
p4 = ggplot(data5,mapping = aes(Sample4,value4,fill=status4))+
  geom_bar(stat='identity',position='dodge') +
  geom_text(aes(label=paste0(round(value4*length(Ancl)),' ','(',round(value4,3),')'),y=value4+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
  labs(x = '',y = '',family='serif') +
  theme(axis.title =element_text(size = 12),
        axis.text =element_text(size = 12, color = 'black'))+
  theme(axis.text.x = element_text(angle = 0, hjust = 0.3))+
  theme(text=element_text(family='serif'))+
  ggtitle('抗核抗体')

data6<-data.frame(Sample5<-c('normal','abnormal','missing'), 
                  status5<-c('normal','abnormal','missing'),
                  value5<-c(length(which(IgM == 1))/length(IgM),
                            length(which(IgM == 0))/length(IgM),
                            sum(is.na(IgM))/length(IgM)))
p5 = ggplot(data6,mapping = aes(Sample5,value5,fill=status5))+
  geom_bar(stat='identity',position='dodge') +
  geom_text(aes(label=paste0(round(value5*length(IgM)),' ','(',round(value5,3),')'),y=value5+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
  labs(x = '',y = '',family='serif') +
  theme(axis.title =element_text(size = 12),
        axis.text =element_text(size = 12, color = 'black'))+
  theme(axis.text.x = element_text(angle = 0, hjust = 0.3))+
  theme(text=element_text(family='serif'))+
  ggtitle('抗B2糖蛋白1抗体')

data7<-data.frame(Sample6<-c('normal','abnormal','missing'), 
                  status6<-c('normal','abnormal','missing'),
                  value6<-c(length(which(IgG == 1))/length(IgG),
                            length(which(IgG == 0))/length(IgG),
                            sum(is.na(IgG))/length(IgG)))
p6 = ggplot(data7,mapping = aes(Sample6,value6,fill=status6))+
  geom_bar(stat='identity',position='dodge') +
  geom_text(aes(label=paste0(round(value6*length(IgG)),' ','(',round(value6,3),')'),y=value6+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
  labs(x = '',y = '',family='serif') +
  theme(axis.title =element_text(size = 12),
        axis.text =element_text(size = 12, color = 'black'))+
  theme(axis.text.x = element_text(angle = 0, hjust = 0.3))+
  theme(text=element_text(family='serif'))+
  ggtitle('抗心磷脂抗体')

ggarrange(p4,p5,p6,labels=c('A','B','C'),ncol=2,nrow=2)

correlation

library(corrplot)
## corrplot 0.92 loaded
data8 <- data.frame(age,BMI,Ancl,IgM,IgG,blood_glucose,insulin,LAC)
data9 <- na.omit(data8)
corrplot(cor(data9))