print(Sys.time())
## [1] "2022-11-16 22:15:52 CST"
Descriptive statistical analysis
年龄、BMI
path = "C:/Users/liao/Desktop/Analysis/data.xlsx"
dat <- readxl::read_excel(path, sheet = 2)
par(mfrow=c(1,2))
age = dat$年龄
hist(age,
main='年龄分布',
breaks = 30,
xlab='患者年龄',
ylab='',
xlim=range(18, 49),
ylim=range(0, 20))
abline(v=mean(na.omit(age)),lwd=1,col='red')
text(x=35, y=16,round(mean(na.omit(age)),2))
BMI = dat$`BMI:体重/kg / (身高/m)^2`
hist(BMI,
main='BMI指数分布',
breaks = 25,
xlab='患者BMI',
ylab='',
xlim=range(10, 30),
ylim=range(0, 15))
abline(v=mean(na.omit(BMI)),lwd=1,col='red')
text(x=24, y=13,round(mean(na.omit(BMI)),2))
血糖、胰岛素
blood_glucose <- dat$血糖
data1<-data.frame(Sample<-c('normal','abnormal','missing'),
status<-c('normal','abnormal','missing'),
value<-c(length(which(blood_glucose == 1))/length(blood_glucose),
length(which(blood_glucose == 0))/length(blood_glucose),
sum(is.na(blood_glucose))/length(blood_glucose)))
p1 = ggplot(data1,mapping = aes(Sample,value,fill=status))+
geom_bar(stat='identity',position='dodge') +
geom_text(aes(label=paste0(round(value*length(blood_glucose)),' ','(',round(value,3),')'),y=value+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
labs(x = '',y = '',family='serif') +
theme(axis.title =element_text(size = 12),
axis.text =element_text(size = 12, color = 'black'))+
theme(axis.text.x = element_text(angle = 0, hjust = 0.3))+
theme(text=element_text(family='serif'))+
ggtitle('血糖')
insulin<-dat$胰岛素
data2<-data.frame(Sample1<-c('normal','abnormal','missing'),
status1<-c('normal','abnormal','missing'),
value1<-c(length(which(insulin == 1))/length(insulin),
length(which(insulin == 0))/length(insulin),
sum(is.na(insulin))/length(insulin)))
p2 = ggplot(data2,mapping = aes(Sample1,value1,fill=status1))+
geom_bar(stat='identity',position='dodge') +
geom_text(aes(label=paste0(round(value1*length(insulin)),' ','(',round(value1,3),')'),y=value1+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
labs(x = '',y = '',family='serif') +
theme(axis.title =element_text(size = 12),
axis.text =element_text(size = 12, color = 'black'))+
theme(axis.text.x = element_text(angle = 0, hjust = 0.3))+
theme(text=element_text(family='serif'))+
ggtitle('胰岛素',)
ggarrange(p1,p2,labels=c('A','B'),ncol=2,nrow=1)
血糖和胰岛素的简单线性回归
data6<-data.frame(dat$血糖,dat$胰岛素)
data6_1<-na.omit(data6)
table(data6_1$dat.血糖,data6_1$dat.胰岛素)
##
## 0 1
## 0 41 15
## 1 27 13
cor(data6_1)
## dat.血糖 dat.胰岛素
## dat.血糖 1.00000000 0.06198013
## dat.胰岛素 0.06198013 1.00000000
data6_1[,3]<-data6_1$dat.血糖*10+data6_1$dat.胰岛素
ggplot(data6_1, mapping = aes(data6_1$dat.血糖, data6_1$dat.胰岛素)) +
geom_point() +
geom_point(mapping = aes(color = data6_1$V3))+
geom_smooth(mothod="lm", se=FALSE)+
labs(x='血糖', y='胰岛素', title='Linear Regression Plot') +
theme(plot.title = element_text(hjust=0.5, size=20, face='bold'))
## Warning: Ignoring unknown parameters: mothod
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.005
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1.005
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.01
典型磷脂抗体
dat$抗心磷脂抗体IgM[which(dat$抗心磷脂抗体IgM>=0 & dat$抗心磷脂抗体IgM<=12)] = 1
dat$抗心磷脂抗体IgM[which(dat$抗心磷脂抗体IgM ==28.1)] = 0
dat$抗心磷脂抗体IgG[which(dat$抗心磷脂抗体IgG>=0 & dat$抗心磷脂抗体IgG<=12)] = 1
Ancl <- dat$抗核抗体
IgM <- dat$抗心磷脂抗体IgM
IgG <- dat$抗心磷脂抗体IgG
data4<-data.frame(Ancl,IgM,IgG)
a <- sum(complete.cases(data4))
data3<-data.frame(Sample2<-c('抗核抗体正常,lgM正常,lgG正常',
'抗核抗体正常,lgM正常,lgG异常',
'抗核抗体正常,lgM异常,lgG正常',
'抗核抗体正常,lgM异常,lgG异常',
'抗核抗体异常,lgM正常,lgG正常',
'抗核抗体异常,lgM正常,lgG异常',
'抗核抗体异常,lgM异常,lgG正常',
'抗核抗体异常,lgM异常,lgG异常'),
status2<-c('抗核抗体正常,lgM正常,lgG正常',
'抗核抗体正常,lgM正常,lgG异常',
'抗核抗体正常,lgM异常,lgG正常',
'抗核抗体正常,lgM异常,lgG异常',
'抗核抗体异常,lgM正常,lgG正常',
'抗核抗体异常,lgM正常,lgG异常',
'抗核抗体异常,lgM异常,lgG正常',
'抗核抗体异常,lgM异常,lgG异常'),
value2<-c(length(which(Ancl == 1 & IgM ==1 & IgG == 1))/a,
length(which(Ancl == 1 & IgM ==1 & IgG == 0))/a,
length(which(Ancl == 1 & IgM ==0 & IgG == 1))/a,
length(which(Ancl == 1 & IgM ==0 & IgG == 0))/a,
length(which(Ancl == 0 & IgM ==1 & IgG == 1))/a,
length(which(Ancl == 0 & IgM ==1 & IgG == 0))/a,
length(which(Ancl == 0 & IgM ==0 & IgG == 1))/a,
length(which(Ancl == 0 & IgM ==0 & IgG == 0))/a))
ggplot(data3,mapping = aes(Sample2,value2,fill=status2))+
geom_bar(stat='identity',position='dodge') +
geom_text(aes(label=paste0(round(value2*a),' ','(',round(value2,3),')'),y=value2+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
labs(x = ' ',y = '',family='serif') +
theme(axis.title =element_text(size = 12),
axis.text =element_text(size = 12, color = 'black'))+
theme(axis.text.x = element_text(angle = 90, hjust = 0.3, vjust = 0.5))+
theme(text=element_text(family='serif'))+
ggtitle('指标异常')
guides(fill='None')+ylim(0,1)
## NULL
单独统计三项指标
Ancl <- dat$抗核抗体
IgM <- dat$抗心磷脂抗体IgM
IgG <- dat$抗心磷脂抗体IgG
data7<-data.frame(Sample3<-c('normal','abnormal','missing'),
status3<-c('normal','abnormal','missing'),
value3<-c(length(which(Ancl == 1))/length(Ancl),
length(which(Ancl == 0))/length(Ancl),
sum(is.na(Ancl))/length(Ancl)))
p3 = ggplot(data7,mapping = aes(Sample3,value3,fill=status3))+
geom_bar(stat='identity',position='dodge') +
geom_text(aes(label=paste0(round(value3*length(Ancl)),' ','(',round(value3,3),')'),y=value3+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
labs(x = '',y = '',family='serif') +
theme(axis.title =element_text(size = 12),
axis.text =element_text(size = 12, color = 'black'))+
theme(axis.text.x = element_text(angle = 0, hjust = 0.3))+
theme(text=element_text(family='serif'))+
ggtitle('抗核抗体')
data8<-data.frame(Sample4<-c('normal','abnormal','missing'),
status4<-c('normal','abnormal','missing'),
value4<-c(length(which(IgM == 1))/length(IgM),
length(which(IgM == 0))/length(IgM),
sum(is.na(IgM))/length(IgM)))
p4 = ggplot(data8,mapping = aes(Sample4,value4,fill=status4))+
geom_bar(stat='identity',position='dodge') +
geom_text(aes(label=paste0(round(value4*length(IgM)),' ','(',round(value4,3),')'),y=value4+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
labs(x = '',y = '',family='serif') +
theme(axis.title =element_text(size = 12),
axis.text =element_text(size = 12, color = 'black'))+
theme(axis.text.x = element_text(angle = 0, hjust = 0.3))+
theme(text=element_text(family='serif'))+
ggtitle('抗心磷脂抗体IgM')
data9<-data.frame(Sample5<-c('normal','abnormal','missing'),
status5<-c('normal','abnormal','missing'),
value5<-c(length(which(IgG == 1))/length(IgG),
length(which(IgG == 0))/length(IgG),
sum(is.na(IgG))/length(IgG)))
p5 = ggplot(data9,mapping = aes(Sample5,value5,fill=status5))+
geom_bar(stat='identity',position='dodge') +
geom_text(aes(label=paste0(round(value4*length(IgG)),' ','(',round(value5,3),')'),y=value5+0.01), position=position_dodge(0.9), vjust=0,family='serif')+
labs(x = '',y = '',family='serif') +
theme(axis.title =element_text(size = 12),
axis.text =element_text(size = 12, color = 'black'))+
theme(axis.text.x = element_text(angle = 0, hjust = 0.3))+
theme(text=element_text(family='serif'))+
ggtitle('抗心磷脂抗体IgG')
ggarrange(p3,p4,p5,labels=c('A','B','C'),ncol=2,nrow=2)
correlation
IgM、IgG除去空值都为1(正常)
library(corrplot)
## corrplot 0.92 loaded
data4 <- data.frame(age,BMI,Ancl,IgM,IgG,blood_glucose,insulin)
data5 <- na.omit(data4)
corrplot(cor(data5))
## Warning in cor(data5): 标准差为零
addition
PV=c(0.01, 0.02, 0.025, 0.05)
Rctrl=1
RCS=c(1, 2, 4, 6, 8, 10)
mat=matrix(0, length(PV), length(RCS))
for(i in 1:length(PV)) {
pv=PV[i]
for(j in 1:length(RCS)) {
Rcs=RCS[j]
mat[i,j]=Rcs*pv/(pv*Rcs+(1-pv)*Rctrl)
}
}
rownames(mat)=PV
barplot(t(mat), col=RCS, beside = T, border = F, xlab = "prevalence of positive antinuclear antibody", ylab = "Observed rate in clinic")
legend("topleft", legend = RCS, pch=15, col=RCS, box.col = "white")
abline(h=c(0.15, 0.2), lty=2)
疑问
不孕程度打分规则?
最终结果是否只分为生育成功和生育失败?是否区分程度
有些指标异常是否需要区分程度?比如抗核抗体的异常中含有比值