### MCA (multiple correspondence analysis) walkthrough, reference: https://zhuanlan.zhihu.com/p/499395932
rm(list = ls())
library(dplyr)
## Warning: 程辑包'dplyr'是用R版本4.3.3 来建造的
##
## 载入程辑包:'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#install.packages('glmnet')
library(Matrix)
library(glmnet) ##Lasso回归
## Loaded glmnet 4.1-8
#install.packages('rms')
#install.packages('Hmisc')
library(Hmisc)
##
## 载入程辑包:'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
library(rms) ## 画列线图;
## Warning in .recacheSubclasses(def@className, def, env):
## "replValueSp"类别的子类别"ndiMatrix"没有定义;因此没有更新
#install.packages('VIM')
library(colorspace)
library(grid)
library(VIM) ## 包中aggr()函数,判断数据缺失情况
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
##
## 载入程辑包:'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
#install.packages('survival')
library(survival) ## 生存分析包
# Load the survey data and keep the six columns analysed by the MCA:
# columns 2:3 (Sex, Age) and 9:12 (Routine, Sleep.quality, Sleep.duration, SIS).
sleepdisturbance <- read.csv("data.csv")
SDMode <- sleepdisturbance[, c(2:3, 9:12)]

# Symptom legend carried over from the original notes (the letters A-E are
# not referenced by the code in this section):
#   A = difficulty falling asleep
#   B = waking easily at night or waking early
#   C = frequent dreams or nightmares
#   D = next-day fatigue, drowsiness, low energy
#   E = snoring when falling asleep

# Turn every selected column into a factor of its numeric codes.
SDMode[] <- lapply(SDMode, function(codes) as.factor(as.numeric(codes)))

# Human-readable label for each numeric code, per variable.
level_labels <- list(
  Sex = c("0" = "Female", "1" = "Male"),
  Age = c("0" = "Young", "1" = "Adult", "2" = "Aged"),
  Routine = c(
    "1" = "Regular",
    "2" = "Occasionally stay up late",
    "3" = "Stay up late often",
    "4" = "Very irregular"
  ),
  Sleep.quality = c(
    "1" = "Very good",
    "2" = "Better",
    "3" = "Normal",
    "4" = "Poor",
    "5" = "Very poor"
  ),
  Sleep.duration = c("1" = "<7h", "2" = "7-9h", "3" = ">9h"),
  SIS = c("0" = "No snoring in sleep", "1" = "Snoring in sleep")
)

# Rename only the codes that are actually present; any other level is left
# untouched.
for (variable in names(level_labels)) {
  code_map <- level_labels[[variable]]
  current <- levels(SDMode[[variable]])
  known <- current %in% names(code_map)
  current[known] <- unname(code_map[current[known]])
  levels(SDMode[[variable]]) <- current
}

# One bar chart of category counts per variable.
for (variable in names(SDMode)) {
  plot(
    SDMode[[variable]],
    main = variable,
    ylab = "Count",
    col = "darkblue",
    las = 1,
    col.main = "darkblue"
  )
}
#install.packages('FactoMineR')
library("FactoMineR")
#install.packages('factoextra')
library("factoextra")
## 载入需要的程辑包:ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library("ggplot2")
# Fit the MCA. quali.sup marks columns 1:2 of SDMode as supplementary
# qualitative variables (covariates).
# Figure 1: how each variable relates to dimensions 1 and 2.
res.mca <- MCA(
  SDMode,
  quali.sup = c(1:2),
  ncp = 5,
  graph = TRUE
)
# Variant without supplementary variables:
# res.mca <- MCA(SDMode, ncp = 5, graph = TRUE)

# Figure 2: share of variance explained by each dimension
# (see "Eigenvalues" / "% of var." in the summary).
fviz_screeplot(
  res.mca,
  addlabels = TRUE,
  ylim = c(0, 15)
)
summary(res.mca)
##
## Call:
## MCA(X = SDMode, ncp = 5, quali.sup = c(1:2), graph = TRUE)
##
##
## Eigenvalues
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7
## Variance 0.370 0.310 0.266 0.254 0.253 0.240 0.236
## % of var. 14.780 12.401 10.637 10.177 10.114 9.612 9.433
## Cumulative % of var. 14.780 27.182 37.819 47.996 58.110 67.722 77.155
## Dim.8 Dim.9 Dim.10
## Variance 0.213 0.194 0.163
## % of var. 8.531 7.779 6.536
## Cumulative % of var. 85.686 93.464 100.000
##
## Individuals (the 10 first)
## Dim.1 ctr cos2 Dim.2 ctr cos2
## 1 | -1.219 0.081 0.571 | 0.733 0.035 0.206 |
## 2 | 0.186 0.002 0.021 | -0.079 0.000 0.004 |
## 3 | 0.491 0.013 0.135 | -0.729 0.034 0.298 |
## 4 | 0.200 0.002 0.044 | -0.606 0.024 0.409 |
## 5 | 0.200 0.002 0.044 | -0.606 0.024 0.409 |
## 6 | -0.701 0.027 0.263 | -0.430 0.012 0.099 |
## 7 | -0.997 0.054 0.500 | 0.134 0.001 0.009 |
## 8 | 0.544 0.016 0.219 | -0.226 0.003 0.038 |
## 9 | -0.676 0.025 0.241 | 0.704 0.032 0.261 |
## 10 | -0.343 0.006 0.073 | -0.578 0.022 0.209 |
## Dim.3 ctr cos2
## 1 -0.167 0.002 0.011 |
## 2 -0.252 0.005 0.039 |
## 3 0.401 0.012 0.090 |
## 4 0.046 0.000 0.002 |
## 5 0.046 0.000 0.002 |
## 6 0.573 0.025 0.176 |
## 7 0.190 0.003 0.018 |
## 8 -0.484 0.018 0.174 |
## 9 -0.462 0.016 0.112 |
## 10 0.340 0.009 0.072 |
##
## Categories (the 10 first)
## Dim.1 ctr cos2 v.test Dim.2 ctr
## Regular | -0.747 12.613 0.280 -37.289 | 0.527 7.468
## Occasionally stay up late | -0.026 0.018 0.000 -1.493 | -0.730 17.034
## Stay up late often | 0.811 10.287 0.198 31.352 | 0.117 0.254
## Very irregular | 1.900 9.288 0.143 26.628 | 2.280 15.934
## Very good | -1.079 13.100 0.232 -33.972 | 1.035 14.372
## Better | -0.540 5.542 0.114 -23.794 | -0.299 2.028
## Normal | 0.331 2.990 0.074 19.185 | -0.627 12.802
## Poor | 1.212 12.824 0.218 32.883 | 0.916 8.717
## Very poor | 2.055 5.750 0.087 20.760 | 2.329 8.800
## <7h | 0.329 5.500 0.325 40.188 | -0.050 0.151
## cos2 v.test Dim.3 ctr cos2 v.test
## Regular 0.139 26.283 | -0.328 3.370 0.054 -16.353 |
## Occasionally stay up late 0.350 -41.715 | 0.463 7.987 0.141 26.455 |
## Stay up late often 0.004 4.512 | -0.631 8.668 0.120 -24.415 |
## Very irregular 0.205 31.949 | 1.890 12.764 0.141 26.482 |
## Very good 0.214 32.595 | -0.303 1.433 0.018 -9.531 |
## Better 0.035 -13.184 | 0.433 4.965 0.073 19.106 |
## Normal 0.266 -36.364 | -0.046 0.080 0.001 -2.661 |
## Poor 0.124 24.833 | -1.018 12.570 0.154 -27.619 |
## Very poor 0.111 23.525 | 3.894 28.683 0.311 39.334 |
## <7h 0.007 -6.104 | -0.169 2.006 0.085 -20.590 |
##
## Categorical variables (eta2)
## Dim.1 Dim.2 Dim.3
## Routine | 0.476 0.505 0.349 |
## Sleep.quality | 0.594 0.579 0.508 |
## Sleep.duration | 0.325 0.144 0.119 |
## SIS | 0.083 0.012 0.089 |
##
## Supplementary categories
## Dim.1 cos2 v.test Dim.2 cos2 v.test
## Female | -0.046 0.002 -2.836 | 0.049 0.002 3.033 |
## Male | 0.035 0.002 2.836 | -0.038 0.002 -3.033 |
## Young | 0.158 0.013 7.968 | -0.142 0.010 -7.156 |
## Adult | 0.014 0.000 0.942 | 0.009 0.000 0.597 |
## Aged | -0.342 0.025 -11.138 | 0.249 0.013 8.110 |
## Dim.3 cos2 v.test
## Female -0.118 0.011 -7.265 |
## Male 0.090 0.011 7.265 |
## Young 0.030 0.000 1.503 |
## Adult 0.028 0.001 1.938 |
## Aged -0.135 0.004 -4.411 |
##
## Supplementary categorical variables (eta2)
## Dim.1 Dim.2 Dim.3
## Sex | 0.002 0.002 0.011 |
## Age | 0.029 0.018 0.004 |
# Figure 3 - variable analysis: contribution of each category to
# dimension 1, then to dimension 2.
fviz_contrib(res.mca, choice = "var", axes = 1)
fviz_contrib(res.mca, choice = "var", axes = 2)

# Quality of representation (cos2) of each category on dimensions 1-2.
# The values are fairly low, which may be due to the amount of data.
fviz_cos2(res.mca, choice = "var", axes = 1:2)

# Figure 4 - MCA plot of the active categories only; individuals and
# supplementary categories are hidden.
plot(
  res.mca,
  invisible = c("quali.sup", "ind"),
  cex = 1,
  col.var = "darkblue",
  title = "Active categories",
  cex.main = 2,
  col.main = "darkblue"
)

# Individuals coloured by Sex (habillage = "Sex"), with 95% ellipses.
fviz_mca_ind(
  res.mca,
  label = "none",
  habillage = "Sex",
  title = "MCA individues",
  addEllipses = TRUE,
  ellipse.level = 0.95,
  palette = c("#00AFBB", "#E7B800"),
  ggtheme = theme_minimal()
)

# Individuals coloured by Age (habillage = "Age"), with 95% ellipses.
fviz_mca_ind(
  res.mca,
  label = "none",
  habillage = "Age",
  title = "MCA individues",
  addEllipses = TRUE,
  ellipse.level = 0.95
)

# To colour individuals by several categorical variables at once, use
# fviz_ellipses() from factoextra.
fviz_ellipses(
  res.mca,
  c("Sex", "Age"),
  geom = "point",
  addEllipses = TRUE,
  ellipse.level = 0.95
)
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Individuals together with active and supplementary categories
# (labels shown for categories only; selection restricted via "cos2 10").
plot(
  res.mca,
  label = c("quali.sup", "var"),
  select = "cos2 10",
  cex = 1,
  col.var = "darkblue",
  col.quali.sup = "brown3",
  col.ind = "seashell3",
  title = "Individuals with active and supplementary categories",
  cex.main = 2,
  col.main = "darkblue"
)
#
# library("corrplot")
# ind <- get_mca_ind(res.mca)
# pdf("cor.pdf")
# corrplot(ind$cos2, is.corr = F)
# dev.off()
# K-means on the individuals' MCA coordinates.
# Only the first two dimensions are used, and the fit has 3 centers.
# NOTE(review): the object names ("kmeans4", "clus_k54", "cluster_4") suggest
# an earlier 4-cluster version; they are kept because later code uses them.
km <- data.frame(res.mca$ind$coord)[, c(1:2)]

set.seed(111)
groupes.kmeans4 <- kmeans(km, centers = 3, nstart = 5)
# print(groupes.kmeans4)
# pdf("cluster4.pdf")
# fviz_cluster(groupes.kmeans4, data = km, palette = "jco", repel = TRUE,
#              main = "Kmeans", ggtheme = theme_classic())
# dev.off()
clus_k54 <- groupes.kmeans4$cluster

# Figure 6 - choosing the number of clusters: within-cluster sum of squares
# curve with a dashed marker at k = 3.
set.seed(111)
fviz_nbclust(km, kmeans, method = "wss") +
  geom_vline(xintercept = 3, linetype = 2) +
  labs(subtitle = "Elbow method")

# Figure 7 - clustering result, drawn in several styles.
fviz_cluster(
  groupes.kmeans4,
  data = km,
  show.clust.cent = TRUE,
  geom = "point",
  palette = "jco"
)
fviz_cluster(
  groupes.kmeans4,
  data = km,
  palette = "Set2",
  ggtheme = theme_minimal()
)
fviz_cluster(groupes.kmeans4, data = km, geom = "text")
# #
fviz_cluster(groupes.kmeans4, data = km, ellipse.type = "norm")
fviz_cluster(
  groupes.kmeans4,
  data = km,
  palette = "jco",
  main = "Kmeans",
  ggtheme = theme_classic()
)

# Store the cluster label of every individual on the full data set.
cluster_4 <- groupes.kmeans4$cluster
sleepdisturbance$cluster_4 <- cluster_4

# Sum of absolute differences between column 61 and column 26.
# NOTE(review): presumably column 61 is the cluster_4 label added just above
# and column 26 an earlier cluster assignment stored in data.csv - confirm.
# Cluster numbers are arbitrary labels, so this total is not an agreement
# measure.
sum(abs(sleepdisturbance[, 61] - sleepdisturbance[, 26]))
## [1] 7985
library(cluster)
library(factoextra)

# Silhouette analysis of k-means on the MCA coordinates (km) for k = 2..10.
# For every k: re-seed, fit k-means (nstart = 5), compute silhouette widths,
# draw the silhouette plot and record the mean width.
#
# Mean silhouette widths recorded from the previous run (seed 111):
#   k = 2: 0.3998   k = 3: 0.4742   k = 4: 0.4326
#   k = 5: 0.4094   k = 6: 0.4609   k = 7: 0.4534
#   k = 8: 0.5143   k = 9: 0.5392   k = 10: 0.5400
k <- c(2:10)

# The pairwise distance matrix does not depend on k, so compute it once
# instead of once per candidate k.
km_dist <- dist(km)

kmeans_fits <- vector("list", length(k))
silhouette_results <- vector("list", length(k))
names(kmeans_fits) <- paste0("k", k)
names(silhouette_results) <- paste0("k", k)
mean_silhouette <- numeric(length(k))

for (idx in seq_along(k)) {
  # Same seed before every fit so each k is reproducible on its own.
  set.seed(111)
  fit <- kmeans(km, centers = k[idx], nstart = 5)
  sil <- silhouette(fit$cluster, km_dist)

  # fviz_silhouette() prints the per-cluster summary itself; the returned
  # ggplot has to be printed explicitly because we are inside a loop.
  print(fviz_silhouette(sil))

  kmeans_fits[[idx]] <- fit
  silhouette_results[[idx]] <- sil
  mean_silhouette[idx] <- mean(sil[, "sil_width"])
}

# Mean silhouette width per k.
print(setNames(mean_silhouette, paste0("k=", k)))

###
par(mfrow = c(1, 1))
# Keep the 0.4-0.6 window, but widen it if any mean falls outside so that no
# point is dropped from the plot.
plot(
  k,
  mean_silhouette,
  xlab = "K",
  ylab = "Mean silhouette",
  col = "red",
  type = "o",
  pch = 19,
  ylim = range(c(0.4, 0.6, mean_silhouette))
)
text(k, mean_silhouette, labels = round(mean_silhouette, 2), pos = 3)
###
library(dplyr)
# sleepdisturbance1 <- read.csv("data_cluster2.csv")

# Attach the cluster label to the MCA input and keep the four active sleep
# variables plus the label, all as factors.
SDMode$cluster_4 <- cluster_4
SDMode1 <- SDMode[, c("Routine", "Sleep.quality", "Sleep.duration", "SIS",
                      "cluster_4")]
SDMode1[] <- lapply(SDMode1, as.factor)

# Draw a bar chart of category counts for one variable and write each
# category's percentage (of all rows passed in) above its bar.
#   values  : factor holding one variable for the rows of one cluster
#   title   : plot title
#   bar_col : colour for bars and title
# Prints the percentage table and returns it invisibly.
plot_category_share <- function(values, title, bar_col = "red") {
  counts <- table(values, deparse.level = 0)
  share <- round(counts / length(values) * 100, 2)
  print(share)

  # Leave headroom above the tallest bar so its label stays inside the plot.
  y_top <- max(counts, 1) * 1.15

  # barplot() returns the bar midpoints; the labels must be placed at those
  # x positions and at the bar heights (counts), not at 1..n / percentages.
  mids <- barplot(
    counts,
    main = title,
    ylab = "Count",
    col = bar_col,
    las = 1,
    col.main = bar_col,
    ylim = c(0, y_top)
  )
  text(mids, as.vector(counts), labels = as.vector(share), pos = 3)
  invisible(share)
}

# Plot the first n_vars columns of one cluster's rows on a 2 x 2 grid.
plot_cluster_profile <- function(cluster_rows, n_vars = 4) {
  par(mfrow = c(2, 2))
  for (j in seq_len(n_vars)) {
    plot_category_share(cluster_rows[[j]], colnames(cluster_rows)[j])
  }
  invisible(cluster_rows)
}

# Cluster 1 profile.
c1 <- SDMode1 %>% dplyr::filter(cluster_4 == 1)
plot_cluster_profile(c1)

# Cluster 2 profile.
c2 <- SDMode1 %>% dplyr::filter(cluster_4 == 2)
plot_cluster_profile(c2)

# Cluster 3 profile.
c3 <- SDMode1 %>% dplyr::filter(cluster_4 == 3)
plot_cluster_profile(c3)
library(autoReg)
## Warning: 程辑包'autoReg'是用R版本4.3.3 来建造的
# Missing-value imputation (disabled): fill NAs with the column mean.
# means <- colMeans(sleepdisturbance, na.rm = TRUE)
#
# sleepdisturbance[is.na(sleepdisturbance)] <-
#   round(means[col(sleepdisturbance)][is.na(sleepdisturbance)], 2)

# Save the data (now including cluster_4) and read it straight back.
write.csv(sleepdisturbance, "sleepdisturbance.csv")
sleepdisturbance <- read.csv("sleepdisturbance.csv")

# Drop the first two columns of the re-read file.
# NOTE(review): presumably the row-name column written by write.csv() and the
# first column of the original data - confirm.
sleepdisturbance <- sleepdisturbance[, -(1:2)]

# Reorder columns: position 60 is moved up to follow position 33, and
# position 23 is moved to the end.
# NOTE(review): judging by the table printed below, these appear to be
# cluster_4 and Comorbidities - confirm.
sleepdisturbance <- sleepdisturbance[, c(1:22, 24:33, 60, 34:59, 23)]

# The first 33 columns are categorical: convert their numeric codes to factors.
factor_cols <- 1:33
sleepdisturbance[factor_cols] <- lapply(
  sleepdisturbance[factor_cols],
  function(codes) as.factor(as.numeric(codes))
)

# Baseline characteristics table.
# Alternative column selections (disabled):
# sleepdisturbance <- sleepdisturbance[, c(1:22, 24:60, 23)]
# sleepdisturbance <- sleepdisturbance[, c(33, 60)]
gaze(~ ., data = sleepdisturbance) %>%
  myft()
name | levels | stats |
|---|---|---|
Sex | 0 | 2149 (43.2%) |
1 | 2821 (56.8%) | |
Age | 0 | 1684 (33.9%) |
1 | 2411 (48.5%) | |
2 | 875 (17.6%) | |
Education | 1 | 1483 (29.8%) |
2 | 1110 (22.3%) | |
3 | 2051 (41.3%) | |
4 | 326 (6.6%) | |
Marrying | 1 | 141 (2.8%) |
2 | 4580 (92.2%) | |
3 | 123 (2.5%) | |
4 | 126 (2.5%) | |
Income | 1 | 1321 (26.6%) |
2 | 1436 (28.9%) | |
3 | 1484 (29.9%) | |
4 | 729 (14.7%) | |
Smoking | 1 | 3421 (68.8%) |
2 | 680 (13.7%) | |
3 | 685 (13.8%) | |
4 | 184 (3.7%) | |
Drinking | 1 | 2642 (53.2%) |
2 | 1687 (33.9%) | |
3 | 641 (12.9%) | |
Routine | 1 | 1659 (33.4%) |
2 | 1972 (39.7%) | |
3 | 1150 (23.1%) | |
4 | 189 (3.8%) | |
Sleep.quality | 1 | 827 (16.6%) |
2 | 1397 (28.1%) | |
3 | 2005 (40.3%) | |
4 | 641 (12.9%) | |
5 | 100 (2.0%) | |
Sleep.duration | 1 | 3727 (75.0%) |
2 | 1202 (24.2%) | |
3 | 41 (0.8%) | |
SIS | 0 | 3931 (79.1%) |
1 | 1039 (20.9%) | |
DFA | 0 | 4254 (85.6%) |
1 | 716 (14.4%) | |
EA | 0 | 3834 (77.1%) |
1 | 1136 (22.9%) | |
DN | 0 | 4517 (90.9%) |
1 | 453 (9.1%) | |
NFLP | 0 | 4462 (89.8%) |
1 | 508 (10.2%) | |
Central.obesity | 0 | 2547 (51.2%) |
1 | 2423 (48.8%) | |
Hypertension | 0 | 3053 (61.4%) |
1 | 1917 (38.6%) | |
Dyslipidemia | 0 | 4149 (83.5%) |
1 | 821 (16.5%) | |
Diabetes | 0 | 1998 (40.2%) |
1 | 2972 (59.8%) | |
Hyperuricemia | 0 | 4299 (86.5%) |
1 | 671 (13.5%) | |
NAFLD | 0 | 2213 (44.5%) |
1 | 2757 (55.5%) | |
Num | 0 | 767 (15.4%) |
1 | 940 (18.9%) | |
2 | 960 (19.3%) | |
3 | 1003 (20.2%) | |
4 | 865 (17.4%) | |
5 | 378 (7.6%) | |
6 | 57 (1.1%) | |
diabete | 0 | 2591 (52.1%) |
1 | 1766 (35.5%) | |
2 | 613 (12.3%) | |
clus_k540 | 1 | 1474 (29.7%) |
2 | 301 (6.1%) | |
3 | 1738 (35.0%) | |
4 | 1457 (29.3%) | |
clus_k541 | 1 | 1457 (29.3%) |
2 | 1738 (35.0%) | |
3 | 1474 (29.7%) | |
4 | 301 (6.1%) | |
WC_c | 0 | 1578 (31.8%) |
1 | 969 (19.5%) | |
2 | 2423 (48.8%) | |
BMI_c | 0 | 1573 (31.6%) |
1 | 60 (1.2%) | |
2 | 2134 (42.9%) | |
3 | 1203 (24.2%) | |
SUA_c | 0 | 4211 (84.7%) |
1 | 759 (15.3%) | |
TC_c | 0 | 3335 (67.1%) |
1 | 1635 (32.9%) | |
TG_c | 0 | 3160 (63.6%) |
1 | 1810 (36.4%) | |
HDL_c | 0 | 4538 (91.3%) |
1 | 432 (8.7%) | |
LDL_c | 0 | 4000 (80.5%) |
1 | 970 (19.5%) | |
cluster_4 | 1 | 1576 (31.7%) |
2 | 2725 (54.8%) | |
3 | 669 (13.5%) | |
AGE | Mean ± SD | 49.8 ± 11.0 |
WC | Mean ± SD | 86.9 ± 10.7 |
SBP | Mean ± SD | 129.1 ± 18.5 |
DBP | Mean ± SD | 79.0 ± 11.8 |
BMI | Mean ± SD | 25.7 ± 3.5 |
CRE | Mean ± SD | 68.5 ± 17.5 |
SUA | Mean ± SD | 318.8 ± 87.9 |
efeg | Mean ± SD | 101.5 ± 12.6 |
TC | Mean ± SD | 4.9 ± 1.0 |
TG | Mean ± SD | 1.8 ± 1.6 |
HDL | Mean ± SD | 1.3 ± 0.3 |
LDL | Mean ± SD | 2.9 ± 0.8 |
FPG | Mean ± SD | 5.6 ± 1.5 |
HbA1c | Mean ± SD | 5.8 ± 0.8 |
WBC | Mean ± SD | 6.0 ± 1.6 |
RBC | Mean ± SD | 4.8 ± 0.5 |
Hb | Mean ± SD | 145.5 ± 15.8 |
PLT | Mean ± SD | 240.8 ± 57.7 |
Ne | Mean ± SD | 3.6 ± 1.2 |
Lym | Mean ± SD | 2.0 ± 0.6 |
NLR | Mean ± SD | 1.9 ± 0.8 |
PLR | Mean ± SD | 131.0 ± 42.6 |
SII | Mean ± SD | 464.4 ± 232.1 |
ALT | Mean ± SD | 24.1 ± 28.3 |
AST | Mean ± SD | 21.7 ± 12.5 |
GGT | Mean ± SD | 32.1 ± 37.3 |
Comorbidities | Mean ± SD | 0.7 ± 0.5 |
# Treat Comorbidities as categorical so it is tabulated by level rather than
# summarised as mean / SD.
sleepdisturbance[["Comorbidities"]] <-
  as.factor(sleepdisturbance[["Comorbidities"]])

# Univariable / multivariable screening: reprint the descriptive table.
gaze(~ ., data = sleepdisturbance) %>%
  myft()
name | levels | stats |
|---|---|---|
Sex | 0 | 2149 (43.2%) |
1 | 2821 (56.8%) | |
Age | 0 | 1684 (33.9%) |
1 | 2411 (48.5%) | |
2 | 875 (17.6%) | |
Education | 1 | 1483 (29.8%) |
2 | 1110 (22.3%) | |
3 | 2051 (41.3%) | |
4 | 326 (6.6%) | |
Marrying | 1 | 141 (2.8%) |
2 | 4580 (92.2%) | |
3 | 123 (2.5%) | |
4 | 126 (2.5%) | |
Income | 1 | 1321 (26.6%) |
2 | 1436 (28.9%) | |
3 | 1484 (29.9%) | |
4 | 729 (14.7%) | |
Smoking | 1 | 3421 (68.8%) |
2 | 680 (13.7%) | |
3 | 685 (13.8%) | |
4 | 184 (3.7%) | |
Drinking | 1 | 2642 (53.2%) |
2 | 1687 (33.9%) | |
3 | 641 (12.9%) | |
Routine | 1 | 1659 (33.4%) |
2 | 1972 (39.7%) | |
3 | 1150 (23.1%) | |
4 | 189 (3.8%) | |
Sleep.quality | 1 | 827 (16.6%) |
2 | 1397 (28.1%) | |
3 | 2005 (40.3%) | |
4 | 641 (12.9%) | |
5 | 100 (2.0%) | |
Sleep.duration | 1 | 3727 (75.0%) |
2 | 1202 (24.2%) | |
3 | 41 (0.8%) | |
SIS | 0 | 3931 (79.1%) |
1 | 1039 (20.9%) | |
DFA | 0 | 4254 (85.6%) |
1 | 716 (14.4%) | |
EA | 0 | 3834 (77.1%) |
1 | 1136 (22.9%) | |
DN | 0 | 4517 (90.9%) |
1 | 453 (9.1%) | |
NFLP | 0 | 4462 (89.8%) |
1 | 508 (10.2%) | |
Central.obesity | 0 | 2547 (51.2%) |
1 | 2423 (48.8%) | |
Hypertension | 0 | 3053 (61.4%) |
1 | 1917 (38.6%) | |
Dyslipidemia | 0 | 4149 (83.5%) |
1 | 821 (16.5%) | |
Diabetes | 0 | 1998 (40.2%) |
1 | 2972 (59.8%) | |
Hyperuricemia | 0 | 4299 (86.5%) |
1 | 671 (13.5%) | |
NAFLD | 0 | 2213 (44.5%) |
1 | 2757 (55.5%) | |
Num | 0 | 767 (15.4%) |
1 | 940 (18.9%) | |
2 | 960 (19.3%) | |
3 | 1003 (20.2%) | |
4 | 865 (17.4%) | |
5 | 378 (7.6%) | |
6 | 57 (1.1%) | |
diabete | 0 | 2591 (52.1%) |
1 | 1766 (35.5%) | |
2 | 613 (12.3%) | |
clus_k540 | 1 | 1474 (29.7%) |
2 | 301 (6.1%) | |
3 | 1738 (35.0%) | |
4 | 1457 (29.3%) | |
clus_k541 | 1 | 1457 (29.3%) |
2 | 1738 (35.0%) | |
3 | 1474 (29.7%) | |
4 | 301 (6.1%) | |
WC_c | 0 | 1578 (31.8%) |
1 | 969 (19.5%) | |
2 | 2423 (48.8%) | |
BMI_c | 0 | 1573 (31.6%) |
1 | 60 (1.2%) | |
2 | 2134 (42.9%) | |
3 | 1203 (24.2%) | |
SUA_c | 0 | 4211 (84.7%) |
1 | 759 (15.3%) | |
TC_c | 0 | 3335 (67.1%) |
1 | 1635 (32.9%) | |
TG_c | 0 | 3160 (63.6%) |
1 | 1810 (36.4%) | |
HDL_c | 0 | 4538 (91.3%) |
1 | 432 (8.7%) | |
LDL_c | 0 | 4000 (80.5%) |
1 | 970 (19.5%) | |
cluster_4 | 1 | 1576 (31.7%) |
2 | 2725 (54.8%) | |
3 | 669 (13.5%) | |
AGE | Mean ± SD | 49.8 ± 11.0 |
WC | Mean ± SD | 86.9 ± 10.7 |
SBP | Mean ± SD | 129.1 ± 18.5 |
DBP | Mean ± SD | 79.0 ± 11.8 |
BMI | Mean ± SD | 25.7 ± 3.5 |
CRE | Mean ± SD | 68.5 ± 17.5 |
SUA | Mean ± SD | 318.8 ± 87.9 |
efeg | Mean ± SD | 101.5 ± 12.6 |
TC | Mean ± SD | 4.9 ± 1.0 |
TG | Mean ± SD | 1.8 ± 1.6 |
HDL | Mean ± SD | 1.3 ± 0.3 |
LDL | Mean ± SD | 2.9 ± 0.8 |
FPG | Mean ± SD | 5.6 ± 1.5 |
HbA1c | Mean ± SD | 5.8 ± 0.8 |
WBC | Mean ± SD | 6.0 ± 1.6 |
RBC | Mean ± SD | 4.8 ± 0.5 |
Hb | Mean ± SD | 145.5 ± 15.8 |
PLT | Mean ± SD | 240.8 ± 57.7 |
Ne | Mean ± SD | 3.6 ± 1.2 |
Lym | Mean ± SD | 2.0 ± 0.6 |
NLR | Mean ± SD | 1.9 ± 0.8 |
PLR | Mean ± SD | 131.0 ± 42.6 |
SII | Mean ± SD | 464.4 ± 232.1 |
ALT | Mean ± SD | 24.1 ± 28.3 |
AST | Mean ± SD | 21.7 ± 12.5 |
GGT | Mean ± SD | 32.1 ± 37.3 |
Comorbidities | 0 | 1707 (34.3%) |
1 | 3263 (65.7%) |
# Logistic regression of Comorbidities on cluster membership, demographic and
# lifestyle factors, BMI and CRE.
mod <- glm(
  Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying + Income +
    Smoking + Drinking + BMI + CRE,
  data = sleepdisturbance,
  family = "binomial"
)

# uni = TRUE adds the univariable odds ratios next to the multivariable ones.
aovresult <- autoReg(mod, uni = TRUE, threshold = 0.05)
aovresult %>%
  myft()
Dependent: Comorbidities |
| 0 (N=1707) | 1 (N=3263) | OR (univariable) | OR (multivariable) |
|---|---|---|---|---|---|
cluster_4 | 1 | 552 (32.3%) | 1024 (31.4%) | ||
2 | 953 (55.8%) | 1772 (54.3%) | 1.00 (0.88-1.14, p=.972) | 1.17 (0.99-1.39, p=.071) | |
3 | 202 (11.8%) | 467 (14.3%) | 1.25 (1.03-1.51, p=.027) | 1.35 (1.05-1.74, p=.021) | |
Sex | 0 | 1079 (63.2%) | 1070 (32.8%) | ||
1 | 628 (36.8%) | 2193 (67.2%) | 3.52 (3.12-3.98, p<.001) | 2.17 (1.69-2.77, p<.001) | |
Age | 0 | 736 (43.1%) | 948 (29.1%) | ||
1 | 762 (44.6%) | 1649 (50.5%) | 1.68 (1.48-1.91, p<.001) | 2.14 (1.78-2.58, p<.001) | |
2 | 209 (12.2%) | 666 (20.4%) | 2.47 (2.06-2.97, p<.001) | 3.71 (2.86-4.81, p<.001) | |
Education | 1 | 452 (26.5%) | 1031 (31.6%) | ||
2 | 361 (21.1%) | 749 (23%) | 0.91 (0.77-1.08, p=.267) | 1.19 (0.95-1.48, p=.122) | |
3 | 742 (43.5%) | 1309 (40.1%) | 0.77 (0.67-0.89, p<.001) | 1.42 (1.14-1.76, p=.001) | |
4 | 152 (8.9%) | 174 (5.3%) | 0.50 (0.39-0.64, p<.001) | 1.11 (0.76-1.60, p=.592) | |
Marrying | 1 | 82 (4.8%) | 59 (1.8%) | ||
2 | 1547 (90.6%) | 3033 (93%) | 2.72 (1.94-3.83, p<.001) | 2.06 (1.25-3.40, p=.004) | |
3 | 52 (3%) | 71 (2.2%) | 1.90 (1.16-3.10, p=.010) | 2.16 (1.09-4.30, p=.028) | |
4 | 26 (1.5%) | 100 (3.1%) | 5.35 (3.10-9.23, p<.001) | 4.05 (1.95-8.42, p<.001) | |
Income | 1 | 414 (24.3%) | 907 (27.8%) | ||
2 | 541 (31.7%) | 895 (27.4%) | 0.76 (0.64-0.88, p<.001) | 0.65 (0.52-0.81, p<.001) | |
3 | 523 (30.6%) | 961 (29.5%) | 0.84 (0.72-0.98, p=.029) | 0.69 (0.54-0.89, p=.003) | |
4 | 229 (13.4%) | 500 (15.3%) | 1.00 (0.82-1.21, p=.973) | 0.74 (0.55-1.00, p=.054) | |
Smoking | 1 | 1391 (81.5%) | 2030 (62.2%) | ||
2 | 147 (8.6%) | 533 (16.3%) | 2.48 (2.04-3.02, p<.001) | 1.28 (0.98-1.67, p=.065) | |
3 | 132 (7.7%) | 553 (16.9%) | 2.87 (2.35-3.51, p<.001) | 1.53 (1.14-2.04, p=.004) | |
4 | 37 (2.2%) | 147 (4.5%) | 2.72 (1.89-3.93, p<.001) | 1.37 (0.86-2.17, p=.183) | |
Drinking | 1 | 1095 (64.1%) | 1547 (47.4%) | ||
2 | 505 (29.6%) | 1182 (36.2%) | 1.66 (1.46-1.89, p<.001) | 0.87 (0.71-1.05, p=.143) | |
3 | 107 (6.3%) | 534 (16.4%) | 3.53 (2.83-4.41, p<.001) | 1.08 (0.79-1.46, p=.640) | |
BMI | Mean ± SD | 23.0 ± 2.5 | 27.1 ± 3.2 | 1.75 (1.69-1.81, p<.001) | 1.71 (1.65-1.77, p<.001) |
CRE | Mean ± SD | 64.1 ± 13.0 | 70.7 ± 19.0 | 1.04 (1.03-1.04, p<.001) | 1.00 (1.00-1.01, p=.446) |
# Univariable model: Comorbidities explained by cluster membership only.
# (Backward elimination is applied to the larger models further down.)
mod0 <- glm(
  Comorbidities ~ cluster_4,
  data = sleepdisturbance,
  family = "binomial"
)
summary(mod0)
##
## Call:
## glm(formula = Comorbidities ~ cluster_4, family = "binomial",
## data = sleepdisturbance)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.617924 0.052803 11.702 <2e-16 ***
## cluster_42 0.002325 0.066346 0.035 0.9720
## cluster_43 0.220138 0.099398 2.215 0.0268 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6394.4 on 4969 degrees of freedom
## Residual deviance: 6388.4 on 4967 degrees of freedom
## AIC: 6394.4
##
## Number of Fisher Scoring iterations: 4
# Model adjusted for demographic and lifestyle factors, then reduced with
# step() (AIC-based; no scope is given).
mod1 <- glm(
  Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying + Income +
    Smoking + Drinking,
  data = sleepdisturbance,
  family = "binomial"
)
mod11 <- step(mod1)
## Start: AIC=5683.92
## Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying +
## Income + Smoking + Drinking
##
## Df Deviance AIC
## - Smoking 3 5648.5 5682.5
## <none> 5643.9 5683.9
## - cluster_4 2 5651.3 5687.3
## - Education 3 5657.5 5691.5
## - Income 3 5657.9 5691.9
## - Drinking 2 5657.2 5693.2
## - Marrying 3 5668.2 5702.2
## - Age 2 5766.1 5802.1
## - Sex 1 5911.9 5949.9
##
## Step: AIC=5682.51
## Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying +
## Income + Drinking
##
## Df Deviance AIC
## <none> 5648.5 5682.5
## - cluster_4 2 5656.7 5686.7
## - Income 3 5662.2 5690.2
## - Education 3 5663.8 5691.8
## - Drinking 2 5666.2 5696.2
## - Marrying 3 5672.8 5700.8
## - Age 2 5771.7 5801.7
## - Sex 1 5998.7 6030.7
summary(mod11)
##
## Call:
## glm(formula = Comorbidities ~ cluster_4 + Sex + Age + Education +
## Marrying + Income + Drinking, family = "binomial", data = sleepdisturbance)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.06073 0.21353 -4.968 6.78e-07 ***
## cluster_42 0.12647 0.07290 1.735 0.082778 .
## cluster_43 0.29823 0.10716 2.783 0.005385 **
## Sex1 1.46853 0.08192 17.928 < 2e-16 ***
## Age1 0.70172 0.07728 9.080 < 2e-16 ***
## Age2 1.09783 0.11189 9.812 < 2e-16 ***
## Education2 -0.17203 0.09426 -1.825 0.067995 .
## Education3 -0.18261 0.09090 -2.009 0.044546 *
## Education4 -0.59017 0.15264 -3.866 0.000110 ***
## Marrying2 0.59202 0.19199 3.084 0.002045 **
## Marrying3 0.46899 0.27227 1.722 0.084980 .
## Marrying4 1.41763 0.30047 4.718 2.38e-06 ***
## Income2 -0.33695 0.09267 -3.636 0.000277 ***
## Income3 -0.21309 0.10297 -2.069 0.038501 *
## Income4 -0.16280 0.12841 -1.268 0.204854
## Drinking2 0.04760 0.08120 0.586 0.557782
## Drinking3 0.51088 0.12876 3.968 7.26e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6394.4 on 4969 degrees of freedom
## Residual deviance: 5648.5 on 4953 degrees of freedom
## AIC: 5682.5
##
## Number of Fisher Scoring iterations: 4
# Model additionally adjusted for BMI and CRE, then reduced with step()
# (AIC-based; no scope is given).
mod2 <- glm(
  Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying + Income +
    Smoking + Drinking + BMI + CRE,
  data = sleepdisturbance,
  family = "binomial"
)
mod22 <- step(mod2)
## Start: AIC=4148.94
## Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying +
## Income + Smoking + Drinking + BMI + CRE
##
## Df Deviance AIC
## - CRE 1 4105.6 4147.6
## - Drinking 2 4108.4 4148.4
## <none> 4104.9 4148.9
## - cluster_4 2 4111.1 4151.1
## - Smoking 3 4114.9 4152.9
## - Education 3 4116.3 4154.3
## - Marrying 3 4119.7 4157.7
## - Income 3 4120.9 4158.9
## - Sex 1 4141.7 4183.7
## - Age 2 4218.2 4258.2
## - BMI 1 5634.6 5676.6
##
## Step: AIC=4147.61
## Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying +
## Income + Smoking + Drinking + BMI
##
## Df Deviance AIC
## - Drinking 2 4109.0 4147.0
## <none> 4105.6 4147.6
## - cluster_4 2 4111.7 4149.7
## - Smoking 3 4115.4 4151.4
## - Education 3 4117.2 4153.2
## - Marrying 3 4120.3 4156.3
## - Income 3 4121.7 4157.7
## - Sex 1 4167.1 4207.1
## - Age 2 4219.5 4257.5
## - BMI 1 5643.9 5683.9
##
## Step: AIC=4146.98
## Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying +
## Income + Smoking + BMI
##
## Df Deviance AIC
## <none> 4109.0 4147.0
## - cluster_4 2 4115.1 4149.1
## - Education 3 4120.2 4152.2
## - Smoking 3 4120.7 4152.7
## - Marrying 3 4124.2 4156.2
## - Income 3 4124.9 4156.9
## - Sex 1 4173.2 4209.2
## - Age 2 4224.9 4258.9
## - BMI 1 5657.2 5693.2
summary(mod22)
##
## Call:
## glm(formula = Comorbidities ~ cluster_4 + Sex + Age + Education +
## Marrying + Income + Smoking + BMI, family = "binomial", data = sleepdisturbance)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -14.56545 0.53108 -27.426 < 2e-16 ***
## cluster_42 0.15595 0.08714 1.790 0.073515 .
## cluster_43 0.29819 0.13009 2.292 0.021892 *
## Sex1 0.79273 0.09987 7.937 2.07e-15 ***
## Age1 0.76270 0.09458 8.064 7.37e-16 ***
## Age2 1.32640 0.13201 10.048 < 2e-16 ***
## Education2 0.17484 0.11246 1.555 0.120023
## Education3 0.34828 0.10971 3.175 0.001500 **
## Education4 0.10146 0.18823 0.539 0.589862
## Marrying2 0.73071 0.25393 2.878 0.004007 **
## Marrying3 0.77707 0.34940 2.224 0.026148 *
## Marrying4 1.41628 0.37304 3.797 0.000147 ***
## Income2 -0.43277 0.11108 -3.896 9.77e-05 ***
## Income3 -0.36330 0.12424 -2.924 0.003453 **
## Income4 -0.30380 0.15497 -1.960 0.049947 *
## Smoking2 0.22463 0.13397 1.677 0.093594 .
## Smoking3 0.44594 0.13912 3.206 0.001348 **
## Smoking4 0.31325 0.23326 1.343 0.179306
## BMI 0.53595 0.01781 30.088 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6394.4 on 4969 degrees of freedom
## Residual deviance: 4109.0 on 4951 degrees of freedom
## AIC: 4147
##
## Number of Fisher Scoring iterations: 5
#### Univariable / multivariable screening
# Summarise every column of sleepdisturbance with gaze() and pipe the result
# to myft() for display.
gaze(~., data = sleepdisturbance) %>%
  myft()
name | levels | stats |
|---|---|---|
Sex | 0 | 2149 (43.2%) |
1 | 2821 (56.8%) | |
Age | 0 | 1684 (33.9%) |
1 | 2411 (48.5%) | |
2 | 875 (17.6%) | |
Education | 1 | 1483 (29.8%) |
2 | 1110 (22.3%) | |
3 | 2051 (41.3%) | |
4 | 326 (6.6%) | |
Marrying | 1 | 141 (2.8%) |
2 | 4580 (92.2%) | |
3 | 123 (2.5%) | |
4 | 126 (2.5%) | |
Income | 1 | 1321 (26.6%) |
2 | 1436 (28.9%) | |
3 | 1484 (29.9%) | |
4 | 729 (14.7%) | |
Smoking | 1 | 3421 (68.8%) |
2 | 680 (13.7%) | |
3 | 685 (13.8%) | |
4 | 184 (3.7%) | |
Drinking | 1 | 2642 (53.2%) |
2 | 1687 (33.9%) | |
3 | 641 (12.9%) | |
Routine | 1 | 1659 (33.4%) |
2 | 1972 (39.7%) | |
3 | 1150 (23.1%) | |
4 | 189 (3.8%) | |
Sleep.quality | 1 | 827 (16.6%) |
2 | 1397 (28.1%) | |
3 | 2005 (40.3%) | |
4 | 641 (12.9%) | |
5 | 100 (2.0%) | |
Sleep.duration | 1 | 3727 (75.0%) |
2 | 1202 (24.2%) | |
3 | 41 (0.8%) | |
SIS | 0 | 3931 (79.1%) |
1 | 1039 (20.9%) | |
DFA | 0 | 4254 (85.6%) |
1 | 716 (14.4%) | |
EA | 0 | 3834 (77.1%) |
1 | 1136 (22.9%) | |
DN | 0 | 4517 (90.9%) |
1 | 453 (9.1%) | |
NFLP | 0 | 4462 (89.8%) |
1 | 508 (10.2%) | |
Central.obesity | 0 | 2547 (51.2%) |
1 | 2423 (48.8%) | |
Hypertension | 0 | 3053 (61.4%) |
1 | 1917 (38.6%) | |
Dyslipidemia | 0 | 4149 (83.5%) |
1 | 821 (16.5%) | |
Diabetes | 0 | 1998 (40.2%) |
1 | 2972 (59.8%) | |
Hyperuricemia | 0 | 4299 (86.5%) |
1 | 671 (13.5%) | |
NAFLD | 0 | 2213 (44.5%) |
1 | 2757 (55.5%) | |
Num | 0 | 767 (15.4%) |
1 | 940 (18.9%) | |
2 | 960 (19.3%) | |
3 | 1003 (20.2%) | |
4 | 865 (17.4%) | |
5 | 378 (7.6%) | |
6 | 57 (1.1%) | |
diabete | 0 | 2591 (52.1%) |
1 | 1766 (35.5%) | |
2 | 613 (12.3%) | |
clus_k540 | 1 | 1474 (29.7%) |
2 | 301 (6.1%) | |
3 | 1738 (35.0%) | |
4 | 1457 (29.3%) | |
clus_k541 | 1 | 1457 (29.3%) |
2 | 1738 (35.0%) | |
3 | 1474 (29.7%) | |
4 | 301 (6.1%) | |
WC_c | 0 | 1578 (31.8%) |
1 | 969 (19.5%) | |
2 | 2423 (48.8%) | |
BMI_c | 0 | 1573 (31.6%) |
1 | 60 (1.2%) | |
2 | 2134 (42.9%) | |
3 | 1203 (24.2%) | |
SUA_c | 0 | 4211 (84.7%) |
1 | 759 (15.3%) | |
TC_c | 0 | 3335 (67.1%) |
1 | 1635 (32.9%) | |
TG_c | 0 | 3160 (63.6%) |
1 | 1810 (36.4%) | |
HDL_c | 0 | 4538 (91.3%) |
1 | 432 (8.7%) | |
LDL_c | 0 | 4000 (80.5%) |
1 | 970 (19.5%) | |
cluster_4 | 1 | 1576 (31.7%) |
2 | 2725 (54.8%) | |
3 | 669 (13.5%) | |
AGE | Mean ± SD | 49.8 ± 11.0 |
WC | Mean ± SD | 86.9 ± 10.7 |
SBP | Mean ± SD | 129.1 ± 18.5 |
DBP | Mean ± SD | 79.0 ± 11.8 |
BMI | Mean ± SD | 25.7 ± 3.5 |
CRE | Mean ± SD | 68.5 ± 17.5 |
SUA | Mean ± SD | 318.8 ± 87.9 |
efeg | Mean ± SD | 101.5 ± 12.6 |
TC | Mean ± SD | 4.9 ± 1.0 |
TG | Mean ± SD | 1.8 ± 1.6 |
HDL | Mean ± SD | 1.3 ± 0.3 |
LDL | Mean ± SD | 2.9 ± 0.8 |
FPG | Mean ± SD | 5.6 ± 1.5 |
HbA1c | Mean ± SD | 5.8 ± 0.8 |
WBC | Mean ± SD | 6.0 ± 1.6 |
RBC | Mean ± SD | 4.8 ± 0.5 |
Hb | Mean ± SD | 145.5 ± 15.8 |
PLT | Mean ± SD | 240.8 ± 57.7 |
Ne | Mean ± SD | 3.6 ± 1.2 |
Lym | Mean ± SD | 2.0 ± 0.6 |
NLR | Mean ± SD | 1.9 ± 0.8 |
PLR | Mean ± SD | 131.0 ± 42.6 |
SII | Mean ± SD | 464.4 ± 232.1 |
ALT | Mean ± SD | 24.1 ± 28.3 |
AST | Mean ± SD | 21.7 ± 12.5 |
GGT | Mean ± SD | 32.1 ± 37.3 |
Comorbidities | 0 | 1707 (34.3%) |
1 | 3263 (65.7%) |
# Logistic regression (binomial family) of NAFLD on cluster_4 plus Sex, Age,
# Education, Marrying, Income, Smoking, Drinking, BMI and CRE.
modd <- glm(
  NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Income +
    Smoking + Drinking + BMI + CRE,
  data = sleepdisturbance,
  family = "binomial"
)
# uni = TRUE: also output the univariable results.
# NOTE(review): threshold = 0.05 is presumably the univariable p-value cut-off
# for entering the multivariable model -- confirm against the autoReg docs.
aovresult <- autoReg(modd, uni = TRUE, threshold = 0.05)
aovresult %>%
  myft()
Dependent: NAFLD |
| 0 (N=2213) | 1 (N=2757) | OR (univariable) | OR (multivariable) |
|---|---|---|---|---|---|
cluster_4 | 1 | 753 (34%) | 823 (29.9%) | ||
2 | 1184 (53.5%) | 1541 (55.9%) | 1.19 (1.05-1.35, p=.006) | 1.24 (1.06-1.44, p=.007) | |
3 | 276 (12.5%) | 393 (14.3%) | 1.30 (1.09-1.56, p=.005) | 1.27 (1.01-1.59, p=.039) | |
Sex | 0 | 1345 (60.8%) | 804 (29.2%) | ||
1 | 868 (39.2%) | 1953 (70.8%) | 3.76 (3.34-4.24, p<.001) | 2.73 (2.18-3.42, p<.001) | |
Age | 0 | 773 (34.9%) | 911 (33%) | ||
1 | 1005 (45.4%) | 1406 (51%) | 1.19 (1.05-1.35, p=.007) | 1.31 (1.11-1.55, p=.001) | |
2 | 435 (19.7%) | 440 (16%) | 0.86 (0.73-1.01, p=.067) | 1.00 (0.80-1.25, p=.993) | |
Education | 1 | 679 (30.7%) | 804 (29.2%) | ||
2 | 504 (22.8%) | 606 (22%) | 1.02 (0.87-1.19, p=.848) | ||
3 | 872 (39.4%) | 1179 (42.8%) | 1.14 (1.00-1.31, p=.053) | ||
4 | 158 (7.1%) | 168 (6.1%) | 0.90 (0.71-1.14, p=.380) | ||
Marrying | 1 | 87 (3.9%) | 54 (2%) | ||
2 | 2006 (90.6%) | 2574 (93.4%) | 2.07 (1.46-2.92, p<.001) | 1.88 (1.19-2.97, p=.007) | |
3 | 58 (2.6%) | 65 (2.4%) | 1.81 (1.11-2.95, p=.018) | 2.47 (1.32-4.65, p=.005) | |
4 | 62 (2.8%) | 64 (2.3%) | 1.66 (1.02-2.71, p=.041) | 1.92 (1.02-3.60, p=.043) | |
Income | 1 | 658 (29.7%) | 663 (24%) | ||
2 | 678 (30.6%) | 758 (27.5%) | 1.11 (0.96-1.29, p=.173) | 1.01 (0.84-1.22, p=.932) | |
3 | 613 (27.7%) | 871 (31.6%) | 1.41 (1.21-1.64, p<.001) | 1.20 (0.98-1.46, p=.076) | |
4 | 264 (11.9%) | 465 (16.9%) | 1.75 (1.45-2.10, p<.001) | 1.23 (0.96-1.57, p=.110) | |
Smoking | 1 | 1752 (79.2%) | 1669 (60.5%) | ||
2 | 207 (9.4%) | 473 (17.2%) | 2.40 (2.01-2.86, p<.001) | 1.10 (0.87-1.38, p=.437) | |
3 | 194 (8.8%) | 491 (17.8%) | 2.66 (2.22-3.18, p<.001) | 1.36 (1.07-1.74, p=.013) | |
4 | 60 (2.7%) | 124 (4.5%) | 2.17 (1.58-2.97, p<.001) | 1.18 (0.81-1.72, p=.400) | |
Drinking | 1 | 1420 (64.2%) | 1222 (44.3%) | ||
2 | 614 (27.7%) | 1073 (38.9%) | 2.03 (1.79-2.30, p<.001) | 1.04 (0.87-1.23, p=.678) | |
3 | 179 (8.1%) | 462 (16.8%) | 3.00 (2.48-3.62, p<.001) | 0.90 (0.70-1.17, p=.431) | |
BMI | Mean ± SD | 23.6 ± 2.8 | 27.3 ± 3.2 | 1.56 (1.52-1.60, p<.001) | 1.52 (1.48-1.56, p<.001) |
CRE | Mean ± SD | 65.6 ± 20.9 | 70.7 ± 13.7 | 1.03 (1.02-1.03, p<.001) | 0.99 (0.98-1.00, p=.001) |
# Univariable analysis, followed by backward elimination
# Crude model: NAFLD regressed on cluster_4 only.
modd0 <- glm(
  NAFLD ~ cluster_4,
  data = sleepdisturbance,
  family = "binomial"
)
summary(modd0)
##
## Call:
## glm(formula = NAFLD ~ cluster_4, family = "binomial", data = sleepdisturbance)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.08889 0.05043 1.763 0.07795 .
## cluster_42 0.17464 0.06353 2.749 0.00598 **
## cluster_43 0.26452 0.09333 2.834 0.00459 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6830.2 on 4969 degrees of freedom
## Residual deviance: 6819.3 on 4967 degrees of freedom
## AIC: 6825.3
##
## Number of Fisher Scoring iterations: 4
# NAFLD model with cluster_4 plus Sex, Age, Education, Marrying, Income,
# Smoking and Drinking (no BMI or CRE terms).
modd1 <- glm(
  NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Income +
    Smoking + Drinking,
  data = sleepdisturbance,
  family = "binomial"
)
# Stepwise selection by AIC with step() defaults; result kept in modd11.
modd11 <- step(modd1)
## Start: AIC=6273.82
## NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Income +
## Smoking + Drinking
##
## Df Deviance AIC
## - Smoking 3 6236.7 6270.7
## - Income 3 6237.7 6271.7
## - Drinking 2 6237.7 6273.7
## <none> 6233.8 6273.8
## - Education 3 6241.1 6275.1
## - cluster_4 2 6244.8 6280.8
## - Marrying 3 6249.6 6283.6
## - Age 2 6262.0 6298.0
## - Sex 1 6480.8 6518.8
##
## Step: AIC=6270.67
## NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Income +
## Drinking
##
## Df Deviance AIC
## - Income 3 6240.4 6268.4
## <none> 6236.7 6270.7
## - Education 3 6244.8 6272.8
## - Drinking 2 6243.2 6273.2
## - cluster_4 2 6248.2 6278.2
## - Marrying 3 6252.4 6280.4
## - Age 2 6264.6 6294.6
## - Sex 1 6553.7 6585.7
##
## Step: AIC=6268.42
## NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Drinking
##
## Df Deviance AIC
## <none> 6240.4 6268.4
## - Education 3 6247.7 6269.7
## - Drinking 2 6247.1 6271.1
## - cluster_4 2 6252.2 6276.2
## - Marrying 3 6256.4 6278.4
## - Age 2 6267.4 6291.4
## - Sex 1 6578.7 6604.7
# Print coefficients and deviance/AIC for the step-selected NAFLD model (modd11).
summary(modd11)
##
## Call:
## glm(formula = NAFLD ~ cluster_4 + Sex + Age + Education + Marrying +
## Drinking, family = "binomial", data = sleepdisturbance)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.46861 0.20625 -7.120 1.08e-12 ***
## cluster_42 0.19689 0.06847 2.876 0.004032 **
## cluster_43 0.29347 0.09968 2.944 0.003239 **
## Sex1 1.32160 0.07392 17.880 < 2e-16 ***
## Age1 0.35052 0.07283 4.813 1.49e-06 ***
## Age2 0.08175 0.09783 0.836 0.403358
## Education2 -0.15702 0.08562 -1.834 0.066660 .
## Education3 -0.06682 0.07758 -0.861 0.389046
## Education4 -0.31922 0.13671 -2.335 0.019543 *
## Marrying2 0.63804 0.18928 3.371 0.000749 ***
## Marrying3 0.71605 0.26683 2.684 0.007285 **
## Marrying4 1.02961 0.26954 3.820 0.000133 ***
## Drinking2 0.13444 0.07578 1.774 0.076059 .
## Drinking3 0.26696 0.10943 2.440 0.014706 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6830.2 on 4969 degrees of freedom
## Residual deviance: 6240.4 on 4956 degrees of freedom
## AIC: 6268.4
##
## Number of Fisher Scoring iterations: 4
# NAFLD model with the full term set: cluster_4, Sex, Age, Education,
# Marrying, Income, Smoking, Drinking, BMI and CRE. The formula is the same
# as the one used for `modd` earlier in the script.
modd2 <- glm(
  NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Income +
    Smoking + Drinking + BMI + CRE,
  data = sleepdisturbance,
  family = "binomial"
)
# Stepwise selection by AIC with step() defaults; result kept in modd22.
modd22 <- step(modd2)
## Start: AIC=4967.14
## NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Income +
## Smoking + Drinking + BMI + CRE
##
## Df Deviance AIC
## - Income 3 4924.7 4962.7
## - Drinking 2 4924.5 4964.5
## <none> 4923.1 4967.1
## - Smoking 3 4930.6 4968.6
## - Marrying 3 4933.9 4971.9
## - cluster_4 2 4932.0 4972.0
## - Education 3 4939.8 4977.8
## - CRE 1 4937.5 4979.5
## - Age 2 4940.5 4980.5
## - Sex 1 5007.3 5049.3
## - BMI 1 6231.9 6273.9
##
## Step: AIC=4962.67
## NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Smoking +
## Drinking + BMI + CRE
##
## Df Deviance AIC
## - Drinking 2 4926.0 4960.0
## <none> 4924.7 4962.7
## - Smoking 3 4931.9 4963.9
## - Marrying 3 4935.7 4967.7
## - cluster_4 2 4933.7 4967.7
## - CRE 1 4938.9 4974.9
## - Age 2 4941.6 4975.6
## - Education 3 4945.3 4977.3
## - Sex 1 5012.2 5048.2
## - BMI 1 6235.9 6271.9
##
## Step: AIC=4960
## NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Smoking +
## BMI + CRE
##
## Df Deviance AIC
## <none> 4926.0 4960.0
## - Smoking 3 4932.3 4960.3
## - Marrying 3 4936.6 4964.6
## - cluster_4 2 4934.9 4964.9
## - CRE 1 4939.9 4971.9
## - Age 2 4942.8 4972.8
## - Education 3 4946.6 4974.6
## - Sex 1 5021.3 5053.3
## - BMI 1 6239.9 6271.9
# Print coefficients and deviance/AIC for the step-selected NAFLD model (modd22).
summary(modd22)
##
## Call:
## glm(formula = NAFLD ~ cluster_4 + Sex + Age + Education + Marrying +
## Smoking + BMI + CRE, family = "binomial", data = sleepdisturbance)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -11.600210 0.479151 -24.210 < 2e-16 ***
## cluster_42 0.215726 0.078555 2.746 0.006029 **
## cluster_43 0.257700 0.116108 2.219 0.026454 *
## Sex1 1.000097 0.108802 9.192 < 2e-16 ***
## Age1 0.315455 0.085499 3.690 0.000225 ***
## Age2 0.058126 0.111925 0.519 0.603532
## Education2 0.097483 0.097938 0.995 0.319565
## Education3 0.388180 0.089938 4.316 1.59e-05 ***
## Education4 0.318822 0.161776 1.971 0.048751 *
## Marrying2 0.697918 0.235214 2.967 0.003006 **
## Marrying3 0.962621 0.322681 2.983 0.002853 **
## Marrying4 0.715047 0.322273 2.219 0.026503 *
## Smoking2 0.115524 0.116684 0.990 0.322148
## Smoking3 0.294325 0.118848 2.476 0.013269 *
## Smoking4 0.137065 0.190710 0.719 0.472320
## BMI 0.425168 0.014473 29.377 < 2e-16 ***
## CRE -0.011533 0.003392 -3.400 0.000673 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6830.2 on 4969 degrees of freedom
## Residual deviance: 4926.0 on 4953 degrees of freedom
## AIC: 4960
##
## Number of Fisher Scoring iterations: 5