MCA

### MCA 说明https://zhuanlan.zhihu.com/p/499395932 
rm(list = ls())
library(dplyr)
## Warning: 程辑包'dplyr'是用R版本4.3.3 来建造的
## 
## 载入程辑包:'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#install.packages('glmnet')
library(Matrix)
library(glmnet) ##Lasso回归
## Loaded glmnet 4.1-8
#install.packages('rms')
#install.packages('Hmisc')
library(Hmisc)
## 
## 载入程辑包:'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(rms)  ## 画列线图;
## Warning in .recacheSubclasses(def@className, def, env):
## "replValueSp"类别的子类别"ndiMatrix"没有定义;因此没有更新
#install.packages('VIM')
library(colorspace)
library(grid)
library(VIM) ## 包中aggr()函数,判断数据缺失情况
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
## 
## 载入程辑包:'VIM'
## The following object is masked from 'package:datasets':
## 
##     sleep
#install.packages('survival')
library(survival) ##  生存分析包
# Load the survey data set; "data.csv" is resolved against the working
# directory.
sleepdisturbance <- read.csv("data.csv")

# Sleep-related variables used for the MCA, preceded by the two demographic
# variables (Sex, Age) that are later treated as supplementary.
# The columns are selected by name instead of by position so that a change in
# the CSV column layout cannot silently pull in the wrong variables.
sleep_vars <- c(
  "Sex", "Age",
  "Routine", "Sleep.quality", "Sleep.duration", "SIS"
)
SDMode <- sleepdisturbance[, sleep_vars]

# Symptom legend kept from the original notes:
#   A = difficulty falling asleep
#   B = waking easily during the night or waking early
#   C = frequent dreams or nightmares
#   D = next-day fatigue, drowsiness, low energy
#   E = snoring while asleep

# Turn every selected column into a factor. Going through as.numeric() first
# keeps the level labels as plain numeric codes ("0", "1", ...), which the
# level renaming further down relies on.
SDMode[] <- lapply(SDMode, function(column) as.factor(as.numeric(column)))

# Rename the levels of a factor according to a named character vector.
#
# f      : a factor.
# labels : named character vector; names are the current level codes and
#          values are the human-readable labels that replace them.
# Returns the factor with matching levels renamed. Levels without an entry in
# `labels` are left untouched, and codes in `labels` that do not occur in the
# factor are ignored.
relabel_levels <- function(f, labels) {
  current <- levels(f)
  hit <- current %in% names(labels)
  current[hit] <- unname(labels[current[hit]])
  levels(f) <- current
  f
}

# Human-readable labels for the numeric codes of each variable.
SDMode$Sex <- relabel_levels(
  SDMode$Sex,
  c("0" = "Female", "1" = "Male")
)
SDMode$Age <- relabel_levels(
  SDMode$Age,
  c("0" = "Young", "1" = "Adult", "2" = "Aged")
)
SDMode$Routine <- relabel_levels(
  SDMode$Routine,
  c(
    "1" = "Regular",
    "2" = "Occasionally stay up late",
    "3" = "Stay up late often",
    "4" = "Very irregular"
  )
)
SDMode$Sleep.quality <- relabel_levels(
  SDMode$Sleep.quality,
  c(
    "1" = "Very good",
    "2" = "Better",
    "3" = "Normal",
    "4" = "Poor",
    "5" = "Very poor"
  )
)
SDMode$Sleep.duration <- relabel_levels(
  SDMode$Sleep.duration,
  c("1" = "<7h", "2" = "7-9h", "3" = ">9h")
)
SDMode$SIS <- relabel_levels(
  SDMode$SIS,
  c("0" = "No snoring in sleep", "1" = "Snoring in sleep")
)

# The columns are already factors at this point, so no further as.factor()
# conversion is needed after relabelling.

# Bar chart of the level counts of every variable in SDMode, one plot per
# column. Iterating over the column names (instead of 1:ncol(SDMode)) avoids
# the 1:0 trap when the data frame has no columns.
for (var_name in colnames(SDMode)) {
  plot(
    SDMode[[var_name]],
    main = var_name, ylab = "Count", col = "darkblue", las = 1,
    col.main = "darkblue"
  )
}

#install.packages('FactoMineR')
library("FactoMineR")
#install.packages('factoextra')
library("factoextra")
## 载入需要的程辑包:ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library("ggplot2")
# Figure 1: multiple correspondence analysis of SDMode.
# quali.sup marks columns 1 and 2 as supplementary categorical variables, so
# they are projected onto the dimensions without taking part in building
# them; ncp = 5 keeps five dimensions in the result.
res.mca <- MCA(
  X = SDMode,
  ncp = 5,
  quali.sup = c(1:2),
  graph = TRUE
)

# Variant without supplementary variables, kept for reference:
# res.mca <- MCA(SDMode, ncp = 5, graph = TRUE)

# Figure 2: share of variance explained by each dimension
# (see "Eigenvalues" / "% of var." in the summary below).
fviz_screeplot(res.mca, ylim = c(0, 15), addlabels = TRUE)

summary(res.mca)
## 
## Call:
## MCA(X = SDMode, ncp = 5, quali.sup = c(1:2), graph = TRUE) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance               0.370   0.310   0.266   0.254   0.253   0.240   0.236
## % of var.             14.780  12.401  10.637  10.177  10.114   9.612   9.433
## Cumulative % of var.  14.780  27.182  37.819  47.996  58.110  67.722  77.155
##                        Dim.8   Dim.9  Dim.10
## Variance               0.213   0.194   0.163
## % of var.              8.531   7.779   6.536
## Cumulative % of var.  85.686  93.464 100.000
## 
## Individuals (the 10 first)
##                              Dim.1    ctr   cos2    Dim.2    ctr   cos2  
## 1                         | -1.219  0.081  0.571 |  0.733  0.035  0.206 |
## 2                         |  0.186  0.002  0.021 | -0.079  0.000  0.004 |
## 3                         |  0.491  0.013  0.135 | -0.729  0.034  0.298 |
## 4                         |  0.200  0.002  0.044 | -0.606  0.024  0.409 |
## 5                         |  0.200  0.002  0.044 | -0.606  0.024  0.409 |
## 6                         | -0.701  0.027  0.263 | -0.430  0.012  0.099 |
## 7                         | -0.997  0.054  0.500 |  0.134  0.001  0.009 |
## 8                         |  0.544  0.016  0.219 | -0.226  0.003  0.038 |
## 9                         | -0.676  0.025  0.241 |  0.704  0.032  0.261 |
## 10                        | -0.343  0.006  0.073 | -0.578  0.022  0.209 |
##                            Dim.3    ctr   cos2  
## 1                         -0.167  0.002  0.011 |
## 2                         -0.252  0.005  0.039 |
## 3                          0.401  0.012  0.090 |
## 4                          0.046  0.000  0.002 |
## 5                          0.046  0.000  0.002 |
## 6                          0.573  0.025  0.176 |
## 7                          0.190  0.003  0.018 |
## 8                         -0.484  0.018  0.174 |
## 9                         -0.462  0.016  0.112 |
## 10                         0.340  0.009  0.072 |
## 
## Categories (the 10 first)
##                               Dim.1     ctr    cos2  v.test     Dim.2     ctr
## Regular                   |  -0.747  12.613   0.280 -37.289 |   0.527   7.468
## Occasionally stay up late |  -0.026   0.018   0.000  -1.493 |  -0.730  17.034
## Stay up late often        |   0.811  10.287   0.198  31.352 |   0.117   0.254
## Very irregular            |   1.900   9.288   0.143  26.628 |   2.280  15.934
## Very good                 |  -1.079  13.100   0.232 -33.972 |   1.035  14.372
## Better                    |  -0.540   5.542   0.114 -23.794 |  -0.299   2.028
## Normal                    |   0.331   2.990   0.074  19.185 |  -0.627  12.802
## Poor                      |   1.212  12.824   0.218  32.883 |   0.916   8.717
## Very poor                 |   2.055   5.750   0.087  20.760 |   2.329   8.800
## <7h                       |   0.329   5.500   0.325  40.188 |  -0.050   0.151
##                              cos2  v.test     Dim.3     ctr    cos2  v.test  
## Regular                     0.139  26.283 |  -0.328   3.370   0.054 -16.353 |
## Occasionally stay up late   0.350 -41.715 |   0.463   7.987   0.141  26.455 |
## Stay up late often          0.004   4.512 |  -0.631   8.668   0.120 -24.415 |
## Very irregular              0.205  31.949 |   1.890  12.764   0.141  26.482 |
## Very good                   0.214  32.595 |  -0.303   1.433   0.018  -9.531 |
## Better                      0.035 -13.184 |   0.433   4.965   0.073  19.106 |
## Normal                      0.266 -36.364 |  -0.046   0.080   0.001  -2.661 |
## Poor                        0.124  24.833 |  -1.018  12.570   0.154 -27.619 |
## Very poor                   0.111  23.525 |   3.894  28.683   0.311  39.334 |
## <7h                         0.007  -6.104 |  -0.169   2.006   0.085 -20.590 |
## 
## Categorical variables (eta2)
##                             Dim.1 Dim.2 Dim.3  
## Routine                   | 0.476 0.505 0.349 |
## Sleep.quality             | 0.594 0.579 0.508 |
## Sleep.duration            | 0.325 0.144 0.119 |
## SIS                       | 0.083 0.012 0.089 |
## 
## Supplementary categories
##                               Dim.1    cos2  v.test     Dim.2    cos2  v.test  
## Female                    |  -0.046   0.002  -2.836 |   0.049   0.002   3.033 |
## Male                      |   0.035   0.002   2.836 |  -0.038   0.002  -3.033 |
## Young                     |   0.158   0.013   7.968 |  -0.142   0.010  -7.156 |
## Adult                     |   0.014   0.000   0.942 |   0.009   0.000   0.597 |
## Aged                      |  -0.342   0.025 -11.138 |   0.249   0.013   8.110 |
##                             Dim.3    cos2  v.test  
## Female                     -0.118   0.011  -7.265 |
## Male                        0.090   0.011   7.265 |
## Young                       0.030   0.000   1.503 |
## Adult                       0.028   0.001   1.938 |
## Aged                       -0.135   0.004  -4.411 |
## 
## Supplementary categorical variables (eta2)
##                             Dim.1 Dim.2 Dim.3  
## Sex                       | 0.002 0.002 0.011 |
## Age                       | 0.029 0.018 0.004 |
# Figure 3 - variable analysis: contributions (choice = "var") to
# dimension 1.
fviz_contrib(res.mca, choice = "var", axes = 1)

# Same contribution plot for dimension 2.
fviz_contrib(res.mca, choice = "var", axes = 2)

# Quality of representation (cos2) of each category on dimensions 1-2.
# The values are relatively low, which may be due to the amount of data.
fviz_cos2(res.mca, choice = "var", axes = 1:2)

# Figure 4 - active categories plot: supplementary categories and individuals
# are hidden through `invisible`, leaving only the active categories.
plot(res.mca, invisible = c("quali.sup", "ind"), cex=1, col.var = "darkblue", 
     title = "Active categories", cex.main=2, col.main= "darkblue")

# Plot of individuals coloured by Sex (habillage = "Sex"), with 95% ellipses
# per group and no point labels.
fviz_mca_ind(res.mca, label = "none", habillage="Sex", 
             title="MCA individues", addEllipses = TRUE, ellipse.level = 0.95,
             palette = c("#00AFBB", "#E7B800"), ggtheme = theme_minimal())

# Plot of individuals coloured by Age (habillage = "Age"), with 95% ellipses
# per group and no point labels.
fviz_mca_ind(res.mca, label = "none", habillage="Age", 
             title="MCA individues", addEllipses = TRUE, ellipse.level = 0.95)

# To colour individuals by several categorical variables at once, use
# fviz_ellipses() from factoextra, as shown here for Sex and Age.
fviz_ellipses(res.mca, c("Sex", "Age"), geom = "point",addEllipses = TRUE, ellipse.level = 0.95)
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Individuals together with active and supplementary categories.
# Labels are drawn for the supplementary ("quali.sup") and active ("var")
# categories only; individuals are plotted in a light colour without labels.
# NOTE(review): select = "cos2 10" presumably limits the drawn elements to
# the 10 best represented ones (highest cos2) - confirm against plot.MCA.
plot(res.mca, label = c("quali.sup", "var"), select = "cos2 10", 
     cex=1, col.var = "darkblue", col.quali.sup = "brown3",  
     col.ind = "seashell3", title = "Individuals with active and supplementary categories", 
     cex.main=2, col.main= "darkblue")

# 
# library("corrplot")
# ind <- get_mca_ind(res.mca)
# pdf("cor.pdf")
# corrplot(ind$cos2, is.corr = F)
# dev.off()

K-means:肘部法选定 3 类

# K-means on the MCA individual coordinates. Only the first two dimensions
# are kept, and the model is fitted with three centres. The "4" in the object
# names (groupes.kmeans4, clus_k54, cluster_4) does not match the number of
# clusters; the names are kept because later code refers to them.
km <- data.frame(res.mca$ind$coord)[, 1:2]

set.seed(111)
groupes.kmeans4 <- kmeans(x = km, centers = 3, nstart = 5)
# print(groupes.kmeans4)

# Optional export of the cluster plot to PDF:
# pdf("cluster4.pdf")
# fviz_cluster(groupes.kmeans4, data = km, palette = "jco", repel= TRUE,
#         main = "Kmeans", ggtheme = theme_classic())
# dev.off()

clus_k54 <- groupes.kmeans4[["cluster"]]

# Figure 6: choosing the number of clusters with the elbow (within-cluster
# sum of squares) criterion; the dashed line marks k = 3.
set.seed(111)
fviz_nbclust(km, kmeans, method = "wss") +
  geom_vline(xintercept = 3, linetype = 2) +
  labs(subtitle = "Elbow method")

# Figure 7: several renderings of the same clustering result.
fviz_cluster(
  groupes.kmeans4,
  data = km, show.clust.cent = TRUE, geom = "point", palette = "jco"
)

fviz_cluster(
  groupes.kmeans4,
  data = km, palette = "Set2", ggtheme = theme_minimal()
)

fviz_cluster(groupes.kmeans4, data = km, geom = "text")

fviz_cluster(groupes.kmeans4, data = km, ellipse.type = "norm")

fviz_cluster(
  groupes.kmeans4,
  data = km, palette = "jco", main = "Kmeans", ggtheme = theme_classic()
)

# Store the cluster label of every individual on the full data set.
cluster_4 <- groupes.kmeans4[["cluster"]]
sleepdisturbance$cluster_4 <- cluster_4

# Total absolute difference between two columns addressed by position.
# NOTE(review): column 61 is presumably the cluster_4 column appended just
# above and column 26 a previously stored cluster assignment - confirm
# against the layout of data.csv.
sum(abs(sleepdisturbance[, 61] - sleepdisturbance[, 26]))
## [1] 7985

轮廓系数计算

library(cluster)
library(factoextra)

# Silhouette analysis of k-means solutions with k = 2, ..., 10 clusters on
# the two MCA dimensions stored in `km`.
#
# The nine copy-pasted stanzas of the earlier version are folded into one
# loop. The distance matrix does not depend on k, so it is computed once
# instead of once per k.
#
# Results:
#   k                  : the candidate numbers of clusters (2:10)
#   silhouette_fits    : list of kmeans() fits, one per k, named "k2".."k10"
#   silhouette_results : list of silhouette objects, same naming
#   mean_silhouette    : average silhouette width per k, in the order of `k`
#
# The k = 3 fit repeats the seeded kmeans() call that produced
# groupes.kmeans4 earlier in the script, so that object is not reassigned.
#
# Mean silhouette widths recorded from the previous run:
#   k =  2: 0.3998   k =  3: 0.4742   k =  4: 0.4326
#   k =  5: 0.4094   k =  6: 0.4609   k =  7: 0.4534
#   k =  8: 0.5143   k =  9: 0.5392   k = 10: 0.5400
k <- c(2:10)
km_dist <- dist(km)

silhouette_fits <- vector("list", length(k))
silhouette_results <- vector("list", length(k))
mean_silhouette <- numeric(length(k))

for (i in seq_along(k)) {
  # Same seed for every k, as before, so each fit is reproducible on its own.
  set.seed(111)
  silhouette_fits[[i]] <- kmeans(km, centers = k[i], nstart = 5)
  silhouette_results[[i]] <- silhouette(silhouette_fits[[i]]$cluster, km_dist)

  # Inside a loop the plot object is not auto-printed, so print it
  # explicitly. The previous run shows a per-cluster size / average width
  # table after each fviz_silhouette() call.
  print(fviz_silhouette(silhouette_results[[i]]))

  mean_silhouette[i] <- mean(silhouette_results[[i]][, "sil_width"])
}
names(silhouette_fits) <- paste0("k", k)
names(silhouette_results) <- paste0("k", k)

# Overview of the average silhouette width for every k.
print(data.frame(k = k, mean_silhouette = mean_silhouette))

轮廓系数画图

###
# Mean silhouette width against the number of clusters, on a single-panel
# device, with each point annotated by its value rounded to two decimals.
par(mfrow = c(1, 1))
plot(
  x = k, y = mean_silhouette,
  type = "o", pch = 19, col = "red",
  xlab = "K", ylab = "Mean silhouette",
  ylim = c(0.4, 0.6)
)
text(
  x = k, y = mean_silhouette,
  labels = round(mean_silhouette, 2),
  pos = 3
)

###

聚类c1画图—睡眠模式最好

library(dplyr)
# sleepdisturbance1 <- read.csv("data_cluster2.csv")

# Attach the k-means labels to the MCA input and keep the four sleep
# variables plus the cluster label. Columns are chosen by name rather than by
# position; every column is (re)converted to a factor.
SDMode$cluster_4 <- cluster_4
SDMode1 <- SDMode[
  ,
  c("Routine", "Sleep.quality", "Sleep.duration", "SIS", "cluster_4")
]
SDMode1[] <- lapply(SDMode1, as.factor)

# Profile of cluster 1.
c1 <- SDMode1 %>% filter(cluster_4 == 1)

# One bar chart per sleep variable in a 2 x 2 layout. Each bar is annotated
# with the percentage of the cluster that falls into that level.
#
# Fix: the labels used to be positioned with the percentage table of the
# first variable (x = 1..n, y = percentage) for every panel, so they neither
# lined up with the bars nor sat at the bar tops. They are now placed at the
# bar midpoints returned by barplot() and at the bar heights (counts), and
# the y axis gets some headroom so the label of the tallest bar is not
# clipped.
#
# Percentages recorded from the previous run:
#   Routine        : 66.94 / 23.98 / 9.07 / 0.00
#   Sleep.quality  : 48.79 / 43.53 / 6.92 / 0.76 / 0.00
#   Sleep.duration : 46.13 / 51.84 / 2.03
#   SIS            : 91.31 / 8.69
par(mfrow = c(2, 2))
for (var_name in setdiff(colnames(c1), "cluster_4")) {
  category_counts <- table(c1[[var_name]])
  category_per <- round(category_counts / nrow(c1) * 100, 2)
  print(category_per)

  bar_mid <- barplot(
    category_counts,
    main = var_name, ylab = "Count", col = "red", las = 1, col.main = "red",
    ylim = c(0, max(category_counts, 1) * 1.15)
  )
  # A "%" suffix can be added with paste0(category_per, "%") if wanted.
  text(
    x = bar_mid, y = as.vector(category_counts),
    labels = as.vector(category_per),
    pos = 3
  )
}

聚类c2画图—睡眠模式中等

# Profile of cluster 2.
c2 <- SDMode1 %>% filter(cluster_4 == 2)

# One bar chart per sleep variable in a 2 x 2 layout. Each bar is annotated
# with the percentage of the cluster that falls into that level.
#
# Fix: the labels used to be positioned with the percentage table of the
# first variable (x = 1..n, y = percentage) for every panel, so they neither
# lined up with the bars nor sat at the bar tops. They are now placed at the
# bar midpoints returned by barplot() and at the bar heights (counts), and
# the y axis gets some headroom so the label of the tallest bar is not
# clipped.
#
# Percentages recorded from the previous run:
#   Routine        : 16.73 / 57.61 / 25.65 / 0.00
#   Sleep.quality  : 0.84 / 25.58 / 67.38 / 6.20 / 0.00
#   Sleep.duration : 87.89 / 12.11 / 0.00
#   SIS            : 74.06 / 25.94
par(mfrow = c(2, 2))
for (var_name in setdiff(colnames(c2), "cluster_4")) {
  category_counts <- table(c2[[var_name]])
  category_per <- round(category_counts / nrow(c2) * 100, 2)
  print(category_per)

  bar_mid <- barplot(
    category_counts,
    main = var_name, ylab = "Count", col = "red", las = 1, col.main = "red",
    ylim = c(0, max(category_counts, 1) * 1.15)
  )
  text(
    x = bar_mid, y = as.vector(category_counts),
    labels = as.vector(category_per),
    pos = 3
  )
}

聚类c3画图—睡眠模式最差

# Profile of cluster 3.
c3 <- SDMode1 %>% filter(cluster_4 == 3)

# One bar chart per sleep variable in a 2 x 2 layout. Each bar is annotated
# with the percentage of the cluster that falls into that level.
#
# Fix: the labels used to be positioned with the percentage table of the
# first variable (x = 1..n, y = percentage) for every panel, so they neither
# lined up with the bars nor sat at the bar tops. They are now placed at the
# bar midpoints returned by barplot() and at the bar heights (counts), and
# the y axis gets some headroom so the label of the tallest bar is not
# clipped.
#
# Percentages recorded from the previous run:
#   Routine        : 22.12 / 3.59 / 46.04 / 28.25
#   Sleep.quality  : 5.23 / 2.09 / 8.97 / 68.76 / 14.95
#   Sleep.duration : 90.43 / 8.22 / 1.35
#   SIS            : 70.85 / 29.15
par(mfrow = c(2, 2))
for (var_name in setdiff(colnames(c3), "cluster_4")) {
  category_counts <- table(c3[[var_name]])
  category_per <- round(category_counts / nrow(c3) * 100, 2)
  print(category_per)

  bar_mid <- barplot(
    category_counts,
    main = var_name, ylab = "Count", col = "red", las = 1, col.main = "red",
    ylim = c(0, max(category_counts, 1) * 1.15)
  )
  text(
    x = bar_mid, y = as.vector(category_counts),
    labels = as.vector(category_per),
    pos = 3
  )
}

Logistic 回归—共病

library(autoReg)
## Warning: 程辑包'autoReg'是用R版本4.3.3 来建造的
# Missing-value imputation (disabled): replace each NA by its column mean,
# rounded to two decimals.
# means <- colMeans(sleepdisturbance, na.rm = TRUE)
# sleepdisturbance[is.na(sleepdisturbance)] <- round(means[col(sleepdisturbance)][is.na(sleepdisturbance)],2)

# Round-trip the data set (now including cluster_4) through a CSV file.
# write.csv() is called with its defaults, so the file gains a leading
# row-name column when it is read back.
write.csv(x = sleepdisturbance, file = "sleepdisturbance.csv")
sleepdisturbance <- read.csv(file = "sleepdisturbance.csv")

# Drop the first two columns of the re-read data.
sleepdisturbance <- sleepdisturbance[, -(1:2)]

# Reorder the columns by position: column 60 is moved to position 33 and
# column 23 to the end. According to the baseline table recorded below these
# are cluster_4 and Comorbidities respectively.
column_order <- c(1:22, 24:33, 60, 34:59, 23)
sleepdisturbance <- sleepdisturbance[, column_order]

# The first 33 columns are treated as categorical.
categorical_cols <- 1:33
sleepdisturbance[categorical_cols] <- lapply(
  sleepdisturbance[categorical_cols],
  function(column) as.factor(as.numeric(column))
)

# Alternative column selections kept for reference:
# sleepdisturbance <- sleepdisturbance[, c(1:22, 24:60, 23)]
# sleepdisturbance <- sleepdisturbance[, c(33, 60)]

# Baseline table of all variables.
myft(gaze(~., data = sleepdisturbance))

name

levels

stats

Sex

0

2149 (43.2%)

1

2821 (56.8%)

Age

0

1684 (33.9%)

1

2411 (48.5%)

2

875 (17.6%)

Education

1

1483 (29.8%)

2

1110 (22.3%)

3

2051 (41.3%)

4

326 (6.6%)

Marrying

1

141 (2.8%)

2

4580 (92.2%)

3

123 (2.5%)

4

126 (2.5%)

Income

1

1321 (26.6%)

2

1436 (28.9%)

3

1484 (29.9%)

4

729 (14.7%)

Smoking

1

3421 (68.8%)

2

680 (13.7%)

3

685 (13.8%)

4

184 (3.7%)

Drinking

1

2642 (53.2%)

2

1687 (33.9%)

3

641 (12.9%)

Routine

1

1659 (33.4%)

2

1972 (39.7%)

3

1150 (23.1%)

4

189 (3.8%)

Sleep.quality

1

827 (16.6%)

2

1397 (28.1%)

3

2005 (40.3%)

4

641 (12.9%)

5

100 (2.0%)

Sleep.duration

1

3727 (75.0%)

2

1202 (24.2%)

3

41 (0.8%)

SIS

0

3931 (79.1%)

1

1039 (20.9%)

DFA

0

4254 (85.6%)

1

716 (14.4%)

EA

0

3834 (77.1%)

1

1136 (22.9%)

DN

0

4517 (90.9%)

1

453 (9.1%)

NFLP

0

4462 (89.8%)

1

508 (10.2%)

Central.obesity

0

2547 (51.2%)

1

2423 (48.8%)

Hypertension

0

3053 (61.4%)

1

1917 (38.6%)

Dyslipidemia

0

4149 (83.5%)

1

821 (16.5%)

Diabetes

0

1998 (40.2%)

1

2972 (59.8%)

Hyperuricemia

0

4299 (86.5%)

1

671 (13.5%)

NAFLD

0

2213 (44.5%)

1

2757 (55.5%)

Num

0

767 (15.4%)

1

940 (18.9%)

2

960 (19.3%)

3

1003 (20.2%)

4

865 (17.4%)

5

378 (7.6%)

6

57 (1.1%)

diabete

0

2591 (52.1%)

1

1766 (35.5%)

2

613 (12.3%)

clus_k540

1

1474 (29.7%)

2

301 (6.1%)

3

1738 (35.0%)

4

1457 (29.3%)

clus_k541

1

1457 (29.3%)

2

1738 (35.0%)

3

1474 (29.7%)

4

301 (6.1%)

WC_c

0

1578 (31.8%)

1

969 (19.5%)

2

2423 (48.8%)

BMI_c

0

1573 (31.6%)

1

60 (1.2%)

2

2134 (42.9%)

3

1203 (24.2%)

SUA_c

0

4211 (84.7%)

1

759 (15.3%)

TC_c

0

3335 (67.1%)

1

1635 (32.9%)

TG_c

0

3160 (63.6%)

1

1810 (36.4%)

HDL_c

0

4538 (91.3%)

1

432 (8.7%)

LDL_c

0

4000 (80.5%)

1

970 (19.5%)

cluster_4

1

1576 (31.7%)

2

2725 (54.8%)

3

669 (13.5%)

AGE

Mean ± SD

49.8 ± 11.0

WC

Mean ± SD

86.9 ± 10.7

SBP

Mean ± SD

129.1 ± 18.5

DBP

Mean ± SD

79.0 ± 11.8

BMI

Mean ± SD

25.7 ± 3.5

CRE

Mean ± SD

68.5 ± 17.5

SUA

Mean ± SD

318.8 ± 87.9

efeg

Mean ± SD

101.5 ± 12.6

TC

Mean ± SD

4.9 ± 1.0

TG

Mean ± SD

1.8 ± 1.6

HDL

Mean ± SD

1.3 ± 0.3

LDL

Mean ± SD

2.9 ± 0.8

FPG

Mean ± SD

5.6 ± 1.5

HbA1c

Mean ± SD

5.8 ± 0.8

WBC

Mean ± SD

6.0 ± 1.6

RBC

Mean ± SD

4.8 ± 0.5

Hb

Mean ± SD

145.5 ± 15.8

PLT

Mean ± SD

240.8 ± 57.7

Ne

Mean ± SD

3.6 ± 1.2

Lym

Mean ± SD

2.0 ± 0.6

NLR

Mean ± SD

1.9 ± 0.8

PLR

Mean ± SD

131.0 ± 42.6

SII

Mean ± SD

464.4 ± 232.1

ALT

Mean ± SD

24.1 ± 28.3

AST

Mean ± SD

21.7 ± 12.5

GGT

Mean ± SD

32.1 ± 37.3

Comorbidities

Mean ± SD

0.7 ± 0.5

# Treat the outcome as categorical so that the baseline table reports counts
# per level instead of mean and SD.
sleepdisturbance[["Comorbidities"]] <- as.factor(
  sleepdisturbance[["Comorbidities"]]
)

# Baseline table again, ahead of the univariable / multivariable analysis.
myft(gaze(~., data = sleepdisturbance))

name

levels

stats

Sex

0

2149 (43.2%)

1

2821 (56.8%)

Age

0

1684 (33.9%)

1

2411 (48.5%)

2

875 (17.6%)

Education

1

1483 (29.8%)

2

1110 (22.3%)

3

2051 (41.3%)

4

326 (6.6%)

Marrying

1

141 (2.8%)

2

4580 (92.2%)

3

123 (2.5%)

4

126 (2.5%)

Income

1

1321 (26.6%)

2

1436 (28.9%)

3

1484 (29.9%)

4

729 (14.7%)

Smoking

1

3421 (68.8%)

2

680 (13.7%)

3

685 (13.8%)

4

184 (3.7%)

Drinking

1

2642 (53.2%)

2

1687 (33.9%)

3

641 (12.9%)

Routine

1

1659 (33.4%)

2

1972 (39.7%)

3

1150 (23.1%)

4

189 (3.8%)

Sleep.quality

1

827 (16.6%)

2

1397 (28.1%)

3

2005 (40.3%)

4

641 (12.9%)

5

100 (2.0%)

Sleep.duration

1

3727 (75.0%)

2

1202 (24.2%)

3

41 (0.8%)

SIS

0

3931 (79.1%)

1

1039 (20.9%)

DFA

0

4254 (85.6%)

1

716 (14.4%)

EA

0

3834 (77.1%)

1

1136 (22.9%)

DN

0

4517 (90.9%)

1

453 (9.1%)

NFLP

0

4462 (89.8%)

1

508 (10.2%)

Central.obesity

0

2547 (51.2%)

1

2423 (48.8%)

Hypertension

0

3053 (61.4%)

1

1917 (38.6%)

Dyslipidemia

0

4149 (83.5%)

1

821 (16.5%)

Diabetes

0

1998 (40.2%)

1

2972 (59.8%)

Hyperuricemia

0

4299 (86.5%)

1

671 (13.5%)

NAFLD

0

2213 (44.5%)

1

2757 (55.5%)

Num

0

767 (15.4%)

1

940 (18.9%)

2

960 (19.3%)

3

1003 (20.2%)

4

865 (17.4%)

5

378 (7.6%)

6

57 (1.1%)

diabete

0

2591 (52.1%)

1

1766 (35.5%)

2

613 (12.3%)

clus_k540

1

1474 (29.7%)

2

301 (6.1%)

3

1738 (35.0%)

4

1457 (29.3%)

clus_k541

1

1457 (29.3%)

2

1738 (35.0%)

3

1474 (29.7%)

4

301 (6.1%)

WC_c

0

1578 (31.8%)

1

969 (19.5%)

2

2423 (48.8%)

BMI_c

0

1573 (31.6%)

1

60 (1.2%)

2

2134 (42.9%)

3

1203 (24.2%)

SUA_c

0

4211 (84.7%)

1

759 (15.3%)

TC_c

0

3335 (67.1%)

1

1635 (32.9%)

TG_c

0

3160 (63.6%)

1

1810 (36.4%)

HDL_c

0

4538 (91.3%)

1

432 (8.7%)

LDL_c

0

4000 (80.5%)

1

970 (19.5%)

cluster_4

1

1576 (31.7%)

2

2725 (54.8%)

3

669 (13.5%)

AGE

Mean ± SD

49.8 ± 11.0

WC

Mean ± SD

86.9 ± 10.7

SBP

Mean ± SD

129.1 ± 18.5

DBP

Mean ± SD

79.0 ± 11.8

BMI

Mean ± SD

25.7 ± 3.5

CRE

Mean ± SD

68.5 ± 17.5

SUA

Mean ± SD

318.8 ± 87.9

efeg

Mean ± SD

101.5 ± 12.6

TC

Mean ± SD

4.9 ± 1.0

TG

Mean ± SD

1.8 ± 1.6

HDL

Mean ± SD

1.3 ± 0.3

LDL

Mean ± SD

2.9 ± 0.8

FPG

Mean ± SD

5.6 ± 1.5

HbA1c

Mean ± SD

5.8 ± 0.8

WBC

Mean ± SD

6.0 ± 1.6

RBC

Mean ± SD

4.8 ± 0.5

Hb

Mean ± SD

145.5 ± 15.8

PLT

Mean ± SD

240.8 ± 57.7

Ne

Mean ± SD

3.6 ± 1.2

Lym

Mean ± SD

2.0 ± 0.6

NLR

Mean ± SD

1.9 ± 0.8

PLR

Mean ± SD

131.0 ± 42.6

SII

Mean ± SD

464.4 ± 232.1

ALT

Mean ± SD

24.1 ± 28.3

AST

Mean ± SD

21.7 ± 12.5

GGT

Mean ± SD

32.1 ± 37.3

Comorbidities

0

1707 (34.3%)

1

3263 (65.7%)

# Logistic regression of Comorbidities on cluster membership, demographic and
# lifestyle covariates, BMI and CRE.
mod <- glm(
  Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying + Income +
    Smoking + Drinking + BMI + CRE,
  data = sleepdisturbance,
  family = "binomial"
)

# uni = TRUE adds the univariable odds ratios next to the multivariable ones.
# NOTE(review): threshold = 0.05 is presumably the univariable p-value cut-off
# for entering the multivariable model - confirm against the autoReg docs.
aovresult <- autoReg(mod, uni = TRUE, threshold = 0.05)
myft(aovresult)

Dependent: Comorbidities

0 (N=1707)

1 (N=3263)

OR (univariable)

OR (multivariable)

cluster_4

1

552 (32.3%)

1024 (31.4%)

2

953 (55.8%)

1772 (54.3%)

1.00 (0.88-1.14, p=.972)

1.17 (0.99-1.39, p=.071)

3

202 (11.8%)

467 (14.3%)

1.25 (1.03-1.51, p=.027)

1.35 (1.05-1.74, p=.021)

Sex

0

1079 (63.2%)

1070 (32.8%)

1

628 (36.8%)

2193 (67.2%)

3.52 (3.12-3.98, p<.001)

2.17 (1.69-2.77, p<.001)

Age

0

736 (43.1%)

948 (29.1%)

1

762 (44.6%)

1649 (50.5%)

1.68 (1.48-1.91, p<.001)

2.14 (1.78-2.58, p<.001)

2

209 (12.2%)

666 (20.4%)

2.47 (2.06-2.97, p<.001)

3.71 (2.86-4.81, p<.001)

Education

1

452 (26.5%)

1031 (31.6%)

2

361 (21.1%)

749 (23%)

0.91 (0.77-1.08, p=.267)

1.19 (0.95-1.48, p=.122)

3

742 (43.5%)

1309 (40.1%)

0.77 (0.67-0.89, p<.001)

1.42 (1.14-1.76, p=.001)

4

152 (8.9%)

174 (5.3%)

0.50 (0.39-0.64, p<.001)

1.11 (0.76-1.60, p=.592)

Marrying

1

82 (4.8%)

59 (1.8%)

2

1547 (90.6%)

3033 (93%)

2.72 (1.94-3.83, p<.001)

2.06 (1.25-3.40, p=.004)

3

52 (3%)

71 (2.2%)

1.90 (1.16-3.10, p=.010)

2.16 (1.09-4.30, p=.028)

4

26 (1.5%)

100 (3.1%)

5.35 (3.10-9.23, p<.001)

4.05 (1.95-8.42, p<.001)

Income

1

414 (24.3%)

907 (27.8%)

2

541 (31.7%)

895 (27.4%)

0.76 (0.64-0.88, p<.001)

0.65 (0.52-0.81, p<.001)

3

523 (30.6%)

961 (29.5%)

0.84 (0.72-0.98, p=.029)

0.69 (0.54-0.89, p=.003)

4

229 (13.4%)

500 (15.3%)

1.00 (0.82-1.21, p=.973)

0.74 (0.55-1.00, p=.054)

Smoking

1

1391 (81.5%)

2030 (62.2%)

2

147 (8.6%)

533 (16.3%)

2.48 (2.04-3.02, p<.001)

1.28 (0.98-1.67, p=.065)

3

132 (7.7%)

553 (16.9%)

2.87 (2.35-3.51, p<.001)

1.53 (1.14-2.04, p=.004)

4

37 (2.2%)

147 (4.5%)

2.72 (1.89-3.93, p<.001)

1.37 (0.86-2.17, p=.183)

Drinking

1

1095 (64.1%)

1547 (47.4%)

2

505 (29.6%)

1182 (36.2%)

1.66 (1.46-1.89, p<.001)

0.87 (0.71-1.05, p=.143)

3

107 (6.3%)

534 (16.4%)

3.53 (2.83-4.41, p<.001)

1.08 (0.79-1.46, p=.640)

BMI

Mean ± SD

23.0 ± 2.5

27.1 ± 3.2

1.75 (1.69-1.81, p<.001)

1.71 (1.65-1.77, p<.001)

CRE

Mean ± SD

64.1 ± 13.0

70.7 ± 19.0

1.04 (1.03-1.04, p<.001)

1.00 (1.00-1.01, p=.446)

# Unadjusted model: cluster membership is the only predictor. The adjusted
# models that follow are reduced with step().
mod0 <- glm(
  Comorbidities ~ cluster_4,
  data = sleepdisturbance,
  family = "binomial"
)
summary(mod0)
## 
## Call:
## glm(formula = Comorbidities ~ cluster_4, family = "binomial", 
##     data = sleepdisturbance)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 0.617924   0.052803  11.702   <2e-16 ***
## cluster_42  0.002325   0.066346   0.035   0.9720    
## cluster_43  0.220138   0.099398   2.215   0.0268 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6394.4  on 4969  degrees of freedom
## Residual deviance: 6388.4  on 4967  degrees of freedom
## AIC: 6394.4
## 
## Number of Fisher Scoring iterations: 4
# Model adjusted for demographic and lifestyle covariates, then reduced by
# AIC-based stepwise selection; step() prints its search trace.
mod1 <- glm(
  Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying + Income +
    Smoking + Drinking,
  data = sleepdisturbance,
  family = "binomial"
)
mod11 <- step(mod1)
## Start:  AIC=5683.92
## Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying + 
##     Income + Smoking + Drinking
## 
##             Df Deviance    AIC
## - Smoking    3   5648.5 5682.5
## <none>           5643.9 5683.9
## - cluster_4  2   5651.3 5687.3
## - Education  3   5657.5 5691.5
## - Income     3   5657.9 5691.9
## - Drinking   2   5657.2 5693.2
## - Marrying   3   5668.2 5702.2
## - Age        2   5766.1 5802.1
## - Sex        1   5911.9 5949.9
## 
## Step:  AIC=5682.51
## Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying + 
##     Income + Drinking
## 
##             Df Deviance    AIC
## <none>           5648.5 5682.5
## - cluster_4  2   5656.7 5686.7
## - Income     3   5662.2 5690.2
## - Education  3   5663.8 5691.8
## - Drinking   2   5666.2 5696.2
## - Marrying   3   5672.8 5700.8
## - Age        2   5771.7 5801.7
## - Sex        1   5998.7 6030.7
# Coefficient table of the model returned by step(mod1).
summary(mod11)
## 
## Call:
## glm(formula = Comorbidities ~ cluster_4 + Sex + Age + Education + 
##     Marrying + Income + Drinking, family = "binomial", data = sleepdisturbance)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.06073    0.21353  -4.968 6.78e-07 ***
## cluster_42   0.12647    0.07290   1.735 0.082778 .  
## cluster_43   0.29823    0.10716   2.783 0.005385 ** 
## Sex1         1.46853    0.08192  17.928  < 2e-16 ***
## Age1         0.70172    0.07728   9.080  < 2e-16 ***
## Age2         1.09783    0.11189   9.812  < 2e-16 ***
## Education2  -0.17203    0.09426  -1.825 0.067995 .  
## Education3  -0.18261    0.09090  -2.009 0.044546 *  
## Education4  -0.59017    0.15264  -3.866 0.000110 ***
## Marrying2    0.59202    0.19199   3.084 0.002045 ** 
## Marrying3    0.46899    0.27227   1.722 0.084980 .  
## Marrying4    1.41763    0.30047   4.718 2.38e-06 ***
## Income2     -0.33695    0.09267  -3.636 0.000277 ***
## Income3     -0.21309    0.10297  -2.069 0.038501 *  
## Income4     -0.16280    0.12841  -1.268 0.204854    
## Drinking2    0.04760    0.08120   0.586 0.557782    
## Drinking3    0.51088    0.12876   3.968 7.26e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6394.4  on 4969  degrees of freedom
## Residual deviance: 5648.5  on 4953  degrees of freedom
## AIC: 5682.5
## 
## Number of Fisher Scoring iterations: 4
# Model 2: Model 1 covariates with BMI and CRE added.
mod2 <- glm(
  formula = Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying +
    Income + Smoking + Drinking + BMI + CRE,
  family = "binomial",
  data = sleepdisturbance
)
# AIC-guided term dropping (no scope given, so no terms are added back).
mod22 <- step(mod2)
## Start:  AIC=4148.94
## Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying + 
##     Income + Smoking + Drinking + BMI + CRE
## 
##             Df Deviance    AIC
## - CRE        1   4105.6 4147.6
## - Drinking   2   4108.4 4148.4
## <none>           4104.9 4148.9
## - cluster_4  2   4111.1 4151.1
## - Smoking    3   4114.9 4152.9
## - Education  3   4116.3 4154.3
## - Marrying   3   4119.7 4157.7
## - Income     3   4120.9 4158.9
## - Sex        1   4141.7 4183.7
## - Age        2   4218.2 4258.2
## - BMI        1   5634.6 5676.6
## 
## Step:  AIC=4147.61
## Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying + 
##     Income + Smoking + Drinking + BMI
## 
##             Df Deviance    AIC
## - Drinking   2   4109.0 4147.0
## <none>           4105.6 4147.6
## - cluster_4  2   4111.7 4149.7
## - Smoking    3   4115.4 4151.4
## - Education  3   4117.2 4153.2
## - Marrying   3   4120.3 4156.3
## - Income     3   4121.7 4157.7
## - Sex        1   4167.1 4207.1
## - Age        2   4219.5 4257.5
## - BMI        1   5643.9 5683.9
## 
## Step:  AIC=4146.98
## Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying + 
##     Income + Smoking + BMI
## 
##             Df Deviance    AIC
## <none>           4109.0 4147.0
## - cluster_4  2   4115.1 4149.1
## - Education  3   4120.2 4152.2
## - Smoking    3   4120.7 4152.7
## - Marrying   3   4124.2 4156.2
## - Income     3   4124.9 4156.9
## - Sex        1   4173.2 4209.2
## - Age        2   4224.9 4258.9
## - BMI        1   5657.2 5693.2
# Coefficient table of the model retained by step() for Model 2.
summary(mod22)
## 
## Call:
## glm(formula = Comorbidities ~ cluster_4 + Sex + Age + Education + 
##     Marrying + Income + Smoking + BMI, family = "binomial", data = sleepdisturbance)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -14.56545    0.53108 -27.426  < 2e-16 ***
## cluster_42    0.15595    0.08714   1.790 0.073515 .  
## cluster_43    0.29819    0.13009   2.292 0.021892 *  
## Sex1          0.79273    0.09987   7.937 2.07e-15 ***
## Age1          0.76270    0.09458   8.064 7.37e-16 ***
## Age2          1.32640    0.13201  10.048  < 2e-16 ***
## Education2    0.17484    0.11246   1.555 0.120023    
## Education3    0.34828    0.10971   3.175 0.001500 ** 
## Education4    0.10146    0.18823   0.539 0.589862    
## Marrying2     0.73071    0.25393   2.878 0.004007 ** 
## Marrying3     0.77707    0.34940   2.224 0.026148 *  
## Marrying4     1.41628    0.37304   3.797 0.000147 ***
## Income2      -0.43277    0.11108  -3.896 9.77e-05 ***
## Income3      -0.36330    0.12424  -2.924 0.003453 ** 
## Income4      -0.30380    0.15497  -1.960 0.049947 *  
## Smoking2      0.22463    0.13397   1.677 0.093594 .  
## Smoking3      0.44594    0.13912   3.206 0.001348 ** 
## Smoking4      0.31325    0.23326   1.343 0.179306    
## BMI           0.53595    0.01781  30.088  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6394.4  on 4969  degrees of freedom
## Residual deviance: 4109.0  on 4951  degrees of freedom
## AIC: 4147
## 
## Number of Fisher Scoring iterations: 5

logistic –NAFLD

#### Univariable / multivariable factor selection
# Summarise every column of the data set with gaze() and pass the result to
# myft() for display.
myft(gaze(~ ., data = sleepdisturbance))

name

levels

stats

Sex

0

2149 (43.2%)

1

2821 (56.8%)

Age

0

1684 (33.9%)

1

2411 (48.5%)

2

875 (17.6%)

Education

1

1483 (29.8%)

2

1110 (22.3%)

3

2051 (41.3%)

4

326 (6.6%)

Marrying

1

141 (2.8%)

2

4580 (92.2%)

3

123 (2.5%)

4

126 (2.5%)

Income

1

1321 (26.6%)

2

1436 (28.9%)

3

1484 (29.9%)

4

729 (14.7%)

Smoking

1

3421 (68.8%)

2

680 (13.7%)

3

685 (13.8%)

4

184 (3.7%)

Drinking

1

2642 (53.2%)

2

1687 (33.9%)

3

641 (12.9%)

Routine

1

1659 (33.4%)

2

1972 (39.7%)

3

1150 (23.1%)

4

189 (3.8%)

Sleep.quality

1

827 (16.6%)

2

1397 (28.1%)

3

2005 (40.3%)

4

641 (12.9%)

5

100 (2.0%)

Sleep.duration

1

3727 (75.0%)

2

1202 (24.2%)

3

41 (0.8%)

SIS

0

3931 (79.1%)

1

1039 (20.9%)

DFA

0

4254 (85.6%)

1

716 (14.4%)

EA

0

3834 (77.1%)

1

1136 (22.9%)

DN

0

4517 (90.9%)

1

453 (9.1%)

NFLP

0

4462 (89.8%)

1

508 (10.2%)

Central.obesity

0

2547 (51.2%)

1

2423 (48.8%)

Hypertension

0

3053 (61.4%)

1

1917 (38.6%)

Dyslipidemia

0

4149 (83.5%)

1

821 (16.5%)

Diabetes

0

1998 (40.2%)

1

2972 (59.8%)

Hyperuricemia

0

4299 (86.5%)

1

671 (13.5%)

NAFLD

0

2213 (44.5%)

1

2757 (55.5%)

Num

0

767 (15.4%)

1

940 (18.9%)

2

960 (19.3%)

3

1003 (20.2%)

4

865 (17.4%)

5

378 (7.6%)

6

57 (1.1%)

diabete

0

2591 (52.1%)

1

1766 (35.5%)

2

613 (12.3%)

clus_k540

1

1474 (29.7%)

2

301 (6.1%)

3

1738 (35.0%)

4

1457 (29.3%)

clus_k541

1

1457 (29.3%)

2

1738 (35.0%)

3

1474 (29.7%)

4

301 (6.1%)

WC_c

0

1578 (31.8%)

1

969 (19.5%)

2

2423 (48.8%)

BMI_c

0

1573 (31.6%)

1

60 (1.2%)

2

2134 (42.9%)

3

1203 (24.2%)

SUA_c

0

4211 (84.7%)

1

759 (15.3%)

TC_c

0

3335 (67.1%)

1

1635 (32.9%)

TG_c

0

3160 (63.6%)

1

1810 (36.4%)

HDL_c

0

4538 (91.3%)

1

432 (8.7%)

LDL_c

0

4000 (80.5%)

1

970 (19.5%)

cluster_4

1

1576 (31.7%)

2

2725 (54.8%)

3

669 (13.5%)

AGE

Mean ± SD

49.8 ± 11.0

WC

Mean ± SD

86.9 ± 10.7

SBP

Mean ± SD

129.1 ± 18.5

DBP

Mean ± SD

79.0 ± 11.8

BMI

Mean ± SD

25.7 ± 3.5

CRE

Mean ± SD

68.5 ± 17.5

SUA

Mean ± SD

318.8 ± 87.9

efeg

Mean ± SD

101.5 ± 12.6

TC

Mean ± SD

4.9 ± 1.0

TG

Mean ± SD

1.8 ± 1.6

HDL

Mean ± SD

1.3 ± 0.3

LDL

Mean ± SD

2.9 ± 0.8

FPG

Mean ± SD

5.6 ± 1.5

HbA1c

Mean ± SD

5.8 ± 0.8

WBC

Mean ± SD

6.0 ± 1.6

RBC

Mean ± SD

4.8 ± 0.5

Hb

Mean ± SD

145.5 ± 15.8

PLT

Mean ± SD

240.8 ± 57.7

Ne

Mean ± SD

3.6 ± 1.2

Lym

Mean ± SD

2.0 ± 0.6

NLR

Mean ± SD

1.9 ± 0.8

PLR

Mean ± SD

131.0 ± 42.6

SII

Mean ± SD

464.4 ± 232.1

ALT

Mean ± SD

24.1 ± 28.3

AST

Mean ± SD

21.7 ± 12.5

GGT

Mean ± SD

32.1 ± 37.3

Comorbidities

0

1707 (34.3%)

1

3263 (65.7%)

# Full logistic model for NAFLD: cluster_4 plus all covariates, including BMI
# and CRE.
modd <- glm(
  formula = NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Income +
    Smoking + Drinking + BMI + CRE,
  family = "binomial",
  data = sleepdisturbance
)
# uni = TRUE adds a univariable OR column next to the multivariable one;
# threshold = 0.05 is the univariable p-value cut-off for entering the
# multivariable model.
aovresult <- autoReg(modd, threshold = 0.05, uni = TRUE)
myft(aovresult)

Dependent: NAFLD

0 (N=2213)

1 (N=2757)

OR (univariable)

OR (multivariable)

cluster_4

1

753 (34%)

823 (29.9%)

2

1184 (53.5%)

1541 (55.9%)

1.19 (1.05-1.35, p=.006)

1.24 (1.06-1.44, p=.007)

3

276 (12.5%)

393 (14.3%)

1.30 (1.09-1.56, p=.005)

1.27 (1.01-1.59, p=.039)

Sex

0

1345 (60.8%)

804 (29.2%)

1

868 (39.2%)

1953 (70.8%)

3.76 (3.34-4.24, p<.001)

2.73 (2.18-3.42, p<.001)

Age

0

773 (34.9%)

911 (33%)

1

1005 (45.4%)

1406 (51%)

1.19 (1.05-1.35, p=.007)

1.31 (1.11-1.55, p=.001)

2

435 (19.7%)

440 (16%)

0.86 (0.73-1.01, p=.067)

1.00 (0.80-1.25, p=.993)

Education

1

679 (30.7%)

804 (29.2%)

2

504 (22.8%)

606 (22%)

1.02 (0.87-1.19, p=.848)

3

872 (39.4%)

1179 (42.8%)

1.14 (1.00-1.31, p=.053)

4

158 (7.1%)

168 (6.1%)

0.90 (0.71-1.14, p=.380)

Marrying

1

87 (3.9%)

54 (2%)

2

2006 (90.6%)

2574 (93.4%)

2.07 (1.46-2.92, p<.001)

1.88 (1.19-2.97, p=.007)

3

58 (2.6%)

65 (2.4%)

1.81 (1.11-2.95, p=.018)

2.47 (1.32-4.65, p=.005)

4

62 (2.8%)

64 (2.3%)

1.66 (1.02-2.71, p=.041)

1.92 (1.02-3.60, p=.043)

Income

1

658 (29.7%)

663 (24%)

2

678 (30.6%)

758 (27.5%)

1.11 (0.96-1.29, p=.173)

1.01 (0.84-1.22, p=.932)

3

613 (27.7%)

871 (31.6%)

1.41 (1.21-1.64, p<.001)

1.20 (0.98-1.46, p=.076)

4

264 (11.9%)

465 (16.9%)

1.75 (1.45-2.10, p<.001)

1.23 (0.96-1.57, p=.110)

Smoking

1

1752 (79.2%)

1669 (60.5%)

2

207 (9.4%)

473 (17.2%)

2.40 (2.01-2.86, p<.001)

1.10 (0.87-1.38, p=.437)

3

194 (8.8%)

491 (17.8%)

2.66 (2.22-3.18, p<.001)

1.36 (1.07-1.74, p=.013)

4

60 (2.7%)

124 (4.5%)

2.17 (1.58-2.97, p<.001)

1.18 (0.81-1.72, p=.400)

Drinking

1

1420 (64.2%)

1222 (44.3%)

2

614 (27.7%)

1073 (38.9%)

2.03 (1.79-2.30, p<.001)

1.04 (0.87-1.23, p=.678)

3

179 (8.1%)

462 (16.8%)

3.00 (2.48-3.62, p<.001)

0.90 (0.70-1.17, p=.431)

BMI

Mean ± SD

23.6 ± 2.8

27.3 ± 3.2

1.56 (1.52-1.60, p<.001)

1.52 (1.48-1.56, p<.001)

CRE

Mean ± SD

65.6 ± 20.9

70.7 ± 13.7

1.03 (1.02-1.03, p<.001)

0.99 (0.98-1.00, p=.001)

# Univariable analysis of cluster_4; the adjusted models that follow are
# reduced by backward elimination.
modd0 <- glm(
  formula = NAFLD ~ cluster_4,
  family = "binomial",
  data = sleepdisturbance
)
summary(modd0)
## 
## Call:
## glm(formula = NAFLD ~ cluster_4, family = "binomial", data = sleepdisturbance)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  0.08889    0.05043   1.763  0.07795 . 
## cluster_42   0.17464    0.06353   2.749  0.00598 **
## cluster_43   0.26452    0.09333   2.834  0.00459 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6830.2  on 4969  degrees of freedom
## Residual deviance: 6819.3  on 4967  degrees of freedom
## AIC: 6825.3
## 
## Number of Fisher Scoring iterations: 4
# NAFLD Model 1: cluster_4 plus Sex, Age, Education, Marrying, Income, Smoking
# and Drinking.
modd1 <- glm(
  formula = NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Income +
    Smoking + Drinking,
  family = "binomial",
  data = sleepdisturbance
)
# step() is called without a scope, so it only considers dropping terms,
# comparing candidate models by AIC.
modd11 <- step(modd1)
## Start:  AIC=6273.82
## NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Income + 
##     Smoking + Drinking
## 
##             Df Deviance    AIC
## - Smoking    3   6236.7 6270.7
## - Income     3   6237.7 6271.7
## - Drinking   2   6237.7 6273.7
## <none>           6233.8 6273.8
## - Education  3   6241.1 6275.1
## - cluster_4  2   6244.8 6280.8
## - Marrying   3   6249.6 6283.6
## - Age        2   6262.0 6298.0
## - Sex        1   6480.8 6518.8
## 
## Step:  AIC=6270.67
## NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Income + 
##     Drinking
## 
##             Df Deviance    AIC
## - Income     3   6240.4 6268.4
## <none>           6236.7 6270.7
## - Education  3   6244.8 6272.8
## - Drinking   2   6243.2 6273.2
## - cluster_4  2   6248.2 6278.2
## - Marrying   3   6252.4 6280.4
## - Age        2   6264.6 6294.6
## - Sex        1   6553.7 6585.7
## 
## Step:  AIC=6268.42
## NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Drinking
## 
##             Df Deviance    AIC
## <none>           6240.4 6268.4
## - Education  3   6247.7 6269.7
## - Drinking   2   6247.1 6271.1
## - cluster_4  2   6252.2 6276.2
## - Marrying   3   6256.4 6278.4
## - Age        2   6267.4 6291.4
## - Sex        1   6578.7 6604.7
# Coefficient table of the model retained by step() for NAFLD Model 1.
summary(modd11)
## 
## Call:
## glm(formula = NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + 
##     Drinking, family = "binomial", data = sleepdisturbance)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.46861    0.20625  -7.120 1.08e-12 ***
## cluster_42   0.19689    0.06847   2.876 0.004032 ** 
## cluster_43   0.29347    0.09968   2.944 0.003239 ** 
## Sex1         1.32160    0.07392  17.880  < 2e-16 ***
## Age1         0.35052    0.07283   4.813 1.49e-06 ***
## Age2         0.08175    0.09783   0.836 0.403358    
## Education2  -0.15702    0.08562  -1.834 0.066660 .  
## Education3  -0.06682    0.07758  -0.861 0.389046    
## Education4  -0.31922    0.13671  -2.335 0.019543 *  
## Marrying2    0.63804    0.18928   3.371 0.000749 ***
## Marrying3    0.71605    0.26683   2.684 0.007285 ** 
## Marrying4    1.02961    0.26954   3.820 0.000133 ***
## Drinking2    0.13444    0.07578   1.774 0.076059 .  
## Drinking3    0.26696    0.10943   2.440 0.014706 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6830.2  on 4969  degrees of freedom
## Residual deviance: 6240.4  on 4956  degrees of freedom
## AIC: 6268.4
## 
## Number of Fisher Scoring iterations: 4
# NAFLD Model 2: Model 1 covariates with BMI and CRE added (the same
# specification as `modd`, which was fitted earlier for the autoReg table).
modd2 <- glm(
  formula = NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Income +
    Smoking + Drinking + BMI + CRE,
  family = "binomial",
  data = sleepdisturbance
)
# AIC-guided term dropping (no scope given, so no terms are added back).
modd22 <- step(modd2)
## Start:  AIC=4967.14
## NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Income + 
##     Smoking + Drinking + BMI + CRE
## 
##             Df Deviance    AIC
## - Income     3   4924.7 4962.7
## - Drinking   2   4924.5 4964.5
## <none>           4923.1 4967.1
## - Smoking    3   4930.6 4968.6
## - Marrying   3   4933.9 4971.9
## - cluster_4  2   4932.0 4972.0
## - Education  3   4939.8 4977.8
## - CRE        1   4937.5 4979.5
## - Age        2   4940.5 4980.5
## - Sex        1   5007.3 5049.3
## - BMI        1   6231.9 6273.9
## 
## Step:  AIC=4962.67
## NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Smoking + 
##     Drinking + BMI + CRE
## 
##             Df Deviance    AIC
## - Drinking   2   4926.0 4960.0
## <none>           4924.7 4962.7
## - Smoking    3   4931.9 4963.9
## - Marrying   3   4935.7 4967.7
## - cluster_4  2   4933.7 4967.7
## - CRE        1   4938.9 4974.9
## - Age        2   4941.6 4975.6
## - Education  3   4945.3 4977.3
## - Sex        1   5012.2 5048.2
## - BMI        1   6235.9 6271.9
## 
## Step:  AIC=4960
## NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Smoking + 
##     BMI + CRE
## 
##             Df Deviance    AIC
## <none>           4926.0 4960.0
## - Smoking    3   4932.3 4960.3
## - Marrying   3   4936.6 4964.6
## - cluster_4  2   4934.9 4964.9
## - CRE        1   4939.9 4971.9
## - Age        2   4942.8 4972.8
## - Education  3   4946.6 4974.6
## - Sex        1   5021.3 5053.3
## - BMI        1   6239.9 6271.9
# Coefficient table of the model retained by step() for NAFLD Model 2.
summary(modd22)
## 
## Call:
## glm(formula = NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + 
##     Smoking + BMI + CRE, family = "binomial", data = sleepdisturbance)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -11.600210   0.479151 -24.210  < 2e-16 ***
## cluster_42    0.215726   0.078555   2.746 0.006029 ** 
## cluster_43    0.257700   0.116108   2.219 0.026454 *  
## Sex1          1.000097   0.108802   9.192  < 2e-16 ***
## Age1          0.315455   0.085499   3.690 0.000225 ***
## Age2          0.058126   0.111925   0.519 0.603532    
## Education2    0.097483   0.097938   0.995 0.319565    
## Education3    0.388180   0.089938   4.316 1.59e-05 ***
## Education4    0.318822   0.161776   1.971 0.048751 *  
## Marrying2     0.697918   0.235214   2.967 0.003006 ** 
## Marrying3     0.962621   0.322681   2.983 0.002853 ** 
## Marrying4     0.715047   0.322273   2.219 0.026503 *  
## Smoking2      0.115524   0.116684   0.990 0.322148    
## Smoking3      0.294325   0.118848   2.476 0.013269 *  
## Smoking4      0.137065   0.190710   0.719 0.472320    
## BMI           0.425168   0.014473  29.377  < 2e-16 ***
## CRE          -0.011533   0.003392  -3.400 0.000673 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6830.2  on 4969  degrees of freedom
## Residual deviance: 4926.0  on 4953  degrees of freedom
## AIC: 4960
## 
## Number of Fisher Scoring iterations: 5