MCA

### MCA walkthrough this script is based on: https://zhuanlan.zhihu.com/p/499395932 
# NOTE(review): rm(list = ls()) removes every visible object from the current
# workspace before the analysis starts; drop it if this file is ever sourced
# from a session that holds other work.
rm(list = ls())
library(dplyr)

## Warning: 程辑包'dplyr'是用R版本4.3.3 来建造的

## 
## 载入程辑包：'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

#install.packages('glmnet')
library(Matrix)
library(glmnet) ##Lasso回归

## Loaded glmnet 4.1-8

#install.packages('rms')
#install.packages('Hmisc')
library(Hmisc)

## 
## 载入程辑包：'Hmisc'

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

library(rms)  ## 画列线图；

## Warning in .recacheSubclasses(def@className, def, env):
## "replValueSp"类别的子类别"ndiMatrix"没有定义；因此没有更新

#install.packages('VIM')
library(colorspace)
library(grid)
library(VIM) ## 包中aggr()函数，判断数据缺失情况

## VIM is ready to use.

## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues

## 
## 载入程辑包：'VIM'

## The following object is masked from 'package:datasets':
## 
##     sleep

#install.packages('survival')
library(survival) ##  生存分析包
# Raw survey data; every later step works on this data frame.
sleepdisturbance <- read.csv('data.csv')

# Sleep-related subset used for the MCA. Columns 2:3 and 9:12 are addressed
# below by name as Sex, Age, Routine, Sleep.quality, Sleep.duration and SIS.
SDMode <- sleepdisturbance[, c(2:3, 9:12)]
# Legend carried over from the original notes:
#   A = difficulty falling asleep
#   B = waking easily at night or waking early
#   C = frequent dreams or nightmares
#   D = next-day fatigue, sleepiness, low energy
#   E = snoring during sleep

# Coerce every column to numeric and then to a factor, so the factor levels
# are the numeric codes ("0", "1", ...).
SDMode[] <- lapply(SDMode, function(column) as.factor(as.numeric(column)))

# Code -> label table for each variable. Codes that do not occur in the data
# are ignored, and codes that are not listed here keep their numeric level.
level_labels <- list(
  Sex = c("0" = 'Female', "1" = "Male"),
  Age = c("0" = 'Young', "1" = 'Adult', "2" = 'Aged'),
  Routine = c(
    "1" = 'Regular',
    "2" = 'Occasionally stay up late',
    "3" = 'Stay up late often',
    "4" = 'Very irregular'
  ),
  Sleep.quality = c(
    "1" = 'Very good',
    "2" = 'Better',
    "3" = 'Normal',
    "4" = 'Poor',
    "5" = 'Very poor'
  ),
  Sleep.duration = c("1" = '<7h', "2" = '7-9h', "3" = '>9h'),
  SIS = c("0" = 'No snoring in sleep', "1" = 'Snoring in sleep')
)

for (var_name in names(level_labels)) {
  code_map <- level_labels[[var_name]]
  current_levels <- levels(SDMode[[var_name]])
  is_known <- current_levels %in% names(code_map)
  current_levels[is_known] <- unname(code_map[current_levels[is_known]])
  levels(SDMode[[var_name]]) <- current_levels
}

# One bar chart of category counts per variable.
for (i in seq_along(SDMode)) {
  plot(SDMode[, i], main = colnames(SDMode)[i], ylab = 'Count',
       col = 'darkblue', las = 1, col.main = 'darkblue')
}

#install.packages('FactoMineR')
library("FactoMineR")
#install.packages('factoextra')
library("factoextra")

## 载入需要的程辑包：ggplot2

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

library("ggplot2")
# Fit the MCA. quali.sup = c(1:2) marks the first two columns of SDMode as
# supplementary qualitative variables (they are reported as Sex and Age in the
# "Supplementary categories" part of the summary).
# Figure 1: how each variable relates to dimensions 1 and 2.
res.mca <- MCA(SDMode, quali.sup = c(1:2), ncp = 5, graph = TRUE)

# Variant without supplementary variables, kept for reference:
#res.mca<-MCA(SDMode,ncp=5,graph=TRUE)

### Figure 2: share of variance explained by each dimension
### (compare with "Eigenvalues" / "% of var." in the summary).
fviz_screeplot(res.mca, addlabels = TRUE, ylim = c(0, 15))

summary(res.mca)

## 
## Call:
## MCA(X = SDMode, ncp = 5, quali.sup = c(1:2), graph = TRUE) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance               0.370   0.310   0.266   0.254   0.253   0.240   0.236
## % of var.             14.780  12.401  10.637  10.177  10.114   9.612   9.433
## Cumulative % of var.  14.780  27.182  37.819  47.996  58.110  67.722  77.155
##                        Dim.8   Dim.9  Dim.10
## Variance               0.213   0.194   0.163
## % of var.              8.531   7.779   6.536
## Cumulative % of var.  85.686  93.464 100.000
## 
## Individuals (the 10 first)
##                              Dim.1    ctr   cos2    Dim.2    ctr   cos2  
## 1                         | -1.219  0.081  0.571 |  0.733  0.035  0.206 |
## 2                         |  0.186  0.002  0.021 | -0.079  0.000  0.004 |
## 3                         |  0.491  0.013  0.135 | -0.729  0.034  0.298 |
## 4                         |  0.200  0.002  0.044 | -0.606  0.024  0.409 |
## 5                         |  0.200  0.002  0.044 | -0.606  0.024  0.409 |
## 6                         | -0.701  0.027  0.263 | -0.430  0.012  0.099 |
## 7                         | -0.997  0.054  0.500 |  0.134  0.001  0.009 |
## 8                         |  0.544  0.016  0.219 | -0.226  0.003  0.038 |
## 9                         | -0.676  0.025  0.241 |  0.704  0.032  0.261 |
## 10                        | -0.343  0.006  0.073 | -0.578  0.022  0.209 |
##                            Dim.3    ctr   cos2  
## 1                         -0.167  0.002  0.011 |
## 2                         -0.252  0.005  0.039 |
## 3                          0.401  0.012  0.090 |
## 4                          0.046  0.000  0.002 |
## 5                          0.046  0.000  0.002 |
## 6                          0.573  0.025  0.176 |
## 7                          0.190  0.003  0.018 |
## 8                         -0.484  0.018  0.174 |
## 9                         -0.462  0.016  0.112 |
## 10                         0.340  0.009  0.072 |
## 
## Categories (the 10 first)
##                               Dim.1     ctr    cos2  v.test     Dim.2     ctr
## Regular                   |  -0.747  12.613   0.280 -37.289 |   0.527   7.468
## Occasionally stay up late |  -0.026   0.018   0.000  -1.493 |  -0.730  17.034
## Stay up late often        |   0.811  10.287   0.198  31.352 |   0.117   0.254
## Very irregular            |   1.900   9.288   0.143  26.628 |   2.280  15.934
## Very good                 |  -1.079  13.100   0.232 -33.972 |   1.035  14.372
## Better                    |  -0.540   5.542   0.114 -23.794 |  -0.299   2.028
## Normal                    |   0.331   2.990   0.074  19.185 |  -0.627  12.802
## Poor                      |   1.212  12.824   0.218  32.883 |   0.916   8.717
## Very poor                 |   2.055   5.750   0.087  20.760 |   2.329   8.800
## <7h                       |   0.329   5.500   0.325  40.188 |  -0.050   0.151
##                              cos2  v.test     Dim.3     ctr    cos2  v.test  
## Regular                     0.139  26.283 |  -0.328   3.370   0.054 -16.353 |
## Occasionally stay up late   0.350 -41.715 |   0.463   7.987   0.141  26.455 |
## Stay up late often          0.004   4.512 |  -0.631   8.668   0.120 -24.415 |
## Very irregular              0.205  31.949 |   1.890  12.764   0.141  26.482 |
## Very good                   0.214  32.595 |  -0.303   1.433   0.018  -9.531 |
## Better                      0.035 -13.184 |   0.433   4.965   0.073  19.106 |
## Normal                      0.266 -36.364 |  -0.046   0.080   0.001  -2.661 |
## Poor                        0.124  24.833 |  -1.018  12.570   0.154 -27.619 |
## Very poor                   0.111  23.525 |   3.894  28.683   0.311  39.334 |
## <7h                         0.007  -6.104 |  -0.169   2.006   0.085 -20.590 |
## 
## Categorical variables (eta2)
##                             Dim.1 Dim.2 Dim.3  
## Routine                   | 0.476 0.505 0.349 |
## Sleep.quality             | 0.594 0.579 0.508 |
## Sleep.duration            | 0.325 0.144 0.119 |
## SIS                       | 0.083 0.012 0.089 |
## 
## Supplementary categories
##                               Dim.1    cos2  v.test     Dim.2    cos2  v.test  
## Female                    |  -0.046   0.002  -2.836 |   0.049   0.002   3.033 |
## Male                      |   0.035   0.002   2.836 |  -0.038   0.002  -3.033 |
## Young                     |   0.158   0.013   7.968 |  -0.142   0.010  -7.156 |
## Adult                     |   0.014   0.000   0.942 |   0.009   0.000   0.597 |
## Aged                      |  -0.342   0.025 -11.138 |   0.249   0.013   8.110 |
##                             Dim.3    cos2  v.test  
## Female                     -0.118   0.011  -7.265 |
## Male                        0.090   0.011   7.265 |
## Young                       0.030   0.000   1.503 |
## Adult                       0.028   0.001   1.938 |
## Aged                       -0.135   0.004  -4.411 |
## 
## Supplementary categorical variables (eta2)
##                             Dim.1 Dim.2 Dim.3  
## Sex                       | 0.002 0.002 0.011 |
## Age                       | 0.029 0.018 0.004 |

#### Figure 3 - variable analysis: contribution of the categories to each dimension
fviz_contrib(res.mca, choice = "var", axes = 1)

fviz_contrib(res.mca, choice = "var", axes = 2)

## Quality of representation (cos2) of each category on dimensions 1-2.
## Author's note: the values are relatively low, possibly because of the
## amount of data.
fviz_cos2(res.mca, choice = "var", axes = 1:2)

### Figure 4 - MCA map of the active categories only
### (supplementary categories and individuals are hidden).
plot(res.mca, invisible = c("quali.sup", "ind"), cex=1, col.var = "darkblue", 
     title = "Active categories", cex.main=2, col.main= "darkblue")

# Individuals map coloured by Sex (habillage = "Sex") with 95% ellipses.
fviz_mca_ind(res.mca, label = "none", habillage="Sex", 
             title="MCA individues", addEllipses = TRUE, ellipse.level = 0.95,
             palette = c("#00AFBB", "#E7B800"), ggtheme = theme_minimal())

# Individuals map coloured by Age group (habillage = "Age"), not by sex as the
# original comment said.
fviz_mca_ind(res.mca, label = "none", habillage="Age", 
             title="MCA individues", addEllipses = TRUE, ellipse.level = 0.95)

# To colour individuals by several categorical variables at once, use
# fviz_ellipses() from factoextra as shown here.
fviz_ellipses(res.mca, c("Sex", "Age"), geom = "point",addEllipses = TRUE, ellipse.level = 0.95)

## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Individuals together with active and supplementary categories (labelled
# "Figure 4 / active categories map" in the original notes). Only categories
# are labelled; select = "cos2 10" is passed through to the FactoMineR plot
# method - presumably keeping the 10 best-represented points, TODO confirm.
plot(res.mca, label = c("quali.sup", "var"), select = "cos2 10", 
     cex=1, col.var = "darkblue", col.quali.sup = "brown3",  
     col.ind = "seashell3", title = "Individuals with active and supplementary categories", 
     cex.main=2, col.main= "darkblue")

# Disabled: cos2 heat map of the individuals written to cor.pdf.
# library("corrplot")
# ind <- get_mca_ind(res.mca)
# pdf("cor.pdf")
# corrplot(ind$cos2, is.corr = F)
# dev.off()

Kmeans：肘部法（Elbow method）选定 3 类

### K-means on the MCA individual coordinates. Only the first two dimensions
### are kept and three clusters are requested; the original note here said
### "5 dimensions / 4 classes", which does not match the code.
km=data.frame(res.mca$ind$coord)[,c(1:2)]
set.seed(111)
# NOTE: despite the "4" in the object name, this fit uses centers = 3.
groupes.kmeans4 <- kmeans(km, centers = 3, nstart = 5)
#print(groupes.kmeans4)

# Disabled: export of the cluster plot to cluster4.pdf.
# pdf("cluster4.pdf")
# fviz_cluster(groupes.kmeans4, data = km, palette = "jco", repel= TRUE,
#         main = "Kmeans", ggtheme = theme_classic())
# dev.off()

# Cluster label of every individual (same values as cluster_4 further down).
clus_k54<-groupes.kmeans4$cluster

### Figure 6: choosing the number of clusters K (within-cluster sum of
### squares; the dashed line marks K = 3).
set.seed(111)
fviz_nbclust(km, kmeans, method = "wss")+ geom_vline(xintercept = 3, linetype = 2)+ labs(subtitle = "Elbow method")

### Figure 7: clustering result, drawn in several alternative styles.
fviz_cluster(groupes.kmeans4, data = km, show.clust.cent = TRUE,  geom = "point", palette = "jco")

fviz_cluster(groupes.kmeans4, data = km, palette = "Set2", ggtheme = theme_minimal())

fviz_cluster(groupes.kmeans4, data = km, geom = "text")

# Same plot with ellipse.type = "norm".
 fviz_cluster(groupes.kmeans4, data = km, ellipse.type = "norm")

fviz_cluster(groupes.kmeans4, data = km, palette = "jco", main = "Kmeans", ggtheme = theme_classic())

# Store the cluster labels on the main data frame as a new column.
cluster_4<-groupes.kmeans4$cluster
sleepdisturbance$cluster_4<-cluster_4

# NOTE(review): column 61 is presumably the cluster_4 column just appended and
# column 26 an earlier cluster assignment stored in the data - confirm.
# Cluster numbers are arbitrary labels, so this sum of absolute differences is
# not an agreement measure; a cross-tabulation would be more informative.
sum(abs(sleepdisturbance[,61]-sleepdisturbance[,26]))

## [1] 7985

轮廓系数计算

library(cluster)  
library(factoextra) 

# Silhouette analysis for k = 2..10 on the two MCA dimensions in `km`.
# The pairwise distance matrix does not depend on k, so it is computed once
# here and reused by every silhouette() call below instead of being rebuilt
# for each k.
km_dist <- dist(km)

### k = 2
set.seed(111)
c2<- kmeans(km, centers = 2, nstart = 5)
silhouette.result2 <-silhouette(c2$cluster,km_dist)

fviz_silhouette(silhouette.result2)

##   cluster size ave.sil.width
## 1       1 3370          0.37
## 2       2 1600          0.47

mean_silhouette2<-mean(silhouette.result2[,"sil_width"])
mean_silhouette2

## [1] 0.399774

### k = 3
# Same seed and settings as the main 3-cluster fit, so groupes.kmeans4 is
# simply refitted here.

set.seed(111)
groupes.kmeans4 <- kmeans(km, centers = 3, nstart = 5)
silhouette.result3 <-silhouette(groupes.kmeans4$cluster,km_dist)

fviz_silhouette(silhouette.result3)

##   cluster size ave.sil.width
## 1       1 1576          0.43
## 2       2 2725          0.51
## 3       3  669          0.45

mean_silhouette3 <-mean(silhouette.result3[,"sil_width"])
mean_silhouette3

## [1] 0.4742346

### k = 4
set.seed(111)
c4<- kmeans(km, centers = 4, nstart = 5)
silhouette.result4 <-silhouette(c4$cluster,km_dist)

fviz_silhouette(silhouette.result4)

##   cluster size ave.sil.width
## 1       1  665          0.42
## 2       2 2076          0.32
## 3       3 1713          0.53
## 4       4  516          0.57

mean_silhouette4 <-mean(silhouette.result4[,"sil_width"])
mean_silhouette4

## [1] 0.4326377

### k = 5
set.seed(111)
c5<- kmeans(km, centers = 5, nstart = 5)
silhouette.result5 <-silhouette(c5$cluster,km_dist)

fviz_silhouette(silhouette.result5)

##   cluster size ave.sil.width
## 1       1 1904          0.36
## 2       2  992          0.50
## 3       3  663          0.35
## 4       4  821          0.50
## 5       5  590          0.37

mean_silhouette5 <-mean(silhouette.result5[,"sil_width"])
mean_silhouette5

## [1] 0.4093591

### k = 6
set.seed(111)
c6<- kmeans(km, centers = 6, nstart = 5)
silhouette.result6 <-silhouette(c6$cluster,km_dist)

fviz_silhouette(silhouette.result6)

##   cluster size ave.sil.width
## 1       1  289          0.56
## 2       2 1108          0.54
## 3       3 1673          0.40
## 4       4  729          0.40
## 5       5  462          0.37
## 6       6  709          0.56

mean_silhouette6<-mean(silhouette.result6[,"sil_width"])
mean_silhouette6

## [1] 0.4609299

### k = 7
set.seed(111)
c7<- kmeans(km, centers = 7, nstart = 5)
silhouette.result7 <-silhouette(c7$cluster,km_dist)

fviz_silhouette(silhouette.result7)

##   cluster size ave.sil.width
## 1       1 1016          0.64
## 2       2  169          0.43
## 3       3  742          0.40
## 4       4 1189          0.43
## 5       5  469          0.39
## 6       6  497          0.46
## 7       7  888          0.35

mean_silhouette7 <-mean(silhouette.result7[,"sil_width"])
mean_silhouette7

## [1] 0.4534282

### k = 8
set.seed(111)
c8<- kmeans(km, centers = 8, nstart = 5)
silhouette.result8 <-silhouette(c8$cluster,km_dist)

fviz_silhouette(silhouette.result8)

##   cluster size ave.sil.width
## 1       1  504          0.49
## 2       2  150          0.46
## 3       3  837          0.44
## 4       4  876          0.75
## 5       5 1037          0.46
## 6       6  305          0.71
## 7       7  294          0.52
## 8       8  967          0.38

mean_silhouette8 <-mean(silhouette.result8[,"sil_width"])
mean_silhouette8

## [1] 0.5143155

### k = 9
set.seed(111)
c9<- kmeans(km, centers = 9, nstart = 5)
silhouette.result9 <-silhouette(c9$cluster,km_dist)

fviz_silhouette(silhouette.result9)

##   cluster size ave.sil.width
## 1       1  890          0.75
## 2       2  669          0.53
## 3       3  209          0.43
## 4       4  284          0.57
## 5       5  800          0.49
## 6       6  737          0.66
## 7       7   29          0.50
## 8       8  848          0.34
## 9       9  504          0.44

mean_silhouette9 <-mean(silhouette.result9[,"sil_width"])
mean_silhouette9

## [1] 0.539206

### k = 10
set.seed(111)
c10<- kmeans(km, centers = 10, nstart = 5)
silhouette.result10 <-silhouette(c10$cluster,km_dist)

fviz_silhouette(silhouette.result10)

##    cluster size ave.sil.width
## 1        1  794          0.48
## 2        2  320          0.70
## 3        3  737          0.65
## 4        4  169          1.00
## 5        5   37          0.50
## 6        6  164          0.43
## 7        7  397          0.38
## 8        8 1314          0.48
## 9        9  709          0.50
## 10      10  329          0.61

mean_silhouette10 <-mean(silhouette.result10[,"sil_width"])
mean_silhouette10

## [1] 0.5399552

# Collect the mean silhouette widths in the same order as k for plotting.
k<-c(2:10)
mean_silhouette<-c(mean_silhouette2,mean_silhouette3,mean_silhouette4,mean_silhouette5,mean_silhouette6,mean_silhouette7,mean_silhouette8,mean_silhouette9,mean_silhouette10)

轮廓系数画图

### Mean silhouette width against the number of clusters K.
par(mfrow = c(1, 1))
# The figure was designed for a 0.4-0.6 window, but the recorded k = 2 mean
# (0.399774) already lies below 0.4 and is only visible thanks to the default
# axis padding. Extend the window to the data so that no point is cut off.
sil_ylim <- range(0.4, 0.6, mean_silhouette, na.rm = TRUE)
plot(k, mean_silhouette, xlab = 'K', ylab = 'Mean silhouette', col = 'red',
     type = "o", pch = 19, ylim = sil_ylim)
# Value labels above each point; xpd = NA keeps a label readable even when it
# extends past the plot region.
text(k, mean_silhouette, labels = round(mean_silhouette, 2), pos = 3, xpd = NA)

###

聚类c1画图—睡眠模式最好

library(dplyr)
#sleepdisturbance1<-read.csv("data_cluster2.csv")
# Attach the k-means labels to the MCA input and keep columns 3:7, i.e. the
# four active sleep variables plus the cluster label.
SDMode$cluster_4 <- cluster_4
SDMode1 <- SDMode[, c(3:7)]
SDMode1[] <- lapply(SDMode1, as.factor)

# Members of cluster 1.
c1 <- SDMode1 %>% filter(cluster_4 == 1)

# Four bar charts (one per sleep variable) showing category counts, each bar
# labelled with its percentage of the cluster. barplot() returns the bar
# midpoints, which together with the counts give the label positions; the
# earlier text(category_per1, ...) calls used the percentages themselves as
# coordinates, so labels did not line up with the bars. ylim leaves headroom
# for the label above the tallest bar. Use paste0(category_perN, "%") as
# labels if a percent sign is wanted.
par(mfrow = c(2, 2))
category_counts1 <- table(c1[, 1])
category_per1 <- round(category_counts1 / length(c1[, 1]) * 100, 2)
category_per1

## 
##                   Regular Occasionally stay up late        Stay up late often 
##                     66.94                     23.98                      9.07 
##            Very irregular 
##                      0.00

bar_mid1 <- barplot(category_counts1, main = colnames(c1)[1], ylab = 'Count',
                    col = 'red', las = 1, col.main = 'red',
                    ylim = c(0, max(category_counts1, 1) * 1.15))
text(bar_mid1, category_counts1, labels = category_per1, pos = 3)

category_counts2 <- table(c1[, 2])
category_per2 <- round(category_counts2 / length(c1[, 2]) * 100, 2)
category_per2

## 
## Very good    Better    Normal      Poor Very poor 
##     48.79     43.53      6.92      0.76      0.00

bar_mid2 <- barplot(category_counts2, main = colnames(c1)[2], ylab = 'Count',
                    col = 'red', las = 1, col.main = 'red',
                    ylim = c(0, max(category_counts2, 1) * 1.15))
text(bar_mid2, category_counts2, labels = category_per2, pos = 3)

category_counts3 <- table(c1[, 3])
category_per3 <- round(category_counts3 / length(c1[, 3]) * 100, 2)
category_per3

## 
##   <7h  7-9h   >9h 
## 46.13 51.84  2.03

bar_mid3 <- barplot(category_counts3, main = colnames(c1)[3], ylab = 'Count',
                    col = 'red', las = 1, col.main = 'red',
                    ylim = c(0, max(category_counts3, 1) * 1.15))
text(bar_mid3, category_counts3, labels = category_per3, pos = 3)


category_counts4 <- table(c1[, 4])
category_per4 <- round(category_counts4 / length(c1[, 4]) * 100, 2)
category_per4

## 
## No snoring in sleep    Snoring in sleep 
##               91.31                8.69

bar_mid4 <- barplot(category_counts4, main = colnames(c1)[4], ylab = 'Count',
                    col = 'red', las = 1, col.main = 'red',
                    ylim = c(0, max(category_counts4, 1) * 1.15))
text(bar_mid4, category_counts4, labels = category_per4, pos = 3)

聚类c2画图—睡眠模式中等

# Members of cluster 2. NOTE: this reuses the name c2, which earlier held the
# k = 2 k-means fit from the silhouette section.
c2 <- SDMode1 %>% filter(cluster_4 == 2)

# Four bar charts (one per sleep variable) showing category counts, each bar
# labelled with its percentage of the cluster. barplot() returns the bar
# midpoints, which together with the counts give the label positions; the
# earlier text(category_per1, ...) calls used the percentages themselves as
# coordinates, so labels did not line up with the bars. ylim leaves headroom
# for the label above the tallest bar.
par(mfrow = c(2, 2))
category_counts1 <- table(c2[, 1])
category_per1 <- round(category_counts1 / length(c2[, 1]) * 100, 2)
category_per1

## 
##                   Regular Occasionally stay up late        Stay up late often 
##                     16.73                     57.61                     25.65 
##            Very irregular 
##                      0.00

bar_mid1 <- barplot(category_counts1, main = colnames(c2)[1], ylab = 'Count',
                    col = 'red', las = 1, col.main = 'red',
                    ylim = c(0, max(category_counts1, 1) * 1.15))
text(bar_mid1, category_counts1, labels = category_per1, pos = 3)


category_counts2 <- table(c2[, 2])
category_per2 <- round(category_counts2 / length(c2[, 2]) * 100, 2)
category_per2

## 
## Very good    Better    Normal      Poor Very poor 
##      0.84     25.58     67.38      6.20      0.00

bar_mid2 <- barplot(category_counts2, main = colnames(c2)[2], ylab = 'Count',
                    col = 'red', las = 1, col.main = 'red',
                    ylim = c(0, max(category_counts2, 1) * 1.15))
text(bar_mid2, category_counts2, labels = category_per2, pos = 3)


category_counts3 <- table(c2[, 3])
category_per3 <- round(category_counts3 / length(c2[, 3]) * 100, 2)
category_per3

## 
##   <7h  7-9h   >9h 
## 87.89 12.11  0.00

bar_mid3 <- barplot(category_counts3, main = colnames(c2)[3], ylab = 'Count',
                    col = 'red', las = 1, col.main = 'red',
                    ylim = c(0, max(category_counts3, 1) * 1.15))
text(bar_mid3, category_counts3, labels = category_per3, pos = 3)


category_counts4 <- table(c2[, 4])
category_per4 <- round(category_counts4 / length(c2[, 4]) * 100, 2)
category_per4

## 
## No snoring in sleep    Snoring in sleep 
##               74.06               25.94

bar_mid4 <- barplot(category_counts4, main = colnames(c2)[4], ylab = 'Count',
                    col = 'red', las = 1, col.main = 'red',
                    ylim = c(0, max(category_counts4, 1) * 1.15))
text(bar_mid4, category_counts4, labels = category_per4, pos = 3)

聚类c3画图–睡眠模式最差

# Members of cluster 3.
c3 <- SDMode1 %>% filter(cluster_4 == 3)

# Four bar charts (one per sleep variable) showing category counts, each bar
# labelled with its percentage of the cluster. barplot() returns the bar
# midpoints, which together with the counts give the label positions; the
# earlier text(category_per1, ...) calls used the percentages themselves as
# coordinates, so labels did not line up with the bars. ylim leaves headroom
# for the label above the tallest bar.
par(mfrow = c(2, 2))
category_counts1 <- table(c3[, 1])
category_per1 <- round(category_counts1 / length(c3[, 1]) * 100, 2)
category_per1

## 
##                   Regular Occasionally stay up late        Stay up late often 
##                     22.12                      3.59                     46.04 
##            Very irregular 
##                     28.25

bar_mid1 <- barplot(category_counts1, main = colnames(c3)[1], ylab = 'Count',
                    col = 'red', las = 1, col.main = 'red',
                    ylim = c(0, max(category_counts1, 1) * 1.15))
text(bar_mid1, category_counts1, labels = category_per1, pos = 3)



category_counts2 <- table(c3[, 2])
category_per2 <- round(category_counts2 / length(c3[, 2]) * 100, 2)
category_per2

## 
## Very good    Better    Normal      Poor Very poor 
##      5.23      2.09      8.97     68.76     14.95

bar_mid2 <- barplot(category_counts2, main = colnames(c3)[2], ylab = 'Count',
                    col = 'red', las = 1, col.main = 'red',
                    ylim = c(0, max(category_counts2, 1) * 1.15))
text(bar_mid2, category_counts2, labels = category_per2, pos = 3)


category_counts3 <- table(c3[, 3])
category_per3 <- round(category_counts3 / length(c3[, 3]) * 100, 2)
category_per3

## 
##   <7h  7-9h   >9h 
## 90.43  8.22  1.35

bar_mid3 <- barplot(category_counts3, main = colnames(c3)[3], ylab = 'Count',
                    col = 'red', las = 1, col.main = 'red',
                    ylim = c(0, max(category_counts3, 1) * 1.15))
text(bar_mid3, category_counts3, labels = category_per3, pos = 3)


category_counts4 <- table(c3[, 4])
category_per4 <- round(category_counts4 / length(c3[, 4]) * 100, 2)
category_per4

## 
## No snoring in sleep    Snoring in sleep 
##               70.85               29.15

bar_mid4 <- barplot(category_counts4, main = colnames(c3)[4], ylab = 'Count',
                    col = 'red', las = 1, col.main = 'red',
                    ylim = c(0, max(category_counts4, 1) * 1.15))
text(bar_mid4, category_counts4, labels = category_per4, pos = 3)

Logistic 回归—共病

library(autoReg)

## Warning: 程辑包'autoReg'是用R版本4.3.3 来建造的

## Missing-value imputation (disabled)
# Compute the mean of every column
# means <- colMeans(sleepdisturbance, na.rm = TRUE)
# 
# # Fill the missing values with the column means
# sleepdisturbance[is.na(sleepdisturbance)] <- round(means[col(sleepdisturbance)][is.na(sleepdisturbance)],2)

# Save the data (now including cluster_4) and read it straight back.
# write.csv() writes row names by default, so the re-read frame gains an extra
# leading column.
write.csv(sleepdisturbance,"sleepdisturbance.csv")

sleepdisturbance<-read.csv("sleepdisturbance.csv")

# Drop the first two columns of the re-read file: the row-name column and the
# column that follows it (presumably a record id - TODO confirm).
sleepdisturbance<-sleepdisturbance[,c(-1,-2)]

# Reorder columns: 1:22 and 24:33 first, then column 60, then 34:59, with
# column 23 last. Judging by the baseline table printed after this block, this
# places cluster_4 after the other categorical columns and Comorbidities at
# the very end - TODO confirm against the CSV header.
sleepdisturbance<-sleepdisturbance[,c(1:22,24:33,60,34:59,23)]


# Convert the first 33 columns (the categorical ones after the reordering)
# to factors.
for(j in 1:33){
  sleepdisturbance[,j] <- as.factor(as.numeric(sleepdisturbance[,j]))
}
# Baseline table (original note: "training-set baseline table")
#sleepdisturbance<-sleepdisturbance[,c(1:22,24:60,23)]

#sleepdisturbance<-sleepdisturbance[,c(33,60)]
# Descriptive summary of every column, rendered as a flextable.
gaze(~. ,data = sleepdisturbance) %>% myft()

name	levels	stats
Sex	0	2149 (43.2%)
	1	2821 (56.8%)
Age	0	1684 (33.9%)
	1	2411 (48.5%)
	2	875 (17.6%)
Education	1	1483 (29.8%)
	2	1110 (22.3%)
	3	2051 (41.3%)
	4	326 (6.6%)
Marrying	1	141 (2.8%)
	2	4580 (92.2%)
	3	123 (2.5%)
	4	126 (2.5%)
Income	1	1321 (26.6%)
	2	1436 (28.9%)
	3	1484 (29.9%)
	4	729 (14.7%)
Smoking	1	3421 (68.8%)
	2	680 (13.7%)
	3	685 (13.8%)
	4	184 (3.7%)
Drinking	1	2642 (53.2%)
	2	1687 (33.9%)
	3	641 (12.9%)
Routine	1	1659 (33.4%)
	2	1972 (39.7%)
	3	1150 (23.1%)
	4	189 (3.8%)
Sleep.quality	1	827 (16.6%)
	2	1397 (28.1%)
	3	2005 (40.3%)
	4	641 (12.9%)
	5	100 (2.0%)
Sleep.duration	1	3727 (75.0%)
	2	1202 (24.2%)
	3	41 (0.8%)
SIS	0	3931 (79.1%)
	1	1039 (20.9%)
DFA	0	4254 (85.6%)
	1	716 (14.4%)
EA	0	3834 (77.1%)
	1	1136 (22.9%)
DN	0	4517 (90.9%)
	1	453 (9.1%)
NFLP	0	4462 (89.8%)
	1	508 (10.2%)
Central.obesity	0	2547 (51.2%)
	1	2423 (48.8%)
Hypertension	0	3053 (61.4%)
	1	1917 (38.6%)
Dyslipidemia	0	4149 (83.5%)
	1	821 (16.5%)
Diabetes	0	1998 (40.2%)
	1	2972 (59.8%)
Hyperuricemia	0	4299 (86.5%)
	1	671 (13.5%)
NAFLD	0	2213 (44.5%)
	1	2757 (55.5%)
Num	0	767 (15.4%)
	1	940 (18.9%)
	2	960 (19.3%)
	3	1003 (20.2%)
	4	865 (17.4%)
	5	378 (7.6%)
	6	57 (1.1%)
diabete	0	2591 (52.1%)
	1	1766 (35.5%)
	2	613 (12.3%)
clus_k540	1	1474 (29.7%)
	2	301 (6.1%)
	3	1738 (35.0%)
	4	1457 (29.3%)
clus_k541	1	1457 (29.3%)
	2	1738 (35.0%)
	3	1474 (29.7%)
	4	301 (6.1%)
WC_c	0	1578 (31.8%)
	1	969 (19.5%)
	2	2423 (48.8%)
BMI_c	0	1573 (31.6%)
	1	60 (1.2%)
	2	2134 (42.9%)
	3	1203 (24.2%)
SUA_c	0	4211 (84.7%)
	1	759 (15.3%)
TC_c	0	3335 (67.1%)
	1	1635 (32.9%)
TG_c	0	3160 (63.6%)
	1	1810 (36.4%)
HDL_c	0	4538 (91.3%)
	1	432 (8.7%)
LDL_c	0	4000 (80.5%)
	1	970 (19.5%)
cluster_4	1	1576 (31.7%)
	2	2725 (54.8%)
	3	669 (13.5%)
AGE	Mean ± SD	49.8 ± 11.0
WC	Mean ± SD	86.9 ± 10.7
SBP	Mean ± SD	129.1 ± 18.5
DBP	Mean ± SD	79.0 ± 11.8
BMI	Mean ± SD	25.7 ± 3.5
CRE	Mean ± SD	68.5 ± 17.5
SUA	Mean ± SD	318.8 ± 87.9
efeg	Mean ± SD	101.5 ± 12.6
TC	Mean ± SD	4.9 ± 1.0
TG	Mean ± SD	1.8 ± 1.6
HDL	Mean ± SD	1.3 ± 0.3
LDL	Mean ± SD	2.9 ± 0.8
FPG	Mean ± SD	5.6 ± 1.5
HbA1c	Mean ± SD	5.8 ± 0.8
WBC	Mean ± SD	6.0 ± 1.6
RBC	Mean ± SD	4.8 ± 0.5
Hb	Mean ± SD	145.5 ± 15.8
PLT	Mean ± SD	240.8 ± 57.7
Ne	Mean ± SD	3.6 ± 1.2
Lym	Mean ± SD	2.0 ± 0.6
NLR	Mean ± SD	1.9 ± 0.8
PLR	Mean ± SD	131.0 ± 42.6
SII	Mean ± SD	464.4 ± 232.1
ALT	Mean ± SD	24.1 ± 28.3
AST	Mean ± SD	21.7 ± 12.5
GGT	Mean ± SD	32.1 ± 37.3
Comorbidities	Mean ± SD	0.7 ± 0.5

# Treat the outcome as categorical; in the table printed before this point it
# was still summarised as a numeric mean.
sleepdisturbance$Comorbidities<-as.factor(sleepdisturbance$Comorbidities)


#### Descriptive table again, ahead of the univariable / multivariable models
gaze(~. ,data = sleepdisturbance) %>% myft()

name	levels	stats
Sex	0	2149 (43.2%)
	1	2821 (56.8%)
Age	0	1684 (33.9%)
	1	2411 (48.5%)
	2	875 (17.6%)
Education	1	1483 (29.8%)
	2	1110 (22.3%)
	3	2051 (41.3%)
	4	326 (6.6%)
Marrying	1	141 (2.8%)
	2	4580 (92.2%)
	3	123 (2.5%)
	4	126 (2.5%)
Income	1	1321 (26.6%)
	2	1436 (28.9%)
	3	1484 (29.9%)
	4	729 (14.7%)
Smoking	1	3421 (68.8%)
	2	680 (13.7%)
	3	685 (13.8%)
	4	184 (3.7%)
Drinking	1	2642 (53.2%)
	2	1687 (33.9%)
	3	641 (12.9%)
Routine	1	1659 (33.4%)
	2	1972 (39.7%)
	3	1150 (23.1%)
	4	189 (3.8%)
Sleep.quality	1	827 (16.6%)
	2	1397 (28.1%)
	3	2005 (40.3%)
	4	641 (12.9%)
	5	100 (2.0%)
Sleep.duration	1	3727 (75.0%)
	2	1202 (24.2%)
	3	41 (0.8%)
SIS	0	3931 (79.1%)
	1	1039 (20.9%)
DFA	0	4254 (85.6%)
	1	716 (14.4%)
EA	0	3834 (77.1%)
	1	1136 (22.9%)
DN	0	4517 (90.9%)
	1	453 (9.1%)
NFLP	0	4462 (89.8%)
	1	508 (10.2%)
Central.obesity	0	2547 (51.2%)
	1	2423 (48.8%)
Hypertension	0	3053 (61.4%)
	1	1917 (38.6%)
Dyslipidemia	0	4149 (83.5%)
	1	821 (16.5%)
Diabetes	0	1998 (40.2%)
	1	2972 (59.8%)
Hyperuricemia	0	4299 (86.5%)
	1	671 (13.5%)
NAFLD	0	2213 (44.5%)
	1	2757 (55.5%)
Num	0	767 (15.4%)
	1	940 (18.9%)
	2	960 (19.3%)
	3	1003 (20.2%)
	4	865 (17.4%)
	5	378 (7.6%)
	6	57 (1.1%)
diabete	0	2591 (52.1%)
	1	1766 (35.5%)
	2	613 (12.3%)
clus_k540	1	1474 (29.7%)
	2	301 (6.1%)
	3	1738 (35.0%)
	4	1457 (29.3%)
clus_k541	1	1457 (29.3%)
	2	1738 (35.0%)
	3	1474 (29.7%)
	4	301 (6.1%)
WC_c	0	1578 (31.8%)
	1	969 (19.5%)
	2	2423 (48.8%)
BMI_c	0	1573 (31.6%)
	1	60 (1.2%)
	2	2134 (42.9%)
	3	1203 (24.2%)
SUA_c	0	4211 (84.7%)
	1	759 (15.3%)
TC_c	0	3335 (67.1%)
	1	1635 (32.9%)
TG_c	0	3160 (63.6%)
	1	1810 (36.4%)
HDL_c	0	4538 (91.3%)
	1	432 (8.7%)
LDL_c	0	4000 (80.5%)
	1	970 (19.5%)
cluster_4	1	1576 (31.7%)
	2	2725 (54.8%)
	3	669 (13.5%)
AGE	Mean ± SD	49.8 ± 11.0
WC	Mean ± SD	86.9 ± 10.7
SBP	Mean ± SD	129.1 ± 18.5
DBP	Mean ± SD	79.0 ± 11.8
BMI	Mean ± SD	25.7 ± 3.5
CRE	Mean ± SD	68.5 ± 17.5
SUA	Mean ± SD	318.8 ± 87.9
efeg	Mean ± SD	101.5 ± 12.6
TC	Mean ± SD	4.9 ± 1.0
TG	Mean ± SD	1.8 ± 1.6
HDL	Mean ± SD	1.3 ± 0.3
LDL	Mean ± SD	2.9 ± 0.8
FPG	Mean ± SD	5.6 ± 1.5
HbA1c	Mean ± SD	5.8 ± 0.8
WBC	Mean ± SD	6.0 ± 1.6
RBC	Mean ± SD	4.8 ± 0.5
Hb	Mean ± SD	145.5 ± 15.8
PLT	Mean ± SD	240.8 ± 57.7
Ne	Mean ± SD	3.6 ± 1.2
Lym	Mean ± SD	2.0 ± 0.6
NLR	Mean ± SD	1.9 ± 0.8
PLR	Mean ± SD	131.0 ± 42.6
SII	Mean ± SD	464.4 ± 232.1
ALT	Mean ± SD	24.1 ± 28.3
AST	Mean ± SD	21.7 ± 12.5
GGT	Mean ± SD	32.1 ± 37.3
Comorbidities	0	1707 (34.3%)
	1	3263 (65.7%)

# Logistic regression of Comorbidities on the sleep cluster plus demographic,
# lifestyle and clinical covariates.
mod <- glm(Comorbidities~cluster_4+Sex+Age+Education+Marrying+Income+Smoking+Drinking+BMI+CRE, data = sleepdisturbance, family = "binomial")
# uni = TRUE adds the univariable odds ratios next to the multivariable ones;
# threshold = 0.05 is presumably the p-value cut-off for carrying a variable
# into the multivariable model - TODO confirm in the autoReg documentation.
aovresult <- autoReg(mod, uni=TRUE, threshold = 0.05) # output the univariable results
aovresult %>% myft()

Dependent: Comorbidities		0 (N=1707)	1 (N=3263)	OR (univariable)	OR (multivariable)
cluster_4	1	552 (32.3%)	1024 (31.4%)
	2	953 (55.8%)	1772 (54.3%)	1.00 (0.88-1.14, p=.972)	1.17 (0.99-1.39, p=.071)
	3	202 (11.8%)	467 (14.3%)	1.25 (1.03-1.51, p=.027)	1.35 (1.05-1.74, p=.021)
Sex	0	1079 (63.2%)	1070 (32.8%)
	1	628 (36.8%)	2193 (67.2%)	3.52 (3.12-3.98, p<.001)	2.17 (1.69-2.77, p<.001)
Age	0	736 (43.1%)	948 (29.1%)
	1	762 (44.6%)	1649 (50.5%)	1.68 (1.48-1.91, p<.001)	2.14 (1.78-2.58, p<.001)
	2	209 (12.2%)	666 (20.4%)	2.47 (2.06-2.97, p<.001)	3.71 (2.86-4.81, p<.001)
Education	1	452 (26.5%)	1031 (31.6%)
	2	361 (21.1%)	749 (23%)	0.91 (0.77-1.08, p=.267)	1.19 (0.95-1.48, p=.122)
	3	742 (43.5%)	1309 (40.1%)	0.77 (0.67-0.89, p<.001)	1.42 (1.14-1.76, p=.001)
	4	152 (8.9%)	174 (5.3%)	0.50 (0.39-0.64, p<.001)	1.11 (0.76-1.60, p=.592)
Marrying	1	82 (4.8%)	59 (1.8%)
	2	1547 (90.6%)	3033 (93%)	2.72 (1.94-3.83, p<.001)	2.06 (1.25-3.40, p=.004)
	3	52 (3%)	71 (2.2%)	1.90 (1.16-3.10, p=.010)	2.16 (1.09-4.30, p=.028)
	4	26 (1.5%)	100 (3.1%)	5.35 (3.10-9.23, p<.001)	4.05 (1.95-8.42, p<.001)
Income	1	414 (24.3%)	907 (27.8%)
	2	541 (31.7%)	895 (27.4%)	0.76 (0.64-0.88, p<.001)	0.65 (0.52-0.81, p<.001)
	3	523 (30.6%)	961 (29.5%)	0.84 (0.72-0.98, p=.029)	0.69 (0.54-0.89, p=.003)
	4	229 (13.4%)	500 (15.3%)	1.00 (0.82-1.21, p=.973)	0.74 (0.55-1.00, p=.054)
Smoking	1	1391 (81.5%)	2030 (62.2%)
	2	147 (8.6%)	533 (16.3%)	2.48 (2.04-3.02, p<.001)	1.28 (0.98-1.67, p=.065)
	3	132 (7.7%)	553 (16.9%)	2.87 (2.35-3.51, p<.001)	1.53 (1.14-2.04, p=.004)
	4	37 (2.2%)	147 (4.5%)	2.72 (1.89-3.93, p<.001)	1.37 (0.86-2.17, p=.183)
Drinking	1	1095 (64.1%)	1547 (47.4%)
	2	505 (29.6%)	1182 (36.2%)	1.66 (1.46-1.89, p<.001)	0.87 (0.71-1.05, p=.143)
	3	107 (6.3%)	534 (16.4%)	3.53 (2.83-4.41, p<.001)	1.08 (0.79-1.46, p=.640)
BMI	Mean ± SD	23.0 ± 2.5	27.1 ± 3.2	1.75 (1.69-1.81, p<.001)	1.71 (1.65-1.77, p<.001)
CRE	Mean ± SD	64.1 ± 13.0	70.7 ± 19.0	1.04 (1.03-1.04, p<.001)	1.00 (1.00-1.01, p=.446)

# Univariable model: sleep cluster as the only predictor. The original note
# here also mentioned backward elimination, but none is applied to this model;
# the step() calls on the larger models do that.
mod0 <- glm(Comorbidities~cluster_4, data = sleepdisturbance, family = "binomial")
summary(mod0)

## 
## Call:
## glm(formula = Comorbidities ~ cluster_4, family = "binomial", 
##     data = sleepdisturbance)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 0.617924   0.052803  11.702   <2e-16 ***
## cluster_42  0.002325   0.066346   0.035   0.9720    
## cluster_43  0.220138   0.099398   2.215   0.0268 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6394.4  on 4969  degrees of freedom
## Residual deviance: 6388.4  on 4967  degrees of freedom
## AIC: 6394.4
## 
## Number of Fisher Scoring iterations: 4

# Model 1: sleep cluster adjusted for demographic and lifestyle covariates
# (no BMI or CRE).
mod1 <- glm(Comorbidities~cluster_4+Sex+Age+Education+Marrying+Income+Smoking+Drinking
, data = sleepdisturbance, family = "binomial")
# AIC-based stepwise selection; with no scope given, the printed trace only
# removes terms (Smoking is dropped).
mod11<-step(mod1)

## Start:  AIC=5683.92
## Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying + 
##     Income + Smoking + Drinking
## 
##             Df Deviance    AIC
## - Smoking    3   5648.5 5682.5
## <none>           5643.9 5683.9
## - cluster_4  2   5651.3 5687.3
## - Education  3   5657.5 5691.5
## - Income     3   5657.9 5691.9
## - Drinking   2   5657.2 5693.2
## - Marrying   3   5668.2 5702.2
## - Age        2   5766.1 5802.1
## - Sex        1   5911.9 5949.9
## 
## Step:  AIC=5682.51
## Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying + 
##     Income + Drinking
## 
##             Df Deviance    AIC
## <none>           5648.5 5682.5
## - cluster_4  2   5656.7 5686.7
## - Income     3   5662.2 5690.2
## - Education  3   5663.8 5691.8
## - Drinking   2   5666.2 5696.2
## - Marrying   3   5672.8 5700.8
## - Age        2   5771.7 5801.7
## - Sex        1   5998.7 6030.7

# Coefficients of the model retained by step() from mod1.
summary(mod11)

## 
## Call:
## glm(formula = Comorbidities ~ cluster_4 + Sex + Age + Education + 
##     Marrying + Income + Drinking, family = "binomial", data = sleepdisturbance)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.06073    0.21353  -4.968 6.78e-07 ***
## cluster_42   0.12647    0.07290   1.735 0.082778 .  
## cluster_43   0.29823    0.10716   2.783 0.005385 ** 
## Sex1         1.46853    0.08192  17.928  < 2e-16 ***
## Age1         0.70172    0.07728   9.080  < 2e-16 ***
## Age2         1.09783    0.11189   9.812  < 2e-16 ***
## Education2  -0.17203    0.09426  -1.825 0.067995 .  
## Education3  -0.18261    0.09090  -2.009 0.044546 *  
## Education4  -0.59017    0.15264  -3.866 0.000110 ***
## Marrying2    0.59202    0.19199   3.084 0.002045 ** 
## Marrying3    0.46899    0.27227   1.722 0.084980 .  
## Marrying4    1.41763    0.30047   4.718 2.38e-06 ***
## Income2     -0.33695    0.09267  -3.636 0.000277 ***
## Income3     -0.21309    0.10297  -2.069 0.038501 *  
## Income4     -0.16280    0.12841  -1.268 0.204854    
## Drinking2    0.04760    0.08120   0.586 0.557782    
## Drinking3    0.51088    0.12876   3.968 7.26e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6394.4  on 4969  degrees of freedom
## Residual deviance: 5648.5  on 4953  degrees of freedom
## AIC: 5682.5
## 
## Number of Fisher Scoring iterations: 4

# Same Comorbidities model as mod1 with two continuous covariates added
# (BMI and CRE), then reduced with step() using its default AIC criterion.
mod2 <- glm(
  Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying + Income +
    Smoking + Drinking + BMI + CRE,
  family = "binomial",
  data = sleepdisturbance
)
mod22 <- step(mod2)

## Start:  AIC=4148.94
## Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying + 
##     Income + Smoking + Drinking + BMI + CRE
## 
##             Df Deviance    AIC
## - CRE        1   4105.6 4147.6
## - Drinking   2   4108.4 4148.4
## <none>           4104.9 4148.9
## - cluster_4  2   4111.1 4151.1
## - Smoking    3   4114.9 4152.9
## - Education  3   4116.3 4154.3
## - Marrying   3   4119.7 4157.7
## - Income     3   4120.9 4158.9
## - Sex        1   4141.7 4183.7
## - Age        2   4218.2 4258.2
## - BMI        1   5634.6 5676.6
## 
## Step:  AIC=4147.61
## Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying + 
##     Income + Smoking + Drinking + BMI
## 
##             Df Deviance    AIC
## - Drinking   2   4109.0 4147.0
## <none>           4105.6 4147.6
## - cluster_4  2   4111.7 4149.7
## - Smoking    3   4115.4 4151.4
## - Education  3   4117.2 4153.2
## - Marrying   3   4120.3 4156.3
## - Income     3   4121.7 4157.7
## - Sex        1   4167.1 4207.1
## - Age        2   4219.5 4257.5
## - BMI        1   5643.9 5683.9
## 
## Step:  AIC=4146.98
## Comorbidities ~ cluster_4 + Sex + Age + Education + Marrying + 
##     Income + Smoking + BMI
## 
##             Df Deviance    AIC
## <none>           4109.0 4147.0
## - cluster_4  2   4115.1 4149.1
## - Education  3   4120.2 4152.2
## - Smoking    3   4120.7 4152.7
## - Marrying   3   4124.2 4156.2
## - Income     3   4124.9 4156.9
## - Sex        1   4173.2 4209.2
## - Age        2   4224.9 4258.9
## - BMI        1   5657.2 5693.2

# Coefficient table and fit statistics for mod22, the model kept by step(mod2).
summary(mod22)

## 
## Call:
## glm(formula = Comorbidities ~ cluster_4 + Sex + Age + Education + 
##     Marrying + Income + Smoking + BMI, family = "binomial", data = sleepdisturbance)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -14.56545    0.53108 -27.426  < 2e-16 ***
## cluster_42    0.15595    0.08714   1.790 0.073515 .  
## cluster_43    0.29819    0.13009   2.292 0.021892 *  
## Sex1          0.79273    0.09987   7.937 2.07e-15 ***
## Age1          0.76270    0.09458   8.064 7.37e-16 ***
## Age2          1.32640    0.13201  10.048  < 2e-16 ***
## Education2    0.17484    0.11246   1.555 0.120023    
## Education3    0.34828    0.10971   3.175 0.001500 ** 
## Education4    0.10146    0.18823   0.539 0.589862    
## Marrying2     0.73071    0.25393   2.878 0.004007 ** 
## Marrying3     0.77707    0.34940   2.224 0.026148 *  
## Marrying4     1.41628    0.37304   3.797 0.000147 ***
## Income2      -0.43277    0.11108  -3.896 9.77e-05 ***
## Income3      -0.36330    0.12424  -2.924 0.003453 ** 
## Income4      -0.30380    0.15497  -1.960 0.049947 *  
## Smoking2      0.22463    0.13397   1.677 0.093594 .  
## Smoking3      0.44594    0.13912   3.206 0.001348 ** 
## Smoking4      0.31325    0.23326   1.343 0.179306    
## BMI           0.53595    0.01781  30.088  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6394.4  on 4969  degrees of freedom
## Residual deviance: 4109.0  on 4951  degrees of freedom
## AIC: 4147
## 
## Number of Fisher Scoring iterations: 5

logistic –NAFLD

#### Univariable / multivariable analysis
# Start with a descriptive summary of every column in sleepdisturbance
# (formula `~ .` with no grouping variable), formatted by myft().
myft(gaze(~ ., data = sleepdisturbance))

name	levels	stats
Sex	0	2149 (43.2%)
	1	2821 (56.8%)
Age	0	1684 (33.9%)
	1	2411 (48.5%)
	2	875 (17.6%)
Education	1	1483 (29.8%)
	2	1110 (22.3%)
	3	2051 (41.3%)
	4	326 (6.6%)
Marrying	1	141 (2.8%)
	2	4580 (92.2%)
	3	123 (2.5%)
	4	126 (2.5%)
Income	1	1321 (26.6%)
	2	1436 (28.9%)
	3	1484 (29.9%)
	4	729 (14.7%)
Smoking	1	3421 (68.8%)
	2	680 (13.7%)
	3	685 (13.8%)
	4	184 (3.7%)
Drinking	1	2642 (53.2%)
	2	1687 (33.9%)
	3	641 (12.9%)
Routine	1	1659 (33.4%)
	2	1972 (39.7%)
	3	1150 (23.1%)
	4	189 (3.8%)
Sleep.quality	1	827 (16.6%)
	2	1397 (28.1%)
	3	2005 (40.3%)
	4	641 (12.9%)
	5	100 (2.0%)
Sleep.duration	1	3727 (75.0%)
	2	1202 (24.2%)
	3	41 (0.8%)
SIS	0	3931 (79.1%)
	1	1039 (20.9%)
DFA	0	4254 (85.6%)
	1	716 (14.4%)
EA	0	3834 (77.1%)
	1	1136 (22.9%)
DN	0	4517 (90.9%)
	1	453 (9.1%)
NFLP	0	4462 (89.8%)
	1	508 (10.2%)
Central.obesity	0	2547 (51.2%)
	1	2423 (48.8%)
Hypertension	0	3053 (61.4%)
	1	1917 (38.6%)
Dyslipidemia	0	4149 (83.5%)
	1	821 (16.5%)
Diabetes	0	1998 (40.2%)
	1	2972 (59.8%)
Hyperuricemia	0	4299 (86.5%)
	1	671 (13.5%)
NAFLD	0	2213 (44.5%)
	1	2757 (55.5%)
Num	0	767 (15.4%)
	1	940 (18.9%)
	2	960 (19.3%)
	3	1003 (20.2%)
	4	865 (17.4%)
	5	378 (7.6%)
	6	57 (1.1%)
diabete	0	2591 (52.1%)
	1	1766 (35.5%)
	2	613 (12.3%)
clus_k540	1	1474 (29.7%)
	2	301 (6.1%)
	3	1738 (35.0%)
	4	1457 (29.3%)
clus_k541	1	1457 (29.3%)
	2	1738 (35.0%)
	3	1474 (29.7%)
	4	301 (6.1%)
WC_c	0	1578 (31.8%)
	1	969 (19.5%)
	2	2423 (48.8%)
BMI_c	0	1573 (31.6%)
	1	60 (1.2%)
	2	2134 (42.9%)
	3	1203 (24.2%)
SUA_c	0	4211 (84.7%)
	1	759 (15.3%)
TC_c	0	3335 (67.1%)
	1	1635 (32.9%)
TG_c	0	3160 (63.6%)
	1	1810 (36.4%)
HDL_c	0	4538 (91.3%)
	1	432 (8.7%)
LDL_c	0	4000 (80.5%)
	1	970 (19.5%)
cluster_4	1	1576 (31.7%)
	2	2725 (54.8%)
	3	669 (13.5%)
AGE	Mean ± SD	49.8 ± 11.0
WC	Mean ± SD	86.9 ± 10.7
SBP	Mean ± SD	129.1 ± 18.5
DBP	Mean ± SD	79.0 ± 11.8
BMI	Mean ± SD	25.7 ± 3.5
CRE	Mean ± SD	68.5 ± 17.5
SUA	Mean ± SD	318.8 ± 87.9
efeg	Mean ± SD	101.5 ± 12.6
TC	Mean ± SD	4.9 ± 1.0
TG	Mean ± SD	1.8 ± 1.6
HDL	Mean ± SD	1.3 ± 0.3
LDL	Mean ± SD	2.9 ± 0.8
FPG	Mean ± SD	5.6 ± 1.5
HbA1c	Mean ± SD	5.8 ± 0.8
WBC	Mean ± SD	6.0 ± 1.6
RBC	Mean ± SD	4.8 ± 0.5
Hb	Mean ± SD	145.5 ± 15.8
PLT	Mean ± SD	240.8 ± 57.7
Ne	Mean ± SD	3.6 ± 1.2
Lym	Mean ± SD	2.0 ± 0.6
NLR	Mean ± SD	1.9 ± 0.8
PLR	Mean ± SD	131.0 ± 42.6
SII	Mean ± SD	464.4 ± 232.1
ALT	Mean ± SD	24.1 ± 28.3
AST	Mean ± SD	21.7 ± 12.5
GGT	Mean ± SD	32.1 ± 37.3
Comorbidities	0	1707 (34.3%)
	1	3263 (65.7%)

# Full logistic model for NAFLD: sleep cluster plus demographic, lifestyle,
# BMI and CRE covariates.
modd <- glm(
  NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Income +
    Smoking + Drinking + BMI + CRE,
  family = "binomial",
  data = sleepdisturbance
)
# uni = TRUE adds the univariable odds ratios next to the multivariable ones;
# threshold = 0.05 is the p-value cut-off autoReg() applies when deciding
# which variables enter the multivariable column.
aovresult <- autoReg(modd, uni = TRUE, threshold = 0.05)
myft(aovresult)

Dependent: NAFLD		0 (N=2213)	1 (N=2757)	OR (univariable)	OR (multivariable)
cluster_4	1	753 (34%)	823 (29.9%)
	2	1184 (53.5%)	1541 (55.9%)	1.19 (1.05-1.35, p=.006)	1.24 (1.06-1.44, p=.007)
	3	276 (12.5%)	393 (14.3%)	1.30 (1.09-1.56, p=.005)	1.27 (1.01-1.59, p=.039)
Sex	0	1345 (60.8%)	804 (29.2%)
	1	868 (39.2%)	1953 (70.8%)	3.76 (3.34-4.24, p<.001)	2.73 (2.18-3.42, p<.001)
Age	0	773 (34.9%)	911 (33%)
	1	1005 (45.4%)	1406 (51%)	1.19 (1.05-1.35, p=.007)	1.31 (1.11-1.55, p=.001)
	2	435 (19.7%)	440 (16%)	0.86 (0.73-1.01, p=.067)	1.00 (0.80-1.25, p=.993)
Education	1	679 (30.7%)	804 (29.2%)
	2	504 (22.8%)	606 (22%)	1.02 (0.87-1.19, p=.848)
	3	872 (39.4%)	1179 (42.8%)	1.14 (1.00-1.31, p=.053)
	4	158 (7.1%)	168 (6.1%)	0.90 (0.71-1.14, p=.380)
Marrying	1	87 (3.9%)	54 (2%)
	2	2006 (90.6%)	2574 (93.4%)	2.07 (1.46-2.92, p<.001)	1.88 (1.19-2.97, p=.007)
	3	58 (2.6%)	65 (2.4%)	1.81 (1.11-2.95, p=.018)	2.47 (1.32-4.65, p=.005)
	4	62 (2.8%)	64 (2.3%)	1.66 (1.02-2.71, p=.041)	1.92 (1.02-3.60, p=.043)
Income	1	658 (29.7%)	663 (24%)
	2	678 (30.6%)	758 (27.5%)	1.11 (0.96-1.29, p=.173)	1.01 (0.84-1.22, p=.932)
	3	613 (27.7%)	871 (31.6%)	1.41 (1.21-1.64, p<.001)	1.20 (0.98-1.46, p=.076)
	4	264 (11.9%)	465 (16.9%)	1.75 (1.45-2.10, p<.001)	1.23 (0.96-1.57, p=.110)
Smoking	1	1752 (79.2%)	1669 (60.5%)
	2	207 (9.4%)	473 (17.2%)	2.40 (2.01-2.86, p<.001)	1.10 (0.87-1.38, p=.437)
	3	194 (8.8%)	491 (17.8%)	2.66 (2.22-3.18, p<.001)	1.36 (1.07-1.74, p=.013)
	4	60 (2.7%)	124 (4.5%)	2.17 (1.58-2.97, p<.001)	1.18 (0.81-1.72, p=.400)
Drinking	1	1420 (64.2%)	1222 (44.3%)
	2	614 (27.7%)	1073 (38.9%)	2.03 (1.79-2.30, p<.001)	1.04 (0.87-1.23, p=.678)
	3	179 (8.1%)	462 (16.8%)	3.00 (2.48-3.62, p<.001)	0.90 (0.70-1.17, p=.431)
BMI	Mean ± SD	23.6 ± 2.8	27.3 ± 3.2	1.56 (1.52-1.60, p<.001)	1.52 (1.48-1.56, p<.001)
CRE	Mean ± SD	65.6 ± 20.9	70.7 ± 13.7	1.03 (1.02-1.03, p<.001)	0.99 (0.98-1.00, p=.001)

# Univariable (crude) model: NAFLD regressed on the sleep cluster alone.
# The adjusted models that follow are reduced by backward elimination.
modd0 <- glm(
  NAFLD ~ cluster_4,
  family = "binomial",
  data = sleepdisturbance
)
summary(modd0)

## 
## Call:
## glm(formula = NAFLD ~ cluster_4, family = "binomial", data = sleepdisturbance)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  0.08889    0.05043   1.763  0.07795 . 
## cluster_42   0.17464    0.06353   2.749  0.00598 **
## cluster_43   0.26452    0.09333   2.834  0.00459 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6830.2  on 4969  degrees of freedom
## Residual deviance: 6819.3  on 4967  degrees of freedom
## AIC: 6825.3
## 
## Number of Fisher Scoring iterations: 4

# NAFLD adjusted for demographic and lifestyle covariates. step() is called
# without a scope, so it reduces the model by AIC-based backward elimination.
modd1 <- glm(
  NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Income +
    Smoking + Drinking,
  family = "binomial",
  data = sleepdisturbance
)
modd11 <- step(modd1)

## Start:  AIC=6273.82
## NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Income + 
##     Smoking + Drinking
## 
##             Df Deviance    AIC
## - Smoking    3   6236.7 6270.7
## - Income     3   6237.7 6271.7
## - Drinking   2   6237.7 6273.7
## <none>           6233.8 6273.8
## - Education  3   6241.1 6275.1
## - cluster_4  2   6244.8 6280.8
## - Marrying   3   6249.6 6283.6
## - Age        2   6262.0 6298.0
## - Sex        1   6480.8 6518.8
## 
## Step:  AIC=6270.67
## NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Income + 
##     Drinking
## 
##             Df Deviance    AIC
## - Income     3   6240.4 6268.4
## <none>           6236.7 6270.7
## - Education  3   6244.8 6272.8
## - Drinking   2   6243.2 6273.2
## - cluster_4  2   6248.2 6278.2
## - Marrying   3   6252.4 6280.4
## - Age        2   6264.6 6294.6
## - Sex        1   6553.7 6585.7
## 
## Step:  AIC=6268.42
## NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Drinking
## 
##             Df Deviance    AIC
## <none>           6240.4 6268.4
## - Education  3   6247.7 6269.7
## - Drinking   2   6247.1 6271.1
## - cluster_4  2   6252.2 6276.2
## - Marrying   3   6256.4 6278.4
## - Age        2   6267.4 6291.4
## - Sex        1   6578.7 6604.7

# Coefficient table and fit statistics for modd11, the model kept by step(modd1).
summary(modd11)

## 
## Call:
## glm(formula = NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + 
##     Drinking, family = "binomial", data = sleepdisturbance)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.46861    0.20625  -7.120 1.08e-12 ***
## cluster_42   0.19689    0.06847   2.876 0.004032 ** 
## cluster_43   0.29347    0.09968   2.944 0.003239 ** 
## Sex1         1.32160    0.07392  17.880  < 2e-16 ***
## Age1         0.35052    0.07283   4.813 1.49e-06 ***
## Age2         0.08175    0.09783   0.836 0.403358    
## Education2  -0.15702    0.08562  -1.834 0.066660 .  
## Education3  -0.06682    0.07758  -0.861 0.389046    
## Education4  -0.31922    0.13671  -2.335 0.019543 *  
## Marrying2    0.63804    0.18928   3.371 0.000749 ***
## Marrying3    0.71605    0.26683   2.684 0.007285 ** 
## Marrying4    1.02961    0.26954   3.820 0.000133 ***
## Drinking2    0.13444    0.07578   1.774 0.076059 .  
## Drinking3    0.26696    0.10943   2.440 0.014706 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6830.2  on 4969  degrees of freedom
## Residual deviance: 6240.4  on 4956  degrees of freedom
## AIC: 6268.4
## 
## Number of Fisher Scoring iterations: 4

# Same NAFLD model as modd1 with BMI and CRE added as continuous covariates,
# then reduced with step() using its default AIC criterion.
modd2 <- glm(
  NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Income +
    Smoking + Drinking + BMI + CRE,
  family = "binomial",
  data = sleepdisturbance
)
modd22 <- step(modd2)

## Start:  AIC=4967.14
## NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Income + 
##     Smoking + Drinking + BMI + CRE
## 
##             Df Deviance    AIC
## - Income     3   4924.7 4962.7
## - Drinking   2   4924.5 4964.5
## <none>           4923.1 4967.1
## - Smoking    3   4930.6 4968.6
## - Marrying   3   4933.9 4971.9
## - cluster_4  2   4932.0 4972.0
## - Education  3   4939.8 4977.8
## - CRE        1   4937.5 4979.5
## - Age        2   4940.5 4980.5
## - Sex        1   5007.3 5049.3
## - BMI        1   6231.9 6273.9
## 
## Step:  AIC=4962.67
## NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Smoking + 
##     Drinking + BMI + CRE
## 
##             Df Deviance    AIC
## - Drinking   2   4926.0 4960.0
## <none>           4924.7 4962.7
## - Smoking    3   4931.9 4963.9
## - Marrying   3   4935.7 4967.7
## - cluster_4  2   4933.7 4967.7
## - CRE        1   4938.9 4974.9
## - Age        2   4941.6 4975.6
## - Education  3   4945.3 4977.3
## - Sex        1   5012.2 5048.2
## - BMI        1   6235.9 6271.9
## 
## Step:  AIC=4960
## NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + Smoking + 
##     BMI + CRE
## 
##             Df Deviance    AIC
## <none>           4926.0 4960.0
## - Smoking    3   4932.3 4960.3
## - Marrying   3   4936.6 4964.6
## - cluster_4  2   4934.9 4964.9
## - CRE        1   4939.9 4971.9
## - Age        2   4942.8 4972.8
## - Education  3   4946.6 4974.6
## - Sex        1   5021.3 5053.3
## - BMI        1   6239.9 6271.9

# Coefficient table and fit statistics for modd22, the model kept by step(modd2).
summary(modd22)

## 
## Call:
## glm(formula = NAFLD ~ cluster_4 + Sex + Age + Education + Marrying + 
##     Smoking + BMI + CRE, family = "binomial", data = sleepdisturbance)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -11.600210   0.479151 -24.210  < 2e-16 ***
## cluster_42    0.215726   0.078555   2.746 0.006029 ** 
## cluster_43    0.257700   0.116108   2.219 0.026454 *  
## Sex1          1.000097   0.108802   9.192  < 2e-16 ***
## Age1          0.315455   0.085499   3.690 0.000225 ***
## Age2          0.058126   0.111925   0.519 0.603532    
## Education2    0.097483   0.097938   0.995 0.319565    
## Education3    0.388180   0.089938   4.316 1.59e-05 ***
## Education4    0.318822   0.161776   1.971 0.048751 *  
## Marrying2     0.697918   0.235214   2.967 0.003006 ** 
## Marrying3     0.962621   0.322681   2.983 0.002853 ** 
## Marrying4     0.715047   0.322273   2.219 0.026503 *  
## Smoking2      0.115524   0.116684   0.990 0.322148    
## Smoking3      0.294325   0.118848   2.476 0.013269 *  
## Smoking4      0.137065   0.190710   0.719 0.472320    
## BMI           0.425168   0.014473  29.377  < 2e-16 ***
## CRE          -0.011533   0.003392  -3.400 0.000673 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6830.2  on 4969  degrees of freedom
## Residual deviance: 4926.0  on 4953  degrees of freedom
## AIC: 4960
## 
## Number of Fisher Scoring iterations: 5

MCA_sleep

2024-03-18

MCA

Kmeans 肘选法3类

轮廓系数计算

轮廓系数画图

聚类c1画图—睡眠模式最好

聚类c2画图—睡眠模式中等

聚类c3画图—睡眠模式最差

logistic-共病

logistic –NAFLD