一、資料整理

pacman::p_load(openxlsx)
dta <- read.xlsx(xlsxFile ="ANA_V2(遺漏值改1)(有加權).xlsx", sheet = 'ANA')

資料結構

dta$sleep_grp<-cut(dta$T_Sleep, c(0, 7, 9, 10), c("poor", "moderate", "good"))
dta$sleep_grp1<-cut(dta$T_Sleep, c(0, 7, 9, 10), c("1", "2", "3"))

將睡眠進行分組 0~7poor ,8~9moderate, 10 good。 參考以下文章 Dalmases, M., Benítez, I. D., Mas, A., Garcia-Codina, O., Medina-Bustos, A., Escarrabill, J., … & de Batlle, J. (2018). Assessing sleep health in a European population: results of the Catalan Health Survey 2015. PloS one, 13(4), e0194495.

head(dta)
##   ID Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8 Q9 Q10     AGET Q11 Q12 Q13 Q14 Q15 Q16 Q17 Q18
## 1  1  3  3  3  3  3  3  3  3  3   3 31.50960   3   4   4   3   3   3   3   3
## 2  2  2  2  2  3  2  2  3  2  2   2 23.10704   1   1   1   3   3   3   1   1
## 3  3  3  4  3  4  4  4  2  2  4   3 23.10726   4   3   2   4   3   2   4   3
## 4  4  3  3  2  3  2  3  2  3  3   3 18.90594   2   2   2   3   4   4   4   4
## 5  5  2  2  2  3  2  2  3  1  2   2 17.23197   2   2   3   4   4   4   4   4
## 6  6  1  2  1  2  3  4  3  3  3   3 20.51425   4   1   3   4   4   3   2   4
##   Q19 Q20 Q21 AUNDERSTAND Q22 Q23 Q24 Q25 Q26 Q27 Q28 Q29 Q30  AASSESS Q31 Q32
## 1   3   3   3    36.76120   3   3   3   3   3   3   3   3   3 28.35864   3   3
## 2   1   1   3    19.95608   1   1   3   3   3   3   1   1   3 19.95608   3   3
## 3   3   2   4    23.80748   3   2   2   2   3   3   3   2   2 15.40484   3   3
## 4   4   4   3    25.20792   2   2   2   2   1   3   3   1   2 12.60396   3   3
## 5   3   3   4    30.36109   2   1   1   3   2   2   3   3   2 15.59083   2   3
## 6   4   4   4    30.36109   4   4   4   4   4   4   3   4   3 27.89938   4   3
##   Q33 Q34 Q35 Q36 Q37 Q38   AAPPLY    ATOTAL K1 K2 K3 K4 K5 K6 K7 K8 K9   K_SUM
## 1   3   3   3   3   3   2 24.15736 120.78680  1  0  1  1  1  1  1  1  1 8.40256
## 2   1   3   1   3   1   2 17.85544  80.87464  1  0  0  1  0  1  0  1  0 4.20128
## 3   3   3   4   4   3   4 18.90594  81.22552  1  1  1  1  1  1  0  1  1 5.60176
## 4   1   1   4   3   2   1 12.60396  69.32178  1  0  1  1  1  0  1  1  0 4.20132
## 5   2   3   4   3   2   1 16.41140  79.59529  1  1  1  1  1  1  0  1  1 6.56456
## 6   4   4   4   4   4   4 25.43767 104.21239  1  0  1  1  1  0  0  1  1 4.92342
##   A1 A2 A3 A4 A5 A6 A7    A_SUM P1 P2 P3 P4 P5 P6 P7 P8 P9 P10 P11 P12    P_SUM
## 1  4  5  4  4  4  3  4 29.40896  4  4  4  3  4  3  3  4  5   3   2   2 43.06312
## 2  2  4  4  4  3  4  2 24.15736  4  4  4  4  4  2  2  4  5   4   2   4 45.16376
## 3  4  5  5  2  5  2  1 16.80528  2  5  5  5  5  3  3  1  5   4   3   5 32.21012
## 4  4  4  4  3  5  2  3 17.50550  4  5  5  4  4  3  3  5  5   2   2   3 31.50990
## 5  4  5  5  2  5  2  2 20.51425  2  2  3  5  3  2  3  4  5   3   2   4 31.18166
## 6  2  4  5  2  4  4  1 18.05254  2  5  2  5  4  5  2  4  5   5   2   4 36.92565
##   SEX PREG BIRTHY AGE EDU WORK INFORM LIVING LIVING1_1 LIVING1_11 LIVING1_12
## 1   1    2     87  23   2   10      1      1         0          1          0
## 2   0    1     55  55   0   11      1      1         0          1          0
## 3   0    1     89  21   2   10      1      1         0          1          0
## 4   0    1     88  22   2   10      1      0         2          2          2
## 5   0    1     87  23   2   10      1      1         0          1          1
## 6   0    1     83  27   2    9      0      1         1          1          0
##   PET COOK COOKF VENT SMOKE SMOKE_N EXP BUY D1 D2 D3 D4 D5 D6 D7 THINK1 THINK2
## 1   1    0     2    1     0      NA   1   1  2  2  2  2  2  2  1      2      2
## 2   1    0     2    1     0      NA   1   1  1  0  0  0  0  0  0      2      2
## 3   1    0     2    1     0      NA   0   2  1  0  0  0  0  0  0      1      1
## 4   1    1     7    7     0      NA   0   2  1  0  0  0  0  0  0      1      1
## 5   1    0     1    1     0      NA   1   2  2  2  2  2  2  2  1      2      2
## 6   1    0     1    1     0      NA   1   1  1  0  0  0  0  0  0      0      1
##   THINK3 THINK4 THINK5 THINK6 THINK7 THINK8 THINK9 THINK10 THINK11 THINK12 USE1
## 1      2      2      2      2      2      2      2       2       2       2    1
## 2      2      2      2      2      2      2      2       2       2       2    1
## 3      0      0      0      1      1      1      0       1       1       0    0
## 4      0      1      0      1      0      1      0       1       1       0    1
## 5      2      2      2      2      2      2      2       2       2       2    0
## 6      1      1      0      0      0      1      0       0       0       0    1
##   USE2 USE3 USE4 USE5 USE6 USE7 USE8 USE9 USE10 STAY RES Sleep1 Sleep2 Sleep3
## 1    0    1    0    0    0    0    0    0     0   22   4      1      2      0
## 2    1    0    0    0    0    0    0    0     0   22   4      2      2      1
## 3    0    1    1    1    0    0    1    0     0   16   4      1      0      2
## 4    1    1    1    0    0    0    1    1     0    8   4      2      1      2
## 5    0    1    0    0    1    0    0    0     0    8   2      1      1      0
## 6    0    1    0    0    0    0    0    1     0   20   2      1      1      2
##   Sleep4 Sleep5 T_Sleep T_Sleep_weight TV TV_hr      GET UNDERSTAND   ASSESS
## 1      1      2       6        6.30192  2     2 3.150960   3.341927 3.150960
## 2      0      1       6        6.30192  2    42 2.310704   1.814189 2.217342
## 3      1      1       5        3.50110  1     0 2.310726   2.164316 1.711649
## 4      2      2       9        6.30198  2     2 1.890594   2.291629 1.400440
## 5      1      1       4        3.28228  3     2 1.723197   2.760099 1.732314
## 6      0      1       5        4.10285  3     5 2.051425   2.760099 3.099931
##      APPLY    TOTAL sleep_grp sleep_grp1
## 1 3.019670 3.178600      poor          1
## 2 2.231930 2.128280      poor          1
## 3 2.363242 2.137514      poor          1
## 4 1.575495 1.824257  moderate          2
## 5 2.051425 2.094613      poor          1
## 6 3.179709 2.742431      poor          1
pacman::p_load(ggplot2,Hmisc, MASS, reshape2,vcd)

睡眠與性別和電視數的個數

ftable(xtabs(~ SEX + TV+ sleep_grp1, data = dta))
##        sleep_grp1  1  2  3
## SEX TV                    
## 0   0             13  4  0
##     1             87 42 18
##     2             78 28  8
##     3             23 15  4
##     4             12  1  2
##     5              3  3  0
## 1   0             12  6  2
##     1             87 41 12
##     2             56 22 10
##     3             29  5  1
##     4              8  3  2
##     5              3  5  1
summary(dta$TV_hr)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    2.00    7.00   16.85   21.00  120.00
ggplot(dta, aes(x = sleep_grp1, y = TV_hr)) +
  geom_boxplot(size = .75, aes(fill = SEX)) +
  geom_jitter(alpha = .5) +
  facet_grid( ~ SEX, margins = TRUE) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

有一個人睡眠總分為0,設為NA。睡眠品質差的這組看電視時間較長。

sleep <- with(dta, table(sleep_grp1)) 
sleep
## sleep_grp1
##   1   2   3 
## 411 175  60

二、ordinal logistic regression

參考UCLA說明。

## fit ordered logit model and store results 'm'
m <- polr(sleep_grp1 ~  TV + SEX + TV_hr + AGE, data = dta, Hess=TRUE)

## view a summary of the model
summary(m)
## Call:
## polr(formula = sleep_grp1 ~ TV + SEX + TV_hr + AGE, data = dta, 
##     Hess = TRUE)
## 
## Coefficients:
##           Value Std. Error  t value
## TV    -0.001549   0.077882 -0.01988
## SEX   -0.029824   0.161010 -0.18523
## TV_hr -0.005042   0.003931 -1.28263
## AGE    0.002992   0.004726  0.63310
## 
## Intercepts:
##     Value   Std. Error t value
## 1|2  0.5913  0.2521     2.3453
## 2|3  2.3142  0.2747     8.4251
## 
## Residual Deviance: 1112.195 
## AIC: 1124.195 
## (因為不存在,1 個觀察量被刪除了)
(ctable  <-  coef ( summary (m)))
##              Value  Std. Error     t value
## TV    -0.001548541 0.077881601 -0.01988327
## SEX   -0.029824301 0.161010043 -0.18523255
## TV_hr -0.005042170 0.003931114 -1.28263140
## AGE    0.002991847 0.004725686  0.63310327
## 1|2    0.591340164 0.252133092  2.34534927
## 2|3    2.314225845 0.274683528  8.42506232
p  <-  pnorm ( abs (ctable[,  "t value" ]),  lower.tail  =  FALSE )  *  2
(ctable  <-  cbind (ctable,  "p value"  = p))
##              Value  Std. Error     t value      p value
## TV    -0.001548541 0.077881601 -0.01988327 9.841365e-01
## SEX   -0.029824301 0.161010043 -0.18523255 8.530466e-01
## TV_hr -0.005042170 0.003931114 -1.28263140 1.996212e-01
## AGE    0.002991847 0.004725686  0.63310327 5.266662e-01
## 1|2    0.591340164 0.252133092  2.34534927 1.900926e-02
## 2|3    2.314225845 0.274683528  8.42506232 3.605583e-17

這裡可以看出都沒有顯著。

## Waiting for profiling to be done...
exp ( coef (m))
##        TV       SEX     TV_hr       AGE 
## 0.9984527 0.9706161 0.9949705 1.0029963
exp ( cbind ( OR  =  coef (m), ci))
##              OR     2.5 %   97.5 %
## TV    0.9984527 0.8555493 1.161633
## SEX   0.9706161 0.7074997 1.330511
## TV_hr 0.9949705 0.9871132 1.002493
## AGE   1.0029963 0.9937349 1.012331

在此ordinal logistic (and ordinal probit) regression 假設之一是每對結果組之間的關係是相同的,因為所有組對之間的關係是相同的,所以只有一組係數。

三、ordinal情況的關系

以下針對不同ordinal來描述每對結果組之間的關係。 因我們的結果不顯著,以下參考用。

註:運用Hmisc

sf <- function(y) {
  c('Y>=1' = qlogis(mean(y >= 1)),
    'Y>=2' = qlogis(mean(y >= 2)),
    'Y>=3' = qlogis(mean(y >= 3)))
}

(s <- with(dta, summary(as.numeric(sleep_grp1) ~TV + SEX + TV_hr+ AGE, fun=sf)))
## as.numeric(sleep_grp1)     N= 646 , 1 Missing 
## 
## +-------+--------+---+----+----------+---------+
## |       |        |  N|Y>=1|      Y>=2|     Y>=3|
## +-------+--------+---+----+----------+---------+
## |     TV|       0| 37| Inf|-0.7339692|-2.862201|
## |       |       1|287| Inf|-0.4316675|-2.147879|
## |       |       2|202| Inf|-0.6783321|-2.324564|
## |       |       3| 77| Inf|-0.7323679|-2.667228|
## |       |       4| 28| Inf|-0.9162907|-1.791759|
## |       |       5| 15| Inf| 0.4054651|-2.639057|
## +-------+--------+---+----+----------+---------+
## |    SEX|      No|341| Inf|-0.5469647|-2.267605|
## |       |     Yes|305| Inf|-0.5725192|-2.291813|
## +-------+--------+---+----+----------+---------+
## |  TV_hr|[ 0,  3)|186| Inf|-0.5744309|-2.433613|
## |       |[ 3,  8)|139| Inf|-0.6079894|-2.670310|
## |       |[ 8, 22)|167| Inf|-0.3262157|-1.885691|
## |       |[22,120]|154| Inf|-0.7621401|-2.302585|
## +-------+--------+---+----+----------+---------+
## |    AGE| [20,25)|167| Inf|-0.5525652|-2.315830|
## |       | [25,46)|158| Inf|-0.3844117|-2.498700|
## |       | [46,59)|169| Inf|-0.8109302|-2.257849|
## |       | [59,88]|152| Inf|-0.4828518|-2.072061|
## +-------+--------+---+----+----------+---------+
## |Overall|        |646| Inf|-0.5590077|-2.278975|
## +-------+--------+---+----+----------+---------+
glm ( I ( as.numeric (sleep_grp1)  >=  2 )  ~ SEX  ,  family = "binomial" ,  data  = dta)
## 
## Call:  glm(formula = I(as.numeric(sleep_grp1) >= 2) ~ SEX, family = "binomial", 
##     data = dta)
## 
## Coefficients:
## (Intercept)          SEX  
##    -0.54696     -0.02555  
## 
## Degrees of Freedom: 645 Total (i.e. Null);  644 Residual
##   (因為不存在,1 個觀察量被刪除了)
## Null Deviance:       847 
## Residual Deviance: 847   AIC: 851
glm ( I ( as.numeric (sleep_grp1)  >=  3 )  ~  SEX   ,  family = "binomial" ,  data  = dta)
## 
## Call:  glm(formula = I(as.numeric(sleep_grp1) >= 3) ~ SEX, family = "binomial", 
##     data = dta)
## 
## Coefficients:
## (Intercept)          SEX  
##    -2.26761     -0.02421  
## 
## Degrees of Freedom: 645 Total (i.e. Null);  644 Residual
##   (因為不存在,1 個觀察量被刪除了)
## Null Deviance:       399.4 
## Residual Deviance: 399.4     AIC: 403.4
s[, 4] <- s[, 4] - s[, 3]
s[, 3] <- s[, 3] - s[, 3]
s # print
## as.numeric(sleep_grp1)     N= 646 , 1 Missing 
## 
## +-------+--------+---+----+----+----------+
## |       |        |  N|Y>=1|Y>=2|      Y>=3|
## +-------+--------+---+----+----+----------+
## |     TV|       0| 37| Inf|   0|-2.1282317|
## |       |       1|287| Inf|   0|-1.7162112|
## |       |       2|202| Inf|   0|-1.6462319|
## |       |       3| 77| Inf|   0|-1.9348603|
## |       |       4| 28| Inf|   0|-0.8754687|
## |       |       5| 15| Inf|   0|-3.0445224|
## +-------+--------+---+----+----+----------+
## |    SEX|      No|341| Inf|   0|-1.7206407|
## |       |     Yes|305| Inf|   0|-1.7192938|
## +-------+--------+---+----+----+----------+
## |  TV_hr|[ 0,  3)|186| Inf|   0|-1.8591825|
## |       |[ 3,  8)|139| Inf|   0|-2.0623205|
## |       |[ 8, 22)|167| Inf|   0|-1.5594756|
## |       |[22,120]|154| Inf|   0|-1.5404450|
## +-------+--------+---+----+----+----------+
## |    AGE| [20,25)|167| Inf|   0|-1.7632651|
## |       | [25,46)|158| Inf|   0|-2.1142883|
## |       | [46,59)|169| Inf|   0|-1.4469190|
## |       | [59,88]|152| Inf|   0|-1.5892097|
## +-------+--------+---+----+----+----------+
## |Overall|        |646| Inf|   0|-1.7199675|
## +-------+--------+---+----+----+----------+
plot(s, which=1:3, pch=1:3, xlab='logit', main=' ', xlim=range(s[,3:4]))

newdta <- data.frame(
  TV = rep(0:5, 200),
  SEX  = rep(0:1, each = 200),
  TV_hr = rep(seq(from = 1, to = 120, length.out = 100), 4),
  AGE= rep(seq(from = 20, to = 88, length.out = 100), 4))

newdta <- cbind(newdta, predict(m, newdta, type = "probs"))

##show first few rows
head(newdta)
##   TV SEX    TV_hr      AGE         1         2          3
## 1  0   0 1.000000 20.00000 0.6310084 0.2744525 0.09453906
## 2  1   0 2.202020 20.68687 0.6323007 0.2736346 0.09406467
## 3  2   0 3.404040 21.37374 0.6335911 0.2728164 0.09359242
## 4  3   0 4.606061 22.06061 0.6348796 0.2719981 0.09312229
## 5  4   0 5.808081 22.74747 0.6361662 0.2711795 0.09265428
## 6  5   0 7.010101 23.43434 0.6374508 0.2703608 0.09218839
lnewdta <- melt(newdta, id.vars = c("TV", "SEX", "TV_hr","AGE"),
  variable.name = "Level", value.name="Probability")
## view first few rows
head(lnewdta)
##   TV SEX    TV_hr      AGE Level Probability
## 1  0   0 1.000000 20.00000     1   0.6310084
## 2  1   0 2.202020 20.68687     1   0.6323007
## 3  2   0 3.404040 21.37374     1   0.6335911
## 4  3   0 4.606061 22.06061     1   0.6348796
## 5  4   0 5.808081 22.74747     1   0.6361662
## 6  5   0 7.010101 23.43434     1   0.6374508
ggplot(lnewdta, aes(x = TV_hr, y = Probability, colour = Level)) +
  geom_line() + facet_grid(SEX ~ TV, labeller="label_both")

ggplot(lnewdta, aes(x = TV_hr, y = Probability, colour = Level)) +
  geom_line() +facet_grid(SEX ~. ,labeller="label_both")

ggplot(lnewdta, aes(x =TV_hr, y = Probability, colour = Level)) +
  geom_line() 

四、驗證

以下驗證另一個package,結果一樣。

pacman::p_load(ordinal,ggeffects, effects,tidyverse)
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
dta %>% mutate(sleep_grp1 = ordered(sleep_grp1, levels=rev(levels(sleep_grp1))), position = as.factor(SEX)) %>% ggplot( aes(x = SEX, fill = sleep_grp1)) + geom_bar(position = "fill") + scale_fill_manual(values = cbPalette) + theme_minimal()

ols1 = clm(sleep_grp1~TV + SEX + TV_hr +AGE  ,data = dta, link = "logit")
summary(ols1)
## formula: sleep_grp1 ~ TV + SEX + TV_hr + AGE
## data:    dta
## 
##  link  threshold nobs logLik  AIC     niter max.grad cond.H 
##  logit flexible  646  -556.10 1124.19 5(0)  7.14e-11 5.7e+04
## 
## Coefficients:
##        Estimate Std. Error z value Pr(>|z|)
## TV    -0.001548   0.077881  -0.020    0.984
## SEX   -0.029824   0.161010  -0.185    0.853
## TV_hr -0.005042   0.003931  -1.283    0.200
## AGE    0.002992   0.004725   0.633    0.527
## 
## Threshold coefficients:
##     Estimate Std. Error z value
## 1|2   0.5913     0.2521   2.345
## 2|3   2.3142     0.2747   8.426
## (因為不存在,1 個觀察量被刪除了)
exp(coef(ols1))
##        1|2        2|3         TV        SEX      TV_hr        AGE 
##  1.8064081 10.1170894  0.9984527  0.9706160  0.9949705  1.0029963

未來再測試中 newdat= data.frame(SEX = c(“subject”, “object”)) %>% mutate(SEX = as.factor(SEX)) ols1predict =cbind(newdat, predict(ols1, newdat, type = “prob”)$fit) ols1predict