::p_load(openxlsx)
pacman<- read.xlsx(xlsxFile ="ANA_V2(遺漏值改1)(有加權).xlsx", sheet = 'ANA') dta
資料結構
$sleep_grp<-cut(dta$T_Sleep, c(0, 7, 9, 10), c("poor", "moderate", "good"))
dta$sleep_grp1<-cut(dta$T_Sleep, c(0, 7, 9, 10), c("1", "2", "3")) dta
將睡眠進行分組 0~7poor ,8~9moderate, 10 good。 參考以下文章 Dalmases, M., Benítez, I. D., Mas, A., Garcia-Codina, O., Medina-Bustos, A., Escarrabill, J., … & de Batlle, J. (2018). Assessing sleep health in a European population: results of the Catalan Health Survey 2015. PloS one, 13(4), e0194495.
head(dta)
## ID Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8 Q9 Q10 AGET Q11 Q12 Q13 Q14 Q15 Q16 Q17 Q18
## 1 1 3 3 3 3 3 3 3 3 3 3 31.50960 3 4 4 3 3 3 3 3
## 2 2 2 2 2 3 2 2 3 2 2 2 23.10704 1 1 1 3 3 3 1 1
## 3 3 3 4 3 4 4 4 2 2 4 3 23.10726 4 3 2 4 3 2 4 3
## 4 4 3 3 2 3 2 3 2 3 3 3 18.90594 2 2 2 3 4 4 4 4
## 5 5 2 2 2 3 2 2 3 1 2 2 17.23197 2 2 3 4 4 4 4 4
## 6 6 1 2 1 2 3 4 3 3 3 3 20.51425 4 1 3 4 4 3 2 4
## Q19 Q20 Q21 AUNDERSTAND Q22 Q23 Q24 Q25 Q26 Q27 Q28 Q29 Q30 AASSESS Q31 Q32
## 1 3 3 3 36.76120 3 3 3 3 3 3 3 3 3 28.35864 3 3
## 2 1 1 3 19.95608 1 1 3 3 3 3 1 1 3 19.95608 3 3
## 3 3 2 4 23.80748 3 2 2 2 3 3 3 2 2 15.40484 3 3
## 4 4 4 3 25.20792 2 2 2 2 1 3 3 1 2 12.60396 3 3
## 5 3 3 4 30.36109 2 1 1 3 2 2 3 3 2 15.59083 2 3
## 6 4 4 4 30.36109 4 4 4 4 4 4 3 4 3 27.89938 4 3
## Q33 Q34 Q35 Q36 Q37 Q38 AAPPLY ATOTAL K1 K2 K3 K4 K5 K6 K7 K8 K9 K_SUM
## 1 3 3 3 3 3 2 24.15736 120.78680 1 0 1 1 1 1 1 1 1 8.40256
## 2 1 3 1 3 1 2 17.85544 80.87464 1 0 0 1 0 1 0 1 0 4.20128
## 3 3 3 4 4 3 4 18.90594 81.22552 1 1 1 1 1 1 0 1 1 5.60176
## 4 1 1 4 3 2 1 12.60396 69.32178 1 0 1 1 1 0 1 1 0 4.20132
## 5 2 3 4 3 2 1 16.41140 79.59529 1 1 1 1 1 1 0 1 1 6.56456
## 6 4 4 4 4 4 4 25.43767 104.21239 1 0 1 1 1 0 0 1 1 4.92342
## A1 A2 A3 A4 A5 A6 A7 A_SUM P1 P2 P3 P4 P5 P6 P7 P8 P9 P10 P11 P12 P_SUM
## 1 4 5 4 4 4 3 4 29.40896 4 4 4 3 4 3 3 4 5 3 2 2 43.06312
## 2 2 4 4 4 3 4 2 24.15736 4 4 4 4 4 2 2 4 5 4 2 4 45.16376
## 3 4 5 5 2 5 2 1 16.80528 2 5 5 5 5 3 3 1 5 4 3 5 32.21012
## 4 4 4 4 3 5 2 3 17.50550 4 5 5 4 4 3 3 5 5 2 2 3 31.50990
## 5 4 5 5 2 5 2 2 20.51425 2 2 3 5 3 2 3 4 5 3 2 4 31.18166
## 6 2 4 5 2 4 4 1 18.05254 2 5 2 5 4 5 2 4 5 5 2 4 36.92565
## SEX PREG BIRTHY AGE EDU WORK INFORM LIVING LIVING1_1 LIVING1_11 LIVING1_12
## 1 1 2 87 23 2 10 1 1 0 1 0
## 2 0 1 55 55 0 11 1 1 0 1 0
## 3 0 1 89 21 2 10 1 1 0 1 0
## 4 0 1 88 22 2 10 1 0 2 2 2
## 5 0 1 87 23 2 10 1 1 0 1 1
## 6 0 1 83 27 2 9 0 1 1 1 0
## PET COOK COOKF VENT SMOKE SMOKE_N EXP BUY D1 D2 D3 D4 D5 D6 D7 THINK1 THINK2
## 1 1 0 2 1 0 NA 1 1 2 2 2 2 2 2 1 2 2
## 2 1 0 2 1 0 NA 1 1 1 0 0 0 0 0 0 2 2
## 3 1 0 2 1 0 NA 0 2 1 0 0 0 0 0 0 1 1
## 4 1 1 7 7 0 NA 0 2 1 0 0 0 0 0 0 1 1
## 5 1 0 1 1 0 NA 1 2 2 2 2 2 2 2 1 2 2
## 6 1 0 1 1 0 NA 1 1 1 0 0 0 0 0 0 0 1
## THINK3 THINK4 THINK5 THINK6 THINK7 THINK8 THINK9 THINK10 THINK11 THINK12 USE1
## 1 2 2 2 2 2 2 2 2 2 2 1
## 2 2 2 2 2 2 2 2 2 2 2 1
## 3 0 0 0 1 1 1 0 1 1 0 0
## 4 0 1 0 1 0 1 0 1 1 0 1
## 5 2 2 2 2 2 2 2 2 2 2 0
## 6 1 1 0 0 0 1 0 0 0 0 1
## USE2 USE3 USE4 USE5 USE6 USE7 USE8 USE9 USE10 STAY RES Sleep1 Sleep2 Sleep3
## 1 0 1 0 0 0 0 0 0 0 22 4 1 2 0
## 2 1 0 0 0 0 0 0 0 0 22 4 2 2 1
## 3 0 1 1 1 0 0 1 0 0 16 4 1 0 2
## 4 1 1 1 0 0 0 1 1 0 8 4 2 1 2
## 5 0 1 0 0 1 0 0 0 0 8 2 1 1 0
## 6 0 1 0 0 0 0 0 1 0 20 2 1 1 2
## Sleep4 Sleep5 T_Sleep T_Sleep_weight TV TV_hr GET UNDERSTAND ASSESS
## 1 1 2 6 6.30192 2 2 3.150960 3.341927 3.150960
## 2 0 1 6 6.30192 2 42 2.310704 1.814189 2.217342
## 3 1 1 5 3.50110 1 0 2.310726 2.164316 1.711649
## 4 2 2 9 6.30198 2 2 1.890594 2.291629 1.400440
## 5 1 1 4 3.28228 3 2 1.723197 2.760099 1.732314
## 6 0 1 5 4.10285 3 5 2.051425 2.760099 3.099931
## APPLY TOTAL sleep_grp sleep_grp1
## 1 3.019670 3.178600 poor 1
## 2 2.231930 2.128280 poor 1
## 3 2.363242 2.137514 poor 1
## 4 1.575495 1.824257 moderate 2
## 5 2.051425 2.094613 poor 1
## 6 3.179709 2.742431 poor 1
::p_load(ggplot2,Hmisc, MASS, reshape2,vcd) pacman
睡眠與性別和電視數的個數
ftable(xtabs(~ SEX + TV+ sleep_grp1, data = dta))
## sleep_grp1 1 2 3
## SEX TV
## 0 0 13 4 0
## 1 87 42 18
## 2 78 28 8
## 3 23 15 4
## 4 12 1 2
## 5 3 3 0
## 1 0 12 6 2
## 1 87 41 12
## 2 56 22 10
## 3 29 5 1
## 4 8 3 2
## 5 3 5 1
summary(dta$TV_hr)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 2.00 7.00 16.85 21.00 120.00
ggplot(dta, aes(x = sleep_grp1, y = TV_hr)) +
geom_boxplot(size = .75, aes(fill = SEX)) +
geom_jitter(alpha = .5) +
facet_grid( ~ SEX, margins = TRUE) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
有一個人睡眠總分為0,設為NA。睡眠品質差的這組看電視時間較長。
<- with(dta, table(sleep_grp1))
sleep sleep
## sleep_grp1
## 1 2 3
## 411 175 60
參考UCLA說明。
## fit ordered logit model and store results 'm'
<- polr(sleep_grp1 ~ TV + SEX + TV_hr + AGE, data = dta, Hess=TRUE)
m
## view a summary of the model
summary(m)
## Call:
## polr(formula = sleep_grp1 ~ TV + SEX + TV_hr + AGE, data = dta,
## Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## TV -0.001549 0.077882 -0.01988
## SEX -0.029824 0.161010 -0.18523
## TV_hr -0.005042 0.003931 -1.28263
## AGE 0.002992 0.004726 0.63310
##
## Intercepts:
## Value Std. Error t value
## 1|2 0.5913 0.2521 2.3453
## 2|3 2.3142 0.2747 8.4251
##
## Residual Deviance: 1112.195
## AIC: 1124.195
## (因為不存在,1 個觀察量被刪除了)
<- coef ( summary (m))) (ctable
## Value Std. Error t value
## TV -0.001548541 0.077881601 -0.01988327
## SEX -0.029824301 0.161010043 -0.18523255
## TV_hr -0.005042170 0.003931114 -1.28263140
## AGE 0.002991847 0.004725686 0.63310327
## 1|2 0.591340164 0.252133092 2.34534927
## 2|3 2.314225845 0.274683528 8.42506232
<- pnorm ( abs (ctable[, "t value" ]), lower.tail = FALSE ) * 2
p <- cbind (ctable, "p value" = p)) (ctable
## Value Std. Error t value p value
## TV -0.001548541 0.077881601 -0.01988327 9.841365e-01
## SEX -0.029824301 0.161010043 -0.18523255 8.530466e-01
## TV_hr -0.005042170 0.003931114 -1.28263140 1.996212e-01
## AGE 0.002991847 0.004725686 0.63310327 5.266662e-01
## 1|2 0.591340164 0.252133092 2.34534927 1.900926e-02
## 2|3 2.314225845 0.274683528 8.42506232 3.605583e-17
這裡可以看出都沒有顯著。
## Waiting for profiling to be done...
exp ( coef (m))
## TV SEX TV_hr AGE
## 0.9984527 0.9706161 0.9949705 1.0029963
exp ( cbind ( OR = coef (m), ci))
## OR 2.5 % 97.5 %
## TV 0.9984527 0.8555493 1.161633
## SEX 0.9706161 0.7074997 1.330511
## TV_hr 0.9949705 0.9871132 1.002493
## AGE 1.0029963 0.9937349 1.012331
在此ordinal logistic (and ordinal probit) regression 假設之一是每對結果組之間的關係是相同的,因為所有組對之間的關係是相同的,所以只有一組係數。
以下針對不同ordinal來描述每對結果組之間的關係。 因我們的結果不顯著,以下參考用。
註:運用Hmisc
<- function(y) {
sf c('Y>=1' = qlogis(mean(y >= 1)),
'Y>=2' = qlogis(mean(y >= 2)),
'Y>=3' = qlogis(mean(y >= 3)))
}
<- with(dta, summary(as.numeric(sleep_grp1) ~TV + SEX + TV_hr+ AGE, fun=sf))) (s
## as.numeric(sleep_grp1) N= 646 , 1 Missing
##
## +-------+--------+---+----+----------+---------+
## | | | N|Y>=1| Y>=2| Y>=3|
## +-------+--------+---+----+----------+---------+
## | TV| 0| 37| Inf|-0.7339692|-2.862201|
## | | 1|287| Inf|-0.4316675|-2.147879|
## | | 2|202| Inf|-0.6783321|-2.324564|
## | | 3| 77| Inf|-0.7323679|-2.667228|
## | | 4| 28| Inf|-0.9162907|-1.791759|
## | | 5| 15| Inf| 0.4054651|-2.639057|
## +-------+--------+---+----+----------+---------+
## | SEX| No|341| Inf|-0.5469647|-2.267605|
## | | Yes|305| Inf|-0.5725192|-2.291813|
## +-------+--------+---+----+----------+---------+
## | TV_hr|[ 0, 3)|186| Inf|-0.5744309|-2.433613|
## | |[ 3, 8)|139| Inf|-0.6079894|-2.670310|
## | |[ 8, 22)|167| Inf|-0.3262157|-1.885691|
## | |[22,120]|154| Inf|-0.7621401|-2.302585|
## +-------+--------+---+----+----------+---------+
## | AGE| [20,25)|167| Inf|-0.5525652|-2.315830|
## | | [25,46)|158| Inf|-0.3844117|-2.498700|
## | | [46,59)|169| Inf|-0.8109302|-2.257849|
## | | [59,88]|152| Inf|-0.4828518|-2.072061|
## +-------+--------+---+----+----------+---------+
## |Overall| |646| Inf|-0.5590077|-2.278975|
## +-------+--------+---+----+----------+---------+
glm ( I ( as.numeric (sleep_grp1) >= 2 ) ~ SEX , family = "binomial" , data = dta)
##
## Call: glm(formula = I(as.numeric(sleep_grp1) >= 2) ~ SEX, family = "binomial",
## data = dta)
##
## Coefficients:
## (Intercept) SEX
## -0.54696 -0.02555
##
## Degrees of Freedom: 645 Total (i.e. Null); 644 Residual
## (因為不存在,1 個觀察量被刪除了)
## Null Deviance: 847
## Residual Deviance: 847 AIC: 851
glm ( I ( as.numeric (sleep_grp1) >= 3 ) ~ SEX , family = "binomial" , data = dta)
##
## Call: glm(formula = I(as.numeric(sleep_grp1) >= 3) ~ SEX, family = "binomial",
## data = dta)
##
## Coefficients:
## (Intercept) SEX
## -2.26761 -0.02421
##
## Degrees of Freedom: 645 Total (i.e. Null); 644 Residual
## (因為不存在,1 個觀察量被刪除了)
## Null Deviance: 399.4
## Residual Deviance: 399.4 AIC: 403.4
4] <- s[, 4] - s[, 3]
s[, 3] <- s[, 3] - s[, 3]
s[, # print s
## as.numeric(sleep_grp1) N= 646 , 1 Missing
##
## +-------+--------+---+----+----+----------+
## | | | N|Y>=1|Y>=2| Y>=3|
## +-------+--------+---+----+----+----------+
## | TV| 0| 37| Inf| 0|-2.1282317|
## | | 1|287| Inf| 0|-1.7162112|
## | | 2|202| Inf| 0|-1.6462319|
## | | 3| 77| Inf| 0|-1.9348603|
## | | 4| 28| Inf| 0|-0.8754687|
## | | 5| 15| Inf| 0|-3.0445224|
## +-------+--------+---+----+----+----------+
## | SEX| No|341| Inf| 0|-1.7206407|
## | | Yes|305| Inf| 0|-1.7192938|
## +-------+--------+---+----+----+----------+
## | TV_hr|[ 0, 3)|186| Inf| 0|-1.8591825|
## | |[ 3, 8)|139| Inf| 0|-2.0623205|
## | |[ 8, 22)|167| Inf| 0|-1.5594756|
## | |[22,120]|154| Inf| 0|-1.5404450|
## +-------+--------+---+----+----+----------+
## | AGE| [20,25)|167| Inf| 0|-1.7632651|
## | | [25,46)|158| Inf| 0|-2.1142883|
## | | [46,59)|169| Inf| 0|-1.4469190|
## | | [59,88]|152| Inf| 0|-1.5892097|
## +-------+--------+---+----+----+----------+
## |Overall| |646| Inf| 0|-1.7199675|
## +-------+--------+---+----+----+----------+
plot(s, which=1:3, pch=1:3, xlab='logit', main=' ', xlim=range(s[,3:4]))
<- data.frame(
newdta TV = rep(0:5, 200),
SEX = rep(0:1, each = 200),
TV_hr = rep(seq(from = 1, to = 120, length.out = 100), 4),
AGE= rep(seq(from = 20, to = 88, length.out = 100), 4))
<- cbind(newdta, predict(m, newdta, type = "probs"))
newdta
##show first few rows
head(newdta)
## TV SEX TV_hr AGE 1 2 3
## 1 0 0 1.000000 20.00000 0.6310084 0.2744525 0.09453906
## 2 1 0 2.202020 20.68687 0.6323007 0.2736346 0.09406467
## 3 2 0 3.404040 21.37374 0.6335911 0.2728164 0.09359242
## 4 3 0 4.606061 22.06061 0.6348796 0.2719981 0.09312229
## 5 4 0 5.808081 22.74747 0.6361662 0.2711795 0.09265428
## 6 5 0 7.010101 23.43434 0.6374508 0.2703608 0.09218839
<- melt(newdta, id.vars = c("TV", "SEX", "TV_hr","AGE"),
lnewdta variable.name = "Level", value.name="Probability")
## view first few rows
head(lnewdta)
## TV SEX TV_hr AGE Level Probability
## 1 0 0 1.000000 20.00000 1 0.6310084
## 2 1 0 2.202020 20.68687 1 0.6323007
## 3 2 0 3.404040 21.37374 1 0.6335911
## 4 3 0 4.606061 22.06061 1 0.6348796
## 5 4 0 5.808081 22.74747 1 0.6361662
## 6 5 0 7.010101 23.43434 1 0.6374508
ggplot(lnewdta, aes(x = TV_hr, y = Probability, colour = Level)) +
geom_line() + facet_grid(SEX ~ TV, labeller="label_both")
ggplot(lnewdta, aes(x = TV_hr, y = Probability, colour = Level)) +
geom_line() +facet_grid(SEX ~. ,labeller="label_both")
ggplot(lnewdta, aes(x =TV_hr, y = Probability, colour = Level)) +
geom_line()
以下驗證另一個package,結果一樣。
::p_load(ordinal,ggeffects, effects,tidyverse)
pacman<- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") cbPalette
%>% mutate(sleep_grp1 = ordered(sleep_grp1, levels=rev(levels(sleep_grp1))), position = as.factor(SEX)) %>% ggplot( aes(x = SEX, fill = sleep_grp1)) + geom_bar(position = "fill") + scale_fill_manual(values = cbPalette) + theme_minimal() dta
= clm(sleep_grp1~TV + SEX + TV_hr +AGE ,data = dta, link = "logit")
ols1 summary(ols1)
## formula: sleep_grp1 ~ TV + SEX + TV_hr + AGE
## data: dta
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 646 -556.10 1124.19 5(0) 7.14e-11 5.7e+04
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## TV -0.001548 0.077881 -0.020 0.984
## SEX -0.029824 0.161010 -0.185 0.853
## TV_hr -0.005042 0.003931 -1.283 0.200
## AGE 0.002992 0.004725 0.633 0.527
##
## Threshold coefficients:
## Estimate Std. Error z value
## 1|2 0.5913 0.2521 2.345
## 2|3 2.3142 0.2747 8.426
## (因為不存在,1 個觀察量被刪除了)
exp(coef(ols1))
## 1|2 2|3 TV SEX TV_hr AGE
## 1.8064081 10.1170894 0.9984527 0.9706160 0.9949705 1.0029963
未來再測試中 newdat= data.frame(SEX = c(“subject”, “object”)) %>% mutate(SEX = as.factor(SEX)) ols1predict =cbind(newdat, predict(ols1, newdat, type = “prob”)$fit) ols1predict