df = read.table("osas_data_3feb.txt", sep="\t", dec=".", header=T, fill=NA)
df = subset(df, AHI_voor >= 5 & Studienummer!="")
df$Delta_AHI = df$AHI_na - df$AHI_voor
df$Delta_AHI_rug = df$AHI_rug_na - df$AHI_rug_voor
df$Delta_AHI_zij = df$AHI_zij_na = df$AHI_zij_voor
df$Succes = as.numeric(apply(df[,c("AHI_voor","AHI_na")],1, function(x){
(x[2] < 10) & (x[2]<x[1]/2)
}))
AHI voor - AHI na (dus delta AHI), verschil tussen groep pOSAS_voor_1_ja_2_nee
hist(df$Delta_AHI, breaks=20)
qqnorm(df$Delta_AHI)
qqline(df$Delta_AHI)
Data is niet normaal verdeeld, vooral de lagere waardes zijn lager dan je bij een normaal verdeling zou verwachten
Dus een rank test is het beste:
wilcox.test(x=subset(df, pOSAS_voor_1_ja_2_nee == 1)$Delta_AHI, subset(df, pOSAS_voor_1_ja_2_nee == 2)$Delta_AHI, conf.int = T)
##
## Wilcoxon rank sum test with continuity correction
##
## data: subset(df, pOSAS_voor_1_ja_2_nee == 1)$Delta_AHI and subset(df, pOSAS_voor_1_ja_2_nee == 2)$Delta_AHI
## W = 1273, p-value = 0.125
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -4.5000029 0.6999456
## sample estimates:
## difference in location
## -2.100065
Is niet significant, maar komt enigzins in de buurt.
Bij een t-test komt het niet in de buurt:
t.test(x=subset(df, pOSAS_voor_1_ja_2_nee == 1)$Delta_AHI, y=subset(df, pOSAS_voor_1_ja_2_nee == 2)$Delta_AHI, var.equal=T)
##
## Two Sample t-test
##
## data: subset(df, pOSAS_voor_1_ja_2_nee == 1)$Delta_AHI and subset(df, pOSAS_voor_1_ja_2_nee == 2)$Delta_AHI
## t = -0.56126, df = 138, p-value = 0.5755
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.139333 2.308976
## sample estimates:
## mean of x mean of y
## -4.033036 -3.117857
df$pOSAS_voor = relevel(factor(ifelse(df$pOSAS_voor_1_ja_2_nee ==1 , "supine_dependent_OSA", "non_supine_dependent_OSA")), ref="supine_dependent_OSA")
tmp = df[,c("Studienummer","pOSAS_voor","AHI_voor","AHI_na")]
tmp = reshape::melt(tmp, id.vars=1:2)
library(GeneralFunctions)
##
## Attaching package: 'GeneralFunctions'
## The following object is masked from 'package:base':
##
## stderr
setTheme()
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:epiDisplay':
##
## alpha
library(ggplot2)
ggplot(tmp, aes(x=variable,y=value))+
theme(legend.position="none")+
theme(axis.text.y=element_text(angle = 0, hjust = 0))+
theme(axis.text.x=element_text(angle = 90, hjust = 0,size=10))+
#scale_y_continuous(limits=c(0,1))+
geom_boxplot(position=position_dodge(0.9))+
geom_dotplot(binaxis = "y", dotsize = 1, stackdir = "center", stackratio=1, binwidth = 0.5)+
facet_wrap(~pOSAS_voor)
ggplot(tmp, aes(x=variable, y=value, group=Studienummer))+
geom_line(colour = transparent("black",100))+
geom_point()+
theme(legend.position="none")+
theme(axis.text.y=element_text(angle = 0, hjust = 0))+
theme(axis.text.x=element_text(angle = 90, hjust = 0,size=10))+
facet_wrap(~pOSAS_voor)
tmp = df[,c("Studienummer","pOSAS_voor", "Delta_AHI","Delta_AHI_rug","Delta_AHI_zij")]
tmp = reshape::melt(tmp, id.vars=1:2)
library(GeneralFunctions)
setTheme()
library(ggplot2)
ggplot(tmp, aes(x=pOSAS_voor,y=value))+
theme(legend.position="none")+
theme(axis.text.y=element_text(angle = 0, hjust = 0))+
theme(axis.text.x=element_text(angle = 90, hjust = 0,size=10))+
#scale_y_continuous(limits=c(0,1))+
geom_boxplot(position=position_dodge(0.9), outlier.shape = NA)+
geom_dotplot(binaxis = "y", dotsize = 0.8, stackdir = "center", stackratio=1, binwidth = 1)+
facet_wrap(~variable)
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_bindot).
Percentage ‘succes’, verschil tussen pOSAS_voor_1_ja_2_nee
table(df$pOSAS_voor_1_ja_2_nee, df$Succes)
##
## 0 1
## 1 62 50
## 2 21 7
50/(63+50)
## [1] 0.4424779
7/29
## [1] 0.2413793
fisher.test(table(df$pOSAS_voor_1_ja_2_nee, df$Succes), conf.int = T)
##
## Fisher's Exact Test for Count Data
##
## data: table(df$pOSAS_voor_1_ja_2_nee, df$Succes)
## p-value = 0.08437
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.1377736 1.1190868
## sample estimates:
## odds ratio
## 0.4158029
Een trend voor minder succes bij patienten met geen pOSAS_voor
chisq.test(df$pOSAS_voor_1_ja_2_nee, df$Succes, correct=F)
##
## Pearson's Chi-squared test
##
## data: df$pOSAS_voor_1_ja_2_nee and df$Succes
## X-squared = 3.5806, df = 1, p-value = 0.05846
tmp = data.frame(prop.table(table(df$pOSAS_voor, df$Succes), margin=1))
colnames(tmp)=c("pOSAS_voor","Success","Percentage")
tmp$Percentage = 100*tmp$Percentage
ggplot(subset(tmp, Success==1), aes(x=pOSAS_voor,y=Percentage))+
theme(axis.text.y=element_text(angle = 0, hjust = 0))+
theme(axis.text.x=element_text(angle = 90, hjust = 0,size=10))+
#scale_y_continuous(limits=c(0,1))+
geom_bar(position=position_dodge(0.9), stat="identity", width = 0.8)+
scale_fill_manual(values=c("black","black"))
df$Severity = factor(ifelse(df$AHI_voor > 5 & df$AHI_voor < 15, "mild", ifelse(df$AHI_voor > 15 & df$AHI_voor < 30,"moderate",NA)), levels=c("mild","moderate"))
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:GeneralFunctions':
##
## last
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
tmp = df[,c("Severity","pOSAS_voor","Succes")] %>% as.tbl() %>% group_by(Severity,pOSAS_voor) %>% dplyr::summarize(succesPerc=mean(Succes, na.rm=T))
tmp$Percentage = 100*tmp$succesPerc
ggplot(subset(tmp, !is.na(Severity)), aes(x=Severity,y=Percentage))+
theme(axis.text.y=element_text(angle = 0, hjust = 0))+
theme(axis.text.x=element_text(angle = 90, hjust = 0,size=10))+
#scale_y_continuous(limits=c(0,1))+
geom_bar(position=position_dodge(0.9), stat="identity", width = 0.8)+
scale_fill_manual(values=c("black","black"))+
facet_wrap(~pOSAS_voor)
Voorspellende factoren voor ‘delta AHI’: BMI, halsomtrek, leeftijd, geslacht, AHI, pOSAS_voor_1_ja_2_nee, AHI_rug_voor
Het simpelste is een linear model:
Eerst moet ik de categoriale variabelen herstellen:
df$Geslacht = ifelse(df$Geslacht_1_man_2_vrouw==1,"Man","Vrouw")
df$pOSAS_voor = ifelse(df$pOSAS_voor_1_ja_2_nee==1,T,F)
Dan het lineaire model:
fit = lm(Delta_AHI~BMI +Halsomtrek +Leeftijd +Geslacht +pOSAS_voor +AHI_rug_voor, data=df)
summary(fit)
##
## Call:
## lm(formula = Delta_AHI ~ BMI + Halsomtrek + Leeftijd + Geslacht +
## pOSAS_voor + AHI_rug_voor, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.260 -4.944 -1.167 2.886 24.992
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.15124 13.17387 -0.771 0.44256
## BMI 0.75630 0.23816 3.176 0.00192 **
## Halsomtrek -0.37922 0.37481 -1.012 0.31379
## Leeftijd 0.05840 0.06789 0.860 0.39150
## GeslachtVrouw -5.43800 2.54411 -2.137 0.03470 *
## pOSAS_voorTRUE 2.28549 1.90385 1.200 0.23245
## AHI_rug_voor -0.11925 0.04019 -2.968 0.00366 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.576 on 114 degrees of freedom
## (19 observations deleted due to missingness)
## Multiple R-squared: 0.1477, Adjusted R-squared: 0.1029
## F-statistic: 3.293 on 6 and 114 DF, p-value: 0.005022
Pr(>|z|) is de p-waarde voor elke variable, spreekt verder voor zich.
Let op, je verliest hier 20 patienten omdat de data niet compleet is, dat is best wel veel.
for(x in c("Delta_AHI","BMI","Halsomtrek","Leeftijd","Geslacht","pOSAS_voor","AHI_rug_voor")){
print(paste0(x,":", sum(is.na(df[,x]))," ontbreken"))
}
## [1] "Delta_AHI:0 ontbreken"
## [1] "BMI:5 ontbreken"
## [1] "Halsomtrek:19 ontbreken"
## [1] "Leeftijd:0 ontbreken"
## [1] "Geslacht:0 ontbreken"
## [1] "pOSAS_voor:0 ontbreken"
## [1] "AHI_rug_voor:0 ontbreken"
Dit is puur de halsomtrek die ontbreekt bij alle 20, als je die weglaat:
fit = lm(Delta_AHI~BMI +Leeftijd +Geslacht +pOSAS_voor +AHI_rug_voor, data=df)
summary(fit)
##
## Call:
## lm(formula = Delta_AHI ~ BMI + Leeftijd + Geslacht + pOSAS_voor +
## AHI_rug_voor, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.728 -4.968 -1.052 2.925 25.629
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -18.47484 5.95443 -3.103 0.00236 **
## BMI 0.47369 0.16374 2.893 0.00448 **
## Leeftijd 0.08133 0.06226 1.306 0.19379
## GeslachtVrouw -3.21049 1.53668 -2.089 0.03865 *
## pOSAS_voorTRUE 1.62574 1.73228 0.938 0.34974
## AHI_rug_voor -0.12310 0.03684 -3.342 0.00109 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.485 on 129 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.1167, Adjusted R-squared: 0.08249
## F-statistic: 3.409 on 5 and 129 DF, p-value: 0.00631
Voorspellende factoren voor ‘succes’: BMI, halsomtrek, leeftijd, geslacht, AHI, pOSAS_voor_1_ja_2_nee, AHI_rug_voor
fit = glm(Succes~BMI +Leeftijd +Geslacht +AHI_voor +pOSAS_voor +AHI_rug_voor, data=df, family = "binomial")
summary(fit)
##
## Call:
## glm(formula = Succes ~ BMI + Leeftijd + Geslacht + AHI_voor +
## pOSAS_voor + AHI_rug_voor, family = "binomial", data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8302 -0.9497 -0.6128 1.0910 1.7522
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.1953181 1.9020363 2.206 0.02741 *
## BMI -0.1458558 0.0521863 -2.795 0.00519 **
## Leeftijd -0.0431722 0.0188710 -2.288 0.02215 *
## GeslachtVrouw 0.7808424 0.4585451 1.703 0.08859 .
## AHI_voor 0.0629719 0.0478906 1.315 0.18854
## pOSAS_voorTRUE 0.8292306 0.5682639 1.459 0.14450
## AHI_rug_voor -0.0005401 0.0157711 -0.034 0.97268
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 182.49 on 134 degrees of freedom
## Residual deviance: 164.23 on 128 degrees of freedom
## (5 observations deleted due to missingness)
## AIC: 178.23
##
## Number of Fisher Scoring iterations: 4
data.frame(logistic.display(logistic.model=fit, simplified=T)$table)
## OR lower95ci upper95ci Pr...Z..
## BMI 0.8642824 0.7802514 0.9573632 0.005191535
## Leeftijd 0.9577464 0.9229699 0.9938332 0.022151398
## GeslachtVrouw 2.1833106 0.8887943 5.3632716 0.088592512
## AHI_voor 1.0649969 0.9695804 1.1698033 0.188539468
## pOSAS_voorTRUE 2.2915548 0.7523555 6.9797100 0.144500463
## AHI_rug_voor 0.9994600 0.9690386 1.0308365 0.972680531
df$BMI5 = df$BMI/5
df$Leeftijd10 = df$Leeftijd/10
df$AHI_voor10 = df$AHI_voor/10
df$AHI_rug_voor10 = df$AHI_rug_voor/10
fit = glm(Succes~BMI5 +Leeftijd10 +Geslacht +AHI_voor10 +pOSAS_voor +AHI_rug_voor10, data=df, family = "binomial")
summary(fit)
##
## Call:
## glm(formula = Succes ~ BMI5 + Leeftijd10 + Geslacht + AHI_voor10 +
## pOSAS_voor + AHI_rug_voor10, family = "binomial", data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8302 -0.9497 -0.6128 1.0910 1.7522
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.195318 1.902036 2.206 0.02741 *
## BMI5 -0.729279 0.260932 -2.795 0.00519 **
## Leeftijd10 -0.431722 0.188710 -2.288 0.02215 *
## GeslachtVrouw 0.780842 0.458545 1.703 0.08859 .
## AHI_voor10 0.629719 0.478906 1.315 0.18854
## pOSAS_voorTRUE 0.829231 0.568264 1.459 0.14450
## AHI_rug_voor10 -0.005401 0.157711 -0.034 0.97268
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 182.49 on 134 degrees of freedom
## Residual deviance: 164.23 on 128 degrees of freedom
## (5 observations deleted due to missingness)
## AIC: 178.23
##
## Number of Fisher Scoring iterations: 4
# install.packages("epiDisplay")
library(epiDisplay)
tmp = data.frame(epiDisplay::logistic.display(logistic.model=fit, simplified=T)$table)
tmp$variable = factor(row.names(tmp), levels=rev(row.names(tmp)))
colnames(tmp)= c("OR","lower95ci","upper95ci","p.val","variable")
tmp$Outcome="Success"
require(ggplot2)
maxOR=10
tmp$lower95ci <- sapply(tmp$lower95ci, function(x) {
ifelse(x < 1, (-1/x) + 1, x - 1)
})
tmp$upper95ci <- sapply(tmp$upper95ci, function(x) {
ifelse(x < 1, (-1/x) + 1, x - 1)
})
tmp$OR <- sapply(tmp$OR, function(x) {
ifelse(x < 1, (-1/x) + 1, x - 1)
})
lbl = c(paste0("1/", seq(maxOR, 1, -1) + 1), seq(0, maxOR, 1) + 1)
tmp <- tmp[order(tmp$OR, decreasing = F),]
tmp$varType = c("continuous","continuous","continuous","continuous","categorical","categorical")
ggplot(tmp, aes(x = variable, y = OR)) +
geom_point(position = position_dodge(0.9), stat = "identity", size=4) +
geom_errorbar(aes(ymin = lower95ci, ymax = upper95ci)) +
coord_flip(ylim=c(-4,4))+
scale_y_continuous(breaks = seq(-maxOR, maxOR, 1), labels = lbl)+
scale_colour_manual(values = colours) +
geom_hline(yintercept = 0, col = "darkgrey")+
theme(panel.margin = unit(2, "lines"))+
ggtitle("binomial model for success")+xlab("") + ylab("Odds Ratio (95% CI)")
## Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
## instead
#facet_wrap(~varType, scales="free",ncol=1)
De halsomtrek en BMI hebben een tegenovergesteld effect, niet helemaal zoals verwacht:
preds=predict(fit,df)
plot(y=preds,x=df$BMI)
plot(y=preds,x=df$Halsomtrek)
Dit heeft er vast mee te maken dat ze sterk gecorreleerd zijn:
plot(df$Halsomtrek, df$BMI)
cor.test(df$Halsomtrek, df$BMI)
##
## Pearson's product-moment correlation
##
## data: df$Halsomtrek and df$BMI
## t = 4.3425, df = 119, p-value = 2.977e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2048823 0.5143908
## sample estimates:
## cor
## 0.369853
Pr(>|z|) is weer de p-waarde voor elke variable
zonder halsomtrek:
fit = glm(Succes~BMI +Leeftijd +Geslacht +AHI_voor +pOSAS_voor +AHI_rug_voor, data=df, family = "binomial")
summary(fit)
##
## Call:
## glm(formula = Succes ~ BMI + Leeftijd + Geslacht + AHI_voor +
## pOSAS_voor + AHI_rug_voor, family = "binomial", data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8302 -0.9497 -0.6128 1.0910 1.7522
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.1953181 1.9020363 2.206 0.02741 *
## BMI -0.1458558 0.0521863 -2.795 0.00519 **
## Leeftijd -0.0431722 0.0188710 -2.288 0.02215 *
## GeslachtVrouw 0.7808424 0.4585451 1.703 0.08859 .
## AHI_voor 0.0629719 0.0478906 1.315 0.18854
## pOSAS_voorTRUE 0.8292306 0.5682639 1.459 0.14450
## AHI_rug_voor -0.0005401 0.0157711 -0.034 0.97268
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 182.49 on 134 degrees of freedom
## Residual deviance: 164.23 on 128 degrees of freedom
## (5 observations deleted due to missingness)
## AIC: 178.23
##
## Number of Fisher Scoring iterations: 4
zonder BMI:
fit = glm(Succes~Halsomtrek +Leeftijd +Geslacht +AHI_voor +pOSAS_voor +AHI_rug_voor, data=df, family = "binomial")
summary(fit)
##
## Call:
## glm(formula = Succes ~ Halsomtrek + Leeftijd + Geslacht + AHI_voor +
## pOSAS_voor + AHI_rug_voor, family = "binomial", data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6935 -1.0090 -0.7524 1.1889 1.8530
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.263002 3.626397 -0.073 0.9422
## Halsomtrek 0.011240 0.081306 0.138 0.8900
## Leeftijd -0.044214 0.020059 -2.204 0.0275 *
## GeslachtVrouw 0.639510 0.575770 1.111 0.2667
## AHI_voor 0.059106 0.047454 1.246 0.2129
## pOSAS_voorTRUE 0.917035 0.607354 1.510 0.1311
## AHI_rug_voor -0.001475 0.016224 -0.091 0.9276
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 163.34 on 120 degrees of freedom
## Residual deviance: 152.97 on 114 degrees of freedom
## (19 observations deleted due to missingness)
## AIC: 166.97
##
## Number of Fisher Scoring iterations: 4
Rugligging percentage voor en na pOSA
tmp = df[,c("Studienummer","Tijd_rug_na","Tijd_rug_voor","pOSAS_voor")]
tmp = reshape::melt(tmp, id.vars=c(1,4))
ggplot(tmp, aes(x=pOSAS_voor,y=value))+
theme(legend.position="none")+
theme(axis.text.y=element_text(angle = 0, hjust = 0))+
theme(axis.text.x=element_text(angle = 90, hjust = 0,size=10))+
#scale_y_continuous(limits=c(0,1))+
geom_boxplot(position=position_dodge(0.9), outlier.shape = NA)+
geom_dotplot(binaxis = "y", dotsize = 1, stackdir = "center", stackratio=1, binwidth = 1.5)+
facet_wrap(~variable)