df = read.table("osas_data_3feb.txt", sep="\t", dec=".", header=T, fill=NA)
df = subset(df, AHI_voor >= 5 & Studienummer!="")
df$Delta_AHI = df$AHI_na - df$AHI_voor
df$Delta_AHI_rug = df$AHI_rug_na - df$AHI_rug_voor
df$Delta_AHI_zij = df$AHI_zij_na = df$AHI_zij_voor
df$Succes = as.numeric(apply(df[,c("AHI_voor","AHI_na")],1, function(x){
    (x[2] < 10) & (x[2]<x[1]/2)  
}))

Vraag 1

AHI voor - AHI na (dus delta AHI), verschil tussen groep pOSAS_voor_1_ja_2_nee

hist(df$Delta_AHI, breaks=20)

qqnorm(df$Delta_AHI)
qqline(df$Delta_AHI)

Data is niet normaal verdeeld, vooral de lagere waardes zijn lager dan je bij een normaal verdeling zou verwachten

Dus een rank test is het beste:

wilcox.test(x=subset(df, pOSAS_voor_1_ja_2_nee == 1)$Delta_AHI, subset(df, pOSAS_voor_1_ja_2_nee == 2)$Delta_AHI, conf.int = T)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  subset(df, pOSAS_voor_1_ja_2_nee == 1)$Delta_AHI and subset(df, pOSAS_voor_1_ja_2_nee == 2)$Delta_AHI
## W = 1273, p-value = 0.125
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -4.5000029  0.6999456
## sample estimates:
## difference in location 
##              -2.100065

Is niet significant, maar komt enigzins in de buurt.

Bij een t-test komt het niet in de buurt:

t.test(x=subset(df, pOSAS_voor_1_ja_2_nee == 1)$Delta_AHI, y=subset(df, pOSAS_voor_1_ja_2_nee == 2)$Delta_AHI, var.equal=T)
## 
##  Two Sample t-test
## 
## data:  subset(df, pOSAS_voor_1_ja_2_nee == 1)$Delta_AHI and subset(df, pOSAS_voor_1_ja_2_nee == 2)$Delta_AHI
## t = -0.56126, df = 138, p-value = 0.5755
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.139333  2.308976
## sample estimates:
## mean of x mean of y 
## -4.033036 -3.117857
df$pOSAS_voor = relevel(factor(ifelse(df$pOSAS_voor_1_ja_2_nee ==1 , "supine_dependent_OSA", "non_supine_dependent_OSA")), ref="supine_dependent_OSA")
tmp = df[,c("Studienummer","pOSAS_voor","AHI_voor","AHI_na")]
tmp = reshape::melt(tmp, id.vars=1:2)

library(GeneralFunctions)
## 
## Attaching package: 'GeneralFunctions'
## The following object is masked from 'package:base':
## 
##     stderr
setTheme()
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:epiDisplay':
## 
##     alpha
library(ggplot2)
ggplot(tmp, aes(x=variable,y=value))+
    theme(legend.position="none")+
    theme(axis.text.y=element_text(angle = 0, hjust = 0))+
    theme(axis.text.x=element_text(angle = 90, hjust = 0,size=10))+
    #scale_y_continuous(limits=c(0,1))+
    geom_boxplot(position=position_dodge(0.9))+
    geom_dotplot(binaxis = "y", dotsize = 1, stackdir = "center", stackratio=1, binwidth = 0.5)+
    facet_wrap(~pOSAS_voor)

ggplot(tmp, aes(x=variable, y=value, group=Studienummer))+
    geom_line(colour = transparent("black",100))+
    geom_point()+
    theme(legend.position="none")+
    theme(axis.text.y=element_text(angle = 0, hjust = 0))+
    theme(axis.text.x=element_text(angle = 90, hjust = 0,size=10))+
    facet_wrap(~pOSAS_voor)

tmp = df[,c("Studienummer","pOSAS_voor", "Delta_AHI","Delta_AHI_rug","Delta_AHI_zij")]
tmp = reshape::melt(tmp, id.vars=1:2)
library(GeneralFunctions)
setTheme()
library(ggplot2)
ggplot(tmp, aes(x=pOSAS_voor,y=value))+
    theme(legend.position="none")+
    theme(axis.text.y=element_text(angle = 0, hjust = 0))+
    theme(axis.text.x=element_text(angle = 90, hjust = 0,size=10))+
    #scale_y_continuous(limits=c(0,1))+
    geom_boxplot(position=position_dodge(0.9), outlier.shape = NA)+
    geom_dotplot(binaxis = "y", dotsize = 0.8, stackdir = "center", stackratio=1, binwidth = 1)+
    facet_wrap(~variable)
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_bindot).

Vraag 2

Percentage ‘succes’, verschil tussen pOSAS_voor_1_ja_2_nee

table(df$pOSAS_voor_1_ja_2_nee, df$Succes)
##    
##      0  1
##   1 62 50
##   2 21  7
50/(63+50)
## [1] 0.4424779
7/29
## [1] 0.2413793
fisher.test(table(df$pOSAS_voor_1_ja_2_nee, df$Succes), conf.int = T)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(df$pOSAS_voor_1_ja_2_nee, df$Succes)
## p-value = 0.08437
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.1377736 1.1190868
## sample estimates:
## odds ratio 
##  0.4158029

Een trend voor minder succes bij patienten met geen pOSAS_voor

chisq.test(df$pOSAS_voor_1_ja_2_nee, df$Succes, correct=F)
## 
##  Pearson's Chi-squared test
## 
## data:  df$pOSAS_voor_1_ja_2_nee and df$Succes
## X-squared = 3.5806, df = 1, p-value = 0.05846
tmp = data.frame(prop.table(table(df$pOSAS_voor, df$Succes), margin=1))
colnames(tmp)=c("pOSAS_voor","Success","Percentage")
tmp$Percentage = 100*tmp$Percentage
ggplot(subset(tmp, Success==1), aes(x=pOSAS_voor,y=Percentage))+
    theme(axis.text.y=element_text(angle = 0, hjust = 0))+
    theme(axis.text.x=element_text(angle = 90, hjust = 0,size=10))+
    #scale_y_continuous(limits=c(0,1))+
    geom_bar(position=position_dodge(0.9), stat="identity", width = 0.8)+
    scale_fill_manual(values=c("black","black"))

df$Severity = factor(ifelse(df$AHI_voor > 5 & df$AHI_voor < 15, "mild", ifelse(df$AHI_voor > 15 & df$AHI_voor < 30,"moderate",NA)), levels=c("mild","moderate"))

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:GeneralFunctions':
## 
##     last
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
tmp = df[,c("Severity","pOSAS_voor","Succes")] %>% as.tbl() %>% group_by(Severity,pOSAS_voor) %>% dplyr::summarize(succesPerc=mean(Succes, na.rm=T))

tmp$Percentage = 100*tmp$succesPerc
ggplot(subset(tmp, !is.na(Severity)), aes(x=Severity,y=Percentage))+
    theme(axis.text.y=element_text(angle = 0, hjust = 0))+
    theme(axis.text.x=element_text(angle = 90, hjust = 0,size=10))+
    #scale_y_continuous(limits=c(0,1))+
    geom_bar(position=position_dodge(0.9), stat="identity", width = 0.8)+
    scale_fill_manual(values=c("black","black"))+
    facet_wrap(~pOSAS_voor)

Vraag 3

Voorspellende factoren voor ‘delta AHI’: BMI, halsomtrek, leeftijd, geslacht, AHI, pOSAS_voor_1_ja_2_nee, AHI_rug_voor

Het simpelste is een linear model:

Eerst moet ik de categoriale variabelen herstellen:

df$Geslacht = ifelse(df$Geslacht_1_man_2_vrouw==1,"Man","Vrouw")
df$pOSAS_voor = ifelse(df$pOSAS_voor_1_ja_2_nee==1,T,F)

Dan het lineaire model:

fit = lm(Delta_AHI~BMI +Halsomtrek +Leeftijd +Geslacht +pOSAS_voor +AHI_rug_voor, data=df)
summary(fit)
## 
## Call:
## lm(formula = Delta_AHI ~ BMI + Halsomtrek + Leeftijd + Geslacht + 
##     pOSAS_voor + AHI_rug_voor, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.260  -4.944  -1.167   2.886  24.992 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    -10.15124   13.17387  -0.771  0.44256   
## BMI              0.75630    0.23816   3.176  0.00192 **
## Halsomtrek      -0.37922    0.37481  -1.012  0.31379   
## Leeftijd         0.05840    0.06789   0.860  0.39150   
## GeslachtVrouw   -5.43800    2.54411  -2.137  0.03470 * 
## pOSAS_voorTRUE   2.28549    1.90385   1.200  0.23245   
## AHI_rug_voor    -0.11925    0.04019  -2.968  0.00366 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.576 on 114 degrees of freedom
##   (19 observations deleted due to missingness)
## Multiple R-squared:  0.1477, Adjusted R-squared:  0.1029 
## F-statistic: 3.293 on 6 and 114 DF,  p-value: 0.005022

Pr(>|z|) is de p-waarde voor elke variable, spreekt verder voor zich.

Let op, je verliest hier 20 patienten omdat de data niet compleet is, dat is best wel veel.

for(x in c("Delta_AHI","BMI","Halsomtrek","Leeftijd","Geslacht","pOSAS_voor","AHI_rug_voor")){
    print(paste0(x,":", sum(is.na(df[,x]))," ontbreken"))
}
## [1] "Delta_AHI:0 ontbreken"
## [1] "BMI:5 ontbreken"
## [1] "Halsomtrek:19 ontbreken"
## [1] "Leeftijd:0 ontbreken"
## [1] "Geslacht:0 ontbreken"
## [1] "pOSAS_voor:0 ontbreken"
## [1] "AHI_rug_voor:0 ontbreken"

Dit is puur de halsomtrek die ontbreekt bij alle 20, als je die weglaat:

fit = lm(Delta_AHI~BMI +Leeftijd +Geslacht +pOSAS_voor +AHI_rug_voor, data=df)
summary(fit)
## 
## Call:
## lm(formula = Delta_AHI ~ BMI + Leeftijd + Geslacht + pOSAS_voor + 
##     AHI_rug_voor, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.728  -4.968  -1.052   2.925  25.629 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    -18.47484    5.95443  -3.103  0.00236 **
## BMI              0.47369    0.16374   2.893  0.00448 **
## Leeftijd         0.08133    0.06226   1.306  0.19379   
## GeslachtVrouw   -3.21049    1.53668  -2.089  0.03865 * 
## pOSAS_voorTRUE   1.62574    1.73228   0.938  0.34974   
## AHI_rug_voor    -0.12310    0.03684  -3.342  0.00109 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.485 on 129 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.1167, Adjusted R-squared:  0.08249 
## F-statistic: 3.409 on 5 and 129 DF,  p-value: 0.00631

Vraag 4

Voorspellende factoren voor ‘succes’: BMI, halsomtrek, leeftijd, geslacht, AHI, pOSAS_voor_1_ja_2_nee, AHI_rug_voor

fit = glm(Succes~BMI +Leeftijd +Geslacht +AHI_voor +pOSAS_voor +AHI_rug_voor, data=df, family = "binomial")
summary(fit)
## 
## Call:
## glm(formula = Succes ~ BMI + Leeftijd + Geslacht + AHI_voor + 
##     pOSAS_voor + AHI_rug_voor, family = "binomial", data = df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8302  -0.9497  -0.6128   1.0910   1.7522  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)   
## (Intercept)     4.1953181  1.9020363   2.206  0.02741 * 
## BMI            -0.1458558  0.0521863  -2.795  0.00519 **
## Leeftijd       -0.0431722  0.0188710  -2.288  0.02215 * 
## GeslachtVrouw   0.7808424  0.4585451   1.703  0.08859 . 
## AHI_voor        0.0629719  0.0478906   1.315  0.18854   
## pOSAS_voorTRUE  0.8292306  0.5682639   1.459  0.14450   
## AHI_rug_voor   -0.0005401  0.0157711  -0.034  0.97268   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 182.49  on 134  degrees of freedom
## Residual deviance: 164.23  on 128  degrees of freedom
##   (5 observations deleted due to missingness)
## AIC: 178.23
## 
## Number of Fisher Scoring iterations: 4
data.frame(logistic.display(logistic.model=fit, simplified=T)$table)
##                       OR lower95ci upper95ci    Pr...Z..
## BMI            0.8642824 0.7802514 0.9573632 0.005191535
## Leeftijd       0.9577464 0.9229699 0.9938332 0.022151398
## GeslachtVrouw  2.1833106 0.8887943 5.3632716 0.088592512
## AHI_voor       1.0649969 0.9695804 1.1698033 0.188539468
## pOSAS_voorTRUE 2.2915548 0.7523555 6.9797100 0.144500463
## AHI_rug_voor   0.9994600 0.9690386 1.0308365 0.972680531
df$BMI5 = df$BMI/5
df$Leeftijd10 = df$Leeftijd/10
df$AHI_voor10 = df$AHI_voor/10
df$AHI_rug_voor10 = df$AHI_rug_voor/10
fit = glm(Succes~BMI5 +Leeftijd10 +Geslacht +AHI_voor10 +pOSAS_voor +AHI_rug_voor10, data=df, family = "binomial")
summary(fit)
## 
## Call:
## glm(formula = Succes ~ BMI5 + Leeftijd10 + Geslacht + AHI_voor10 + 
##     pOSAS_voor + AHI_rug_voor10, family = "binomial", data = df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8302  -0.9497  -0.6128   1.0910   1.7522  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)   
## (Intercept)     4.195318   1.902036   2.206  0.02741 * 
## BMI5           -0.729279   0.260932  -2.795  0.00519 **
## Leeftijd10     -0.431722   0.188710  -2.288  0.02215 * 
## GeslachtVrouw   0.780842   0.458545   1.703  0.08859 . 
## AHI_voor10      0.629719   0.478906   1.315  0.18854   
## pOSAS_voorTRUE  0.829231   0.568264   1.459  0.14450   
## AHI_rug_voor10 -0.005401   0.157711  -0.034  0.97268   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 182.49  on 134  degrees of freedom
## Residual deviance: 164.23  on 128  degrees of freedom
##   (5 observations deleted due to missingness)
## AIC: 178.23
## 
## Number of Fisher Scoring iterations: 4
# install.packages("epiDisplay")
library(epiDisplay)
tmp = data.frame(epiDisplay::logistic.display(logistic.model=fit, simplified=T)$table)
tmp$variable = factor(row.names(tmp), levels=rev(row.names(tmp)))
colnames(tmp)= c("OR","lower95ci","upper95ci","p.val","variable")
tmp$Outcome="Success"

require(ggplot2)
maxOR=10
tmp$lower95ci <- sapply(tmp$lower95ci, function(x) {
    ifelse(x < 1, (-1/x) + 1, x - 1)
})
tmp$upper95ci <- sapply(tmp$upper95ci, function(x) {
    ifelse(x < 1, (-1/x) + 1, x - 1)
})
tmp$OR <- sapply(tmp$OR, function(x) {
    ifelse(x < 1, (-1/x) + 1, x - 1)
})
lbl = c(paste0("1/", seq(maxOR, 1, -1) + 1), seq(0, maxOR, 1) + 1)

tmp <- tmp[order(tmp$OR, decreasing = F),]
tmp$varType = c("continuous","continuous","continuous","continuous","categorical","categorical")
ggplot(tmp, aes(x = variable, y = OR)) + 
    geom_point(position = position_dodge(0.9), stat = "identity", size=4) + 
    geom_errorbar(aes(ymin = lower95ci, ymax = upper95ci)) + 
    coord_flip(ylim=c(-4,4))+
    scale_y_continuous(breaks = seq(-maxOR, maxOR, 1), labels = lbl)+
    scale_colour_manual(values = colours) + 
    geom_hline(yintercept = 0, col = "darkgrey")+
    theme(panel.margin = unit(2, "lines"))+
    ggtitle("binomial model for success")+xlab("") + ylab("Odds Ratio (95% CI)")
## Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
## instead

    #facet_wrap(~varType, scales="free",ncol=1)

De halsomtrek en BMI hebben een tegenovergesteld effect, niet helemaal zoals verwacht:

preds=predict(fit,df)
plot(y=preds,x=df$BMI)

plot(y=preds,x=df$Halsomtrek)

Dit heeft er vast mee te maken dat ze sterk gecorreleerd zijn:

plot(df$Halsomtrek, df$BMI)

cor.test(df$Halsomtrek, df$BMI)
## 
##  Pearson's product-moment correlation
## 
## data:  df$Halsomtrek and df$BMI
## t = 4.3425, df = 119, p-value = 2.977e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2048823 0.5143908
## sample estimates:
##      cor 
## 0.369853

Pr(>|z|) is weer de p-waarde voor elke variable

zonder halsomtrek:

fit = glm(Succes~BMI +Leeftijd +Geslacht +AHI_voor +pOSAS_voor +AHI_rug_voor, data=df, family = "binomial")
summary(fit)
## 
## Call:
## glm(formula = Succes ~ BMI + Leeftijd + Geslacht + AHI_voor + 
##     pOSAS_voor + AHI_rug_voor, family = "binomial", data = df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8302  -0.9497  -0.6128   1.0910   1.7522  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)   
## (Intercept)     4.1953181  1.9020363   2.206  0.02741 * 
## BMI            -0.1458558  0.0521863  -2.795  0.00519 **
## Leeftijd       -0.0431722  0.0188710  -2.288  0.02215 * 
## GeslachtVrouw   0.7808424  0.4585451   1.703  0.08859 . 
## AHI_voor        0.0629719  0.0478906   1.315  0.18854   
## pOSAS_voorTRUE  0.8292306  0.5682639   1.459  0.14450   
## AHI_rug_voor   -0.0005401  0.0157711  -0.034  0.97268   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 182.49  on 134  degrees of freedom
## Residual deviance: 164.23  on 128  degrees of freedom
##   (5 observations deleted due to missingness)
## AIC: 178.23
## 
## Number of Fisher Scoring iterations: 4

zonder BMI:

fit = glm(Succes~Halsomtrek +Leeftijd +Geslacht +AHI_voor +pOSAS_voor +AHI_rug_voor, data=df, family = "binomial")
summary(fit)
## 
## Call:
## glm(formula = Succes ~ Halsomtrek + Leeftijd + Geslacht + AHI_voor + 
##     pOSAS_voor + AHI_rug_voor, family = "binomial", data = df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6935  -1.0090  -0.7524   1.1889   1.8530  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)  
## (Intercept)    -0.263002   3.626397  -0.073   0.9422  
## Halsomtrek      0.011240   0.081306   0.138   0.8900  
## Leeftijd       -0.044214   0.020059  -2.204   0.0275 *
## GeslachtVrouw   0.639510   0.575770   1.111   0.2667  
## AHI_voor        0.059106   0.047454   1.246   0.2129  
## pOSAS_voorTRUE  0.917035   0.607354   1.510   0.1311  
## AHI_rug_voor   -0.001475   0.016224  -0.091   0.9276  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 163.34  on 120  degrees of freedom
## Residual deviance: 152.97  on 114  degrees of freedom
##   (19 observations deleted due to missingness)
## AIC: 166.97
## 
## Number of Fisher Scoring iterations: 4

Vraag 5

Rugligging percentage voor en na pOSA

tmp = df[,c("Studienummer","Tijd_rug_na","Tijd_rug_voor","pOSAS_voor")]

tmp = reshape::melt(tmp, id.vars=c(1,4))

ggplot(tmp, aes(x=pOSAS_voor,y=value))+
    theme(legend.position="none")+
    theme(axis.text.y=element_text(angle = 0, hjust = 0))+
    theme(axis.text.x=element_text(angle = 90, hjust = 0,size=10))+
    #scale_y_continuous(limits=c(0,1))+
    geom_boxplot(position=position_dodge(0.9), outlier.shape = NA)+
    geom_dotplot(binaxis = "y", dotsize = 1, stackdir = "center", stackratio=1, binwidth = 1.5)+
    facet_wrap(~variable)