Genreal Health Questionaire Vs Phsych Cases

Overblik over data

#Indlæs nødvendige pakker
library(foreign)
library(ggplot2)

data<- read.dta(file.path("C:/Users/Vision/Desktop/STATISTIK/BIOSTATISTIK2/GHQScore12.dta"))
head(data)
  GHQScore    Sex PsycCase
1        0 Female        0
2        0 Female        0
3        0 Female        0
4        0 Female        0
5        1 Female        0
6        1 Female        0
data <- na.omit(data)

data$Sex <- as.factor(data$Sex)                 # Nu burde det ikke være nødvendigt at omkode med dummyvariabel
contrasts(data$Sex)                             # Viser hvorledes R selv koder dummyvariablen
       Male
Female    0
Male      1
ggplot(data=data, aes(x=PsycCase, y= GHQScore, fill=Sex)) + 
  geom_boxplot() + 
  ggtitle("Score Vs Case")

Opbygning af modellen

# skal GHQScore være kontinuerlig eller skal den inddelles i kategorier?
# I litteraturen er der i hvert fald eksempler på at man har behandlet GHQ som en kontinuert variabel.
# Selvom dette ikke nødvendigvis er det # korrekte i dette tilfælde, lægger vi ud med dette:

model.crude<- glm(PsycCase ~ GHQScore, data=data, family=binomial)
summary (model.crude)

Call:
glm(formula = PsycCase ~ GHQScore, family = binomial, data = data)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.3559   0.3588   0.3588   0.5099   2.5371  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  2.71073    0.27245   9.950  < 2e-16 ***
GHQScore    -0.73604    0.09457  -7.783 7.11e-15 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 309.32  on 277  degrees of freedom
Residual deviance: 195.25  on 276  degrees of freedom
AIC: 199.25

Number of Fisher Scoring iterations: 5
round(exp(cbind(Estimate=coef(model.crude), confint(model.crude))),2)       #omregner fra log odds så det er lettere at tolke
Waiting for profiling to be done...
            Estimate 2.5 % 97.5 %
(Intercept)    15.04  9.10  26.59
GHQScore        0.48  0.39   0.57
# Inkl. køn 
model.1<- glm(PsycCase ~ GHQScore + Sex, data=data, family=binomial)  
summary (model.1)

Call:
glm(formula = PsycCase ~ GHQScore + Sex, family = binomial, data = data)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.6312   0.2525   0.3985   0.3985   2.3925  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  2.49351    0.28163   8.854  < 2e-16 ***
GHQScore    -0.77910    0.09903  -7.867 3.63e-15 ***
SexMale      0.93609    0.43434   2.155   0.0311 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 309.32  on 277  degrees of freedom
Residual deviance: 190.13  on 275  degrees of freedom
AIC: 196.13

Number of Fisher Scoring iterations: 5
round(exp(cbind(Estimate=coef(model.1), confint(model.1))),2)      #omregner fra log odds så det er lettere at tolke
Waiting for profiling to be done...
            Estimate 2.5 % 97.5 %
(Intercept)    12.10  7.18  21.78
GHQScore        0.46  0.37   0.55
SexMale         2.55  1.13   6.28
# Nu prøver vi lige at dele GHQScore op så det bliver en kategorisk vairabel:
# Først lige en graf for at se om der er oplagte steder at dele op

data$GHQScore <- as.numeric(data$GHQScore) 
hist (data$GHQScore) 

# Umiddelbart vil en fordeling på; 
                                   # 0+1+2, 3+4, 5+6, 7+8+9   virke relevant:

data$GHQScore[data$GHQScore<3] <- "Verylow"
data$GHQScore[data$GHQScore==3] <- "low"
data$GHQScore[data$GHQScore==4] <- "low"
data$GHQScore[data$GHQScore==5] <- "Medium"
data$GHQScore[data$GHQScore==6] <- "Medium"
data$GHQScore[data$GHQScore==7] <- "High"
data$GHQScore[data$GHQScore==8] <- "High"
data$GHQScore[data$GHQScore==9] <- "High"
data$GHQScore[data$GHQScore==10] <- "High"

# Så prøver vi at køre modellen igen

data$GHQScore <- as.factor(data$GHQScore) 

model.2 <- glm(data$PsycCase ~ data$GHQScore, data=data)
summary(model.2)

Call:
glm(formula = data$PsycCase ~ data$GHQScore, data = data)

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-0.90187   0.09813   0.09813   0.09813   0.86364  

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)           0.13636    0.07110   1.918  0.05617 .  
data$GHQScorelow      0.28030    0.09844   2.848  0.00474 ** 
data$GHQScoreMedium   0.08586    0.10599   0.810  0.41862    
data$GHQScoreVerylow  0.76551    0.07467  10.252  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for gaussian family taken to be 0.1112212)

    Null deviance: 51.367  on 277  degrees of freedom
Residual deviance: 30.475  on 274  degrees of freedom
AIC: 184.35

Number of Fisher Scoring iterations: 2
round(exp(cbind(Estimate=coef(model.2), confint(model.2))),2)  # eksponentierer så estimatet bliver lettere at fortolke
Waiting for profiling to be done...
                     Estimate 2.5 % 97.5 %
(Intercept)              1.15  1.00   1.32
data$GHQScorelow         1.32  1.09   1.61
data$GHQScoreMedium      1.09  0.89   1.34
data$GHQScoreVerylow     2.15  1.86   2.49
#Og inkl. køn

model.3 <- glm(data$PsycCase ~ data$GHQScore + Sex, data=data)
#Summary(model.3)
round(exp(cbind(Estimate=coef(model.3), confint(model.3))),2)    # eksponentierer så estimatet bliver lettere at fortolke
Waiting for profiling to be done...
                     Estimate 2.5 % 97.5 %
(Intercept)              1.09  0.94   1.26
data$GHQScorelow         1.35  1.11   1.64
data$GHQScoreMedium      1.11  0.90   1.37
data$GHQScoreVerylow     2.20  1.90   2.55
SexMale                  1.08  1.00   1.18