Overblik over data
#Indlæs nødvendige pakker
library(foreign)
library(ggplot2)
data<- read.dta(file.path("C:/Users/Vision/Desktop/STATISTIK/BIOSTATISTIK2/GHQScore12.dta"))
head(data)
GHQScore Sex PsycCase
1 0 Female 0
2 0 Female 0
3 0 Female 0
4 0 Female 0
5 1 Female 0
6 1 Female 0
data <- na.omit(data)
data$Sex <- as.factor(data$Sex) # Nu burde det ikke være nødvendigt at omkode med dummyvariabel
contrasts(data$Sex) # Viser hvorledes R selv koder dummyvariablen
Male
Female 0
Male 1
ggplot(data=data, aes(x=PsycCase, y= GHQScore, fill=Sex)) +
geom_boxplot() +
ggtitle("Score Vs Case")

Opbygning af modellen
# skal GHQScore være kontinuerlig eller skal den inddelles i kategorier?
# I litteraturen er der i hvert fald eksempler på at man har behandlet GHQ som en kontinuert variabel.
# Selvom dette ikke nødvendigvis er det # korrekte i dette tilfælde, lægger vi ud med dette:
model.crude<- glm(PsycCase ~ GHQScore, data=data, family=binomial)
summary (model.crude)
Call:
glm(formula = PsycCase ~ GHQScore, family = binomial, data = data)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.3559 0.3588 0.3588 0.5099 2.5371
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2.71073 0.27245 9.950 < 2e-16 ***
GHQScore -0.73604 0.09457 -7.783 7.11e-15 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 309.32 on 277 degrees of freedom
Residual deviance: 195.25 on 276 degrees of freedom
AIC: 199.25
Number of Fisher Scoring iterations: 5
round(exp(cbind(Estimate=coef(model.crude), confint(model.crude))),2) #omregner fra log odds så det er lettere at tolke
Waiting for profiling to be done...
Estimate 2.5 % 97.5 %
(Intercept) 15.04 9.10 26.59
GHQScore 0.48 0.39 0.57
# Inkl. køn
model.1<- glm(PsycCase ~ GHQScore + Sex, data=data, family=binomial)
summary (model.1)
Call:
glm(formula = PsycCase ~ GHQScore + Sex, family = binomial, data = data)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.6312 0.2525 0.3985 0.3985 2.3925
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2.49351 0.28163 8.854 < 2e-16 ***
GHQScore -0.77910 0.09903 -7.867 3.63e-15 ***
SexMale 0.93609 0.43434 2.155 0.0311 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 309.32 on 277 degrees of freedom
Residual deviance: 190.13 on 275 degrees of freedom
AIC: 196.13
Number of Fisher Scoring iterations: 5
round(exp(cbind(Estimate=coef(model.1), confint(model.1))),2) #omregner fra log odds så det er lettere at tolke
Waiting for profiling to be done...
Estimate 2.5 % 97.5 %
(Intercept) 12.10 7.18 21.78
GHQScore 0.46 0.37 0.55
SexMale 2.55 1.13 6.28
# Nu prøver vi lige at dele GHQScore op så det bliver en kategorisk vairabel:
# Først lige en graf for at se om der er oplagte steder at dele op
data$GHQScore <- as.numeric(data$GHQScore)
hist (data$GHQScore)

# Umiddelbart vil en fordeling på;
# 0+1+2, 3+4, 5+6, 7+8+9 virke relevant:
data$GHQScore[data$GHQScore<3] <- "Verylow"
data$GHQScore[data$GHQScore==3] <- "low"
data$GHQScore[data$GHQScore==4] <- "low"
data$GHQScore[data$GHQScore==5] <- "Medium"
data$GHQScore[data$GHQScore==6] <- "Medium"
data$GHQScore[data$GHQScore==7] <- "High"
data$GHQScore[data$GHQScore==8] <- "High"
data$GHQScore[data$GHQScore==9] <- "High"
data$GHQScore[data$GHQScore==10] <- "High"
# Så prøver vi at køre modellen igen
data$GHQScore <- as.factor(data$GHQScore)
model.2 <- glm(data$PsycCase ~ data$GHQScore, data=data)
summary(model.2)
Call:
glm(formula = data$PsycCase ~ data$GHQScore, data = data)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.90187 0.09813 0.09813 0.09813 0.86364
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.13636 0.07110 1.918 0.05617 .
data$GHQScorelow 0.28030 0.09844 2.848 0.00474 **
data$GHQScoreMedium 0.08586 0.10599 0.810 0.41862
data$GHQScoreVerylow 0.76551 0.07467 10.252 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for gaussian family taken to be 0.1112212)
Null deviance: 51.367 on 277 degrees of freedom
Residual deviance: 30.475 on 274 degrees of freedom
AIC: 184.35
Number of Fisher Scoring iterations: 2
round(exp(cbind(Estimate=coef(model.2), confint(model.2))),2) # eksponentierer så estimatet bliver lettere at fortolke
Waiting for profiling to be done...
Estimate 2.5 % 97.5 %
(Intercept) 1.15 1.00 1.32
data$GHQScorelow 1.32 1.09 1.61
data$GHQScoreMedium 1.09 0.89 1.34
data$GHQScoreVerylow 2.15 1.86 2.49
#Og inkl. køn
model.3 <- glm(data$PsycCase ~ data$GHQScore + Sex, data=data)
#Summary(model.3)
round(exp(cbind(Estimate=coef(model.3), confint(model.3))),2) # eksponentierer så estimatet bliver lettere at fortolke
Waiting for profiling to be done...
Estimate 2.5 % 97.5 %
(Intercept) 1.09 0.94 1.26
data$GHQScorelow 1.35 1.11 1.64
data$GHQScoreMedium 1.11 0.90 1.37
data$GHQScoreVerylow 2.20 1.90 2.55
SexMale 1.08 1.00 1.18