1 Data Management

載入資料並改變項名稱

dta_HW2 <- read.table("C:/Users/ASUS/Desktop/data/cantabrian.txt", h=T)
names(dta_HW2) <- c("ID","Mental","Gender","Type")
str(dta_HW2)

## 'data.frame':    1646 obs. of  4 variables:
##  $ ID    : int  1 1 2 2 3 3 4 4 5 5 ...
##  $ Mental: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Gender: chr  "M" "M" "M" "M" ...
##  $ Type  : chr  "GP" "GHQ" "GP" "GHQ" ...

定義為名義變項

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

dtaHW2 <- dta_HW2 %>% 
  mutate(Mental= factor(Mental, 
                         levels=0:1, 
                         labels=c("Illness","Otherwise")),
         Gender = factor(Gender, levels = c("M", "F")),
         Type = factor(Type, 
                       levels=c("GP","GHQ")))

Table(gender/mental/type)

ftable(dta_HW2, row.vars = c('Gender','Mental'), col.vars = c('Type'))

##               Type GHQ  GP
## Gender Mental             
## F      0           306 420
##        1           193  79
## M      0           244 287
##        1            80  37

2 Analysis

2.1 m0

性別和施測方式(訪談或問卷) 為判定是否為Mental illness的重要變項，但二者間為觀察到有交互作存在。

library(geepack)
summary(m0 <- geeglm(Mental ~ Gender*Type, data=dta_HW2, id = ID, corstr="exchangeable", family = binomial))

## 
## Call:
## geeglm(formula = Mental ~ Gender * Type, family = binomial, data = dta_HW2, 
##     id = ID, corstr = "exchangeable")
## 
##  Coefficients:
##                Estimate  Std.err   Wald Pr(>|W|)    
## (Intercept)    -0.46089  0.09192 25.141 5.33e-07 ***
## GenderM        -0.65425  0.15826 17.089 3.57e-05 ***
## TypeGP         -1.20991  0.12651 91.462  < 2e-16 ***
## GenderM:TypeGP  0.27649  0.22828  1.467    0.226    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation structure = exchangeable 
## Estimated Scale Parameters:
## 
##             Estimate Std.err
## (Intercept)        1 0.07865
##   Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha   0.2975 0.04744
## Number of clusters:   823  Maximum cluster size: 2

2.2 Finding (in table)

男生F (odds=.52)與使用Gp(odds=.30)較不會被判定為有Mental illness

sjPlot::tab_model(m0, show.obs=F, show.ngroups = F)

	Mental
Predictors	Odds Ratios	CI	p
(Intercept)	0.63	0.53 – 0.76	<0.001
Gender [M]	0.52	0.38 – 0.71	<0.001
Type [GP]	0.30	0.23 – 0.38	<0.001
Gender [M] * Type [GP]	1.32	0.84 – 2.06	0.226

2.3 Barplot

par(mfrow=c(1,2))

barplot(prop.table(with(subset(dta_HW2, Type=="GP"), 
                           ftable(Gender, Mental)), m=2), 
        xlab="Menal illness \n (Illness, Otherwise)",
        ylab="Proportion", 
        ylim=c(0,1),
        main="GP", 
        beside=T)

legend('topleft', c("Male","Female"), 
       col=c("black","gray"), 
       pch=15, bty='n', cex=.5)

barplot(prop.table(with(subset(dta_HW2, Type=="GHQ"), 
                           ftable(Gender, Mental)), m=2), 
        xlab="Mental illness \n (Illness, Otherwise)",
        ylab="Proportion", 
        ylim=c(0,1),
        main="GHQ", 
        beside=T)

The End

Cantabrian example replication

Szu-Yu Chen

15 November 2020

1 Data Management

2 Analysis

2.1 m0

2.2 Finding (in table)

2.3 Barplot