## Create data
malmel <- read.table(header = TRUE, sep = ",", text = "
type, site, frequency
melanotic freckle, head & neck, 22
melanotic freckle, trunk, 2
melanotic freckle, extremities, 10
superficial spreading, head & neck, 16
superficial spreading, trunk, 54
superficial spreading, extremities, 115
nodular, head & neck, 19
nodular, trunk, 33
nodular, extremities, 73
indeterminate, head & neck, 11
indeterminate, trunk, 17
indeterminate, extremities, 28
")
malmel
## type site frequency
## 1 melanotic freckle head & neck 22
## 2 melanotic freckle trunk 2
## 3 melanotic freckle extremities 10
## 4 superficial spreading head & neck 16
## 5 superficial spreading trunk 54
## 6 superficial spreading extremities 115
## 7 nodular head & neck 19
## 8 nodular trunk 33
## 9 nodular extremities 73
## 10 indeterminate head & neck 11
## 11 indeterminate trunk 17
## 12 indeterminate extremities 28
## Set reference levels
malmel$type <- factor(malmel$type, levels = c("melanotic freckle", "superficial spreading", "nodular", "indeterminate"))
malmel$site <- factor(malmel$site, levels = c(" head & neck", " trunk", " extremities"))
Is there any association between the type of skin lesions and the sites. The approach taken here models the probability of each cell (frequency/total count) as the outcome, and both of the margin variables as explanatory variables.
xtabs(frequency ~ type + site, malmel)
## site
## type head & neck trunk extremities
## melanotic freckle 22 2 10
## superficial spreading 16 54 115
## nodular 19 33 73
## indeterminate 11 17 28
Both margin variables are used as the predictors. The offset corresponding to the total count is the same for all data rows, thus, it can be omitted.
## Fit a saturated model
fitSaturated <- glm(formula = frequency ~ type + site + type:site,
family = poisson(link = "log"),
data = malmel)
summary(fitSaturated)
##
## Call:
## glm(formula = frequency ~ type + site + type:site, family = poisson(link = "log"),
## data = malmel)
##
## Deviance Residuals:
## [1] 0 0 0 0 0 0 0 0 0 0 0 0
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.091 0.213 14.50 < 2e-16 ***
## typesuperficial spreading -0.318 0.329 -0.97 0.33243
## typenodular -0.147 0.313 -0.47 0.63971
## typeindeterminate -0.693 0.369 -1.88 0.06051 .
## site trunk -2.398 0.739 -3.25 0.00117 **
## site extremities -0.788 0.381 -2.07 0.03870 *
## typesuperficial spreading:site trunk 3.614 0.792 4.57 0.000004962 ***
## typenodular:site trunk 2.950 0.793 3.72 0.00020 ***
## typeindeterminate:site trunk 2.833 0.834 3.40 0.00068 ***
## typesuperficial spreading:site extremities 2.761 0.465 5.93 0.000000003 ***
## typenodular:site extremities 2.134 0.460 4.64 0.000003516 ***
## typeindeterminate:site extremities 1.723 0.522 3.30 0.00096 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 2.9520e+02 on 11 degrees of freedom
## Residual deviance: -1.8652e-14 on 0 degrees of freedom
## AIC: 83.11
##
## Number of Fisher Scoring iterations: 3
## Fit an additive model
fitAdditive <- glm(formula = frequency ~ type + site,
family = poisson(link = "log"),
data = malmel)
summary(fitAdditive)
##
## Call:
## glm(formula = frequency ~ type + site, family = poisson(link = "log"),
## data = malmel)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.045 -1.074 0.130 0.586 5.135
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.754 0.204 8.60 < 2e-16 ***
## typesuperficial spreading 1.694 0.187 9.08 < 2e-16 ***
## typenodular 1.302 0.193 6.73 1.7e-11 ***
## typeindeterminate 0.499 0.217 2.30 0.0217 *
## site trunk 0.444 0.155 2.86 0.0043 **
## site extremities 1.201 0.138 8.68 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 295.203 on 11 degrees of freedom
## Residual deviance: 51.795 on 6 degrees of freedom
## AIC: 122.9
##
## Number of Fisher Scoring iterations: 5
By testing the difference between the saturated model with the product term and the additive model without the product term, whether there is a significant association between the type and site is assessed. If there is not association, then the joint distribution is the product of the two marginal distribution, i.e, addition on the log scale with out the product term (additive model is sufficient). The
## Likelihood ratio test
anova(fitSaturated, fitAdditive, test = "LR")
## Analysis of Deviance Table
##
## Model 1: frequency ~ type + site + type:site
## Model 2: frequency ~ type + site
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 0 0.0
## 2 6 51.8 -6 -51.8 0.0000000021 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The research question here is whether the association between aspirin use and case-control status for ulcers are modified by the sites (gastric vs duodenal). Thus, the effect modification is represented by the product term between the aspirin variable and the ulcer (site) variable.
## Create data
aspirinDat <- read.table(header = TRUE, text = "
ulcer case-control aspirin frequency
gastric control non-user 62
gastric control user 6
gastric case non-user 39
gastric case user 25
duodenal control non-user 53
duodenal control user 8
duodenal case non-user 49
duodenal case user 8
")
aspirinDat
## ulcer case.control aspirin frequency
## 1 gastric control non-user 62
## 2 gastric control user 6
## 3 gastric case non-user 39
## 4 gastric case user 25
## 5 duodenal control non-user 53
## 6 duodenal control user 8
## 7 duodenal case non-user 49
## 8 duodenal case user 8
## Reorder
aspirinDat$case.control <- factor(aspirinDat$case.control, levels = c("control","case"))
aspirinDat$ulcer <- factor(aspirinDat$ulcer, levels = c("gastric","duodenal"))
## Show as a table (Is there an association between the case status and aspirin use within levels of ulcer type?)
xtabs(frequency ~ case.control + aspirin + ulcer, data = aspirinDat)
## , , ulcer = gastric
##
## aspirin
## case.control non-user user
## control 62 6
## case 39 25
##
## , , ulcer = duodenal
##
## aspirin
## case.control non-user user
## control 53 8
## case 49 8
## Represents a hypothesis that aspirin effect is modified by the ulcer sites.
fitEffectMod <- glm(formula = frequency ~ ulcer + case.control + ulcer:case.control + aspirin + aspirin:case.control + aspirin:ulcer,
family = poisson(link = "log"),
data = aspirinDat)
summary(fitEffectMod)
##
## Call:
## glm(formula = frequency ~ ulcer + case.control + ulcer:case.control +
## aspirin + aspirin:case.control + aspirin:ulcer, family = poisson(link = "log"),
## data = aspirinDat)
##
## Deviance Residuals:
## 1 2 3 4 5 6 7 8
## 0.449 -1.208 -0.539 0.728 -0.466 1.467 0.507 -1.083
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.070 0.129 31.64 < 2e-16 ***
## ulcerduodenal -0.036 0.180 -0.20 0.8419
## case.controlcase -0.321 0.194 -1.66 0.0978 .
## aspirinuser -1.822 0.308 -5.92 0.0000000033 ***
## ulcerduodenal:case.controlcase 0.106 0.262 0.40 0.6859
## case.controlcase:aspirinuser 1.143 0.352 3.25 0.0012 **
## ulcerduodenal:aspirinuser -0.700 0.346 -2.02 0.0431 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 127.749 on 7 degrees of freedom
## Residual deviance: 6.283 on 1 degrees of freedom
## AIC: 59.9
##
## Number of Fisher Scoring iterations: 4
## Represents a hypothesis of no effect modification
fitNoEffectMod <- glm(formula = frequency ~ ulcer + case.control + ulcer:case.control + aspirin + aspirin:case.control,
family = poisson(link = "log"),
data = aspirinDat)
summary(fitNoEffectMod)
##
## Call:
## glm(formula = frequency ~ ulcer + case.control + ulcer:case.control +
## aspirin + aspirin:case.control, family = poisson(link = "log"),
## data = aspirinDat)
##
## Deviance Residuals:
## 1 2 3 4 5 6 7 8
## 0.177 -0.525 -1.138 1.695 -0.188 0.519 1.139 -2.112
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.1046 0.1251 32.81 <2e-16 ***
## ulcerduodenal -0.1086 0.1764 -0.62 0.5379
## case.controlcase -0.2642 0.1854 -1.43 0.1542
## aspirinuser -2.1059 0.2831 -7.44 1e-13 ***
## ulcerduodenal:case.controlcase -0.0072 0.2535 -0.03 0.9773
## case.controlcase:aspirinuser 1.1251 0.3490 3.22 0.0013 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 127.749 on 7 degrees of freedom
## Residual deviance: 10.538 on 2 degrees of freedom
## AIC: 62.15
##
## Number of Fisher Scoring iterations: 4
## Likelihood ratio test
anova(fitEffectMod, fitNoEffectMod, test = "LR")
## Analysis of Deviance Table
##
## Model 1: frequency ~ ulcer + case.control + ulcer:case.control + aspirin +
## aspirin:case.control + aspirin:ulcer
## Model 2: frequency ~ ulcer + case.control + ulcer:case.control + aspirin +
## aspirin:case.control
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1 6.28
## 2 2 10.54 -1 -4.26 0.039 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1