setwd("/Users/mchsu/Downloads")
library(MASS)
library(contrast)

## Loading required package: rms

## Loading required package: Hmisc

## Loading required package: lattice

## Loading required package: survival

## Loading required package: Formula

## Loading required package: ggplot2

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:base':
## 
##     format.pval, units

## Loading required package: SparseM

## 
## Attaching package: 'SparseM'

## The following object is masked from 'package:base':
## 
##     backsolve

library(multcomp)

## Loading required package: mvtnorm

## Loading required package: TH.data

## 
## Attaching package: 'TH.data'

## The following object is masked from 'package:MASS':
## 
##     geyser

OR for remission comparing the first labeling index group (‘8-12’) to the last labeling index group (‘34-38’)

remission <- read.csv('remission.csv')
head(remission)

##     LLI total remiss grp
## 1  8-12     7      1   1
## 2 14-18     7      1   2
## 3 20-24     6      3   3
## 4 26-32     3      2   4
## 5 34-38     4      3   5

cat('\n Order: Default - Alphanumeric ordering \n Order: GLM Coding \n')

## 
##  Order: Default - Alphanumeric ordering 
##  Order: GLM Coding

M <- glm(cbind(remiss,total-remiss) ~ LLI, family=binomial(), data=remission)
contrast(M,list(LLI='8-12'), list(LLI='34-38'), fun=exp)

## Warning in qt((1 + conf.int)/2, dfdm): NaNs produced

## glm model parameter contrast
## 
##    Contrast     S.E. Lower Upper     t df Pr(>|t|)
## 1 -2.890372 1.581139   NaN   NaN -1.83  0   0.0675

User Defined Levels order

levels(remission$LLI)

## [1] "14-18" "20-24" "26-32" "34-38" "8-12"

remission$LLI <- factor(remission$LLI, levels = c("8-12", "14-18", "20-24", "26-32", "34-38"))
levels(remission$LLI)

## [1] "8-12"  "14-18" "20-24" "26-32" "34-38"

OR for remission comparing the first labeling index group (‘8-12’) to the last labeling index group (‘34-38’)

M1 <- glm(cbind(remiss,total-remiss) ~ LLI, family=binomial(), data=remission)
contrast(M1,list(LLI='8-12'), list(LLI='34-38'), fun=exp)

## Warning in qt((1 + conf.int)/2, dfdm): NaNs produced

## glm model parameter contrast
## 
##    Contrast     S.E. Lower Upper     t df Pr(>|t|)
## 1 -2.890372 1.581139   NaN   NaN -1.83  0   0.0675

Change the Contrasts (vs. Corresponds to SAS)

contr.SAS (contr.treatment) that sets the base level to be the last level of the factor. The coefficients

produced should be equivalent to those in SAS procedures.

options(contrasts = c("contr.SAS", "contr.poly"))  # SAS Contrast
options()$contrasts

## [1] "contr.SAS"  "contr.poly"

M2 <- glm(cbind(remiss,total-remiss) ~ LLI, family=binomial(), data=remission); contrast(M2,list(LLI='8-12'), list(LLI='34-38'), fun=exp)

## Warning in qt((1 + conf.int)/2, dfdm): NaNs produced

## glm model parameter contrast
## 
##    Contrast     S.E. Lower Upper     t df Pr(>|t|)
## 1 -2.890372 1.581139   NaN   NaN -1.83  0   0.0675

options(contrasts = c("contr.treatment", "contr.poly"))  # Default contrast
options()$contrasts

## [1] "contr.treatment" "contr.poly"

M3 <- glm(cbind(remiss,total-remiss) ~ LLI, family=binomial(), data=remission); contrast(M3, list(LLI='8-12'), list(LLI='34-38'), fun=exp)

## Warning in qt((1 + conf.int)/2, dfdm): NaNs produced

## glm model parameter contrast
## 
##    Contrast     S.E. Lower Upper     t df Pr(>|t|)
## 1 -2.890372 1.581139   NaN   NaN -1.83  0   0.0675

options(contrasts = c("contr.sum", "contr.poly"))    # Reference contrast
options()$contrasts

## [1] "contr.sum"  "contr.poly"

M4 <- glm(cbind(remiss,total-remiss) ~ LLI, family=binomial(), data=remission); contrast(M4, list(LLI='8-12'), list(LLI='34-38'), fun=exp)

## Warning in qt((1 + conf.int)/2, dfdm): NaNs produced

## glm model parameter contrast
## 
##    Contrast     S.E. Lower Upper     t df Pr(>|t|)
## 1 -2.890372 1.581139   NaN   NaN -1.83  0   0.0675

CHD in Framingham Study

chd <- read.csv('CHD.csv') 
head(chd)

##        Chol       SBP CHD Total
## 1      <200      <127   2   119
## 2      <200 127 - 146   3   124
## 3      <200 147 - 166   3    50
## 4      <200      167+   4    26
## 5 200 - 219      <127   3    88
## 6 200 - 219 127 - 146   2   100

cat( '\n Contrasting <200 vs. 220-259 with CONTRAST \n')

## 
##  Contrasting <200 vs. 220-259 with CONTRAST

M1 <- glm(cbind(CHD,Total-CHD) ~ Chol + SBP, family=binomial(), data=chd)
contrast(M1, list(Chol='<200', SBP=levels(chd$SBP)), list(Chol='220 - 259', SBP=levels(chd$SBP)),type='average')

## glm model parameter contrast
## 
##     Contrast      S.E.     Lower     Upper    t df Pr(>|t|)
## 1 -0.5622288 0.3507978 -1.355789 0.2313309 -1.6  9   0.1435

contrast(M1, list(Chol='<200', SBP='127 - 146'), list(Chol='220 - 259', SBP='127 - 146'))

## glm model parameter contrast
## 
##    Contrast      S.E.     Lower     Upper    t df Pr(>|t|)
##  -0.5622288 0.3507978 -1.355789 0.2313309 -1.6  9   0.1435

contrast(M1, list(Chol='<200', SBP=c('<127','147 - 166')), list(Chol='<200',  SBP='127 - 146'),type='average')

## glm model parameter contrast
## 
##    Contrast      S.E.      Lower     Upper   t df Pr(>|t|)
## 1 0.3076389 0.2569207 -0.2735561 0.8888339 1.2  9   0.2617

Admission Data

admission <- read.csv('gradAdmission.csv')
head(admission)

##   Department Sex yes  no total
## 1          1   M 512 313   825
## 2          1   F  89  19   108
## 3          2   M 353 207   560
## 4          2   F  17   8    25
## 5          3   M 120 205   325
## 6          3   F 202 391   593

admission$Department=factor(admission$Department)
admission$Department

##  [1] 1 1 2 2 3 3 4 4 5 5 6 6
## Levels: 1 2 3 4 5 6

cat('\n Saturated Model \n')

## 
##  Saturated Model

M1<-glm(cbind(yes,no) ~ Department*Sex , data=admission,family=binomial())

cat('\n Only main effects Model \n');

## 
##  Only main effects Model

M2<-glm(cbind(yes,no) ~ Department + Sex , data=admission,family=binomial())

cat('\n Only Sex Model \n');

## 
##  Only Sex Model

M3<-glm(cbind(yes,no) ~ Sex , data=admission,family=binomial())

cat('\n Only Department effects Model \n');

## 
##  Only Department effects Model

M3<-glm(cbind(yes,no) ~ Department , data=admission,family=binomial())

contrast(M1,list(Department='1',Sex='F'),list(Department='1',Sex='M')) # Gender (F-M) in Department 1

## Warning in qt((1 + conf.int)/2, dfdm): NaNs produced

## glm model parameter contrast
## 
##  Contrast      S.E. Lower Upper t df Pr(>|t|)
##  1.052076 0.2627081   NaN   NaN 4  0    1e-04

contrast(M1,list(Department='2',Sex='F'),list(Department='2',Sex='M')) # Gender (F-M) in Department 2

## Warning in qt((1 + conf.int)/2, dfdm): NaNs produced

## glm model parameter contrast
## 
##   Contrast      S.E. Lower Upper   t df Pr(>|t|)
##  0.2200225 0.4375926   NaN   NaN 0.5  0   0.6151

contrast(M1,list(Department='3',Sex='F'),list(Department='3',Sex='M')) # Gender (F-M) in Department 3

## Warning in qt((1 + conf.int)/2, dfdm): NaNs produced

## glm model parameter contrast
## 
##    Contrast      S.E. Lower Upper     t df Pr(>|t|)
##  -0.1249216 0.1439424   NaN   NaN -0.87  0   0.3855

contrast(M1,list(Department='4',Sex='F'),list(Department='4',Sex='M')) # Gender (F-M) in Department 4

## Warning in qt((1 + conf.int)/2, dfdm): NaNs produced

## glm model parameter contrast
## 
##    Contrast      S.E. Lower Upper    t df Pr(>|t|)
##  0.08198719 0.1502084   NaN   NaN 0.55  0   0.5852

contrast(M1,list(Department='5',Sex='F'),list(Department='5',Sex='M')) # Gender (F-M) in Department 5

## Warning in qt((1 + conf.int)/2, dfdm): NaNs produced

## glm model parameter contrast
## 
##   Contrast      S.E. Lower Upper  t df Pr(>|t|)
##  -0.200187 0.2002426   NaN   NaN -1  0   0.3174

contrast(M1,list(Department='6',Sex='F'),list(Department='6',Sex='M')) # Gender (F-M) in Department 6

## Warning in qt((1 + conf.int)/2, dfdm): NaNs produced

## glm model parameter contrast
## 
##   Contrast      S.E. Lower Upper    t df Pr(>|t|)
##  0.1888958 0.3051635   NaN   NaN 0.62  0   0.5359

Treatment for Urinary Tract Infection (UTI)

UTI <- read.csv('UTI.csv');names(UTI)

## [1] "diag"  "treat" "resp"  "count"

U<-glm(count ~ diag*treat + diag*resp + treat*resp, family=poisson(), data=UTI)

a1<-c(0,  0, 0, 0, 0, 0, 0,  0, 1,  0)
a2<-c(0,  0, 0, 0, 0, 0, 0,  0, 1, -1)
a3<-c(0,  0, 0, 0, 0, 0, 0,  0, 0,  1)
Cont<-rbind(a1,a2,a3)

General linear hypotheses

General linear hypotheses and multiple comparisons for parametric models, including generalized linear models,

linear mixed effects models, and survival models.

cat('\n Individual tests \n')

## 
##  Individual tests

Res1<-summary(glht(U,linfct=t(a1)))
Res2<-summary(glht(U,linfct=t(a2)))
Res3<-summary(glht(U,linfct=t(a3)))

cat('\n All Tests\n')

## 
##  All Tests

Res<-glht(U,linfct=Cont)
summary(Res)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: glm(formula = count ~ diag * treat + diag * resp + treat * resp, 
##     family = poisson(), data = UTI)
## 
## Linear Hypotheses:
##         Estimate Std. Error z value Pr(>|z|)    
## a1 == 0 -0.06521    0.08480  -0.769  0.64064    
## a2 == 0 -0.48801    0.16553  -2.948  0.00648 ** 
## a3 == 0  0.42280    0.09848   4.293  < 0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

Breathlessness and wheeze in coal miners Ashford and Sowden (1970)

### read in data to use for plots ###
coalminr <- read.csv('coalminr.csv')
coalminr$cage <- coalminr$age - 5;   
coalminr$sqage <- coalminr$cage^2;
coalminr$brthlss1 <- 0+(coalminr$brthlss=='yes');
coalminr$wheeze1 <- 2-coalminr$wheeze
coalminr$wheeze <- factor(coalminr$wheeze)
coalminr$age <- factor(coalminr$age)

cat('\n CMH test for conditional independence \n');

## 
##  CMH test for conditional independence

l<-sapply(coalminr[c('wheeze','brthlss', 'age', 'count')], function(x) length(levels(x)))
Tc <- array(0, l[-4], lapply(coalminr[c('wheeze','brthlss', 'age', 'count')][, -4], levels))
Tc[data.matrix(coalminr[c('wheeze','brthlss', 'age', 'count')][,-4])] <- coalminr$count 
ftable(Tc)

##                age    1    2    3    4    5    6    7    8    9
## wheeze brthlss                                                 
## 1      no            95  105  177  257  273  324  245  225  132
##        yes            9   23   54  121  169  269  404  406  372
## 2      no          1841 1654 1863 2357 1778 1712 1324  967  526
##        yes            7    9   19   48   54   88  117  152  106

mantelhaen.test(Tc,correct=FALSE)

## 
##  Mantel-Haenszel chi-squared test without continuity correction
## 
## data:  Tc
## Mantel-Haenszel X-squared = 3309.6, df = 1, p-value < 2.2e-16
## alternative hypothesis: true common odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.05482530 0.06843431
## sample estimates:
## common odds ratio 
##        0.06125301

Create data sets for studying the relation of wheeze and breathlessmess with age

Construct the wheeze data

wheeze <- data.frame(tapply(coalminr$count,list(coalminr$age,coalminr$wheeze ) , sum))
names(wheeze) <- c('yes','no')
wheeze$age <- as.numeric(as.character(rownames(wheeze)))
wheeze$cage <- wheeze$age - 5  
wheeze$sqage <- wheeze$cage^2

Construct the breathlessness data

breathlessness <- data.frame(tapply(coalminr$count,list(coalminr$age,coalminr$brthlss) , sum))
breathlessness$age <- as.numeric(as.character(rownames(wheeze)))
breathlessness$cage <- breathlessness$age - 5  
breathlessness$sqage <- breathlessness$cage^2

Model 1

cat('\n Predicting Breathlessness rates with age \n Only linear effect of age in the model \n')

## 
##  Predicting Breathlessness rates with age 
##  Only linear effect of age in the model

M1 <- glm(cbind(yes,no) ~ cage, family=binomial(), data=breathlessness)

###Model 2###
cat('\n Predicting Breathlessness rates with age \n Linear and quadratic effects of age in the model \n')

## 
##  Predicting Breathlessness rates with age 
##  Linear and quadratic effects of age in the model

M2 <- glm(cbind(yes,no) ~ cage + sqage, family=binomial(), data=breathlessness)

###Model 3###
cat('\n Predicting Wheeze rates with age \n Only linear effect of age in the model \n')

## 
##  Predicting Wheeze rates with age 
##  Only linear effect of age in the model

M3 <- glm(cbind(yes,no) ~ cage, family=binomial(), data=wheeze)

cat('\n Predicting Breathlessness rates with age \n Linear and quadratic effects of age in the model \n Use estimate to compare age groups \n')

## 
##  Predicting Breathlessness rates with age 
##  Linear and quadratic effects of age in the model 
##  Use estimate to compare age groups

Agec <- glht(M2,linfct=t(c(0,-4, 16)))
summary(Agec)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: glm(formula = cbind(yes, no) ~ cage + sqage, family = binomial(), 
##     data = breathlessness)
## 
## Linear Hypotheses:
##        Estimate Std. Error z value Pr(>|z|)    
## 1 == 0  -2.5082     0.1446  -17.34   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

Use Loglinear models to analyse the data

cat('\n All 2 way interaction model \n');

## 
##  All 2 way interaction model

M4 <- glm(count ~ age*brthlss1 + age*wheeze1 + brthlss1*wheeze1, family=poisson(), data=coalminr)


cat('\n Allowing OR between Wheeze and Breathlessness to vary with age \n');

## 
##  Allowing OR between Wheeze and Breathlessness to vary with age

M5 <- glm(count ~ age*brthlss1 + age*wheeze1 + brthlss1*wheeze1+I(cage*brthlss1*wheeze1), family=poisson(), data=coalminr)

Generalized Linear Model R Data Practice

Michelle Hsu

March 12, 2018

OR for remission comparing the first labeling index group (‘8-12’) to the last labeling index group (‘34-38’)

User Defined Levels order

OR for remission comparing the first labeling index group (‘8-12’) to the last labeling index group (‘34-38’)

Change the Contrasts (vs. Corresponds to SAS)

contr.SAS (contr.treatment) that sets the base level to be the last level of the factor. The coefficients

produced should be equivalent to those in SAS procedures.

CHD in Framingham Study

Admission Data

Treatment for Urinary Tract Infection (UTI)

General linear hypotheses

General linear hypotheses and multiple comparisons for parametric models, including generalized linear models,

linear mixed effects models, and survival models.

Breathlessness and wheeze in coal miners Ashford and Sowden (1970)

Create data sets for studying the relation of wheeze and breathlessmess with age

Construct the wheeze data

Construct the breathlessness data

Construct the breathlessness data

Model 1

Use Loglinear models to analyse the data