Chapter 8

8.1

If \(J=2\) then \(\pi_1 + \pi_2 = 1\), so we can definie \(\pi_1 = \pi\) and \(\pi_2 = 1 - \pi\), and then it is straightforward to see that equations (8.4), (8.13), (8.14) and (8.15) all reduce to

\[ log \left(\frac{\pi}{1-\pi} \right) = \beta_0 + \beta_1 x \]

8.2

a.

(housing <- read.csv2("Table8.5Housingconditions.csv",skip=2))

##           type satisfaction contact frequency
## 1  tower block          low     low        65
## 2  tower block          low    high        34
## 3  tower block       medium     low        54
## 4  tower block       medium    high        47
## 5  tower block         high     low       100
## 6  tower block         high    high       100
## 7    apartment          low     low       130
## 8    apartment          low    high       141
## 9    apartment       medium     low        76
## 10   apartment       medium    high       116
## 11   apartment         high     low       111
## 12   apartment         high    high       191
## 13       house          low     low        67
## 14       house          low    high       130
## 15       house       medium     low        48
## 16       house       medium    high       105
## 17       house         high     low        62
## 18       house         high    high       104

housing$contact <- gl(2,1,18,labels=c("low","high"),ordered=TRUE)
with(housing, xtabs(frequency ~ type + contact + satisfaction)) #Table 8.5

## , , satisfaction = high
## 
##              contact
## type          low high
##   apartment   111  191
##   house        62  104
##   tower block 100  100
## 
## , , satisfaction = low
## 
##              contact
## type          low high
##   apartment   130  141
##   house        67  130
##   tower block  65   34
## 
## , , satisfaction = medium
## 
##              contact
## type          low high
##   apartment    76  116
##   house        48  105
##   tower block  54   47

table8.5 = xtabs(frequency ~ type + contact + satisfaction, data=housing)

#Tables of percentages

with(housing, xtabs(frequency ~ satisfaction + contact))  -> sat.cont.table
prop.table(sat.cont.table, 1)*100 #by row

##             contact
## satisfaction      low     high
##       high   40.86826 59.13174
##       low    46.20811 53.79189
##       medium 39.91031 60.08969

with(housing, xtabs(frequency ~ satisfaction + type))  -> sat.type.table
prop.table(sat.type.table, 1)*100 #by row

##             type
## satisfaction apartment    house tower block
##       high    45.20958 24.85030    29.94012
##       low     47.79541 34.74427    17.46032
##       medium  43.04933 34.30493    22.64574

with(housing, xtabs(frequency ~ contact + type))  -> cont.type.table
prop.table(cont.type.table, 1)*100 #by row

##        type
## contact apartment    house tower block
##    low   44.46003 24.82468    30.71529
##    high  46.28099 35.02066    18.69835

b.

nom.model1 <- multinom(satisfaction ~ type + contact +  type * contact, weights=frequency, data=housing)

## # weights:  21 (12 variable)
## initial  value 1846.767257 
## iter  10 value 1802.876911
## final  value 1799.293647 
## converged

summary(nom.model1)

## Call:
## multinom(formula = satisfaction ~ type + contact + type * contact, 
##     data = housing, weights = frequency)
## 
## Coefficients:
##        (Intercept) typehouse typetower block   contact.L
## low    -0.07275427 0.2231054      -0.6820406 -0.32633994
## medium -0.43873988 0.3155586      -0.2468679 -0.08477072
##        typehouse:contact.L typetower block:contact.L
## low              0.4292855               -0.13188065
## medium           0.2725097               -0.01339855
## 
## Std. Errors:
##        (Intercept) typehouse typetower block contact.L typehouse:contact.L
## low     0.08518966 0.1390964       0.1531546 0.1204764           0.1967120
## medium  0.09489889 0.1517603       0.1547727 0.1342073           0.2146215
##        typetower block:contact.L
## low                    0.2165934
## medium                 0.2188817
## 
## Residual Deviance: 3598.587 
## AIC: 3622.587

nom.model2 <- multinom(satisfaction ~ type + contact , weights=frequency, data=housing)

## # weights:  15 (8 variable)
## initial  value 1846.767257 
## iter  10 value 1803.572484
## final  value 1802.740161 
## converged

summary(nom.model2)

## Call:
## multinom(formula = satisfaction ~ type + contact, data = housing, 
##     weights = frequency)
## 
## Coefficients:
##        (Intercept) typehouse typetower block   contact.L
## low    -0.08329098 0.3040193      -0.6415930 -0.23209137
## medium -0.44931685 0.3736972      -0.2348338 -0.02280046
## 
## Std. Errors:
##        (Intercept) typehouse typetower block  contact.L
## low     0.08437084 0.1351693       0.1500773 0.08357085
## medium  0.09337762 0.1454812       0.1540988 0.08974541
## 
## Residual Deviance: 3605.48 
## AIC: 3621.48

anova(nom.model2, nom.model1) # model with interactions is not better

## Likelihood ratio tests of Multinomial Models
## 
## Response: satisfaction
##                             Model Resid. df Resid. Dev   Test    Df
## 1                  type + contact        28   3605.480             
## 2 type + contact + type * contact        24   3598.587 1 vs 2     4
##   LR stat.   Pr(Chi)
## 1                   
## 2 6.893028 0.1416504

nom.model3 <- multinom(satisfaction ~  contact, weights=frequency, data=housing)

## # weights:  9 (4 variable)
## initial  value 1846.767257 
## final  value 1821.875901 
## converged

summary(nom.model3)

## Call:
## multinom(formula = satisfaction ~ contact, data = housing, weights = frequency)
## 
## Coefficients:
##        (Intercept)   contact.L
## low     -0.1498514 -0.15375902
## medium  -0.4077930  0.02813511
## 
## Std. Errors:
##        (Intercept)  contact.L
## low     0.05764143 0.08151729
## medium  0.06233772 0.08815884
## 
## Residual Deviance: 3643.752 
## AIC: 3651.752

anova(nom.model3, nom.model2) # nom.model2 is better

## Likelihood ratio tests of Multinomial Models
## 
## Response: satisfaction
##            Model Resid. df Resid. Dev   Test    Df LR stat.      Pr(Chi)
## 1        contact        32   3643.752                                   
## 2 type + contact        28   3605.480 1 vs 2     4 38.27148 9.849653e-08

c.

ord.model2 <- polr(ordered(satisfaction) ~ type + ordered(contact), weights=frequency, data=housing) # proportional odds logistic regression
summary(ord.model2)

## 
## Re-fitting to get Hessian

## Call:
## polr(formula = ordered(satisfaction) ~ type + ordered(contact), 
##     data = housing, weights = frequency)
## 
## Coefficients:
##                      Value Std. Error t value
## typehouse           0.2759     0.1043  2.6442
## typetower block    -0.2924     0.1179 -2.4810
## ordered(contact).L -0.0527     0.0653 -0.8071
## 
## Intercepts:
##            Value   Std. Error t value
## high|low   -0.4044  0.0700    -5.7741
## low|medium  1.0444  0.0743    14.0555
## 
## Residual Deviance: 3628.55 
## AIC: 3638.55

Although larger deviance, this seems to make more sense since ordering is inherent part of the variables standard errors of ordinal model lower model with interactions is not better

ord.model2$fitted.values

##         high       low    medium
## 1  0.4627575 0.3230023 0.2142402
## 2  0.4813303 0.3167093 0.2019604
## 3  0.4627575 0.3230023 0.2142402
## 4  0.4813303 0.3167093 0.2019604
## 5  0.4627575 0.3230023 0.2142402
## 6  0.4813303 0.3167093 0.2019604
## 7  0.3913479 0.3411141 0.2675379
## 8  0.4092379 0.3375745 0.2531876
## 9  0.3913479 0.3411141 0.2675379
## 10 0.4092379 0.3375745 0.2531876
## 11 0.3913479 0.3411141 0.2675379
## 12 0.4092379 0.3375745 0.2531876
## 13 0.3279326 0.3471468 0.3249206
## 14 0.3445647 0.3466453 0.3087900
## 15 0.3279326 0.3471468 0.3249206
## 16 0.3445647 0.3466453 0.3087900
## 17 0.3279326 0.3471468 0.3249206
## 18 0.3445647 0.3466453 0.3087900

names(ord.model2)

##  [1] "coefficients"  "zeta"          "deviance"      "fitted.values"
##  [5] "lev"           "terms"         "df.residual"   "edf"          
##  [9] "n"             "nobs"          "call"          "method"       
## [13] "convergence"   "niter"         "lp"            "model"        
## [17] "contrasts"     "xlevels"

8.3

a.

(tumordat <- read.csv2("Table8.6Tumorresponse.csv", skip=2))

##      treatment    sex           response frequency
## 1   sequential   male        progressive        28
## 2   sequential   male          no change        45
## 3   sequential   male  partial remission        29
## 4   sequential   male complete remission        26
## 5   sequential female        progressive         4
## 6   sequential female          no change        12
## 7   sequential female  partial remission         5
## 8   sequential female complete remission         2
## 9  alternating   male        progressive        41
## 10 alternating   male          no change        44
## 11 alternating   male  partial remission        20
## 12 alternating   male complete remission        20
## 13 alternating female        progressive        12
## 14 alternating female          no change         7
## 15 alternating female  partial remission         3
## 16 alternating female complete remission         1

with(tumordat,  xtabs(frequency ~ factor(sex)+ factor(response) + factor(treatment))) #Table 8.6 in alphabetic different order

## , , factor(treatment) = alternating
## 
##            factor(response)
## factor(sex) complete remission no change partial remission progressive
##      female                  1         7                 3          12
##      male                   20        44                20          41
## 
## , , factor(treatment) = sequential
## 
##            factor(response)
## factor(sex) complete remission no change partial remission progressive
##      female                  2        12                 5           4
##      male                   26        45                29          28

mod1 <- polr(ordered(response) ~ factor(treatment) + factor(sex), weights=frequency, data = tumordat)
summary(mod1)

## 
## Re-fitting to get Hessian

## Call:
## polr(formula = ordered(response) ~ factor(treatment) + factor(sex), 
##     data = tumordat, weights = frequency)
## 
## Coefficients:
##                               Value Std. Error t value
## factor(treatment)sequential -0.4485     0.2108  -2.128
## factor(sex)male             -0.4153     0.2861  -1.452
## 
## Intercepts:
##                               Value   Std. Error t value
## complete remission|no change  -2.2289  0.3166    -7.0407
## no change|partial remission   -0.4816  0.2899    -1.6610
## partial remission|progressive  0.3534  0.2910     1.2146
## 
## Residual Deviance: 793.4132 
## AIC: 803.4132

b.

mod2 <- polr(factor(response) ~ 1, weights=frequency, data = tumordat)
anova(mod1,mod2)

## Likelihood ratio tests of ordinal regression models
## 
## Response: factor(response)
## Response: ordered(response)
##                             Model Resid. df Resid. Dev   Test    Df
## 1                               1       296   799.9680             
## 2 factor(treatment) + factor(sex)       294   793.4132 1 vs 2     2
##   LR stat.    Pr(Chi)
## 1                    
## 2  6.55474 0.03772735

c.

summary(mod1)

## 
## Re-fitting to get Hessian

## Call:
## polr(formula = ordered(response) ~ factor(treatment) + factor(sex), 
##     data = tumordat, weights = frequency)
## 
## Coefficients:
##                               Value Std. Error t value
## factor(treatment)sequential -0.4485     0.2108  -2.128
## factor(sex)male             -0.4153     0.2861  -1.452
## 
## Intercepts:
##                               Value   Std. Error t value
## complete remission|no change  -2.2289  0.3166    -7.0407
## no change|partial remission   -0.4816  0.2899    -1.6610
## partial remission|progressive  0.3534  0.2910     1.2146
## 
## Residual Deviance: 793.4132 
## AIC: 803.4132

##calculating p-values
(ctable <- coef(summary(mod1)))

## 
## Re-fitting to get Hessian

##                                    Value Std. Error   t value
## factor(treatment)sequential   -0.4484604  0.2107789 -2.127634
## factor(sex)male               -0.4153424  0.2861069 -1.451704
## complete remission|no change  -2.2289307  0.3165793 -7.040671
## no change|partial remission   -0.4815889  0.2899408 -1.660990
## partial remission|progressive  0.3534256  0.2909847  1.214585

 (p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2)

##   factor(treatment)sequential               factor(sex)male 
##                  3.336742e-02                  1.465840e-01 
##  complete remission|no change   no change|partial remission 
##                  1.913163e-12                  9.671545e-02 
## partial remission|progressive 
##                  2.245245e-01

d.

mod3 <- polr(ordered(response) ~ factor(sex), weights=frequency, data = tumordat)
anova(mod1,mod3)

## Likelihood ratio tests of ordinal regression models
## 
## Response: ordered(response)
##                             Model Resid. df Resid. Dev   Test    Df
## 1                     factor(sex)       295   797.9652             
## 2 factor(treatment) + factor(sex)       294   793.4132 1 vs 2     1
##   LR stat.    Pr(Chi)
## 1                    
## 2 4.551977 0.03288076

e.

library(VGAM)

## Loading required package: stats4

## Loading required package: splines

resp.prog <- c(28, 4, 41, 12)
resp.no <-  c(45, 12, 44, 7)
resp.part <- c(29, 5, 20, 3)
resp.comp <- c(26, 2, 20, 1)


treat <- c("seq", "seq", "alt", "alt")
sex <- c("m", "f", "m", "f")

dat <- data.frame(treat,sex,resp.prog,resp.no,resp.part,resp.comp)

##Adjacent category model with logit link function

fit.adj <- vglm(cbind(resp.prog,resp.no,resp.part,resp.comp) ~ factor(treat) + factor(sex),
        family=acat(reverse=FALSE, parallel=TRUE), data=dat)
summary(fit.adj)

## 
## Call:
## vglm(formula = cbind(resp.prog, resp.no, resp.part, resp.comp) ~ 
##     factor(treat) + factor(sex), family = acat(reverse = FALSE, 
##     parallel = TRUE), data = dat)
## 
## 
## Pearson residuals:
##   loge(P[Y=2]/P[Y=1]) loge(P[Y=3]/P[Y=2]) loge(P[Y=4]/P[Y=3])
## 1             0.04426            -0.06102             -0.4360
## 2             1.73162            -0.11329             -0.7348
## 3            -0.40748             0.08184              0.9726
## 4            -0.83665             0.04515             -0.4108
## 
## Coefficients: 
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1     -0.1619     0.2037  -0.795   0.4268    
## (Intercept):2     -1.0754     0.2307  -4.661 3.14e-06 ***
## (Intercept):3     -0.6193     0.2642  -2.344   0.0191 *  
## factor(treat)seq   0.2889     0.1148   2.515   0.0119 *  
## factor(sex)m       0.3308     0.1684   1.964   0.0495 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  3 
## 
## Names of linear predictors: 
## loge(P[Y=2]/P[Y=1]), loge(P[Y=3]/P[Y=2]), loge(P[Y=4]/P[Y=3])
## 
## Residual deviance: 5.9537 on 7 degrees of freedom
## 
## Log-likelihood: -25.7347 on 7 degrees of freedom
## 
## Number of iterations: 4 
## 
## No Hauck-Donner effect found in any of the estimates

##Continuation ratio model with logit link function
fit.cratio <- vglm(cbind(resp.prog,resp.no,resp.part,resp.comp) ~ factor(treat) + factor(sex),
  family=cratio(reverse=FALSE, parallel=TRUE), data=dat)
summary(fit.cratio)

## 
## Call:
## vglm(formula = cbind(resp.prog, resp.no, resp.part, resp.comp) ~ 
##     factor(treat) + factor(sex), family = cratio(reverse = FALSE, 
##     parallel = TRUE), data = dat)
## 
## 
## Pearson residuals:
##   logit(P[Y>1|Y>=1]) logit(P[Y>2|Y>=2]) logit(P[Y>3|Y>=3])
## 1             0.2553            -0.1547            -0.6492
## 2             1.6424            -0.5523            -0.5594
## 3            -0.5441             0.3181             1.0555
## 4            -0.8971             0.2074            -0.1927
## 
## Coefficients: 
##                  Estimate Std. Error z value Pr(>|z|)   
## (Intercept):1      0.2853     0.2519   1.132  0.25745   
## (Intercept):2     -0.7015     0.2709  -2.589  0.00962 **
## (Intercept):3     -0.8670     0.3136  -2.765  0.00570 **
## factor(treat)seq   0.3975     0.1710   2.325  0.02009 * 
## factor(sex)m       0.5355     0.2437   2.197  0.02798 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  3 
## 
## Names of linear predictors: 
## logit(P[Y>1|Y>=1]), logit(P[Y>2|Y>=2]), logit(P[Y>3|Y>=3])
## 
## Residual deviance: 6.4591 on 7 degrees of freedom
## 
## Log-likelihood: -25.9874 on 7 degrees of freedom
## 
## Number of iterations: 4 
## 
## No Hauck-Donner effect found in any of the estimates

Chapter 9

9.1

\[ Y_i \sim Po(\mu_i)\] \[ log (\mu_i) = \beta_1 + \sum x_{ij} \beta_j \] \[ l(\mu,y) = \sum y_i log(\mu_i) - \mu_i - log(y_i!) = \sum y_i [\beta_1 + \sum x_{ij}\beta_j ] - e^{\beta_1 + \sum x_{ij}\beta_j} - log(y_i) \] \[ U_1 = \frac{\partial l}{\partial \beta_1} = \sum y_i - \mu_i \]

then,(b) and (c) are straightforward.

9.2

m <- read.table("data.txt", header = TRUE)
attach(m)

a.

rate.car <- NULL
a <- NULL
CAR <- 1:4
for(i in CAR){
   a <- apply(m[car == i, ], 2, sum)
   rate.car[i] <- a[4] / a[5]
   }
rate.age <- NULL
a <- NULL
AGE <- 1:4
for(i in AGE){
   a <- apply(m[age == i, ], 2, sum)
   rate.age[i] <- a[4] / a[5]
   }
rate.district <- NULL
a <- NULL
DISTRICT <- 0:1
for(i in DISTRICT){
   a <- apply(m[district == i, ], 2, sum)
   rate.district[i + 1] <- a[4] / a[5]
   }
par(mfrow = c(1, 3))
barplot(matrix(rate.car,ncol=1),beside=T,col=heat.colors(4),ylim=c(0,0.3),
 legend.text = c("Category 1", "Category 2","Category 3","Category 4"),
             args.legend = list(x = "topright"),main='rate for CAR')
barplot(matrix(rate.age,ncol=1),beside=T,col=heat.colors(4),ylim=c(0,0.3),
 legend.text = c("Category 1", "Category 2","Category 3","Category 4"),
             args.legend = list(x = "topright"),main='rate for AGE')
barplot(matrix(rate.district,ncol=1),beside=T,col=heat.colors(4),ylim=c(0,0.3),
 legend.text = c("Category 1", "Category 2"),
             args.legend = list(x = "topright"),main='rate for DIST')

y <- m[,"y"]

b.

m1 <- m
m[, "car"] <- factor(m$car)
m[, "age"] <- factor(m$age)
model <- glm(y ~ car * age * district + offset(log(m[, "n"])), family = 'poisson', data = m)
summary(model)

## 
## Call:
## glm(formula = y ~ car * age * district + offset(log(m[, "n"])), 
##     family = "poisson", data = m)
## 
## Deviance Residuals: 
##  [1]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [24]  0  0  0  0  0  0  0  0  0
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -1.585e+00  1.240e-01 -12.775  < 2e-16 ***
## car2               -1.673e-02  1.600e-01  -0.105 0.916721    
## car3               -1.091e-01  1.994e-01  -0.547 0.584401    
## car4                2.935e-01  3.260e-01   0.900 0.367947    
## age2               -4.065e-01  1.754e-01  -2.317 0.020477 *  
## age3               -6.504e-01  1.860e-01  -3.496 0.000472 ***
## age4               -7.681e-01  1.364e-01  -5.630  1.8e-08 ***
## district           -7.181e-01  7.179e-01  -1.000 0.317198    
## car2:age2           1.649e-01  2.174e-01   0.759 0.448106    
## car3:age2           5.726e-01  2.524e-01   2.269 0.023298 *  
## car4:age2           2.556e-01  3.876e-01   0.660 0.509574    
## car2:age3           2.049e-01  2.248e-01   0.912 0.361989    
## car3:age3           7.173e-01  2.575e-01   2.785 0.005345 ** 
## car4:age3           2.390e-01  3.888e-01   0.615 0.538711    
## car2:age4           2.021e-01  1.731e-01   1.168 0.242996    
## car3:age4           4.854e-01  2.124e-01   2.286 0.022271 *  
## car4:age4           2.505e-01  3.399e-01   0.737 0.461104    
## car2:district       8.312e-01  8.176e-01   1.017 0.309299    
## car3:district       1.131e+00  8.601e-01   1.315 0.188626    
## car4:district      -2.139e+01  4.225e+04  -0.001 0.999596    
## age2:district       8.220e-01  8.548e-01   0.962 0.336246    
## age3:district       6.504e-01  8.858e-01   0.734 0.462753    
## age4:district       8.984e-01  7.392e-01   1.215 0.224188    
## car2:age2:district -1.184e+00  9.950e-01  -1.190 0.234003    
## car3:age2:district -1.425e+00  1.052e+00  -1.354 0.175588    
## car4:age2:district  2.175e+01  4.225e+04   0.001 0.999589    
## car2:age3:district -4.298e-01  9.944e-01  -0.432 0.665567    
## car3:age3:district -8.832e-01  1.039e+00  -0.850 0.395125    
## car4:age3:district  2.202e+01  4.225e+04   0.001 0.999584    
## car2:age4:district -8.042e-01  8.428e-01  -0.954 0.340026    
## car3:age4:district -1.032e+00  8.881e-01  -1.162 0.245079    
## car4:age4:district  2.178e+01  4.225e+04   0.001 0.999589    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 2.0783e+02  on 31  degrees of freedom
## Residual deviance: 4.1226e-10  on  0  degrees of freedom
## AIC: 232.36
## 
## Number of Fisher Scoring iterations: 20

We can see how results suggest simpler model

model <- glm(y ~ car + age + district + car:age + offset(log(m[, "n"])), family = 'poisson', data = m)
summary(model)

## 
## Call:
## glm(formula = y ~ car + age + district + car:age + offset(log(m[, 
##     "n"])), family = "poisson", data = m)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4804  -0.2481  -0.0349   0.3179   1.6404  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.62983    0.12224 -13.333  < 2e-16 ***
## car2         0.02116    0.15636   0.135 0.892356    
## car3        -0.04446    0.19148  -0.232 0.816377    
## car4         0.24957    0.32532   0.767 0.442991    
## age2        -0.36989    0.17091  -2.164 0.030450 *  
## age3        -0.62858    0.18106  -3.472 0.000517 ***
## age4        -0.72688    0.13349  -5.445 5.17e-08 ***
## district     0.21916    0.05854   3.744 0.000181 ***
## car2:age2    0.10099    0.21131   0.478 0.632705    
## car3:age2    0.48849    0.24293   2.011 0.044346 *  
## car4:age2    0.34022    0.38015   0.895 0.370800    
## car2:age3    0.20265    0.21768   0.931 0.351867    
## car3:age3    0.67189    0.24710   2.719 0.006546 ** 
## car4:age3    0.35722    0.38075   0.938 0.348145    
## car2:age4    0.16706    0.16841   0.992 0.321190    
## car3:age4    0.43166    0.20349   2.121 0.033897 *  
## car4:age4    0.34847    0.33723   1.033 0.301444    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 207.833  on 31  degrees of freedom
## Residual deviance:  13.192  on 15  degrees of freedom
## AIC: 215.55
## 
## Number of Fisher Scoring iterations: 4

pchisq(model$deviance, df = model$df.residual, lower.tail = FALSE)

## [1] 0.5874938

p-value is 0.5874938, so the model fits well

We can try even simpler model

model <- glm(y ~ car + age + district + offset(log(m[, "n"])), family = 'poisson', data = m)
summary(model)

## 
## Call:
## glm(formula = y ~ car + age + district + offset(log(m[, "n"])), 
##     family = "poisson", data = m)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8588  -0.6371  -0.1555   0.3749   1.7536  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.81021    0.07532 -24.034  < 2e-16 ***
## car2         0.16229    0.05052   3.213 0.001315 ** 
## car3         0.39352    0.05498   7.157 8.25e-13 ***
## car4         0.56540    0.07228   7.823 5.18e-15 ***
## age2        -0.18902    0.08282  -2.282 0.022477 *  
## age3        -0.34211    0.08130  -4.208 2.58e-05 ***
## age4        -0.53275    0.06979  -7.634 2.28e-14 ***
## district     0.21850    0.05853   3.733 0.000189 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 207.833  on 31  degrees of freedom
## Residual deviance:  23.709  on 24  degrees of freedom
## AIC: 208.07
## 
## Number of Fisher Scoring iterations: 4

pchisq(model$deviance, df = model$df.residual, lower.tail = FALSE)

## [1] 0.4783343

p-value = 0.4783343. so the model fits well AIC is smaller then the AIC of the previous model and this model is simpler. We choose this model.

c.

m1[,"district"] <- factor(m1$district)
m=m1
attach(m)

## The following object is masked _by_ .GlobalEnv:
## 
##     y

## The following objects are masked from m (pos = 3):
## 
##     age, car, district, n, y

model <- glm(y ~ car + age + district + offset(log(m[, "n"])), family = 'poisson', data = m1)
summary(model)

## 
## Call:
## glm(formula = y ~ car + age + district + offset(log(m[, "n"])), 
##     family = "poisson", data = m1)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7248  -0.5681  -0.1679   0.3384   1.9126  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.85253    0.07990 -23.185  < 2e-16 ***
## car          0.19777    0.02080   9.507  < 2e-16 ***
## age         -0.17674    0.01849  -9.559  < 2e-16 ***
## district1    0.21865    0.05853   3.736 0.000187 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 207.833  on 31  degrees of freedom
## Residual deviance:  24.685  on 28  degrees of freedom
## AIC: 201.05
## 
## Number of Fisher Scoring iterations: 4

pchisq(model$deviance, df = model$df.residual, lower.tail = FALSE)

## [1] 0.6449029

p-value = 0.6449029 so the model fits well

Practical #5

F.R.

March 8, International Women’s Day

Chapter 8

8.1

8.2

a.

b.

c.

8.3

a.

b.

c.

d.

e.

Chapter 9

9.1

9.2

a.

b.

c.