If \(J=2\) then \(\pi_1 + \pi_2 = 1\), so we can definie \(\pi_1 = \pi\) and \(\pi_2 = 1 - \pi\), and then it is straightforward to see that equations (8.4), (8.13), (8.14) and (8.15) all reduce to
\[ log \left(\frac{\pi}{1-\pi} \right) = \beta_0 + \beta_1 x \]
(housing <- read.csv2("Table8.5Housingconditions.csv",skip=2))
## type satisfaction contact frequency
## 1 tower block low low 65
## 2 tower block low high 34
## 3 tower block medium low 54
## 4 tower block medium high 47
## 5 tower block high low 100
## 6 tower block high high 100
## 7 apartment low low 130
## 8 apartment low high 141
## 9 apartment medium low 76
## 10 apartment medium high 116
## 11 apartment high low 111
## 12 apartment high high 191
## 13 house low low 67
## 14 house low high 130
## 15 house medium low 48
## 16 house medium high 105
## 17 house high low 62
## 18 house high high 104
housing$contact <- gl(2,1,18,labels=c("low","high"),ordered=TRUE)
with(housing, xtabs(frequency ~ type + contact + satisfaction)) #Table 8.5
## , , satisfaction = high
##
## contact
## type low high
## apartment 111 191
## house 62 104
## tower block 100 100
##
## , , satisfaction = low
##
## contact
## type low high
## apartment 130 141
## house 67 130
## tower block 65 34
##
## , , satisfaction = medium
##
## contact
## type low high
## apartment 76 116
## house 48 105
## tower block 54 47
table8.5 = xtabs(frequency ~ type + contact + satisfaction, data=housing)
#Tables of percentages
with(housing, xtabs(frequency ~ satisfaction + contact)) -> sat.cont.table
prop.table(sat.cont.table, 1)*100 #by row
## contact
## satisfaction low high
## high 40.86826 59.13174
## low 46.20811 53.79189
## medium 39.91031 60.08969
with(housing, xtabs(frequency ~ satisfaction + type)) -> sat.type.table
prop.table(sat.type.table, 1)*100 #by row
## type
## satisfaction apartment house tower block
## high 45.20958 24.85030 29.94012
## low 47.79541 34.74427 17.46032
## medium 43.04933 34.30493 22.64574
with(housing, xtabs(frequency ~ contact + type)) -> cont.type.table
prop.table(cont.type.table, 1)*100 #by row
## type
## contact apartment house tower block
## low 44.46003 24.82468 30.71529
## high 46.28099 35.02066 18.69835
nom.model1 <- multinom(satisfaction ~ type + contact + type * contact, weights=frequency, data=housing)
## # weights: 21 (12 variable)
## initial value 1846.767257
## iter 10 value 1802.876911
## final value 1799.293647
## converged
summary(nom.model1)
## Call:
## multinom(formula = satisfaction ~ type + contact + type * contact,
## data = housing, weights = frequency)
##
## Coefficients:
## (Intercept) typehouse typetower block contact.L
## low -0.07275427 0.2231054 -0.6820406 -0.32633994
## medium -0.43873988 0.3155586 -0.2468679 -0.08477072
## typehouse:contact.L typetower block:contact.L
## low 0.4292855 -0.13188065
## medium 0.2725097 -0.01339855
##
## Std. Errors:
## (Intercept) typehouse typetower block contact.L typehouse:contact.L
## low 0.08518966 0.1390964 0.1531546 0.1204764 0.1967120
## medium 0.09489889 0.1517603 0.1547727 0.1342073 0.2146215
## typetower block:contact.L
## low 0.2165934
## medium 0.2188817
##
## Residual Deviance: 3598.587
## AIC: 3622.587
nom.model2 <- multinom(satisfaction ~ type + contact , weights=frequency, data=housing)
## # weights: 15 (8 variable)
## initial value 1846.767257
## iter 10 value 1803.572484
## final value 1802.740161
## converged
summary(nom.model2)
## Call:
## multinom(formula = satisfaction ~ type + contact, data = housing,
## weights = frequency)
##
## Coefficients:
## (Intercept) typehouse typetower block contact.L
## low -0.08329098 0.3040193 -0.6415930 -0.23209137
## medium -0.44931685 0.3736972 -0.2348338 -0.02280046
##
## Std. Errors:
## (Intercept) typehouse typetower block contact.L
## low 0.08437084 0.1351693 0.1500773 0.08357085
## medium 0.09337762 0.1454812 0.1540988 0.08974541
##
## Residual Deviance: 3605.48
## AIC: 3621.48
anova(nom.model2, nom.model1) # model with interactions is not better
## Likelihood ratio tests of Multinomial Models
##
## Response: satisfaction
## Model Resid. df Resid. Dev Test Df
## 1 type + contact 28 3605.480
## 2 type + contact + type * contact 24 3598.587 1 vs 2 4
## LR stat. Pr(Chi)
## 1
## 2 6.893028 0.1416504
nom.model3 <- multinom(satisfaction ~ contact, weights=frequency, data=housing)
## # weights: 9 (4 variable)
## initial value 1846.767257
## final value 1821.875901
## converged
summary(nom.model3)
## Call:
## multinom(formula = satisfaction ~ contact, data = housing, weights = frequency)
##
## Coefficients:
## (Intercept) contact.L
## low -0.1498514 -0.15375902
## medium -0.4077930 0.02813511
##
## Std. Errors:
## (Intercept) contact.L
## low 0.05764143 0.08151729
## medium 0.06233772 0.08815884
##
## Residual Deviance: 3643.752
## AIC: 3651.752
anova(nom.model3, nom.model2) # nom.model2 is better
## Likelihood ratio tests of Multinomial Models
##
## Response: satisfaction
## Model Resid. df Resid. Dev Test Df LR stat. Pr(Chi)
## 1 contact 32 3643.752
## 2 type + contact 28 3605.480 1 vs 2 4 38.27148 9.849653e-08
ord.model2 <- polr(ordered(satisfaction) ~ type + ordered(contact), weights=frequency, data=housing) # proportional odds logistic regression
summary(ord.model2)
##
## Re-fitting to get Hessian
## Call:
## polr(formula = ordered(satisfaction) ~ type + ordered(contact),
## data = housing, weights = frequency)
##
## Coefficients:
## Value Std. Error t value
## typehouse 0.2759 0.1043 2.6442
## typetower block -0.2924 0.1179 -2.4810
## ordered(contact).L -0.0527 0.0653 -0.8071
##
## Intercepts:
## Value Std. Error t value
## high|low -0.4044 0.0700 -5.7741
## low|medium 1.0444 0.0743 14.0555
##
## Residual Deviance: 3628.55
## AIC: 3638.55
Although larger deviance, this seems to make more sense since ordering is inherent part of the variables standard errors of ordinal model lower model with interactions is not better
ord.model2$fitted.values
## high low medium
## 1 0.4627575 0.3230023 0.2142402
## 2 0.4813303 0.3167093 0.2019604
## 3 0.4627575 0.3230023 0.2142402
## 4 0.4813303 0.3167093 0.2019604
## 5 0.4627575 0.3230023 0.2142402
## 6 0.4813303 0.3167093 0.2019604
## 7 0.3913479 0.3411141 0.2675379
## 8 0.4092379 0.3375745 0.2531876
## 9 0.3913479 0.3411141 0.2675379
## 10 0.4092379 0.3375745 0.2531876
## 11 0.3913479 0.3411141 0.2675379
## 12 0.4092379 0.3375745 0.2531876
## 13 0.3279326 0.3471468 0.3249206
## 14 0.3445647 0.3466453 0.3087900
## 15 0.3279326 0.3471468 0.3249206
## 16 0.3445647 0.3466453 0.3087900
## 17 0.3279326 0.3471468 0.3249206
## 18 0.3445647 0.3466453 0.3087900
names(ord.model2)
## [1] "coefficients" "zeta" "deviance" "fitted.values"
## [5] "lev" "terms" "df.residual" "edf"
## [9] "n" "nobs" "call" "method"
## [13] "convergence" "niter" "lp" "model"
## [17] "contrasts" "xlevels"
(tumordat <- read.csv2("Table8.6Tumorresponse.csv", skip=2))
## treatment sex response frequency
## 1 sequential male progressive 28
## 2 sequential male no change 45
## 3 sequential male partial remission 29
## 4 sequential male complete remission 26
## 5 sequential female progressive 4
## 6 sequential female no change 12
## 7 sequential female partial remission 5
## 8 sequential female complete remission 2
## 9 alternating male progressive 41
## 10 alternating male no change 44
## 11 alternating male partial remission 20
## 12 alternating male complete remission 20
## 13 alternating female progressive 12
## 14 alternating female no change 7
## 15 alternating female partial remission 3
## 16 alternating female complete remission 1
with(tumordat, xtabs(frequency ~ factor(sex)+ factor(response) + factor(treatment))) #Table 8.6 in alphabetic different order
## , , factor(treatment) = alternating
##
## factor(response)
## factor(sex) complete remission no change partial remission progressive
## female 1 7 3 12
## male 20 44 20 41
##
## , , factor(treatment) = sequential
##
## factor(response)
## factor(sex) complete remission no change partial remission progressive
## female 2 12 5 4
## male 26 45 29 28
mod1 <- polr(ordered(response) ~ factor(treatment) + factor(sex), weights=frequency, data = tumordat)
summary(mod1)
##
## Re-fitting to get Hessian
## Call:
## polr(formula = ordered(response) ~ factor(treatment) + factor(sex),
## data = tumordat, weights = frequency)
##
## Coefficients:
## Value Std. Error t value
## factor(treatment)sequential -0.4485 0.2108 -2.128
## factor(sex)male -0.4153 0.2861 -1.452
##
## Intercepts:
## Value Std. Error t value
## complete remission|no change -2.2289 0.3166 -7.0407
## no change|partial remission -0.4816 0.2899 -1.6610
## partial remission|progressive 0.3534 0.2910 1.2146
##
## Residual Deviance: 793.4132
## AIC: 803.4132
mod2 <- polr(factor(response) ~ 1, weights=frequency, data = tumordat)
anova(mod1,mod2)
## Likelihood ratio tests of ordinal regression models
##
## Response: factor(response)
## Response: ordered(response)
## Model Resid. df Resid. Dev Test Df
## 1 1 296 799.9680
## 2 factor(treatment) + factor(sex) 294 793.4132 1 vs 2 2
## LR stat. Pr(Chi)
## 1
## 2 6.55474 0.03772735
summary(mod1)
##
## Re-fitting to get Hessian
## Call:
## polr(formula = ordered(response) ~ factor(treatment) + factor(sex),
## data = tumordat, weights = frequency)
##
## Coefficients:
## Value Std. Error t value
## factor(treatment)sequential -0.4485 0.2108 -2.128
## factor(sex)male -0.4153 0.2861 -1.452
##
## Intercepts:
## Value Std. Error t value
## complete remission|no change -2.2289 0.3166 -7.0407
## no change|partial remission -0.4816 0.2899 -1.6610
## partial remission|progressive 0.3534 0.2910 1.2146
##
## Residual Deviance: 793.4132
## AIC: 803.4132
##calculating p-values
(ctable <- coef(summary(mod1)))
##
## Re-fitting to get Hessian
## Value Std. Error t value
## factor(treatment)sequential -0.4484604 0.2107789 -2.127634
## factor(sex)male -0.4153424 0.2861069 -1.451704
## complete remission|no change -2.2289307 0.3165793 -7.040671
## no change|partial remission -0.4815889 0.2899408 -1.660990
## partial remission|progressive 0.3534256 0.2909847 1.214585
(p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2)
## factor(treatment)sequential factor(sex)male
## 3.336742e-02 1.465840e-01
## complete remission|no change no change|partial remission
## 1.913163e-12 9.671545e-02
## partial remission|progressive
## 2.245245e-01
mod3 <- polr(ordered(response) ~ factor(sex), weights=frequency, data = tumordat)
anova(mod1,mod3)
## Likelihood ratio tests of ordinal regression models
##
## Response: ordered(response)
## Model Resid. df Resid. Dev Test Df
## 1 factor(sex) 295 797.9652
## 2 factor(treatment) + factor(sex) 294 793.4132 1 vs 2 1
## LR stat. Pr(Chi)
## 1
## 2 4.551977 0.03288076
library(VGAM)
## Loading required package: stats4
## Loading required package: splines
resp.prog <- c(28, 4, 41, 12)
resp.no <- c(45, 12, 44, 7)
resp.part <- c(29, 5, 20, 3)
resp.comp <- c(26, 2, 20, 1)
treat <- c("seq", "seq", "alt", "alt")
sex <- c("m", "f", "m", "f")
dat <- data.frame(treat,sex,resp.prog,resp.no,resp.part,resp.comp)
##Adjacent category model with logit link function
fit.adj <- vglm(cbind(resp.prog,resp.no,resp.part,resp.comp) ~ factor(treat) + factor(sex),
family=acat(reverse=FALSE, parallel=TRUE), data=dat)
summary(fit.adj)
##
## Call:
## vglm(formula = cbind(resp.prog, resp.no, resp.part, resp.comp) ~
## factor(treat) + factor(sex), family = acat(reverse = FALSE,
## parallel = TRUE), data = dat)
##
##
## Pearson residuals:
## loge(P[Y=2]/P[Y=1]) loge(P[Y=3]/P[Y=2]) loge(P[Y=4]/P[Y=3])
## 1 0.04426 -0.06102 -0.4360
## 2 1.73162 -0.11329 -0.7348
## 3 -0.40748 0.08184 0.9726
## 4 -0.83665 0.04515 -0.4108
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -0.1619 0.2037 -0.795 0.4268
## (Intercept):2 -1.0754 0.2307 -4.661 3.14e-06 ***
## (Intercept):3 -0.6193 0.2642 -2.344 0.0191 *
## factor(treat)seq 0.2889 0.1148 2.515 0.0119 *
## factor(sex)m 0.3308 0.1684 1.964 0.0495 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of linear predictors: 3
##
## Names of linear predictors:
## loge(P[Y=2]/P[Y=1]), loge(P[Y=3]/P[Y=2]), loge(P[Y=4]/P[Y=3])
##
## Residual deviance: 5.9537 on 7 degrees of freedom
##
## Log-likelihood: -25.7347 on 7 degrees of freedom
##
## Number of iterations: 4
##
## No Hauck-Donner effect found in any of the estimates
##Continuation ratio model with logit link function
fit.cratio <- vglm(cbind(resp.prog,resp.no,resp.part,resp.comp) ~ factor(treat) + factor(sex),
family=cratio(reverse=FALSE, parallel=TRUE), data=dat)
summary(fit.cratio)
##
## Call:
## vglm(formula = cbind(resp.prog, resp.no, resp.part, resp.comp) ~
## factor(treat) + factor(sex), family = cratio(reverse = FALSE,
## parallel = TRUE), data = dat)
##
##
## Pearson residuals:
## logit(P[Y>1|Y>=1]) logit(P[Y>2|Y>=2]) logit(P[Y>3|Y>=3])
## 1 0.2553 -0.1547 -0.6492
## 2 1.6424 -0.5523 -0.5594
## 3 -0.5441 0.3181 1.0555
## 4 -0.8971 0.2074 -0.1927
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 0.2853 0.2519 1.132 0.25745
## (Intercept):2 -0.7015 0.2709 -2.589 0.00962 **
## (Intercept):3 -0.8670 0.3136 -2.765 0.00570 **
## factor(treat)seq 0.3975 0.1710 2.325 0.02009 *
## factor(sex)m 0.5355 0.2437 2.197 0.02798 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of linear predictors: 3
##
## Names of linear predictors:
## logit(P[Y>1|Y>=1]), logit(P[Y>2|Y>=2]), logit(P[Y>3|Y>=3])
##
## Residual deviance: 6.4591 on 7 degrees of freedom
##
## Log-likelihood: -25.9874 on 7 degrees of freedom
##
## Number of iterations: 4
##
## No Hauck-Donner effect found in any of the estimates
\[ Y_i \sim Po(\mu_i)\] \[ log (\mu_i) = \beta_1 + \sum x_{ij} \beta_j \] \[ l(\mu,y) = \sum y_i log(\mu_i) - \mu_i - log(y_i!) = \sum y_i [\beta_1 + \sum x_{ij}\beta_j ] - e^{\beta_1 + \sum x_{ij}\beta_j} - log(y_i) \] \[ U_1 = \frac{\partial l}{\partial \beta_1} = \sum y_i - \mu_i \]
then,(b) and (c) are straightforward.
m <- read.table("data.txt", header = TRUE)
attach(m)
rate.car <- NULL
a <- NULL
CAR <- 1:4
for(i in CAR){
a <- apply(m[car == i, ], 2, sum)
rate.car[i] <- a[4] / a[5]
}
rate.age <- NULL
a <- NULL
AGE <- 1:4
for(i in AGE){
a <- apply(m[age == i, ], 2, sum)
rate.age[i] <- a[4] / a[5]
}
rate.district <- NULL
a <- NULL
DISTRICT <- 0:1
for(i in DISTRICT){
a <- apply(m[district == i, ], 2, sum)
rate.district[i + 1] <- a[4] / a[5]
}
par(mfrow = c(1, 3))
barplot(matrix(rate.car,ncol=1),beside=T,col=heat.colors(4),ylim=c(0,0.3),
legend.text = c("Category 1", "Category 2","Category 3","Category 4"),
args.legend = list(x = "topright"),main='rate for CAR')
barplot(matrix(rate.age,ncol=1),beside=T,col=heat.colors(4),ylim=c(0,0.3),
legend.text = c("Category 1", "Category 2","Category 3","Category 4"),
args.legend = list(x = "topright"),main='rate for AGE')
barplot(matrix(rate.district,ncol=1),beside=T,col=heat.colors(4),ylim=c(0,0.3),
legend.text = c("Category 1", "Category 2"),
args.legend = list(x = "topright"),main='rate for DIST')
y <- m[,"y"]
m1 <- m
m[, "car"] <- factor(m$car)
m[, "age"] <- factor(m$age)
model <- glm(y ~ car * age * district + offset(log(m[, "n"])), family = 'poisson', data = m)
summary(model)
##
## Call:
## glm(formula = y ~ car * age * district + offset(log(m[, "n"])),
## family = "poisson", data = m)
##
## Deviance Residuals:
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [24] 0 0 0 0 0 0 0 0 0
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.585e+00 1.240e-01 -12.775 < 2e-16 ***
## car2 -1.673e-02 1.600e-01 -0.105 0.916721
## car3 -1.091e-01 1.994e-01 -0.547 0.584401
## car4 2.935e-01 3.260e-01 0.900 0.367947
## age2 -4.065e-01 1.754e-01 -2.317 0.020477 *
## age3 -6.504e-01 1.860e-01 -3.496 0.000472 ***
## age4 -7.681e-01 1.364e-01 -5.630 1.8e-08 ***
## district -7.181e-01 7.179e-01 -1.000 0.317198
## car2:age2 1.649e-01 2.174e-01 0.759 0.448106
## car3:age2 5.726e-01 2.524e-01 2.269 0.023298 *
## car4:age2 2.556e-01 3.876e-01 0.660 0.509574
## car2:age3 2.049e-01 2.248e-01 0.912 0.361989
## car3:age3 7.173e-01 2.575e-01 2.785 0.005345 **
## car4:age3 2.390e-01 3.888e-01 0.615 0.538711
## car2:age4 2.021e-01 1.731e-01 1.168 0.242996
## car3:age4 4.854e-01 2.124e-01 2.286 0.022271 *
## car4:age4 2.505e-01 3.399e-01 0.737 0.461104
## car2:district 8.312e-01 8.176e-01 1.017 0.309299
## car3:district 1.131e+00 8.601e-01 1.315 0.188626
## car4:district -2.139e+01 4.225e+04 -0.001 0.999596
## age2:district 8.220e-01 8.548e-01 0.962 0.336246
## age3:district 6.504e-01 8.858e-01 0.734 0.462753
## age4:district 8.984e-01 7.392e-01 1.215 0.224188
## car2:age2:district -1.184e+00 9.950e-01 -1.190 0.234003
## car3:age2:district -1.425e+00 1.052e+00 -1.354 0.175588
## car4:age2:district 2.175e+01 4.225e+04 0.001 0.999589
## car2:age3:district -4.298e-01 9.944e-01 -0.432 0.665567
## car3:age3:district -8.832e-01 1.039e+00 -0.850 0.395125
## car4:age3:district 2.202e+01 4.225e+04 0.001 0.999584
## car2:age4:district -8.042e-01 8.428e-01 -0.954 0.340026
## car3:age4:district -1.032e+00 8.881e-01 -1.162 0.245079
## car4:age4:district 2.178e+01 4.225e+04 0.001 0.999589
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 2.0783e+02 on 31 degrees of freedom
## Residual deviance: 4.1226e-10 on 0 degrees of freedom
## AIC: 232.36
##
## Number of Fisher Scoring iterations: 20
We can see how results suggest simpler model
model <- glm(y ~ car + age + district + car:age + offset(log(m[, "n"])), family = 'poisson', data = m)
summary(model)
##
## Call:
## glm(formula = y ~ car + age + district + car:age + offset(log(m[,
## "n"])), family = "poisson", data = m)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4804 -0.2481 -0.0349 0.3179 1.6404
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.62983 0.12224 -13.333 < 2e-16 ***
## car2 0.02116 0.15636 0.135 0.892356
## car3 -0.04446 0.19148 -0.232 0.816377
## car4 0.24957 0.32532 0.767 0.442991
## age2 -0.36989 0.17091 -2.164 0.030450 *
## age3 -0.62858 0.18106 -3.472 0.000517 ***
## age4 -0.72688 0.13349 -5.445 5.17e-08 ***
## district 0.21916 0.05854 3.744 0.000181 ***
## car2:age2 0.10099 0.21131 0.478 0.632705
## car3:age2 0.48849 0.24293 2.011 0.044346 *
## car4:age2 0.34022 0.38015 0.895 0.370800
## car2:age3 0.20265 0.21768 0.931 0.351867
## car3:age3 0.67189 0.24710 2.719 0.006546 **
## car4:age3 0.35722 0.38075 0.938 0.348145
## car2:age4 0.16706 0.16841 0.992 0.321190
## car3:age4 0.43166 0.20349 2.121 0.033897 *
## car4:age4 0.34847 0.33723 1.033 0.301444
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 207.833 on 31 degrees of freedom
## Residual deviance: 13.192 on 15 degrees of freedom
## AIC: 215.55
##
## Number of Fisher Scoring iterations: 4
pchisq(model$deviance, df = model$df.residual, lower.tail = FALSE)
## [1] 0.5874938
p-value is 0.5874938, so the model fits well
We can try even simpler model
model <- glm(y ~ car + age + district + offset(log(m[, "n"])), family = 'poisson', data = m)
summary(model)
##
## Call:
## glm(formula = y ~ car + age + district + offset(log(m[, "n"])),
## family = "poisson", data = m)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8588 -0.6371 -0.1555 0.3749 1.7536
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.81021 0.07532 -24.034 < 2e-16 ***
## car2 0.16229 0.05052 3.213 0.001315 **
## car3 0.39352 0.05498 7.157 8.25e-13 ***
## car4 0.56540 0.07228 7.823 5.18e-15 ***
## age2 -0.18902 0.08282 -2.282 0.022477 *
## age3 -0.34211 0.08130 -4.208 2.58e-05 ***
## age4 -0.53275 0.06979 -7.634 2.28e-14 ***
## district 0.21850 0.05853 3.733 0.000189 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 207.833 on 31 degrees of freedom
## Residual deviance: 23.709 on 24 degrees of freedom
## AIC: 208.07
##
## Number of Fisher Scoring iterations: 4
pchisq(model$deviance, df = model$df.residual, lower.tail = FALSE)
## [1] 0.4783343
p-value = 0.4783343. so the model fits well AIC is smaller then the AIC of the previous model and this model is simpler. We choose this model.
m1[,"district"] <- factor(m1$district)
m=m1
attach(m)
## The following object is masked _by_ .GlobalEnv:
##
## y
## The following objects are masked from m (pos = 3):
##
## age, car, district, n, y
model <- glm(y ~ car + age + district + offset(log(m[, "n"])), family = 'poisson', data = m1)
summary(model)
##
## Call:
## glm(formula = y ~ car + age + district + offset(log(m[, "n"])),
## family = "poisson", data = m1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7248 -0.5681 -0.1679 0.3384 1.9126
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.85253 0.07990 -23.185 < 2e-16 ***
## car 0.19777 0.02080 9.507 < 2e-16 ***
## age -0.17674 0.01849 -9.559 < 2e-16 ***
## district1 0.21865 0.05853 3.736 0.000187 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 207.833 on 31 degrees of freedom
## Residual deviance: 24.685 on 28 degrees of freedom
## AIC: 201.05
##
## Number of Fisher Scoring iterations: 4
pchisq(model$deviance, df = model$df.residual, lower.tail = FALSE)
## [1] 0.6449029
p-value = 0.6449029 so the model fits well