R Markdown

This is an R Markdown document for Chapter 3.

##### 3.2.4 #####

Heart <- read.table("http://www.stat.ufl.edu/~aa/cat/data/Heart.dat",
                    header = TRUE)

head(Heart)
##              snoring yes   no
## 1              never  24 1355
## 2         occasional  35  603
## 3 nearly_every_night  21  192
## 4        every_night  30  224
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
Heart$x <- recode(Heart$snoring, never = 0, occasional = 2,
                  nearly_every_night = 4, every_night = 5)
n <- Heart$yes + Heart$no
fit <- glm(yes/n ~ x, family = binomial(link = logit), weight = n, data = Heart)

summary(fit)
## 
## Call:
## glm(formula = yes/n ~ x, family = binomial(link = logit), data = Heart, 
##     weights = n)
## 
## Deviance Residuals: 
##       1        2        3        4  
## -0.8346   1.2521   0.2758  -0.6845  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.86625    0.16621 -23.261  < 2e-16 ***
## x            0.39734    0.05001   7.945 1.94e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 65.9045  on 3  degrees of freedom
## Residual deviance:  2.8089  on 2  degrees of freedom
## AIC: 27.061
## 
## Number of Fisher Scoring iterations: 4
fitted(fit)
##          1          2          3          4 
## 0.02050742 0.04429511 0.09305411 0.13243885
fit2 <- glm(yes/n ~ x, family = quasi(link = identity, variance = "mu(1-mu)"),
            weights = n, data = Heart)

summary(fit2, dispersion = 1)
## 
## Call:
## glm(formula = yes/n ~ x, family = quasi(link = identity, variance = "mu(1-mu)"), 
##     data = Heart, weights = n)
## 
## Deviance Residuals: 
##        1         2         3         4  
##  0.04478  -0.21322   0.11010   0.09798  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 0.017247   0.003451   4.998 5.80e-07 ***
## x           0.019778   0.002805   7.051 1.77e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasi family taken to be 1)
## 
##     Null deviance: 65.904481  on 3  degrees of freedom
## Residual deviance:  0.069191  on 2  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 3
##### 3.2.5 #####
Heart2 <- read.table("http://www.stat.ufl.edu/~aa/cat/data/Heart2.dat",
                     header = TRUE)
head(Heart2)
##   subject snoring x y
## 1       1   never 0 1
## 2       2   never 0 1
## 3       3   never 0 1
## 4       4   never 0 1
## 5       5   never 0 1
## 6       6   never 0 1
##### 3.3.3 #####
Crabs <- read.table("http://www.stat.ufl.edu/~aa/cat/data/Crabs.dat",
                    header = TRUE)
head(Crabs)
##   crab sat y weight width color spine
## 1    1   8 1   3.05  28.3     2     3
## 2    2   0 0   1.55  22.5     3     3
## 3    3   9 1   2.30  26.0     1     1
## 4    4   0 0   2.10  24.8     3     3
## 5    5   4 1   2.60  26.0     3     3
## 6    6   0 0   2.10  23.8     2     3
plot(sat ~ width, xlab = "Width", ylab = "Number of satellites", data = Crabs)

fit <- glm(sat ~ width, family = poisson(link = log), data = Crabs)

summary(fit)
## 
## Call:
## glm(formula = sat ~ width, family = poisson(link = log), data = Crabs)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8526  -1.9884  -0.4933   1.0970   4.9221  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.30476    0.54224  -6.095  1.1e-09 ***
## width        0.16405    0.01997   8.216  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 632.79  on 172  degrees of freedom
## Residual deviance: 567.88  on 171  degrees of freedom
## AIC: 927.18
## 
## Number of Fisher Scoring iterations: 6
# install.packages("gam")
library(gam)
## Warning: package 'gam' was built under R version 4.1.2
## Loading required package: splines
## Loading required package: foreach
## Warning: package 'foreach' was built under R version 4.1.2
## Loaded gam 1.22-2
gam.fit <- gam(sat ~ s(width), family = poisson, data = Crabs)

curve(predict(gam.fit, data.frame(width = x), type = "resp"), add = TRUE)

##### 3.4.2 #####
Evo <- read.table("http://www.stat.ufl.edu/~aa/cat/data/Evolution.dat",
                  header = TRUE)
Evo
##   ideology true false
## 1        1   11    37
## 2        2   46   104
## 3        3   70    72
## 4        4  241   214
## 5        5   78    36
## 6        6   89    24
## 7        7   36     6
n <- Evo$true + Evo$false
fit <- glm(true/n ~ ideology, family = binomial, weights = n, data = Evo)

summary(fit)
## 
## Call:
## glm(formula = true/n ~ ideology, family = binomial, data = Evo, 
##     weights = n)
## 
## Deviance Residuals: 
##       1        2        3        4        5        6        7  
##  0.1430  -0.2697   1.4614  -1.0791   0.2922   0.4471   0.2035  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.75658    0.20500  -8.569   <2e-16 ***
## ideology     0.49422    0.05092   9.706   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 113.20  on 6  degrees of freedom
## Residual deviance:   3.72  on 5  degrees of freedom
## AIC: 42.332
## 
## Number of Fisher Scoring iterations: 3
confint(fit)
## Waiting for profiling to be done...
##                 2.5 %     97.5 %
## (Intercept) -2.165294 -1.3609733
## ideology     0.396166  0.5959414
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
anova(fit)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: true/n
## 
## Terms added sequentially (first to last)
## 
## 
##          Df Deviance Resid. Df Resid. Dev
## NULL                         6     113.20
## ideology  1   109.48         5       3.72
library(statmod)
## Warning: package 'statmod' was built under R version 4.1.2
fit0 <- glm(true/n ~ 1, family = binomial, weights = n, data = Evo)

glm.scoretest(fit0, Evo$ideology)^2
## [1] 104.101
##### 3.4.5 #####
attach(Evo)

cbind(ideology, true, false, n, sample = true/n, fitted = fitted(fit), std.res. = rstandard(fit, type = "pearson"))
##   ideology true false   n    sample    fitted   std.res.
## 1        1   11    37  48 0.2291667 0.2205679  0.1611162
## 2        2   46   104 150 0.3066667 0.3168813 -0.3515386
## 3        3   70    72 142 0.4929577 0.4319445  1.6480176
## 4        4  241   214 455 0.5296703 0.5548525 -1.4995488
## 5        5   78    36 114 0.6842105 0.6713982  0.3248519
## 6        6   89    24 113 0.7876106 0.7700750  0.5413625
## 7        7   36     6  42 0.8571429 0.8459201  0.2206605