glm for generalized linear model

## fit the model, and assign
resLm <- lm(len ~ supp + dose, data = ToothGrowth)
resLm
## 
## Call:
## lm(formula = len ~ supp + dose, data = ToothGrowth)
## 
## Coefficients:
## (Intercept)       suppVC         dose  
##        9.27        -3.70         9.76
summary(resLm)
## 
## Call:
## lm(formula = len ~ supp + dose, data = ToothGrowth)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.600 -3.700  0.373  2.116  8.800 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    9.272      1.282    7.23  1.3e-09 ***
## suppVC        -3.700      1.094   -3.38   0.0013 ** 
## dose           9.764      0.877   11.14  6.3e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.24 on 57 degrees of freedom
## Multiple R-squared:  0.704,  Adjusted R-squared:  0.693 
## F-statistic: 67.7 on 2 and 57 DF,  p-value: 8.72e-16

## Linear regression
resGlm0 <- glm(len ~ supp + dose, data = ToothGrowth, family = gaussian)
summary(resGlm0)
## 
## Call:
## glm(formula = len ~ supp + dose, family = gaussian, data = ToothGrowth)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -6.600  -3.700   0.373   2.116   8.800  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    9.272      1.282    7.23  1.3e-09 ***
## suppVC        -3.700      1.094   -3.38   0.0013 ** 
## dose           9.764      0.877   11.14  6.3e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 17.94)
## 
##     Null deviance: 3452.2  on 59  degrees of freedom
## Residual deviance: 1022.6  on 57  degrees of freedom
## AIC: 348.4
## 
## Number of Fisher Scoring iterations: 2

## logistic regression
resGlm1 <- glm(supp ~ len + dose, data = ToothGrowth, family = binomial)
summary(resGlm1)
## 
## Call:
## glm(formula = supp ~ len + dose, family = binomial, data = ToothGrowth)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5984  -0.9625   0.0477   1.0449   1.8425  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   1.5377     0.7860    1.96   0.0504 . 
## len          -0.2127     0.0728   -2.92   0.0035 **
## dose          2.0886     0.8497    2.46   0.0140 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 83.178  on 59  degrees of freedom
## Residual deviance: 72.330  on 57  degrees of freedom
## AIC: 78.33
## 
## Number of Fisher Scoring iterations: 4

## odds ratios using tableone
tableone::ShowRegTable(resGlm1)
## Waiting for profiling to be done...
##             exp(beta) [confint] p     
## (Intercept) 4.65 [1.07, 24.34]   0.050
## len         0.81 [0.69, 0.92]    0.003
## dose        8.07 [1.71, 50.12]   0.014

## odds ratios using epicalc
library(epicalc)
## Loading required package: foreign
## Loading required package: survival
## Loading required package: splines
## Loading required package: MASS
## Loading required package: nnet
## 
## Attaching package: 'epicalc'
## 
## The following object is masked from 'package:R.utils':
## 
##     use
logistic.display(resGlm1)
## 
## Logistic regression predicting supp : VC vs OJ 
##  
##                   crude OR(95%CI) adj. OR(95%CI)     P(Wald's test)
## len (cont. var.)  0.94 (0.87,1)   0.81 (0.7,0.93)    0.003         
##                                                                    
## dose (cont. var.) 1 (0.44,2.25)   8.07 (1.53,42.69)  0.014         
##                                                                    
##                   P(LR-test)
## len (cont. var.)  < 0.001   
##                             
## dose (cont. var.) 0.007     
##                             
## Log-likelihood = -36.1649
## No. of observations = 60
## AIC value = 78.3297

## Create a dataset manually
nonmel <- read.table(header = TRUE, text = "\n   cases city u1 u2 u3 u4 u5 u6 u7      n\n1      1    0  1  0  0  0  0  0  0 172675\n2     16    0  0  1  0  0  0  0  0 123065\n3     30    0  0  0  1  0  0  0  0  96216\n4     71    0  0  0  0  1  0  0  0  92051\n5    102    0  0  0  0  0  1  0  0  72159\n6    130    0  0  0  0  0  0  1  0  54722\n7    133    0  0  0  0  0  0  0  1  32185\n8     40    0  0  0  0  0  0  0  0   8328\n9      4    1  1  0  0  0  0  0  0 181343\n10    38    1  0  1  0  0  0  0  0 146207\n11   119    1  0  0  1  0  0  0  0 121374\n12   221    1  0  0  0  1  0  0  0 111353\n13   259    1  0  0  0  0  1  0  0  83004\n14   310    1  0  0  0  0  0  1  0  55932\n15   226    1  0  0  0  0  0  0  1  29007\n16    65    1  0  0  0  0  0  0  0   7583\n")

## Create age.range variable and city variable
nonmel <- within(nonmel, {
    age.range <- rep(c("15_24", "25_34", "35_44", "45_54", "55_64", "65_74", 
        "75_84", "85+"), 2)
    age.range <- factor(age.range)
    age.range <- relevel(age.range, ref = "85+")

    city <- factor(city, 0:1, c("Minneapolis", "Dallas"))
})

## rop unnecessary columns
nonmel <- nonmel[c("cases", "n", "city", "age.range")]

## Check data
nonmel
##    cases      n        city age.range
## 1      1 172675 Minneapolis     15_24
## 2     16 123065 Minneapolis     25_34
## 3     30  96216 Minneapolis     35_44
## 4     71  92051 Minneapolis     45_54
## 5    102  72159 Minneapolis     55_64
## 6    130  54722 Minneapolis     65_74
## 7    133  32185 Minneapolis     75_84
## 8     40   8328 Minneapolis       85+
## 9      4 181343      Dallas     15_24
## 10    38 146207      Dallas     25_34
## 11   119 121374      Dallas     35_44
## 12   221 111353      Dallas     45_54
## 13   259  83004      Dallas     55_64
## 14   310  55932      Dallas     65_74
## 15   226  29007      Dallas     75_84
## 16    65   7583      Dallas       85+

## Poisson regression
resGlm2 <- glm(cases ~ city + age.range + offset(log(n)), family = poisson, 
    data = nonmel)
summary(resGlm2)
## 
## Call:
## glm(formula = cases ~ city + age.range + offset(log(n)), family = poisson, 
##     data = nonmel)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5060  -0.4857   0.0164   0.3693   1.2476  
## 
## Coefficients:
##                Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)     -5.4834     0.1037  -52.89   < 2e-16 ***
## cityDallas       0.8039     0.0522   15.40   < 2e-16 ***
## age.range15_24  -6.1742     0.4577  -13.49   < 2e-16 ***
## age.range25_34  -3.5440     0.1675  -21.16   < 2e-16 ***
## age.range35_44  -2.3268     0.1275  -18.25   < 2e-16 ***
## age.range45_54  -1.5790     0.1138  -13.87   < 2e-16 ***
## age.range55_64  -1.0869     0.1109   -9.80   < 2e-16 ***
## age.range65_74  -0.5288     0.1086   -4.87 0.0000011 ***
## age.range75_84  -0.1157     0.1109   -1.04       0.3    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 2789.6810  on 15  degrees of freedom
## Residual deviance:    8.2585  on  7  degrees of freedom
## AIC: 120.5
## 
## Number of Fisher Scoring iterations: 4
tableone::ShowRegTable(resGlm2)
## Waiting for profiling to be done...
##                exp(beta) [confint] p     
## (Intercept)    0.00 [0.00, 0.01]   <0.001
## cityDallas     2.23 [2.02, 2.48]   <0.001
## age.range15_24 0.00 [0.00, 0.00]   <0.001
## age.range25_34 0.03 [0.02, 0.04]   <0.001
## age.range35_44 0.10 [0.08, 0.13]   <0.001
## age.range45_54 0.21 [0.17, 0.26]   <0.001
## age.range55_64 0.34 [0.27, 0.42]   <0.001
## age.range65_74 0.59 [0.48, 0.73]   <0.001
## age.range75_84 0.89 [0.72, 1.11]    0.297


## Survival analysis
library(survival)
data(pbc)
head(pbc)
##   id time status trt   age sex ascites hepato spiders edema bili chol
## 1  1  400      2   1 58.77   f       1      1       1   1.0 14.5  261
## 2  2 4500      0   1 56.45   f       0      1       1   0.0  1.1  302
## 3  3 1012      2   1 70.07   m       0      0       0   0.5  1.4  176
## 4  4 1925      2   1 54.74   f       0      1       1   0.5  1.8  244
## 5  5 1504      1   2 38.11   f       0      1       1   0.0  3.4  279
## 6  6 2503      2   2 66.26   f       0      1       0   0.0  0.8  248
##   albumin copper alk.phos    ast trig platelet protime stage
## 1    2.60    156     1718 137.95  172      190    12.2     4
## 2    4.14     54     7395 113.52   88      221    10.6     3
## 3    3.48    210      516  96.10   55      151    12.0     4
## 4    2.54     64     6122  60.63   92      183    10.3     4
## 5    3.53    143      671 113.15   72      136    10.9     3
## 6    3.98     50      944  93.00   63       NA    11.0     3

## coxph
resCoxph <- coxph(Surv(time, status == 2) ~ age + sex, data = pbc)
summary(resCoxph)
## Call:
## coxph(formula = Surv(time, status == 2) ~ age + sex, data = pbc)
## 
##   n= 418, number of events= 161 
## 
##          coef exp(coef) se(coef)     z  Pr(>|z|)    
## age   0.03827   1.03902  0.00786  4.87 0.0000011 ***
## sexf -0.27174   0.76205  0.22316 -1.22      0.22    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      exp(coef) exp(-coef) lower .95 upper .95
## age      1.039      0.962     1.023      1.06
## sexf     0.762      1.312     0.492      1.18
## 
## Concordance= 0.614  (se = 0.025 )
## Rsquare= 0.062   (max possible= 0.985 )
## Likelihood ratio test= 26.6  on 2 df,   p=0.00000169
## Wald test            = 26.7  on 2 df,   p=0.0000016
## Score (logrank) test = 27  on 2 df,   p=0.00000139