glm for generalized linear model
## fit the model, and assign
resLm <- lm(len ~ supp + dose, data = ToothGrowth)
resLm
##
## Call:
## lm(formula = len ~ supp + dose, data = ToothGrowth)
##
## Coefficients:
## (Intercept) suppVC dose
## 9.27 -3.70 9.76
summary(resLm)
##
## Call:
## lm(formula = len ~ supp + dose, data = ToothGrowth)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.600 -3.700 0.373 2.116 8.800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.272 1.282 7.23 1.3e-09 ***
## suppVC -3.700 1.094 -3.38 0.0013 **
## dose 9.764 0.877 11.14 6.3e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.24 on 57 degrees of freedom
## Multiple R-squared: 0.704, Adjusted R-squared: 0.693
## F-statistic: 67.7 on 2 and 57 DF, p-value: 8.72e-16
## Linear regression
resGlm0 <- glm(len ~ supp + dose, data = ToothGrowth, family = gaussian)
summary(resGlm0)
##
## Call:
## glm(formula = len ~ supp + dose, family = gaussian, data = ToothGrowth)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -6.600 -3.700 0.373 2.116 8.800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.272 1.282 7.23 1.3e-09 ***
## suppVC -3.700 1.094 -3.38 0.0013 **
## dose 9.764 0.877 11.14 6.3e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 17.94)
##
## Null deviance: 3452.2 on 59 degrees of freedom
## Residual deviance: 1022.6 on 57 degrees of freedom
## AIC: 348.4
##
## Number of Fisher Scoring iterations: 2
## logistic regression
resGlm1 <- glm(supp ~ len + dose, data = ToothGrowth, family = binomial)
summary(resGlm1)
##
## Call:
## glm(formula = supp ~ len + dose, family = binomial, data = ToothGrowth)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5984 -0.9625 0.0477 1.0449 1.8425
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.5377 0.7860 1.96 0.0504 .
## len -0.2127 0.0728 -2.92 0.0035 **
## dose 2.0886 0.8497 2.46 0.0140 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 83.178 on 59 degrees of freedom
## Residual deviance: 72.330 on 57 degrees of freedom
## AIC: 78.33
##
## Number of Fisher Scoring iterations: 4
## odds ratios using tableone
tableone::ShowRegTable(resGlm1)
## Waiting for profiling to be done...
## exp(beta) [confint] p
## (Intercept) 4.65 [1.07, 24.34] 0.050
## len 0.81 [0.69, 0.92] 0.003
## dose 8.07 [1.71, 50.12] 0.014
## odds ratios using epicalc
library(epicalc)
## Loading required package: foreign
## Loading required package: survival
## Loading required package: splines
## Loading required package: MASS
## Loading required package: nnet
##
## Attaching package: 'epicalc'
##
## The following object is masked from 'package:R.utils':
##
## use
logistic.display(resGlm1)
##
## Logistic regression predicting supp : VC vs OJ
##
## crude OR(95%CI) adj. OR(95%CI) P(Wald's test)
## len (cont. var.) 0.94 (0.87,1) 0.81 (0.7,0.93) 0.003
##
## dose (cont. var.) 1 (0.44,2.25) 8.07 (1.53,42.69) 0.014
##
## P(LR-test)
## len (cont. var.) < 0.001
##
## dose (cont. var.) 0.007
##
## Log-likelihood = -36.1649
## No. of observations = 60
## AIC value = 78.3297
## Create a dataset manually
nonmel <- read.table(header = TRUE, text = "\n cases city u1 u2 u3 u4 u5 u6 u7 n\n1 1 0 1 0 0 0 0 0 0 172675\n2 16 0 0 1 0 0 0 0 0 123065\n3 30 0 0 0 1 0 0 0 0 96216\n4 71 0 0 0 0 1 0 0 0 92051\n5 102 0 0 0 0 0 1 0 0 72159\n6 130 0 0 0 0 0 0 1 0 54722\n7 133 0 0 0 0 0 0 0 1 32185\n8 40 0 0 0 0 0 0 0 0 8328\n9 4 1 1 0 0 0 0 0 0 181343\n10 38 1 0 1 0 0 0 0 0 146207\n11 119 1 0 0 1 0 0 0 0 121374\n12 221 1 0 0 0 1 0 0 0 111353\n13 259 1 0 0 0 0 1 0 0 83004\n14 310 1 0 0 0 0 0 1 0 55932\n15 226 1 0 0 0 0 0 0 1 29007\n16 65 1 0 0 0 0 0 0 0 7583\n")
## Create age.range variable and city variable
nonmel <- within(nonmel, {
age.range <- rep(c("15_24", "25_34", "35_44", "45_54", "55_64", "65_74",
"75_84", "85+"), 2)
age.range <- factor(age.range)
age.range <- relevel(age.range, ref = "85+")
city <- factor(city, 0:1, c("Minneapolis", "Dallas"))
})
## rop unnecessary columns
nonmel <- nonmel[c("cases", "n", "city", "age.range")]
## Check data
nonmel
## cases n city age.range
## 1 1 172675 Minneapolis 15_24
## 2 16 123065 Minneapolis 25_34
## 3 30 96216 Minneapolis 35_44
## 4 71 92051 Minneapolis 45_54
## 5 102 72159 Minneapolis 55_64
## 6 130 54722 Minneapolis 65_74
## 7 133 32185 Minneapolis 75_84
## 8 40 8328 Minneapolis 85+
## 9 4 181343 Dallas 15_24
## 10 38 146207 Dallas 25_34
## 11 119 121374 Dallas 35_44
## 12 221 111353 Dallas 45_54
## 13 259 83004 Dallas 55_64
## 14 310 55932 Dallas 65_74
## 15 226 29007 Dallas 75_84
## 16 65 7583 Dallas 85+
## Poisson regression
resGlm2 <- glm(cases ~ city + age.range + offset(log(n)), family = poisson,
data = nonmel)
summary(resGlm2)
##
## Call:
## glm(formula = cases ~ city + age.range + offset(log(n)), family = poisson,
## data = nonmel)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5060 -0.4857 0.0164 0.3693 1.2476
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.4834 0.1037 -52.89 < 2e-16 ***
## cityDallas 0.8039 0.0522 15.40 < 2e-16 ***
## age.range15_24 -6.1742 0.4577 -13.49 < 2e-16 ***
## age.range25_34 -3.5440 0.1675 -21.16 < 2e-16 ***
## age.range35_44 -2.3268 0.1275 -18.25 < 2e-16 ***
## age.range45_54 -1.5790 0.1138 -13.87 < 2e-16 ***
## age.range55_64 -1.0869 0.1109 -9.80 < 2e-16 ***
## age.range65_74 -0.5288 0.1086 -4.87 0.0000011 ***
## age.range75_84 -0.1157 0.1109 -1.04 0.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 2789.6810 on 15 degrees of freedom
## Residual deviance: 8.2585 on 7 degrees of freedom
## AIC: 120.5
##
## Number of Fisher Scoring iterations: 4
tableone::ShowRegTable(resGlm2)
## Waiting for profiling to be done...
## exp(beta) [confint] p
## (Intercept) 0.00 [0.00, 0.01] <0.001
## cityDallas 2.23 [2.02, 2.48] <0.001
## age.range15_24 0.00 [0.00, 0.00] <0.001
## age.range25_34 0.03 [0.02, 0.04] <0.001
## age.range35_44 0.10 [0.08, 0.13] <0.001
## age.range45_54 0.21 [0.17, 0.26] <0.001
## age.range55_64 0.34 [0.27, 0.42] <0.001
## age.range65_74 0.59 [0.48, 0.73] <0.001
## age.range75_84 0.89 [0.72, 1.11] 0.297
## Survival analysis
library(survival)
data(pbc)
head(pbc)
## id time status trt age sex ascites hepato spiders edema bili chol
## 1 1 400 2 1 58.77 f 1 1 1 1.0 14.5 261
## 2 2 4500 0 1 56.45 f 0 1 1 0.0 1.1 302
## 3 3 1012 2 1 70.07 m 0 0 0 0.5 1.4 176
## 4 4 1925 2 1 54.74 f 0 1 1 0.5 1.8 244
## 5 5 1504 1 2 38.11 f 0 1 1 0.0 3.4 279
## 6 6 2503 2 2 66.26 f 0 1 0 0.0 0.8 248
## albumin copper alk.phos ast trig platelet protime stage
## 1 2.60 156 1718 137.95 172 190 12.2 4
## 2 4.14 54 7395 113.52 88 221 10.6 3
## 3 3.48 210 516 96.10 55 151 12.0 4
## 4 2.54 64 6122 60.63 92 183 10.3 4
## 5 3.53 143 671 113.15 72 136 10.9 3
## 6 3.98 50 944 93.00 63 NA 11.0 3
## coxph
resCoxph <- coxph(Surv(time, status == 2) ~ age + sex, data = pbc)
summary(resCoxph)
## Call:
## coxph(formula = Surv(time, status == 2) ~ age + sex, data = pbc)
##
## n= 418, number of events= 161
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.03827 1.03902 0.00786 4.87 0.0000011 ***
## sexf -0.27174 0.76205 0.22316 -1.22 0.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.039 0.962 1.023 1.06
## sexf 0.762 1.312 0.492 1.18
##
## Concordance= 0.614 (se = 0.025 )
## Rsquare= 0.062 (max possible= 0.985 )
## Likelihood ratio test= 26.6 on 2 df, p=0.00000169
## Wald test = 26.7 on 2 df, p=0.0000016
## Score (logrank) test = 27 on 2 df, p=0.00000139