library(UsingR)
## Loading required package: MASS
## Loading required package: HistData
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
data("father.son")
fit<-lm(sheight~fheight, father.son)
summary(fit)
##
## Call:
## lm(formula = sheight ~ fheight, data = father.son)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.8772 -1.5144 -0.0079 1.6285 8.9685
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.88660 1.83235 18.49 <2e-16 ***
## fheight 0.51409 0.02705 19.01 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.437 on 1076 degrees of freedom
## Multiple R-squared: 0.2513, Adjusted R-squared: 0.2506
## F-statistic: 361.2 on 1 and 1076 DF, p-value: < 2.2e-16
confint(fit)
## 2.5 % 97.5 %
## (Intercept) 30.2912126 37.4819961
## fheight 0.4610188 0.5671673
cfheight<-(father.son$fheight - mean(father.son$fheight))
fit2<-lm(sheight~cfheight, father.son)
confint(fit2)
## 2.5 % 97.5 %
## (Intercept) 68.5384554 68.8296839
## cfheight 0.4610188 0.5671673
predict(fit, newdata = data.frame(fheight = mean(father.son$fheight)), interval = 'confidence')
## fit lwr upr
## 1 68.68407 68.53846 68.82968
predict(fit, newdata = data.frame(fheight = mean(father.son$fheight)), interval = 'prediction')
## fit lwr upr
## 1 68.68407 63.90091 73.46723
fit<-lm(mpg~hp, mtcars)
summary(fit)$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.09886054 1.6339210 18.421246 6.642736e-18
## hp -0.06822828 0.0101193 -6.742389 1.787835e-07
.
confint(fit)
## 2.5 % 97.5 %
## (Intercept) 26.76194879 33.4357723
## hp -0.08889465 -0.0475619
chp<-(mtcars$hp - mean(mtcars$hp))
fit2<-lm(mpg~chp, mtcars)
confint(fit2)
## 2.5 % 97.5 %
## (Intercept) 18.69599452 21.4852555
## chp -0.08889465 -0.0475619
predict(fit, newdata = data.frame(hp=mean(mtcars$hp)), interval = 'confidence')
## fit lwr upr
## 1 20.09062 18.69599 21.48526
predict(fit, newdata = data.frame(hp=mean(mtcars$hp)), interval = 'prediction')
## fit lwr upr
## 1 20.09062 12.07908 28.10217
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Hmisc':
##
## src, summarize
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
y <- mtcars$mpg; x <- mtcars$hp; n <- length(y)
fit <- lm(y~x)
p1 <- data.frame(predict(fit, newdata = data.frame(x=x), interval = 'confidence'))
p2 <- data.frame(predict(fit, newdata = data.frame(x=x), interval = 'prediction'))
p1 <- p1 %>%
mutate(interval = 'confidence') %>%
mutate(x = mtcars$hp)
p2 <- p2 %>%
mutate(interval = 'prediction') %>%
mutate(x = mtcars$hp)
dat <- rbind(p1,p2)
names(dat)[1] <- "y"
dat %>%
ggplot(aes(x = x, y = y)) +
geom_ribbon(aes(ymin= lwr, ymax= upr, fill= interval), alpha= 0.4) +
geom_line(colour="red", lwd=.5) +
geom_point(data= data.frame(x=x,y=y), aes(x=x,y=y))
# PAGE 58 ### 1. Load the dataset Seatbelts as part of the datasets
package via data(Seatbelts). Use as.data.frame to convert the object to
a dataframe. Fit a linear model of driver deaths with kms and
PetrolPrice as predictors. Interpret your results. ## ANSWER
library(datasets)
data("Seatbelts")
stblts <- as.data.frame(Seatbelts)
fit<- lm(DriversKilled ~ kms + PetrolPrice, stblts)
summary(fit)$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.157461e+02 1.466559e+01 14.711047 3.772201e-33
## kms -1.749546e-03 6.145401e-04 -2.846919 4.902428e-03
## PetrolPrice -6.437895e+02 1.482896e+02 -4.341435 2.304713e-05
round(summary(fit)$coef, 4)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 215.7461 14.6656 14.7110 0.0000
## kms -0.0017 0.0006 -2.8469 0.0049
## PetrolPrice -643.7895 148.2896 -4.3414 0.0000
library(dplyr)
stblts <- stblts %>%
mutate(ppn = (PetrolPrice - mean(PetrolPrice))/sd(PetrolPrice),
mkm = kms / 1000) %>%
mutate(mkm = mkm - mean(mkm))
summary(lm(DriversKilled ~ mkm + ppn, stblts))$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 122.802083 1.6628507 73.850336 2.395106e-141
## mkm -1.749546 0.6145401 -2.846919 4.902428e-03
## ppn -7.838674 1.8055491 -4.341435 2.304713e-05
predict(fit, newdata = data.frame(kms= mean(stblts$kms),
PetrolPrice= mean(stblts$PetrolPrice)))
## 1
## 122.8021
dk<-stblts$DriversKilled
kms<-stblts$kms
pp<-stblts$PetrolPrice
fitf<-lm(dk~kms+pp)
edk<-resid(lm(dk~kms))
epp<-resid(lm(pp~kms))
summary(lm(edk~epp-1))
##
## Call:
## lm(formula = edk ~ epp - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -51.06 -17.77 -4.15 15.67 59.33
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## epp -643.8 147.5 -4.364 2.09e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.92 on 191 degrees of freedom
## Multiple R-squared: 0.09068, Adjusted R-squared: 0.08592
## F-statistic: 19.05 on 1 and 191 DF, p-value: 2.086e-05
summary(lm(edk~epp-1))$coef
## Estimate Std. Error t value Pr(>|t|)
## epp -643.7895 147.5111 -4.364345 2.085664e-05
summary(fitf)$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.157461e+02 1.466559e+01 14.711047 3.772201e-33
## kms -1.749546e-03 6.145401e-04 -2.846919 4.902428e-03
## pp -6.437895e+02 1.482896e+02 -4.341435 2.304713e-05
dk2 <- resid(lm(dk ~ pp))
kms <- resid(lm(kms ~ pp))
round(summary(lm(dk2 ~ kms -1))$coef, 4)
## Estimate Std. Error t value Pr(>|t|)
## kms -0.0017 6e-04 -2.8619 0.0047
round(summary(lm(DriversKilled ~ mkm + ppn, stblts))$coef,4)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 122.8021 1.6629 73.8503 0.0000
## mkm -1.7495 0.6145 -2.8469 0.0049
## ppn -7.8387 1.8055 -4.3414 0.0000
library(dplyr)
stblts <- stblts %>%
mutate(ppn = (PetrolPrice - mean(PetrolPrice))/sd(PetrolPrice),
mkm = kms / 1000) %>%
mutate(mkm = mkm - mean(mkm))
fit<-lm(I(log(DriversKilled)) ~ mkm + ppn, stblts)
summary(fit)$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.78966306 0.013426810 356.723817 2.737888e-269
## mkm -0.01400794 0.004962149 -2.822959 5.267843e-03
## ppn -0.06412578 0.014579039 -4.398492 1.818005e-05
1-exp(-0.06412578)
## [1] 0.06211298
1-exp(-0.01400794)
## [1] 0.01391029
library(dplyr)
stblts <- stblts %>%
mutate(ppn = (PetrolPrice - mean(PetrolPrice))/sd(PetrolPrice),
mkm = kms / 1000) %>%
mutate(mkm = mkm - mean(mkm))
fit<-lm(DriversKilled ~ mkm + ppn +law, stblts)
summary(fit)
##
## Call:
## lm(formula = DriversKilled ~ mkm + ppn + law, data = stblts)
##
## Residuals:
## Min 1Q Median 3Q Max
## -50.69 -17.29 -4.05 14.33 60.71
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 124.2263 1.8012 68.967 < 2e-16 ***
## mkm -1.2233 0.6657 -1.838 0.067676 .
## ppn -6.9199 1.8514 -3.738 0.000246 ***
## law -11.8892 6.0258 -1.973 0.049955 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.87 on 188 degrees of freedom
## Multiple R-squared: 0.201, Adjusted R-squared: 0.1882
## F-statistic: 15.76 on 3 and 188 DF, p-value: 3.478e-09
fit<-lm(DriversKilled ~ mkm + ppn +I(factor(law)), stblts)
summary(fit)$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 124.226311 1.8012324 68.967399 1.976641e-135
## mkm -1.223318 0.6656567 -1.837761 6.767594e-02
## ppn -6.919949 1.8513987 -3.737687 2.463128e-04
## I(factor(law))1 -11.889202 6.0257850 -1.973055 4.995497e-02
library(dplyr)
stblts <- stblts %>%
mutate(ppn = (PetrolPrice - mean(PetrolPrice))/sd(PetrolPrice),
ppf=as.factor((ppn<=-1.5)+(ppn<=0)+(ppn<=1.5)+(ppn< Inf)),
mkm = kms / 1000) %>%
mutate(mkm = mkm - mean(mkm))
table(stblts$ppf)
##
## 1 2 3 4
## 6 96 71 19
fit<-lm(DriversKilled ~ mkm + ppf +law, stblts)
summary(fit)
##
## Call:
## lm(formula = DriversKilled ~ mkm + ppf + law, data = stblts)
##
## Residuals:
## Min 1Q Median 3Q Max
## -53.384 -17.211 -3.421 14.849 65.613
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 109.8405 9.5066 11.554 <2e-16 ***
## mkm -1.2991 0.7668 -1.694 0.0919 .
## ppf2 10.8271 9.9462 1.089 0.2778
## ppf3 18.6904 9.9374 1.881 0.0616 .
## ppf4 25.0074 10.9163 2.291 0.0231 *
## law -15.3445 6.0345 -2.543 0.0118 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.24 on 186 degrees of freedom
## Multiple R-squared: 0.1833, Adjusted R-squared: 0.1614
## F-statistic: 8.35 on 5 and 186 DF, p-value: 3.835e-07
library(ggplot2)
library(datasets)
library(grid)
library(ggplotify)
swiss <- swiss %>%
mutate(CatholicBin = 1 * (Catholic > 50))
g <- ggplot(swiss, aes(x = Agriculture, y = Fertility, colour = factor(CatholicBin)))+
geom_point(size = 5) +
xlab("% in Agriculture") +
ylab("Fertility")
fit <- lm(Fertility ~ Agriculture + factor(CatholicBin), data = swiss) # Note the + operator
g +
geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2], size=1, colour='green') +
geom_abline(intercept = coef(fit)[1] + coef(fit)[3], slope = coef(fit)[2], size=1, colour='red') +
annotation_custom(grob = grobTree(textGrob("Catholic",
x=0.05,
y=.62,
hjust=0,
gp=gpar(col="blue",
fontsize=10,
fontface="italic")))) +
annotation_custom(grob = grobTree(textGrob("Non catholic",
x=0.05,
y=.5,
hjust=0,
gp=gpar(col="black",
fontsize=10,
fontface="italic"))))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
fitfull <- lm(Fertility ~ Agriculture * factor(CatholicBin), data = swiss)
g +
geom_abline(intercept = coef(fitfull)[1], slope = coef(fitfull)[2], size=1, colour='green') +
geom_abline(intercept = coef(fitfull)[1] + coef(fitfull)[3],
slope = coef(fitfull)[2] + coef(fitfull)[4], size=1, colour='red') +
annotation_custom(grob = grobTree(textGrob("Catholic",
x=0.05,
y=.58,
hjust=0,
gp=gpar(col="blue",
fontsize=10,
fontface="italic")))) +
annotation_custom(grob = grobTree(textGrob("Non catholic",
x=0.05,
y=.46,
hjust=0,
gp=gpar(col="black",
fontsize=10,
fontface="italic"))))
# PAGE 80 ### 1. Load the dataset Seatbelts as part of the datasets
package via data(Seatbelts). Use as.data.frame to convert the object to
a dataframe. Fit a linear model of driver deaths with kms and
PetrolPrice as predictors. Interpret your results. ## ANSWER:
library(datasets)
data("Seatbelts")
stblts <- as.data.frame(Seatbelts)
fit<- lm(DriversKilled ~ kms + PetrolPrice, stblts)
summary(fit)$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.157461e+02 1.466559e+01 14.711047 3.772201e-33
## kms -1.749546e-03 6.145401e-04 -2.846919 4.902428e-03
## PetrolPrice -6.437895e+02 1.482896e+02 -4.341435 2.304713e-05
library(dplyr)
stblts <- stblts %>%
mutate(ppn = (PetrolPrice - mean(PetrolPrice))/sd(PetrolPrice),
mkm = kms / 1000) %>%
mutate(mkm = mkm - mean(mkm))
summary(lm(DriversKilled ~ mkm + ppn, stblts))$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 122.802083 1.6628507 73.850336 2.395106e-141
## mkm -1.749546 0.6145401 -2.846919 4.902428e-03
## ppn -7.838674 1.8055491 -4.341435 2.304713e-05
library(dplyr)
stblts <- stblts %>%
mutate(ppn = (PetrolPrice - mean(PetrolPrice))/sd(PetrolPrice),
mkm = kms / 1000) %>%
mutate(mkm = mkm - mean(mkm))
cor(stblts$ppn, stblts$mkm)
## [1] 0.3839004
fit0<-lm(DriversKilled ~ mkm, stblts)
fit1<-lm(DriversKilled ~ mkm + ppn, stblts)
summary(fit0)$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 122.802083 1.7391997 70.60839 2.665611e-138
## mkm -2.773787 0.5935049 -4.67357 5.596266e-06
summary(fit1)$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 122.802083 1.6628507 73.850336 2.395106e-141
## mkm -1.749546 0.6145401 -2.846919 4.902428e-03
## ppn -7.838674 1.8055491 -4.341435 2.304713e-05
fit2<-lm(DriversKilled ~ ppn, stblts)
fit3<-lm(DriversKilled ~ ppn+mkm, stblts)
summary(fit2)$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 122.802083 1.693656 72.507096 2.061333e-140
## ppn -9.812019 1.698084 -5.778288 3.044208e-08
summary(fit3)$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 122.802083 1.6628507 73.850336 2.395106e-141
## ppn -7.838674 1.8055491 -4.341435 2.304713e-05
## mkm -1.749546 0.6145401 -2.846919 4.902428e-03
library(datasets)
data("Seatbelts")
stblts <- as.data.frame(Seatbelts)
fit<- lm(DriversKilled ~ kms + PetrolPrice+law, stblts)
library(dplyr)
stblts <- stblts %>%
mutate(ppn = (PetrolPrice - mean(PetrolPrice))/sd(PetrolPrice),
mkm = kms / 1000) %>%
mutate(mkm = mkm - mean(mkm))
fit<-lm(DriversKilled ~ mkm + ppn +law, stblts)
sum(resid(fit)^2)/(nrow(stblts)-4)
## [1] 522.8903
summary(fit)$sigma^2
## [1] 522.8903
fit<-lm(DriversKilled ~ mkm + ppn +law, stblts)
plot(fit)
datdffits <- data.frame(dffits = dffits(fit)) %>%
mutate(id=row_number()) %>%
mutate(specialPoint= as.factor(abs(dffits) >= .35))
ggplot(datdffits, aes(y=dffits, x= id, color= specialPoint))+
geom_point(size=3) +
geom_text(data=subset(datdffits, specialPoint==TRUE),
aes(y=dffits,x=id,label=id), colour='blue') +
geom_hline(yintercept = c(-0.35,0.35), color= 'green') +
geom_hline(yintercept = 0)
head(dfbeta(fit))
## (Intercept) mkm ppn law
## 1 -0.1022614 0.11881682 -0.09349927 -0.26195987
## 2 -0.1331656 0.22275746 -0.15807657 -0.56786007
## 3 -0.1279422 0.11452337 -0.07920934 -0.23464337
## 4 -0.2032275 0.13006726 -0.06413772 -0.23584650
## 5 -0.0524778 0.02353784 -0.01104844 -0.02756976
## 6 -0.1188004 0.03961875 -0.01108847 -0.02386593
plot(dfbeta(fit)[ , 2])
## dfbeta for kms:
datdfbeta <- data.frame(dfbetamkm = dfbeta(fit)[ , 2]) %>%
mutate(id=row_number()) %>%
mutate(specialPoint= as.factor(abs(dfbetamkm) >= .2))
ggplot(datdfbeta, aes(y=dfbetamkm, x= id, color= specialPoint))+
geom_point(size=3) +
geom_text(data=subset(datdfbeta, specialPoint==TRUE),
aes(y=dfbetamkm,x=id,label=id), colour='blue') +
geom_hline(yintercept = 0)+
geom_hline(yintercept = c(-0.2,0.2), color= 'green') +
ylab('dfbeta: mkm')
datdfbeta2 <- data.frame(dfbetappn = dfbeta(fit)[ , 3]) %>%
mutate(id=row_number()) %>%
mutate(specialPoint= as.factor(abs(dfbetappn) >= .4))
ggplot(datdfbeta2, aes(y=dfbetappn, x= id, color= specialPoint))+
geom_point(size=3) +
geom_text(data=subset(datdfbeta2, specialPoint==TRUE),
aes(y=dfbetappn,x=id,label=id), colour='blue') +
geom_hline(yintercept = 0)+
geom_hline(yintercept = c(-0.4,0.4), color= 'green') +
ylab('dfbeta: PetrolPrice')
datcooks <- data.frame(cooksdist = cooks.distance(fit)) %>%
mutate(id=row_number()) %>%
mutate(specialPoint= as.factor(cooksdist >= .03))
ggplot(datcooks, aes(y=cooksdist, x= id, color= specialPoint))+
geom_point(size=3) +
geom_text(data=subset(datcooks, specialPoint==TRUE),
aes(y=cooksdist,x=id,label=id), colour='black') +
geom_hline(yintercept = 0)+
geom_hline(yintercept = 0.03, color= 'blue') +
ylab('Cooks distance')
## Evaluating leverage:
summary(round(hatvalues(fit), 3))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00600 0.01175 0.01700 0.02082 0.02600 0.06700
dathatv <- data.frame(hatv = hatvalues(fit)) %>%
mutate(id=row_number()) %>%
mutate(specialPoint= as.factor(hatv >= .06))
ggplot(dathatv, aes(y=hatv, x= id, color= specialPoint))+
geom_point(size=3) +
geom_text(data=subset(dathatv, specialPoint==TRUE),
aes(y=hatv,x=id,label=id), colour='blue') +
geom_hline(yintercept = 0.06, color= 'green') +
ylab('hatvalues')
# PAGE 100 ## 1. Load the dataset Seatbelts as part of the datasets
package via data(Seatbelts). Use as.data.frame to convert the object to
a dataframe. Fit a linear model of driver deaths with kms, PetrolPrice
and law as predictors.
library(datasets)
data("Seatbelts")
stblts <- as.data.frame(Seatbelts)
fit<- lm(DriversKilled ~ kms + PetrolPrice, stblts)
summary(fit)$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.157461e+02 1.466559e+01 14.711047 3.772201e-33
## kms -1.749546e-03 6.145401e-04 -2.846919 4.902428e-03
## PetrolPrice -6.437895e+02 1.482896e+02 -4.341435 2.304713e-05
library(dplyr)
stblts <- stblts %>%
mutate(ppn = (PetrolPrice - mean(PetrolPrice))/sd(PetrolPrice),
mkm = kms / 1000) %>%
mutate(mkm = mkm - mean(mkm))
fit0 <- lm(DriversKilled~law, stblts)
fit1 <- lm(DriversKilled~law + mkm, stblts)
fit2 <- lm(DriversKilled~law + ppn, stblts)
fit3 <- lm(DriversKilled~law + mkm + ppn, stblts)
anova(fit0,fit1,fit2,fit3)
## Analysis of Variance Table
##
## Model 1: DriversKilled ~ law
## Model 2: DriversKilled ~ law + mkm
## Model 3: DriversKilled ~ law + ppn
## Model 4: DriversKilled ~ law + mkm + ppn
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 190 109754
## 2 189 105608 1 4145.3 7.9276 0.005388 **
## 3 189 100069 0 5538.9
## 4 188 98303 1 1766.0 3.3774 0.067676 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(fit0,fit3)
## Analysis of Variance Table
##
## Model 1: DriversKilled ~ law
## Model 2: DriversKilled ~ law + mkm + ppn
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 190 109754
## 2 188 98303 2 11450 10.949 3.177e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rbind(summary(fit0)$coef[2, ],
summary(fit1)$coef[2, ],
summary(fit2)$coef[2, ],
summary(fit3)$coef[2, ])
## Estimate Std. Error t value Pr(>|t|)
## [1,] -25.60895 5.341655 -4.794198 3.288375e-06
## [2,] -17.55372 6.028888 -2.911602 4.028394e-03
## [3,] -16.32618 5.555579 -2.938700 3.706585e-03
## [4,] -11.88920 6.025785 -1.973055 4.995497e-02
library(dplyr)
stblts <- stblts %>%
mutate(ppn = (PetrolPrice - mean(PetrolPrice))/sd(PetrolPrice),
dkb = (DriversKilled > 119),
mkm = kms / 1000) %>%
mutate(mkm = mkm - mean(mkm))
table(stblts$dkb)
##
## FALSE TRUE
## 98 94
library(dplyr)
stblts <- stblts %>%
mutate(ppn = (PetrolPrice - mean(PetrolPrice))/sd(PetrolPrice),
dkb = 1 *(DriversKilled > 119),
mkm = kms / 1000) %>%
mutate(mkm = mkm - mean(mkm))
table(stblts$dkb)
##
## 0 1
## 98 94
fit<-glm(dkb~ppn+mkm+law, family = binomial, stblts)
summary(fit)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.024313512 0.16077499 0.15122695 0.87979669
## ppn -0.416407701 0.16973435 -2.45329074 0.01415559
## mkm -0.002938343 0.05984816 -0.04909663 0.96084229
## law -0.615522450 0.57780755 -1.06527242 0.28675267
round(summary(fit)$coef, 4)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.0243 0.1608 0.1512 0.8798
## ppn -0.4164 0.1697 -2.4533 0.0142
## mkm -0.0029 0.0598 -0.0491 0.9608
## law -0.6155 0.5778 -1.0653 0.2868
exp( -0.615522450)
## [1] 0.5403585
1-exp( -0.615522450)
## [1] 0.4596415
exp(-0.002938343)
## [1] 0.997066
1-exp(-0.002938343)
## [1] 0.00293403
fit<-glm(cbind(DriversKilled, drivers-DriversKilled)~ppn+mkm+law, family = binomial, stblts)
summary(fit)
##
## Call:
## glm(formula = cbind(DriversKilled, drivers - DriversKilled) ~
## ppn + mkm + law, family = binomial, data = stblts)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.536637 0.007399 -342.829 <2e-16 ***
## ppn -0.007829 0.007479 -1.047 0.295
## mkm 0.003645 0.002733 1.334 0.182
## law 0.030785 0.026527 1.161 0.246
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 234.93 on 191 degrees of freedom
## Residual deviance: 229.93 on 188 degrees of freedom
## AIC: 1496
##
## Number of Fisher Scoring iterations: 3
fit1<-glm(dkb~law, family = binomial, stblts)
fit2<-update(fit1, dkb~law + PetrolPrice)
fit3<-update(fit2,dkb~law + PetrolPrice+kms)
anova(fit1,fit2,fit3)
## Analysis of Deviance Table
##
## Model 1: dkb ~ law
## Model 2: dkb ~ law + PetrolPrice
## Model 3: dkb ~ law + PetrolPrice + kms
## Resid. Df Resid. Dev Df Deviance
## 1 190 260.40
## 2 189 253.62 1 6.7760
## 3 188 253.62 1 0.0024
summary(fit1)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.08288766 0.1539783 0.5383074 0.59036483
## law -1.12434153 0.4991985 -2.2522935 0.02430373
summary(fit2)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.5875552 1.3851773 2.589961 0.009598679
## law -0.6260282 0.5367204 -1.166396 0.243454555
## PetrolPrice -34.3737200 13.4887754 -2.548320 0.010824304
summary(fit3)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.612261e+00 1.473634e+00 2.45126028 0.01423570
## law -6.155225e-01 5.778076e-01 -1.06527242 0.28675267
## PetrolPrice -3.419952e+01 1.394026e+01 -2.45329074 0.01415559
## kms -2.938343e-06 5.984816e-05 -0.04909663 0.96084229
fit<-glm(DriversKilled~ppn+mkm+law, family = poisson, stblts)
summary(fit)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.819845137 0.007127388 676.242884 0.000000e+00
## ppn -0.055361338 0.007243262 -7.643150 2.119715e-14
## mkm -0.009980975 0.002614002 -3.818274 1.343887e-04
## law -0.114877106 0.025557951 -4.494770 6.964526e-06
round(summary(fit)$coef, 4)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.8198 0.0071 676.2429 0e+00
## ppn -0.0554 0.0072 -7.6431 0e+00
## mkm -0.0100 0.0026 -3.8183 1e-04
## law -0.1149 0.0256 -4.4948 0e+00
exp(-0.114877106 )
## [1] 0.8914757
1-exp(-0.114877106 )
## [1] 0.1085243
exp(-0.009980975 )
## [1] 0.9900687
1-exp(-0.009980975 )
## [1] 0.00993133
exp(4.819845137)
## [1] 123.9459
fit<-glm(DriversKilled~ppn+mkm+law, family = poisson, stblts)
fit2<-lm(I(log(DriversKilled))~ppn+mkm+law, stblts)
summary(fit)
##
## Call:
## glm(formula = DriversKilled ~ ppn + mkm + law, family = poisson,
## data = stblts)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.819845 0.007127 676.243 < 2e-16 ***
## ppn -0.055361 0.007243 -7.643 2.12e-14 ***
## mkm -0.009981 0.002614 -3.818 0.000134 ***
## law -0.114877 0.025558 -4.495 6.96e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 984.50 on 191 degrees of freedom
## Residual deviance: 778.32 on 188 degrees of freedom
## AIC: 2059.1
##
## Number of Fisher Scoring iterations: 4
summary(fit)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.819845137 0.007127388 676.242884 0.000000e+00
## ppn -0.055361338 0.007243262 -7.643150 2.119715e-14
## mkm -0.009980975 0.002614002 -3.818274 1.343887e-04
## law -0.114877106 0.025557951 -4.494770 6.964526e-06
summary(fit2)$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.805409776 0.014411832 333.435035 1.381656e-262
## ppn -0.053968063 0.014813218 -3.643237 3.481611e-04
## mkm -0.008189793 0.005325983 -1.537705 1.258020e-01
## law -0.131450856 0.048212881 -2.726468 7.007200e-03
1-exp(-0.131450856)
## [1] 0.1231776
fit <- glm(DriversKilled ~ mkm + ppn +law, family = poisson, stblts, offset = log(drivers))
fit0 <- glm(DriversKilled ~ mkm + ppn +law, family = poisson, stblts)
summary(fit)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.612798146 0.007122545 -366.834931 0.0000000
## mkm 0.003377675 0.002630717 1.283937 0.1991640
## ppn -0.007255064 0.007199577 -1.007707 0.3135952
## law 0.028484328 0.025512651 1.116479 0.2642173
summary(fit0)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.819845137 0.007127388 676.242884 0.000000e+00
## mkm -0.009980975 0.002614002 -3.818274 1.343887e-04
## ppn -0.055361338 0.007243262 -7.643150 2.119715e-14
## law -0.114877106 0.025557951 -4.494770 6.964526e-06
fit1 <- glm(DriversKilled ~ law, family = poisson, stblts)
fit2 <- update(fit1,DriversKilled ~ law+PetrolPrice)
fit3<- update(fit2,DriversKilled ~ law+PetrolPrice+kms)
anova(fit1,fit2,fit3)
## Analysis of Deviance Table
##
## Model 1: DriversKilled ~ law
## Model 2: DriversKilled ~ law + PetrolPrice
## Model 3: DriversKilled ~ law + PetrolPrice + kms
## Resid. Df Resid. Dev Df Deviance
## 1 190 870.06
## 2 189 792.88 1 77.178
## 3 188 778.32 1 14.561
summary(fit1)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.8352482 0.006856395 705.21727 0.000000e+00
## law -0.2274727 0.021923993 -10.37552 3.204779e-25
summary(fit2)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.3499077 0.05886323 90.887084 0.000000e+00
## law -0.1516301 0.02364153 -6.413720 1.420110e-10
## PetrolPrice -5.0697421 0.57792672 -8.772292 1.750654e-18
summary(fit3)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.440656e+00 6.360114e-02 85.543371 0.000000e+00
## law -1.148771e-01 2.555795e-02 -4.494770 6.964526e-06
## PetrolPrice -4.546821e+00 5.948884e-01 -7.643150 2.119715e-14
## kms -9.980975e-06 2.614002e-06 -3.818274 1.343887e-04