In this post, we will calculate the measures of influence by hand:
Cook’s D
DFFITS
DFBETAS
Also, in this post we will calculate:
General residuals
Predicted residuals
PRESS
Standardised residuals
Studentisied residuals
Leverage points
Lets fit two models, one model with all the observations and another after removing first observation.
attach(cars)
x1 <- lm(speed ~ dist, data = cars) # all observations
x2 <- lm(speed ~ dist, data = cars[-1,]) # without first obs
The fitted values for both the models
pred_x1 <- predict(x1, cars[1,], se.fit = T)
pred_x2 <- predict(x2, cars[1,], se.fit = T)
c(pred_x1$fit, pred_x2$fit) # Fitted values for x1 and x2
## 1 1
## 8.615041 8.971019
x <- model.matrix(speed ~ dist) # x matrix
y <- speed # response variable
betas <- solve(crossprod(x))%*%crossprod(x,y) # co-efficients
h <- x%*%solve(crossprod(x))%*%t(x) # Hat matrix
leverage <- diag(h) #Leverage points
i <- diag(1, dim(cars)[1]) # Identity matrix
e <- (i - h)%*%y # regular residuals
pi <- e/(1-diag(h)) # predicted residuals
press <- sum(pi)
di <- e/sqrt(anova(x1)$`Mean Sq`[2]) # standardised residuals
ri <- e/sqrt(anova(x1)$`Mean Sq`[2]*(1-diag(h))) # studentised residuals
#DFBETAS
num <- x1$coefficients[2] - x2$coefficients[2]
denom <- sqrt(anova(x2)$`Mean Sq`[2]*solve(crossprod(x))[4])
df_betas <- num/denom
#DFFITS
num_dffits <-pred_x1$fit - pred_x2$fit
denom_dffits <- sqrt(anova(x2)$`Mean Sq`[2]*diag(h)[1])
df_fits <- num_dffits/denom_dffits
#COOK'S D
c_num <- t(pred_x2$fit - pred_x1$fit)*(pred_x2$fit - pred_x1$fit)
c_denom <- sum(diag(h))*anova(x1)$`Mean Sq`[2]
cdist <- c_num/c_denom
#COOK'S D(using single fit)
risqaure <- ri^2 #square of studentised residuals
hii <- diag(h) # leverage points
p <- sum(diag(h)) # number of parameters
one_hii <- 1 - diag(h)
cooks_num <- risqaure*hii
cooks_denom <- p*one_hii
cooksdist <- round(cooks_num/cooks_denom, digits = 3)