# install packages this script uses if you dont have them already
#install.packages("ggplot2")
#install.packages("lmtest")
#install.packages("car")
#install.packages("caret")
#install.packages("e1071") 
# Clear workspace
rm(list=ls()) 
# Call packages you use
library(ggplot2)
library(lmtest)
library(car)
library(caret)
library(e1071)
# Download initial model
lmMod <- lm(dist ~ speed, data=cars) 
# PLOT 
par(mfrow=c(2,2))
plot(lmMod)

# Breusch-Pagan test
bptest(lmMod)  

    studentized Breusch-Pagan test

data:  lmMod
BP = 3.2149, df = 1, p-value = 0.07297
# Non-constant Variance Test
# Computes a score test of the hypothesis of constant error variance against the alternative that the error variance changes with the level of the response (fitted values), or with a linear combination of predictors.
ncvTest(lmMod)
Non-constant Variance Score Test 
Variance formula: ~ fitted.values 
Chisquare = 4.650233    Df = 1     p = 0.03104933 
# Box-Cox transformation
distBCMod <- BoxCoxTrans(cars$dist)
print(distBCMod)
Box-Cox Transformation

50 data points used to estimate Lambda

Input data summary:
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   2.00   26.00   36.00   42.98   56.00  120.00 

Largest/Smallest: 60 
Sample Skewness: 0.759 

Estimated Lambda: 0.5 
# append the transformed variable to cars
cars <- cbind(cars, dist_new=predict(distBCMod, cars$dist)) 
# view the top 6 rows
head(cars) 
# build model with correction
lmMod_bc <- lm(dist_new ~ speed, data=cars)
# Breusch-Pagan test with corrected model
bptest(lmMod_bc)

    studentized Breusch-Pagan test

data:  lmMod_bc
BP = 0.011192, df = 1, p-value = 0.9157
# plots for corrected model
par(mfrow=c(2,2))
plot(lmMod_bc)

Questions:

  1. What residual patterns are suggestive of heteroskedasticity?

When the residuas VS fitted plot is not a straight line, or residuals are spread in a non homoskedastic way (i.e. there is some correlation between the residuals and one of the regressors).

  1. What do they look like if there is homoskedasticity?

Homoskedasticity would be illustrated with residuals that are perfectly-randomly spread in regards to the regressor; they have the same variance for low and high numbers of the regressor. The variance is constant from \(U_i\) to \(U_n\).

LS0tCnRpdGxlOiAiU1MxNTRfNS4xIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCmBgYHtyfQojIGluc3RhbGwgcGFja2FnZXMgdGhpcyBzY3JpcHQgdXNlcyBpZiB5b3UgZG9udCBoYXZlIHRoZW0gYWxyZWFkeQojaW5zdGFsbC5wYWNrYWdlcygiZ2dwbG90MiIpCiNpbnN0YWxsLnBhY2thZ2VzKCJsbXRlc3QiKQojaW5zdGFsbC5wYWNrYWdlcygiY2FyIikKI2luc3RhbGwucGFja2FnZXMoImNhcmV0IikKI2luc3RhbGwucGFja2FnZXMoImUxMDcxIikgCgojIENsZWFyIHdvcmtzcGFjZQpybShsaXN0PWxzKCkpIAoKIyBDYWxsIHBhY2thZ2VzIHlvdSB1c2UKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGxtdGVzdCkKbGlicmFyeShjYXIpCmxpYnJhcnkoY2FyZXQpCmxpYnJhcnkoZTEwNzEpCmBgYAoKYGBge3J9CiMgRG93bmxvYWQgaW5pdGlhbCBtb2RlbApsbU1vZCA8LSBsbShkaXN0IH4gc3BlZWQsIGRhdGE9Y2FycykgCgojIFBMT1QgCnBhcihtZnJvdz1jKDIsMikpCnBsb3QobG1Nb2QpCmBgYAoKCmBgYHtyfQojIEJyZXVzY2gtUGFnYW4gdGVzdApicHRlc3QobG1Nb2QpICAKCiMgTm9uLWNvbnN0YW50IFZhcmlhbmNlIFRlc3QKIyBDb21wdXRlcyBhIHNjb3JlIHRlc3Qgb2YgdGhlIGh5cG90aGVzaXMgb2YgY29uc3RhbnQgZXJyb3IgdmFyaWFuY2UgYWdhaW5zdCB0aGUgYWx0ZXJuYXRpdmUgdGhhdCB0aGUgZXJyb3IgdmFyaWFuY2UgY2hhbmdlcyB3aXRoIHRoZSBsZXZlbCBvZiB0aGUgcmVzcG9uc2UgKGZpdHRlZCB2YWx1ZXMpLCBvciB3aXRoIGEgbGluZWFyIGNvbWJpbmF0aW9uIG9mIHByZWRpY3RvcnMuCm5jdlRlc3QobG1Nb2QpCgojIEJveC1Db3ggdHJhbnNmb3JtYXRpb24KZGlzdEJDTW9kIDwtIEJveENveFRyYW5zKGNhcnMkZGlzdCkKcHJpbnQoZGlzdEJDTW9kKQoKIyBhcHBlbmQgdGhlIHRyYW5zZm9ybWVkIHZhcmlhYmxlIHRvIGNhcnMKY2FycyA8LSBjYmluZChjYXJzLCBkaXN0X25ldz1wcmVkaWN0KGRpc3RCQ01vZCwgY2FycyRkaXN0KSkgCgojIHZpZXcgdGhlIHRvcCA2IHJvd3MKaGVhZChjYXJzKSAKCgoKYGBgCgpgYGB7cn0KIyBidWlsZCBtb2RlbCB3aXRoIGNvcnJlY3Rpb24KbG1Nb2RfYmMgPC0gbG0oZGlzdF9uZXcgfiBzcGVlZCwgZGF0YT1jYXJzKQoKIyBCcmV1c2NoLVBhZ2FuIHRlc3Qgd2l0aCBjb3JyZWN0ZWQgbW9kZWwKYnB0ZXN0KGxtTW9kX2JjKQoKIyBwbG90cyBmb3IgY29ycmVjdGVkIG1vZGVsCnBhcihtZnJvdz1jKDIsMikpCnBsb3QobG1Nb2RfYmMpCgpgYGAKCiMjIyBRdWVzdGlvbnM6CjEuICpfV2hhdCByZXNpZHVhbCBwYXR0ZXJucyBhcmUgc3VnZ2VzdGl2ZSBvZiBoZXRlcm9za2VkYXN0aWNpdHk/XyoKCldoZW4gdGhlIHJlc2lkdWFzIFZTIGZpdHRlZCBwbG90IGlzIG5vdCBhIHN0cmFpZ2h0IGxpbmUsIG9yIHJlc2lkdWFscyBhcmUgc3ByZWFkIGluIGEgbm9uIGhvbW9za2VkYXN0aWMgd2F5IChpLmUuIHRoZXJlIGlzIHNvbWUgY29ycmVsYXRpb24gYmV0d2VlbiB0aGUgcmVzaWR1YWxzIGFuZCBvbmUgb2YgdGhlIHJlZ3Jlc3NvcnMpLgoKMi4gKl9XaGF0IGRvIHRoZXkgbG9vayBsaWtlIGlmIHRoZXJlIGlzIGhvbW9za2VkYXN0aWNpdHk/XyoKCkhvbW9za2VkYXN0aWNpdHkgd291bGQgYmUgaWxsdXN0cmF0ZWQgd2l0aCByZXNpZHVhbHMgdGhhdCBhcmUgcGVyZmVjdGx5LXJhbmRvbWx5IHNwcmVhZCBpbiByZWdhcmRzIHRvIHRoZSByZWdyZXNzb3I7IHRoZXkgaGF2ZSB0aGUgc2FtZSB2YXJpYW5jZSBmb3IgbG93IGFuZCBoaWdoIG51bWJlcnMgb2YgdGhlIHJlZ3Jlc3Nvci4gVGhlIHZhcmlhbmNlIGlzIGNvbnN0YW50IGZyb20gJFVfaSQgdG8gJFVfbiQuCgoK