# install packages this script uses if you dont have them already
#install.packages("ggplot2")
#install.packages("lmtest")
#install.packages("car")
#install.packages("caret")
#install.packages("e1071")
# Clear workspace
rm(list=ls())
# Call packages you use
library(ggplot2)
library(lmtest)
library(car)
library(caret)
library(e1071)
# Download initial model
lmMod <- lm(dist ~ speed, data=cars)
# PLOT
par(mfrow=c(2,2))
plot(lmMod)

# Breusch-Pagan test
bptest(lmMod)
studentized Breusch-Pagan test
data: lmMod
BP = 3.2149, df = 1, p-value = 0.07297
# Non-constant Variance Test
# Computes a score test of the hypothesis of constant error variance against the alternative that the error variance changes with the level of the response (fitted values), or with a linear combination of predictors.
ncvTest(lmMod)
Non-constant Variance Score Test
Variance formula: ~ fitted.values
Chisquare = 4.650233 Df = 1 p = 0.03104933
# Box-Cox transformation
distBCMod <- BoxCoxTrans(cars$dist)
print(distBCMod)
Box-Cox Transformation
50 data points used to estimate Lambda
Input data summary:
Min. 1st Qu. Median Mean 3rd Qu. Max.
2.00 26.00 36.00 42.98 56.00 120.00
Largest/Smallest: 60
Sample Skewness: 0.759
Estimated Lambda: 0.5
# append the transformed variable to cars
cars <- cbind(cars, dist_new=predict(distBCMod, cars$dist))
# view the top 6 rows
head(cars)
# build model with correction
lmMod_bc <- lm(dist_new ~ speed, data=cars)
# Breusch-Pagan test with corrected model
bptest(lmMod_bc)
studentized Breusch-Pagan test
data: lmMod_bc
BP = 0.011192, df = 1, p-value = 0.9157
# plots for corrected model
par(mfrow=c(2,2))
plot(lmMod_bc)

Questions:
- What residual patterns are suggestive of heteroskedasticity?
When the residuas VS fitted plot is not a straight line, or residuals are spread in a non homoskedastic way (i.e. there is some correlation between the residuals and one of the regressors).
- What do they look like if there is homoskedasticity?
Homoskedasticity would be illustrated with residuals that are perfectly-randomly spread in regards to the regressor; they have the same variance for low and high numbers of the regressor. The variance is constant from \(U_i\) to \(U_n\).
LS0tCnRpdGxlOiAiU1MxNTRfNS4xIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCmBgYHtyfQojIGluc3RhbGwgcGFja2FnZXMgdGhpcyBzY3JpcHQgdXNlcyBpZiB5b3UgZG9udCBoYXZlIHRoZW0gYWxyZWFkeQojaW5zdGFsbC5wYWNrYWdlcygiZ2dwbG90MiIpCiNpbnN0YWxsLnBhY2thZ2VzKCJsbXRlc3QiKQojaW5zdGFsbC5wYWNrYWdlcygiY2FyIikKI2luc3RhbGwucGFja2FnZXMoImNhcmV0IikKI2luc3RhbGwucGFja2FnZXMoImUxMDcxIikgCgojIENsZWFyIHdvcmtzcGFjZQpybShsaXN0PWxzKCkpIAoKIyBDYWxsIHBhY2thZ2VzIHlvdSB1c2UKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGxtdGVzdCkKbGlicmFyeShjYXIpCmxpYnJhcnkoY2FyZXQpCmxpYnJhcnkoZTEwNzEpCmBgYAoKYGBge3J9CiMgRG93bmxvYWQgaW5pdGlhbCBtb2RlbApsbU1vZCA8LSBsbShkaXN0IH4gc3BlZWQsIGRhdGE9Y2FycykgCgojIFBMT1QgCnBhcihtZnJvdz1jKDIsMikpCnBsb3QobG1Nb2QpCmBgYAoKCmBgYHtyfQojIEJyZXVzY2gtUGFnYW4gdGVzdApicHRlc3QobG1Nb2QpICAKCiMgTm9uLWNvbnN0YW50IFZhcmlhbmNlIFRlc3QKIyBDb21wdXRlcyBhIHNjb3JlIHRlc3Qgb2YgdGhlIGh5cG90aGVzaXMgb2YgY29uc3RhbnQgZXJyb3IgdmFyaWFuY2UgYWdhaW5zdCB0aGUgYWx0ZXJuYXRpdmUgdGhhdCB0aGUgZXJyb3IgdmFyaWFuY2UgY2hhbmdlcyB3aXRoIHRoZSBsZXZlbCBvZiB0aGUgcmVzcG9uc2UgKGZpdHRlZCB2YWx1ZXMpLCBvciB3aXRoIGEgbGluZWFyIGNvbWJpbmF0aW9uIG9mIHByZWRpY3RvcnMuCm5jdlRlc3QobG1Nb2QpCgojIEJveC1Db3ggdHJhbnNmb3JtYXRpb24KZGlzdEJDTW9kIDwtIEJveENveFRyYW5zKGNhcnMkZGlzdCkKcHJpbnQoZGlzdEJDTW9kKQoKIyBhcHBlbmQgdGhlIHRyYW5zZm9ybWVkIHZhcmlhYmxlIHRvIGNhcnMKY2FycyA8LSBjYmluZChjYXJzLCBkaXN0X25ldz1wcmVkaWN0KGRpc3RCQ01vZCwgY2FycyRkaXN0KSkgCgojIHZpZXcgdGhlIHRvcCA2IHJvd3MKaGVhZChjYXJzKSAKCgoKYGBgCgpgYGB7cn0KIyBidWlsZCBtb2RlbCB3aXRoIGNvcnJlY3Rpb24KbG1Nb2RfYmMgPC0gbG0oZGlzdF9uZXcgfiBzcGVlZCwgZGF0YT1jYXJzKQoKIyBCcmV1c2NoLVBhZ2FuIHRlc3Qgd2l0aCBjb3JyZWN0ZWQgbW9kZWwKYnB0ZXN0KGxtTW9kX2JjKQoKIyBwbG90cyBmb3IgY29ycmVjdGVkIG1vZGVsCnBhcihtZnJvdz1jKDIsMikpCnBsb3QobG1Nb2RfYmMpCgpgYGAKCiMjIyBRdWVzdGlvbnM6CjEuICpfV2hhdCByZXNpZHVhbCBwYXR0ZXJucyBhcmUgc3VnZ2VzdGl2ZSBvZiBoZXRlcm9za2VkYXN0aWNpdHk/XyoKCldoZW4gdGhlIHJlc2lkdWFzIFZTIGZpdHRlZCBwbG90IGlzIG5vdCBhIHN0cmFpZ2h0IGxpbmUsIG9yIHJlc2lkdWFscyBhcmUgc3ByZWFkIGluIGEgbm9uIGhvbW9za2VkYXN0aWMgd2F5IChpLmUuIHRoZXJlIGlzIHNvbWUgY29ycmVsYXRpb24gYmV0d2VlbiB0aGUgcmVzaWR1YWxzIGFuZCBvbmUgb2YgdGhlIHJlZ3Jlc3NvcnMpLgoKMi4gKl9XaGF0IGRvIHRoZXkgbG9vayBsaWtlIGlmIHRoZXJlIGlzIGhvbW9za2VkYXN0aWNpdHk/XyoKCkhvbW9za2VkYXN0aWNpdHkgd291bGQgYmUgaWxsdXN0cmF0ZWQgd2l0aCByZXNpZHVhbHMgdGhhdCBhcmUgcGVyZmVjdGx5LXJhbmRvbWx5IHNwcmVhZCBpbiByZWdhcmRzIHRvIHRoZSByZWdyZXNzb3I7IHRoZXkgaGF2ZSB0aGUgc2FtZSB2YXJpYW5jZSBmb3IgbG93IGFuZCBoaWdoIG51bWJlcnMgb2YgdGhlIHJlZ3Jlc3Nvci4gVGhlIHZhcmlhbmNlIGlzIGNvbnN0YW50IGZyb20gJFVfaSQgdG8gJFVfbiQuCgoK