TextBook Page 699
Forbes studied the linear relationship between boiling point and pressure.
We would like to explain the pressure by using the boiling point as an explanatory variable.
library(MASS) # this library contains many datasets and functions
dataset <- data.frame(forbes)
# Conversion Fahrenheit to Celsius
dataset$bp_c <- (data$bp-32)*5/9
# Transform the pressure to 100*log(pressure)
dataset$lnpres <- 100*log10(dataset$pres)
show(dataset)
bp pres bp_c lnpres
1 194.5 20.79 90.27778 131.7854
2 194.3 20.79 90.16667 131.7854
3 197.9 22.40 92.16667 135.0248
4 198.4 22.67 92.44444 135.5452
5 199.4 23.15 93.00000 136.4551
6 199.9 23.35 93.27778 136.8287
7 200.9 23.89 93.83333 137.8216
8 201.1 23.99 93.94444 138.0030
9 201.4 24.02 94.11111 138.0573
10 201.3 24.01 94.05556 138.0392
11 203.6 25.14 95.33333 140.0365
12 204.6 26.57 95.88889 142.4392
13 209.5 28.49 98.61111 145.4692
14 208.6 27.76 98.11111 144.3419
15 210.7 29.04 99.27778 146.2997
16 211.9 29.88 99.94444 147.5381
17 212.2 30.06 100.11111 147.7989
bp: boiling point (degrees Farenheit)
bp_c: boiling point (degrees Celsius)
pres: barometric pressure in inches of mercury
# Create scatter plot
plot(bp_c~pres, data=data)

Run regression pres~bp
# Use optimization to find regression coefficients
ls <- function(dataset, par)
{with(dataset, sum((pres-par[1]-par[2]*bp)^2))}
result <- optim(par=c(0,0),ls,data=dataset)
# starting values for the parameters to be optimised (0,0,0)
coef <- result$par
coef
[1] -81.1864866 0.5234972
Run regression pres~bp_c
ls2 <- function(dataset, par)
{with(dataset, sum((pres-par[1]-par[2]*bp_c)^2))}
result2 <- optim(par=c(0,0),ls,data=dataset)
# starting values for the parameters to be optimised (0,0,0)
coef2 <- result2$par
coef2
[1] -81.1864866 0.5234972
Run regression pres~bp_c
ls <- function(dataset, par)
{with(dataset, sum((lnpres-par[1]-par[2]*bp)^2))}
result3 <- optim(par=c(0,0),ls,data=dataset)
# starting values for the parameters to be optimised (0,0,0)
coef3 <- result2$par
coef3
[1] -81.1864866 0.5234972
A 95% CI for the slope is (0.501,0.5444)
Since it didn’t contain 0, we can reject the null hypothesis
# Check with built in fucntion
confint(fit,"bp",level=0.95)
2.5 % 97.5 %
bp 0.501352 0.5444328
Diagnostics and Residual Analysis
Is there evidence that the residuals do not follow a normal distribution?
fit$residuals
1 2 3 4 5 6 7 8 9
0.151155176 0.255733656 -0.016678987 -0.008125187 -0.051017588 -0.112463788 -0.095356189 -0.099934669 -0.226802389
10 11 12 13 14 15 16 17
-0.184513149 -0.257165671 0.649941928 0.007769164 -0.251627675 -0.069701717 0.142827402 0.165959682
# plot residuals. We can spot an outlier
plot(fit$residuals)

#create normal probability plot with qqnorm function
qqnorm(fit$residuals)
qqline(fit$residuals)

fit.stdres = rstandard(fit)
plot(fit$fitted, fit.stdres, xlab="fitted values", ylab="standardized residuals")
abline(h=0, col=2,lwd=2)

In the presence of outliers, it is useful to do the analysis with and without the point.
Let’s remove the large residuals which deviates more than 2 s.d from the mean 0
index <- which(abs(fit.stdres) > 2)
index
12
12
dataset1 <- dataset[-index,]
str(dataset1)
'data.frame': 16 obs. of 4 variables:
$ bp : num 194 194 198 198 199 ...
$ pres : num 20.8 20.8 22.4 22.7 23.1 ...
$ bp_c : num 90.3 90.2 92.2 92.4 93 ...
$ lnpres: num 132 132 135 136 136 ...
#Use dataset1 to run the same regression
fit1 <- lm(pres~bp, data=dataset1)
summary(fit1)
Call:
lm(formula = pres ~ bp, data = dataset1)
Residuals:
Min 1Q Median 3Q Max
-0.21493 -0.09546 -0.01500 0.09048 0.27793
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -80.667294 1.419984 -56.81 <2e-16 ***
bp 0.520738 0.006997 74.42 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.1608 on 14 degrees of freedom
Multiple R-squared: 0.9975, Adjusted R-squared: 0.9973
F-statistic: 5538 on 1 and 14 DF, p-value: < 2.2e-16
The regression fit and coefficient did not change much
fit.stdres = rstandard(fit1)
plot(fit1$fitted, fit.stdres, xlab="Fitted values", ylab="Standardized residuals")
abline(h=0, col=2, lwd=2)

LS0tDQp0aXRsZTogIlJlbGF0aW9uc2hpcCBiZXR3ZWVuIEJvaWxpbmcgcG9pbnQgYW5kIFByZXNzdXJlIg0KYXV0aG9yOiBTdGF0aXN0aWNzMiBDbGFzcw0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KIyMjIyBUZXh0Qm9vayBQYWdlIDY5OQ0KIyMjIyBGb3JiZXMgc3R1ZGllZCB0aGUgbGluZWFyIHJlbGF0aW9uc2hpcCBiZXR3ZWVuIGJvaWxpbmcgcG9pbnQgYW5kIHByZXNzdXJlLiANCiMjIyMgV2Ugd291bGQgbGlrZSB0byBleHBsYWluIHRoZSBwcmVzc3VyZSBieSB1c2luZyB0aGUgYm9pbGluZyBwb2ludCBhcyBhbiBleHBsYW5hdG9yeSB2YXJpYWJsZS4NCg0KDQpgYGB7cn0NCmxpYnJhcnkoTUFTUykgIyB0aGlzIGxpYnJhcnkgY29udGFpbnMgbWFueSBkYXRhc2V0cyBhbmQgZnVuY3Rpb25zDQpkYXRhc2V0IDwtIGRhdGEuZnJhbWUoZm9yYmVzKQ0KIyBDb252ZXJzaW9uIEZhaHJlbmhlaXQgdG8gQ2Vsc2l1cw0KZGF0YXNldCRicF9jIDwtIChkYXRhJGJwLTMyKSo1LzkNCiMgVHJhbnNmb3JtIHRoZSBwcmVzc3VyZSB0byAxMDAqbG9nKHByZXNzdXJlKSANCmRhdGFzZXQkbG5wcmVzIDwtIDEwMCpsb2cxMChkYXRhc2V0JHByZXMpDQpzaG93KGRhdGFzZXQpDQpgYGANCg0KIyMjIyNicDogYm9pbGluZyBwb2ludCAoZGVncmVlcyBGYXJlbmhlaXQpDQojIyMjI2JwX2M6IGJvaWxpbmcgcG9pbnQgKGRlZ3JlZXMgQ2Vsc2l1cykNCiMjIyMjcHJlczogYmFyb21ldHJpYyBwcmVzc3VyZSBpbiBpbmNoZXMgb2YgbWVyY3VyeQ0KDQpgYGB7cn0NCiMgQ3JlYXRlIHNjYXR0ZXIgcGxvdCANCnBsb3QoYnBfY35wcmVzLCBkYXRhPWRhdGEpDQpgYGANCg0KIyMjIFJ1biByZWdyZXNzaW9uIHByZXN+YnANCmBgYHtyfQ0KIyBVc2Ugb3B0aW1pemF0aW9uIHRvIGZpbmQgcmVncmVzc2lvbiBjb2VmZmljaWVudHMNCmxzIDwtIGZ1bmN0aW9uKGRhdGFzZXQsIHBhcikNCnt3aXRoKGRhdGFzZXQsIHN1bSgocHJlcy1wYXJbMV0tcGFyWzJdKmJwKV4yKSl9DQpyZXN1bHQgPC0gb3B0aW0ocGFyPWMoMCwwKSxscyxkYXRhPWRhdGFzZXQpDQojIHN0YXJ0aW5nIHZhbHVlcyBmb3IgdGhlIHBhcmFtZXRlcnMgdG8gYmUgb3B0aW1pc2VkICgwLDAsMCkNCmNvZWYgPC0gcmVzdWx0JHBhcg0KY29lZg0KYGBgDQojIyMjIyANCiMjIyBSdW4gcmVncmVzc2lvbiBwcmVzfmJwX2MNCmBgYHtyfQ0KbHMyIDwtIGZ1bmN0aW9uKGRhdGFzZXQsIHBhcikNCnt3aXRoKGRhdGFzZXQsIHN1bSgocHJlcy1wYXJbMV0tcGFyWzJdKmJwX2MpXjIpKX0NCnJlc3VsdDIgPC0gb3B0aW0ocGFyPWMoMCwwKSxscyxkYXRhPWRhdGFzZXQpDQojIHN0YXJ0aW5nIHZhbHVlcyBmb3IgdGhlIHBhcmFtZXRlcnMgdG8gYmUgb3B0aW1pc2VkICgwLDAsMCkNCmNvZWYyIDwtIHJlc3VsdDIkcGFyDQpjb2VmMg0KYGBgDQojIyMjIyANCiMjIyBSdW4gcmVncmVzc2lvbiBwcmVzfmJwX2MNCmBgYHtyfQ0KbHMgPC0gZnVuY3Rpb24oZGF0YXNldCwgcGFyKQ0Ke3dpdGgoZGF0YXNldCwgc3VtKChsbnByZXMtcGFyWzFdLXBhclsyXSpicCleMikpfQ0KcmVzdWx0MyA8LSBvcHRpbShwYXI9YygwLDApLGxzLGRhdGE9ZGF0YXNldCkNCiMgc3RhcnRpbmcgdmFsdWVzIGZvciB0aGUgcGFyYW1ldGVycyB0byBiZSBvcHRpbWlzZWQgKDAsMCwwKQ0KY29lZjMgPC0gcmVzdWx0MiRwYXINCmNvZWYzDQpgYGANCg0KIyMjIEZyb20gdGhlIGFib3ZlLCB3ZSBjYW4gb2JzZXJ2ZSB0aGF0IGxpbmVhciB0cmFuc2Zvcm1hdGlvbiBvZiBpbmRlcGVuZGVudCB2YXJpYWJsZShzKSBkb2VzIG5vdCBjaGFuZ2UgdGhlIGNvZWZmaWNpZW50cy4NCg0KYGBge3J9DQojIEZpdCBhIGxpbmVhciByZWdyZXNzaW9uIG1vZGVsIGFuZCBjaGVjayBvdXQgdGhlIG91dHB1dA0KZml0IDwtIGxtKHByZXN+YnAsIGRhdGE9ZGF0YXNldCkNCnN1bW1hcnkoZml0KQ0KYGBgDQoNCmBgYHtyfQ0KIyBidWlsZCBhIHJlZ3Jlc3Npb24gbGluZSBieSBjb25zdHJ1Y3RpbmcgdGhlIGxpbmUNCiMgbGluZSA8LSBjb2VmWzFdK2NvZWZbMl0qZGF0YXNldCRicA0KcGxvdChwcmVzfmJwLCBkYXRhPWRhdGFzZXQpDQpjdXJ2ZShjb2VmWzFdK2NvZWZbMl0qeCwgZnJvbT0xOTAsdG89MjE1LCBhZGQ9VFJVRSwgY29sPSJyZWQiKQ0KYGBgDQoNCmBgYHtyfQ0KIyBEbyBzYW1lIHRoaW5nIHdpdGggbG0gZnVuY3Rpb24NCnBsb3QocHJlc35icCwgZGF0YT1kYXRhc2V0KQ0KYWJsaW5lKGxtKHByZXN+YnAsZGF0YT1kYXRhc2V0KSxjb2w9InJlZCIpDQpgYGANCg0KIyMjIyBMZXQncyBjYWxjdWxhdGUgYSA5NSUgQ29uZmlkZW5jZSBJbnRlcnZhbCBmb3IgdGhlIHNsb3BlDQojIyMjIFRvIGNvbnN0cnVjdCB0aGUgOTUlIENJIHdlIG5lZWQgbWVhbiwgc3RhbmRhcmQgZXJyb3IsIGFuZCBjcml0aWNhY2wgdmFsdWUuDQpgYGB7cn0NCmNvZWYudGFibGUgPC0gc3VtbWFyeShmaXQpJGNvZWZmaWNpZW50cw0KYnAubWVhbiA8LSBjb2VmLnRhYmxlWzIsMV0NCmJwLnNlIDwtIGNvZWYudGFibGVbMiwyXQ0KZGYgPC0gZml0JGRmLnJlc2lkdWFsICMgZGYgPTE1DQpjcml0dmFsIDwtIGFicyhxdCgwLjAyNSxkZikpICMgYWJzb2x1dGUgdmFsdWUgb2YgMi41dGggcGVyY2VudGlsZXMgb2YgdGhlIFN0dWRlbnQgdCBkaXN0cmlidXRpb24gd2l0aCAxNSBkLmYNCiMgMi4xMzE0NQ0KcHJpbnQoY29lZi50YWJsZSkNCmBgYA0KYGBge3J9DQpxdChjKC4wMjUsIC45NzUpLCBkZj0xNSkgIyAyLjUgcGVyY2VudGlsZSBhbmQgOTcuNSBwZXJjZW50aWxlIG9mIHRoZSBTdHVkZW50IHQgZGlzdHJpYnV0aW9uIHdpdGggZGYgPSAxNQ0KYGBgDQpgYGB7cn0NCiMgQ2FsY3VsYXRlIDk1JSBDSQ0KQ0lsb3cgPC0gYnAubWVhbiAtIGNyaXR2YWwqYnAuc2UNCkNJdXAgPC0gYnAubWVhbiArIGNyaXR2YWwqYnAuc2UNCmludGVydmFsIDwtIGMoQ0lsb3csQ0l1cCkNCmludGVydmFsDQpgYGANCg0KIyMjIEEgOTUlIENJIGZvciB0aGUgc2xvcGUgaXMgKDAuNTAxLDAuNTQ0NCkNCiMjIyBTaW5jZSBpdCBkaWRuJ3QgY29udGFpbiAwLCB3ZSBjYW4gcmVqZWN0IHRoZSBudWxsIGh5cG90aGVzaXMNCg0KYGBge3J9DQojIENoZWNrIHdpdGggYnVpbHQgaW4gZnVjbnRpb24NCmNvbmZpbnQoZml0LCJicCIsbGV2ZWw9MC45NSkNCmBgYA0KDQojIyBEaWFnbm9zdGljcyBhbmQgUmVzaWR1YWwgQW5hbHlzaXMNCiMjIyBJcyB0aGVyZSBldmlkZW5jZSB0aGF0IHRoZSByZXNpZHVhbHMgZG8gbm90IGZvbGxvdyBhIG5vcm1hbCBkaXN0cmlidXRpb24/DQpgYGB7cn0NCmZpdCRyZXNpZHVhbHMNCmBgYA0KYGBge3J9DQojIHBsb3QgcmVzaWR1YWxzLiBXZSBjYW4gc3BvdCBhbiBvdXRsaWVyDQpwbG90KGZpdCRyZXNpZHVhbHMpDQpgYGANCmBgYHtyfQ0KI2NyZWF0ZSBub3JtYWwgcHJvYmFiaWxpdHkgcGxvdCB3aXRoIHFxbm9ybSBmdW5jdGlvbg0KcXFub3JtKGZpdCRyZXNpZHVhbHMpDQpxcWxpbmUoZml0JHJlc2lkdWFscykNCmBgYA0KDQpgYGB7cn0NCmZpdC5zdGRyZXMgPSByc3RhbmRhcmQoZml0KQ0KcGxvdChmaXQkZml0dGVkLCBmaXQuc3RkcmVzLCB4bGFiPSJmaXR0ZWQgdmFsdWVzIiwgeWxhYj0ic3RhbmRhcmRpemVkIHJlc2lkdWFscyIpDQphYmxpbmUoaD0wLCBjb2w9Mixsd2Q9MikNCmBgYA0KIyMjIEluIHRoZSBwcmVzZW5jZSBvZiBvdXRsaWVycywgaXQgaXMgdXNlZnVsIHRvIGRvIHRoZSBhbmFseXNpcyB3aXRoIGFuZCB3aXRob3V0IHRoZSBwb2ludC4NCiMjIyBMZXQncyByZW1vdmUgdGhlIGxhcmdlIHJlc2lkdWFscyB3aGljaCBkZXZpYXRlcyBtb3JlIHRoYW4gMiBzLmQgZnJvbSB0aGUgbWVhbiAwDQoNCmBgYHtyfQ0KaW5kZXggPC0gd2hpY2goYWJzKGZpdC5zdGRyZXMpID4gMikNCmluZGV4DQpkYXRhc2V0MSA8LSBkYXRhc2V0Wy1pbmRleCxdDQpzdHIoZGF0YXNldDEpDQpgYGANCg0KYGBge3J9DQojVXNlIGRhdGFzZXQxIHRvIHJ1biB0aGUgc2FtZSByZWdyZXNzaW9uDQpmaXQxIDwtIGxtKHByZXN+YnAsIGRhdGE9ZGF0YXNldDEpDQpzdW1tYXJ5KGZpdDEpDQpgYGANCiMjIyBUaGUgcmVncmVzc2lvbiBmaXQgYW5kIGNvZWZmaWNpZW50IGRpZCBub3QgY2hhbmdlIG11Y2gNCg0KYGBge3J9DQpmaXQuc3RkcmVzID0gcnN0YW5kYXJkKGZpdDEpDQpwbG90KGZpdDEkZml0dGVkLCBmaXQuc3RkcmVzLCB4bGFiPSJGaXR0ZWQgdmFsdWVzIiwgeWxhYj0iU3RhbmRhcmRpemVkIHJlc2lkdWFscyIpDQphYmxpbmUoaD0wLCBjb2w9MiwgbHdkPTIpDQpgYGANCg0KDQo=