# Clear variables
rm(list=ls())
dataHanford <- read.table("hanford.txt", header = FALSE)
## Look at dataHanforda
exposure <- dataHanford[ , 2]
cancerMortality <- dataHanford[ , 3]
hanford <- data.frame(exposure, cancerMortality)
plot(hanford$exposure, hanford$cancerMortality, main = 'Plutonium Radiation in Hanford', xlab = 'Index of Exposure', ylab = 'Cancer Mortality')

y_i = beta_0 + beta_1 * x_i

## design matrix, and obs
y <- hanford$cancerMortality
x <- hanford$exposure
X <- cbind(1, x)  # m = 2
n <- length(y)  # n = 9
## correlation between estimates
cov2cor(t(X) %*% X)
                    x
  1.0000000 0.8143074
x 0.8143074 1.0000000
## remove the correlation ???
X.removeCorrelation <- X
X.removeCorrelation[ ,2] <- X[ ,2] - mean(X[ ,2])
cov2cor(t(X.removeCorrelation) %*% X.removeCorrelation)
                          x
  1.000000e+00 1.199279e-16
x 1.199279e-16 1.000000e+00
## Parameter estimaes
beta <- solve(t(X) %*% X) %*% t(X) %*% y
## 
regressionGLM <- glm(hanford$cancerMortality ~ hanford$exposure)
regressionLM <- lm(hanford$cancerMortality ~ hanford$exposure)

y_i = 157.333333 + 9.232264 * x_i

plot(hanford$exposure, hanford$cancerMortality, main = 'Linear Regression Model of Plutonium Radiation in Hanford', xlab = 'Index of Exposure', ylab = 'Cancer Mortality')
seq_x <- seq(1, 12)
seq_y <- beta[1] + beta[2] * seq_x
seq_y2 <- regressionGLM$coefficients[1] + regressionGLM$coefficients[2] * seq_x
lines(seq_x, seq_y, col = 4, lwd = 2)

## variance parameter
y.hat <- X %*% beta
sigmaSquare.hat <- sum((y - y.hat)^2) / n
## Unbiased Estimate, this is what we use
sigmaSquareUnbiased.hat <- sum((y - y.hat)^2) / (n - 2)
## Standard Error for beta_1 and beta_2
seBeta <- sqrt(diag(sigmaSquareUnbiased.hat * solve(t(X) %*% X)))
# Standard Error line and Confidence Interval Line
lineSE <- function(beta, seBeta, x, y){
    beta_2.se_1 <- beta[2] + seBeta[2]  # Standard error of beta_2
    beta_2.se_2 <- beta[2] - seBeta[2]
    lineSE_1 <- mean(y) - beta_2.se_1 * mean(x)
    lineSE_2 <- mean(y) - beta_2.se_2 * mean(x)
    abline(lineSE_1, beta_2.se_1, lty = 2, col = 'blue')
    abline(lineSE_2, beta_2.se_2, lty = 2, col = 'blue')
}
lineConf <- function(beta, sigmaSquareUnbiased.hat, x, y){
    n <- length(x)
    ssx <- sum(x^2) - sum(x^2) / n
    s.t <- qt(0.975, n-2)
    x.v <- seq(min(x), max(x), length = 100)
    y.v <- beta[1] + beta[2] * x.v
    se <- sqrt(sigmaSquareUnbiased.hat * (1 / n + (x.v - mean(x))^2 / ssx))  # Not standard error ?
    ci <- s.t * se
    y.v.upper <- y.v + ci
    y.v.lower <- y.v - ci
    lines(x.v, y.v.upper, lty = 3, col = 'blue')
    lines(x.v, y.v.lower, lty = 3, col = 'blue')
}
plot(hanford$exposure, hanford$cancerMortality, main = 'Linear Regression Model of Plutonium Radiation in Hanford', xlab = 'Index of Exposure', ylab = 'Cancer Mortality')
seq_x <- seq(1, 12)
seq_y <- beta[1] + beta[2] * seq_x
lines(seq_x, seq_y, col = 4, lwd = 2)
lineSE(beta, seBeta, x, y)
lineConf(beta, sigmaSquareUnbiased.hat, x, y)

## Remember to convert the name of the data, because the name in model must consist with those in the new data.
y <- hanford$cancerMortality
x <- hanford$exposure
regressionLM_2 <- lm(y ~ x)
pred.frame <- data.frame(x = seq(1, 30))
valuePred <- predict(regressionLM_2, pred.frame, se.fit = TRUE)
intervalPred <- predict(regressionLM_2, newdata = pred.frame, interval = "pred")
intervalConf <- predict(regressionLM_2, newdata = pred.frame, interval = "conf")
plot(pred.frame$x, valuePred$fit, pch = 8, ylim = c(min(y, valuePred$fit), max(y, valuePred$fit)), xlim = c(min(x, pred.frame$x), max(x, pred.frame$x)))
matlines(pred.frame$x, intervalPred, lty = c(1, 2, 2), col = c("black", "blue", "blue"), lwd = 2)
points(hanford$exposure, hanford$cancerMortality)
lines(pred.frame$x, intervalConf[, 2], lty = 3, col = "red", lwd = 2)
lines(pred.frame$x, intervalConf[, 3], lty = 3, col = "red", lwd = 2)

## Directly in R
summary(lm(y ~ X[ ,2]))

Call:
lm(formula = y ~ X[, 2])

Residuals:
    Min      1Q  Median      3Q     Max 
-16.283 -12.741   4.020   9.411  18.602 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  114.701      8.039  14.269 1.97e-06 ***
X[, 2]         9.232      1.418   6.513  0.00033 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 14 on 7 degrees of freedom
Multiple R-squared:  0.8584,    Adjusted R-squared:  0.8381 
F-statistic: 42.42 on 1 and 7 DF,  p-value: 0.0003301
cbind(beta, seBeta)
               seBeta
  114.700789 8.038525
x   9.232264 1.417528
sqrt(sigmaSquareUnbiased.hat)
[1] 13.9975
## qqplot of residuals
library(car)
qqPlot(y - y.hat)
[1] 1 3

## Note the effect of the small dataHanfordaset...
LS0tCnRpdGxlOiAiRXhhbXBsZSA2LjIsIFBsdXRvbml1bSBSYWRpYXRpb24gaW4gSGFuZm9yZCIKb3V0cHV0OiBodG1sX25vdGVib29rCmF1dGhvcjogRWR3YXJkIEouIFh1CkRhdGU6IERlYyAxNHRoLCAyMDE4Ci0tLQoKYGBge3J9CiMgQ2xlYXIgdmFyaWFibGVzCnJtKGxpc3Q9bHMoKSkKYGBgCgpgYGB7ciwgZmlnLndpZHRoID0gMTB9CmRhdGFIYW5mb3JkIDwtIHJlYWQudGFibGUoImhhbmZvcmQudHh0IiwgaGVhZGVyID0gRkFMU0UpCiMjIExvb2sgYXQgZGF0YUhhbmZvcmRhCmV4cG9zdXJlIDwtIGRhdGFIYW5mb3JkWyAsIDJdCmNhbmNlck1vcnRhbGl0eSA8LSBkYXRhSGFuZm9yZFsgLCAzXQpoYW5mb3JkIDwtIGRhdGEuZnJhbWUoZXhwb3N1cmUsIGNhbmNlck1vcnRhbGl0eSkKcGxvdChoYW5mb3JkJGV4cG9zdXJlLCBoYW5mb3JkJGNhbmNlck1vcnRhbGl0eSwgbWFpbiA9ICdQbHV0b25pdW0gUmFkaWF0aW9uIGluIEhhbmZvcmQnLCB4bGFiID0gJ0luZGV4IG9mIEV4cG9zdXJlJywgeWxhYiA9ICdDYW5jZXIgTW9ydGFsaXR5JykKYGBgCgp5X2kgPSBiZXRhXzAgKyBiZXRhXzEgKiB4X2kKCmBgYHtyfQojIyBkZXNpZ24gbWF0cml4LCBhbmQgb2JzCnkgPC0gaGFuZm9yZCRjYW5jZXJNb3J0YWxpdHkKeCA8LSBoYW5mb3JkJGV4cG9zdXJlClggPC0gY2JpbmQoMSwgeCkgICMgbSA9IDIKbiA8LSBsZW5ndGgoeSkgICMgbiA9IDkKCiMjIGNvcnJlbGF0aW9uIGJldHdlZW4gZXN0aW1hdGVzCmNvdjJjb3IodChYKSAlKiUgWCkKCiMjIHJlbW92ZSB0aGUgY29ycmVsYXRpb24gPz8/ClgucmVtb3ZlQ29ycmVsYXRpb24gPC0gWApYLnJlbW92ZUNvcnJlbGF0aW9uWyAsMl0gPC0gWFsgLDJdIC0gbWVhbihYWyAsMl0pCmNvdjJjb3IodChYLnJlbW92ZUNvcnJlbGF0aW9uKSAlKiUgWC5yZW1vdmVDb3JyZWxhdGlvbikKCiMjIFBhcmFtZXRlciBlc3RpbWFlcwpiZXRhIDwtIHNvbHZlKHQoWCkgJSolIFgpICUqJSB0KFgpICUqJSB5CgojIyAKcmVncmVzc2lvbkdMTSA8LSBnbG0oaGFuZm9yZCRjYW5jZXJNb3J0YWxpdHkgfiBoYW5mb3JkJGV4cG9zdXJlKQpyZWdyZXNzaW9uTE0gPC0gbG0oaGFuZm9yZCRjYW5jZXJNb3J0YWxpdHkgfiBoYW5mb3JkJGV4cG9zdXJlKQpgYGAKCnlfaSA9IDE1Ny4zMzMzMzMgKyA5LjIzMjI2NCAqIHhfaQoKYGBge3IsIGZpZy53aWR0aCA9IDEwfQpwbG90KGhhbmZvcmQkZXhwb3N1cmUsIGhhbmZvcmQkY2FuY2VyTW9ydGFsaXR5LCBtYWluID0gJ0xpbmVhciBSZWdyZXNzaW9uIE1vZGVsIG9mIFBsdXRvbml1bSBSYWRpYXRpb24gaW4gSGFuZm9yZCcsIHhsYWIgPSAnSW5kZXggb2YgRXhwb3N1cmUnLCB5bGFiID0gJ0NhbmNlciBNb3J0YWxpdHknKQpzZXFfeCA8LSBzZXEoMSwgMTIpCnNlcV95IDwtIGJldGFbMV0gKyBiZXRhWzJdICogc2VxX3gKc2VxX3kyIDwtIHJlZ3Jlc3Npb25HTE0kY29lZmZpY2llbnRzWzFdICsgcmVncmVzc2lvbkdMTSRjb2VmZmljaWVudHNbMl0gKiBzZXFfeApsaW5lcyhzZXFfeCwgc2VxX3ksIGNvbCA9IDQsIGx3ZCA9IDIpCmBgYAoKYGBge3IsIGZpZy53aWR0aCA9IDEwfQojIyB2YXJpYW5jZSBwYXJhbWV0ZXIKeS5oYXQgPC0gWCAlKiUgYmV0YQpzaWdtYVNxdWFyZS5oYXQgPC0gc3VtKCh5IC0geS5oYXQpXjIpIC8gbgojIyBVbmJpYXNlZCBFc3RpbWF0ZSwgdGhpcyBpcyB3aGF0IHdlIHVzZQpzaWdtYVNxdWFyZVVuYmlhc2VkLmhhdCA8LSBzdW0oKHkgLSB5LmhhdCleMikgLyAobiAtIDIpCiMjIFN0YW5kYXJkIEVycm9yIGZvciBiZXRhXzEgYW5kIGJldGFfMgpzZUJldGEgPC0gc3FydChkaWFnKHNpZ21hU3F1YXJlVW5iaWFzZWQuaGF0ICogc29sdmUodChYKSAlKiUgWCkpKQoKIyBTdGFuZGFyZCBFcnJvciBsaW5lIGFuZCBDb25maWRlbmNlIEludGVydmFsIExpbmUKbGluZVNFIDwtIGZ1bmN0aW9uKGJldGEsIHNlQmV0YSwgeCwgeSl7CiAgICBiZXRhXzIuc2VfMSA8LSBiZXRhWzJdICsgc2VCZXRhWzJdICAjIFN0YW5kYXJkIGVycm9yIG9mIGJldGFfMgogICAgYmV0YV8yLnNlXzIgPC0gYmV0YVsyXSAtIHNlQmV0YVsyXQogICAgbGluZVNFXzEgPC0gbWVhbih5KSAtIGJldGFfMi5zZV8xICogbWVhbih4KQogICAgbGluZVNFXzIgPC0gbWVhbih5KSAtIGJldGFfMi5zZV8yICogbWVhbih4KQogICAgYWJsaW5lKGxpbmVTRV8xLCBiZXRhXzIuc2VfMSwgbHR5ID0gMiwgY29sID0gJ2JsdWUnKQogICAgYWJsaW5lKGxpbmVTRV8yLCBiZXRhXzIuc2VfMiwgbHR5ID0gMiwgY29sID0gJ2JsdWUnKQp9CmxpbmVDb25mIDwtIGZ1bmN0aW9uKGJldGEsIHNpZ21hU3F1YXJlVW5iaWFzZWQuaGF0LCB4LCB5KXsKICAgIG4gPC0gbGVuZ3RoKHgpCiAgICBzc3ggPC0gc3VtKHheMikgLSBzdW0oeF4yKSAvIG4KICAgIHMudCA8LSBxdCgwLjk3NSwgbi0yKQogICAgeC52IDwtIHNlcShtaW4oeCksIG1heCh4KSwgbGVuZ3RoID0gMTAwKQogICAgeS52IDwtIGJldGFbMV0gKyBiZXRhWzJdICogeC52CiAgICBzZSA8LSBzcXJ0KHNpZ21hU3F1YXJlVW5iaWFzZWQuaGF0ICogKDEgLyBuICsgKHgudiAtIG1lYW4oeCkpXjIgLyBzc3gpKSAgIyBOb3Qgc3RhbmRhcmQgZXJyb3IgPwogICAgY2kgPC0gcy50ICogc2UKICAgIHkudi51cHBlciA8LSB5LnYgKyBjaQogICAgeS52Lmxvd2VyIDwtIHkudiAtIGNpCiAgICBsaW5lcyh4LnYsIHkudi51cHBlciwgbHR5ID0gMywgY29sID0gJ2JsdWUnKQogICAgbGluZXMoeC52LCB5LnYubG93ZXIsIGx0eSA9IDMsIGNvbCA9ICdibHVlJykKfQpwbG90KGhhbmZvcmQkZXhwb3N1cmUsIGhhbmZvcmQkY2FuY2VyTW9ydGFsaXR5LCBtYWluID0gJ0xpbmVhciBSZWdyZXNzaW9uIE1vZGVsIG9mIFBsdXRvbml1bSBSYWRpYXRpb24gaW4gSGFuZm9yZCcsIHhsYWIgPSAnSW5kZXggb2YgRXhwb3N1cmUnLCB5bGFiID0gJ0NhbmNlciBNb3J0YWxpdHknKQpzZXFfeCA8LSBzZXEoMSwgMTIpCnNlcV95IDwtIGJldGFbMV0gKyBiZXRhWzJdICogc2VxX3gKbGluZXMoc2VxX3gsIHNlcV95LCBjb2wgPSA0LCBsd2QgPSAyKQpsaW5lU0UoYmV0YSwgc2VCZXRhLCB4LCB5KQpsaW5lQ29uZihiZXRhLCBzaWdtYVNxdWFyZVVuYmlhc2VkLmhhdCwgeCwgeSkKYGBgCgpgYGB7ciwgZmlnLndpZHRoID0gMTB9CiMjIFJlbWVtYmVyIHRvIGNvbnZlcnQgdGhlIG5hbWUgb2YgdGhlIGRhdGEsIGJlY2F1c2UgdGhlIG5hbWUgaW4gbW9kZWwgbXVzdCBjb25zaXN0IHdpdGggdGhvc2UgaW4gdGhlIG5ldyBkYXRhLgp5IDwtIGhhbmZvcmQkY2FuY2VyTW9ydGFsaXR5CnggPC0gaGFuZm9yZCRleHBvc3VyZQpyZWdyZXNzaW9uTE1fMiA8LSBsbSh5IH4geCkKcHJlZC5mcmFtZSA8LSBkYXRhLmZyYW1lKHggPSBzZXEoMSwgMzApKQp2YWx1ZVByZWQgPC0gcHJlZGljdChyZWdyZXNzaW9uTE1fMiwgcHJlZC5mcmFtZSwgc2UuZml0ID0gVFJVRSkKaW50ZXJ2YWxQcmVkIDwtIHByZWRpY3QocmVncmVzc2lvbkxNXzIsIG5ld2RhdGEgPSBwcmVkLmZyYW1lLCBpbnRlcnZhbCA9ICJwcmVkIikKaW50ZXJ2YWxDb25mIDwtIHByZWRpY3QocmVncmVzc2lvbkxNXzIsIG5ld2RhdGEgPSBwcmVkLmZyYW1lLCBpbnRlcnZhbCA9ICJjb25mIikKcGxvdChwcmVkLmZyYW1lJHgsIHZhbHVlUHJlZCRmaXQsIHBjaCA9IDgsIHlsaW0gPSBjKG1pbih5LCB2YWx1ZVByZWQkZml0KSwgbWF4KHksIHZhbHVlUHJlZCRmaXQpKSwgeGxpbSA9IGMobWluKHgsIHByZWQuZnJhbWUkeCksIG1heCh4LCBwcmVkLmZyYW1lJHgpKSkKbWF0bGluZXMocHJlZC5mcmFtZSR4LCBpbnRlcnZhbFByZWQsIGx0eSA9IGMoMSwgMiwgMiksIGNvbCA9IGMoImJsYWNrIiwgImJsdWUiLCAiYmx1ZSIpLCBsd2QgPSAyKQpwb2ludHMoaGFuZm9yZCRleHBvc3VyZSwgaGFuZm9yZCRjYW5jZXJNb3J0YWxpdHkpCmxpbmVzKHByZWQuZnJhbWUkeCwgaW50ZXJ2YWxDb25mWywgMl0sIGx0eSA9IDMsIGNvbCA9ICJyZWQiLCBsd2QgPSAyKQpsaW5lcyhwcmVkLmZyYW1lJHgsIGludGVydmFsQ29uZlssIDNdLCBsdHkgPSAzLCBjb2wgPSAicmVkIiwgbHdkID0gMikKYGBgCgoKYGBge3IsIGZpZy53aWR0aCA9IDEwfQojIyBEaXJlY3RseSBpbiBSCnN1bW1hcnkobG0oeSB+IFhbICwyXSkpCmNiaW5kKGJldGEsIHNlQmV0YSkKc3FydChzaWdtYVNxdWFyZVVuYmlhc2VkLmhhdCkKCiMjIHFxcGxvdCBvZiByZXNpZHVhbHMKbGlicmFyeShjYXIpCnFxUGxvdCh5IC0geS5oYXQpCiMjIE5vdGUgdGhlIGVmZmVjdCBvZiB0aGUgc21hbGwgZGF0YUhhbmZvcmRhc2V0Li4uCmBgYAoK