```r
library(ISLR2)
## Warning: package 'ISLR2' was built under R version 4.3.3
library(boot)
attach(Wage)
library(gam)
## Warning: package 'gam' was built under R version 4.3.3
## Loading required package: splines
## Loading required package: foreach
## Warning: package 'foreach' was built under R version 4.3.3
## Loaded gam 1.22-5
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:ISLR2':
## 
##     Boston
```

# Lab 8

  1. In this exercise, you will further analyze the Wage data set considered throughout this chapter. (a) Perform polynomial regression to predict wage using age. Use cross-validation to select the optimal degree d for the polynomial. What degree was chosen, and how does this compare to the results of hypothesis testing using ANOVA? Make a plot of the resulting polynomial fit to the data.

```r
set.seed(15)
cv.error <- rep(0, 5)

for (i in 1:5) {
  glm.fit <- glm(wage ~ poly(age, i), data = Wage)
  cv.error[i] <- cv.glm(Wage, glm.fit, K = 10)$delta[1]
}
cv.error

plot(cv.error, type = "b", xlab = "Degree", ylab = "Test MSE")
points(which.min(cv.error), cv.error[which.min(cv.error)], col = "red", pch = 20, cex = 2)
```


The optimal degree chosen through cross-validation is 4.


```r
fit1 <- lm(wage ~ age, data = Wage)
fit2 <- lm(wage ~ poly(age, 2), data = Wage)
fit3 <- lm(wage ~ poly(age, 3), data = Wage)
fit4 <- lm(wage ~ poly(age, 4), data = Wage)
fit5 <- lm(wage ~ poly(age, 5), data = Wage)
anova(fit1, fit2, fit3, fit4, fit5)
## Analysis of Variance Table
## 
## Model 1: wage ~ age
## Model 2: wage ~ poly(age, 2)
## Model 3: wage ~ poly(age, 3)
## Model 4: wage ~ poly(age, 4)
## Model 5: wage ~ poly(age, 5)
##   Res.Df     RSS Df Sum of Sq        F    Pr(>F)    
## 1   2998 5022216                                    
## 2   2997 4793430  1    228786 143.5931 < 2.2e-16 ***
## 3   2996 4777674  1     15756   9.8888  0.001679 ** 
## 4   2995 4771604  1      6070   3.8098  0.051046 .  
## 5   2994 4770322  1      1283   0.8050  0.369682    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
```

The p-values indicate that either a cubic or a quartic polynomial provides a reasonable fit to the data; the degree-5 term adds nothing significant.
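Because poly() generates orthogonal polynomials, these F-statistics can be cross-checked against the largest model directly: the square of each t-statistic in the degree-5 fit equals the corresponding F-statistic in the ANOVA table.

```r
# For nested orthogonal-polynomial models, t^2 from the degree-5 fit
# reproduces the F column of the ANOVA table above
coef(summary(fit5))[, "t value"]^2
```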

```r
age_lim <- range(age)
age_grid <- seq(from = age_lim[1], to = age_lim[2])
preds <- predict(fit4, newdata = list(age = age_grid), se = TRUE)
se_bands <- cbind(preds$fit + 2 * preds$se.fit, preds$fit - 2 * preds$se.fit)

plot(age, wage, xlim = age_lim, cex = .5, col = "darkgrey")
lines(age_grid, preds$fit, lwd = 2, col = "blue")
matlines(age_grid, se_bands, lwd = 1, col = "blue", lty = 3)
```

  2. Fit a step function to predict wage using age, and perform cross-validation to choose the optimal number of cuts. Make a plot of the fit obtained.
```r
set.seed(2)
cv.errors <- rep(NA, 10)

for (i in 2:10) {
  Wage$age.cut <- cut(Wage$age, i)
  glm.fit <- glm(wage ~ age.cut, data = Wage)
  cv.errors[i] <- cv.glm(Wage, glm.fit, K = 10)$delta[1]
}
cv.errors
##  [1]       NA 1734.448 1681.623 1635.869 1632.864 1621.643 1613.148 1601.850
##  [9] 1610.852 1603.607
plot(2:10, cv.errors[-1], type = "b", xlab = "Number of cuts", ylab = "Test MSE")
points(which.min(cv.errors), cv.errors[which.min(cv.errors)], col = "red", pch = 20, cex = 2)
```

The optimal number of cuts is 8.

```r
fit_step <- glm(wage ~ cut(age, 8), data = Wage)
preds <- predict(fit_step, data.frame(age = age_grid))
plot(age, wage, col = "darkgray")
lines(age_grid, preds, col = "darkgreen", lwd = 2)
```
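One subtlety worth flagging: because cut(age, 8) sits inside the model formula, predict() re-derives the eight break points from the ages supplied in newdata. That is harmless here only because age_grid spans the same range as age. A safer sketch (age_breaks, age_bin, and fit_step2 are illustrative names, not from the original) precomputes the breaks once and reuses them:

```r
# Precompute equal-width break points so training and prediction
# are guaranteed to use identical bins
age_breaks <- seq(min(age), max(age), length.out = 9)
Wage$age_bin <- cut(Wage$age, breaks = age_breaks, include.lowest = TRUE)
fit_step2 <- glm(wage ~ age_bin, data = Wage)
preds2 <- predict(fit_step2,
                  data.frame(age_bin = cut(age_grid, breaks = age_breaks,
                                           include.lowest = TRUE)))
```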

  3. The Wage data set contains a number of other features not explored in this chapter, such as marital status (maritl), job class (jobclass), and others. Explore the relationships between some of these other predictors and wage, and use non-linear fitting techniques in order to fit flexible models to the data. Create plots of the results obtained, and write a summary of your findings.

```r
summary(maritl)

plot(maritl, wage)
title("Marital Status vs Wage")
```


The plot shows that married men have the highest average wage, but also the highest variability in wage.


```r
summary(jobclass)
##  1. Industrial 2. Information 
##           1544           1456
plot(jobclass, wage)
title("Jobclass vs Wage")

Jobs in the information sector pay more on average than those in the industrial sector.

```r
fit0 <- gam(wage ~ lo(year, span = 0.7) + s(age, 5) + education, data = Wage)
fit1 <- gam(wage ~ lo(year, span = 0.7) + s(age, 5) + education + jobclass, data = Wage)
fit2 <- gam(wage ~ lo(year, span = 0.7) + s(age, 5) + education + maritl, data = Wage)
fit3 <- gam(wage ~ lo(year, span = 0.7) + s(age, 5) + education + jobclass + maritl, data = Wage)
anova(fit0, fit1, fit2, fit3)
## Analysis of Deviance Table
## 
## Model 1: wage ~ lo(year, span = 0.7) + s(age, 5) + education
## Model 2: wage ~ lo(year, span = 0.7) + s(age, 5) + education + jobclass
## Model 3: wage ~ lo(year, span = 0.7) + s(age, 5) + education + maritl
## Model 4: wage ~ lo(year, span = 0.7) + s(age, 5) + education + jobclass + 
##     maritl
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1    2987.1    3691855                          
## 2    2986.1    3679689  1    12166 0.0014637 ** 
## 3    2983.1    3597526  3    82163  9.53e-15 ***
## 4    2982.1    3583675  1    13852 0.0006862 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
```

fit3, which adds both jobclass and maritl to the base GAM, is significantly better than the simpler models: each added term reduces the deviance significantly.

```r
par(mfrow = c(3, 3))
plot(fit3, se = TRUE, col = "blue")
```

  4. This question uses the variables dis (the weighted mean of distances to five Boston employment centers) and nox (nitrogen oxides concentration in parts per 10 million) from the Boston data. We will treat dis as the predictor and nox as the response.

    (a) Use the poly() function to fit a cubic polynomial regression to predict nox using dis. Report the regression output, and plot the resulting data and polynomial fits.
    set.seed(1)
    fit <- lm(nox ~ poly(dis, 3), data = Boston)
    summary(fit)
    ## 
    ## Call:
    ## lm(formula = nox ~ poly(dis, 3), data = Boston)
    ## 
    ## Residuals:
    ##       Min        1Q    Median        3Q       Max 
    ## -0.121130 -0.040619 -0.009738  0.023385  0.194904 
    ## 
    ## Coefficients:
    ##                Estimate Std. Error t value Pr(>|t|)    
    ## (Intercept)    0.554695   0.002759 201.021  < 2e-16 ***
    ## poly(dis, 3)1 -2.003096   0.062071 -32.271  < 2e-16 ***
    ## poly(dis, 3)2  0.856330   0.062071  13.796  < 2e-16 ***
    ## poly(dis, 3)3 -0.318049   0.062071  -5.124 4.27e-07 ***
    ## ---
    ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    ## 
    ## Residual standard error: 0.06207 on 502 degrees of freedom
    ## Multiple R-squared:  0.7148, Adjusted R-squared:  0.7131 
    ## F-statistic: 419.3 on 3 and 502 DF,  p-value: < 2.2e-16
    dislims <- range(Boston$dis)
    dis.grid <- seq(from = dislims[1], to = dislims[2], by = 0.1)
    preds <- predict(fit, list(dis = dis.grid))
    plot(nox ~ dis, data = Boston, col = "darkgrey")
    lines(dis.grid, preds, col = "red", lwd = 2)

    All polynomial terms are significant.

    (b) Plot the polynomial fits for a range of different polynomial degrees (say, from 1 to 10), and report the associated residual sum of squares.
    rss <- rep(NA, 10)
    for (i in 1:10) {
        fit <- lm(nox ~ poly(dis, i), data = Boston)
        rss[i] <- sum(fit$residuals^2)
    }
    plot(1:10, rss, xlab = "Degree", ylab = "RSS", type = "l")

    The training RSS decreases monotonically as the polynomial degree increases (the models are nested), reaching its minimum at degree 10.

    (c) Perform cross-validation or another approach to select the optimal degree for the polynomial, and explain your results.
    deltas <- rep(NA, 10)
    for (i in 1:10) {
        fit <- glm(nox ~ poly(dis, i), data = Boston)
        deltas[i] <- cv.glm(Boston, fit, K = 10)$delta[1]
    }
    plot(1:10, deltas, xlab = "Degree", ylab = "Test MSE", type = "l")

    A degree 4 polynomial minimizes test MSE.
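    To read the selected degree off programmatically rather than from the plot (a quick check on the deltas vector above):

    which.min(deltas)  # index = polynomial degree with the smallest CV error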

    (d) Use the bs() function to fit a regression spline to predict nox using dis. Report the output for the fit using four degrees of freedom. How did you choose the knots? Plot the resulting fit.
    fit <- lm(nox ~ bs(dis, knots = c(4, 7, 11)), data = Boston)
    summary(fit)
    ## 
    ## Call:
    ## lm(formula = nox ~ bs(dis, knots = c(4, 7, 11)), data = Boston)
    ## 
    ## Residuals:
    ##       Min        1Q    Median        3Q       Max 
    ## -0.124567 -0.040355 -0.008702  0.024740  0.192920 
    ## 
    ## Coefficients:
    ##                               Estimate Std. Error t value Pr(>|t|)    
    ## (Intercept)                    0.73926    0.01331  55.537  < 2e-16 ***
    ## bs(dis, knots = c(4, 7, 11))1 -0.08861    0.02504  -3.539  0.00044 ***
    ## bs(dis, knots = c(4, 7, 11))2 -0.31341    0.01680 -18.658  < 2e-16 ***
    ## bs(dis, knots = c(4, 7, 11))3 -0.26618    0.03147  -8.459 3.00e-16 ***
    ## bs(dis, knots = c(4, 7, 11))4 -0.39802    0.04647  -8.565  < 2e-16 ***
    ## bs(dis, knots = c(4, 7, 11))5 -0.25681    0.09001  -2.853  0.00451 ** 
    ## bs(dis, knots = c(4, 7, 11))6 -0.32926    0.06327  -5.204 2.85e-07 ***
    ## ---
    ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    ## 
    ## Residual standard error: 0.06185 on 499 degrees of freedom
    ## Multiple R-squared:  0.7185, Adjusted R-squared:  0.7151 
    ## F-statistic: 212.3 on 6 and 499 DF,  p-value: < 2.2e-16
    pred <- predict(fit, list(dis = dis.grid))
    plot(nox ~ dis, data = Boston, col = "darkgrey")
    lines(dis.grid, pred, col = "red", lwd = 2)

    All terms in the spline fit are significant. The knots were placed by hand at dis = 4, 7, and 11, splitting the range of dis (roughly 1 to 12) into intervals of similar width.
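    An alternative to hand-picking knots is to specify df and let bs() place the interior knots at quantiles of dis; their locations can be inspected afterwards (a quick sketch):

    # With df = 6 and a cubic basis, bs() places 3 interior knots
    # at the 25th, 50th, and 75th percentiles of dis
    attr(bs(Boston$dis, df = 6), "knots")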

    (e) Now fit a regression spline for a range of degrees of freedom, and plot the resulting fits and report the resulting RSS. Describe the results obtained.
    rss <- rep(NA, 16)
    for (i in 3:16) {
        fit <- lm(nox ~ bs(dis, df = i), data = Boston)
        rss[i] <- sum(fit$residuals^2)
    }
    plot(3:16, rss[-c(1, 2)], xlab = "Degrees of freedom", ylab = "RSS", type = "l")

    The training RSS decreases up to 14 degrees of freedom and then begins to increase slightly.

    (f) Perform cross-validation or another approach in order to select the best degrees of freedom for a regression spline on this data. Describe your results.
    cv <- rep(NA, 16)
    for (i in 3:16) {
        fit <- glm(nox ~ bs(dis, df = i), data = Boston)
        cv[i] <- cv.glm(Boston, fit, K = 10)$delta[1]
    }
    ## Warning in bs(dis, degree = 3L, ...): some 'x' values beyond boundary knots
    ## may cause ill-conditioned bases
    ## (this warning repeats for every CV fold and every df value, because each
    ## held-out fold contains dis values outside the training folds' boundary knots)
    plot(3:16, cv[-c(1, 2)], xlab = "Degrees of freedom", ylab = "Test MSE", type = "l")

    Test MSE is lowest at 13 degrees of freedom.
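    As before, the selection can be confirmed programmatically (note that without a fresh set.seed() call the CV curve varies slightly from run to run):

    which.min(cv)  # degrees of freedom with the smallest CV error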

  5. In Section 7.7, it was mentioned that GAMs are generally fit using a backfitting approach. The idea behind backfitting is actually quite simple. We will now explore backfitting in the context of multiple linear regression. Suppose that we would like to perform multiple linear regression, but we do not have software to do so. Instead, we only have software to perform simple linear regression. Therefore, we take the following iterative approach: we repeatedly hold all but one coefficient estimate fixed at its current value, and update only that coefficient estimate using a simple linear regression. The process is continued until convergence, that is, until the coefficient estimates stop changing. We now try this out on a toy example. (a) Generate a response Y and two predictors X1 and X2, with n = 100.

    set.seed(123)
    n <- 100
    X1 <- rnorm(n)
    X2 <- rnorm(n)
    Y <- X1 + 2*X2 + rnorm(n)
  (b) Initialize $\hat{\beta}_1$ to take on a value of your choice. It does not matter what value you choose.
```r
beta1 <- 1
```
  (c) Keeping $\hat{\beta}_1$ fixed, fit the model $Y - \hat{\beta}_1 X_1 = \beta_0 + \beta_2 X_2 + \epsilon$.
```r
beta2 <- lm(Y - beta1 * X1 ~ X2)$coef[2]
```
  (d) Keeping $\hat{\beta}_2$ fixed, fit the model $Y - \hat{\beta}_2 X_2 = \beta_0 + \beta_1 X_1 + \epsilon$.
```r
beta1 <- lm(Y - beta2 * X2 ~ X1)$coef[2]
```
  (e) Write a for loop to repeat (c) and (d) 1,000 times. Report the estimates of $\hat{\beta}_0$, $\hat{\beta}_1$, and $\hat{\beta}_2$ at each iteration of the for loop. Create a plot in which each of these values is displayed, with $\hat{\beta}_0$, $\hat{\beta}_1$, and $\hat{\beta}_2$ each shown in a different color.
```r
niter <- 1000
beta0_list <- numeric(niter)
beta1_list <- numeric(niter)
beta2_list <- numeric(niter)

for (i in 1:niter) {
  beta2 <- lm(Y - beta1 * X1 ~ X2)$coef[2]
  beta1 <- lm(Y - beta2 * X2 ~ X1)$coef[2]
  beta0 <- mean(Y - beta1 * X1 - beta2 * X2)

  beta0_list[i] <- beta0
  beta1_list[i] <- beta1
  beta2_list[i] <- beta2
}
plot(1:niter, beta0_list, col = "red", type = "l",
     ylim = range(c(beta0_list, beta1_list, beta2_list)),
     xlab = "Iteration", ylab = "Coefficient estimates")
lines(1:niter, beta1_list, col = "blue")
lines(1:niter, beta2_list, col = "green")
# Overlay the multiple regression estimates (part (f)) as horizontal
# dashed lines; they coincide with the converged backfitting values
mlr <- lm(Y ~ X1 + X2)
abline(h = coef(mlr)[2], col = "purple", lty = 2)
abline(h = coef(mlr)[3], col = "orange", lty = 2)
legend("topright",
       legend = c("beta0", "beta1", "beta2", "MLR beta1", "MLR beta2"),
       col = c("red", "blue", "green", "purple", "orange"),
       lty = c(1, 1, 1, 2, 2))
```

  (f) Compare your answer in (e) to the results of simply performing multiple linear regression to predict Y using X1 and X2. Use the abline() function to overlay those multiple linear regression coefficient estimates on the plot obtained in (e).
```r
mlr <- lm(Y ~ X1 + X2)
summary(mlr)
## 
## Call:
## lm(formula = Y ~ X1 + X2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8730 -0.6607 -0.1245  0.6214  2.0798 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.13507    0.09614   1.405    0.163    
## X1           0.86683    0.10487   8.266 7.29e-13 ***
## X2           2.02381    0.09899  20.444  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9513 on 97 degrees of freedom
## Multiple R-squared:  0.8291, Adjusted R-squared:  0.8256 
## F-statistic: 235.3 on 2 and 97 DF,  p-value: < 2.2e-16
```
  (g) On this data set, how many backfitting iterations were required in order to obtain a “good” approximation to the multiple regression coefficient estimates?

Two iterations already provide a good approximation to the multiple regression coefficient estimates; the values barely change after that.
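A quick way to verify this from the stored traces (reusing beta0_list, beta1_list, beta2_list from part (e) and mlr from above):

```r
# The first rows already match the lm() estimates to several decimal places
head(cbind(beta0_list, beta1_list, beta2_list), 3)
coef(mlr)
```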
