Question 6

# Load the libraries
library(ISLR2)
## Warning: package 'ISLR2' was built under R version 4.4.3
library(boot)
## Warning: package 'boot' was built under R version 4.4.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3

(a)

# View the data
data(Wage)

# Set seed for reproducibility
set.seed(1)

# Cross-validation to choose optimal degree
cv.error <- rep(0, 10)
for (d in 1:10) {
  glm.fit <- glm(wage ~ poly(age, d), data = Wage)
  cv.error[d] <- cv.glm(Wage, glm.fit, K = 10)$delta[1]
}

# Plot cross-validation errors
plot(1:10, cv.error, type = "b", xlab = "Degree", ylab = "CV Error", main = "CV Error vs Polynomial Degree")

# Optimal degree
optimal.degree <- which.min(cv.error)
cat("Optimal degree selected by CV:", optimal.degree, "\n")
## Optimal degree selected by CV: 9
# Fit the polynomial model with optimal degree
fit.poly <- glm(wage ~ poly(age, optimal.degree), data = Wage)

# ANOVA to compare degrees
fit.1 <- lm(wage ~ poly(age, 1), data = Wage)
fit.2 <- lm(wage ~ poly(age, 2), data = Wage)
fit.3 <- lm(wage ~ poly(age, 3), data = Wage)
fit.4 <- lm(wage ~ poly(age, 4), data = Wage)
fit.5 <- lm(wage ~ poly(age, 5), data = Wage)

# ANOVA table comparing nested models
anova(fit.1, fit.2, fit.3, fit.4, fit.5, test = "F")
## Analysis of Variance Table
## 
## Model 1: wage ~ poly(age, 1)
## Model 2: wage ~ poly(age, 2)
## Model 3: wage ~ poly(age, 3)
## Model 4: wage ~ poly(age, 4)
## Model 5: wage ~ poly(age, 5)
##   Res.Df     RSS Df Sum of Sq        F    Pr(>F)    
## 1   2998 5022216                                    
## 2   2997 4793430  1    228786 143.5931 < 2.2e-16 ***
## 3   2996 4777674  1     15756   9.8888  0.001679 ** 
## 4   2995 4771604  1      6070   3.8098  0.051046 .  
## 5   2994 4770322  1      1283   0.8050  0.369682    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Plot polynomial fit
age.lims <- range(Wage$age)
age.grid <- seq(from = age.lims[1], to = age.lims[2])
preds <- predict(fit.poly, newdata = list(age = age.grid), se = TRUE)

plot(Wage$age, Wage$wage, col = "gray", main = "Polynomial Fit (Degree = Optimal)")
lines(age.grid, preds$fit, col = "blue", lwd = 2)
lines(age.grid, preds$fit + 2 * preds$se.fit, col = "blue", lty = "dashed")
lines(age.grid, preds$fit - 2 * preds$se.fit, col = "blue", lty = "dashed")

Cross-validation selects the degree with the lowest estimated test error (degree 9 here), whereas the ANOVA F-tests ask whether each additional polynomial term is statistically justified: the quadratic and cubic terms are highly significant, the quartic is borderline (p ≈ 0.051), and the quintic adds nothing. Degree 9 may slightly overfit; the ANOVA suggests a cubic or quartic polynomial would do nearly as well with far less complexity. The sketch below compares the CV errors directly.
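A minimal follow-up (sketch, using the cv.error vector computed above): compare the CV error of the CV-selected degree with the cubic and quartic fits singled out by the ANOVA. The exact numbers depend on the random fold assignment.

# Compare CV errors for the ANOVA-favored degrees against the CV-selected degree
round(cv.error[c(3, 4, optimal.degree)], 3)

# Relative increase in CV error from settling for the simpler cubic fit
(cv.error[3] - min(cv.error)) / min(cv.error)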

(b)

# Create bins and perform cross-validation for step functions
cv.errors.step <- rep(NA, 10)

for (i in 2:10) {
  Wage$cut.age <- cut(Wage$age, i)
  fit.step <- glm(wage ~ cut.age, data = Wage)
  cv.errors.step[i] <- cv.glm(Wage, fit.step, K = 10)$delta[1]
}

# Plot CV errors for step function
plot(2:10, cv.errors.step[2:10], type = "b", xlab = "Number of Cuts", ylab = "CV Error", main = "CV Error vs Number of Cuts")

# Optimal cut
best.cut <- which.min(cv.errors.step)
cat("Optimal number of cuts:", best.cut, "\n")
## Optimal number of cuts: 8
# Fit the final step function
Wage$cut.age <- cut(Wage$age, best.cut)
fit.final.step <- lm(wage ~ cut.age, data = Wage)

# Plot step function
plot(Wage$age, Wage$wage, col = "gray", main = "Step Function Fit")
lines(sort(Wage$age), fitted(fit.final.step)[order(Wage$age)], col = "red", lwd = 2)

Question 7

# View structure of the Wage dataset
str(Wage)
## 'data.frame':    3000 obs. of  12 variables:
##  $ year      : int  2006 2004 2003 2003 2005 2008 2009 2008 2006 2004 ...
##  $ age       : int  18 24 45 43 50 54 44 30 41 52 ...
##  $ maritl    : Factor w/ 5 levels "1. Never Married",..: 1 1 2 2 4 2 2 1 1 2 ...
##  $ race      : Factor w/ 4 levels "1. White","2. Black",..: 1 1 1 3 1 1 4 3 2 1 ...
##  $ education : Factor w/ 5 levels "1. < HS Grad",..: 1 4 3 4 2 4 3 3 3 2 ...
##  $ region    : Factor w/ 9 levels "1. New England",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ jobclass  : Factor w/ 2 levels "1. Industrial",..: 1 2 1 2 2 2 1 2 2 2 ...
##  $ health    : Factor w/ 2 levels "1. <=Good","2. >=Very Good": 1 2 1 2 1 2 2 1 2 2 ...
##  $ health_ins: Factor w/ 2 levels "1. Yes","2. No": 2 2 1 1 1 1 1 1 1 1 ...
##  $ logwage   : num  4.32 4.26 4.88 5.04 4.32 ...
##  $ wage      : num  75 70.5 131 154.7 75 ...
##  $ cut.age   : Factor w/ 8 levels "(17.9,25.8]",..: 1 1 4 4 5 5 4 2 3 5 ...
# Plot wage vs marital status
ggplot(Wage, aes(x = maritl, y = wage)) +
  geom_boxplot(fill = "skyblue") +
  labs(title = "Wage by Marital Status", x = "Marital Status", y = "Wage")

# Plot wage vs jobclass
ggplot(Wage, aes(x = jobclass, y = wage)) +
  geom_boxplot(fill = "orange") +
  labs(title = "Wage by Job Class", x = "Job Class", y = "Wage")

# Plot wage vs education
ggplot(Wage, aes(x = education, y = wage)) +
  geom_boxplot(fill = "lightgreen") +
  labs(title = "Wage by Education Level", x = "Education Level", y = "Wage")

# Fit a linear model with the categorical predictors entered as factors
fit <- lm(wage ~ maritl + jobclass + education, data = Wage)

# Summary of the model
summary(fit)
## 
## Call:
## lm(formula = wage ~ maritl + jobclass + education, data = Wage)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -109.75  -19.81   -2.74   14.85  219.37 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   66.632      2.502  26.635  < 2e-16 ***
## maritl2. Married              22.332      1.594  14.006  < 2e-16 ***
## maritl3. Widowed               7.309      8.214   0.890 0.373597    
## maritl4. Divorced              9.723      2.833   3.432 0.000607 ***
## maritl5. Separated            15.783      4.972   3.174 0.001518 ** 
## jobclass2. Information         5.199      1.355   3.836 0.000127 ***
## education2. HS Grad           11.268      2.440   4.618 4.04e-06 ***
## education3. Some College      23.128      2.579   8.966  < 2e-16 ***
## education4. College Grad      37.956      2.584  14.689  < 2e-16 ***
## education5. Advanced Degree   61.881      2.840  21.793  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 35.27 on 2990 degrees of freedom
## Multiple R-squared:  0.2876, Adjusted R-squared:  0.2855 
## F-statistic: 134.1 on 9 and 2990 DF,  p-value: < 2.2e-16
-   People in *Information* sector jobs tend to earn more than those in *Industrial* jobs.
-   This aligns with the broader economic pattern that technology and white-collar jobs tend to pay better.
-   Wage increases with education level.
-   Those with advanced degrees have visibly higher wages than those without a high school diploma.
-   The linear model treats each category as a separate factor level, allowing a flexible mean estimate for each group.
-   No transformation is needed; R automatically chooses a reference level for each factor and estimates effects relative to it (see the sketch below).
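A minimal sketch of this encoding (base-R functions only): with the default treatment contrasts, the first level of each factor serves as the reference, and each fitted coefficient is an offset from it.

# Inspect the dummy (treatment) coding used for a factor predictor
contrasts(Wage$education)                      # reference level is "1. < HS Grad"
head(model.matrix(~ education, data = Wage))   # dummy columns that enter the fit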

Question 9

(a)

# Load required libraries
library(MASS)
## Warning: package 'MASS' was built under R version 4.4.3
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:ISLR2':
## 
##     Boston
# Load the Boston data
data("Boston")

# Fit a cubic polynomial regression to predict nox using dis
fit_cubic <- lm(nox ~ poly(dis, 3), data = Boston)

# Summarize the fit
summary(fit_cubic)
## 
## Call:
## lm(formula = nox ~ poly(dis, 3), data = Boston)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.121130 -0.040619 -0.009738  0.023385  0.194904 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.554695   0.002759 201.021  < 2e-16 ***
## poly(dis, 3)1 -2.003096   0.062071 -32.271  < 2e-16 ***
## poly(dis, 3)2  0.856330   0.062071  13.796  < 2e-16 ***
## poly(dis, 3)3 -0.318049   0.062071  -5.124 4.27e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06207 on 502 degrees of freedom
## Multiple R-squared:  0.7148, Adjusted R-squared:  0.7131 
## F-statistic: 419.3 on 3 and 502 DF,  p-value: < 2.2e-16
# Plot the resulting data and polynomial fits
ggplot(Boston, aes(x = dis, y = nox)) +
  geom_point(color = "gray") +
  geom_smooth(method = "lm", formula = y ~ poly(x, 3), color = "blue") +
  labs(title = "Cubic Polynomial Fit: nox vs dis", x = "Distance to Employment Centers", y = "Nitrogen Oxides Concentration (nox)")

(b)

# Fit polynomial regressions for degrees 1 to 10
rss_values <- rep(0, 10)
for (degree in 1:10) {
  fit <- lm(nox ~ poly(dis, degree), data = Boston)
  rss_values[degree] <- sum(resid(fit)^2)
}

# Plot the polynomial fits and report RSS
plot(1:10, rss_values, type = "b", xlab = "Degree", ylab = "Residual Sum of Squares (RSS)",
     main = "RSS vs Polynomial Degree for Polynomial Fits")

(c)

# Load required libraries
library(splines)

# Remove rows with missing values in 'nox' and 'dis'
Boston_clean <- na.omit(Boston[, c("nox", "dis")])

# Initialize cv_errors vector to store cross-validation errors
cv_errors <- rep(NA, 10)

# Perform k-fold cross-validation for polynomial degrees 1 to 10
k <- 10  # Number of folds
folds <- sample(1:k, nrow(Boston_clean), replace = TRUE)

for (degree in 1:10) {
  cv_error <- 0
  
  # Perform cross-validation for each fold
  for (i in 1:k) {
    # Split data into training and validation sets
    train_data <- Boston_clean[folds != i, ]
    val_data <- Boston_clean[folds == i, ]
    
    # Fit the polynomial model to the training data
    fit <- lm(nox ~ poly(dis, degree), data = train_data)
    
    # Predict on validation data
    pred <- predict(fit, newdata = val_data)
    
    # Calculate the residual sum of squares (RSS) for this fold
    rss <- sum((val_data$nox - pred)^2)
    
    # Accumulate the validation RSS across folds
    cv_error <- cv_error + rss
  }
  
  # Compute the average CV error for the degree
  cv_errors[degree] <- cv_error / k
  
  # Debugging: Print each cv error to check the results
  cat("Degree:", degree, "CV Error:", cv_errors[degree], "\n")
}
## Degree: 1 CV Error: 0.281412 
## Degree: 2 CV Error: 0.2078067 
## Degree: 3 CV Error: 0.1963942 
## Degree: 4 CV Error: 0.1978465 
## Degree: 5 CV Error: 0.2119629 
## Degree: 6 CV Error: 0.2562822 
## Degree: 7 CV Error: 0.5853273 
## Degree: 8 CV Error: 0.5162523 
## Degree: 9 CV Error: 1.92793 
## Degree: 10 CV Error: 1.173036
# Remove NA values from the cv_errors vector
cv_errors <- na.omit(cv_errors)

# Ensure that valid degrees are selected
valid_degrees <- 1:length(cv_errors)

# Plot CV errors to visualize
plot(valid_degrees, cv_errors, type = "b", xlab = "Degree", ylab = "Cross-Validation Error",
     main = "CV Error vs Polynomial Degree")

# Report the optimal polynomial degree based on cross-validation
optimal_degree <- valid_degrees[which.min(cv_errors)]
cat("Optimal polynomial degree chosen by cross-validation:", optimal_degree, "\n")
## Optimal polynomial degree chosen by cross-validation: 3
  • The best-fitting model has a polynomial degree of 3, which minimizes the cross-validation error.
  • Higher polynomial degrees lead to overfitting, as seen from the increasing errors for degrees 7 to 10.
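As a cross-check (a minimal sketch, not part of the manual loop above), the same degree search can be run with cv.glm() from the boot package already loaded for Question 6; the selected degree may differ slightly with the random fold assignment. The names cv_check and glm_fit below are illustrative.

# 10-fold CV over polynomial degrees using cv.glm() instead of the manual loop
set.seed(1)
cv_check <- rep(NA, 10)
for (degree in 1:10) {
  glm_fit <- glm(nox ~ poly(dis, degree), data = Boston)
  cv_check[degree] <- cv.glm(Boston, glm_fit, K = 10)$delta[1]
}
which.min(cv_check)  # degree minimizing the estimated test MSE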

(d)

# Fit a regression spline using bs() function with 4 degrees of freedom
fit_spline <- lm(nox ~ bs(dis, df = 4), data = Boston)

# Summarize the spline fit
summary(fit_spline)
## 
## Call:
## lm(formula = nox ~ bs(dis, df = 4), data = Boston)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.124622 -0.039259 -0.008514  0.020850  0.193891 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       0.73447    0.01460  50.306  < 2e-16 ***
## bs(dis, df = 4)1 -0.05810    0.02186  -2.658  0.00812 ** 
## bs(dis, df = 4)2 -0.46356    0.02366 -19.596  < 2e-16 ***
## bs(dis, df = 4)3 -0.19979    0.04311  -4.634 4.58e-06 ***
## bs(dis, df = 4)4 -0.38881    0.04551  -8.544  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06195 on 501 degrees of freedom
## Multiple R-squared:  0.7164, Adjusted R-squared:  0.7142 
## F-statistic: 316.5 on 4 and 501 DF,  p-value: < 2.2e-16
# Plot the regression spline fit
ggplot(Boston, aes(x = dis, y = nox)) +
  geom_point(color = "gray") +
  geom_smooth(method = "lm", formula = y ~ bs(x, df = 4), color = "red") +
  labs(title = "Regression Spline Fit: nox vs dis", x = "Distance to Employment Centers", y = "Nitrogen Oxides Concentration (nox)")

  • bs(dis, df = 4): we fit a cubic regression spline with 4 degrees of freedom, which lets the model flexibly capture the relationship between dis and nox.
  • Plot: the regression spline fit is visualized with geom_smooth(), analogous to the polynomial fit above but using the more flexible spline basis. Knot placement is examined in the sketch below.
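A quick check of knot placement (a minimal sketch): with the default cubic basis, df = 4 leaves room for exactly one internal knot, which bs() places at a quantile of dis (here, the median).

# df = 4 with a cubic basis implies one internal knot, chosen automatically from the data
attr(bs(Boston$dis, df = 4), "knots")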

(e)

# Fit regression splines for a range of degrees of freedom (3 to 12)
rss_spline <- rep(NA, 10)
for (df in 3:12) {
  fit_spline <- lm(nox ~ bs(dis, df = df), data = Boston)
  rss_spline[df - 2] <- sum(resid(fit_spline)^2)
}

# Plot the resulting fits and report the RSS
plot(3:12, rss_spline, type = "b", xlab = "Degrees of Freedom", ylab = "Residual Sum of Squares (RSS)",
     main = "RSS vs Degrees of Freedom for Regression Spline Fits")

  • rss_spline[df - 2]: We compute the residual sum of squares (RSS) for regression splines with different degrees of freedom ranging from 3 to 12.
  • The plot helps to see how the model fit improves with increasing degrees of freedom.
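A small follow-up (sketch): report which df attains the lowest training RSS. Since training RSS generally keeps decreasing as df grows, this is a measure of fit rather than a model-selection rule; cross-validation would be needed to choose df for prediction.

# Degrees of freedom attaining the lowest training RSS among df = 3, ..., 12
(3:12)[which.min(rss_spline)]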

(f)

# Fit a simple linear model as a baseline for comparison with the polynomial and spline fits above
fit_linear <- lm(nox ~ dis, data = Boston)

# Summarize the linear model
summary(fit_linear)
## 
## Call:
## lm(formula = nox ~ dis, data = Boston)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.12239 -0.05212 -0.01257  0.04391  0.23041 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.715343   0.006796  105.26   <2e-16 ***
## dis         -0.042331   0.001566  -27.03   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07412 on 504 degrees of freedom
## Multiple R-squared:  0.5917, Adjusted R-squared:  0.5909 
## F-statistic: 730.4 on 1 and 504 DF,  p-value: < 2.2e-16
# Plot the fitted linear model
plot(Boston$dis, Boston$nox, main = "Linear Fit: nox vs dis", xlab = "Distance to Employment Centers", ylab = "Nitrogen Oxides Concentration (nox)")
abline(fit_linear, col = "red")

Question 11

set.seed(123)  # For reproducibility

(a)

n <- 100  # Sample size
X1 <- rnorm(n, mean = 0, sd = sqrt(2))  # X1 with mean 0 and variance 2
X2 <- rnorm(n, mean = 0, sd = sqrt(2))  # X2 with mean 0 and variance 2
beta0 <- 1.5
beta1 <- 3
beta2 <- -4.5
epsilon <- rnorm(n, mean = 0, sd = 1)  # Error term with variance 1
Y <- beta0 + beta1 * X1 + beta2 * X2 + epsilon  # Response variable Y
Y
##   [1]   5.84191325   0.20105468   9.41783176   4.55408339   7.69024494
##   [6]   8.58671229   7.66200047   6.15289995   2.65658437  -6.29327034
##  [11]  10.47407012  -0.59881304  14.72894765   1.80711735  -5.15622857
##  [16]   8.84039905   2.49851830  -3.48928787   8.64681331   4.72691593
##  [21]  -4.35306414   7.22288810   1.37876838   0.74492550 -13.24974027
##  [26]  -1.44729996   2.85184197   0.93734906   3.67716256   6.25769550
##  [31]  -3.92843695  -2.71555607   5.24983067   7.17583053  17.47806994
##  [36]  -4.09514891  12.96260468  -3.05268386 -11.62324602   8.29314157
##  [41]  -6.70214223   1.78429758   7.63250279  19.20408313  16.63801093
##  [46]   2.01608407   8.99228510  -6.21756723  -9.22066915   9.82236678
##  [51]  -2.81400277  -4.07714484  -1.13992545  13.81425382   2.90083534
##  [56]   9.62969976  -7.57284562   6.98124593  -4.30559140   3.26707846
##  [61]  -4.10985815   5.55587321   8.15305410 -22.14710253   1.89871935
##  [66]   2.43743525  -0.78266252   3.04711154   1.73477971   7.93890405
##  [71]   1.63241736  -7.74996818   6.66809929 -16.44954330   4.14850372
##  [76]  12.37945114   0.22612925  -5.58040285  -0.08064612   3.85242795
##  [81]   6.62394635  -4.16775896   2.53861068   9.47629667   2.18637322
##  [86]   4.29649001  -0.68897618   4.44790004  -4.90062989   9.71951827
##  [91]   5.51874216   6.94712076   3.05619020   3.95660965  17.61713205
##  [96] -13.69017303   8.82403225  14.61445570   4.41042773   5.93952856
X1
##   [1] -0.79263226 -0.32552013  2.20434644  0.09971392  0.18284047  2.42546816
##   [7]  0.65183395 -1.78906676 -0.97135662 -0.63026120  1.73111308  0.50885359
##  [13]  0.56677642  0.15652900 -0.78607807  2.52707679  0.70406690 -2.78121665
##  [19]  0.99186703 -0.66862802 -1.51013077 -0.30826308 -1.45098941 -1.03080786
##  [25] -0.88393901 -2.38534456  1.18480980  0.21690234 -1.60956869  1.77316207
##  [31]  0.60311149 -0.41729409  1.26589885  1.24186829  1.16189111  0.97388439
##  [37]  0.78335786 -0.08755638 -0.43269655 -0.53806725 -0.98246403 -0.29403943
##  [43] -1.78954068  3.06736694  1.70831624 -1.58831539 -0.56976520 -0.65995033
##  [49]  1.10303725 -0.11790166  0.35824648 -0.04037121 -0.06062798  1.93549591
##  [55] -0.31928839  2.14461330 -2.19026722  0.82676869  0.17515635  0.30538750
##  [61]  0.53689131 -0.71039264 -0.47122640 -1.44048312 -1.51574169  0.42925432
##  [67]  0.63386435  0.07495930  1.30428316  2.89925757 -0.69442293 -3.26565794
##  [73]  1.42232906 -1.00296134 -0.97299112  1.45037694 -0.40272985 -1.72635554
##  [79]  0.25640184 -0.19642205  0.00815179  0.54486877 -0.52419244  0.91128605
##  [85] -0.31181509  0.46921055  1.55116461  0.61543957 -0.46093687  1.62465931
##  [91]  1.40502663  0.77555042  0.33761766 -0.88799329  1.92425315 -0.84889525
##  [97]  3.09335598  2.16743873 -0.33333064 -1.45157836
X2
##   [1] -1.00466660  0.36328843 -0.34887500 -0.49149946 -1.34579188 -0.06367882
##   [7] -1.11002255 -2.35882611 -0.53772150  1.29965747 -0.81366348  0.85979139
##  [13] -2.28803167 -0.07857649  0.73455271  0.42589517  0.14944871 -0.90609513
##  [19] -1.20166341 -1.44833683  0.16637741 -1.33993145 -0.69375299 -0.36216905
##  [25]  2.60761465 -0.92199639  0.33288688  0.11025329 -1.36027070 -0.10084486
##  [31]  2.04290342  0.63852316  0.05831216 -0.59750075 -2.90373007  1.59995243
##  [37] -2.06565700  1.04644381  2.69988016 -2.04197329  0.99247292 -0.37080325
##  [43] -2.22334759 -2.14206354 -2.26491418 -0.75081520 -2.06723457  0.97286123
##  [49]  2.97000255 -1.82013595  1.11403096  1.08758997  0.46980539 -1.42605988
##  [55] -0.16893150 -0.39653889  0.79618743 -0.52670794  1.38164901 -0.52973733
##  [61]  1.48875883 -1.48376035 -1.78212864  4.58352263 -0.58952565  0.42175750
##  [67]  0.90024547 -0.68416912  0.73095331  0.52179464 -0.30459403  0.09233829
##  [73] -0.04817837  3.01008554 -1.04840756 -1.54997279  0.05344087  0.43908609
##  [79]  0.61733742 -0.64822647 -1.50377024  1.78641361 -0.49448032 -1.22402003
##  [85] -0.33414977 -0.27884882  1.56966433  0.11983663  1.06639309 -0.70610554
##  [91]  0.30327147 -0.45917522  0.13376131 -1.26623500 -1.85375331  2.82448626
##  [97]  0.84953057 -1.76956493 -0.86431913 -1.67652201

(b)

beta1_init <- 0

(c)

a <- Y - beta1_init * X1
beta2_est <- lm(a ~ X2)$coef[2]

(d)

a <- Y - beta2_est * X2
beta1_est <- lm(a ~ X1)$coef[2]

(e)

beta1_vals <- c(beta1_init)  # Store beta1 values
beta2_vals <- c(beta2_est)   # Store beta2 values
convergence <- FALSE
iterations <- 0

# Perform backfitting loop
while (!convergence && iterations < 1000) {
  iterations <- iterations + 1
  # Update beta2 by keeping beta1 fixed
  a <- Y - beta1_vals[iterations] * X1
  beta2_new <- lm(a ~ X2)$coef[2]
  
  # Update beta1 by keeping beta2 fixed
  a <- Y - beta2_new * X2
  beta1_new <- lm(a ~ X1)$coef[2]
  
  # Store new values
  beta1_vals <- c(beta1_vals, beta1_new)
  beta2_vals <- c(beta2_vals, beta2_new)
  
  # Check for convergence (small change in coefficients)
  if (abs(beta1_new - beta1_vals[iterations]) < 0.0001 && abs(beta2_new - beta2_vals[iterations]) < 0.0001) {
    convergence <- TRUE
  }
}

# Plot the coefficient estimates over iterations (y-limits set to cover both series)
plot(1:iterations, beta1_vals[2:(iterations + 1)], type = "b", col = "blue",
     xlab = "Iteration", ylab = "Coefficient estimate",
     ylim = range(c(beta1_vals[-1], beta2_vals[-1])),
     main = "Backfitting: Convergence of Beta1 and Beta2")
lines(1:iterations, beta2_vals[2:(iterations + 1)], type = "b", col = "red")
legend("topright", legend = c("Beta1", "Beta2"), col = c("blue", "red"), lty = 1)

(f)

# Re-draw the convergence plot from part (e)
plot(1:iterations, beta1_vals[2:(iterations + 1)], type = "b", col = "blue",
     xlab = "Iteration", ylab = "Coefficient estimate",
     ylim = range(c(beta1_vals[-1], beta2_vals[-1])),
     main = "Backfitting vs. Multiple Linear Regression")
lines(1:iterations, beta2_vals[2:(iterations + 1)], type = "b", col = "red")

# Multiple linear regression of Y on X1 and X2
lm_fit <- lm(Y ~ X1 + X2)

# Overlay the lm() coefficient estimates as horizontal dashed lines
abline(h = coef(lm_fit)[2], col = "blue", lty = 2)  # beta1 estimate from lm()
abline(h = coef(lm_fit)[3], col = "red", lty = 2)   # beta2 estimate from lm()
legend("topright", legend = c("Beta1", "Beta2"), col = c("blue", "red"), lty = 1)

(g)

# Perform multiple linear regression
lm_fit <- lm(Y ~ X1 + X2)
beta0_true <- lm_fit$coef[1]
beta1_true <- lm_fit$coef[2]
beta2_true <- lm_fit$coef[3]

# Initialize backfitting
beta1_vals <- c(beta1_init)  # Store beta1 values
beta2_vals <- c(beta2_est)   # Store beta2 values
convergence <- FALSE
iterations <- 0
convergence_threshold <- 0.01  # Define good approximation as less than 0.01 difference

# Perform backfitting loop
while (!convergence && iterations < 1000) {
  iterations <- iterations + 1
  # Update beta2 by keeping beta1 fixed
  a <- Y - beta1_vals[iterations] * X1
  beta2_new <- lm(a ~ X2)$coef[2]
  
  # Update beta1 by keeping beta2 fixed
  a <- Y - beta2_new * X2
  beta1_new <- lm(a ~ X1)$coef[2]
  
  # Store new values
  beta1_vals <- c(beta1_vals, beta1_new)
  beta2_vals <- c(beta2_vals, beta2_new)
  
  # Check for convergence
  if (abs(beta1_new - beta1_true) < convergence_threshold && abs(beta2_new - beta2_true) < convergence_threshold) {
    convergence <- TRUE
  }
}

# Report the number of iterations required for good approximation
cat("Number of backfitting iterations for good approximation:", iterations, "\n")
## Number of backfitting iterations for good approximation: 2
  • Backfitting reached a good approximation of the multiple regression estimates in only 2 iterations.
  • Fast convergence is expected here: X1 and X2 were generated independently and are therefore nearly uncorrelated, so each backfitting update is almost unaffected by the current value of the other coefficient. The sketch below compares the final estimates directly.
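A minimal check (sketch, using the objects defined above): compare the final backfitting estimates with the lm() coefficients and confirm that X1 and X2 are nearly uncorrelated.

# Final backfitting estimates vs. multiple regression estimates
rbind(backfitting = c(beta1 = tail(beta1_vals, 1), beta2 = tail(beta2_vals, 1)),
      lm          = coef(lm_fit)[c("X1", "X2")])

# Near-zero correlation between the simulated predictors explains the fast convergence
cor(X1, X2)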