library(ISLR)
library(leaps)
library(lars)
## Loaded lars 1.3
library(ggplot2)
library(caret)
## Loading required package: lattice

Assignment 4 Leaps vs Lars

Write a program that:

takes a matrix or data frame as input, lets you choose a y variable and a set of x variables, and splits the rows (the units of the (X|y) matrix) at random into a training data set and a testing data set

data("Auto")

#removing unwanted columns (the name and origin columns)
auto_data <- Auto[, !names(Auto) %in% c("name", "origin")]
#using model.matrix
preparing_data <- function(df, target = "mpg") {
  X <- df[, setdiff(names(df), target)]
  y <- df[[target]]
  #making sure factors are numeric
  X[] <- lapply(X, function(col) if (is.factor(col)) as.numeric(col) else col)
  #creating 2nd order (with interactions) features
  X_poly <- model.matrix(~ .^2, data = X)[, -1]  #removing intercept column
  list(X = X_poly, y = y)
}
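To make the second-order expansion concrete, here is a minimal sketch on a made-up two-column data frame (the names `toy`, `a`, and `b` are purely illustrative and not used elsewhere): model.matrix(~ .^2) produces every main effect plus every pairwise interaction.

# toy illustration (sketch only, not used below)
toy <- data.frame(a = 1:3, b = 4:6)
colnames(model.matrix(~ .^2, data = toy))[-1]
# expected column names: "a" "b" "a:b"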

Setting our seed for repeatability, setting up our X's and y's, and splitting the data into training and testing sets (80/20)

preped_data <- preparing_data(auto_data, target = "mpg")
X <- preped_data$X
y <- preped_data$y

#splitting into training and testing sets (80/20 split)
set.seed(42)
train_index <- createDataPartition(y, p = 0.8, list = FALSE)

X_train <- X[train_index, ]
y_train <- y[train_index]

X_test  <- X[-train_index, ]
y_test  <- y[-train_index]
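
Note that createDataPartition draws the split with stratification on quantile groups of y, so the distribution of mpg is roughly balanced between the two sets. If a plain simple random selection of rows were preferred instead, a base-R sketch (not used below; `train_index_srs` is a hypothetical name) would be:

# base-R alternative split, sketch only
set.seed(42)
train_index_srs <- sample(seq_along(y), size = floor(0.8 * length(y)))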

Choosing our top 5 overall models for mpg (ranked by Mallows' Cp)

best_subset_selection <- function(X, y, num_models = 5) {
  
  train_df <- as.data.frame(cbind(y, X))
  form <- as.formula("y ~ .")
  
  # exhaustive best-subset search over all candidate predictors
  regfit <- regsubsets(form, data = train_df, nvmax = ncol(X), method = "exhaustive")
  reg_summary <- summary(regfit)
  
  # rank the subset sizes by Mallows' Cp and keep the num_models lowest
  cp_vals <- reg_summary$cp
  valid_idx <- which(!is.na(cp_vals))
  best_indices <- valid_idx[order(cp_vals[valid_idx])][1:num_models]
  
  best_formulas <- list()
  for(i in best_indices) {
    included <- reg_summary$which[i, ]
    
    predictors <- names(included)[included][-1] #excludes intercept
    if(length(predictors) == 0) next #skipping nulls
    best_formulas[[paste0("Model_", i)]] <- as.formula(paste("y ~", paste(predictors, collapse = " + ")))
  }
  list(best_formulas = best_formulas,
       cp_vals = cp_vals[best_indices])
}
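
The candidates are ranked by Mallows' Cp, which summary() of a regsubsets fit reports for each subset size. For a model with p coefficients (intercept included), the usual definition is

$$C_p = \frac{\mathrm{RSS}_p}{\hat{\sigma}^2} - n + 2p$$

where $\hat{\sigma}^2$ is the residual variance estimate from the largest model; values near p suggest the subset is not badly biased, and smaller is better.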

best_lars_model <- function(X_train, y_train) {
  # Fit the LARS model with type "lasso"
  lars_model <- lars(X_train, y_train, type = "lasso")
  
  # Use cross-validation to select the best tuning value; note that cv.lars indexes
  # the path by the fraction of the final L1 norm (mode "fraction"), not by integer step
  cv_model <- cv.lars(X_train, y_train, type = "lasso")
  best_step <- cv_model$index[which.min(cv_model$cv)]
  
  # Identify nonzero coefficients at the CV-selected fraction
  best_coefs <- predict(lars_model, s = best_step, type = "coefficients", mode = "fraction")$coefficients
  nonzero_coefs <- which(best_coefs != 0)
  cat("Non-zero coefficients in LARS model at best step (step", best_step, "):", 
      paste(names(nonzero_coefs), collapse = ", "), "\n")
  
  list(lars_model = lars_model,
       best_step = best_step,
       nonzero_coefs = nonzero_coefs)
}

bs_results <- best_subset_selection(X_train, y_train, num_models = 5)
best_formulas1 <- bs_results$best_formulas
cat("Top candidate models by Cp:\n")
## Top candidate models by Cp:
print(best_formulas1)
## $Model_9
## y ~ displacement + horsepower + weight + year + `cylinders:acceleration` + 
##     `displacement:weight` + `displacement:acceleration` + `displacement:year` + 
##     `horsepower:year`
## <environment: 0x00000151ceb111e8>
## 
## $Model_10
## y ~ displacement + horsepower + weight + acceleration + `cylinders:displacement` + 
##     `cylinders:horsepower` + `displacement:weight` + `displacement:year` + 
##     `horsepower:year` + `acceleration:year`
## <environment: 0x00000151ceb111e8>
## 
## $Model_11
## y ~ displacement + horsepower + weight + acceleration + year + 
##     `cylinders:acceleration` + `displacement:weight` + `displacement:acceleration` + 
##     `displacement:year` + `horsepower:year` + `acceleration:year`
## <environment: 0x00000151ceb111e8>
## 
## $Model_13
## y ~ cylinders + displacement + horsepower + weight + acceleration + 
##     year + `cylinders:acceleration` + `cylinders:year` + `displacement:weight` + 
##     `displacement:acceleration` + `displacement:year` + `horsepower:year` + 
##     `acceleration:year`
## <environment: 0x00000151ceb111e8>
## 
## $Model_12
## y ~ displacement + horsepower + weight + acceleration + year + 
##     `cylinders:displacement` + `cylinders:weight` + `displacement:weight` + 
##     `displacement:acceleration` + `displacement:year` + `horsepower:year` + 
##     `acceleration:year`
## <environment: 0x00000151ceb111e8>
cat("Cp values for these models:\n")
## Cp values for these models:
print(bs_results$cp_vals)
## [1] 8.708209 9.295648 9.484580 9.715710 9.934279

Creating a PRESS calculator

calc_press <- function(model) {
  resid <- residuals(model)
  hat_vals <- hatvalues(model)
  sum((resid / (1 - hat_vals))^2)
}
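
This relies on the standard leave-one-out identity: the prediction error for observation i when it is held out equals the ordinary residual $e_i$ inflated by its leverage $h_{ii}$, so PRESS can be computed from a single fit without refitting:

$$\mathrm{PRESS} = \sum_{i=1}^{n} \left( \frac{e_i}{1 - h_{ii}} \right)^{2}$$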

Obtaining our PRESS values for the top candidate models

train_df <- as.data.frame(cbind(y = y_train, X_train))
press_values <- sapply(best_formulas1, function(form) {
  mod <- lm(form, data = train_df)
  calc_press(mod)
})
cat("PRESS values for top models:\n")
## PRESS values for top models:
print(press_values)
##  Model_9 Model_10 Model_11 Model_13 Model_12 
## 2385.696 2393.448 2395.702 2392.663 2403.257

The top model is chosen as the one with the minimum PRESS value

best_model_name <- names(which.min(press_values))
best_formula1 <- best_formulas1[[best_model_name]]
cat("Selected model by min PRESS:\n")
## Selected model by min PRESS:
print(best_formula1)
## y ~ displacement + horsepower + weight + year + `cylinders:acceleration` + 
##     `displacement:weight` + `displacement:acceleration` + `displacement:year` + 
##     `horsepower:year`
## <environment: 0x00000151ceb111e8>
final_model <- lm(best_formula1, data = train_df)
cat("Final model summary:\n")
## Final model summary:
print(summary(final_model))
## 
## Call:
## lm(formula = best_formula1, data = train_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.7608 -1.4214 -0.0954  1.2451 11.3314 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 -9.348e+01  1.107e+01  -8.448 1.23e-15 ***
## displacement                -3.053e-01  7.706e-02  -3.962 9.27e-05 ***
## horsepower                   1.282e+00  2.007e-01   6.385 6.38e-10 ***
## weight                      -7.204e-03  9.036e-04  -7.972 3.16e-14 ***
## year                         1.936e+00  1.407e-01  13.760  < 2e-16 ***
## `cylinders:acceleration`     3.768e-02  1.883e-02   2.001 0.046283 *  
## `displacement:weight`        1.719e-05  2.345e-06   7.333 2.04e-12 ***
## `displacement:acceleration` -2.537e-03  7.324e-04  -3.464 0.000609 ***
## `displacement:year`          3.668e-03  1.053e-03   3.483 0.000568 ***
## `horsepower:year`           -1.830e-02  2.724e-03  -6.715 9.17e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.702 on 305 degrees of freedom
## Multiple R-squared:  0.8847, Adjusted R-squared:  0.8813 
## F-statistic:   260 on 9 and 305 DF,  p-value: < 2.2e-16

Evaluating the final model on the test data by comparing actual and predicted values

test_df <- as.data.frame(cbind(y = y_test, X_test))
preds <- predict(final_model, newdata = test_df)
results_df <- data.frame(actual = y_test, predicted = preds)
cat("Comparison of actual vs. predicted on test data (1st few rows):\n")
## Comparison of actual vs. predicted on test data (1st few rows):
print(head(results_df))
##    actual predicted
## 2      15  14.10196
## 13     15  14.77870
## 16     22  17.48390
## 17     18  17.67007
## 26     10  11.54983
## 34     19  18.66122
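
Correlation (used in the plots below) measures agreement only up to a linear rescaling; a test-set RMSE, in mpg units, is a useful complement. A quick sketch using the objects already defined above:

# sketch: test-set RMSE for the selected leaps model, in mpg units
sqrt(mean((y_test - preds)^2))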

The LARS fit is run here so the document knits cleanly to HTML (and from there to PDF), and so the fitted model can be reused in the by-origin comparison below

lars_results <- best_lars_model(X_train, y_train)

## Non-zero coefficients in LARS model at best step (step 0.5555556 ):
lars_model <- lars_results$lars_model
best_step <- lars_results$best_step

Generating predictions from the Leaps and LARS models in order to plot them against the actual test values

#generating predictions for Leaps model
preds_leaps <- predict(final_model, newdata = test_df)
corr_leaps <- cor(y_test, preds_leaps)

#generating predictions for LARS model 
preds_lars <- predict(lars_model, newx = X_test, s = best_step, type = "fit", mode = "fraction")$fit
corr_lars  <- cor(y_test, preds_lars)


par(mfrow = c(1, 2))

# Leaps plot
plot(y_test, preds_leaps,
     main = paste0("Leaps Model Predictions\nCorrelation: ", round(corr_leaps, 3)),
     xlab = "Actual MPG", ylab = "Predicted MPG")
abline(0, 1, col = "red")

# Lars plot
plot(y_test, preds_lars,
     main = paste0("LARS Model Predictions\nCorrelation: ", round(corr_lars, 3)),
     xlab = "Actual MPG", ylab = "Predicted MPG")
abline(0, 1, col = "red")

par(mfrow = c(1, 1))

Separating the data into 3 sets by origin (1 = American, 2 = European, 3 = Japanese)

auto_data1 <- Auto[Auto$origin == 1, ]
auto_data1 <- auto_data1[, !(names(auto_data1) %in% c("origin", "name"))]

auto_data2 <- Auto[Auto$origin == 2, ]
auto_data2 <- auto_data2[, !(names(auto_data2) %in% c("origin", "name"))]

auto_data3 <- Auto[Auto$origin == 3, ]
auto_data3 <- auto_data3[, !(names(auto_data3) %in% c("origin", "name"))]

Setting up our X's and y's for each origin and splitting each into training and testing sets (80/20)

preped_data1 <- preparing_data(auto_data1, target = "mpg")
X1 <- preped_data1$X
y1 <- preped_data1$y

set.seed(42)
train_index1 <- createDataPartition(y1, p = 0.8, list = FALSE)

X1_train <- X1[train_index1, ]
y1_train <- y1[train_index1]

X1_test  <- X1[-train_index1, ]
y1_test  <- y1[-train_index1]

preped_data2 <- preparing_data(auto_data2, target = "mpg")
X2 <- preped_data2$X
y2 <- preped_data2$y

set.seed(42)
train_index2 <- createDataPartition(y2, p = 0.8, list = FALSE)

X2_train <- X2[train_index2, ]
y2_train <- y2[train_index2]

X2_test  <- X2[-train_index2, ]
y2_test  <- y2[-train_index2]

preped_data3 <- preparing_data(auto_data3, target = "mpg")
X3 <- preped_data3$X
y3 <- preped_data3$y

set.seed(42)
train_index3 <- createDataPartition(y3, p = 0.8, list = FALSE)

X3_train <- X3[train_index3, ]
y3_train <- y3[train_index3]

X3_test  <- X3[-train_index3, ]
y3_test  <- y3[-train_index3]
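
The three per-origin preparations above repeat the same steps verbatim; a more compact equivalent (a sketch only; `split_by_origin` is a hypothetical name, and the explicit objects above are what the rest of the document uses) could loop over the origins:

# sketch: the same per-origin prep and 80/20 split in one lapply
split_by_origin <- lapply(split(Auto, Auto$origin), function(d) {
  d <- d[, !(names(d) %in% c("origin", "name"))]
  prep <- preparing_data(d, target = "mpg")
  set.seed(42)
  idx <- createDataPartition(prep$y, p = 0.8, list = FALSE)
  list(X_train = prep$X[idx, ], y_train = prep$y[idx],
       X_test  = prep$X[-idx, ], y_test  = prep$y[-idx])
})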

All of the above steps are now reused in a loop to compare the three origins

datasets <- list(
  origin1 = list(X_train = X1_train, y_train = y1_train,
                 X_test = X1_test, y_test = y1_test),
  origin2 = list(X_train = X2_train, y_train = y2_train,
                 X_test = X2_test, y_test = y2_test),
  origin3 = list(X_train = X3_train, y_train = y3_train,
                 X_test = X3_test, y_test = y3_test)
)

for(origin in names(datasets)) {
  data <- datasets[[origin]]
  bs_results <- best_subset_selection(data$X_train, data$y_train, num_models = 5)
  
  cat(sprintf("Top candidate models by Cp for country %s:\n", origin))
  print(bs_results$best_formulas)
  cat("Cp values for these models:\n")
  print(bs_results$cp_vals)
  cat("\n")
  
  train_df <- data.frame(y = data$y_train, data$X_train, check.names = FALSE)
  
  press_values <- sapply(bs_results$best_formulas, function(form) {
    mod <- lm(form, data = train_df)
    calc_press(mod)
  })
  cat("PRESS values for top models:\n")
  print(press_values)
  cat("\n")
  
  best_model_name <- names(which.min(press_values))
  best_formula <- bs_results$best_formulas[[best_model_name]]
  cat("Selected model by min PRESS:\n")
  print(best_formula)
  
  # Fit the final model using the selected formula
  final_model <- lm(best_formula, data = train_df)
  cat("Final model summary:\n")
  print(summary(final_model))
  cat("\n")
  
  test_df <- data.frame(y = data$y_test, data$X_test, check.names = FALSE)
  preds <- predict(final_model, newdata = test_df)
  results_df <- data.frame(actual = data$y_test, predicted = preds)
  
  cat("Comparison of actual vs. predicted on test data (1st few rows):\n")
  print(head(results_df))
  
  preds_leaps <- predict(final_model, newdata = test_df)
  corr_leaps <- cor(data$y_test, preds_leaps)
  
  # LARS predictions reuse the model fit earlier on the full (all-origin) training data,
  # evaluated at its CV-selected fraction and applied to this origin's test set
  preds_lars <- predict(lars_model, newx = data$X_test, s = best_step, type = "fit", mode = "fraction")$fit
  corr_lars  <- cor(data$y_test, preds_lars)
  
  par(mfrow = c(1, 2))
  #Leaps plot
  plot(data$y_test, preds_leaps,
       main = paste0("Leaps Model Predictions for ", origin, "\nCorrelation: ", round(corr_leaps, 3)),
       xlab = "Actual MPG", ylab = "Predicted MPG",
       cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8)
  abline(0, 1, col = "red")
  #LARS plot
  plot(data$y_test, preds_lars,
       main = paste0("LARS Model Predictions for ", origin, "\nCorrelation: ", round(corr_lars, 3)),
       xlab = "Actual MPG", ylab = "Predicted MPG",
       cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8)
  abline(0, 1, col = "red")
  par(mfrow = c(1, 1))

  cat("******************************************************************************************\n")
}
## Top candidate models by Cp for country origin1:
## $Model_8
## y ~ cylinders + horsepower + acceleration + `cylinders:weight` + 
##     `cylinders:acceleration` + `horsepower:weight` + `weight:year` + 
##     `acceleration:year`
## <environment: 0x00000151d9c29858>
## 
## $Model_9
## y ~ cylinders + horsepower + acceleration + `cylinders:weight` + 
##     `cylinders:acceleration` + `cylinders:year` + `horsepower:weight` + 
##     `weight:year` + `acceleration:year`
## <environment: 0x00000151d9c29858>
## 
## $Model_10
## y ~ displacement + acceleration + `cylinders:weight` + `cylinders:acceleration` + 
##     `cylinders:year` + `displacement:year` + `horsepower:weight` + 
##     `horsepower:year` + `weight:year` + `acceleration:year`
## <environment: 0x00000151d9c29858>
## 
## $Model_11
## y ~ displacement + horsepower + weight + acceleration + `cylinders:weight` + 
##     `cylinders:acceleration` + `cylinders:year` + `displacement:horsepower` + 
##     `displacement:year` + `weight:year` + `acceleration:year`
## <environment: 0x00000151d9c29858>
## 
## $Model_7
## y ~ cylinders + acceleration + `cylinders:weight` + `cylinders:acceleration` + 
##     `horsepower:acceleration` + `weight:year` + `acceleration:year`
## <environment: 0x00000151d9c29858>
## 
## Cp values for these models:
## [1] 1.007339 2.146979 2.282387 3.156909 3.787229
## 
## PRESS values for top models:
##  Model_8  Model_9 Model_10 Model_11  Model_7 
## 884.1332 887.9229 907.1569 952.1318 896.7424 
## 
## Selected model by min PRESS:
## y ~ cylinders + horsepower + acceleration + `cylinders:weight` + 
##     `cylinders:acceleration` + `horsepower:weight` + `weight:year` + 
##     `acceleration:year`
## <environment: 0x00000151d9c29858>
## Final model summary:
## 
## Call:
## lm(formula = best_formula, data = train_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.2002 -1.2647  0.0155  1.1076 11.8067 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               9.156e+01  5.762e+00  15.889  < 2e-16 ***
## cylinders                -5.751e+00  1.207e+00  -4.766 3.73e-06 ***
## horsepower               -1.398e-01  4.346e-02  -3.216  0.00153 ** 
## acceleration             -7.412e+00  4.475e-01 -16.564  < 2e-16 ***
## `cylinders:weight`        7.737e-04  2.705e-04   2.860  0.00471 ** 
## `cylinders:acceleration`  2.127e-01  4.738e-02   4.490 1.24e-05 ***
## `horsepower:weight`       2.939e-05  1.045e-05   2.813  0.00542 ** 
## `weight:year`            -1.745e-04  1.664e-05 -10.483  < 2e-16 ***
## `acceleration:year`       7.671e-02  4.408e-03  17.402  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.056 on 189 degrees of freedom
## Multiple R-squared:  0.9008, Adjusted R-squared:  0.8966 
## F-statistic: 214.5 on 8 and 189 DF,  p-value: < 2.2e-16
## 
## 
## Comparison of actual vs. predicted on test data (1st few rows):
##    actual predicted
## 1      18  15.57781
## 2      15  14.23848
## 13     15  15.27868
## 14     14  12.51643
## 25     21  19.94057
## 26     10  12.09805

## ******************************************************************************************
## Top candidate models by Cp for country origin2:
## $Model_5
## y ~ horsepower + acceleration + year + `horsepower:year` + `weight:acceleration`
## <environment: 0x00000151d1e90400>
## 
## $Model_6
## y ~ horsepower + year + `cylinders:acceleration` + `cylinders:year` + 
##     `horsepower:year` + `weight:acceleration`
## <environment: 0x00000151d1e90400>
## 
## $Model_7
## y ~ horsepower + weight + acceleration + year + `horsepower:year` + 
##     `weight:acceleration` + `weight:year`
## <environment: 0x00000151d1e90400>
## 
## $Model_8
## y ~ cylinders + horsepower + acceleration + year + `cylinders:weight` + 
##     `horsepower:weight` + `horsepower:year` + `weight:acceleration`
## <environment: 0x00000151d1e90400>
## 
## $Model_4
## y ~ horsepower + weight + year + `horsepower:year`
## <environment: 0x00000151d1e90400>
## 
## Cp values for these models:
## [1] -1.7613949 -0.4831622  0.7532218  0.9017915  1.5105327
## 
## PRESS values for top models:
##  Model_5  Model_6  Model_7  Model_8  Model_4 
## 481.2531 478.8528 481.6577 494.7330 521.8340 
## 
## Selected model by min PRESS:
## y ~ horsepower + year + `cylinders:acceleration` + `cylinders:year` + 
##     `horsepower:year` + `weight:acceleration`
## <environment: 0x00000151d1e90400>
## Final model summary:
## 
## Call:
## lm(formula = best_formula, data = train_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.7495 -1.4228  0.2323  1.2989  8.6054 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -2.193e+02  3.699e+01  -5.929 3.01e-07 ***
## horsepower                2.111e+00  4.714e-01   4.479 4.50e-05 ***
## year                      3.521e+00  4.789e-01   7.351 1.90e-09 ***
## `cylinders:acceleration`  3.301e-01  9.244e-02   3.571 0.000809 ***
## `cylinders:year`         -7.124e-02  2.337e-02  -3.048 0.003707 ** 
## `horsepower:year`        -2.874e-02  6.280e-03  -4.576 3.25e-05 ***
## `weight:acceleration`    -3.749e-04  8.085e-05  -4.637 2.66e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.807 on 49 degrees of freedom
## Multiple R-squared:  0.8338, Adjusted R-squared:  0.8134 
## F-statistic: 40.97 on 6 and 49 DF,  p-value: < 2.2e-16
## 
## 
## Comparison of actual vs. predicted on test data (1st few rows):
##     actual predicted
## 51      28  24.16980
## 53      30  23.75635
## 78      22  23.71001
## 80      26  25.58853
## 103     26  29.91588
## 121     19  22.21875

## ******************************************************************************************
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 3 linear dependencies found
## Reordering variables and trying again:
## Top candidate models by Cp for country origin3:
## $Model_6
## y ~ year + `cylinders:displacement` + `cylinders:horsepower` + 
##     `displacement:acceleration` + `displacement:year` + `horsepower:year`
## <environment: 0x00000151d00c0d68>
## 
## $Model_5
## y ~ weight + acceleration + `displacement:acceleration` + `displacement:year` + 
##     `weight:acceleration`
## <environment: 0x00000151d00c0d68>
## 
## $Model_7
## y ~ displacement + weight + `cylinders:displacement` + `cylinders:horsepower` + 
##     `displacement:acceleration` + `horsepower:year` + `weight:year`
## <environment: 0x00000151d00c0d68>
## 
## $Model_8
## y ~ displacement + weight + `cylinders:displacement` + `cylinders:horsepower` + 
##     `displacement:acceleration` + `displacement:year` + `horsepower:year` + 
##     `weight:year`
## <environment: 0x00000151d00c0d68>
## 
## $Model_10
## y ~ displacement + weight + `cylinders:displacement` + `cylinders:horsepower` + 
##     `displacement:horsepower` + `displacement:acceleration` + 
##     `horsepower:weight` + `horsepower:year` + `weight:acceleration` + 
##     `weight:year`
## <environment: 0x00000151d00c0d68>
## 
## Cp values for these models:
## [1] -0.0645841  1.2196160  1.2789209  2.4662333  3.0777192
## 
## PRESS values for top models:
##  Model_6  Model_5  Model_7  Model_8 Model_10 
## 513.6535 598.7956 548.8406 544.5331 630.6537 
## 
## Selected model by min PRESS:
## y ~ year + `cylinders:displacement` + `cylinders:horsepower` + 
##     `displacement:acceleration` + `displacement:year` + `horsepower:year`
## <environment: 0x00000151d00c0d68>
## Final model summary:
## 
## Call:
## lm(formula = best_formula, data = train_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.6496 -1.6434 -0.0782  1.3417  9.5466 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 -53.249604  13.034385  -4.085 0.000163 ***
## year                          1.402686   0.166081   8.446 4.01e-11 ***
## `cylinders:displacement`     -0.174018   0.043717  -3.981 0.000227 ***
## `cylinders:horsepower`        0.259501   0.058034   4.472 4.61e-05 ***
## `displacement:acceleration`  -0.006397   0.003254  -1.966 0.054999 .  
## `displacement:year`           0.007557   0.002262   3.341 0.001603 ** 
## `horsepower:year`            -0.013781   0.002799  -4.923 1.01e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.969 on 49 degrees of freedom
## Multiple R-squared:  0.7467, Adjusted R-squared:  0.7157 
## F-statistic: 24.07 on 6 and 49 DF,  p-value: 4.738e-13
## 
## 
## Comparison of actual vs. predicted on test data (1st few rows):
##     actual predicted
## 55      35  30.67135
## 72      19  22.50731
## 109     20  26.16063
## 112     18  24.69484
## 124     20  26.06789
## 150     24  24.37393

## ******************************************************************************************
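
To condense the long per-origin output above, the LARS test correlations can be collected into a single table (a sketch; the leaps correlations would need to be saved inside the loop to be reported alongside them):

# sketch: LARS test correlation for each origin, using the model fit on the full training data
lars_corr_by_origin <- sapply(datasets, function(d) {
  p <- predict(lars_model, newx = d$X_test, s = best_step,
               type = "fit", mode = "fraction")$fit
  cor(d$y_test, p)
})
round(lars_corr_by_origin, 3)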