Module 9 Lab

Part 1 - Simple Linear Regression

cmedv, lon, lat, crim, zn, indus, chas, nox, rm, age, dis, rad, tax, ptratio, b, lstat

Questions 1, 2, 3

# Install vip only when it is not already available, so that re-running or
# re-knitting this lab does not download and reinstall the package every time.
if (!requireNamespace("vip", quietly = TRUE)) {
  install.packages("vip", repos = "https://cran.rstudio.com/")
}

## Installing package into 'C:/Users/Senge/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)

## package 'vip' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\Senge\AppData\Local\Temp\RtmpMJhsfD\downloaded_packages

library(vip)

## Warning: package 'vip' was built under R version 4.4.3

## 
## Attaching package: 'vip'

## The following object is masked from 'package:utils':
## 
##     vi

library(tidymodels)

## Warning: package 'tidymodels' was built under R version 4.4.3

## ── Attaching packages ────────────────────────────────────── tidymodels 1.3.0 ──

## ✔ broom        1.0.7     ✔ recipes      1.1.1
## ✔ dials        1.4.0     ✔ rsample      1.2.1
## ✔ dplyr        1.1.4     ✔ tibble       3.2.1
## ✔ ggplot2      3.5.1     ✔ tidyr        1.3.1
## ✔ infer        1.0.7     ✔ tune         1.3.0
## ✔ modeldata    1.4.0     ✔ workflows    1.2.0
## ✔ parsnip      1.3.0     ✔ workflowsets 1.1.0
## ✔ purrr        1.0.4     ✔ yardstick    1.3.2

## Warning: package 'dials' was built under R version 4.4.3

## Warning: package 'infer' was built under R version 4.4.3

## Warning: package 'modeldata' was built under R version 4.4.3

## Warning: package 'parsnip' was built under R version 4.4.3

## Warning: package 'purrr' was built under R version 4.4.3

## Warning: package 'recipes' was built under R version 4.4.3

## Warning: package 'rsample' was built under R version 4.4.3

## Warning: package 'tune' was built under R version 4.4.3

## Warning: package 'workflows' was built under R version 4.4.3

## Warning: package 'workflowsets' was built under R version 4.4.3

## Warning: package 'yardstick' was built under R version 4.4.3

## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ✖ recipes::step()  masks stats::step()

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ lubridate 1.9.4     ✔ stringr   1.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ purrr::discard()    masks scales::discard()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ stringr::fixed()    masks recipes::fixed()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ readr::spec()       masks yardstick::spec()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# NOTE: `warning=FALSE` and `messages=FALSE` used to sit here as R statements.
# They are knitr chunk options, not code: in the chunk body they only created
# two unused variables and suppressed nothing. To silence output, put
# `warning = FALSE, message = FALSE` in the chunk header instead
# (the option is spelled `message`, not `messages`).

# Read boston.csv; the path is relative to the current working directory.
boston <- readr::read_csv("boston.csv")

## Rows: 506 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (16): lon, lat, cmedv, crim, zn, indus, chas, nox, rm, age, dis, rad, ta...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Fix the RNG seed so the partition below is reproducible.
set.seed(123)

# 70% of the rows go to training, stratified on the response `cmedv`;
# the remainder forms the test set.
split <- boston %>%
  initial_split(prop = 0.7, strata = cmedv)
train <- split %>% training()
test <- split %>% testing()

Question 3

# Correlation matrix of every column in the training set, then the single
# row holding each variable's correlation with `cmedv`.
correlation_data <- train %>% cor()
corr_cmedv <- correlation_data["cmedv", , drop = TRUE]
print(corr_cmedv)

##          lon          lat        cmedv         crim           zn        indus 
## -0.315456520 -0.002024587  1.000000000 -0.384298342  0.344272023 -0.489537963 
##         chas          nox           rm          age          dis          rad 
##  0.164575979 -0.439021014  0.708152619 -0.398915864  0.271455846 -0.396999035 
##          tax      ptratio            b        lstat 
## -0.479383777 -0.500927283  0.358302318 -0.742823016

Question 4

# Scatter plot of the response (cmedv) against the predictor (rm).
# The predictor goes on the x-axis and the response on the y-axis so that the
# fitted line from geom_smooth(method = "lm") is the regression of cmedv on rm,
# i.e. the same relationship as the `cmedv ~ rm` model fitted in Question 5.
ggplot(train, aes(x = rm, y = cmedv)) +
  geom_point(size = 1.5, alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE)

## `geom_smooth()` using formula = 'y ~ x'

Question 5

# Simple linear regression of cmedv on rm, fitted on the training set.
model1 <- fit(linear_reg(), cmedv ~ rm, data = train)

# Coefficient table (estimate, std.error, statistic, p.value).
model1 %>% tidy()

## # A tibble: 2 × 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)   -35.4      3.11      -11.4 8.70e-26
## 2 rm              9.22     0.491      18.8 7.46e-55

Question 6

# Test-set RMSE: predict on the held-out rows, attach the predictions to the
# test data, and compare `.pred` against the observed `cmedv`.
rmse(
  bind_cols(predict(model1, new_data = test), test),
  truth = cmedv,
  estimate = .pred
)

## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rmse    standard        6.83

Module 9 Lab

Seth Engelhardt

2025-03-12

Part 1 - Simple Linear Regression

Part 2: Multiple Linear Regression