Part 1: Simple linear regression
1. Import data set
# Read the Boston data from the project-relative CSV file into a tibble.
boston <- readr::read_csv(file = "data/boston.csv")
2. Split the data
# Fix the RNG seed so the random partition below is reproducible.
set.seed(123)

# Hold 70% of the rows for training, stratifying the sampling on cmedv.
split <- boston %>%
  initial_split(prop = 0.7, strata = cmedv)

# Materialise the two partitions from the split object.
train <- split %>% training()
test <- split %>% testing()
3. Assess the correlation between cmedv and predictor variables
# Correlation matrix of the training data, reduced to the cmedv column and
# sorted from the most positive to the most negative correlation.
train %>%
  cor() %>%
  as_tibble(rownames = "predictor_vars") %>%
  select(predictor_vars, cmedv) %>%
  arrange(desc(cmedv))
The predictor variable with the strongest positive correlation with
cmedv is rm, and the one with the strongest negative correlation is lstat.
4. Plot relationship between cmedv and rm
# Scatter plot of cmedv against rm with a fitted linear trend line on top;
# points are semi-transparent so overlapping observations stay visible.
ggplot(train, aes(rm, cmedv)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm")

The fitted regression line appears to capture the relationship between rm and cmedv accurately.
5. Train a simple regression model with rm
# Fit a linear regression of cmedv on rm alone using the training data.
model_rm <- fit(linear_reg(), cmedv ~ rm, data = train)

# Show the coefficient table for the fitted model.
model_rm %>% tidy()
Our intercept is -35.429, which means that if a house had 0 rooms we
would predict its median sale price to be -$35,429 (reading cmedv in
thousands of dollars). Our coefficient for rm is 9.217, which means that
for each additional room the predicted cmedv increases by $9,217. Both
estimates are statistically different from 0, as their p-values are both
less than 0.05.
# Confidence intervals for the coefficients, computed on the underlying
# fitted object stored in the `fit` element of the model.
confint(model_rm[["fit"]])
2.5 % 97.5 %
(Intercept) -41.546106 -29.31285
rm 8.250909 10.18311
The confidence intervals also confirm that both estimates are
statistically different from 0, since neither interval contains 0.
6. Compute the generalization RMSE for the model and interpret the
results.
# Pair the observed cmedv values from the test set with the model's
# predictions for the same rows, then compute the RMSE between them.
bind_cols(
  select(test, cmedv),
  predict(model_rm, new_data = test)
) %>%
  rmse(truth = cmedv, estimate = .pred)
The generalization RMSE for this model is 6.83. This means that, on
average, our model's predictions on the test set are off by $6,831.41
(reading cmedv in thousands of dollars).
7. Create a multiple linear regression model using all
variables
# Fit a linear regression of cmedv on every other column of the training data.
model_all <- fit(linear_reg(), cmedv ~ ., data = train)

# Show the coefficient table for the fitted model.
model_all %>% tidy()
8. Compute RMSE
# Pair the observed cmedv values from the test set with the full model's
# predictions for the same rows, then compute the RMSE between them.
bind_cols(
  select(test, cmedv),
  predict(model_all, new_data = test)
) %>%
  rmse(truth = cmedv, estimate = .pred)
This RMSE is better (lower) than the RMSE of the simple model that uses only rm.
9. Rank the predictors by the absolute value of their t-statistic
# Order the coefficient table of the full model by the magnitude of the
# test statistic, largest first, and display it.
top_predictors <- model_all %>%
  tidy() %>%
  arrange(desc(abs(statistic)))

top_predictors
The five most influential predictors are rm, lstat, ptratio, dis, and b.
LS0tDQp0aXRsZTogIk1vZHVsZSA5IExhYiINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQ0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KA0KICBmaWcuYWxpZ24gPSAiY2VudGVyIiwNCiAgZXJyb3IgPSBGQUxTRSwNCiAgbWVzc2FnZSA9IEZBTFNFLA0KICB3YXJuaW5nID0gRkFMU0UsDQogIGNvbGxhcHNlID0gVFJVRQ0KKQ0KYGBgDQoNCmBgYHtyfQ0KbGlicmFyeSh0aWR5bW9kZWxzKQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpgYGANCg0KIyBQYXJ0IDE6IFNpbXBsZSBsaW5lYXIgcmVncmVzc2lvbg0KDQojIyAxLiBJbXBvcnQgZGF0YSBzZXQNCmBgYHtyfQ0KYm9zdG9uIDwtIHJlYWRyOjpyZWFkX2NzdignZGF0YS9ib3N0b24uY3N2JykNCmBgYA0KDQojIyAyLiBTcGxpdCB0aGUgZGF0YQ0KYGBge3J9DQpzZXQuc2VlZCgxMjMpDQpzcGxpdCA8LSBpbml0aWFsX3NwbGl0KGJvc3RvbiwgcHJvcCA9IC43LCBzdHJhdGEgPSBjbWVkdikNCnRyYWluIDwtIHRyYWluaW5nKHNwbGl0KQ0KdGVzdCA8LSB0ZXN0aW5nKHNwbGl0KQ0KYGBgDQoNCiMjIDMuQXNzZXNzIHRoZSBjb3JyZWxhdGlvbiBiZXR3ZWVuIGNtZWR2IGFuZCBwcmVkaWN0b3IgdmFyaWFibGVzDQpgYGB7cn0NCmNvcih0cmFpbikgJT4lDQogIGFzX3RpYmJsZShyb3duYW1lcyA9ICdwcmVkaWN0b3JfdmFycycpICU+JQ0KICBzZWxlY3QocHJlZGljdG9yX3ZhcnMsIGNtZWR2KSAlPiUNCiAgYXJyYW5nZShkZXNjKGNtZWR2KSkNCmBgYA0KcHJlZGljdG9yIHZhcmlhYmxlIHdpdGggdGhlIHN0cm9uZ2VzdCBwb3NpdGl2ZSBjb3JyZWxhdGlvbiBpcyBybSwgYW5kIHRoZSBzdHJvbmdlc3QgbmVnYXRpdmUgY29ycmVsYXRpb24gaXMgbHN0YXQuDQoNCiMjIDQuIFBsb3QgcmVsYXRpb25zaGlwIGJldHdlZW4gY21lZHYgYW5kIHJtDQpgYGB7cn0NCnRyYWluICU+JQ0KICBnZ3Bsb3QoYWVzKHggPSBybSwgeSA9IGNtZWR2KSkgKw0KICBnZW9tX3BvaW50KGFscGhhID0gLjIpICsNCiAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIikNCmBgYA0KIGFjY3VyYXRlIGNhcHR1cmUgb2YgcmVsYXRpb25zaGlwDQoNCiMjIDUuIFRyYWluIGEgc2ltcGxlIHJlZ3Jlc3Npb24gbW9kZWwgd2l0aCBybQ0KYGBge3J9DQptb2RlbF9ybSA8LSBsaW5lYXJfcmVnKCkgJT4lDQogIGZpdChjbWVkdiB+IHJtLCBkYXRhID0gdHJhaW4pDQp0aWR5KG1vZGVsX3JtKQ0KYGBgDQpvdXIgaW50ZXJjZXB0IHZhbHVlIGlzIC0zNS40MjkgdGhpcyBtZWFucyB0aGF0IGlmIGEgaG91c2UgaGFzIDAgcm9vbXMsIHdlIHByZWRpY3QgdGhhdCBpdHMgbWVkaWFuIHNhbGUgcHJpY2Ugd291bGQgYmUgLTM1LDQyOS4gT3VyIGNvZWZmaWNpZW50IGZvciBybSBpcyA5LjIxNywgdGhpcyBtZWFucyB0aGF0IGlmIG91ciBudW1iZXIgb2Ygcm9vbXMgaW5jcmVhc2UgYnkgMSwgdGhlIHByZWRpY3RlZCBjbWVkdiBpbmNyZWFzZXMgYnkgOSwyMTcuIEJvdGggb2Ygb3VyIHZhbHVlcyBhcmUg
c3RhdGlzdGljYWxseSBkaWZmZXJlbnQgdGhhbiAwLCBhcyB0aGVpciBwIHZhbHVlcyBhcmUgYm90aCBsZXNzIHRoYW4gMC4wNS4NCg0KYGBge3J9DQpjb25maW50KG1vZGVsX3JtJGZpdCkNCmBgYA0KVGhlIGNvbmZpZGVuY2UgaW50ZXJ2YWwgYWxzbyBjb25maXJtcyB0aGF0IHRoZXNlIGFyZSBzdGF0aXN0aWNhbGx5IGRpZmZlcmVudCB0aGFuIDAuDQoNCiMjIDYuIGNvbXB1dGUgdGhlIGdlbmVyYWxpemF0aW9uIFJNU0UgZm9yIHRoZSBtb2RlbCBhbmQgaW50ZXJwcmV0IHRoZSByZXN1bHRzLg0KYGBge3J9DQptb2RlbF9ybSAlPiUNCiAgcHJlZGljdCh0ZXN0KSAlPiUNCiAgYmluZF9jb2xzKHRlc3QgJT4lIHNlbGVjdChjbWVkdikpICU+JQ0KICBybXNlKHRydXRoID0gY21lZHYsIGVzdGltYXRlID0gLnByZWQpDQpgYGANClRoZSBnZW5lcmFsaXphdGlvbiBSTVNFIGZvciB0aGlzIG1vZGVsIGlzIDYuODMsIFRoaXMgbWVhbnMgdGhhdCwgb24gYXZlcmFnZSwgb3VyIG1vZGVsIGlzIG9mZiBieSA2LDgzMS40MS4NCg0KIyMgNy4gQ3JlYXRlIGEgbXVsdGlwbGUgbGluZWFyIHJlZ3Jlc3Npb24gbW9kZWwgdXNpbmcgYWxsIHZhcmlhYmxlcw0KYGBge3J9DQptb2RlbF9hbGwgPC0gbGluZWFyX3JlZygpICU+JQ0KICBmaXQoY21lZHYgfiAuLCBkYXRhID0gdHJhaW4pDQoNCnRpZHkobW9kZWxfYWxsKQ0KYGBgDQoNCiMjIDguIENvbXB1dGUgUk1TRQ0KYGBge3J9DQptb2RlbF9hbGwgJT4lDQogIHByZWRpY3QodGVzdCkgJT4lIA0KICBiaW5kX2NvbHModGVzdCAlPiUgc2VsZWN0KGNtZWR2KSkgJT4lDQogIHJtc2UodHJ1dGggPSBjbWVkdiwgZXN0aW1hdGUgPSAucHJlZCkNCmBgYA0KVGhpcyBSTVNFIGlzIEJldHRlci4NCg0KIyMgOS4gDQpgYGB7cn0NCnRvcF9wcmVkaWN0b3JzIDwtIHRpZHkobW9kZWxfYWxsKSAlPiUNCiAgYXJyYW5nZShkZXNjKGFicyhzdGF0aXN0aWMpKSkNCg0KdG9wX3ByZWRpY3RvcnMNCmBgYA0Kcm0sIGxzdGF0LCBwdHJhdGlvLCBkaXMsIGINCg0KDQo=