# Install tidymodels only when it is not already available, then attach it.
# (Plain ASCII double quotes are required; typographic quotes do not parse.)
if (!requireNamespace("tidymodels", quietly = TRUE)) {
  install.packages("tidymodels")
}
library(tidymodels)
#Are there any missing values? What are the minimum and maximum values of cmedv? What is the average cmedv value?
library(readr)

# Read the boston data set from the local CSV file into a tibble.
boston <- read_csv("~/Documents/boston.csv")

# View() opens the data viewer, which is only meaningful in a live session,
# so skip it when the script is run non-interactively.
if (interactive()) {
  View(boston)
}
#Fill in the blanks to split the data into a training set and test set using a 70-30% split. Be sure to include the set.seed(123) so that your train and test sets are the same size as mine.
# Fix the RNG seed so the random split is reproducible.
set.seed(123)

# 70/30 split of boston, stratified on the outcome cmedv.
split <- initial_split(boston, prop = 0.7, strata = cmedv)
train <- training(split)
test <- testing(split)
# How many observations are in the training set and test set?
# dim() reports rows (observations) and columns for each set.
dim(train)
dim(test)
#Compare the distribution of cmedv between the training set and test set. Do they appear to have the same distribution or do they differ significantly?
# Overlay the density of cmedv in the training set (default colour) and in
# the test set (blue) to compare the two distributions.
ggplot(train, aes(x = cmedv)) +
  geom_line(stat = "density", trim = TRUE) +
  geom_line(data = test, stat = "density", trim = TRUE, colour = "blue")
# Fill in the blanks to fit a linear regression model using the rm feature
# variable to predict cmedv and compute the RMSE on the test data.
# What is the test set RMSE?

# Single-predictor model: the formula names rm explicitly, as the exercise
# asks; `cmedv ~ .` would fit every available feature instead.
LM1 <- linear_reg() %>%
  fit(cmedv ~ rm, data = train)

# Predict on the test set, attach the observed cmedv, and compute the RMSE.
LM1 %>%
  predict(test) %>%
  bind_cols(test %>% select(cmedv)) %>%
  rmse(truth = cmedv, estimate = .pred)
# Recorded output: rmse standard 6.831405
#Fill in the blanks to fit a linear regression model using all available features to predict cmedv and compute the RMSE on the test data. What is the test set RMSE? Is this better than the previous model’s performance?
# Linear regression of cmedv on all other columns of the training set.
LM2 <- linear_reg() %>%
  fit(cmedv ~ ., data = train)

# Predict on the test set, attach the observed cmedv, and compute the RMSE.
LM2 %>%
  predict(test) %>%
  bind_cols(test %>% select(cmedv)) %>%
  rmse(truth = cmedv, estimate = .pred)
# Recorded output: rmse standard 4.829261
# Install kknn only when it is not already available, then attach it.
# (Plain ASCII double quotes are required; typographic quotes do not parse.)
if (!requireNamespace("kknn", quietly = TRUE)) {
  install.packages("kknn")
}
library(kknn)
# Nearest-neighbour regression of cmedv on all other columns of the training
# set, using the kknn engine.
KNN <- nearest_neighbor() %>%
  set_engine("kknn") %>%
  set_mode("regression") %>%
  fit(cmedv ~ ., data = train)

# Predict on the test set, attach the observed cmedv, and compute the RMSE.
KNN %>%
  predict(test) %>%
  bind_cols(test %>% select(cmedv)) %>%
  rmse(truth = cmedv, estimate = .pred)
# Recorded output: rmse standard 3.37
# Plots the `cars` object.
# NOTE(review): this looks like the default R Notebook template chunk and
# appears unrelated to the boston analysis above — confirm before removing.
plot(cars)
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.