# Loading the necessary libraries
library(ggplot2)
library(readr)
library(lubridate)
library(dplyr)
library(broom)
library(modelr)
library(rsample)
library(tidyverse)
library(tidymodels)
library(performance)
dfb_org <- read_csv("C:/Users/eafre/OneDrive/Desktop/projects/ongoing_projects/capital_bike_share_clean.csv")
## Rows: 731 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): DATE, HOLIDAY, WEEKDAY
## dbl (7): WEATHERSIT, TEMP, ATEMP, HUMIDITY, WINDSPEED, CASUAL, REGISTERED
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dfb_org$DATE <- as.Date(dfb_org$DATE, format = "%m/%d/%Y")
str(dfb_org)
## spc_tbl_ [731 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ DATE : Date[1:731], format: "2011-01-01" "2011-01-02" ...
## $ HOLIDAY : chr [1:731] "NO" "NO" "NO" "NO" ...
## $ WEEKDAY : chr [1:731] "NO" "NO" "YES" "YES" ...
## $ WEATHERSIT: num [1:731] 2 2 1 1 1 1 2 2 1 1 ...
## $ TEMP : num [1:731] 11 9 1 2 2.5 2 1 1 2 2 ...
## $ ATEMP : num [1:731] 11 6.5 4 2.5 1 2 3 5 8.5 6 ...
## $ HUMIDITY : num [1:731] 81 71.5 44 64 42.5 52 47.5 51 46 50 ...
## $ WINDSPEED : num [1:731] 17 17 18 9 13 6 11 17 25 15 ...
## $ CASUAL : num [1:731] 331 131 120 108 82 88 148 68 54 41 ...
## $ REGISTERED: num [1:731] 654 670 1229 1454 1518 ...
## - attr(*, "spec")=
## .. cols(
## .. DATE = col_character(),
## .. HOLIDAY = col_character(),
## .. WEEKDAY = col_character(),
## .. WEATHERSIT = col_double(),
## .. TEMP = col_double(),
## .. ATEMP = col_double(),
## .. HUMIDITY = col_double(),
## .. WINDSPEED = col_double(),
## .. CASUAL = col_double(),
## .. REGISTERED = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# We'll create a COUNT variable, and extract MONTH from DATE with lubridate::month() and convert MONTH to a factor
dfb_int <-
dfb_org %>%
mutate(COUNT = CASUAL + REGISTERED, MONTH = lubridate::month(DATE)) %>%
mutate(MONTH = as.factor(MONTH))
str(dfb_int)
## tibble [731 × 12] (S3: tbl_df/tbl/data.frame)
## $ DATE : Date[1:731], format: "2011-01-01" "2011-01-02" ...
## $ HOLIDAY : chr [1:731] "NO" "NO" "NO" "NO" ...
## $ WEEKDAY : chr [1:731] "NO" "NO" "YES" "YES" ...
## $ WEATHERSIT: num [1:731] 2 2 1 1 1 1 2 2 1 1 ...
## $ TEMP : num [1:731] 11 9 1 2 2.5 2 1 1 2 2 ...
## $ ATEMP : num [1:731] 11 6.5 4 2.5 1 2 3 5 8.5 6 ...
## $ HUMIDITY : num [1:731] 81 71.5 44 64 42.5 52 47.5 51 46 50 ...
## $ WINDSPEED : num [1:731] 17 17 18 9 13 6 11 17 25 15 ...
## $ CASUAL : num [1:731] 331 131 120 108 82 88 148 68 54 41 ...
## $ REGISTERED: num [1:731] 654 670 1229 1454 1518 ...
## $ COUNT : num [1:731] 985 801 1349 1562 1600 ...
## $ MONTH : Factor w/ 12 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
#We'd also like to rename the values in MONTH to their respective names
dfb_int$MONTH <- factor(dfb_int$MONTH, levels = 1:12, labels = c("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"))
head(dfb_int)
Let’s examine what the overall trend in ridership over time looks like for both Registered and Casual ridership. That is, users who have subscribed to the service vs. those who have not (e.g., tourists).
registered <-
ggplot(dfb_int, aes(x = MONTH, y = COUNT, fill = MONTH)) +
geom_boxplot() +
scale_x_discrete(labels = c("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December")) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(x = "Month", y = "Rentals", title = "Registered Ridership by Month")
registered <-
registered + theme(legend.position = "none")
plotly::ggplotly(registered)
casual <-
ggplot(dfb_int, aes(x = MONTH, y = CASUAL, fill = MONTH)) +
geom_boxplot() +
scale_x_discrete(labels = c("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December")) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(x = "Month", y = "Rentals", title = "Casual Ridership by Month")
casual <- casual + theme(legend.position = "none")
plotly::ggplotly(casual)
ggplot(dfb_int, aes(x = DATE, y = REGISTERED)) +
geom_point() +
geom_smooth() +
scale_x_date(date_labels = "%b %Y", date_breaks = "4 months") +
theme(axis.text.x = element_text(angle = 65, hjust = 1)) +
labs(x = "Date", y = "Rentals", title = "Total Registered Rentals Over Time")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
ggplot(dfb_int, aes(x = DATE, y = REGISTERED)) +
geom_point() +
geom_smooth(method = "lm", color = "red") +
scale_x_date(date_labels = "%b %Y", date_breaks = "4 months") +
theme(axis.text.x = element_text(angle = 65, hjust = 1)) +
labs(x = "Date", y = "Rentals", title = "Linear Trend of Registered Rentals Over Time")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(dfb_int, aes(x = DATE, y = CASUAL)) +
geom_point() +
geom_smooth() +
scale_x_date(date_labels = "%b %Y", date_breaks = "4 months") +
theme(axis.text.x = element_text(angle = 65, hjust = 1)) +
labs(x = "Date", y = "Rentals", title = "Total Casual Rentals Over Time")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
ggplot(dfb_int, aes(x = DATE, y = CASUAL)) +
geom_point() +
geom_smooth(method = "lm", color = "red") +
scale_x_date(date_labels = "%b %Y", date_breaks = "4 months") +
theme(axis.text.x = element_text(angle = 65, hjust = 1)) +
labs(x = "Date", y = "Rentals", title = "Linear Trend of Casual Rentals Over Time")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(dfb_int, aes(x = ATEMP, y = COUNT)) +
geom_point() +
geom_smooth() +
labs(title = "Temperature vs. Total Rentals", x = "Temperature", y = "Total Rentals") +
scale_color_brewer(palette = "Set1")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
linear_model <-
linear_reg() %>%
set_engine("lm")
fit_all <-
linear_model %>%
fit(COUNT ~ ., data = dfb_int)
summary(fit_all$fit)
## Warning in summary.lm(fit_all$fit): essentially perfect fit: summary may be
## unreliable
##
## Call:
## stats::lm(formula = COUNT ~ ., data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.689e-11 -1.728e-13 1.970e-14 1.962e-13 2.943e-11
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.980e-11 7.605e-12 -2.604e+00 0.00942 **
## DATE 1.248e-15 5.127e-16 2.435e+00 0.01515 *
## HOLIDAYYES 1.720e-12 3.780e-13 4.549e+00 6.33e-06 ***
## WEEKDAYYES -2.479e-12 2.135e-13 -1.161e+01 < 2e-16 ***
## WEATHERSIT 3.216e-13 1.453e-13 2.213e+00 0.02720 *
## TEMP 1.260e-14 4.920e-14 2.560e-01 0.79797
## ATEMP 6.865e-14 4.206e-14 1.632e+00 0.10313
## HUMIDITY 6.408e-15 5.426e-15 1.181e+00 0.23802
## WINDSPEED 3.809e-14 1.186e-14 3.212e+00 0.00138 **
## CASUAL 1.000e+00 1.619e-16 6.176e+15 < 2e-16 ***
## REGISTERED 1.000e+00 8.735e-17 1.145e+16 < 2e-16 ***
## MONTHFebruary 3.322e-13 2.922e-13 1.137e+00 0.25611
## MONTHMarch 1.978e-13 3.140e-13 6.300e-01 0.52889
## MONTHApril 9.385e-14 3.425e-13 2.740e-01 0.78413
## MONTHMay -1.453e-14 3.953e-13 -3.700e-02 0.97069
## MONTHJune -9.767e-14 4.443e-13 -2.200e-01 0.82607
## MONTHJuly -1.809e-13 4.769e-13 -3.790e-01 0.70459
## MONTHAugust -1.104e-13 4.489e-13 -2.460e-01 0.80576
## MONTHSeptember -5.280e-15 4.065e-13 -1.300e-02 0.98964
## MONTHOctober 5.025e-14 3.450e-13 1.460e-01 0.88422
## MONTHNovember 2.239e-13 3.099e-13 7.220e-01 0.47036
## MONTHDecember 2.078e-13 3.235e-13 6.420e-01 0.52077
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.526e-12 on 709 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 5.598e+31 on 21 and 709 DF, p-value: < 2.2e-16
plot(fit_all$fit)
#We'll add a new variable and call it BADWEATHER, which is “YES” if there is light or heavy rain or snow (i.e., if #WEATHERSIT is 3 or 4), and “NO” otherwise (i.e., if WEATHERSIT is 1 or 2).
dfb <-
dfb_int %>%
mutate(BADWEATHER = ifelse(WEATHERSIT == 3 | WEATHERSIT == 4, "YES", "NO"))
str(dfb)
## tibble [731 × 13] (S3: tbl_df/tbl/data.frame)
## $ DATE : Date[1:731], format: "2011-01-01" "2011-01-02" ...
## $ HOLIDAY : chr [1:731] "NO" "NO" "NO" "NO" ...
## $ WEEKDAY : chr [1:731] "NO" "NO" "YES" "YES" ...
## $ WEATHERSIT: num [1:731] 2 2 1 1 1 1 2 2 1 1 ...
## $ TEMP : num [1:731] 11 9 1 2 2.5 2 1 1 2 2 ...
## $ ATEMP : num [1:731] 11 6.5 4 2.5 1 2 3 5 8.5 6 ...
## $ HUMIDITY : num [1:731] 81 71.5 44 64 42.5 52 47.5 51 46 50 ...
## $ WINDSPEED : num [1:731] 17 17 18 9 13 6 11 17 25 15 ...
## $ CASUAL : num [1:731] 331 131 120 108 82 88 148 68 54 41 ...
## $ REGISTERED: num [1:731] 654 670 1229 1454 1518 ...
## $ COUNT : num [1:731] 985 801 1349 1562 1600 ...
## $ MONTH : Factor w/ 12 levels "January","February",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ BADWEATHER: chr [1:731] "NO" "NO" "NO" "NO" ...
head(dfb)
total_plot <-
ggplot(dfb, aes(x = ATEMP, y = COUNT, color = BADWEATHER)) +
geom_point() +
geom_smooth() +
labs(x = "Heat Index", y = "Rentals", title = "Total Rentals vs Heat Index")
plotly::ggplotly(total_plot)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
casual_plot <-
ggplot(dfb, aes(x = ATEMP, y = CASUAL, color = BADWEATHER)) +
geom_point() +
geom_smooth() +
labs(x = "Heat Index", y = "Rentals", title = "Casual Rentals vs Heat Index")
plotly::ggplotly(casual_plot)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
reg_plot <-
ggplot(dfb, aes(x = ATEMP, y = REGISTERED, color = BADWEATHER)) +
geom_point() +
geom_smooth() +
labs(x = "Heat Index", y = "Rentals", title = "Registered Rentals vs Heat Index")
plotly::ggplotly(reg_plot)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
fit_bad_weather <-
linear_model %>%
fit(COUNT ~ MONTH + WEEKDAY + BADWEATHER + TEMP + ATEMP + HUMIDITY, data = dfb)
summary(fit_bad_weather$fit)
##
## Call:
## stats::lm(formula = COUNT ~ MONTH + WEEKDAY + BADWEATHER + TEMP +
## ATEMP + HUMIDITY, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3729.0 -1005.1 -190.3 1115.0 3750.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3109.647 280.206 11.098 < 2e-16 ***
## MONTHFebruary 56.015 256.395 0.218 0.827126
## MONTHMarch 616.314 271.040 2.274 0.023269 *
## MONTHApril 858.334 293.371 2.926 0.003545 **
## MONTHMay 1138.064 340.546 3.342 0.000875 ***
## MONTHJune 669.104 386.404 1.732 0.083774 .
## MONTHJuly 181.690 416.044 0.437 0.662455
## MONTHAugust 648.674 391.143 1.658 0.097674 .
## MONTHSeptember 1600.807 351.613 4.553 6.22e-06 ***
## MONTHOctober 1930.646 294.271 6.561 1.03e-10 ***
## MONTHNovember 1510.300 260.027 5.808 9.50e-09 ***
## MONTHDecember 963.998 261.353 3.688 0.000243 ***
## WEEKDAYYES 69.745 110.118 0.633 0.526698
## BADWEATHERYES -1954.835 316.601 -6.174 1.11e-09 ***
## TEMP 184.596 42.011 4.394 1.28e-05 ***
## ATEMP -48.640 36.621 -1.328 0.184535
## HUMIDITY -25.341 3.623 -6.995 6.09e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1341 on 714 degrees of freedom
## Multiple R-squared: 0.5315, Adjusted R-squared: 0.521
## F-statistic: 50.64 on 16 and 714 DF, p-value: < 2.2e-16
bw_data <- tibble(MONTH="February", WEEKDAY="YES", BADWEATHER="YES", TEMP=18, ATEMP=16, HUMIDITY=80)
predict(fit_bad_weather, bw_data)
gw_data <- tibble(MONTH="July", WEEKDAY="YES", BADWEATHER="NO", TEMP=18, ATEMP=16, HUMIDITY=80)
predict(fit_bad_weather, gw_data)
fit_bad_weather_interact <-
linear_model %>%
fit(COUNT ~ BADWEATHER * WEEKDAY, data = dfb)
summary(fit_bad_weather_interact$fit)
##
## Call:
## stats::lm(formula = COUNT ~ BADWEATHER * WEEKDAY, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4206.7 -1262.1 -3.7 1405.3 4261.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4452.5 131.5 33.861 < 2e-16 ***
## BADWEATHERYES -2637.1 852.2 -3.095 0.00205 **
## WEEKDAYYES 185.3 155.9 1.188 0.23514
## BADWEATHERYES:WEEKDAYYES -201.2 977.1 -0.206 0.83695
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1883 on 727 degrees of freedom
## Multiple R-squared: 0.05941, Adjusted R-squared: 0.05553
## F-statistic: 15.31 on 3 and 727 DF, p-value: 1.15e-09
plot(fit_bad_weather$fit)
performance::check_collinearity(fit_bad_weather$fit)
fit_bad_weather_nocoll <-
linear_model %>%
fit(COUNT ~ MONTH + WEEKDAY + BADWEATHER + ATEMP + HUMIDITY, data = dfb)
summary(fit_bad_weather_nocoll$fit)
##
## Call:
## stats::lm(formula = COUNT ~ MONTH + WEEKDAY + BADWEATHER + ATEMP +
## HUMIDITY, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3760.9 -1058.5 -207.5 1154.8 3822.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3116.922 283.766 10.984 < 2e-16 ***
## MONTHFebruary 369.664 249.391 1.482 0.138710
## MONTHMarch 1100.895 250.739 4.391 1.30e-05 ***
## MONTHApril 1386.574 271.012 5.116 4.01e-07 ***
## MONTHMay 1764.733 313.177 5.635 2.52e-08 ***
## MONTHJune 1369.152 356.509 3.840 0.000134 ***
## MONTHJuly 801.206 396.405 2.021 0.043634 *
## MONTHAugust 1316.387 365.002 3.607 0.000332 ***
## MONTHSeptember 2228.197 325.405 6.847 1.62e-11 ***
## MONTHOctober 2420.401 275.810 8.776 < 2e-16 ***
## MONTHNovember 1848.898 251.506 7.351 5.38e-13 ***
## MONTHDecember 1387.220 246.047 5.638 2.48e-08 ***
## WEEKDAYYES 91.445 111.406 0.821 0.412023
## BADWEATHERYES -1961.852 320.624 -6.119 1.55e-09 ***
## ATEMP 103.172 12.294 8.392 2.55e-16 ***
## HUMIDITY -25.438 3.669 -6.934 9.16e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1358 on 715 degrees of freedom
## Multiple R-squared: 0.5189, Adjusted R-squared: 0.5088
## F-statistic: 51.41 on 15 and 715 DF, p-value: < 2.2e-16
performance::check_collinearity(fit_bad_weather_nocoll$fit)
fit_bad_weather_only <-
linear_model %>%
fit(COUNT ~ BADWEATHER, data = dfb)
summary(fit_bad_weather_only$fit)
##
## Call:
## stats::lm(formula = COUNT ~ BADWEATHER, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4153.2 -1257.7 1.8 1404.8 4129.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4584.24 70.63 64.908 < 2e-16 ***
## BADWEATHERYES -2780.95 416.69 -6.674 4.93e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1882 on 729 degrees of freedom
## Multiple R-squared: 0.05758, Adjusted R-squared: 0.05629
## F-statistic: 44.54 on 1 and 729 DF, p-value: 4.934e-11
#Let's set seed for reproducibility and split the data into training and testing sets to build predictive models
set.seed(3.14159)
dfb_split <- initial_split(dfb, prop = 0.75, strata = COUNT)
dfb_train <- training(dfb_split)
dfb_test <- testing(dfb_split)
#Model 1: fit_org
fit_org <-
linear_model %>%
fit(COUNT ~ MONTH + WEEKDAY + BADWEATHER + ATEMP + HUMIDITY, data = dfb_train)
summary(fit_org$fit)
##
## Call:
## stats::lm(formula = COUNT ~ MONTH + WEEKDAY + BADWEATHER + ATEMP +
## HUMIDITY, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3724.5 -1061.6 -239.4 1144.6 3784.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2720.872 335.471 8.111 3.51e-15 ***
## MONTHFebruary 502.237 303.232 1.656 0.098257 .
## MONTHMarch 1205.899 308.187 3.913 0.000103 ***
## MONTHApril 1635.207 329.834 4.958 9.61e-07 ***
## MONTHMay 1818.828 366.753 4.959 9.54e-07 ***
## MONTHJune 1377.136 425.348 3.238 0.001280 **
## MONTHJuly 1012.422 472.386 2.143 0.032550 *
## MONTHAugust 1307.145 427.175 3.060 0.002325 **
## MONTHSeptember 2259.819 382.118 5.914 5.99e-09 ***
## MONTHOctober 2714.125 331.458 8.188 1.98e-15 ***
## MONTHNovember 1935.616 294.755 6.567 1.23e-10 ***
## MONTHDecember 1356.684 287.740 4.715 3.09e-06 ***
## WEEKDAYYES 92.587 129.435 0.715 0.474729
## BADWEATHERYES -2003.008 383.283 -5.226 2.49e-07 ***
## ATEMP 104.629 14.378 7.277 1.23e-12 ***
## HUMIDITY -21.479 4.335 -4.955 9.74e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1370 on 531 degrees of freedom
## Multiple R-squared: 0.5104, Adjusted R-squared: 0.4965
## F-statistic: 36.9 on 15 and 531 DF, p-value: < 2.2e-16
plot(fit_org$fit)
#Model 2: fit_new
fit_new <-
linear_model %>%
fit(COUNT ~ MONTH + WEEKDAY + BADWEATHER + ATEMP + HUMIDITY + WINDSPEED, data = dfb_train)
summary(fit_new$fit)
##
## Call:
## stats::lm(formula = COUNT ~ MONTH + WEEKDAY + BADWEATHER + ATEMP +
## HUMIDITY + WINDSPEED, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3291.1 -1001.0 -184.2 1127.8 3354.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4008.065 399.906 10.023 < 2e-16 ***
## MONTHFebruary 499.184 295.007 1.692 0.09121 .
## MONTHMarch 1284.249 300.157 4.279 2.23e-05 ***
## MONTHApril 1725.855 321.300 5.371 1.17e-07 ***
## MONTHMay 1811.937 356.807 5.078 5.29e-07 ***
## MONTHJune 1267.191 414.280 3.059 0.00233 **
## MONTHJuly 859.943 460.386 1.868 0.06233 .
## MONTHAugust 1270.246 415.640 3.056 0.00236 **
## MONTHSeptember 2175.279 372.062 5.847 8.78e-09 ***
## MONTHOctober 2612.670 322.981 8.089 4.12e-15 ***
## MONTHNovember 1868.189 287.015 6.509 1.76e-10 ***
## MONTHDecember 1231.226 280.840 4.384 1.41e-05 ***
## WEEKDAYYES 45.988 126.202 0.364 0.71570
## BADWEATHERYES -1711.614 376.538 -4.546 6.79e-06 ***
## ATEMP 103.171 13.990 7.375 6.39e-13 ***
## HUMIDITY -27.484 4.353 -6.314 5.76e-10 ***
## WINDSPEED -64.107 11.509 -5.570 4.06e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1333 on 530 degrees of freedom
## Multiple R-squared: 0.5374, Adjusted R-squared: 0.5235
## F-statistic: 38.49 on 16 and 530 DF, p-value: < 2.2e-16
plot(fit_new$fit)
anova(fit_org$fit, fit_new$fit)
#Let's also calculate the performance metrics rmse() and mae using the actual and predicted values stored in the resulting data frame results_xxx
results_org <- fit_org %>%
predict(dfb_test) %>%
bind_cols(dfb_test) %>%
metrics(truth = COUNT, estimate = .pred) %>%
filter(.metric != "rsq")
results_org
results_new <- fit_new %>%
predict(dfb_test) %>%
bind_cols(dfb_test) %>%
metrics(truth = COUNT, estimate = .pred) %>%
filter(.metric != "rsq")
results_new