# Loading the necessary libraries
library(ggplot2)
library(readr)
library(lubridate)
library(dplyr)
library(broom)
library(modelr)
library(rsample)
library(tidyverse)
library(tidymodels)
library(performance)
DATE: The date the bicycle rental occurred
HOLIDAY: Whether the day is a U.S. holiday (1) or not (0). WEEKDAY: If a day is neither a weekend nor a holiday, then WEEKDAY is YES (1).
WEATHERSIT: The values are (1) Clear/Few clouds (2) Misty (3) Light snow or light rain (4) Heavy rain, snow, or thunderstorms.
TEMP: Average temperature in Celsius.
ATEMP: “Feels like” temperature in Celsius.
HUMIDITY: Humidity out of 100 (not divided by 100).
WINDSPEED: Wind speed in km/h.
CASUAL: Count of bikes rented by casual bikeshare users.
REGISTERED: Count of bikes rented by registered bikeshare members.
COUNT: Total count of bikes rented by both casual users and members
dfb_org <- read_csv("C:/Users/eafre/OneDrive/Desktop/completed_projects/capital_bike_share_clean.csv")
## Rows: 731 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): DATE, HOLIDAY, WEEKDAY
## dbl (7): WEATHERSIT, TEMP, ATEMP, HUMIDITY, WINDSPEED, CASUAL, REGISTERED
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Reformatting the DATE variable to a Date object
dfb_org$DATE <- as.Date(dfb_org$DATE, format = "%m/%d/%Y")
#Checking the structure of the data
str(dfb_org)
## spc_tbl_ [731 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ DATE : Date[1:731], format: "2011-01-01" "2011-01-02" ...
## $ HOLIDAY : chr [1:731] "NO" "NO" "NO" "NO" ...
## $ WEEKDAY : chr [1:731] "NO" "NO" "YES" "YES" ...
## $ WEATHERSIT: num [1:731] 2 2 1 1 1 1 2 2 1 1 ...
## $ TEMP : num [1:731] 11 9 1 2 2.5 2 1 1 2 2 ...
## $ ATEMP : num [1:731] 11 6.5 4 2.5 1 2 3 5 8.5 6 ...
## $ HUMIDITY : num [1:731] 81 71.5 44 64 42.5 52 47.5 51 46 50 ...
## $ WINDSPEED : num [1:731] 17 17 18 9 13 6 11 17 25 15 ...
## $ CASUAL : num [1:731] 331 131 120 108 82 88 148 68 54 41 ...
## $ REGISTERED: num [1:731] 654 670 1229 1454 1518 ...
## - attr(*, "spec")=
## .. cols(
## .. DATE = col_character(),
## .. HOLIDAY = col_character(),
## .. WEEKDAY = col_character(),
## .. WEATHERSIT = col_double(),
## .. TEMP = col_double(),
## .. ATEMP = col_double(),
## .. HUMIDITY = col_double(),
## .. WINDSPEED = col_double(),
## .. CASUAL = col_double(),
## .. REGISTERED = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Let's create a COUNT variable, and extract MONTH from DATE with lubridate::month() and convert MONTH to a factor
dfb_int <-
dfb_org %>%
mutate(COUNT = CASUAL + REGISTERED, MONTH = lubridate::month(DATE)) %>%
mutate(MONTH = as.factor(MONTH))
str(dfb_int)
## tibble [731 × 12] (S3: tbl_df/tbl/data.frame)
## $ DATE : Date[1:731], format: "2011-01-01" "2011-01-02" ...
## $ HOLIDAY : chr [1:731] "NO" "NO" "NO" "NO" ...
## $ WEEKDAY : chr [1:731] "NO" "NO" "YES" "YES" ...
## $ WEATHERSIT: num [1:731] 2 2 1 1 1 1 2 2 1 1 ...
## $ TEMP : num [1:731] 11 9 1 2 2.5 2 1 1 2 2 ...
## $ ATEMP : num [1:731] 11 6.5 4 2.5 1 2 3 5 8.5 6 ...
## $ HUMIDITY : num [1:731] 81 71.5 44 64 42.5 52 47.5 51 46 50 ...
## $ WINDSPEED : num [1:731] 17 17 18 9 13 6 11 17 25 15 ...
## $ CASUAL : num [1:731] 331 131 120 108 82 88 148 68 54 41 ...
## $ REGISTERED: num [1:731] 654 670 1229 1454 1518 ...
## $ COUNT : num [1:731] 985 801 1349 1562 1600 ...
## $ MONTH : Factor w/ 12 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
# We will also rename the values in MONTH to their respective names
dfb_int$MONTH <- factor(dfb_int$MONTH, levels = 1:12, labels = c("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"))
head(dfb_int)
Let’s examine what the overall trend in ridership over time looks like for both Registered and Casual ridership. That is, users who have subscribed to the service vs. those who have not (e.g., tourists).
# Box plot of REGISTERED ridership by month
registered <-
ggplot(dfb_int, aes(x = MONTH, y = COUNT, fill = MONTH)) +
geom_boxplot() +
scale_x_discrete(labels = c("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December")) +
theme(axis.text.x = element_text(angle = 65, hjust = 1)) +
labs(x = "Month", y = "Rentals", title = "Registered Ridership by Month")
# Hiding the legend because it is redundant
registered <-
registered + theme(legend.position = "none")
# Dynamic plot
plotly::ggplotly(registered)
# Box plot of CASUAL ridership by month
casual <-
ggplot(dfb_int, aes(x = MONTH, y = CASUAL, fill = MONTH)) +
geom_boxplot() +
scale_x_discrete(labels = c("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December")) +
theme(axis.text.x = element_text(angle = 65, hjust = 1)) +
labs(x = "Month", y = "Rentals", title = "Casual Ridership by Month")
# Again we remove the legend
casual <- casual + theme(legend.position = "none")
#Dynamic plot
plotly::ggplotly(casual)
reg_smooth <-
ggplot(dfb_int, aes(x = DATE, y = REGISTERED)) +
geom_point() +
geom_smooth() +
scale_x_date(date_labels = "%b %Y", date_breaks = "4 months") +
theme(axis.text.x = element_text(angle = 65, hjust = 1)) +
labs(x = "Date", y = "Rentals", title = "Total Registered Rentals Over Time")
plotly::ggplotly(reg_smooth)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
reg_line <-
ggplot(dfb_int, aes(x = DATE, y = REGISTERED)) +
geom_point() +
geom_smooth(method = "lm", color = "red") +
scale_x_date(date_labels = "%b %Y", date_breaks = "4 months") +
theme(axis.text.x = element_text(angle = 65, hjust = 1)) +
labs(x = "Date", y = "Rentals", title = "Linear Trend of Registered Rentals Over Time")
plotly::ggplotly(reg_line)
## `geom_smooth()` using formula = 'y ~ x'
casual_smooth <-
ggplot(dfb_int, aes(x = DATE, y = CASUAL)) +
geom_point() +
geom_smooth() +
scale_x_date(date_labels = "%b %Y", date_breaks = "4 months") +
theme(axis.text.x = element_text(angle = 65, hjust = 1)) +
labs(x = "Date", y = "Rentals", title = "Total Casual Rentals Over Time")
plotly::ggplotly(casual_smooth)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
casual_line <-
ggplot(dfb_int, aes(x = DATE, y = CASUAL)) +
geom_point() +
geom_smooth(method = "lm", color = "red") +
scale_x_date(date_labels = "%b %Y", date_breaks = "4 months") +
theme(axis.text.x = element_text(angle = 65, hjust = 1)) +
labs(x = "Date", y = "Rentals", title = "Linear Trend of Casual Rentals Over Time")
plotly::ggplotly(casual_line)
## `geom_smooth()` using formula = 'y ~ x'
total_smooth <-
ggplot(dfb_int, aes(x = ATEMP, y = COUNT)) +
geom_point() +
geom_smooth() +
labs(title = "Temperature vs. Total Rentals", x = "Temperature", y = "Total Rentals") +
scale_color_brewer(palette = "Set1")
plotly::ggplotly(total_smooth)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# A model that regresses total rentals onto all variables in the data set. First specifying linear regression and setting the computational engine to lm
linear_model <-
linear_reg() %>%
set_engine("lm")
# Fitting the model to the data
fit_all <-
linear_model %>%
fit(COUNT ~ ., data = dfb_int)
summary(fit_all$fit)
## Warning in summary.lm(fit_all$fit): essentially perfect fit: summary may be
## unreliable
##
## Call:
## stats::lm(formula = COUNT ~ ., data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.689e-11 -1.728e-13 1.970e-14 1.962e-13 2.943e-11
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.980e-11 7.605e-12 -2.604e+00 0.00942 **
## DATE 1.248e-15 5.127e-16 2.435e+00 0.01515 *
## HOLIDAYYES 1.720e-12 3.780e-13 4.549e+00 6.33e-06 ***
## WEEKDAYYES -2.479e-12 2.135e-13 -1.161e+01 < 2e-16 ***
## WEATHERSIT 3.216e-13 1.453e-13 2.213e+00 0.02720 *
## TEMP 1.260e-14 4.920e-14 2.560e-01 0.79797
## ATEMP 6.865e-14 4.206e-14 1.632e+00 0.10313
## HUMIDITY 6.408e-15 5.426e-15 1.181e+00 0.23802
## WINDSPEED 3.809e-14 1.186e-14 3.212e+00 0.00138 **
## CASUAL 1.000e+00 1.619e-16 6.176e+15 < 2e-16 ***
## REGISTERED 1.000e+00 8.735e-17 1.145e+16 < 2e-16 ***
## MONTHFebruary 3.322e-13 2.922e-13 1.137e+00 0.25611
## MONTHMarch 1.978e-13 3.140e-13 6.300e-01 0.52889
## MONTHApril 9.385e-14 3.425e-13 2.740e-01 0.78413
## MONTHMay -1.453e-14 3.953e-13 -3.700e-02 0.97069
## MONTHJune -9.767e-14 4.443e-13 -2.200e-01 0.82607
## MONTHJuly -1.809e-13 4.769e-13 -3.790e-01 0.70459
## MONTHAugust -1.104e-13 4.489e-13 -2.460e-01 0.80576
## MONTHSeptember -5.280e-15 4.065e-13 -1.300e-02 0.98964
## MONTHOctober 5.025e-14 3.450e-13 1.460e-01 0.88422
## MONTHNovember 2.239e-13 3.099e-13 7.220e-01 0.47036
## MONTHDecember 2.078e-13 3.235e-13 6.420e-01 0.52077
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.526e-12 on 709 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 5.598e+31 on 21 and 709 DF, p-value: < 2.2e-16
# Checking the linear assumptions
plot(fit_all$fit)
#We'll add a new variable and call it BADWEATHER, which is “YES” if there is light or heavy rain or snow (i.e., if #WEATHERSIT is 3 or 4), and “NO” otherwise (i.e., if WEATHERSIT is 1 or 2).
dfb <-
dfb_int %>%
mutate(BADWEATHER = ifelse(WEATHERSIT == 3 | WEATHERSIT == 4, "YES", "NO"))
str(dfb)
## tibble [731 × 13] (S3: tbl_df/tbl/data.frame)
## $ DATE : Date[1:731], format: "2011-01-01" "2011-01-02" ...
## $ HOLIDAY : chr [1:731] "NO" "NO" "NO" "NO" ...
## $ WEEKDAY : chr [1:731] "NO" "NO" "YES" "YES" ...
## $ WEATHERSIT: num [1:731] 2 2 1 1 1 1 2 2 1 1 ...
## $ TEMP : num [1:731] 11 9 1 2 2.5 2 1 1 2 2 ...
## $ ATEMP : num [1:731] 11 6.5 4 2.5 1 2 3 5 8.5 6 ...
## $ HUMIDITY : num [1:731] 81 71.5 44 64 42.5 52 47.5 51 46 50 ...
## $ WINDSPEED : num [1:731] 17 17 18 9 13 6 11 17 25 15 ...
## $ CASUAL : num [1:731] 331 131 120 108 82 88 148 68 54 41 ...
## $ REGISTERED: num [1:731] 654 670 1229 1454 1518 ...
## $ COUNT : num [1:731] 985 801 1349 1562 1600 ...
## $ MONTH : Factor w/ 12 levels "January","February",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ BADWEATHER: chr [1:731] "NO" "NO" "NO" "NO" ...
head(dfb)
# The effect of bad weather on rentals
total_plot <-
ggplot(dfb, aes(x = ATEMP, y = COUNT, color = BADWEATHER)) +
geom_point() +
geom_smooth() +
labs(x = "Heat Index", y = "Rentals", title = "Total Rentals vs Heat Index")
plotly::ggplotly(total_plot)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
casual_plot <-
ggplot(dfb, aes(x = ATEMP, y = CASUAL, color = BADWEATHER)) +
geom_point() +
geom_smooth() +
labs(x = "Heat Index", y = "Rentals", title = "Casual Rentals vs Heat Index")
plotly::ggplotly(casual_plot)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
reg_plot <-
ggplot(dfb, aes(x = ATEMP, y = REGISTERED, color = BADWEATHER)) +
geom_point() +
geom_smooth() +
labs(x = "Heat Index", y = "Rentals", title = "Registered Rentals vs Heat Index")
plotly::ggplotly(reg_plot)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
fit_bad_weather <-
linear_model %>%
fit(COUNT ~ MONTH + WEEKDAY + BADWEATHER + TEMP + ATEMP + HUMIDITY, data = dfb)
summary(fit_bad_weather$fit)
##
## Call:
## stats::lm(formula = COUNT ~ MONTH + WEEKDAY + BADWEATHER + TEMP +
## ATEMP + HUMIDITY, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3729.0 -1005.1 -190.3 1115.0 3750.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3109.647 280.206 11.098 < 2e-16 ***
## MONTHFebruary 56.015 256.395 0.218 0.827126
## MONTHMarch 616.314 271.040 2.274 0.023269 *
## MONTHApril 858.334 293.371 2.926 0.003545 **
## MONTHMay 1138.064 340.546 3.342 0.000875 ***
## MONTHJune 669.104 386.404 1.732 0.083774 .
## MONTHJuly 181.690 416.044 0.437 0.662455
## MONTHAugust 648.674 391.143 1.658 0.097674 .
## MONTHSeptember 1600.807 351.613 4.553 6.22e-06 ***
## MONTHOctober 1930.646 294.271 6.561 1.03e-10 ***
## MONTHNovember 1510.300 260.027 5.808 9.50e-09 ***
## MONTHDecember 963.998 261.353 3.688 0.000243 ***
## WEEKDAYYES 69.745 110.118 0.633 0.526698
## BADWEATHERYES -1954.835 316.601 -6.174 1.11e-09 ***
## TEMP 184.596 42.011 4.394 1.28e-05 ***
## ATEMP -48.640 36.621 -1.328 0.184535
## HUMIDITY -25.341 3.623 -6.995 6.09e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1341 on 714 degrees of freedom
## Multiple R-squared: 0.5315, Adjusted R-squared: 0.521
## F-statistic: 50.64 on 16 and 714 DF, p-value: < 2.2e-16
# Making a prediction about BAD weather
bw_data <- tibble(MONTH="February", WEEKDAY="YES", BADWEATHER="YES", TEMP=18, ATEMP=16, HUMIDITY=80)
predict(fit_bad_weather, bw_data)
# Making a prediction about GOOD weather
gw_data <- tibble(MONTH="July", WEEKDAY="YES", BADWEATHER="NO", TEMP=18, ATEMP=55, HUMIDITY=5)
predict(fit_bad_weather, gw_data)
fit_bad_weather_interact <-
linear_model %>%
fit(COUNT ~ BADWEATHER * WEEKDAY, data = dfb)
summary(fit_bad_weather_interact$fit)
##
## Call:
## stats::lm(formula = COUNT ~ BADWEATHER * WEEKDAY, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4206.7 -1262.1 -3.7 1405.3 4261.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4452.5 131.5 33.861 < 2e-16 ***
## BADWEATHERYES -2637.1 852.2 -3.095 0.00205 **
## WEEKDAYYES 185.3 155.9 1.188 0.23514
## BADWEATHERYES:WEEKDAYYES -201.2 977.1 -0.206 0.83695
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1883 on 727 degrees of freedom
## Multiple R-squared: 0.05941, Adjusted R-squared: 0.05553
## F-statistic: 15.31 on 3 and 727 DF, p-value: 1.15e-09
# Checking linear regression assumptions
plot(fit_bad_weather$fit)
performance::check_collinearity(fit_bad_weather$fit)
#a simple linear regression model of the relationship between COUNT and BADWEATHER, excluding all other variables
fit_bad_weather_only <-
linear_model %>%
fit(COUNT ~ BADWEATHER, data = dfb)
#Model summary
summary(fit_bad_weather_only$fit)
##
## Call:
## stats::lm(formula = COUNT ~ BADWEATHER, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4153.2 -1257.7 1.8 1404.8 4129.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4584.24 70.63 64.908 < 2e-16 ***
## BADWEATHERYES -2780.95 416.69 -6.674 4.93e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1882 on 729 degrees of freedom
## Multiple R-squared: 0.05758, Adjusted R-squared: 0.05629
## F-statistic: 44.54 on 1 and 729 DF, p-value: 4.934e-11
#Setting seed for reproducibility and splitting the data into training and test sets to build predictive models
set.seed(3.14159)
dfb_split <- initial_split(dfb, prop = 0.75, strata = COUNT)
dfb_train <- training(dfb_split)
dfb_test <- testing(dfb_split)
#Model 1: fit_org
fit_org <-
linear_model %>%
fit(COUNT ~ MONTH + WEEKDAY + BADWEATHER + ATEMP + HUMIDITY, data = dfb_train)
summary(fit_org$fit)
##
## Call:
## stats::lm(formula = COUNT ~ MONTH + WEEKDAY + BADWEATHER + ATEMP +
## HUMIDITY, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3724.5 -1061.6 -239.4 1144.6 3784.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2720.872 335.471 8.111 3.51e-15 ***
## MONTHFebruary 502.237 303.232 1.656 0.098257 .
## MONTHMarch 1205.899 308.187 3.913 0.000103 ***
## MONTHApril 1635.207 329.834 4.958 9.61e-07 ***
## MONTHMay 1818.828 366.753 4.959 9.54e-07 ***
## MONTHJune 1377.136 425.348 3.238 0.001280 **
## MONTHJuly 1012.422 472.386 2.143 0.032550 *
## MONTHAugust 1307.145 427.175 3.060 0.002325 **
## MONTHSeptember 2259.819 382.118 5.914 5.99e-09 ***
## MONTHOctober 2714.125 331.458 8.188 1.98e-15 ***
## MONTHNovember 1935.616 294.755 6.567 1.23e-10 ***
## MONTHDecember 1356.684 287.740 4.715 3.09e-06 ***
## WEEKDAYYES 92.587 129.435 0.715 0.474729
## BADWEATHERYES -2003.008 383.283 -5.226 2.49e-07 ***
## ATEMP 104.629 14.378 7.277 1.23e-12 ***
## HUMIDITY -21.479 4.335 -4.955 9.74e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1370 on 531 degrees of freedom
## Multiple R-squared: 0.5104, Adjusted R-squared: 0.4965
## F-statistic: 36.9 on 15 and 531 DF, p-value: < 2.2e-16
plot(fit_org$fit)
#Model 2: fit_new
fit_new <-
linear_model %>%
fit(COUNT ~ MONTH + WEEKDAY + BADWEATHER + ATEMP + HUMIDITY + WINDSPEED, data = dfb_train)
summary(fit_new$fit)
##
## Call:
## stats::lm(formula = COUNT ~ MONTH + WEEKDAY + BADWEATHER + ATEMP +
## HUMIDITY + WINDSPEED, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3291.1 -1001.0 -184.2 1127.8 3354.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4008.065 399.906 10.023 < 2e-16 ***
## MONTHFebruary 499.184 295.007 1.692 0.09121 .
## MONTHMarch 1284.249 300.157 4.279 2.23e-05 ***
## MONTHApril 1725.855 321.300 5.371 1.17e-07 ***
## MONTHMay 1811.937 356.807 5.078 5.29e-07 ***
## MONTHJune 1267.191 414.280 3.059 0.00233 **
## MONTHJuly 859.943 460.386 1.868 0.06233 .
## MONTHAugust 1270.246 415.640 3.056 0.00236 **
## MONTHSeptember 2175.279 372.062 5.847 8.78e-09 ***
## MONTHOctober 2612.670 322.981 8.089 4.12e-15 ***
## MONTHNovember 1868.189 287.015 6.509 1.76e-10 ***
## MONTHDecember 1231.226 280.840 4.384 1.41e-05 ***
## WEEKDAYYES 45.988 126.202 0.364 0.71570
## BADWEATHERYES -1711.614 376.538 -4.546 6.79e-06 ***
## ATEMP 103.171 13.990 7.375 6.39e-13 ***
## HUMIDITY -27.484 4.353 -6.314 5.76e-10 ***
## WINDSPEED -64.107 11.509 -5.570 4.06e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1333 on 530 degrees of freedom
## Multiple R-squared: 0.5374, Adjusted R-squared: 0.5235
## F-statistic: 38.49 on 16 and 530 DF, p-value: < 2.2e-16
plot(fit_new$fit)
anova(fit_org$fit, fit_new$fit)
#Let's also calculate the performance metrics rmse() and mae() using both the observed and predicted values stored in each resulting data frame
results_org <- fit_org %>%
predict(dfb_test) %>%
bind_cols(dfb_test) %>%
metrics(truth = COUNT, estimate = .pred) %>%
filter(.metric != "rsq")
results_org
results_new <- fit_new %>%
predict(dfb_test) %>%
bind_cols(dfb_test) %>%
metrics(truth = COUNT, estimate = .pred) %>%
filter(.metric != "rsq")
results_new