###Creating a column for total riders which include both casual and registered customers.
bikeshare2$total_riders <- bikeshare2$casual + bikeshare2$registered
bikeshare_lm <- lm(total_riders~poly(temp,3, raw=TRUE),data=bikeshare2)
summary(bikeshare_lm)
##
## Call:
## lm(formula = total_riders ~ poly(temp, 3, raw = TRUE), data = bikeshare2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4724.0 -1034.4 -99.6 1130.1 3160.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 518.9929 775.3459 0.669 0.503472
## poly(temp, 3, raw = TRUE)1 63.1408 134.5298 0.469 0.638964
## poly(temp, 3, raw = TRUE)2 16.6342 7.2173 2.305 0.021461 *
## poly(temp, 3, raw = TRUE)3 -0.4324 0.1208 -3.580 0.000366 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1423 on 727 degrees of freedom
## Multiple R-squared: 0.4627, Adjusted R-squared: 0.4604
## F-statistic: 208.6 on 3 and 727 DF, p-value: < 2.2e-16
# With the third degree poly, we have an R-squared of .46 and a standard error of 1423.
### Adding more variables to create a best fitting model
bikeshare_lm1 <- lm(total_riders~poly(temp,3, raw=TRUE)+
as.factor(Promotion)+as.factor(holiday)+as.factor(weathersit)+
as.factor(season)+as.factor(mnth)+windspeed+poly(humidity,3,raw=TRUE) ,data=bikeshare2)
summary(bikeshare_lm1)
##
## Call:
## lm(formula = total_riders ~ poly(temp, 3, raw = TRUE) + as.factor(Promotion) +
## as.factor(holiday) + as.factor(weathersit) + as.factor(season) +
## as.factor(mnth) + windspeed + poly(humidity, 3, raw = TRUE),
## data = bikeshare2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3410.0 -333.0 58.2 394.1 2511.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.754e+03 7.272e+02 2.412 0.016120 *
## poly(temp, 3, raw = TRUE)1 -3.246e+02 7.556e+01 -4.295 1.99e-05 ***
## poly(temp, 3, raw = TRUE)2 3.425e+01 4.097e+00 8.360 3.34e-16 ***
## poly(temp, 3, raw = TRUE)3 -7.140e-01 6.850e-02 -10.424 < 2e-16 ***
## as.factor(Promotion)1 1.941e+03 5.098e+01 38.077 < 2e-16 ***
## as.factor(holiday)1 -5.633e+02 1.500e+02 -3.756 0.000187 ***
## as.factor(weathersit)2 -3.223e+02 6.857e+01 -4.700 3.13e-06 ***
## as.factor(weathersit)3 -1.302e+03 1.921e+02 -6.779 2.56e-11 ***
## as.factor(season)2 8.386e+02 1.559e+02 5.378 1.03e-07 ***
## as.factor(season)3 1.191e+03 1.862e+02 6.397 2.89e-10 ***
## as.factor(season)4 1.659e+03 1.584e+02 10.472 < 2e-16 ***
## as.factor(mnth)2 4.909e+01 1.296e+02 0.379 0.704973
## as.factor(mnth)3 2.728e+02 1.514e+02 1.802 0.071962 .
## as.factor(mnth)4 -9.060e+01 2.202e+02 -0.411 0.680889
## as.factor(mnth)5 8.080e+01 2.346e+02 0.344 0.730607
## as.factor(mnth)6 5.902e+01 2.452e+02 0.241 0.809883
## as.factor(mnth)7 5.237e+01 2.723e+02 0.192 0.847545
## as.factor(mnth)8 -1.774e+02 2.634e+02 -0.674 0.500742
## as.factor(mnth)9 5.594e+01 2.353e+02 0.238 0.812192
## as.factor(mnth)10 -2.596e+02 2.177e+02 -1.192 0.233575
## as.factor(mnth)11 -5.014e+02 2.092e+02 -2.398 0.016766 *
## as.factor(mnth)12 -2.430e+02 1.652e+02 -1.472 0.141588
## windspeed -4.796e+01 5.290e+00 -9.067 < 2e-16 ***
## poly(humidity, 3, raw = TRUE)1 4.388e+01 3.071e+01 1.429 0.153430
## poly(humidity, 3, raw = TRUE)2 -4.070e-01 5.405e-01 -0.753 0.451664
## poly(humidity, 3, raw = TRUE)3 -1.492e-03 3.097e-03 -0.482 0.630145
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 667.1 on 705 degrees of freedom
## Multiple R-squared: 0.8855, Adjusted R-squared: 0.8814
## F-statistic: 218.1 on 25 and 705 DF, p-value: < 2.2e-16
## Testing for Multicollinearity
cor(bikeshare2[, c("temp", "holiday", "weathersit","season","mnth","Promotion", "temp","humidity", "windspeed")], method="spearman")%>%
datatable()
#Using diagnostic plots to check for linearity.
crPlots(bikeshare_lm1)
#There was a collinearity issue between month and season. The factor was .83.
# I ran a VIF test to see how much the collinearity between season and month is inflating the R-squared.
#VIF(bikeshare_lm1)
# With a VIF factor of 8, I decided to take out the season variable and just use month instead.
#Concerning linearity, please see the diagnostic graphs below, they mostly show that the data had a linear nature.
bikeshare_lm2 <- lm(total_riders~poly(temp,3, raw=TRUE)+
as.factor(Promotion)+as.factor(holiday)+as.factor(weathersit)+
as.factor(mnth)+windspeed+poly(humidity,3,raw=TRUE) ,data=bikeshare2)
summary(bikeshare_lm2)
##
## Call:
## lm(formula = total_riders ~ poly(temp, 3, raw = TRUE) + as.factor(Promotion) +
## as.factor(holiday) + as.factor(weathersit) + as.factor(mnth) +
## windspeed + poly(humidity, 3, raw = TRUE), data = bikeshare2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3347.4 -363.9 63.0 437.1 2282.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.451e+03 7.818e+02 1.856 0.063817 .
## poly(temp, 3, raw = TRUE)1 -2.933e+02 8.119e+01 -3.612 0.000325 ***
## poly(temp, 3, raw = TRUE)2 3.316e+01 4.402e+00 7.533 1.52e-13 ***
## poly(temp, 3, raw = TRUE)3 -6.997e-01 7.349e-02 -9.520 < 2e-16 ***
## as.factor(Promotion)1 1.936e+03 5.485e+01 35.308 < 2e-16 ***
## as.factor(holiday)1 -6.738e+02 1.609e+02 -4.188 3.16e-05 ***
## as.factor(weathersit)2 -3.195e+02 7.377e+01 -4.331 1.70e-05 ***
## as.factor(weathersit)3 -1.381e+03 2.063e+02 -6.696 4.35e-11 ***
## as.factor(mnth)2 2.137e+01 1.394e+02 0.153 0.878214
## as.factor(mnth)3 5.038e+02 1.518e+02 3.319 0.000950 ***
## as.factor(mnth)4 6.659e+02 1.684e+02 3.954 8.46e-05 ***
## as.factor(mnth)5 8.072e+02 1.899e+02 4.252 2.41e-05 ***
## as.factor(mnth)6 8.844e+02 2.149e+02 4.115 4.32e-05 ***
## as.factor(mnth)7 1.101e+03 2.369e+02 4.649 3.97e-06 ***
## as.factor(mnth)8 8.846e+02 2.197e+02 4.026 6.27e-05 ***
## as.factor(mnth)9 1.237e+03 1.961e+02 6.307 5.00e-10 ***
## as.factor(mnth)10 1.312e+03 1.697e+02 7.731 3.66e-14 ***
## as.factor(mnth)11 1.101e+03 1.535e+02 7.173 1.85e-12 ***
## as.factor(mnth)12 7.730e+02 1.438e+02 5.376 1.03e-07 ***
## windspeed -5.408e+01 5.655e+00 -9.563 < 2e-16 ***
## poly(humidity, 3, raw = TRUE)1 6.422e+01 3.297e+01 1.948 0.051849 .
## poly(humidity, 3, raw = TRUE)2 -9.159e-01 5.793e-01 -1.581 0.114311
## poly(humidity, 3, raw = TRUE)3 2.051e-03 3.313e-03 0.619 0.536025
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 717.8 on 708 degrees of freedom
## Multiple R-squared: 0.8669, Adjusted R-squared: 0.8627
## F-statistic: 209.5 on 22 and 708 DF, p-value: < 2.2e-16
#Diagnostic Graphs
crPlots(bikeshare_lm2)
#From my model, October is the month with the highest ridership in comparison to all the other months.
# If this month became cold and rainy, the coefficient on this month may change but not by a huge margin because the correlation between season and temperature is just about 33%.
# Keeping all the other factors constant, promotion itself increases the total ridership by about 1,936 riders in comparison to the other variables.
# since it is the variable with the highest coefficient in my model, we can conclude that the marketers cliam that promotion would improve ridership has been established.
#The effect of promotion on casual vs registered riders.
# To analyze this effect, we shall run the same model but with just the "casual" and "registered" variables to see their coefficients.
###CASUAL RIDERS
bikeshare_lm3 <- lm(casual~poly(temp,3, raw=TRUE)+
as.factor(Promotion)+as.factor(holiday)+as.factor(weathersit)+
as.factor(mnth) +windspeed+poly(humidity,3,raw=TRUE) ,data=bikeshare2)
summary(bikeshare_lm3)
##
## Call:
## lm(formula = casual ~ poly(temp, 3, raw = TRUE) + as.factor(Promotion) +
## as.factor(holiday) + as.factor(weathersit) + as.factor(mnth) +
## windspeed + poly(humidity, 3, raw = TRUE), data = bikeshare2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -844.5 -334.7 -133.5 170.1 2073.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.113e+02 5.546e+02 1.282 0.200093
## poly(temp, 3, raw = TRUE)1 -1.136e+02 5.759e+01 -1.972 0.048989 *
## poly(temp, 3, raw = TRUE)2 1.023e+01 3.122e+00 3.276 0.001104 **
## poly(temp, 3, raw = TRUE)3 -2.049e-01 5.213e-02 -3.931 9.30e-05 ***
## as.factor(Promotion)1 2.836e+02 3.890e+01 7.291 8.25e-13 ***
## as.factor(holiday)1 3.036e+02 1.141e+02 2.661 0.007977 **
## as.factor(weathersit)2 -1.480e+02 5.233e+01 -2.827 0.004824 **
## as.factor(weathersit)3 -3.982e+02 1.463e+02 -2.721 0.006668 **
## as.factor(mnth)2 -1.532e+01 9.889e+01 -0.155 0.876937
## as.factor(mnth)3 3.233e+02 1.077e+02 3.003 0.002768 **
## as.factor(mnth)4 4.579e+02 1.195e+02 3.833 0.000138 ***
## as.factor(mnth)5 4.715e+02 1.347e+02 3.501 0.000492 ***
## as.factor(mnth)6 4.213e+02 1.524e+02 2.763 0.005872 **
## as.factor(mnth)7 5.478e+02 1.680e+02 3.260 0.001168 **
## as.factor(mnth)8 3.812e+02 1.558e+02 2.446 0.014696 *
## as.factor(mnth)9 4.256e+02 1.391e+02 3.059 0.002302 **
## as.factor(mnth)10 3.906e+02 1.204e+02 3.244 0.001235 **
## as.factor(mnth)11 1.989e+02 1.089e+02 1.827 0.068170 .
## as.factor(mnth)12 8.201e+01 1.020e+02 0.804 0.421588
## windspeed -1.572e+01 4.011e+00 -3.918 9.79e-05 ***
## poly(humidity, 3, raw = TRUE)1 1.898e+00 2.339e+01 0.081 0.935357
## poly(humidity, 3, raw = TRUE)2 -6.348e-02 4.109e-01 -0.154 0.877277
## poly(humidity, 3, raw = TRUE)3 3.550e-05 2.350e-03 0.015 0.987952
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 509.1 on 708 degrees of freedom
## Multiple R-squared: 0.4667, Adjusted R-squared: 0.4501
## F-statistic: 28.16 on 22 and 708 DF, p-value: < 2.2e-16
###REGISTERED RIDERS
bikeshare_lm4 <- lm(registered~poly(temp,3, raw=TRUE)+
as.factor(Promotion)+as.factor(holiday)+as.factor(weathersit)+
as.factor(mnth)+windspeed+poly(humidity,3,raw=TRUE) ,data=bikeshare2)
summary(bikeshare_lm4)
##
## Call:
## lm(formula = registered ~ poly(temp, 3, raw = TRUE) + as.factor(Promotion) +
## as.factor(holiday) + as.factor(weathersit) + as.factor(mnth) +
## windspeed + poly(humidity, 3, raw = TRUE), data = bikeshare2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2962.8 -440.5 109.6 514.0 1893.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.401e+02 7.968e+02 0.929 0.3533
## poly(temp, 3, raw = TRUE)1 -1.797e+02 8.275e+01 -2.172 0.0302 *
## poly(temp, 3, raw = TRUE)2 2.293e+01 4.486e+00 5.111 4.13e-07 ***
## poly(temp, 3, raw = TRUE)3 -4.947e-01 7.490e-02 -6.605 7.80e-11 ***
## as.factor(Promotion)1 1.653e+03 5.590e+01 29.569 < 2e-16 ***
## as.factor(holiday)1 -9.774e+02 1.640e+02 -5.961 3.95e-09 ***
## as.factor(weathersit)2 -1.715e+02 7.519e+01 -2.281 0.0228 *
## as.factor(weathersit)3 -9.832e+02 2.103e+02 -4.676 3.50e-06 ***
## as.factor(mnth)2 3.669e+01 1.421e+02 0.258 0.7963
## as.factor(mnth)3 1.805e+02 1.547e+02 1.166 0.2438
## as.factor(mnth)4 2.080e+02 1.716e+02 1.212 0.2260
## as.factor(mnth)5 3.357e+02 1.935e+02 1.735 0.0832 .
## as.factor(mnth)6 4.632e+02 2.190e+02 2.115 0.0348 *
## as.factor(mnth)7 5.536e+02 2.414e+02 2.293 0.0221 *
## as.factor(mnth)8 5.035e+02 2.239e+02 2.248 0.0249 *
## as.factor(mnth)9 8.113e+02 1.999e+02 4.059 5.47e-05 ***
## as.factor(mnth)10 9.217e+02 1.730e+02 5.328 1.34e-07 ***
## as.factor(mnth)11 9.022e+02 1.565e+02 5.766 1.21e-08 ***
## as.factor(mnth)12 6.909e+02 1.465e+02 4.715 2.91e-06 ***
## windspeed -3.836e+01 5.764e+00 -6.656 5.65e-11 ***
## poly(humidity, 3, raw = TRUE)1 6.232e+01 3.361e+01 1.855 0.0641 .
## poly(humidity, 3, raw = TRUE)2 -8.524e-01 5.904e-01 -1.444 0.1492
## poly(humidity, 3, raw = TRUE)3 2.016e-03 3.377e-03 0.597 0.5507
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 731.5 on 708 degrees of freedom
## Multiple R-squared: 0.7868, Adjusted R-squared: 0.7802
## F-statistic: 118.8 on 22 and 708 DF, p-value: < 2.2e-16
# Looking at the model separately on the casual and registered riders, we can tell that the promotion had a greater impact on the registered riders than on the casual riders.
# We show a coefficient of 711 on the casual riders and an adjusted R-squared of .45, while we show a coefficient of 740 and an R-squared of .78.
# From that analysis, we can comfortably conslude that the registered riders respondent better to promotions. This could be because monthly promotions result to a better
# deal overall. Also, it could be because the registered riders are aware of all the promotion that's happenin since they may be receiving some sort of marketing information that's
# targetted towards them.
# In order to prepare an appropriate financial report to the officers, we shall need some financial information that we shall use to calculate things like profitability and cost.
# We need the price per ridership, the operational costs, the cost of replacing a bike, the cost per staff etc.