PACKAGES NEEDED

###Creating a column for total riders which include both casual and registered customers.

bikeshare2$total_riders <- bikeshare2$casual + bikeshare2$registered

Fitting the model in to a polynomial of three degree.

bikeshare_lm <- lm(total_riders~poly(temp,3, raw=TRUE),data=bikeshare2)
summary(bikeshare_lm)
## 
## Call:
## lm(formula = total_riders ~ poly(temp, 3, raw = TRUE), data = bikeshare2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4724.0 -1034.4   -99.6  1130.1  3160.1 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                518.9929   775.3459   0.669 0.503472    
## poly(temp, 3, raw = TRUE)1  63.1408   134.5298   0.469 0.638964    
## poly(temp, 3, raw = TRUE)2  16.6342     7.2173   2.305 0.021461 *  
## poly(temp, 3, raw = TRUE)3  -0.4324     0.1208  -3.580 0.000366 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1423 on 727 degrees of freedom
## Multiple R-squared:  0.4627, Adjusted R-squared:  0.4604 
## F-statistic: 208.6 on 3 and 727 DF,  p-value: < 2.2e-16
# With the third degree poly, we have an R-squared of .46 and a standard error of 1423.
### Adding more variables to create a best fitting model
bikeshare_lm1 <- lm(total_riders~poly(temp,3, raw=TRUE)+
                    as.factor(Promotion)+as.factor(holiday)+as.factor(weathersit)+
                    as.factor(season)+as.factor(mnth)+windspeed+poly(humidity,3,raw=TRUE) ,data=bikeshare2)
summary(bikeshare_lm1)
## 
## Call:
## lm(formula = total_riders ~ poly(temp, 3, raw = TRUE) + as.factor(Promotion) + 
##     as.factor(holiday) + as.factor(weathersit) + as.factor(season) + 
##     as.factor(mnth) + windspeed + poly(humidity, 3, raw = TRUE), 
##     data = bikeshare2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3410.0  -333.0    58.2   394.1  2511.3 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     1.754e+03  7.272e+02   2.412 0.016120 *  
## poly(temp, 3, raw = TRUE)1     -3.246e+02  7.556e+01  -4.295 1.99e-05 ***
## poly(temp, 3, raw = TRUE)2      3.425e+01  4.097e+00   8.360 3.34e-16 ***
## poly(temp, 3, raw = TRUE)3     -7.140e-01  6.850e-02 -10.424  < 2e-16 ***
## as.factor(Promotion)1           1.941e+03  5.098e+01  38.077  < 2e-16 ***
## as.factor(holiday)1            -5.633e+02  1.500e+02  -3.756 0.000187 ***
## as.factor(weathersit)2         -3.223e+02  6.857e+01  -4.700 3.13e-06 ***
## as.factor(weathersit)3         -1.302e+03  1.921e+02  -6.779 2.56e-11 ***
## as.factor(season)2              8.386e+02  1.559e+02   5.378 1.03e-07 ***
## as.factor(season)3              1.191e+03  1.862e+02   6.397 2.89e-10 ***
## as.factor(season)4              1.659e+03  1.584e+02  10.472  < 2e-16 ***
## as.factor(mnth)2                4.909e+01  1.296e+02   0.379 0.704973    
## as.factor(mnth)3                2.728e+02  1.514e+02   1.802 0.071962 .  
## as.factor(mnth)4               -9.060e+01  2.202e+02  -0.411 0.680889    
## as.factor(mnth)5                8.080e+01  2.346e+02   0.344 0.730607    
## as.factor(mnth)6                5.902e+01  2.452e+02   0.241 0.809883    
## as.factor(mnth)7                5.237e+01  2.723e+02   0.192 0.847545    
## as.factor(mnth)8               -1.774e+02  2.634e+02  -0.674 0.500742    
## as.factor(mnth)9                5.594e+01  2.353e+02   0.238 0.812192    
## as.factor(mnth)10              -2.596e+02  2.177e+02  -1.192 0.233575    
## as.factor(mnth)11              -5.014e+02  2.092e+02  -2.398 0.016766 *  
## as.factor(mnth)12              -2.430e+02  1.652e+02  -1.472 0.141588    
## windspeed                      -4.796e+01  5.290e+00  -9.067  < 2e-16 ***
## poly(humidity, 3, raw = TRUE)1  4.388e+01  3.071e+01   1.429 0.153430    
## poly(humidity, 3, raw = TRUE)2 -4.070e-01  5.405e-01  -0.753 0.451664    
## poly(humidity, 3, raw = TRUE)3 -1.492e-03  3.097e-03  -0.482 0.630145    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 667.1 on 705 degrees of freedom
## Multiple R-squared:  0.8855, Adjusted R-squared:  0.8814 
## F-statistic: 218.1 on 25 and 705 DF,  p-value: < 2.2e-16
## Testing for Multicollinearity
cor(bikeshare2[, c("temp", "holiday", "weathersit","season","mnth","Promotion", "temp","humidity", "windspeed")], method="spearman")%>%
datatable()
#Using diagnostic plots to check for linearity.
crPlots(bikeshare_lm1)

QUESTION 2

#There was a collinearity issue between month and season. The factor was .83.
# I ran a VIF test to see how much the collinearity between season and month is inflating the R-squared.

#VIF(bikeshare_lm1) 
# With a VIF factor of 8, I decided to take out the season variable and just use month instead.
#Concerning linearity, please see the diagnostic graphs below, they mostly show that the data had a linear nature.

bikeshare_lm2 <- lm(total_riders~poly(temp,3, raw=TRUE)+
                    as.factor(Promotion)+as.factor(holiday)+as.factor(weathersit)+
                    as.factor(mnth)+windspeed+poly(humidity,3,raw=TRUE) ,data=bikeshare2)
summary(bikeshare_lm2)
## 
## Call:
## lm(formula = total_riders ~ poly(temp, 3, raw = TRUE) + as.factor(Promotion) + 
##     as.factor(holiday) + as.factor(weathersit) + as.factor(mnth) + 
##     windspeed + poly(humidity, 3, raw = TRUE), data = bikeshare2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3347.4  -363.9    63.0   437.1  2282.7 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     1.451e+03  7.818e+02   1.856 0.063817 .  
## poly(temp, 3, raw = TRUE)1     -2.933e+02  8.119e+01  -3.612 0.000325 ***
## poly(temp, 3, raw = TRUE)2      3.316e+01  4.402e+00   7.533 1.52e-13 ***
## poly(temp, 3, raw = TRUE)3     -6.997e-01  7.349e-02  -9.520  < 2e-16 ***
## as.factor(Promotion)1           1.936e+03  5.485e+01  35.308  < 2e-16 ***
## as.factor(holiday)1            -6.738e+02  1.609e+02  -4.188 3.16e-05 ***
## as.factor(weathersit)2         -3.195e+02  7.377e+01  -4.331 1.70e-05 ***
## as.factor(weathersit)3         -1.381e+03  2.063e+02  -6.696 4.35e-11 ***
## as.factor(mnth)2                2.137e+01  1.394e+02   0.153 0.878214    
## as.factor(mnth)3                5.038e+02  1.518e+02   3.319 0.000950 ***
## as.factor(mnth)4                6.659e+02  1.684e+02   3.954 8.46e-05 ***
## as.factor(mnth)5                8.072e+02  1.899e+02   4.252 2.41e-05 ***
## as.factor(mnth)6                8.844e+02  2.149e+02   4.115 4.32e-05 ***
## as.factor(mnth)7                1.101e+03  2.369e+02   4.649 3.97e-06 ***
## as.factor(mnth)8                8.846e+02  2.197e+02   4.026 6.27e-05 ***
## as.factor(mnth)9                1.237e+03  1.961e+02   6.307 5.00e-10 ***
## as.factor(mnth)10               1.312e+03  1.697e+02   7.731 3.66e-14 ***
## as.factor(mnth)11               1.101e+03  1.535e+02   7.173 1.85e-12 ***
## as.factor(mnth)12               7.730e+02  1.438e+02   5.376 1.03e-07 ***
## windspeed                      -5.408e+01  5.655e+00  -9.563  < 2e-16 ***
## poly(humidity, 3, raw = TRUE)1  6.422e+01  3.297e+01   1.948 0.051849 .  
## poly(humidity, 3, raw = TRUE)2 -9.159e-01  5.793e-01  -1.581 0.114311    
## poly(humidity, 3, raw = TRUE)3  2.051e-03  3.313e-03   0.619 0.536025    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 717.8 on 708 degrees of freedom
## Multiple R-squared:  0.8669, Adjusted R-squared:  0.8627 
## F-statistic: 209.5 on 22 and 708 DF,  p-value: < 2.2e-16
#Diagnostic Graphs
crPlots(bikeshare_lm2)

QUESTION 3

#From my model, October is the month with the highest ridership in comparison to all the other months.
# If this month became cold and rainy, the coefficient on this month may change but not by a huge margin because the correlation between season and temperature is just about 33%.

QUESTION 4

# Keeping all the other factors constant, promotion itself increases the total ridership by about 1,936 riders in comparison to the other variables.
# since it is the variable with the highest coefficient in my model, we can conclude that the marketers cliam that promotion would improve ridership has been established.

QUESTION 5

#The effect of promotion on casual vs registered riders.

# To analyze this effect, we shall run the same model but with just the "casual" and "registered" variables to see their coefficients.

###CASUAL RIDERS

bikeshare_lm3 <- lm(casual~poly(temp,3, raw=TRUE)+
                      as.factor(Promotion)+as.factor(holiday)+as.factor(weathersit)+
                      as.factor(mnth) +windspeed+poly(humidity,3,raw=TRUE) ,data=bikeshare2)
summary(bikeshare_lm3)
## 
## Call:
## lm(formula = casual ~ poly(temp, 3, raw = TRUE) + as.factor(Promotion) + 
##     as.factor(holiday) + as.factor(weathersit) + as.factor(mnth) + 
##     windspeed + poly(humidity, 3, raw = TRUE), data = bikeshare2)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -844.5 -334.7 -133.5  170.1 2073.6 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     7.113e+02  5.546e+02   1.282 0.200093    
## poly(temp, 3, raw = TRUE)1     -1.136e+02  5.759e+01  -1.972 0.048989 *  
## poly(temp, 3, raw = TRUE)2      1.023e+01  3.122e+00   3.276 0.001104 ** 
## poly(temp, 3, raw = TRUE)3     -2.049e-01  5.213e-02  -3.931 9.30e-05 ***
## as.factor(Promotion)1           2.836e+02  3.890e+01   7.291 8.25e-13 ***
## as.factor(holiday)1             3.036e+02  1.141e+02   2.661 0.007977 ** 
## as.factor(weathersit)2         -1.480e+02  5.233e+01  -2.827 0.004824 ** 
## as.factor(weathersit)3         -3.982e+02  1.463e+02  -2.721 0.006668 ** 
## as.factor(mnth)2               -1.532e+01  9.889e+01  -0.155 0.876937    
## as.factor(mnth)3                3.233e+02  1.077e+02   3.003 0.002768 ** 
## as.factor(mnth)4                4.579e+02  1.195e+02   3.833 0.000138 ***
## as.factor(mnth)5                4.715e+02  1.347e+02   3.501 0.000492 ***
## as.factor(mnth)6                4.213e+02  1.524e+02   2.763 0.005872 ** 
## as.factor(mnth)7                5.478e+02  1.680e+02   3.260 0.001168 ** 
## as.factor(mnth)8                3.812e+02  1.558e+02   2.446 0.014696 *  
## as.factor(mnth)9                4.256e+02  1.391e+02   3.059 0.002302 ** 
## as.factor(mnth)10               3.906e+02  1.204e+02   3.244 0.001235 ** 
## as.factor(mnth)11               1.989e+02  1.089e+02   1.827 0.068170 .  
## as.factor(mnth)12               8.201e+01  1.020e+02   0.804 0.421588    
## windspeed                      -1.572e+01  4.011e+00  -3.918 9.79e-05 ***
## poly(humidity, 3, raw = TRUE)1  1.898e+00  2.339e+01   0.081 0.935357    
## poly(humidity, 3, raw = TRUE)2 -6.348e-02  4.109e-01  -0.154 0.877277    
## poly(humidity, 3, raw = TRUE)3  3.550e-05  2.350e-03   0.015 0.987952    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 509.1 on 708 degrees of freedom
## Multiple R-squared:  0.4667, Adjusted R-squared:  0.4501 
## F-statistic: 28.16 on 22 and 708 DF,  p-value: < 2.2e-16

###REGISTERED RIDERS

bikeshare_lm4 <- lm(registered~poly(temp,3, raw=TRUE)+
                      as.factor(Promotion)+as.factor(holiday)+as.factor(weathersit)+
                      as.factor(mnth)+windspeed+poly(humidity,3,raw=TRUE) ,data=bikeshare2)
summary(bikeshare_lm4)
## 
## Call:
## lm(formula = registered ~ poly(temp, 3, raw = TRUE) + as.factor(Promotion) + 
##     as.factor(holiday) + as.factor(weathersit) + as.factor(mnth) + 
##     windspeed + poly(humidity, 3, raw = TRUE), data = bikeshare2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2962.8  -440.5   109.6   514.0  1893.0 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     7.401e+02  7.968e+02   0.929   0.3533    
## poly(temp, 3, raw = TRUE)1     -1.797e+02  8.275e+01  -2.172   0.0302 *  
## poly(temp, 3, raw = TRUE)2      2.293e+01  4.486e+00   5.111 4.13e-07 ***
## poly(temp, 3, raw = TRUE)3     -4.947e-01  7.490e-02  -6.605 7.80e-11 ***
## as.factor(Promotion)1           1.653e+03  5.590e+01  29.569  < 2e-16 ***
## as.factor(holiday)1            -9.774e+02  1.640e+02  -5.961 3.95e-09 ***
## as.factor(weathersit)2         -1.715e+02  7.519e+01  -2.281   0.0228 *  
## as.factor(weathersit)3         -9.832e+02  2.103e+02  -4.676 3.50e-06 ***
## as.factor(mnth)2                3.669e+01  1.421e+02   0.258   0.7963    
## as.factor(mnth)3                1.805e+02  1.547e+02   1.166   0.2438    
## as.factor(mnth)4                2.080e+02  1.716e+02   1.212   0.2260    
## as.factor(mnth)5                3.357e+02  1.935e+02   1.735   0.0832 .  
## as.factor(mnth)6                4.632e+02  2.190e+02   2.115   0.0348 *  
## as.factor(mnth)7                5.536e+02  2.414e+02   2.293   0.0221 *  
## as.factor(mnth)8                5.035e+02  2.239e+02   2.248   0.0249 *  
## as.factor(mnth)9                8.113e+02  1.999e+02   4.059 5.47e-05 ***
## as.factor(mnth)10               9.217e+02  1.730e+02   5.328 1.34e-07 ***
## as.factor(mnth)11               9.022e+02  1.565e+02   5.766 1.21e-08 ***
## as.factor(mnth)12               6.909e+02  1.465e+02   4.715 2.91e-06 ***
## windspeed                      -3.836e+01  5.764e+00  -6.656 5.65e-11 ***
## poly(humidity, 3, raw = TRUE)1  6.232e+01  3.361e+01   1.855   0.0641 .  
## poly(humidity, 3, raw = TRUE)2 -8.524e-01  5.904e-01  -1.444   0.1492    
## poly(humidity, 3, raw = TRUE)3  2.016e-03  3.377e-03   0.597   0.5507    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 731.5 on 708 degrees of freedom
## Multiple R-squared:  0.7868, Adjusted R-squared:  0.7802 
## F-statistic: 118.8 on 22 and 708 DF,  p-value: < 2.2e-16
# Looking at the model separately on the casual and registered riders, we can tell that the promotion had a greater impact on the registered riders than on the casual riders.
# We show a coefficient of 711 on the casual riders and an adjusted R-squared of .45, while we show a coefficient of 740 and an R-squared of .78.
# From that analysis, we can comfortably conslude that the registered riders respondent better to promotions. This could be because monthly promotions result to a better
# deal overall. Also, it could be because the registered riders are aware of all the promotion that's happenin since they may be receiving some sort of marketing information that's
# targetted towards them.

QUESTION 6.

# In order to prepare an appropriate financial report to the officers, we shall need some financial information that we shall use to calculate things like profitability and cost.
# We need the price per ridership, the operational costs, the cost of replacing a bike, the cost per staff etc.