#Import the data from a web-hosted source
bikeshare <- read_csv("http://asayanalytics.com/bikeshare_csv")

bikeshare$total_riders <- bikeshare$casual + bikeshare$registered

1

bike_lm <- lm(total_riders~temp, data=bikeshare)
summary(bike_lm)
## 
## Call:
## lm(formula = total_riders ~ temp, data = bikeshare)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4615.3 -1134.9  -104.4  1044.3  3737.8 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1214.642    161.164   7.537 1.43e-13 ***
## temp         161.969      7.444  21.759  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1509 on 729 degrees of freedom
## Multiple R-squared:  0.3937, Adjusted R-squared:  0.3929 
## F-statistic: 473.5 on 1 and 729 DF,  p-value: < 2.2e-16
crPlots(bike_lm)

###2 Yes, problems were identified with the data. Based off the graphs you could see the model had issues with linearity.

bike_1 <- lm(total_riders ~ poly(temp,3,raw=TRUE) + as.factor(mnth) + as.factor(Promotion),data=bikeshare)
summary(bike_1)
## 
## Call:
## lm(formula = total_riders ~ poly(temp, 3, raw = TRUE) + as.factor(mnth) + 
##     as.factor(Promotion), data = bikeshare)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5945.7  -387.1   158.8   588.3  2249.5 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                1703.79022  568.13134   2.999 0.002803 ** 
## poly(temp, 3, raw = TRUE)1 -314.88068  106.56376  -2.955 0.003231 ** 
## poly(temp, 3, raw = TRUE)2   30.86894    5.76927   5.351 1.18e-07 ***
## poly(temp, 3, raw = TRUE)3   -0.62064    0.09616  -6.454 2.00e-10 ***
## as.factor(mnth)2            157.01688  182.52260   0.860 0.389934    
## as.factor(mnth)3            606.41800  197.47066   3.071 0.002215 ** 
## as.factor(mnth)4            846.12165  217.92942   3.883 0.000113 ***
## as.factor(mnth)5           1046.37991  247.96456   4.220 2.76e-05 ***
## as.factor(mnth)6           1413.86795  274.79645   5.145 3.46e-07 ***
## as.factor(mnth)7           1508.12446  305.72428   4.933 1.01e-06 ***
## as.factor(mnth)8           1283.28950  283.51159   4.526 7.03e-06 ***
## as.factor(mnth)9           1372.93703  255.62372   5.371 1.06e-07 ***
## as.factor(mnth)10          1440.19697  220.38291   6.535 1.21e-10 ***
## as.factor(mnth)11          1326.09468  199.84205   6.636 6.38e-11 ***
## as.factor(mnth)12           798.70686  187.87247   4.251 2.41e-05 ***
## as.factor(Promotion)1      2101.91343   70.56984  29.785  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 943 on 715 degrees of freedom
## Multiple R-squared:  0.7679, Adjusted R-squared:  0.763 
## F-statistic: 157.7 on 15 and 715 DF,  p-value: < 2.2e-16

###3

bike_2 = lm(total_riders ~ poly(temp,3,raw=TRUE) + as.factor(mnth) + weathersit + as.factor(Promotion),data=bikeshare)
summary(bike_2)
## 
## Call:
## lm(formula = total_riders ~ poly(temp, 3, raw = TRUE) + as.factor(mnth) + 
##     weathersit + as.factor(Promotion), data = bikeshare)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4662.2  -384.3    47.7   483.2  2840.5 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                2767.94290  494.02693   5.603 3.01e-08 ***
## poly(temp, 3, raw = TRUE)1 -281.55122   91.82268  -3.066 0.002249 ** 
## poly(temp, 3, raw = TRUE)2   30.19735    4.97007   6.076 2.01e-09 ***
## poly(temp, 3, raw = TRUE)3   -0.63440    0.08284  -7.658 6.14e-14 ***
## as.factor(mnth)2             78.27904  157.31151   0.498 0.618915    
## as.factor(mnth)3            589.79624  170.11265   3.467 0.000558 ***
## as.factor(mnth)4            802.91399  187.75335   4.276 2.16e-05 ***
## as.factor(mnth)5           1067.93372  213.61129   4.999 7.24e-07 ***
## as.factor(mnth)6           1392.30577  236.72497   5.882 6.24e-09 ***
## as.factor(mnth)7           1570.10342  263.39277   5.961 3.94e-09 ***
## as.factor(mnth)8           1357.90864  244.27430   5.559 3.84e-08 ***
## as.factor(mnth)9           1500.04215  220.35183   6.807 2.11e-11 ***
## as.factor(mnth)10          1533.67198  189.93917   8.075 2.88e-15 ***
## as.factor(mnth)11          1204.98441  172.32287   6.993 6.21e-12 ***
## as.factor(mnth)12           852.46710  161.87690   5.266 1.85e-07 ***
## weathersit                 -897.62907   56.82718 -15.796  < 2e-16 ***
## as.factor(Promotion)1      2051.77775   60.87459  33.705  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 812.3 on 714 degrees of freedom
## Multiple R-squared:  0.828,  Adjusted R-squared:  0.8242 
## F-statistic: 214.8 on 16 and 714 DF,  p-value: < 2.2e-16
crPlots(bike_2)

###3 July would have the highest number of riders. Yes, during the colder, rainer months the coefficeint would decrease.

###4

bike_3 = lm(total_riders ~ poly(temp,3,raw=TRUE) + as.factor(Promotion),data=bikeshare)
summary(bike_3)
## 
## Call:
## lm(formula = total_riders ~ poly(temp, 3, raw = TRUE) + as.factor(Promotion), 
##     data = bikeshare)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5452.3  -545.1    78.2   640.8  2197.7 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 854.2501   539.8930   1.582    0.114    
## poly(temp, 3, raw = TRUE)1 -138.5526    93.9333  -1.475    0.141    
## poly(temp, 3, raw = TRUE)2   25.6884     5.0349   5.102 4.29e-07 ***
## poly(temp, 3, raw = TRUE)3   -0.5605     0.0842  -6.656 5.53e-11 ***
## as.factor(Promotion)1      2050.0850    73.6830  27.823  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 990.6 on 726 degrees of freedom
## Multiple R-squared:  0.7399, Adjusted R-squared:  0.7385 
## F-statistic: 516.4 on 4 and 726 DF,  p-value: < 2.2e-16
crPlots(bike_3)

#4 The promotion does appear to effect the relationship riders in a positive linear fashion.

#5

bike_promo <- lm(registered ~ as.factor(Promotion), data=bikeshare)
summary(bike_promo)
## 
## Call:
## lm(formula = registered ~ as.factor(Promotion), data = bikeshare)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4561.5  -925.9   186.6   971.1  2364.5 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            2728.36      65.73   41.51   <2e-16 ***
## as.factor(Promotion)1  1853.09      92.89   19.95   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1256 on 729 degrees of freedom
## Multiple R-squared:  0.3531, Adjusted R-squared:  0.3522 
## F-statistic:   398 on 1 and 729 DF,  p-value: < 2.2e-16
bike_promo <- lm(casual ~ as.factor(Promotion), data=bikeshare)
summary(bike_promo)
## 
## Call:
## lm(formula = casual ~ as.factor(Promotion), data = bikeshare)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1016.48  -499.40   -96.48   207.10  2391.52 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             677.40      34.84  19.446  < 2e-16 ***
## as.factor(Promotion)1   341.08      49.23   6.928 9.38e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 665.5 on 729 degrees of freedom
## Multiple R-squared:  0.06177,    Adjusted R-squared:  0.06049 
## F-statistic:    48 on 1 and 729 DF,  p-value: 9.383e-12

The registereed riders appear to be using the bikeshare more often than the casual rider.