#Import the data from a web-hosted source
bikeshare <- read_csv("http://asayanalytics.com/bikeshare_csv")
bikeshare$total_riders <- bikeshare$casual + bikeshare$registered
bike_lm <- lm(total_riders~temp, data=bikeshare)
summary(bike_lm)
##
## Call:
## lm(formula = total_riders ~ temp, data = bikeshare)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4615.3 -1134.9 -104.4 1044.3 3737.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1214.642 161.164 7.537 1.43e-13 ***
## temp 161.969 7.444 21.759 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1509 on 729 degrees of freedom
## Multiple R-squared: 0.3937, Adjusted R-squared: 0.3929
## F-statistic: 473.5 on 1 and 729 DF, p-value: < 2.2e-16
crPlots(bike_lm)
###2 Yes, problems were identified with the data. Based off the graphs you could see the model had issues with linearity.
bike_1 <- lm(total_riders ~ poly(temp,3,raw=TRUE) + as.factor(mnth) + as.factor(Promotion),data=bikeshare)
summary(bike_1)
##
## Call:
## lm(formula = total_riders ~ poly(temp, 3, raw = TRUE) + as.factor(mnth) +
## as.factor(Promotion), data = bikeshare)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5945.7 -387.1 158.8 588.3 2249.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1703.79022 568.13134 2.999 0.002803 **
## poly(temp, 3, raw = TRUE)1 -314.88068 106.56376 -2.955 0.003231 **
## poly(temp, 3, raw = TRUE)2 30.86894 5.76927 5.351 1.18e-07 ***
## poly(temp, 3, raw = TRUE)3 -0.62064 0.09616 -6.454 2.00e-10 ***
## as.factor(mnth)2 157.01688 182.52260 0.860 0.389934
## as.factor(mnth)3 606.41800 197.47066 3.071 0.002215 **
## as.factor(mnth)4 846.12165 217.92942 3.883 0.000113 ***
## as.factor(mnth)5 1046.37991 247.96456 4.220 2.76e-05 ***
## as.factor(mnth)6 1413.86795 274.79645 5.145 3.46e-07 ***
## as.factor(mnth)7 1508.12446 305.72428 4.933 1.01e-06 ***
## as.factor(mnth)8 1283.28950 283.51159 4.526 7.03e-06 ***
## as.factor(mnth)9 1372.93703 255.62372 5.371 1.06e-07 ***
## as.factor(mnth)10 1440.19697 220.38291 6.535 1.21e-10 ***
## as.factor(mnth)11 1326.09468 199.84205 6.636 6.38e-11 ***
## as.factor(mnth)12 798.70686 187.87247 4.251 2.41e-05 ***
## as.factor(Promotion)1 2101.91343 70.56984 29.785 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 943 on 715 degrees of freedom
## Multiple R-squared: 0.7679, Adjusted R-squared: 0.763
## F-statistic: 157.7 on 15 and 715 DF, p-value: < 2.2e-16
###3
bike_2 = lm(total_riders ~ poly(temp,3,raw=TRUE) + as.factor(mnth) + weathersit + as.factor(Promotion),data=bikeshare)
summary(bike_2)
##
## Call:
## lm(formula = total_riders ~ poly(temp, 3, raw = TRUE) + as.factor(mnth) +
## weathersit + as.factor(Promotion), data = bikeshare)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4662.2 -384.3 47.7 483.2 2840.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2767.94290 494.02693 5.603 3.01e-08 ***
## poly(temp, 3, raw = TRUE)1 -281.55122 91.82268 -3.066 0.002249 **
## poly(temp, 3, raw = TRUE)2 30.19735 4.97007 6.076 2.01e-09 ***
## poly(temp, 3, raw = TRUE)3 -0.63440 0.08284 -7.658 6.14e-14 ***
## as.factor(mnth)2 78.27904 157.31151 0.498 0.618915
## as.factor(mnth)3 589.79624 170.11265 3.467 0.000558 ***
## as.factor(mnth)4 802.91399 187.75335 4.276 2.16e-05 ***
## as.factor(mnth)5 1067.93372 213.61129 4.999 7.24e-07 ***
## as.factor(mnth)6 1392.30577 236.72497 5.882 6.24e-09 ***
## as.factor(mnth)7 1570.10342 263.39277 5.961 3.94e-09 ***
## as.factor(mnth)8 1357.90864 244.27430 5.559 3.84e-08 ***
## as.factor(mnth)9 1500.04215 220.35183 6.807 2.11e-11 ***
## as.factor(mnth)10 1533.67198 189.93917 8.075 2.88e-15 ***
## as.factor(mnth)11 1204.98441 172.32287 6.993 6.21e-12 ***
## as.factor(mnth)12 852.46710 161.87690 5.266 1.85e-07 ***
## weathersit -897.62907 56.82718 -15.796 < 2e-16 ***
## as.factor(Promotion)1 2051.77775 60.87459 33.705 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 812.3 on 714 degrees of freedom
## Multiple R-squared: 0.828, Adjusted R-squared: 0.8242
## F-statistic: 214.8 on 16 and 714 DF, p-value: < 2.2e-16
crPlots(bike_2)
###3 July would have the highest number of riders. Yes, during the colder, rainer months the coefficeint would decrease.
###4
bike_3 = lm(total_riders ~ poly(temp,3,raw=TRUE) + as.factor(Promotion),data=bikeshare)
summary(bike_3)
##
## Call:
## lm(formula = total_riders ~ poly(temp, 3, raw = TRUE) + as.factor(Promotion),
## data = bikeshare)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5452.3 -545.1 78.2 640.8 2197.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 854.2501 539.8930 1.582 0.114
## poly(temp, 3, raw = TRUE)1 -138.5526 93.9333 -1.475 0.141
## poly(temp, 3, raw = TRUE)2 25.6884 5.0349 5.102 4.29e-07 ***
## poly(temp, 3, raw = TRUE)3 -0.5605 0.0842 -6.656 5.53e-11 ***
## as.factor(Promotion)1 2050.0850 73.6830 27.823 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 990.6 on 726 degrees of freedom
## Multiple R-squared: 0.7399, Adjusted R-squared: 0.7385
## F-statistic: 516.4 on 4 and 726 DF, p-value: < 2.2e-16
crPlots(bike_3)
#4 The promotion does appear to effect the relationship riders in a positive linear fashion.
#5
bike_promo <- lm(registered ~ as.factor(Promotion), data=bikeshare)
summary(bike_promo)
##
## Call:
## lm(formula = registered ~ as.factor(Promotion), data = bikeshare)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4561.5 -925.9 186.6 971.1 2364.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2728.36 65.73 41.51 <2e-16 ***
## as.factor(Promotion)1 1853.09 92.89 19.95 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1256 on 729 degrees of freedom
## Multiple R-squared: 0.3531, Adjusted R-squared: 0.3522
## F-statistic: 398 on 1 and 729 DF, p-value: < 2.2e-16
bike_promo <- lm(casual ~ as.factor(Promotion), data=bikeshare)
summary(bike_promo)
##
## Call:
## lm(formula = casual ~ as.factor(Promotion), data = bikeshare)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1016.48 -499.40 -96.48 207.10 2391.52
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 677.40 34.84 19.446 < 2e-16 ***
## as.factor(Promotion)1 341.08 49.23 6.928 9.38e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 665.5 on 729 degrees of freedom
## Multiple R-squared: 0.06177, Adjusted R-squared: 0.06049
## F-statistic: 48 on 1 and 729 DF, p-value: 9.383e-12
The registereed riders appear to be using the bikeshare more often than the casual rider.