Reading dataset into R
# Load the records from day.csv. The unmodified frame is kept as bike1 so that
# columns removed from bike later on (e.g. dteday) remain available.
bike1 <- read.csv(file = "day.csv")
bike <- bike1
Removing unnecessary variables and summarizing the dataset
# Discard the first two columns (dteday is one of them: it is still read from
# bike1 later but no longer appears in the summary of bike), then print
# per-column summary statistics.
bike <- bike[-(1:2)]
summary(object = bike)
## season yr mnth holiday
## Min. :1.000 Min. :0.0000 Min. : 1.00 Min. :0.00000
## 1st Qu.:2.000 1st Qu.:0.0000 1st Qu.: 4.00 1st Qu.:0.00000
## Median :3.000 Median :1.0000 Median : 7.00 Median :0.00000
## Mean :2.497 Mean :0.5007 Mean : 6.52 Mean :0.02873
## 3rd Qu.:3.000 3rd Qu.:1.0000 3rd Qu.:10.00 3rd Qu.:0.00000
## Max. :4.000 Max. :1.0000 Max. :12.00 Max. :1.00000
## weekday workingday weathersit temp
## Min. :0.000 Min. :0.000 Min. :1.000 Min. :0.05913
## 1st Qu.:1.000 1st Qu.:0.000 1st Qu.:1.000 1st Qu.:0.33708
## Median :3.000 Median :1.000 Median :1.000 Median :0.49833
## Mean :2.997 Mean :0.684 Mean :1.395 Mean :0.49538
## 3rd Qu.:5.000 3rd Qu.:1.000 3rd Qu.:2.000 3rd Qu.:0.65542
## Max. :6.000 Max. :1.000 Max. :3.000 Max. :0.86167
## atemp hum windspeed casual
## Min. :0.07907 Min. :0.0000 Min. :0.02239 Min. : 2.0
## 1st Qu.:0.33784 1st Qu.:0.5200 1st Qu.:0.13495 1st Qu.: 315.5
## Median :0.48673 Median :0.6267 Median :0.18097 Median : 713.0
## Mean :0.47435 Mean :0.6279 Mean :0.19049 Mean : 848.2
## 3rd Qu.:0.60860 3rd Qu.:0.7302 3rd Qu.:0.23321 3rd Qu.:1096.0
## Max. :0.84090 Max. :0.9725 Max. :0.50746 Max. :3410.0
## registered cnt
## Min. : 20 Min. : 22
## 1st Qu.:2497 1st Qu.:3152
## Median :3662 Median :4548
## Mean :3656 Mean :4504
## 3rd Qu.:4776 3rd Qu.:5956
## Max. :6946 Max. :8714
Describing the dataset
library(car)
## Warning: package 'car' was built under R version 3.4.3
library(psych)
## Warning: package 'psych' was built under R version 3.4.3
##
## Attaching package: 'psych'
## The following object is masked from 'package:car':
##
## logit
library(lattice)
# Descriptive statistics for every column of bike. The call is
# namespace-qualified to make explicit that this is psych's describe().
psych::describe(x = bike)
## vars n mean sd median trimmed mad min max
## season 1 731 2.50 1.11 3.00 2.50 1.48 1.00 4.00
## yr 2 731 0.50 0.50 1.00 0.50 0.00 0.00 1.00
## mnth 3 731 6.52 3.45 7.00 6.52 4.45 1.00 12.00
## holiday 4 731 0.03 0.17 0.00 0.00 0.00 0.00 1.00
## weekday 5 731 3.00 2.00 3.00 3.00 2.97 0.00 6.00
## workingday 6 731 0.68 0.47 1.00 0.73 0.00 0.00 1.00
## weathersit 7 731 1.40 0.54 1.00 1.33 0.00 1.00 3.00
## temp 8 731 0.50 0.18 0.50 0.50 0.23 0.06 0.86
## atemp 9 731 0.47 0.16 0.49 0.48 0.20 0.08 0.84
## hum 10 731 0.63 0.14 0.63 0.63 0.16 0.00 0.97
## windspeed 11 731 0.19 0.08 0.18 0.19 0.07 0.02 0.51
## casual 12 731 848.18 686.62 713.00 744.95 587.11 2.00 3410.00
## registered 13 731 3656.17 1560.26 3662.00 3641.72 1712.40 20.00 6946.00
## cnt 14 731 4504.35 1937.21 4548.00 4517.19 2086.02 22.00 8714.00
## range skew kurtosis se
## season 3.00 0.00 -1.35 0.04
## yr 1.00 0.00 -2.00 0.02
## mnth 11.00 -0.01 -1.21 0.13
## holiday 1.00 5.63 29.75 0.01
## weekday 6.00 0.00 -1.26 0.07
## workingday 1.00 -0.79 -1.38 0.02
## weathersit 2.00 0.95 -0.15 0.02
## temp 0.80 -0.05 -1.12 0.01
## atemp 0.76 -0.13 -0.99 0.01
## hum 0.97 -0.07 -0.08 0.01
## windspeed 0.49 0.67 0.39 0.00
## casual 3408.00 1.26 1.29 25.40
## registered 6926.00 0.04 -0.72 57.71
## cnt 8692.00 -0.05 -0.82 71.65
Creating one-way tables for categorical variables
# Banner and code legend, followed by the row count per season code.
print(x = "Distribution of Seasons in the data set")
## [1] "Distribution of Seasons in the data set"
print(x = "1:spring, 2:summer, 3:fall, 4:winter")
## [1] "1:spring, 2:summer, 3:fall, 4:winter"
table(bike[["season"]])
##
## 1 2 3 4
## 181 184 188 178
print("--------------------------------------")
## [1] "--------------------------------------"
# Banner and code legend, followed by the row count per year code.
print("Distribution of years in the data set")
## [1] "Distribution of years in the data set"
# yr is a 0/1 code for two distinct years: 0 = 2011 (365 rows) and
# 1 = 2012 (366 rows, a leap year).
print("0: 2011, 1: 2012")
## [1] "0: 2011, 1: 2012"
table(bike$yr)
##
## 0 1
## 365 366
print(x = "--------------------------------------")
## [1] "--------------------------------------"
# Banner, followed by the row count per month code (1-12).
print(x = "Distribution of Months in the data set")
## [1] "Distribution of Months in the data set"
table(bike[["mnth"]])
##
## 1 2 3 4 5 6 7 8 9 10 11 12
## 62 57 62 60 62 60 62 62 60 62 60 62
print(x = "--------------------------------------")
## [1] "--------------------------------------"
# Banner, followed by the row count per weekday code (0-6).
print(x = "Distribution of weekdays in the data set")
## [1] "Distribution of weekdays in the data set"
table(bike[["weekday"]])
##
## 0 1 2 3 4 5 6
## 105 105 104 104 104 104 105
print(x = "--------------------------------------")
## [1] "--------------------------------------"
# Banner and legend, followed by the row count per holiday flag (0/1).
print(x = "Distribution of Holidays in the data set")
## [1] "Distribution of Holidays in the data set"
print(x = "Holiday or not")
## [1] "Holiday or not"
table(bike[["holiday"]])
##
## 0 1
## 710 21
print("--------------------------------------")
## [1] "--------------------------------------"
# Banner and code legend, followed by the row count per weather-situation code.
print("Distribution of Weather conditions in the data set")
## [1] "Distribution of Weather conditions in the data set"
# ":" separates a code from its label, so the two conditions that share
# code 3 are joined with "/" to keep the legend unambiguous.
print("1:Clear, 2:Mist, 3:Light Snow/Light Rain, 4:Heavy Rain")
## [1] "1:Clear, 2:Mist, 3:Light Snow/Light Rain, 4:Heavy Rain"
table(bike$weathersit)
##
## 1 2 3
## 463 247 21
# Bar chart of the same counts; the title and x-axis label name the variable
# that is actually plotted (weathersit, not season).
barplot(table(bike$weathersit), main = "Distribution of Weather conditions in the dataframe", xlab = "Weather condition", ylab = "Count")

Average bikes rented per day in each of the two years
# Mean of cnt within each year code.
aggregate(x = bike$cnt, by = list(Year = bike$yr), FUN = mean)
## Year x
## 1 0 3405.762
## 2 1 5599.934
Renting of bikes per season
# Code legend, mean of cnt within each season code, then the raw cnt values
# plotted against the season code.
print(x = "1:spring, 2:summer, 3:fall, 4:winter")
## [1] "1:spring, 2:summer, 3:fall, 4:winter"
aggregate(x = bike$cnt, by = list(Season = bike$season), FUN = mean)
## Season x
## 1 1 2604.133
## 2 2 4992.332
## 3 3 5644.303
## 4 4 4728.163
plot(x = bike$season, y = bike$cnt, main = "renting of bikes per season")

Average daily bike trips per month
# Mean of cnt within each month code, then the raw cnt values plotted against
# the month code.
aggregate(x = bike$cnt, by = list(Month = bike$mnth), FUN = mean)
## Month x
## 1 1 2176.339
## 2 2 2655.298
## 3 3 3692.258
## 4 4 4484.900
## 5 5 5349.774
## 6 6 5772.367
## 7 7 5563.677
## 8 8 5664.419
## 9 9 5766.517
## 10 10 5199.226
## 11 11 4247.183
## 12 12 3403.806
plot(x = bike$mnth, y = bike$cnt, main = "renting of bikes per month")

Renting of bikes per weekday
# Mean of cnt within each weekday code, then the raw cnt values plotted
# against the weekday code.
aggregate(x = bike$cnt, by = list(Day_of_the_week = bike$weekday), FUN = mean)
## Day_of_the_week x
## 1 0 4228.829
## 2 1 4338.124
## 3 2 4510.663
## 4 3 4548.538
## 5 4 4667.260
## 6 5 4690.288
## 7 6 4550.543
plot(x = bike$weekday, y = bike$cnt, main = "renting of bikes per weekday")

Renting of bikes on holidays
# Mean of cnt for non-holidays (0) and holidays (1), then the raw cnt values
# plotted against the holiday flag.
aggregate(x = bike$cnt, by = list(Holiday = bike$holiday), FUN = mean)
## Holiday x
## 1 0 4527.104
## 2 1 3735.000
plot(x = bike$holiday, y = bike$cnt, main = "renting of bikes on holidays vs regular days")

Renting of bikes according to weather
# Mean of cnt within each weather-situation code.
aggregate(bike$cnt, by=list(Type_of_weather=bike$weathersit), mean)
## Type_of_weather x
## 1 1 4876.786
## 2 2 4035.862
## 3 3 1803.286
# Raw cnt values against the weather-situation code.
plot(bike$weathersit,bike$cnt, main = "renting of bikes according to weather")

# Raw cnt values against temp; the title names temperature so this plot is
# not confused with the weathersit plot above.
plot(bike$temp,bike$cnt, main = "renting of bikes according to temperature")

Histograms for various weather variables
# lattice histograms, one per continuous weather variable.
histogram(bike$temp, col = "Red")

histogram(bike$hum,col = "Green")

# The third panel shows windspeed rather than repeating temp, so each
# histogram covers a different variable.
# NOTE(review): windspeed is assumed to be the intended variable; confirm.
histogram(bike$windspeed, col = "purple")

Bike Sharing Data CORRGRAM
library(corrgram)
## Warning: package 'corrgram' was built under R version 3.4.3
# Correlogram of every column in bike: shaded cells below the diagonal, pies
# above it, min/max values and variable names on the diagonal. order = TRUE
# lets corrgram reorder the variables.
corrgram(
  bike,
  main = "Bike Sharing Data CORRGRAM",
  order = TRUE,
  upper.panel = panel.pie,
  lower.panel = panel.shade,
  text.panel = panel.txt,
  diag.panel = panel.minmax
)

Correlation between various variables
# Pearson correlation matrix over all columns of bike.
cor(x = bike, method = "pearson")
## season yr mnth holiday
## season 1.000000000 -0.001844343 0.831440114 -0.010536659
## yr -0.001844343 1.000000000 -0.001792434 0.007954311
## mnth 0.831440114 -0.001792434 1.000000000 0.019190895
## holiday -0.010536659 0.007954311 0.019190895 1.000000000
## weekday -0.003079881 -0.005460765 0.009509313 -0.101960269
## workingday 0.012484963 -0.002012621 -0.005900951 -0.253022700
## weathersit 0.019211028 -0.048726541 0.043528098 -0.034626841
## temp 0.334314856 0.047603572 0.220205335 -0.028555535
## atemp 0.342875613 0.046106149 0.227458630 -0.032506692
## hum 0.205444765 -0.110651045 0.222203691 -0.015937479
## windspeed -0.229046337 -0.011817060 -0.207501752 0.006291507
## casual 0.210399165 0.248545664 0.123005889 0.054274203
## registered 0.411623051 0.594248168 0.293487830 -0.108744863
## cnt 0.406100371 0.566709708 0.279977112 -0.068347716
## weekday workingday weathersit temp
## season -0.0030798813 0.012484963 0.01921103 0.3343148564
## yr -0.0054607652 -0.002012621 -0.04872654 0.0476035719
## mnth 0.0095093129 -0.005900951 0.04352810 0.2202053352
## holiday -0.1019602689 -0.253022700 -0.03462684 -0.0285555350
## weekday 1.0000000000 0.035789674 0.03108747 -0.0001699624
## workingday 0.0357896736 1.000000000 0.06120043 0.0526598102
## weathersit 0.0310874694 0.061200430 1.00000000 -0.1206022365
## temp -0.0001699624 0.052659810 -0.12060224 1.0000000000
## atemp -0.0075371318 0.052182275 -0.12158335 0.9917015532
## hum -0.0522321004 0.024327046 0.59104460 0.1269629390
## windspeed 0.0142821241 -0.018796487 0.03951106 -0.1579441204
## casual 0.0599226375 -0.518044191 -0.24735300 0.5432846617
## registered 0.0573674440 0.303907117 -0.26038771 0.5400119662
## cnt 0.0674434124 0.061156063 -0.29739124 0.6274940090
## atemp hum windspeed casual registered
## season 0.342875613 0.20544476 -0.229046337 0.21039916 0.41162305
## yr 0.046106149 -0.11065104 -0.011817060 0.24854566 0.59424817
## mnth 0.227458630 0.22220369 -0.207501752 0.12300589 0.29348783
## holiday -0.032506692 -0.01593748 0.006291507 0.05427420 -0.10874486
## weekday -0.007537132 -0.05223210 0.014282124 0.05992264 0.05736744
## workingday 0.052182275 0.02432705 -0.018796487 -0.51804419 0.30390712
## weathersit -0.121583354 0.59104460 0.039511059 -0.24735300 -0.26038771
## temp 0.991701553 0.12696294 -0.157944120 0.54328466 0.54001197
## atemp 1.000000000 0.13998806 -0.183642967 0.54386369 0.54419176
## hum 0.139988060 1.00000000 -0.248489099 -0.07700788 -0.09108860
## windspeed -0.183642967 -0.24848910 1.000000000 -0.16761335 -0.21744898
## casual 0.543863690 -0.07700788 -0.167613349 1.00000000 0.39528245
## registered 0.544191758 -0.09108860 -0.217448981 0.39528245 1.00000000
## cnt 0.631065700 -0.10065856 -0.234544997 0.67280443 0.94551692
## cnt
## season 0.40610037
## yr 0.56670971
## mnth 0.27997711
## holiday -0.06834772
## weekday 0.06744341
## workingday 0.06115606
## weathersit -0.29739124
## temp 0.62749401
## atemp 0.63106570
## hum -0.10065856
## windspeed -0.23454500
## casual 0.67280443
## registered 0.94551692
## cnt 1.00000000
# Pearson correlations among the seven coded calendar/weather columns and cnt
# (columns 1-7 and 14 of bike, selected here by name).
cor(
  x = bike[c("season", "yr", "mnth", "holiday", "weekday", "workingday",
             "weathersit", "cnt")],
  method = "pearson"
)
## season yr mnth holiday
## season 1.000000000 -0.001844343 0.831440114 -0.010536659
## yr -0.001844343 1.000000000 -0.001792434 0.007954311
## mnth 0.831440114 -0.001792434 1.000000000 0.019190895
## holiday -0.010536659 0.007954311 0.019190895 1.000000000
## weekday -0.003079881 -0.005460765 0.009509313 -0.101960269
## workingday 0.012484963 -0.002012621 -0.005900951 -0.253022700
## weathersit 0.019211028 -0.048726541 0.043528098 -0.034626841
## cnt 0.406100371 0.566709708 0.279977112 -0.068347716
## weekday workingday weathersit cnt
## season -0.003079881 0.012484963 0.01921103 0.40610037
## yr -0.005460765 -0.002012621 -0.04872654 0.56670971
## mnth 0.009509313 -0.005900951 0.04352810 0.27997711
## holiday -0.101960269 -0.253022700 -0.03462684 -0.06834772
## weekday 1.000000000 0.035789674 0.03108747 0.06744341
## workingday 0.035789674 1.000000000 0.06120043 0.06115606
## weathersit 0.031087469 0.061200430 1.00000000 -0.29739124
## cnt 0.067443412 0.061156063 -0.29739124 1.00000000
# Pearson correlations among the continuous weather measures and the three
# count columns (columns 8-14 of bike, selected here by name).
cor(
  x = bike[c("temp", "atemp", "hum", "windspeed", "casual", "registered",
             "cnt")],
  method = "pearson"
)
## temp atemp hum windspeed casual
## temp 1.0000000 0.9917016 0.12696294 -0.1579441 0.54328466
## atemp 0.9917016 1.0000000 0.13998806 -0.1836430 0.54386369
## hum 0.1269629 0.1399881 1.00000000 -0.2484891 -0.07700788
## windspeed -0.1579441 -0.1836430 -0.24848910 1.0000000 -0.16761335
## casual 0.5432847 0.5438637 -0.07700788 -0.1676133 1.00000000
## registered 0.5400120 0.5441918 -0.09108860 -0.2174490 0.39528245
## cnt 0.6274940 0.6310657 -0.10065856 -0.2345450 0.67280443
## registered cnt
## temp 0.5400120 0.6274940
## atemp 0.5441918 0.6310657
## hum -0.0910886 -0.1006586
## windspeed -0.2174490 -0.2345450
## casual 0.3952825 0.6728044
## registered 1.0000000 0.9455169
## cnt 0.9455169 1.0000000
Scatterplot for various factors vs Counts
# car::scatterplot of cnt against one explanatory column at a time. The
# argument expressions are kept verbatim because the default axis labels are
# derived from them.
# dteday only exists in the untouched copy bike1.
scatterplot(x = bike1$dteday, y = bike1$cnt)

scatterplot(x = bike$mnth, y = bike$cnt)

scatterplot(x = bike$weekday, y = bike$cnt)

scatterplot(x = bike$windspeed, y = bike$cnt)

scatterplot(x = bike$temp, y = bike$cnt)

scatterplot(x = bike$hum, y = bike$cnt)

Scatterplot showing distribution of demand across Casual, Registered and Total.
# Weekday scatterplots for total (cnt), casual and registered counts; the
# value returned by each call is stored in a, b and c respectively.
# NOTE(review): the name c shadows base::c as a variable, and none of the three
# results is used in the visible part of the file - consider renaming or
# dropping the assignments.
a <- scatterplot(x = bike$weekday, y = bike$cnt)

b <- scatterplot(x = bike$weekday, y = bike$casual)

c <- scatterplot(x = bike$weekday, y = bike$registered)

Linear model including all the calendar and weather-situation variables plus windspeed and humidity (temp and atemp are left out)
# Least-squares fit of cnt on the calendar codes, weather-situation code,
# windspeed and humidity; every predictor enters as a plain numeric term.
# NOTE(review): writing the formula with bare column names and data = bike
# would make predict(newdata = ...) usable, but it changes the printed Call
# and coefficient labels.
model3 <- lm(
  bike$cnt ~ bike$season + bike$yr + bike$mnth + bike$holiday +
    bike$weekday + bike$workingday + bike$weathersit +
    bike$windspeed + bike$hum
)
summary(object = model3)
##
## Call:
## lm(formula = bike$cnt ~ bike$season + bike$yr + bike$mnth + bike$holiday +
## bike$weekday + bike$workingday + bike$weathersit + bike$windspeed +
## bike$hum)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4916.0 -917.1 112.6 921.6 3501.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3147.19 313.02 10.054 < 2e-16 ***
## bike$season 899.44 74.19 12.123 < 2e-16 ***
## bike$yr 2155.91 91.48 23.566 < 2e-16 ***
## bike$mnth -94.77 23.84 -3.975 7.74e-05 ***
## bike$holiday -593.91 282.54 -2.102 0.0359 *
## bike$weekday 77.07 22.89 3.367 0.0008 ***
## bike$workingday 225.15 101.12 2.226 0.0263 *
## bike$weathersit -1066.68 107.44 -9.928 < 2e-16 ***
## bike$windspeed -3062.71 632.35 -4.843 1.56e-06 ***
## bike$hum 563.64 432.63 1.303 0.1930
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1227 on 721 degrees of freedom
## Multiple R-squared: 0.6037, Adjusted R-squared: 0.5988
## F-statistic: 122 on 9 and 721 DF, p-value: < 2.2e-16
Linear model without humidity (hum was not significant in the previous model, p = 0.19)
# Same specification as model3 with the humidity term removed.
model4 <- lm(
  bike$cnt ~ bike$season + bike$yr + bike$mnth + bike$holiday +
    bike$weekday + bike$workingday + bike$weathersit +
    bike$windspeed
)
summary(object = model4)
##
## Call:
## lm(formula = bike$cnt ~ bike$season + bike$yr + bike$mnth + bike$holiday +
## bike$weekday + bike$workingday + bike$weathersit + bike$windspeed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4861.3 -903.0 95.9 933.5 3528.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3417.80 234.29 14.588 < 2e-16 ***
## bike$season 903.34 74.17 12.180 < 2e-16 ***
## bike$yr 2142.34 90.93 23.560 < 2e-16 ***
## bike$mnth -92.38 23.78 -3.885 0.000112 ***
## bike$holiday -596.58 282.67 -2.111 0.035153 *
## bike$weekday 74.33 22.80 3.259 0.001169 **
## bike$workingday 222.44 101.15 2.199 0.028185 *
## bike$weathersit -979.20 83.91 -11.669 < 2e-16 ***
## bike$windspeed -3309.88 603.52 -5.484 5.74e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1228 on 722 degrees of freedom
## Multiple R-squared: 0.6028, Adjusted R-squared: 0.5984
## F-statistic: 137 on 8 and 722 DF, p-value: < 2.2e-16
Linear model keeping only the most strongly significant variables (holiday and workingday dropped)
# Reduced specification: season, year, month, weekday, weather situation and
# windspeed only (no holiday, workingday or humidity terms).
model5 <- lm(
  bike$cnt ~ bike$season + bike$yr + bike$mnth +
    bike$weekday + bike$weathersit + bike$windspeed
)
summary(object = model5)
##
## Call:
## lm(formula = bike$cnt ~ bike$season + bike$yr + bike$mnth + bike$weekday +
## bike$weathersit + bike$windspeed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5014.4 -915.3 134.9 923.4 3362.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3511.98 225.75 15.557 < 2e-16 ***
## bike$season 916.71 74.59 12.290 < 2e-16 ***
## bike$yr 2141.37 91.58 23.383 < 2e-16 ***
## bike$mnth -97.04 23.91 -4.059 5.47e-05 ***
## bike$weekday 81.21 22.84 3.555 0.000403 ***
## bike$weathersit -961.08 84.33 -11.397 < 2e-16 ***
## bike$windspeed -3349.88 607.68 -5.513 4.92e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1236 on 724 degrees of freedom
## Multiple R-squared: 0.596, Adjusted R-squared: 0.5926
## F-statistic: 178 on 6 and 724 DF, p-value: < 2.2e-16