Reading dataset into R

# Load the daily records once. bike1 keeps the table exactly as read;
# bike is the working copy that the rest of the analysis modifies.
bike1 <- read.csv(file = "day.csv")
bike <- bike1

Removing unnecessary variables and summarizing the dataset

# Discard the first two columns of the working copy, then print per-column
# summary statistics for the columns that remain.
bike[1:2] <- NULL
summary(bike)
##      season            yr              mnth          holiday       
##  Min.   :1.000   Min.   :0.0000   Min.   : 1.00   Min.   :0.00000  
##  1st Qu.:2.000   1st Qu.:0.0000   1st Qu.: 4.00   1st Qu.:0.00000  
##  Median :3.000   Median :1.0000   Median : 7.00   Median :0.00000  
##  Mean   :2.497   Mean   :0.5007   Mean   : 6.52   Mean   :0.02873  
##  3rd Qu.:3.000   3rd Qu.:1.0000   3rd Qu.:10.00   3rd Qu.:0.00000  
##  Max.   :4.000   Max.   :1.0000   Max.   :12.00   Max.   :1.00000  
##     weekday        workingday      weathersit         temp        
##  Min.   :0.000   Min.   :0.000   Min.   :1.000   Min.   :0.05913  
##  1st Qu.:1.000   1st Qu.:0.000   1st Qu.:1.000   1st Qu.:0.33708  
##  Median :3.000   Median :1.000   Median :1.000   Median :0.49833  
##  Mean   :2.997   Mean   :0.684   Mean   :1.395   Mean   :0.49538  
##  3rd Qu.:5.000   3rd Qu.:1.000   3rd Qu.:2.000   3rd Qu.:0.65542  
##  Max.   :6.000   Max.   :1.000   Max.   :3.000   Max.   :0.86167  
##      atemp              hum           windspeed           casual      
##  Min.   :0.07907   Min.   :0.0000   Min.   :0.02239   Min.   :   2.0  
##  1st Qu.:0.33784   1st Qu.:0.5200   1st Qu.:0.13495   1st Qu.: 315.5  
##  Median :0.48673   Median :0.6267   Median :0.18097   Median : 713.0  
##  Mean   :0.47435   Mean   :0.6279   Mean   :0.19049   Mean   : 848.2  
##  3rd Qu.:0.60860   3rd Qu.:0.7302   3rd Qu.:0.23321   3rd Qu.:1096.0  
##  Max.   :0.84090   Max.   :0.9725   Max.   :0.50746   Max.   :3410.0  
##    registered        cnt      
##  Min.   :  20   Min.   :  22  
##  1st Qu.:2497   1st Qu.:3152  
##  Median :3662   Median :4548  
##  Mean   :3656   Mean   :4504  
##  3rd Qu.:4776   3rd Qu.:5956  
##  Max.   :6946   Max.   :8714

Describing the dataset

# Attach the packages used by the rest of the report. psych is attached after
# car, so psych's logit masks car's (see the message below).
library(car)
## Warning: package 'car' was built under R version 3.4.3
library(psych)
## Warning: package 'psych' was built under R version 3.4.3
## 
## Attaching package: 'psych'
## The following object is masked from 'package:car':
## 
##     logit
library(lattice)
# Descriptive statistics (n, mean, sd, median, skew, kurtosis, ...) for every
# column of the working data frame.
psych::describe(bike)
##            vars   n    mean      sd  median trimmed     mad   min     max
## season        1 731    2.50    1.11    3.00    2.50    1.48  1.00    4.00
## yr            2 731    0.50    0.50    1.00    0.50    0.00  0.00    1.00
## mnth          3 731    6.52    3.45    7.00    6.52    4.45  1.00   12.00
## holiday       4 731    0.03    0.17    0.00    0.00    0.00  0.00    1.00
## weekday       5 731    3.00    2.00    3.00    3.00    2.97  0.00    6.00
## workingday    6 731    0.68    0.47    1.00    0.73    0.00  0.00    1.00
## weathersit    7 731    1.40    0.54    1.00    1.33    0.00  1.00    3.00
## temp          8 731    0.50    0.18    0.50    0.50    0.23  0.06    0.86
## atemp         9 731    0.47    0.16    0.49    0.48    0.20  0.08    0.84
## hum          10 731    0.63    0.14    0.63    0.63    0.16  0.00    0.97
## windspeed    11 731    0.19    0.08    0.18    0.19    0.07  0.02    0.51
## casual       12 731  848.18  686.62  713.00  744.95  587.11  2.00 3410.00
## registered   13 731 3656.17 1560.26 3662.00 3641.72 1712.40 20.00 6946.00
## cnt          14 731 4504.35 1937.21 4548.00 4517.19 2086.02 22.00 8714.00
##              range  skew kurtosis    se
## season        3.00  0.00    -1.35  0.04
## yr            1.00  0.00    -2.00  0.02
## mnth         11.00 -0.01    -1.21  0.13
## holiday       1.00  5.63    29.75  0.01
## weekday       6.00  0.00    -1.26  0.07
## workingday    1.00 -0.79    -1.38  0.02
## weathersit    2.00  0.95    -0.15  0.02
## temp          0.80 -0.05    -1.12  0.01
## atemp         0.76 -0.13    -0.99  0.01
## hum           0.97 -0.07    -0.08  0.01
## windspeed     0.49  0.67     0.39  0.00
## casual     3408.00  1.26     1.29 25.40
## registered 6926.00  0.04    -0.72 57.71
## cnt        8692.00 -0.05    -0.82 71.65

Creating one-way tables for categorical variables

# One-way frequency tables for the coded categorical columns. Each table is
# preceded by a printed caption and, where one exists, a legend for the codes.
print("Distribution of Seasons in the data set")
## [1] "Distribution of Seasons in the data set"
print("1:spring, 2:summer, 3:fall, 4:winter")
## [1] "1:spring, 2:summer, 3:fall, 4:winter"
table(bike$season)
## 
##   1   2   3   4 
## 181 184 188 178
print("--------------------------------------")
## [1] "--------------------------------------"
print("Distribution of years in the data set")
## [1] "Distribution of years in the data set"
# Code 0 has 365 rows and code 1 has 366 (a leap year), so the two codes are
# the consecutive years 2011 and 2012, not 2011 twice.
print("0: 2011, 1: 2012")
## [1] "0: 2011, 1: 2012"
table(bike$yr)
## 
##   0   1 
## 365 366
print("--------------------------------------")
## [1] "--------------------------------------"
print("Distribution of Months in the data set")
## [1] "Distribution of Months in the data set"
table(bike$mnth)
## 
##  1  2  3  4  5  6  7  8  9 10 11 12 
## 62 57 62 60 62 60 62 62 60 62 60 62
print("--------------------------------------")
## [1] "--------------------------------------"
print("Distribution of weekdays in the data set")
## [1] "Distribution of weekdays in the data set"
table(bike$weekday)
## 
##   0   1   2   3   4   5   6 
## 105 105 104 104 104 104 105
print("--------------------------------------")
## [1] "--------------------------------------"
print("Distribution of Holidays in the data set")
## [1] "Distribution of Holidays in the data set"
print("Holiday or not")
## [1] "Holiday or not"
table(bike$holiday)
## 
##   0   1 
## 710  21
print("--------------------------------------")
## [1] "--------------------------------------"
print("Distribution of Weather conditions in the data set")
## [1] "Distribution of Weather conditions in the data set"
print("1:Clear, 2:Mist, 3:Light Snow:Light Rain, 4:Heavy Rain")
## [1] "1:Clear, 2:Mist, 3:Light Snow:Light Rain, 4:Heavy Rain"
# Only codes 1-3 occur in this data; code 4 from the legend never appears.
table(bike$weathersit)
## 
##   1   2   3 
## 463 247  21
# Bar chart of the weather-condition counts tabulated above. The title and
# axis label name the variable that is actually plotted (weathersit).
barplot(
  table(bike$weathersit),
  main = "Distribution of Weather conditions in the data set",
  xlab = "Weather condition",
  ylab = "Count"
)

Average number of bikes rented per day in each year

# Mean daily rental count (cnt) for each value of yr.
aggregate(x = bike$cnt, by = list(Year = bike$yr), FUN = mean)
##   Year        x
## 1    0 3405.762
## 2    1 5599.934

Renting of bikes per season

# Legend for the season codes, followed by the mean daily count per season.
print("1:spring, 2:summer, 3:fall, 4:winter")
## [1] "1:spring, 2:summer, 3:fall, 4:winter"
aggregate(x = bike$cnt, by = list(Season = bike$season), FUN = mean)
##   Season        x
## 1      1 2604.133
## 2      2 4992.332
## 3      3 5644.303
## 4      4 4728.163
# Every daily count plotted against its season code.
plot(
  bike$season, bike$cnt,
  main = "renting of bikes per season"
)

Average daily bike trips per month

# Mean daily count for each calendar month (1-12).
aggregate(x = bike$cnt, by = list(Month = bike$mnth), FUN = mean)
##    Month        x
## 1      1 2176.339
## 2      2 2655.298
## 3      3 3692.258
## 4      4 4484.900
## 5      5 5349.774
## 6      6 5772.367
## 7      7 5563.677
## 8      8 5664.419
## 9      9 5766.517
## 10    10 5199.226
## 11    11 4247.183
## 12    12 3403.806
# Every daily count plotted against its month number.
plot(
  bike$mnth, bike$cnt,
  main = "renting of bikes per month"
)

Renting of bikes per weekday

# Mean daily count for each weekday code (0-6).
aggregate(
  x = bike$cnt,
  by = list(Day_of_the_week = bike$weekday),
  FUN = mean
)
##   Day_of_the_week        x
## 1               0 4228.829
## 2               1 4338.124
## 3               2 4510.663
## 4               3 4548.538
## 5               4 4667.260
## 6               5 4690.288
## 7               6 4550.543
# Every daily count plotted against its weekday code.
plot(
  bike$weekday, bike$cnt,
  main = "renting of bikes per weekday"
)

Renting of bikes on holidays

# Mean daily count on non-holidays (0) versus holidays (1).
aggregate(x = bike$cnt, by = list(Holiday = bike$holiday), FUN = mean)
##   Holiday        x
## 1       0 4527.104
## 2       1 3735.000
# Every daily count plotted against the holiday flag.
plot(
  bike$holiday, bike$cnt,
  main = "renting of bikes on holidays vs regular days"
)

Renting of bikes according to weather

# Mean daily count for each weather-situation code.
aggregate(bike$cnt, by = list(Type_of_weather = bike$weathersit), FUN = mean)
##   Type_of_weather        x
## 1               1 4876.786
## 2               2 4035.862
## 3               3 1803.286
# Every daily count plotted against its weather-situation code.
plot(bike$weathersit, bike$cnt, main = "renting of bikes according to weather")

# Daily counts against temperature. The title names the variable on the
# x-axis so this figure is not confused with the weather-situation plot above.
plot(bike$temp, bike$cnt, main = "renting of bikes according to temperature")

Histograms for various weather variables

# Distributions of three continuous weather measurements: temperature,
# humidity and wind speed.
histogram(bike$temp, col = "Red")

histogram(bike$hum, col = "Green")

# Third measurement: wind speed (temperature is already shown in the first
# histogram, so repeating it here would add nothing).
histogram(bike$windspeed, col = "purple")

Bike Sharing Data CORRGRAM

library(corrgram)
## Warning: package 'corrgram' was built under R version 3.4.3
# Correlogram over every column of bike, with the variables reordered
# (order = TRUE) and a different panel function for the lower triangle,
# upper triangle, diagonal and labels.
corrgram(
  bike,
  order = TRUE,
  main = "Bike Sharing Data CORRGRAM",
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  diag.panel = panel.minmax,
  text.panel = panel.txt
)

Correlation between various variables

# Pearson correlation matrix across all columns of bike.
cor(x = bike, method = "pearson")
##                  season           yr         mnth      holiday
## season      1.000000000 -0.001844343  0.831440114 -0.010536659
## yr         -0.001844343  1.000000000 -0.001792434  0.007954311
## mnth        0.831440114 -0.001792434  1.000000000  0.019190895
## holiday    -0.010536659  0.007954311  0.019190895  1.000000000
## weekday    -0.003079881 -0.005460765  0.009509313 -0.101960269
## workingday  0.012484963 -0.002012621 -0.005900951 -0.253022700
## weathersit  0.019211028 -0.048726541  0.043528098 -0.034626841
## temp        0.334314856  0.047603572  0.220205335 -0.028555535
## atemp       0.342875613  0.046106149  0.227458630 -0.032506692
## hum         0.205444765 -0.110651045  0.222203691 -0.015937479
## windspeed  -0.229046337 -0.011817060 -0.207501752  0.006291507
## casual      0.210399165  0.248545664  0.123005889  0.054274203
## registered  0.411623051  0.594248168  0.293487830 -0.108744863
## cnt         0.406100371  0.566709708  0.279977112 -0.068347716
##                  weekday   workingday  weathersit          temp
## season     -0.0030798813  0.012484963  0.01921103  0.3343148564
## yr         -0.0054607652 -0.002012621 -0.04872654  0.0476035719
## mnth        0.0095093129 -0.005900951  0.04352810  0.2202053352
## holiday    -0.1019602689 -0.253022700 -0.03462684 -0.0285555350
## weekday     1.0000000000  0.035789674  0.03108747 -0.0001699624
## workingday  0.0357896736  1.000000000  0.06120043  0.0526598102
## weathersit  0.0310874694  0.061200430  1.00000000 -0.1206022365
## temp       -0.0001699624  0.052659810 -0.12060224  1.0000000000
## atemp      -0.0075371318  0.052182275 -0.12158335  0.9917015532
## hum        -0.0522321004  0.024327046  0.59104460  0.1269629390
## windspeed   0.0142821241 -0.018796487  0.03951106 -0.1579441204
## casual      0.0599226375 -0.518044191 -0.24735300  0.5432846617
## registered  0.0573674440  0.303907117 -0.26038771  0.5400119662
## cnt         0.0674434124  0.061156063 -0.29739124  0.6274940090
##                   atemp         hum    windspeed      casual  registered
## season      0.342875613  0.20544476 -0.229046337  0.21039916  0.41162305
## yr          0.046106149 -0.11065104 -0.011817060  0.24854566  0.59424817
## mnth        0.227458630  0.22220369 -0.207501752  0.12300589  0.29348783
## holiday    -0.032506692 -0.01593748  0.006291507  0.05427420 -0.10874486
## weekday    -0.007537132 -0.05223210  0.014282124  0.05992264  0.05736744
## workingday  0.052182275  0.02432705 -0.018796487 -0.51804419  0.30390712
## weathersit -0.121583354  0.59104460  0.039511059 -0.24735300 -0.26038771
## temp        0.991701553  0.12696294 -0.157944120  0.54328466  0.54001197
## atemp       1.000000000  0.13998806 -0.183642967  0.54386369  0.54419176
## hum         0.139988060  1.00000000 -0.248489099 -0.07700788 -0.09108860
## windspeed  -0.183642967 -0.24848910  1.000000000 -0.16761335 -0.21744898
## casual      0.543863690 -0.07700788 -0.167613349  1.00000000  0.39528245
## registered  0.544191758 -0.09108860 -0.217448981  0.39528245  1.00000000
## cnt         0.631065700 -0.10065856 -0.234544997  0.67280443  0.94551692
##                    cnt
## season      0.40610037
## yr          0.56670971
## mnth        0.27997711
## holiday    -0.06834772
## weekday     0.06744341
## workingday  0.06115606
## weathersit -0.29739124
## temp        0.62749401
## atemp       0.63106570
## hum        -0.10065856
## windspeed  -0.23454500
## casual      0.67280443
## registered  0.94551692
## cnt         1.00000000
# Pearson correlations among the calendar/weather-situation columns and cnt
# (the same eight columns, in the same order, selected by name).
calendar_cols <- c(
  "season", "yr", "mnth", "holiday", "weekday", "workingday", "weathersit",
  "cnt"
)
cor(bike[calendar_cols], method = "pearson")
##                  season           yr         mnth      holiday
## season      1.000000000 -0.001844343  0.831440114 -0.010536659
## yr         -0.001844343  1.000000000 -0.001792434  0.007954311
## mnth        0.831440114 -0.001792434  1.000000000  0.019190895
## holiday    -0.010536659  0.007954311  0.019190895  1.000000000
## weekday    -0.003079881 -0.005460765  0.009509313 -0.101960269
## workingday  0.012484963 -0.002012621 -0.005900951 -0.253022700
## weathersit  0.019211028 -0.048726541  0.043528098 -0.034626841
## cnt         0.406100371  0.566709708  0.279977112 -0.068347716
##                 weekday   workingday  weathersit         cnt
## season     -0.003079881  0.012484963  0.01921103  0.40610037
## yr         -0.005460765 -0.002012621 -0.04872654  0.56670971
## mnth        0.009509313 -0.005900951  0.04352810  0.27997711
## holiday    -0.101960269 -0.253022700 -0.03462684 -0.06834772
## weekday     1.000000000  0.035789674  0.03108747  0.06744341
## workingday  0.035789674  1.000000000  0.06120043  0.06115606
## weathersit  0.031087469  0.061200430  1.00000000 -0.29739124
## cnt         0.067443412  0.061156063 -0.29739124  1.00000000
# Pearson correlations among the continuous weather columns and the three
# count columns (the same seven columns, in the same order, selected by name).
continuous_cols <- c(
  "temp", "atemp", "hum", "windspeed", "casual", "registered", "cnt"
)
cor(bike[continuous_cols], method = "pearson")
##                  temp      atemp         hum  windspeed      casual
## temp        1.0000000  0.9917016  0.12696294 -0.1579441  0.54328466
## atemp       0.9917016  1.0000000  0.13998806 -0.1836430  0.54386369
## hum         0.1269629  0.1399881  1.00000000 -0.2484891 -0.07700788
## windspeed  -0.1579441 -0.1836430 -0.24848910  1.0000000 -0.16761335
## casual      0.5432847  0.5438637 -0.07700788 -0.1676133  1.00000000
## registered  0.5400120  0.5441918 -0.09108860 -0.2174490  0.39528245
## cnt         0.6274940  0.6310657 -0.10065856 -0.2345450  0.67280443
##            registered        cnt
## temp        0.5400120  0.6274940
## atemp       0.5441918  0.6310657
## hum        -0.0910886 -0.1006586
## windspeed  -0.2174490 -0.2345450
## casual      0.3952825  0.6728044
## registered  1.0000000  0.9455169
## cnt         0.9455169  1.0000000

Scatterplot for various factors vs Counts

# Daily count against the date column, taken from the untouched copy bike1
# because the working copy bike no longer carries dteday.
# NOTE(review): dteday is not numeric as read by read.csv -- confirm the
# x-axis is rendered as intended.
scatterplot(bike1$dteday, bike1$cnt)

# Daily count against month number.
scatterplot(bike$mnth, bike$cnt)

# Daily count against weekday code.
scatterplot(bike$weekday, bike$cnt)

# Daily count against wind speed.
scatterplot(bike$windspeed, bike$cnt)

# Daily count against temperature.
scatterplot(bike$temp, bike$cnt)

# Daily count against humidity.
scatterplot(bike$hum, bike$cnt)

Scatterplot showing distribution of demand across Casual, Registered and Total.

# Weekday code against total, casual and registered counts, one figure each.
# The return value of each call is kept in a, b and c respectively.
a <- scatterplot(bike$weekday, bike$cnt)

b <- scatterplot(bike$weekday, bike$casual)

c <- scatterplot(bike$weekday, bike$registered)

Linear model including all the variables except temp and atemp (and the casual and registered counts)

# Regress the daily count on the calendar, weather-situation, wind-speed and
# humidity columns. Columns are named in the formula and looked up through
# `data = bike` rather than passed as bike$... vectors, so coefficients are
# labelled by plain column name and predict() can be given new data.
# NOTE(review): season, mnth, weekday and weathersit are integer codes fitted
# as single numeric slopes (one coefficient each below); wrap them in
# factor() if they are meant to be treated as categories.
model3 <- lm(
  cnt ~ season + yr + mnth + holiday + weekday + workingday + weathersit +
    windspeed + hum,
  data = bike
)
summary(model3)
## 
## Call:
## lm(formula = cnt ~ season + yr + mnth + holiday + weekday + workingday + 
##     weathersit + windspeed + hum, data = bike)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4916.0  -917.1   112.6   921.6  3501.7 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3147.19     313.02  10.054  < 2e-16 ***
## season        899.44      74.19  12.123  < 2e-16 ***
## yr           2155.91      91.48  23.566  < 2e-16 ***
## mnth          -94.77      23.84  -3.975 7.74e-05 ***
## holiday      -593.91     282.54  -2.102   0.0359 *  
## weekday        77.07      22.89   3.367   0.0008 ***
## workingday    225.15     101.12   2.226   0.0263 *  
## weathersit  -1066.68     107.44  -9.928  < 2e-16 ***
## windspeed   -3062.71     632.35  -4.843 1.56e-06 ***
## hum           563.64     432.63   1.303   0.1930    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1227 on 721 degrees of freedom
## Multiple R-squared:  0.6037, Adjusted R-squared:  0.5988 
## F-statistic:   122 on 9 and 721 DF,  p-value: < 2.2e-16

Linear model including some variables

# Same regression as the previous model but without hum, whose coefficient
# was not significant there (p = 0.193). Columns are named in the formula and
# looked up through `data = bike` rather than passed as bike$... vectors, so
# coefficients are labelled by plain column name and predict() can be given
# new data.
model4 <- lm(
  cnt ~ season + yr + mnth + holiday + weekday + workingday + weathersit +
    windspeed,
  data = bike
)
summary(model4)
## 
## Call:
## lm(formula = cnt ~ season + yr + mnth + holiday + weekday + workingday + 
##     weathersit + windspeed, data = bike)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4861.3  -903.0    95.9   933.5  3528.5 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3417.80     234.29  14.588  < 2e-16 ***
## season        903.34      74.17  12.180  < 2e-16 ***
## yr           2142.34      90.93  23.560  < 2e-16 ***
## mnth          -92.38      23.78  -3.885 0.000112 ***
## holiday      -596.58     282.67  -2.111 0.035153 *  
## weekday        74.33      22.80   3.259 0.001169 ** 
## workingday    222.44     101.15   2.199 0.028185 *  
## weathersit   -979.20      83.91 -11.669  < 2e-16 ***
## windspeed   -3309.88     603.52  -5.484 5.74e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1228 on 722 degrees of freedom
## Multiple R-squared:  0.6028, Adjusted R-squared:  0.5984 
## F-statistic:   137 on 8 and 722 DF,  p-value: < 2.2e-16

Linear model including highly associated variables

# Reduced regression of the daily count on season, yr, mnth, weekday,
# weathersit and windspeed only (holiday and workingday are left out).
# Columns are named in the formula and looked up through `data = bike` rather
# than passed as bike$... vectors, so coefficients are labelled by plain
# column name and predict() can be given new data.
model5 <- lm(
  cnt ~ season + yr + mnth + weekday + weathersit + windspeed,
  data = bike
)
summary(model5)
## 
## Call:
## lm(formula = cnt ~ season + yr + mnth + weekday + weathersit + 
##     windspeed, data = bike)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5014.4  -915.3   134.9   923.4  3362.8 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3511.98     225.75  15.557  < 2e-16 ***
## season        916.71      74.59  12.290  < 2e-16 ***
## yr           2141.37      91.58  23.383  < 2e-16 ***
## mnth          -97.04      23.91  -4.059 5.47e-05 ***
## weekday        81.21      22.84   3.555 0.000403 ***
## weathersit   -961.08      84.33 -11.397  < 2e-16 ***
## windspeed   -3349.88     607.68  -5.513 4.92e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1236 on 724 degrees of freedom
## Multiple R-squared:  0.596,  Adjusted R-squared:  0.5926 
## F-statistic:   178 on 6 and 724 DF,  p-value: < 2.2e-16