# Load the hourly bike-sharing records.
# stringsAsFactors = TRUE keeps `dteday` a factor, which is what the recorded
# summary()/str() output below shows; since R 4.0.0 read.csv() would otherwise
# leave it as a character column.
train <- read.csv("hour.csv", stringsAsFactors = TRUE)
# View() opens a data viewer, so only call it from an interactive session.
if (interactive()) {
  View(train)
}
nrow(train) # number of rows
## [1] 17379
ncol(train) # number of columns
## [1] 17
library(psych)
# Per-column summary: quartiles/mean for numeric columns, level counts for
# factor columns.
summary(train)
## instant dteday season yr
## Min. : 1 2011-01-01: 24 Min. :1.000 Min. :0.0000
## 1st Qu.: 4346 2011-01-08: 24 1st Qu.:2.000 1st Qu.:0.0000
## Median : 8690 2011-01-09: 24 Median :3.000 Median :1.0000
## Mean : 8690 2011-01-10: 24 Mean :2.502 Mean :0.5026
## 3rd Qu.:13034 2011-01-13: 24 3rd Qu.:3.000 3rd Qu.:1.0000
## Max. :17379 2011-01-15: 24 Max. :4.000 Max. :1.0000
## (Other) :17235
## mnth hr holiday weekday
## Min. : 1.000 Min. : 0.00 Min. :0.00000 Min. :0.000
## 1st Qu.: 4.000 1st Qu.: 6.00 1st Qu.:0.00000 1st Qu.:1.000
## Median : 7.000 Median :12.00 Median :0.00000 Median :3.000
## Mean : 6.538 Mean :11.55 Mean :0.02877 Mean :3.004
## 3rd Qu.:10.000 3rd Qu.:18.00 3rd Qu.:0.00000 3rd Qu.:5.000
## Max. :12.000 Max. :23.00 Max. :1.00000 Max. :6.000
##
## workingday weathersit temp atemp
## Min. :0.0000 Min. :1.000 Min. :0.020 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:1.000 1st Qu.:0.340 1st Qu.:0.3333
## Median :1.0000 Median :1.000 Median :0.500 Median :0.4848
## Mean :0.6827 Mean :1.425 Mean :0.497 Mean :0.4758
## 3rd Qu.:1.0000 3rd Qu.:2.000 3rd Qu.:0.660 3rd Qu.:0.6212
## Max. :1.0000 Max. :4.000 Max. :1.000 Max. :1.0000
##
## hum windspeed casual registered
## Min. :0.0000 Min. :0.0000 Min. : 0.00 Min. : 0.0
## 1st Qu.:0.4800 1st Qu.:0.1045 1st Qu.: 4.00 1st Qu.: 34.0
## Median :0.6300 Median :0.1940 Median : 17.00 Median :115.0
## Mean :0.6272 Mean :0.1901 Mean : 35.68 Mean :153.8
## 3rd Qu.:0.7800 3rd Qu.:0.2537 3rd Qu.: 48.00 3rd Qu.:220.0
## Max. :1.0000 Max. :0.8507 Max. :367.00 Max. :886.0
##
## cnt
## Min. : 1.0
## 1st Qu.: 40.0
## Median :142.0
## Mean :189.5
## 3rd Qu.:281.0
## Max. :977.0
##
# Compact structure listing: one line per column with its type and first values.
str(train)
## 'data.frame': 17379 obs. of 17 variables:
## $ instant : int 1 2 3 4 5 6 7 8 9 10 ...
## $ dteday : Factor w/ 731 levels "2011-01-01","2011-01-02",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ season : int 1 1 1 1 1 1 1 1 1 1 ...
## $ yr : int 0 0 0 0 0 0 0 0 0 0 ...
## $ mnth : int 1 1 1 1 1 1 1 1 1 1 ...
## $ hr : int 0 1 2 3 4 5 6 7 8 9 ...
## $ holiday : int 0 0 0 0 0 0 0 0 0 0 ...
## $ weekday : int 6 6 6 6 6 6 6 6 6 6 ...
## $ workingday: int 0 0 0 0 0 0 0 0 0 0 ...
## $ weathersit: int 1 1 1 1 1 2 1 1 1 1 ...
## $ temp : num 0.24 0.22 0.22 0.24 0.24 0.24 0.22 0.2 0.24 0.32 ...
## $ atemp : num 0.288 0.273 0.273 0.288 0.288 ...
## $ hum : num 0.81 0.8 0.8 0.75 0.75 0.75 0.8 0.86 0.75 0.76 ...
## $ windspeed : num 0 0 0 0 0 0.0896 0 0 0 0 ...
## $ casual : int 3 8 5 3 0 0 2 1 1 8 ...
## $ registered: int 13 32 27 10 1 1 0 2 7 6 ...
## $ cnt : int 16 40 32 13 1 1 2 3 8 14 ...
# Descriptive statistics for every column (n, mean, sd, median, trimmed mean,
# mad, min, max, range, skew, kurtosis, se), as listed in the output below.
# NOTE(review): presumably psych::describe(), since psych is attached above;
# the figures for `dteday*` are computed on factor codes, not on dates.
describe(train)
## vars n mean sd median trimmed mad min
## instant 1 17379 8690.00 5017.03 8690.00 8690.00 6441.90 1.00
## dteday* 2 17379 367.17 210.07 367.00 367.24 269.83 1.00
## season 3 17379 2.50 1.11 3.00 2.50 1.48 1.00
## yr 4 17379 0.50 0.50 1.00 0.50 0.00 0.00
## mnth 5 17379 6.54 3.44 7.00 6.54 4.45 1.00
## hr 6 17379 11.55 6.91 12.00 11.56 8.90 0.00
## holiday 7 17379 0.03 0.17 0.00 0.00 0.00 0.00
## weekday 8 17379 3.00 2.01 3.00 3.00 2.97 0.00
## workingday 9 17379 0.68 0.47 1.00 0.73 0.00 0.00
## weathersit 10 17379 1.43 0.64 1.00 1.30 0.00 1.00
## temp 11 17379 0.50 0.19 0.50 0.50 0.24 0.02
## atemp 12 17379 0.48 0.17 0.48 0.48 0.20 0.00
## hum 13 17379 0.63 0.19 0.63 0.63 0.22 0.00
## windspeed 14 17379 0.19 0.12 0.19 0.18 0.13 0.00
## casual 15 17379 35.68 49.31 17.00 25.13 23.72 0.00
## registered 16 17379 153.79 151.36 115.00 129.26 131.95 0.00
## cnt 17 17379 189.46 181.39 142.00 162.04 166.05 1.00
## max range skew kurtosis se
## instant 17379.00 17378.00 0.00 -1.20 38.06
## dteday* 731.00 730.00 0.00 -1.19 1.59
## season 4.00 3.00 -0.01 -1.33 0.01
## yr 1.00 1.00 -0.01 -2.00 0.00
## mnth 12.00 11.00 -0.01 -1.20 0.03
## hr 23.00 23.00 -0.01 -1.20 0.05
## holiday 1.00 1.00 5.64 29.78 0.00
## weekday 6.00 6.00 0.00 -1.26 0.02
## workingday 1.00 1.00 -0.79 -1.38 0.00
## weathersit 4.00 3.00 1.23 0.35 0.00
## temp 1.00 0.98 -0.01 -0.94 0.00
## atemp 1.00 1.00 -0.09 -0.85 0.00
## hum 1.00 1.00 -0.11 -0.83 0.00
## windspeed 0.85 0.85 0.57 0.59 0.00
## casual 367.00 367.00 2.50 7.57 0.37
## registered 886.00 886.00 1.56 2.75 1.15
## cnt 977.00 976.00 1.28 1.42 1.38
# One-way frequency table of `season`: absolute counts, then relative shares.
mytable_season <- table(season = train$season)
mytable_season
## season
## 1 2 3 4
## 4242 4409 4496 4232
prop.table(mytable_season)
## season
## 1 2 3 4
## 0.2440877 0.2536970 0.2587030 0.2435123
# One-way frequency table of `mnth`: absolute counts, then relative shares.
mytable_mnth <- table(mnth = train$mnth)
mytable_mnth
## mnth
## 1 2 3 4 5 6 7 8 9 10 11 12
## 1429 1341 1473 1437 1488 1440 1488 1475 1437 1451 1437 1483
prop.table(mytable_mnth)
## mnth
## 1 2 3 4 5 6
## 0.08222567 0.07716209 0.08475747 0.08268600 0.08562058 0.08285862
## 7 8 9 10 11 12
## 0.08562058 0.08487255 0.08268600 0.08349157 0.08268600 0.08533287
# One-way frequency table of `weathersit`: absolute counts, then relative
# shares.
mytable_weathersit <- table(weathersit = train$weathersit)
mytable_weathersit
## weathersit
## 1 2 3 4
## 11413 4544 1419 3
prop.table(mytable_weathersit)
## weathersit
## 1 2 3 4
## 0.6567121238 0.2614649865 0.0816502676 0.0001726221
# One-way frequency table of `weekday`: absolute counts, then relative shares.
mytable_weekday <- table(weekday = train$weekday)
mytable_weekday
## weekday
## 0 1 2 3 4 5 6
## 2502 2479 2453 2475 2471 2487 2512
prop.table(mytable_weekday)
## weekday
## 0 1 2 3 4 5 6
## 0.1439669 0.1426434 0.1411474 0.1424133 0.1421831 0.1431037 0.1445423
# Two-way contingency table: rows are `season`, columns are `mnth`.
mytable_season_mnth <- xtabs(~ season + mnth, data = train)
mytable_season_mnth
## mnth
## season 1 2 3 4 5 6 7 8 9 10 11 12
## 1 1429 1341 949 0 0 0 0 0 0 0 0 523
## 2 0 0 524 1437 1488 960 0 0 0 0 0 0
## 3 0 0 0 0 0 480 1488 1475 1053 0 0 0
## 4 0 0 0 0 0 0 0 0 384 1451 1437 960
# Row view of season x mnth: totals per season, then each cell as a share of
# its season (every row of the proportion table sums to 1).
margin.table(mytable_season_mnth, margin = 1)
## season
## 1 2 3 4
## 4242 4409 4496 4232
prop.table(mytable_season_mnth, margin = 1)
## mnth
## season 1 2 3 4 5 6
## 1 0.33686940 0.31612447 0.22371523 0.00000000 0.00000000 0.00000000
## 2 0.00000000 0.00000000 0.11884781 0.32592425 0.33749149 0.21773645
## 3 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.10676157
## 4 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## mnth
## season 7 8 9 10 11 12
## 1 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.12329090
## 2 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## 3 0.33096085 0.32806940 0.23420819 0.00000000 0.00000000 0.00000000
## 4 0.00000000 0.00000000 0.09073724 0.34286389 0.33955577 0.22684310
# Column view of season x mnth: totals per month, then each cell as a share of
# its month (every column of the proportion table sums to 1).
margin.table(mytable_season_mnth, margin = 2)
## mnth
## 1 2 3 4 5 6 7 8 9 10 11 12
## 1429 1341 1473 1437 1488 1440 1488 1475 1437 1451 1437 1483
prop.table(mytable_season_mnth, margin = 2)
## mnth
## season 1 2 3 4 5 6
## 1 1.0000000 1.0000000 0.6442634 0.0000000 0.0000000 0.0000000
## 2 0.0000000 0.0000000 0.3557366 1.0000000 1.0000000 0.6666667
## 3 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.3333333
## 4 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## mnth
## season 7 8 9 10 11 12
## 1 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.3526635
## 2 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## 3 1.0000000 1.0000000 0.7327766 0.0000000 0.0000000 0.0000000
## 4 0.0000000 0.0000000 0.2672234 1.0000000 1.0000000 0.6473365
library(gmodels)
# Season x weather-situation cross-tabulation. Each cell reports the count, its
# chi-square contribution and its row, column and table proportions.
# The argument expressions are kept as written because they become the row and
# column labels of the printed table.
CrossTable(train$season, train$weathersit)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 17379
##
##
## | train$weathersit
## train$season | 1 | 2 | 3 | 4 | Row Total |
## -------------|-----------|-----------|-----------|-----------|-----------|
## 1 | 2665 | 1205 | 369 | 3 | 4242 |
## | 5.236 | 8.286 | 1.480 | 7.023 | |
## | 0.628 | 0.284 | 0.087 | 0.001 | 0.244 |
## | 0.234 | 0.265 | 0.260 | 1.000 | |
## | 0.153 | 0.069 | 0.021 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## 2 | 2859 | 1144 | 406 | 0 | 4409 |
## | 0.459 | 0.067 | 5.879 | 0.761 | |
## | 0.648 | 0.259 | 0.092 | 0.000 | 0.254 |
## | 0.251 | 0.252 | 0.286 | 0.000 | |
## | 0.165 | 0.066 | 0.023 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## 3 | 3280 | 947 | 269 | 0 | 4496 |
## | 36.309 | 44.433 | 26.215 | 0.776 | |
## | 0.730 | 0.211 | 0.060 | 0.000 | 0.259 |
## | 0.287 | 0.208 | 0.190 | 0.000 | |
## | 0.189 | 0.054 | 0.015 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## 4 | 2609 | 1248 | 375 | 0 | 4232 |
## | 10.424 | 18.090 | 2.511 | 0.731 | |
## | 0.616 | 0.295 | 0.089 | 0.000 | 0.244 |
## | 0.229 | 0.275 | 0.264 | 0.000 | |
## | 0.150 | 0.072 | 0.022 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## Column Total | 11413 | 4544 | 1419 | 3 | 17379 |
## | 0.657 | 0.261 | 0.082 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|-----------|
##
##
# Two-way contingency table: rows are `season`, columns are `weekday`.
mytable_season_weekday <- xtabs(~ season + weekday, data = train)
mytable_season_weekday
## weekday
## season 0 1 2 3 4 5 6
## 1 638 612 571 582 578 615 646
## 2 623 644 624 646 624 624 624
## 3 617 623 647 648 671 648 642
## 4 624 600 611 599 598 600 600
# Row view of season x weekday: totals per season, then each cell as a share
# of its season.
margin.table(mytable_season_weekday, margin = 1)
## season
## 1 2 3 4
## 4242 4409 4496 4232
prop.table(mytable_season_weekday, margin = 1)
## weekday
## season 0 1 2 3 4 5
## 1 0.1504008 0.1442716 0.1346063 0.1371994 0.1362565 0.1449788
## 2 0.1413019 0.1460649 0.1415287 0.1465185 0.1415287 0.1415287
## 3 0.1372331 0.1385676 0.1439057 0.1441281 0.1492438 0.1441281
## 4 0.1474480 0.1417769 0.1443762 0.1415406 0.1413043 0.1417769
## weekday
## season 6
## 1 0.1522867
## 2 0.1415287
## 3 0.1427936
## 4 0.1417769
# Column view of season x weekday: totals per weekday, then each cell as a
# share of its weekday.
margin.table(mytable_season_weekday, margin = 2)
## weekday
## 0 1 2 3 4 5 6
## 2502 2479 2453 2475 2471 2487 2512
prop.table(mytable_season_weekday, margin = 2)
## weekday
## season 0 1 2 3 4 5
## 1 0.2549960 0.2468737 0.2327762 0.2351515 0.2339134 0.2472859
## 2 0.2490008 0.2597822 0.2543824 0.2610101 0.2525293 0.2509047
## 3 0.2466027 0.2513110 0.2637587 0.2618182 0.2715500 0.2605549
## 4 0.2494005 0.2420331 0.2490828 0.2420202 0.2420073 0.2412545
## weekday
## season 6
## 1 0.2571656
## 2 0.2484076
## 3 0.2555732
## 4 0.2388535
# Two-way contingency table: rows are `mnth`, columns are `weathersit`.
mytable_mnth_weathersit <- xtabs(~ mnth + weathersit, data = train)
mytable_mnth_weathersit
## weathersit
## mnth 1 2 3 4
## 1 890 428 108 3
## 2 862 342 137 0
## 3 903 441 129 0
## 4 923 372 142 0
## 5 944 401 143 0
## 6 1093 275 72 0
## 7 1172 254 62 0
## 8 1086 302 87 0
## 9 879 414 144 0
## 10 875 412 164 0
## 11 958 378 101 0
## 12 828 525 130 0
# Row view of mnth x weathersit: totals per month, then each cell as a share
# of its month.
margin.table(mytable_mnth_weathersit, margin = 1)
## mnth
## 1 2 3 4 5 6 7 8 9 10 11 12
## 1429 1341 1473 1437 1488 1440 1488 1475 1437 1451 1437 1483
prop.table(mytable_mnth_weathersit, margin = 1)
## weathersit
## mnth 1 2 3 4
## 1 0.62281316 0.29951015 0.07557733 0.00209937
## 2 0.64280388 0.25503356 0.10216257 0.00000000
## 3 0.61303462 0.29938900 0.08757637 0.00000000
## 4 0.64231037 0.25887265 0.09881698 0.00000000
## 5 0.63440860 0.26948925 0.09610215 0.00000000
## 6 0.75902778 0.19097222 0.05000000 0.00000000
## 7 0.78763441 0.17069892 0.04166667 0.00000000
## 8 0.73627119 0.20474576 0.05898305 0.00000000
## 9 0.61169102 0.28810021 0.10020877 0.00000000
## 10 0.60303239 0.28394211 0.11302550 0.00000000
## 11 0.66666667 0.26304802 0.07028532 0.00000000
## 12 0.55832771 0.35401214 0.08766015 0.00000000
# Column view of mnth x weathersit: totals per weather situation, then each
# cell as a share of its weather situation.
margin.table(mytable_mnth_weathersit, margin = 2)
## weathersit
## 1 2 3 4
## 11413 4544 1419 3
prop.table(mytable_mnth_weathersit, margin = 2)
## weathersit
## mnth 1 2 3 4
## 1 0.07798125 0.09419014 0.07610994 1.00000000
## 2 0.07552791 0.07526408 0.09654686 0.00000000
## 3 0.07912030 0.09705106 0.09090909 0.00000000
## 4 0.08087269 0.08186620 0.10007047 0.00000000
## 5 0.08271270 0.08824824 0.10077519 0.00000000
## 6 0.09576798 0.06051937 0.05073996 0.00000000
## 7 0.10268992 0.05589789 0.04369274 0.00000000
## 8 0.09515465 0.06646127 0.06131078 0.00000000
## 9 0.07701744 0.09110915 0.10147992 0.00000000
## 10 0.07666696 0.09066901 0.11557435 0.00000000
## 11 0.08393937 0.08318662 0.07117689 0.00000000
## 12 0.07254885 0.11553697 0.09161381 0.00000000
# Two-way contingency table: rows are `mnth`, columns are `weekday`.
mytable_mnth_weekday <- xtabs(~ mnth + weekday, data = train)
mytable_mnth_weekday
## weekday
## mnth 0 1 2 3 4 5 6
## 1 237 236 198 180 175 189 214
## 2 189 188 183 212 189 188 192
## 3 188 188 215 214 238 214 216
## 4 216 214 192 191 192 216 216
## 5 216 216 240 216 216 192 192
## 6 192 192 192 216 216 216 216
## 7 240 216 216 192 192 216 216
## 8 185 216 216 240 216 216 186
## 9 216 191 191 192 215 216 216
## 10 216 217 203 215 192 192 216
## 11 192 191 216 216 214 216 192
## 12 215 214 191 191 216 216 240
# Row view of mnth x weekday: totals per month, then each cell as a share of
# its month.
margin.table(mytable_mnth_weekday, margin = 1)
## mnth
## 1 2 3 4 5 6 7 8 9 10 11 12
## 1429 1341 1473 1437 1488 1440 1488 1475 1437 1451 1437 1483
prop.table(mytable_mnth_weekday, margin = 1)
## weekday
## mnth 0 1 2 3 4 5 6
## 1 0.1658502 0.1651505 0.1385584 0.1259622 0.1224633 0.1322603 0.1497551
## 2 0.1409396 0.1401939 0.1364653 0.1580910 0.1409396 0.1401939 0.1431767
## 3 0.1276307 0.1276307 0.1459606 0.1452817 0.1615750 0.1452817 0.1466395
## 4 0.1503132 0.1489214 0.1336117 0.1329158 0.1336117 0.1503132 0.1503132
## 5 0.1451613 0.1451613 0.1612903 0.1451613 0.1451613 0.1290323 0.1290323
## 6 0.1333333 0.1333333 0.1333333 0.1500000 0.1500000 0.1500000 0.1500000
## 7 0.1612903 0.1451613 0.1451613 0.1290323 0.1290323 0.1451613 0.1451613
## 8 0.1254237 0.1464407 0.1464407 0.1627119 0.1464407 0.1464407 0.1261017
## 9 0.1503132 0.1329158 0.1329158 0.1336117 0.1496173 0.1503132 0.1503132
## 10 0.1488629 0.1495520 0.1399035 0.1481737 0.1323225 0.1323225 0.1488629
## 11 0.1336117 0.1329158 0.1503132 0.1503132 0.1489214 0.1503132 0.1336117
## 12 0.1449764 0.1443021 0.1287930 0.1287930 0.1456507 0.1456507 0.1618341
# Column view of mnth x weekday: totals per weekday, then each cell as a share
# of its weekday.
margin.table(mytable_mnth_weekday, margin = 2)
## weekday
## 0 1 2 3 4 5 6
## 2502 2479 2453 2475 2471 2487 2512
prop.table(mytable_mnth_weekday, margin = 2)
## weekday
## mnth 0 1 2 3 4 5
## 1 0.09472422 0.09519968 0.08071749 0.07272727 0.07082153 0.07599517
## 2 0.07553957 0.07583703 0.07460253 0.08565657 0.07648725 0.07559308
## 3 0.07513989 0.07583703 0.08764778 0.08646465 0.09631728 0.08604745
## 4 0.08633094 0.08632513 0.07827150 0.07717172 0.07770134 0.08685163
## 5 0.08633094 0.08713191 0.09783938 0.08727273 0.08741400 0.07720145
## 6 0.07673861 0.07745058 0.07827150 0.08727273 0.08741400 0.08685163
## 7 0.09592326 0.08713191 0.08805544 0.07757576 0.07770134 0.08685163
## 8 0.07394085 0.08713191 0.08805544 0.09696970 0.08741400 0.08685163
## 9 0.08633094 0.07704720 0.07786384 0.07757576 0.08700931 0.08685163
## 10 0.08633094 0.08753530 0.08275581 0.08686869 0.07770134 0.07720145
## 11 0.07673861 0.07704720 0.08805544 0.08727273 0.08660461 0.08685163
## 12 0.08593125 0.08632513 0.07786384 0.07717172 0.08741400 0.08685163
## weekday
## mnth 6
## 1 0.08519108
## 2 0.07643312
## 3 0.08598726
## 4 0.08598726
## 5 0.07643312
## 6 0.08598726
## 7 0.08598726
## 8 0.07404459
## 9 0.08598726
## 10 0.08598726
## 11 0.07643312
## 12 0.09554140
# Two-way contingency table: rows are `weathersit`, columns are `weekday`.
mytable_weathersit_weekday <- xtabs(~ weathersit + weekday, data = train)
mytable_weathersit_weekday
## weekday
## weathersit 0 1 2 3 4 5 6
## 1 1765 1582 1522 1568 1656 1645 1675
## 2 568 726 694 613 636 659 648
## 3 169 170 237 293 179 183 188
## 4 0 1 0 1 0 0 1
# Row view of weathersit x weekday: totals per weather situation, then each
# cell as a share of its weather situation.
margin.table(mytable_weathersit_weekday, margin = 1)
## weathersit
## 1 2 3 4
## 11413 4544 1419 3
prop.table(mytable_weathersit_weekday, margin = 1)
## weekday
## weathersit 0 1 2 3 4 5
## 1 0.1546482 0.1386139 0.1333567 0.1373872 0.1450977 0.1441339
## 2 0.1250000 0.1597711 0.1527289 0.1349032 0.1399648 0.1450264
## 3 0.1190980 0.1198027 0.1670190 0.2064834 0.1261452 0.1289641
## 4 0.0000000 0.3333333 0.0000000 0.3333333 0.0000000 0.0000000
## weekday
## weathersit 6
## 1 0.1467625
## 2 0.1426056
## 3 0.1324877
## 4 0.3333333
# Column view of weathersit x weekday: totals per weekday, then each cell as a
# share of its weekday.
margin.table(mytable_weathersit_weekday, margin = 2)
## weekday
## 0 1 2 3 4 5 6
## 2502 2479 2453 2475 2471 2487 2512
prop.table(mytable_weathersit_weekday, margin = 2)
## weekday
## weathersit 0 1 2 3
## 1 0.7054356515 0.6381605486 0.6204647371 0.6335353535
## 2 0.2270183853 0.2928600242 0.2829188748 0.2476767677
## 3 0.0675459632 0.0685760387 0.0966163881 0.1183838384
## 4 0.0000000000 0.0004033885 0.0000000000 0.0004040404
## weekday
## weathersit 4 5 6
## 1 0.6701740186 0.6614394853 0.6667993631
## 2 0.2573856738 0.2649778850 0.2579617834
## 3 0.0724403076 0.0735826297 0.0748407643
## 4 0.0000000000 0.0000000000 0.0003980892
# Distribution of the total rental count (`cnt`) by hour of day.
boxplot(cnt ~ hr, data = train,
        xlab = "hour", ylab = "number of users", col = "red")
##### Observation: 7-8 in the morning and 5-6 in the evening are the most
##### popular riding times.
# Total rental count by temperature and by humidity.
boxplot(cnt ~ temp, data = train,
        xlab = "temperature", ylab = "number of users", col = "maroon")
boxplot(cnt ~ hum, data = train,
        xlab = "humidity", ylab = "number of users", col = "blue")
##### Observation: the typical (median) number of users appears higher for
##### humidity below 0.53 than for humidity above 0.53.
##### Users might prefer less humid weather conditions.
# Total rental count by "feels like" temperature, wind speed and year.
boxplot(cnt ~ atemp, data = train,
        xlab = "feels like temperature", ylab = "number of users",
        col = "green")
boxplot(cnt ~ windspeed, data = train,
        xlab = "windspeed", ylab = "number of users", col = "brown")
boxplot(cnt ~ yr, data = train,
        xlab = "Year", ylab = "Count", col = "brown")
# Casual (non-registered) rentals on holidays vs. other days, followed by the
# mean per group.
boxplot(casual ~ holiday, data = train,
        xlab = "holiday", ylab = "non-registered users", col = "lightblue")
aggregate(casual ~ holiday, data = train, FUN = mean)
## holiday casual
## 1 0 35.40838
## 2 1 44.71800
# Registered rentals on holidays vs. other days, followed by the mean per group.
boxplot(registered ~ holiday, data = train,
        xlab = "holiday", ylab = "registered users", col = "darkgreen")
aggregate(registered ~ holiday, data = train, FUN = mean)
## holiday registered
## 1 0 155.0202
## 2 1 112.1520
# Casual rentals by day of the week, followed by the total per weekday.
boxplot(casual ~ weekday, data = train,
        xlab = "days", ylab = "non-registered users", col = "lightblue")
aggregate(casual ~ weekday, data = train, FUN = sum)
## weekday casual
## 1 0 140521
## 2 1 70784
## 3 2 57843
## 4 3 57319
## 5 4 61460
## 6 5 78238
## 7 6 153852
# Registered rentals by day of the week, followed by the total per weekday.
boxplot(registered ~ weekday, data = train,
        xlab = "weekday", ylab = "registered users", col = "darkgreen")
aggregate(registered ~ weekday, data = train, FUN = sum)
## weekday registered
## 1 0 303506
## 2 1 384719
## 3 2 411266
## 4 3 415729
## 5 4 423935
## 6 5 409552
## 7 6 323955
# Draw the eight histograms on a single 4 x 2 page with 2-line margins.
# par() returns the previous values of the settings it changes; they are
# restored after the histograms so the grid layout and the tight margins do
# not leak into every later plot.
old_par <- par(mfrow = c(4, 2), mar = rep(2, 4))
hist(train$season, col = "magenta", main = "Histogram of Season")
# Full column name: `train$weather` only resolved through `$` partial matching.
hist(train$weathersit, col = "purple", main = "Histogram of Weather")
hist(train$hum, col = "violet", main = "Histogram of Humidity")
hist(train$holiday, col = "red", main = "Histogram of Holiday")
hist(train$workingday, col = "blue", main = "Histogram of Working Day")
hist(train$temp, col = "green", main = "Temperature")
hist(train$atemp, col = "brown", main = "Feels like temperature")
hist(train$windspeed, col = "orange", main = "Histogram of WindSpeed")
par(old_par)

# Scatter plots of hour against each rental count. The formula is y ~ x, so
# `hr` is on the vertical axis and the count on the horizontal axis.
plot(hr ~ registered, data = train, cex = 0.1)
plot(hr ~ casual, data = train, cex = 0.1)
plot(hr ~ cnt, data = train, cex = 0.1)
# Keep columns 3 to 17 (`season` through `cnt` in the str() listing), dropping
# `instant` and `dteday`, then print the pairwise correlation matrix.
my_data <- train[, 3:17]
cor(my_data)
## season yr mnth hr
## season 1.000000000 -0.010742486 0.830385892 -0.006116901
## yr -0.010742486 1.000000000 -0.010472929 -0.003867005
## mnth 0.830385892 -0.010472929 1.000000000 -0.005771909
## hr -0.006116901 -0.003867005 -0.005771909 1.000000000
## holiday -0.009584526 0.006691617 0.018430325 0.000479136
## weekday -0.002335350 -0.004484851 0.010400061 -0.003497739
## workingday 0.013743102 -0.002196005 -0.003476922 0.002284998
## weathersit -0.014523552 -0.019156853 0.005399522 -0.020202528
## temp 0.312025237 0.040913380 0.201691494 0.137603494
## atemp 0.319379811 0.039221595 0.208096131 0.133749965
## hum 0.150624745 -0.083546421 0.164411443 -0.276497828
## windspeed -0.149772751 -0.008739533 -0.135386323 0.137251568
## casual 0.120206447 0.142778528 0.068457301 0.301201730
## registered 0.174225633 0.253684310 0.122272967 0.374140710
## cnt 0.178055731 0.250494899 0.120637760 0.394071498
## holiday weekday workingday weathersit
## season -0.009584526 -0.002335350 0.013743102 -0.014523552
## yr 0.006691617 -0.004484851 -0.002196005 -0.019156853
## mnth 0.018430325 0.010400061 -0.003476922 0.005399522
## hr 0.000479136 -0.003497739 0.002284998 -0.020202528
## holiday 1.000000000 -0.102087791 -0.252471370 -0.017036113
## weekday -0.102087791 1.000000000 0.035955071 0.003310740
## workingday -0.252471370 0.035955071 1.000000000 0.044672224
## weathersit -0.017036113 0.003310740 0.044672224 1.000000000
## temp -0.027340477 -0.001794927 0.055390317 -0.102639936
## atemp -0.030972737 -0.008820945 0.054667235 -0.105563108
## hum -0.010588465 -0.037158268 0.015687512 0.418130329
## windspeed 0.003987632 0.011501545 -0.011829789 0.026225652
## casual 0.031563628 0.032721415 -0.300942486 -0.152627885
## registered -0.047345424 0.021577888 0.134325791 -0.120965520
## cnt -0.030927303 0.026899860 0.030284368 -0.142426138
## temp atemp hum windspeed casual
## season 0.312025237 0.319379811 0.15062475 -0.149772751 0.12020645
## yr 0.040913380 0.039221595 -0.08354642 -0.008739533 0.14277853
## mnth 0.201691494 0.208096131 0.16441144 -0.135386323 0.06845730
## hr 0.137603494 0.133749965 -0.27649783 0.137251568 0.30120173
## holiday -0.027340477 -0.030972737 -0.01058846 0.003987632 0.03156363
## weekday -0.001794927 -0.008820945 -0.03715827 0.011501545 0.03272142
## workingday 0.055390317 0.054667235 0.01568751 -0.011829789 -0.30094249
## weathersit -0.102639936 -0.105563108 0.41813033 0.026225652 -0.15262788
## temp 1.000000000 0.987672139 -0.06988139 -0.023125262 0.45961565
## atemp 0.987672139 1.000000000 -0.05191770 -0.062336043 0.45408007
## hum -0.069881391 -0.051917696 1.00000000 -0.290104895 -0.34702809
## windspeed -0.023125262 -0.062336043 -0.29010490 1.000000000 0.09028678
## casual 0.459615646 0.454080065 -0.34702809 0.090286775 1.00000000
## registered 0.335360849 0.332558635 -0.27393312 0.082320847 0.50661770
## cnt 0.404772276 0.400929304 -0.32291074 0.093233784 0.69456408
## registered cnt
## season 0.17422563 0.17805573
## yr 0.25368431 0.25049490
## mnth 0.12227297 0.12063776
## hr 0.37414071 0.39407150
## holiday -0.04734542 -0.03092730
## weekday 0.02157789 0.02689986
## workingday 0.13432579 0.03028437
## weathersit -0.12096552 -0.14242614
## temp 0.33536085 0.40477228
## atemp 0.33255864 0.40092930
## hum -0.27393312 -0.32291074
## windspeed 0.08232085 0.09323378
## casual 0.50661770 0.69456408
## registered 1.00000000 0.97215073
## cnt 0.97215073 1.00000000
library(corrgram)
# Correlogram of my_data in its original column order: `panel.cor` is the
# default panel and `panel.shade` overrides it for the lower triangle.
corrgram(
  my_data,
  order = NULL,
  panel = panel.cor,
  lower.panel = panel.shade,
  text.panel = panel.txt,
  main = "Corrgram of all variables"
)
library(car)
# Scatterplot matrix of the total count against the calendar indicators.
# NOTE(review): car >= 3.0 appears to expect
# diagonal = list(method = "histogram"); confirm against the installed version.
scatterplotMatrix(
  formula = ~ cnt + weekday + holiday + workingday,
  data = train,
  cex = 0.1,
  diagonal = "histogram"
)
library(car)
# Scatterplot matrix of the total count against the weather measurements.
scatterplotMatrix(
  formula = ~ cnt + temp + hum + atemp,
  cex = 0.1,
  data = train,
  diagonal = "histogram"
)
# Chi-squared test of independence between `workingday` and the registered
# rental count, with each distinct count value treated as its own category.
# NOTE(review): the recorded output has df = 775 and warns that the
# approximation may be incorrect, so the table is sparse; a comparison that
# treats `registered` as numeric may be more appropriate.
mytable_reg_weekday <- xtabs(~ workingday + registered, data = train)
# addmargins(mytable_reg_weekday)
chisq.test(mytable_reg_weekday)
## Warning in chisq.test(mytable_reg_weekday): Chi-squared approximation may
## be incorrect
##
## Pearson's Chi-squared test
##
## data: mytable_reg_weekday
## X-squared = 1341.1, df = 775, p-value < 2.2e-16
# Chi-squared test of independence between `holiday` and the casual rental
# count, with each distinct count value treated as its own category.
# NOTE(review): the recorded output has df = 321 and warns that the
# approximation may be incorrect, so the table is sparse; a comparison that
# treats `casual` as numeric may be more appropriate.
mytable_cas_hol <- xtabs(~ holiday + casual, data = train)
# addmargins(mytable_cas_hol)
chisq.test(mytable_cas_hol)
## Warning in chisq.test(mytable_cas_hol): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: mytable_cas_hol
## X-squared = 466.89, df = 321, p-value = 1.811e-07