library(data.table)
# Load the train and test CSVs and stack them into one table so that every
# later feature-engineering step is applied to both sets at once.
train <- fread(file = "C:\\Users\\r631758\\Desktop\\r631758\\R codes\\BikeShare\\train.csv", stringsAsFactors = FALSE)
test <- fread(file = "C:\\Users\\r631758\\Desktop\\r631758\\R codes\\BikeShare\\test.csv", stringsAsFactors = FALSE)

# The test file carries no target columns. Add zero placeholders in the same
# order train stores them (casual, registered, count) so the two tables have
# identical layouts.
test$casual <- 0
test$registered <- 0
test$count <- 0

# Bind by column name rather than by position: a positional bind would put
# values under the wrong header whenever the two column orders differ.
data <- rbind(train, test, use.names = TRUE)
summary(data)
datetime season holiday workingday weather temp atemp humidity windspeed casual
Length:17379 Min. :1.000 Min. :0.00000 Min. :0.0000 Min. :1.000 Min. : 0.82 Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. : 0.00
Class :character 1st Qu.:2.000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:1.000 1st Qu.:13.94 1st Qu.:16.66 1st Qu.: 48.00 1st Qu.: 7.002 1st Qu.: 0.00
Mode :character Median :3.000 Median :0.00000 Median :1.0000 Median :1.000 Median :20.50 Median :24.24 Median : 63.00 Median :12.998 Median : 3.00
Mean :2.502 Mean :0.02877 Mean :0.6827 Mean :1.425 Mean :20.38 Mean :23.79 Mean : 62.72 Mean :12.737 Mean : 22.56
3rd Qu.:3.000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:2.000 3rd Qu.:27.06 3rd Qu.:31.06 3rd Qu.: 78.00 3rd Qu.:16.998 3rd Qu.: 26.00
Max. :4.000 Max. :1.00000 Max. :1.0000 Max. :4.000 Max. :41.00 Max. :50.00 Max. :100.00 Max. :56.997 Max. :367.00
registered count
Min. : 0.00 Min. : 0
1st Qu.: 0.00 1st Qu.: 0
Median : 23.00 Median : 28
Mean : 97.44 Mean :120
3rd Qu.:155.00 3rd Qu.:192
Max. :886.00 Max. :977
# Lay the plots out on a 4 x 2 grid with 2-line margins on every side.
par(mfrow = c(4, 2), mar = rep(2, 4))

# One histogram per predictor, drawn in the same order as before. The title
# and x label are spelled out so they read exactly like hist()'s defaults
# for an argument written as data$<column>.
hist_columns <- c("season", "weather", "humidity", "holiday",
                  "workingday", "temp", "atemp", "windspeed")
for (column in hist_columns) {
  label <- paste0("data$", column)
  hist(data[[column]], main = paste("Histogram of", label), xlab = label)
}

# Share of rows falling into each weather category.
prop.table(table(data[["weather"]]))
1 2 3 4
0.6567121238 0.2614649865 0.0816502676 0.0001726221
Convert the categorical columns (season, weather, holiday, workingday) to factors
# Recode the four integer-coded categorical columns as factors in a single
# data.table update instead of one assignment per column.
factor_columns <- c("season", "weather", "holiday", "workingday")
data[, (factor_columns) := lapply(.SD, as.factor), .SDcols = factor_columns]
Extract the hour of day from the datetime string
# Characters 12-13 of the datetime string are taken as the hour; store them
# as a factor.
data$hour <- as.factor(substr(data$datetime, 12, 13))
Hourly trend in rentals
# Re-split the combined table on the day of month (characters 9-10 of the
# datetime string): days below 20 go to train, days above 19 go to test.
# The test split previously read data$data$datetime, which is NULL and
# therefore produced a table with no rows; it now reads the datetime column.
train <- data[as.integer(substr(datetime, 9, 10)) < 20, ]
test <- data[as.integer(substr(datetime, 9, 10)) > 19, ]

# Distribution of total rentals for each hour.
boxplot(train$count ~ train$hour, xlab = "hour", ylab = "count of users")

# Distribution of casual rentals for each hour.
boxplot(train$casual ~ train$hour, xlab = "hour", ylab = "count of users")

# Distribution of registered rentals for each hour.
boxplot(train$registered ~ train$hour, xlab = "hour", ylab = "count of users")

Day-of-week trend in rentals
# Derive the weekday name from the date part (characters 1-10) of datetime.
# weekdays() returns names in the current locale.
date <- substr(data$datetime, 1, 10)
days <- weekdays(as.Date(date))
data$day <- days

# Re-split so that train and test both carry the new day column. The test
# split previously read data$data$datetime, which is NULL and therefore
# produced a table with no rows; it now reads the datetime column.
train <- data[as.integer(substr(datetime, 9, 10)) < 20, ]
test <- data[as.integer(substr(datetime, 9, 10)) > 19, ]

# The grouping variable is the weekday, so the x axis is labelled "day"
# (it was mislabelled "hour").
boxplot(train$casual ~ train$day, xlab = "day", ylab = "count of casual users")

boxplot(train$registered ~ train$day, xlab = "day", ylab = "count of registered users")

Yearly trend in rentals
# The first four characters of datetime are the year; keep it as a factor.
data$year <- as.factor(substr(data$datetime, 1, 4))

# Refresh both splits so they include the year column: day of month below 20
# goes to train, above 19 goes to test.
train <- data[as.integer(substr(datetime, 9, 10)) < 20]
test <- data[as.integer(substr(datetime, 9, 10)) > 19]

# Total rentals grouped by year.
boxplot(count ~ year, data = train, xlab = "year", ylab = "count")

Effect of weather on rentals
# Casual rentals grouped by weather category.
boxplot(casual ~ weather, data = train,
        xlab = "weather", ylab = "count of casual users")

# Registered rentals grouped by weather category.
boxplot(registered ~ weather, data = train,
        xlab = "weather", ylab = "count of registered users")

Correlation between the rental counts and the continuous variables
# Gather the three rental counts and the four continuous weather measurements
# into one data frame. The column names are written out to match the names
# data.frame() would derive from train$<column> arguments.
sub <- data.frame(
  train.registered = train$registered,
  train.casual = train$casual,
  train.count = train$count,
  train.temp = train$temp,
  train.humidity = train$humidity,
  train.atemp = train$atemp,
  train.windspeed = train$windspeed
)

# Pairwise correlation matrix of those columns.
cor(sub)
train.registered train.casual train.count train.temp train.humidity train.atemp train.windspeed
train.registered 1.00000000 0.49724969 0.9709481 0.31857128 -0.26545787 0.31463539 0.09105166
train.casual 0.49724969 1.00000000 0.6904136 0.46709706 -0.34818690 0.46206654 0.09227619
train.count 0.97094811 0.69041357 1.0000000 0.39445364 -0.31737148 0.38978444 0.10136947
train.temp 0.31857128 0.46709706 0.3944536 1.00000000 -0.06494877 0.98494811 -0.01785201
train.humidity -0.26545787 -0.34818690 -0.3173715 -0.06494877 1.00000000 -0.04353571 -0.31860699
train.atemp 0.31463539 0.46206654 0.3897844 0.98494811 -0.04353571 1.00000000 -0.05747300
train.windspeed 0.09105166 0.09227619 0.1013695 -0.01785201 -0.31860699 -0.05747300 1.00000000
# Summarise the hour column of the training split.
summary(train[["hour"]])
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.00 7.00 13.00 12.54 19.00 24.00
Fit an rpart decision tree of registered rentals on hour
library(rpart)
library(rattle)
package 'rattle' was built under R version 3.4.3
Error in inDL(x, as.logical(local), as.logical(now), ...) :
unable to load shared object 'C:/Users/r631758/Documents/R/win-library/3.4/RGtk2/libs/x64/RGtk2.dll':
LoadLibrary failure: The specified module could not be found.
Failed to load RGtk2 dynamic library, attempting to install it.
trying URL 'http://ftp.gnome.org/pub/gnome/binaries/win64/gtk+/2.22/gtk+-bundle_2.22.1-20101229_win64.zip'
Content type 'application/zip' length 25830230 bytes (24.6 MB)
downloaded 24.6 MB
'C:\Users\r631758\Documents\R\win-library\3.4\RGtk2\gtk\x64\etc\gtk-2.0' already exists
Learn more about GTK+ at http://www.gtk.org
If the package still does not load, please ensure that GTK+ is installed and that it is on your PATH environment variable
IN ANY CASE, RESTART R BEFORE TRYING TO LOAD THE PACKAGE AGAIN
Rattle: A free graphical interface for data science with R.
Version 5.1.0 Copyright (c) 2006-2017 Togaware Pty Ltd.
Type 'rattle()' to shake, rattle, and roll your data.
library(rpart.plot)
library(RColorBrewer)
# Fit a regression tree of registered rentals on hour alone, then draw it
# with rattle's tree plot.
d <- rpart(registered ~ hour, data = train)
fancyRpartPlot(d)

# Assign every row to one of eight consecutive quarters: 1-4 for the four
# quarters of 2011 and 5-8 for the four quarters of 2012.

# The quarter depends on a numeric month column. Derive it from characters
# 6-7 of datetime when it has not been created yet.
if (!"month" %in% names(data)) {
  data$month <- as.integer(substr(data$datetime, 6, 7))
}

# 1 for 2011, 2 for 2012, NA for any other year.
year_index <- match(as.character(data$year), c("2011", "2012"))

# Months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
quarter_of_year <- (data$month - 1) %/% 3 + 1

# Build the whole column in one full-length assignment rather than filling a
# not-yet-existing column through a series of partial masked assignments.
data$year_part <- 4 * (year_index - 1) + quarter_of_year
table(data$year_part)
1 2 3 4 5 6 7 8
2067 2183 2192 2203 2176 2182 2208 2168
Model building: random forest on the log-transformed rental counts
library(randomForest)
# Every engineered predictor used by the model must already be a column of
# `data`. Check up front so a missing feature is reported by name instead of
# surfacing as an obscure assignment or formula error further down.
required_columns <- c("hour", "day", "day_type", "dp_reg",
                      "weekend", "year", "year_part")
missing_columns <- setdiff(required_columns, names(data))
if (length(missing_columns) > 0) {
  stop("Columns required by the model are missing from `data`: ",
       paste(missing_columns, collapse = ", "), call. = FALSE)
}

# Make sure the categorical predictors are factors.
data$hour <- as.factor(data$hour)
data$day_type <- as.factor(data$day_type)
data$day <- as.factor(data$day)

# Model the targets on a log scale; the + 1 keeps zero counts finite.
data$logreg <- log(data$registered + 1)
data$logcas <- log(data$casual + 1)

# Re-split so both sets carry all engineered columns: day of month below 20
# goes to train, above 19 goes to test.
train <- data[as.integer(substr(datetime, 9, 10)) < 20, ]
test <- data[as.integer(substr(datetime, 9, 10)) > 19, ]

#predicting the log of registered users.
set.seed(415)
fit1 <- randomForest(
  logreg ~ hour + workingday + day + holiday + day_type + humidity + atemp +
    windspeed + season + weather + dp_reg + weekend + year + year_part,
  data = train, importance = TRUE, ntree = 250
)
LS0tDQp0aXRsZTogIkJpa2UgU2hhcmluZyBEZW1hbmQiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmxpYnJhcnkoZGF0YS50YWJsZSkNCnRyYWluPC1mcmVhZChmaWxlPSJDOlxcVXNlcnNcXHI2MzE3NThcXERlc2t0b3BcXHI2MzE3NThcXFIgY29kZXNcXEJpa2VTaGFyZVxcdHJhaW4uY3N2IixzdHJpbmdzQXNGYWN0b3JzID0gRkFMU0UpDQp0ZXN0PC1mcmVhZChmaWxlPSJDOlxcVXNlcnNcXHI2MzE3NThcXERlc2t0b3BcXHI2MzE3NThcXFIgY29kZXNcXEJpa2VTaGFyZVxcdGVzdC5jc3YiLHN0cmluZ3NBc0ZhY3RvcnMgPSBGQUxTRSApDQp0ZXN0JHJlZ2lzdGVyZWQ9MA0KdGVzdCRjYXN1YWw9MA0KdGVzdCRjb3VudD0wDQpkYXRhPXJiaW5kKHRyYWluLHRlc3QpDQpgYGANCg0KDQpgYGB7cn0NCnN0cihkYXRhKQ0Kc3VtbWFyeShkYXRhKQ0KYGBgDQoNCmBgYHtyfQ0KcGFyKG1mcm93PWMoNCwyKSkNCnBhcihtYXI9cmVwKDIsNCkpDQpoaXN0KGRhdGEkc2Vhc29uKQ0KaGlzdChkYXRhJHdlYXRoZXIpDQpoaXN0KGRhdGEkaHVtaWRpdHkpDQpoaXN0KGRhdGEkaG9saWRheSkNCmhpc3QoZGF0YSR3b3JraW5nZGF5KQ0KaGlzdChkYXRhJHRlbXApDQpoaXN0KGRhdGEkYXRlbXApDQpoaXN0KGRhdGEkd2luZHNwZWVkKQ0KYGBgDQpgYGB7cn0NCnByb3AudGFibGUodGFibGUoZGF0YSR3ZWF0aGVyKSkNCmBgYA0KDQojY2hhbmdlIHRvIGZhY3Rvcg0KYGBge3J9DQpkYXRhJHNlYXNvbj1hcy5mYWN0b3IoZGF0YSRzZWFzb24pDQpkYXRhJHdlYXRoZXI9YXMuZmFjdG9yKGRhdGEkd2VhdGhlcikNCmRhdGEkaG9saWRheT1hcy5mYWN0b3IoZGF0YSRob2xpZGF5KQ0KZGF0YSR3b3JraW5nZGF5PWFzLmZhY3RvcihkYXRhJHdvcmtpbmdkYXkpDQpgYGANCg0KDQojZ2V0IHRpbWUgZnJvbSBkYXRlDQpgYGB7cn0NCmRhdGEkaG91cj1zdWJzdHIoZGF0YSRkYXRldGltZSwxMiwxMykNCmRhdGEkaG91cj1hcy5mYWN0b3IoZGF0YSRob3VyKQ0KYGBgDQoNCiNob3VyIHRyZW5kDQpgYGB7cn0NCnRyYWluPWRhdGFbYXMuaW50ZWdlcihzdWJzdHIoZGF0YSRkYXRldGltZSw5LDEwKSk8MjAsIF0NCnRlc3Q9ZGF0YVthcy5pbnRlZ2VyKHN1YnN0cihkYXRhJGRhdGEkZGF0ZXRpbWUsOSwxMCkpPjE5LCBdDQpib3hwbG90KHRyYWluJGNvdW50fnRyYWluJGhvdXIseGxhYj0iaG91ciIseWxhYj0iY291bnQgb2YgdXNlcnMiKQ0KDQpib3hwbG90KHRyYWluJGNhc3VhbH50cmFpbiRob3VyLHhsYWI9ImhvdXIiLHlsYWI9ImNvdW50IG9mIGNhc3VhbCB1c2VycyIpDQoNCmJveHBsb3QodHJhaW4kcmVnaXN0ZXJlZH50cmFpbiRob3VyLHhsYWI9ImhvdXIiLHlsYWI9ImNvdW50IG9mIHJlZ2lzdGVyZWQgdXNlcnMiKQ0KYGBgDQoNCiNsb2cgdHJhbnNmb3JtYXRpb24gb2YgY291bnQNCmBgYHtyfQ0KYm94cGxvdChsb2codHJhaW4kY291bnQpfnRyYWluJGhvdXIseGxhYj0iaG91ciIseWxhYj0ibG9nKGNvdW50KSIpDQpgYGANCg0KI2RheSB0cmVu
ZA0KYGBge3J9DQpkYXRlPC1zdWJzdHIoZGF0YSRkYXRldGltZSwxLDEwKQ0KZGF5czwtd2Vla2RheXMoYXMuRGF0ZShkYXRlKSkNCmRhdGEkZGF5PWRheXMNCg0KdHJhaW49ZGF0YVthcy5pbnRlZ2VyKHN1YnN0cihkYXRhJGRhdGV0aW1lLDksMTApKTwyMCwgXQ0KdGVzdD1kYXRhW2FzLmludGVnZXIoc3Vic3RyKGRhdGEkZGF0YSRkYXRldGltZSw5LDEwKSk+MTksIF0NCg0KYm94cGxvdCh0cmFpbiRjYXN1YWx+dHJhaW4kZGF5LHhsYWI9ImRheSIseWxhYj0iY291bnQgb2YgY2FzdWFsIHVzZXJzIikNCg0KYm94cGxvdCh0cmFpbiRyZWdpc3RlcmVkfnRyYWluJGRheSx4bGFiPSJkYXkiLHlsYWI9ImNvdW50IG9mIHJlZ2lzdGVyZWQgdXNlcnMiKQ0KDQoNCmBgYA0KDQojeWVhciB0cmVuZA0KYGBge3J9DQpkYXRhJHllYXI9c3Vic3RyKGRhdGEkZGF0ZXRpbWUsMSw0KQ0KZGF0YSR5ZWFyPWFzLmZhY3RvcihkYXRhJHllYXIpDQp0cmFpbj1kYXRhW2FzLmludGVnZXIoc3Vic3RyKGRhdGEkZGF0ZXRpbWUsOSwxMCkpPDIwLF0NCnRlc3Q9ZGF0YVthcy5pbnRlZ2VyKHN1YnN0cihkYXRhJGRhdGV0aW1lLDksMTApKT4xOSxdDQpib3hwbG90KHRyYWluJGNvdW50fnRyYWluJHllYXIseGxhYj0ieWVhciIsIHlsYWI9ImNvdW50IikNCg0KYGBgDQoNCiN3ZWF0aGVyDQpgYGB7cn0NCmJveHBsb3QodHJhaW4kY2FzdWFsfnRyYWluJHdlYXRoZXIseGxhYj0id2VhdGhlciIseWxhYj0iY291bnQgb2YgY2FzdWFsIHVzZXJzIikNCg0KYm94cGxvdCh0cmFpbiRyZWdpc3RlcmVkfnRyYWluJHdlYXRoZXIseGxhYj0id2VhdGhlciIseWxhYj0iY291bnQgb2YgcmVnaXN0ZXJlZCB1c2VycyIpDQpgYGANCg0KDQojZm9yIGNvbnRpbnVvdXMgdmFyaWFibGVzIA0KYGBge3J9DQpzdWI9ZGF0YS5mcmFtZSh0cmFpbiRyZWdpc3RlcmVkLCB0cmFpbiRjYXN1YWwsIHRyYWluJGNvdW50LCB0cmFpbiR0ZW1wLCB0cmFpbiRodW1pZGl0eSwgdHJhaW4kYXRlbXAsIHRyYWluJHdpbmRzcGVlZCkNCmNvcihzdWIpDQpgYGANCg0KYGBge3J9DQpzdW1tYXJ5KHRyYWluJGhvdXIpDQp0cmFpbiRob3VyPWFzLmludGVnZXIodHJhaW4kaG91cikNCnRlc3QkaG91cj1hcy5pbnRlZ2VyKHRlc3QkaG91cikNCmBgYA0KDQojdXNlIHJwYXJ0IGZvciBkZWNpc2lvbiB0cmVlDQpgYGB7cn0NCmxpYnJhcnkocnBhcnQpDQpsaWJyYXJ5KHJhdHRsZSkNCmxpYnJhcnkocnBhcnQucGxvdCkNCmxpYnJhcnkoUkNvbG9yQnJld2VyKQ0KZD1ycGFydChyZWdpc3RlcmVkfmhvdXIsZGF0YT10cmFpbikNCmZhbmN5UnBhcnRQbG90KGQpDQpgYGANCg0KDQpgYGB7cn0NCmRhdGE9cmJpbmQodHJhaW4sdGVzdCkNCmRhdGEkZHBfcmVnPTANCmRhdGEkZHBfcmVnW2RhdGEkaG91cjw4XT0xDQpkYXRhJGRwX3JlZ1tkYXRhJGhvdXI+PTIyXT0yDQpkYXRhJGRwX3JlZ1tkYXRhJGhvdXI+OSAmIGRhdGEkaG91cjwxOF09Mw0KZGF0YSRkcF9yZWdbZGF0YSRob3VyPT04XT00DQpkYXRhJGRwX3JlZ1tkYXRhJGhv
dXI9PTldPTUNCmRhdGEkZHBfcmVnW2RhdGEkaG91cj09MjAgfCBkYXRhJGhvdXI9PTIxXT02DQpkYXRhJGRwX3JlZ1tkYXRhJGhvdXI9PTE5IHwgZGF0YSRob3VyPT0xOF09Nw0KDQpkYXRhJG1vbnRoPXN1YnN0cihkYXRhJGRhdGV0aW1lLDYsNykNCmRhdGEkbW9udGg9YXMuaW50ZWdlcihkYXRhJG1vbnRoKQ0KDQoNCmRhdGEkeWVhcl9wYXJ0W2RhdGEkeWVhcj09JzIwMTEnXT0xDQpkYXRhJHllYXJfcGFydFtkYXRhJHllYXI9PScyMDExJyAmIGRhdGEkbW9udGg+M109Mg0KZGF0YSR5ZWFyX3BhcnRbZGF0YSR5ZWFyPT0nMjAxMScgJiBkYXRhJG1vbnRoPjZdPTMNCmRhdGEkeWVhcl9wYXJ0W2RhdGEkeWVhcj09JzIwMTEnICYgZGF0YSRtb250aD45XT00DQpkYXRhJHllYXJfcGFydFtkYXRhJHllYXI9PScyMDEyJ109NQ0KZGF0YSR5ZWFyX3BhcnRbZGF0YSR5ZWFyPT0nMjAxMicgJiBkYXRhJG1vbnRoPjNdPTYNCmRhdGEkeWVhcl9wYXJ0W2RhdGEkeWVhcj09JzIwMTInICYgZGF0YSRtb250aD42XT03DQpkYXRhJHllYXJfcGFydFtkYXRhJHllYXI9PScyMDEyJyAmIGRhdGEkbW9udGg+OV09OA0KdGFibGUoZGF0YSR5ZWFyX3BhcnQpDQoNCmRhdGEkZGF5X3R5cGU9IiINCmRhdGEkZGF5X3R5cGVbZGF0YSRob2xpZGF5PT0wICYgZGF0YSR3b3JraW5nZGF5PT0wXT0id2Vla2VuZCINCmRhdGEkZGF5X3R5cGVbZGF0YSRob2xpZGF5PT0xXT0iaG9saWRheSINCmRhdGEkZGF5X3R5cGVbZGF0YSRob2xpZGF5PT0wICYgZGF0YSR3b3JraW5nZGF5PT0xXT0id29ya2luZyBkYXkiDQoNCmRhdGEkd2Vla2VuZD0wDQpkYXRhJHdlZWtlbmRbZGF0YSRkYXk9PSJTdW5kYXkiIHwgZGF0YSRkYXk9PSJTYXR1cmRheSIgXT0xDQoNCmBgYA0KDQoNCiNtb2RlbCBidWlsZGluZw0KYGBge3J9DQpsaWJyYXJ5KHJhbmRvbUZvcmVzdCkNCg0KZGF0YSRob3VyPWFzLmZhY3RvcihkYXRhJGhvdXIpDQpkYXRhJGRheV90eXBlPWFzLmZhY3RvcihkYXRhJGRheV90eXBlKQ0KZGF0YSRkYXk9YXMuZmFjdG9yKGRhdGEkZGF5KQ0KDQpkYXRhJGxvZ3JlZz1sb2coZGF0YSRyZWdpc3RlcmVkKzEpDQpkYXRhJGxvZ2Nhcz1sb2coZGF0YSRjYXN1YWwrMSkNCg0KdHJhaW49ZGF0YVthcy5pbnRlZ2VyKHN1YnN0cihkYXRhJGRhdGV0aW1lLDksMTApKTwyMCxdDQp0ZXN0PWRhdGFbYXMuaW50ZWdlcihzdWJzdHIoZGF0YSRkYXRldGltZSw5LDEwKSk+MTksXQ0KDQojcHJlZGljdGluZyB0aGUgbG9nIG9mIHJlZ2lzdGVyZWQgdXNlcnMuDQpzZXQuc2VlZCg0MTUpDQpmaXQxIDwtIHJhbmRvbUZvcmVzdChsb2dyZWcgfiBob3VyICt3b3JraW5nZGF5K2RheStob2xpZGF5KyBkYXlfdHlwZSAraHVtaWRpdHkrYXRlbXArd2luZHNwZWVkK3NlYXNvbit3ZWF0aGVyK2RwX3JlZyt3ZWVrZW5kK3llYXIreWVhcl9wYXJ0LCBkYXRhPXRyYWluLGltcG9ydGFuY2U9VFJVRSwgbnRyZWU9MjUwKQ0KcHJlZDE9cHJlZGljdChmaXQxLHRlc3QpDQp0ZXN0JGxvZ3JlZz1wcmVkMQ0KDQpzZXQuc2Vl
ZCg0MTUpDQpmaXQyIDwtIHJhbmRvbUZvcmVzdChsb2djYXMgfmhvdXIgKyBkYXlfdHlwZStkYXkraHVtaWRpdHkrYXRlbXArdGVtcF9jYXMrd2luZHNwZWVkK3NlYXNvbit3ZWF0aGVyK2hvbGlkYXkrd29ya2luZ2RheStkcF9jYXMrd2Vla2VuZCt5ZWFyK3llYXJfcGFydCwgZGF0YT10cmFpbixpbXBvcnRhbmNlPVRSVUUsIG50cmVlPTI1MCkNCnByZWQyPXByZWRpY3QoZml0Mix0ZXN0KQ0KdGVzdCRsb2djYXM9cHJlZDINCg0KdGVzdCRyZWdpc3RlcmVkPWV4cCh0ZXN0JGxvZ3JlZyktMQ0KdGVzdCRjYXN1YWw9ZXhwKHRlc3QkbG9nY2FzKS0xDQp0ZXN0JGNvdW50PXRlc3QkY2FzdWFsK3Rlc3QkcmVnaXN0ZXJlZA0KczwtZGF0YS5mcmFtZShkYXRldGltZT10ZXN0JGRhdGV0aW1lLGNvdW50PXRlc3QkY291bnQpDQpgYGANCg0K