Sorting data

################## day time function
#' Classify each trip's start time into a part of the day.
#'
#' @param data4 Data frame with a `Start.Date` column holding
#'   "dd/mm/YYYY HH:MM" timestamps (defaults to the global `bikedata`).
#' @return Factor, one element per row, with levels
#'   Night [00:00, 05:00), Morning [05:00, 12:00),
#'   Afternoon [12:00, 18:00) and Evening [18:00, 24:00).
#'   Timestamps that fail to parse yield NA.
daytime <- function(data4 = bikedata) {
  start_time <- strptime(data4$Start.Date,
                         format = "%d/%m/%Y %H:%M", tz = "UTC")
  # strptime() returns POSIXlt, so the hour (0-23) is directly available.
  start_hour <- start_time$hour

  hour_breaks <- c(0, 5, 12, 18, 24)
  part_labels <- c("Night", "Morning", "Afternoon", "Evening")

  # right = FALSE makes the bins left-closed, so 05:xx counts as Morning,
  # 12:xx as Afternoon and 18:xx as Evening. With the default
  # right-closed bins those hours fall into the preceding period.
  cut(x = start_hour, breaks = hour_breaks, labels = part_labels,
      right = FALSE)
}
####################################################### season
#' Label each start date with its season.
#'
#' Boundaries are compared on month and day only, so the result does not
#' depend on the year of the data:
#' Winter from 21 Dec up to (not including) 20 Mar, Spring from 20 Mar,
#' Summer from 21 Jun, Autumn from 22 Sep.
#'
#' @param data2 Data frame with a `Start.Date` column of class Date
#'   (other classes are passed through `as.Date()`).
#' @return Character vector of "Winter", "Spring", "Summer" or "Autumn",
#'   one element per row; NA dates give NA.
Season <- function(data2) {
  d <- data2$Start.Date
  if (!inherits(d, "Date")) {
    d <- as.Date(d)
  }

  # Encode month and day as the integer MMDD (e.g. 21 June -> 621) so the
  # seasonal boundaries can be compared without reference to a year.
  month_day <- as.integer(format(d, "%m%d"))

  ifelse(month_day >= 1221L | month_day < 320L, "Winter",
         ifelse(month_day < 621L, "Spring",
                ifelse(month_day < 922L, "Summer", "Autumn")))
}
# Derive the time-of-day label first: daytime() parses the raw
# "dd/mm/YYYY HH:MM" strings, so it has to run before Start.Date is
# truncated to a plain Date on the next statement.
bikedata$Daytime <- daytime(data4 = bikedata)
bikedata$Start.Date <- as.Date(strptime(bikedata$Start.Date,
                                        format = "%d/%m/%Y %H:%M", tz = "UTC"))

# Day of the week as labels, then converted to an unordered factor.
bikedata$weekdays <- wday(bikedata$Start.Date, label = TRUE)

bikedata$weekdays <- factor(bikedata$weekdays, ordered = FALSE)

bikedata$season <- as.factor(Season(data2 = bikedata))

# Count trips per (date, daytime, weekday, season) combination; the fifth
# column returned by plyr::count() holds the frequency.
x <- plyr::count(bikedata,
                 vars = c("Start.Date", "Daytime", "weekdays", "season"))
colnames(x)[5] <- "ntrips"
# Attach the group count to every individual trip row (duplicates are
# removed further down).
bikedata <- plyr::join(x, bikedata)
## Joining by: Start.Date, Daytime, weekdays, season
bikedata <- bikedata %>%
  dplyr::select("Start.Date", "Daytime", "weekdays", "season", "ntrips")


# Preview the joined table: rows repeat because every trip row now carries
# the trip count of its (date, daytime, weekday, season) group.
head(bikedata,10)
##    Start.Date Daytime weekdays season ntrips
## 1  2017-01-04   Night      Wed Winter    292
## 2  2017-01-04   Night      Wed Winter    292
## 3  2017-01-04   Night      Wed Winter    292
## 4  2017-01-04   Night      Wed Winter    292
## 5  2017-01-04   Night      Wed Winter    292
## 6  2017-01-04   Night      Wed Winter    292
## 7  2017-01-04   Night      Wed Winter    292
## 8  2017-01-04   Night      Wed Winter    292
## 9  2017-01-04   Night      Wed Winter    292
## 10 2017-01-04   Night      Wed Winter    292
# Collapse the repeated rows to one per group, keep only the modelling
# columns, and discard rows containing any missing value.
bikedata <- bikedata %>%
  unique() %>%
  dplyr::select("ntrips", "weekdays", "season", "Daytime") %>%
  na.omit()

head(bikedata, n = 20)
##       ntrips weekdays season   Daytime
## 1        292      Wed Winter     Night
## 293     9487      Wed Winter   Morning
## 9780    9366      Wed Winter Afternoon
## 19146   2810      Wed Winter   Evening
## 21956    325    Thurs Winter     Night
## 22281  10244    Thurs Winter   Morning
## 32525   9752    Thurs Winter Afternoon
## 42277   3029    Thurs Winter   Evening
## 45306    392      Fri Winter     Night
## 45698   9950      Fri Winter   Morning
## 55648   6701      Fri Winter Afternoon
## 62349   1767      Fri Winter   Evening
## 64116    453      Sat Winter     Night
## 64569   4230      Sat Winter   Morning
## 68799   6711      Sat Winter Afternoon
## 75510   2328      Sat Winter   Evening
## 77838    925      Sun Winter     Night
## 78763   4166      Sun Winter   Morning
## 82929   6491      Sun Winter Afternoon
## 89420   1753      Sun Winter   Evening
# Inspect the column types and the number of rows left after cleaning.
str(bikedata)
## 'data.frame':    1148 obs. of  4 variables:
##  $ ntrips  : int  292 9487 9366 2810 325 10244 9752 3029 392 9950 ...
##  $ weekdays: Factor w/ 7 levels "Sun","Mon","Tues",..: 4 4 4 4 5 5 5 5 6 6 ...
##  $ season  : Factor w/ 4 levels "Autumn","Spring",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ Daytime : Factor w/ 4 levels "Night","Morning",..: 1 2 3 4 1 2 3 4 1 2 ...

Exploratory analysis

################### weekdays and season
# Trips per weekday, one facet (and fill colour) per season.
ggplot(data = bikedata, mapping = aes(x = weekdays, y = ntrips, fill = season)) +
  geom_boxplot() +
  facet_grid(~season) +
  labs(title = "Boxplot of the number of trips by weekdays and season") +
  theme(
    plot.title = element_text(hjust = 0.5),          # centre the title
    axis.text.x = element_text(angle = 45, hjust = 1) # slanted tick labels
  )

Relatively speaking, weekends have fewer trips, especially during winter and autumn. However, the seasonal pattern on its own does not seem to have a big influence.

############## weekdays and daytime
# Trips per weekday, one facet (and fill colour) per part of the day.
ggplot(data = bikedata, mapping = aes(x = weekdays, y = ntrips, fill = Daytime)) +
  geom_boxplot() +
  facet_grid(~Daytime) +
  labs(title = "Boxplot of the number of trips by weekdays and daytime") +
  theme(
    plot.title = element_text(hjust = 0.5),          # centre the title
    axis.text.x = element_text(angle = 45, hjust = 1) # slanted tick labels
  )

There is a dramatic difference between the times of day: trips are dominated by the morning and afternoon, suggesting that citizens may cycle to and from work. The day of the week also alters the daytime pattern slightly: there are fewer morning trips at weekends, whereas at night weekends have a slightly higher number of trips. This may lead to the conclusion that citizens tend to use bikes a little more often at night during weekends, and in the morning and afternoon on weekdays.

######### Season and daytime
# Trips per part of the day, one facet (and fill colour) per season.
ggplot(data = bikedata, mapping = aes(x = Daytime, y = ntrips, fill = season)) +
  geom_boxplot() +
  facet_grid(~season) +
  labs(title = "Boxplot of the number of trips by season and daytime") +
  theme(
    plot.title = element_text(hjust = 0.5),          # centre the title
    axis.text.x = element_text(angle = 45, hjust = 1) # slanted tick labels
  )

# Total number of trips per season, summed over the per-group counts.
with(bikedata, sum(ntrips[which(season == "Summer")]))
## [1] 2470355
with(bikedata, sum(ntrips[which(season == "Autumn")]))
## [1] 1497932
with(bikedata, sum(ntrips[which(season == "Spring")]))
## [1] 2906197
with(bikedata, sum(ntrips[which(season == "Winter")]))
## [1] 1560046

Seasonal changes have only a small impact on the daytime pattern. In the evening and afternoon, the number of trips increases from autumn and then drops all the way down again in winter.