head(data1)
## # A tibble: 6 x 7
## Duration End.Date EndStation.Id Start.Date StartStation.Id
## <int> <chr> <int> <chr> <int>
## 1 3600 03/01/2017 07:42 510 03/01/2017 06:42 201
## 2 180 03/01/2017 19:03 386 03/01/2017 19:00 381
## 3 300 03/01/2017 17:23 68 03/01/2017 17:18 109
## 4 360 03/01/2017 17:39 254 03/01/2017 17:33 68
## 5 300 02/01/2017 16:03 378 02/01/2017 15:58 219
## 6 300 01/01/2017 11:09 356 01/01/2017 11:04 219
## # ... with 2 more variables: StartStation.Name <chr>,
## # EndStation.Name <chr>
head(coor)
## # A tibble: 6 x 3
## id lat long
## <int> <dbl> <dbl>
## 1 1 51.5 -0.110
## 2 2 51.5 -0.198
## 3 3 51.5 -0.0846
## 4 4 51.5 -0.121
## 5 5 51.5 -0.157
## 6 6 51.5 -0.144
The data is spilt in to weekdays and weekends to analyse the difference (if any) between them. Working out the day of the week from by dates and merging coordinates in to the main dataset.
data1$weekdays<-weekdays(
strptime(data1$Start.Date,format="%d/%m/%Y %H:%M",tz="UTC")
)
data1<-data1 %>% select("StartStation.Id","EndStation.Id","Start.Date","StartStation.Name",
"EndStation.Name","weekdays")
data1$week <- ifelse(data1$weekdays %in% c("Saturday", "Sunday"), "weekend", "weekday")
data1<- data1[data1$week=="weekday",]
a<- coor
colnames(a)<- c("StartStation.Id","s.lat","s.lon")
x<-plyr::join(data1,a)
## Joining by: StartStation.Id
aa<- coor
colnames(aa)<- c("EndStation.Id","e.lat","e.lon")
z<-plyr::join(x,aa)
## Joining by: EndStation.Id
z<- na.omit(z)
x<- plyr::count(z,vars = c("StartStation.Id","EndStation.Id"))
data1<- join(z,x)
## Joining by: StartStation.Id, EndStation.Id
head(data1) # the data now becomes
## StartStation.Id EndStation.Id Start.Date
## 1 201 510 03/01/2017 06:42
## 2 381 386 03/01/2017 19:00
## 3 109 68 03/01/2017 17:18
## 4 68 254 03/01/2017 17:33
## 5 219 378 02/01/2017 15:58
## 6 405 219 03/01/2017 22:00
## StartStation.Name
## 1 Dorset Square, Marylebone
## 2 Charlotte Street, Fitzrovia
## 3 Soho Square , Soho
## 4 Theobald's Road , Holborn
## 5 Bramham Gardens, Earl's Court
## 6 Gloucester Road Station, South Kensington
## EndStation.Name weekdays week s.lat
## 1 Westferry DLR, Limehouse Tuesday weekday 51.52260
## 2 Moor Street, Soho Tuesday weekday 51.51953
## 3 Theobald's Road , Holborn Tuesday weekday 51.51563
## 4 Chadwell Street, Angel Tuesday weekday 51.52060
## 5 Natural History Museum, South Kensington Monday weekday 51.49016
## 6 Bramham Gardens, Earl's Court Tuesday weekday 51.49419
## s.lon e.lat e.lon freq
## 1 -0.1611134 51.50930 -0.0259960 9
## 2 -0.1357773 51.51353 -0.1301108 64
## 3 -0.1323288 51.52060 -0.1166885 108
## 4 -0.1166885 51.53052 -0.1064085 107
## 5 -0.1903936 51.49559 -0.1790776 229
## 6 -0.1826709 51.49016 -0.1903936 297
Working out most popular Starting and Ending stations
q<- data1
s.top<- dplyr::select(q,"s.lat","s.lon","StartStation.Name")
sc<- plyr::count(s.top$StartStation.Name)
colnames(sc)<- c("StartStation.Name","Freq")
scj<- plyr::join(sc,s.top)
## Joining by: StartStation.Name
scj<- scj[!duplicated(scj),]
scj<-arrange(scj,desc(Freq))[1:5,]
#########################ending station
e.top<- dplyr::select(q,"e.lat","e.lon","EndStation.Name")
ec<- plyr::count(e.top$EndStation.Name)
colnames(ec)<- c("EndStation.Name","Freq1")
ecj<- plyr::join(ec,e.top)
## Joining by: EndStation.Name
ecj<- ecj[!duplicated(ecj),]
ecj<-arrange(ecj,desc(Freq1))[1:5,]
Remove duplicate rows
weekdaymap<- distinct(q,s.lat ,s.lon ,e.lat ,e.lon ,freq)
head(weekdaymap) #This is the data we are using
## s.lat s.lon e.lat e.lon freq
## 1 51.52260 -0.1611134 51.50930 -0.0259960 9
## 2 51.51953 -0.1357773 51.51353 -0.1301108 64
## 3 51.51563 -0.1323288 51.52060 -0.1166885 108
## 4 51.52060 -0.1166885 51.53052 -0.1064085 107
## 5 51.49016 -0.1903936 51.49559 -0.1790776 229
## 6 51.49419 -0.1826709 51.49016 -0.1903936 297
map<- get_map(location = c(lon =mean(q$s.lon),lat = mean(q$s.lat)),
source="google",maptype="roadmap",zoom=11)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=51.509045,-0.125019&zoom=11&size=640x640&scale=2&maptype=roadmap&language=en-EN&sensor=false
ggmap(map)+
geom_segment(data=weekdaymap,aes(x=s.lon,y=s.lat,xend=e.lon, yend=e.lat,alpha=freq),
colour= "black",size=0.00001)+
theme(plot.title = element_text(hjust = 0.5,size = 13))+
xlim(range(q$s.lon))+
ylim(range(q$s.lat))+
labs(alpha="Trips")+
ggtitle("Weekdays Bicycle trip in London",
subtitle = "Figure 1")+
theme(plot.title = element_text(hjust = 0.5,size = 15))+
theme(plot.subtitle = element_text(size =15))+
geom_point(data=weekdaymap,aes(x=s.lon,y=s.lat),size=0.001,alpha=0.3)+
geom_label_repel(data=scj, aes(x=s.lon,y=s.lat,label=StartStation.Name),
color="blue",nudge_y = 0.03,nudge_x = -0.1,label.size = 0.05,
size=5,alpha=0.6
)+
# arrow =arrow(length =unit(0.5,"cm")))+
geom_label_repel(data=ecj, aes(x=e.lon,y=e.lat,label=EndStation.Name),
color="red",segment.size = 0.5,segment.colour = "red",
#arrow = arrow(length = unit(0.5,"cm")),
nudge_y = -0.03,nudge_x = 0.15,size=5,alpha=0.6)+
theme(plot.caption = element_text(hjust=0.5))+
geom_segment(data=weekdaymap,aes(x=s.lon,y=s.lat,xend=e.lon, yend=e.lat,alpha=freq),
colour= "black",size=0.00001)+
geom_segment(data=weekdaymap,aes(x=s.lon,y=s.lat,xend=e.lon, yend=e.lat,alpha=freq),
colour= "black",size=0.00001)+
geom_segment(data=weekdaymap,aes(x=s.lon,y=s.lat,xend=e.lon, yend=e.lat,alpha=freq),
colour= "black",size=0.00001)+
scale_alpha_continuous(range = c(0,0.45),limits=c(1,20975))
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
The above illustration (Figure 1) shows the bicycle route patterns on weekdays with the top five most visited start stations labelled in blue and end stations in red.The weekday bicycle routes show more stations are visited in the centre of the London.
ggmap(map)+
geom_segment(data=weekendmap,aes(x=s.lon,y=s.lat,xend=e.lon, yend=e.lat,alpha=freq),
colour= "black",size=0.00001)+
theme(plot.title = element_text(hjust = 0.5,size = 13))+
xlim(range(q$s.lon))+
ylim(range(q$s.lat))+
labs(alpha="Trips")+
ggtitle("Weekends Bicycle trip in London",
subtitle = "Figure 2")+
theme(plot.title = element_text(hjust = 0.5,size = 15))+
theme(plot.subtitle = element_text(size =15))+
geom_point(data=weekendmap,aes(x=s.lon,y=s.lat),size=0.001,alpha=0.3)+
geom_label_repel(data=scj, aes(x=s.lon,y=s.lat,label=StartStation.Name),
color="blue",nudge_y = 0.03,nudge_x = -0.1,label.size = 0.05,
size=5,alpha=0.6
)+
# arrow =arrow(length =unit(0.5,"cm")))+
geom_label_repel(data=ecj, aes(x=e.lon,y=e.lat,label=EndStation.Name),
color="red",segment.size = 0.5,segment.colour = "red",
#arrow = arrow(length = unit(0.5,"cm")),
nudge_y = -0.03,nudge_x = 0.15,size=5,alpha=0.6)+
theme(plot.caption = element_text(hjust=0.5))+
geom_segment(data=weekendmap,aes(x=s.lon,y=s.lat,xend=e.lon, yend=e.lat,alpha=freq),
colour= "black",size=0.00001)+
geom_segment(data=weekendmap,aes(x=s.lon,y=s.lat,xend=e.lon, yend=e.lat,alpha=freq),
colour= "black",size=0.00001)+
geom_segment(data=weekendmap,aes(x=s.lon,y=s.lat,xend=e.lon, yend=e.lat,alpha=freq),
colour= "black",size=0.00001)+
scale_alpha_continuous(range = c(0,0.80),limits=c(1,20975))
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
In Figure 2, barely any routes can be seen on the map. In fact, the only visible routes are centred around Hyde Park while the journeys seen previously around Waterloo and Belgrove have disappeared. One explanation for this could be that since Waterloo is a central train station, commuters who use the public bicycles to travel to work are not using them on the weekends, so without commuters the bikes are being used less around the larger central railway stations.