This is an exploratory data visualization project for NYC bicycle trips.
The trip data comes from Citi Bike's public system data: http://www.citibikenyc.com/system-data
# Load the raw trip export and keep only the columns the plots use: start and
# end station coordinates, the bike id and the gender code.
bk <- read.csv("citibike-tripdata.csv")
bk <- bk[, c(
  "start.station.latitude", "start.station.longitude",
  "end.station.latitude", "end.station.longitude",
  "bikeid", "gender"
)]

# Count how many trips each bike made and attach that count to every trip row.
# as.data.frame(table(...)) names its key column "Var1", hence by.y below.
count1 <- as.data.frame(table(bk$bikeid))
mrg <- merge(x = bk, y = count1, by.x = "bikeid", by.y = "Var1")

# Shorter column names: ".o" marks the trip origin, ".e" its end point.
# merge() puts the key column first, so the order is id, the four
# coordinates, gender, then the per-bike trip count.
names(mrg) <- c("id", "lat.o", "lon.o", "lat.e", "lon.e", "gender", "count")

# One subset per gender code (presumably 1 = male, 2 = female in the Citi Bike
# export -- confirm against the data dictionary). %in% never yields NA, so
# rows with a missing gender are left out of both subsets.
bkG1 <- mrg[mrg$gender %in% "1", ]
bkG2 <- mrg[mrg$gender %in% "2", ]
# this plot only shows 1 type of geom_segment
Now the data is ready to plot. Let's look at the data with each bike trip drawn as a line segment from the departure bike station to the destination bike station.
# Plot 1: every trip in `mrg` drawn as a faint white segment from its start
# station to its end station, on a black panel with all axis decoration and
# grid lines removed.
# Requires ggplot2 to be attached; coord_map() additionally needs the mapproj
# package to be installed.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line
# geoms; `size` is kept here so the code still runs on older versions.
p1 <- ggplot(mrg, aes(lon.o, lat.o)) +
  coord_map() +
  ggtitle("Bike Trips in NYC") +
  # Start stations as small dark-blue dots.
  geom_point(
    aes(x = lon.o, y = lat.o),
    color = "midnightblue", alpha = .8, size = .3
  ) +
  # One segment per trip; opacity is mapped to the bike's trip count.
  geom_segment(
    aes(x = lon.o, y = lat.o, xend = lon.e, yend = lat.e, alpha = count),
    colour = "white", size = .0011
  ) +
  # Squeeze alpha into a very narrow, almost transparent band so that only
  # heavily overlapping segments become visible.
  scale_alpha_continuous(range = c(0.0019, 0.0022)) +
  # Each theme element is set exactly once: passing the same named argument
  # more than once makes R reject the call ("formal argument matched by
  # multiple actual arguments").
  theme(
    panel.background = element_rect(fill = "black", colour = "black"),
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )
It is extremely difficult to see what is going on with the data. There are so many data points that everything gets blurred together.
Now let's try splitting and coloring the data by gender.
# ...
# Plot 2 starts from the gender-1 subset, drawn on a map projection.
p2 <- ggplot(data = bkG1, mapping = aes(x = lon.o, y = lat.o)) +
  coord_map()
# ....
# Overlay the gender-2 trips as white start-to-end segments whose opacity is
# mapped to the bike's trip count.
p2 <- p2 + geom_segment(
  data = bkG2,
  mapping = aes(x = lon.o, y = lat.o, xend = lon.e, yend = lat.e, alpha = count),
  colour = "white",
  size = .0011
)
# ....
Now let's see how it looks when we combine all trips, drawn with dark blue lines, with the two gender subsets, drawn with lighter blue and white lines.
# Plot 3: all trips as a base layer with the two gender subsets drawn on top.
# Layers are added bottom-to-top, so later segments paint over earlier ones.
p3 <- ggplot(data = mrg, mapping = aes(x = lon.o, y = lat.o)) +
  coord_map() +
  # ...
  # Base layer: every trip in dodger blue.
  geom_segment(
    mapping = aes(x = lon.o, y = lat.o, xend = lon.e, yend = lat.e, alpha = count),
    colour = "dodgerblue3",
    size = .0011
  ) +
  # ...
  # Gender-1 trips in a lighter blue.
  geom_segment(
    data = bkG1,
    mapping = aes(x = lon.o, y = lat.o, xend = lon.e, yend = lat.e, alpha = count),
    colour = "deepskyblue3",
    size = .0011
  ) +
  # Gender-2 trips in white, drawn last.
  geom_segment(
    data = bkG2,
    mapping = aes(x = lon.o, y = lat.o, xend = lon.e, yend = lat.e, alpha = count),
    colour = "white",
    size = .0011
  )
#...