tripduration starttime stoptime start.station.id
1 364 2017-09-01 00:02:01 2017-09-01 00:08:05 3183
2 357 2017-09-01 00:08:12 2017-09-01 00:14:09 3187
3 432 2017-09-01 00:10:12 2017-09-01 00:17:24 3195
4 934 2017-09-01 00:10:11 2017-09-01 00:25:46 3272
5 932 2017-09-01 00:10:16 2017-09-01 00:25:48 3272
6 414 2017-09-01 00:15:32 2017-09-01 00:22:26 3186
start.station.name start.station.latitude start.station.longitude
1 Exchange Place 40.71625 -74.03346
2 Warren St 40.72112 -74.03805
3 Sip Ave 40.73074 -74.06378
4 Jersey & 3rd 40.72333 -74.04595
5 Jersey & 3rd 40.72333 -74.04595
6 Grove St PATH 40.71959 -74.04312
end.station.id end.station.name end.station.latitude
1 3276 Marin Light Rail 40.71458
2 3199 Newport Pkwy 40.72874
3 3280 Astor Place 40.71928
4 3207 Oakland Ave 40.73760
5 3207 Oakland Ave 40.73760
6 3480 WS Don't Use 0.00000
end.station.longitude bikeid usertype birth.year gender
1 -74.04282 29670 Subscriber 1989 1
2 -74.03211 26163 Subscriber 1980 1
3 -74.07126 26273 Subscriber 1988 1
4 -74.05248 26297 Subscriber 1991 1
5 -74.05248 29247 Subscriber 1993 2
6 0.00000 29589 Customer NULL 0
tripduration starttime stoptime
Min. : 61.0 Length:33119 Length:33119
1st Qu.: 238.0 Class :character Class :character
Median : 355.0 Mode :character Mode :character
Mean : 756.9
3rd Qu.: 610.0
Max. :2181628.0
start.station.id start.station.name start.station.latitude
Min. :3183 Length:33119 Min. :40.69
1st Qu.:3187 Class :character 1st Qu.:40.72
Median :3203 Mode :character Median :40.72
Mean :3217 Mean :40.72
3rd Qu.:3225 3rd Qu.:40.73
Max. :3481 Max. :40.75
start.station.longitude end.station.id end.station.name
Min. :-74.10 Min. : 128 Length:33119
1st Qu.:-74.05 1st Qu.:3186 Class :character
Median :-74.04 Median :3202 Mode :character
Mean :-74.05 Mean :3217
3rd Qu.:-74.04 3rd Qu.:3220
Max. :-74.03 Max. :3481
end.station.latitude end.station.longitude bikeid
Min. : 0.00 Min. :-74.10 Min. :15250
1st Qu.:40.72 1st Qu.:-74.05 1st Qu.:26280
Median :40.72 Median :-74.04 Median :29284
Mean :39.97 Mean :-72.68 Mean :28500
3rd Qu.:40.73 3rd Qu.:-74.04 3rd Qu.:29577
Max. :40.78 Max. : 0.00 Max. :31957
usertype birth.year gender
Length:33119 Length:33119 Min. :0.000
Class :character Class :character 1st Qu.:1.000
Mode :character Mode :character Median :1.000
Mean :1.141
3rd Qu.:1.000
Max. :2.000
tripduration starttime stoptime
"integer" "character" "character"
start.station.id start.station.name start.station.latitude
"integer" "character" "numeric"
start.station.longitude end.station.id end.station.name
"numeric" "integer" "character"
end.station.latitude end.station.longitude bikeid
"numeric" "numeric" "integer"
usertype birth.year gender
"character" "character" "integer"
Column {data-width=300} |
---
title: "Final Project for ANLY 512-51- R-2017/Fall - Data Visualization"
output:
flexdashboard::flex_dashboard:
orientation: columns
vertical_layout: fill
source_code: embed
---
```{r setup, include=FALSE}
library(flexdashboard)
library("ggplot2")
library(dplyr)
library(data.table)
# Read the trip CSV into a plain data.frame (data.table = FALSE), keeping
# text columns as character vectors rather than factors.
data <- fread(
  "JC-201709-citibike-tripdata.csv",
  header = TRUE,
  showProgress = FALSE,
  stringsAsFactors = FALSE,
  data.table = FALSE
)

# Column names as read from the file.
names(data)

# Make every column name syntactically valid so the columns can be used
# with `$` and inside aes() without backticks.
names(data) <- make.names(names(data))

# Column names after sanitising, and the class of the loaded object.
names(data)
class(data)
```
Page 1
==================================
Column {data-width=600}
-----------------------------------------------------------------------
### Data Preview
```{r}
# Show the first six trips so the reader can see the column layout.
head(data, n = 6L)
```
Column {data-width=400}
-----------------------------------------------------------------------
### Data Summary
```{r}
# Column-by-column summary of the trip table.
summary(object = data)
```
### Data Class
```{r}
# Report the class of every column as a named character vector.
#
# attach() is deliberately not used: the chunks in this document reach
# columns through `data` (via `$`, aes() or the pipe), and an attached copy
# would silently go stale as soon as `data` is modified further down.
# vapply() guarantees one string per column; multi-class columns are
# collapsed with "/" instead of changing the shape of the result.
vapply(
  data,
  function(column) paste(class(column), collapse = "/"),
  character(1)
)
```
Page 2
==================================
Row {data-height=550}
-----------------------------------------------------------------------
### User Birth Year
```{r}
# birth.year is read as character because unknown years are stored as the
# literal string "NULL". Turn those into NA explicitly so the integer
# conversion does not emit an "NAs introduced by coercion" warning into
# the dashboard panel.
birth_year <- data$birth.year
birth_year[birth_year %in% "NULL"] <- NA
data$birth.year <- as.integer(birth_year)

# Histogram of rider birth years.
# bins = 30 is ggplot2's default, stated explicitly to silence the bin
# selection message; na.rm = TRUE drops the unknown years without a warning.
ggplot(data) +
  geom_histogram(aes(birth.year), fill = "red", bins = 30, na.rm = TRUE)
```
Row {data-height=350}
-----------------------------------------------------------------------
### User Type
```{r}
# Number of trips per user type, one narrow bar per type, coloured by type.
ggplot(data, aes(x = usertype, fill = usertype)) +
  geom_bar(width = 0.2)
```
Page 3
==================================
column
-----------------------------------------------------------------------
### Average Trip Duration by User Type
```{r}
# Mean trip duration for each user type, drawn as one bar per type.
# `fun` replaces the `fun.y` argument that ggplot2 deprecated in 3.3.0.
ggplot(data, aes(x = usertype, y = tripduration)) +
  stat_summary(fun = "mean", geom = "bar") +
  ylab("Avg Trip Duration")
```
column
-----------------------------------------------------------------------
### Average Trip Duration by Gender
```{r}
# Map the integer gender codes to readable labels.
# The codes are tied to their labels explicitly (0/1/2) instead of assigning
# levels positionally, so a missing code can neither shift the labels nor
# raise a length-mismatch error. The guard keeps the chunk safe to re-run
# once the column has already been converted.
if (!is.factor(data$gender)) {
  data$gender <- factor(
    data$gender,
    levels = c(0, 1, 2),
    labels = c("UNKNOWN", "MALE", "FEMALE")
  )
}

# Mean trip duration for each gender label.
# `fun` replaces the `fun.y` argument that ggplot2 deprecated in 3.3.0.
ggplot(data, aes(x = gender, y = tripduration)) +
  stat_summary(fun = "mean", geom = "bar") +
  ylab("Avg Trip Duration")
```
Page 4
==================================
-----------------------------------------------------------------------
### Weekday Usage
```{r}
# library() stops with an error when lubridate is missing; require() would
# only return FALSE and let the chunk fail later with a less helpful message.
library(lubridate)

# Parse the timestamp strings into date-times.
data$starttime <- ymd_hms(data$starttime)
data$stoptime <- ymd_hms(data$stoptime)

# Elapsed time per trip. `duration` lets R pick the difftime units
# automatically, while `myduration` is pinned to seconds.
data$duration <- data$stoptime - data$starttime
data$myduration <- difftime(data$stoptime, data$starttime, units = "secs")

# Derived calendar fields used by the usage charts.
data$weekday <- wday(data$starttime, label = TRUE)
data$hour <- as.factor(hour(data$starttime))

# Share of all trips that start on each weekday.
# after_stat() replaces the deprecated `..count..` notation.
ggplot(data) +
  geom_bar(
    aes(x = weekday, y = after_stat(count / sum(count)), fill = weekday)
  ) +
  theme_bw() +
  ylab("Proportion")
```
Page 5
==================================
-----------------------------------------------------------------------
Column {data-width=300}
-----------------------------------------------------------------------
### Hourly Usage
```{r}
# Share of all trips that start in each hour of the day.
# after_stat() replaces the deprecated `..count..` notation.
ggplot(data) +
  geom_bar(
    aes(x = hour, y = after_stat(count / sum(count))),
    fill = "lightblue"
  ) +
  theme_light() +
  ylab("Proportion")
```
Page 6
==================================
-----------------------------------------------------------------------
### Hourly Weekday
```{r}
# Hourly start-time profile of subscriber trips on Monday to Friday,
# with one dodged bar per weekday within each hour.
# dplyr::filter() replaces subset(), which is intended for interactive use;
# after_stat() replaces the deprecated `..count..` notation.
# NOTE(review): the "Sat"/"Sun" labels come from wday(label = TRUE) and
# presumably depend on an English locale - confirm on the rendering machine.
data %>%
  dplyr::filter(usertype == "Subscriber", !(weekday %in% c("Sat", "Sun"))) %>%
  ggplot() +
  geom_bar(
    aes(x = hour, y = after_stat(count / sum(count)), fill = weekday),
    position = "dodge"
  ) +
  ylab("Proportion") +
  scale_fill_brewer(palette = "YlOrRd")
```