Divvy Bikes Analysis at UChicago by RCC

Load packages

suppressWarnings(library(readr))
suppressWarnings(library(ggplot2))
suppressMessages(library(cowplot))
suppressMessages(library(tidyverse))

Check and set working directory

getwd()

## [1] "/Users/codethedral/Google Drive/GitHub/R-intro-divvy/analysis"

# Machine-specific absolute paths: keep only the line that matches your
# computer (the Windows path is commented out; the Mac path is active).
#setwd("C:/Users/jorge/Google Drive/GitHub/R-intro-divvy/analysis")#surface
setwd("/Users/codethedral/Google Drive/GitHub/R-intro-divvy/analysis")#mac

Version of R

sessionInfo()

## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.3.0   stringr_1.3.1   dplyr_0.7.5     purrr_0.2.4    
##  [5] tidyr_0.8.1     tibble_1.4.2    tidyverse_1.2.1 cowplot_0.9.3  
##  [9] ggplot2_2.2.1   readr_1.1.1    
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.17     cellranger_1.1.0 pillar_1.2.2     compiler_3.5.1  
##  [5] plyr_1.8.4       bindr_0.1.1      tools_3.5.1      digest_0.6.15   
##  [9] lubridate_1.7.4  jsonlite_1.5     lattice_0.20-35  evaluate_0.10.1 
## [13] gtable_0.2.0     nlme_3.1-137     pkgconfig_2.0.1  rlang_0.2.0     
## [17] psych_1.8.4      cli_1.0.0        rstudioapi_0.7   parallel_3.5.1  
## [21] yaml_2.1.19      haven_1.1.2      bindrcpp_0.2.2   xml2_1.2.0      
## [25] httr_1.3.1       knitr_1.20       hms_0.4.2        rprojroot_1.3-2 
## [29] grid_3.5.1       tidyselect_0.2.4 glue_1.2.0       R6_2.2.2        
## [33] readxl_1.1.0     foreign_0.8-70   rmarkdown_1.9    modelr_0.1.2    
## [37] reshape2_1.4.3   magrittr_1.5     backports_1.1.2  scales_0.5.0    
## [41] htmltools_0.3.6  rvest_0.3.2      mnormt_1.5-5     assertthat_0.2.0
## [45] colorspace_1.3-2 stringi_1.2.2    lazyeval_0.2.1   munsell_0.4.3   
## [49] broom_0.4.5      crayon_1.3.4

ls()

## character(0)

Clean environment.

rm(list = ls())

Sys.glob("*csv")

##  [1] "Divvy_Stations_2016_Q1Q2.csv" "Divvy_Stations_2016_Q3.csv"  
##  [3] "Divvy_Stations_2016_Q4.csv"   "Divvy_Stations_2017_Q1Q2.csv"
##  [5] "Divvy_Stations_2017_Q3Q4.csv" "Divvy_Trips_2016_04.csv"     
##  [7] "Divvy_Trips_2016_05.csv"      "Divvy_Trips_2016_06.csv"     
##  [9] "Divvy_Trips_2016_Q1.csv"      "Divvy_Trips_2016_Q3.csv"     
## [11] "Divvy_Trips_2016_Q4.csv"      "Divvy_Trips_2017_Q1.csv"     
## [13] "Divvy_Trips_2017_Q2.csv"      "Divvy_Trips_2017_Q3.csv"     
## [15] "Divvy_Trips_2017_Q4.csv"

Read one of the csv files

glimpse(read.csv("Divvy_Stations_2017_Q1Q2.csv"))

## Observations: 582
## Variables: 7
## $ id          <int> 456, 101, 109, 21, 80, 346, 341, 480, 444, 511, 37...
## $ name        <fct> 2112 W Peterson Ave, 63rd St Beach, 900 W Harrison...
## $ city        <fct> Chicago, Chicago, Chicago, Chicago, Chicago, Chica...
## $ latitude    <dbl> 41.99118, 41.78102, 41.87468, 41.87773, 41.88157, ...
## $ longitude   <dbl> -87.68359, -87.57612, -87.65002, -87.65479, -87.65...
## $ dpcapacity  <int> 15, 23, 19, 15, 19, 15, 35, 15, 11, 15, 35, 15, 15...
## $ online_date <fct> 2/10/2015 14:04:42, 7/16/2013 01:27:50, 7/18/2013 ...

Import station data

stations<-read.csv("Divvy_Stations_2017_Q3Q4.csv", stringsAsFactors = FALSE)

ls()

## [1] "stations"

class(stations)

## [1] "data.frame"

Inspect station data.

nrow(stations)

## [1] 585

ncol(stations)

## [1] 8

head(stations)

##   id                        name    city latitude longitude dpcapacity
## 1  2         Buckingham Fountain Chicago 41.87639 -87.62033         27
## 2  3              Shedd Aquarium Chicago 41.86723 -87.61536         55
## 3  4              Burnham Harbor Chicago 41.85627 -87.61335         23
## 4  5      State St & Harrison St Chicago 41.87405 -87.62772         23
## 5  6              Dusable Harbor Chicago 41.88504 -87.61279         39
## 6  7 Field Blvd & South Water St Chicago 41.88635 -87.61752         19
##       online_date  X
## 1 6/10/2013 10:43 NA
## 2 6/10/2013 10:44 NA
## 3 6/10/2013 10:46 NA
## 4 6/10/2013 10:46 NA
## 5 6/10/2013 11:18 NA
## 6 6/19/2013 10:00 NA

summary(stations)

##        id            name               city              latitude    
##  Min.   :  2.0   Length:585         Length:585         Min.   :41.74  
##  1st Qu.:163.0   Class :character   Class :character   1st Qu.:41.85  
##  Median :315.0   Mode  :character   Mode  :character   Median :41.89  
##  Mean   :317.3                                         Mean   :41.89  
##  3rd Qu.:475.0                                         3rd Qu.:41.93  
##  Max.   :626.0                                         Max.   :42.06  
##    longitude        dpcapacity    online_date           X          
##  Min.   :-87.80   Min.   : 0.00   Length:585         Mode:logical  
##  1st Qu.:-87.68   1st Qu.:15.00   Class :character   NA's:585      
##  Median :-87.65   Median :15.00   Mode  :character                 
##  Mean   :-87.66   Mean   :17.46                                    
##  3rd Qu.:-87.63   3rd Qu.:19.00                                    
##  Max.   :-87.55   Max.   :55.00

Inspect the data in more detail:

sapply(stations, class)

##          id        name        city    latitude   longitude  dpcapacity 
##   "integer" "character" "character"   "numeric"   "numeric"   "integer" 
## online_date           X 
## "character"   "logical"

object.size(stations)

## 108264 bytes

print(object.size(stations),units = "Kb")

## 105.7 Kb

Examining dpcapacity

x<-stations$dpcapacity
class(x)

## [1] "integer"

length(x)

## [1] 585

summary(x)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   15.00   15.00   17.46   19.00   55.00

table(x)

## x
##   0  11  12  15  16  19  20  23  27  28  31  35  39  43  47  55 
##   2 106   1 256   7 106   2  47  20   1  14   4  12   1   4   2

stations$name[1:4]

## [1] "Buckingham Fountain"    "Shedd Aquarium"        
## [3] "Burnham Harbor"         "State St & Harrison St"

Select first 4 rows of “name” column:

stations[1:4, 2]

## [1] "Buckingham Fountain"    "Shedd Aquarium"        
## [3] "Burnham Harbor"         "State St & Harrison St"

stations[1:4, "name"]#this command does the same as the one before

## [1] "Buckingham Fountain"    "Shedd Aquarium"        
## [3] "Burnham Harbor"         "State St & Harrison St"

Select first 4 rows and multiple columns:

stations[1:4, c(2, 3, 4)]

##                     name    city latitude
## 1    Buckingham Fountain Chicago 41.87639
## 2         Shedd Aquarium Chicago 41.86723
## 3         Burnham Harbor Chicago 41.85627
## 4 State St & Harrison St Chicago 41.87405

stations[1:4, c("name","city","latitude")]# columns by name instead of position; same output

##                     name    city latitude
## 1    Buckingham Fountain Chicago 41.87639
## 2         Shedd Aquarium Chicago 41.86723
## 3         Burnham Harbor Chicago 41.85627
## 4 State St & Harrison St Chicago 41.87405

Getting the row and column names:

colnames(stations)

## [1] "id"          "name"        "city"        "latitude"    "longitude"  
## [6] "dpcapacity"  "online_date" "X"

#rownames(stations)

Take an even closer look at “dpcapacity”

subset(stations,dpcapacity == 0)

##      id                     name    city latitude longitude dpcapacity
## 541 581 Commercial Ave & 83rd St Chicago 41.74461 -87.55120          0
## 542 582   Phillips Ave & 83rd St Chicago 41.74469 -87.56607          0
##         online_date  X
## 541 5/27/2016 11:33 NA
## 542 5/27/2016 11:34 NA

subset(stations,dpcapacity >= 40)

##      id                         name    city latitude longitude dpcapacity
## 2     3               Shedd Aquarium Chicago 41.86723 -87.61536         55
## 32   35      Streeter Dr & Grand Ave Chicago 41.89228 -87.61204         47
## 40   43 Michigan Ave & Washington St Chicago 41.88389 -87.62465         43
## 79   90              Millennium Park Chicago 41.88103 -87.62408         47
## 86   97                 Field Museum Chicago 41.86531 -87.61787         55
## 174 192          Canal St & Adams St Chicago 41.87926 -87.63990         47
## 177 195    Columbus Dr & Randolph St Chicago 41.88473 -87.61952         47
##         online_date  X
## 2   6/10/2013 10:44 NA
## 32  6/22/2013 21:12 NA
## 40  6/25/2013 10:57 NA
## 79  6/26/2013 19:51 NA
## 86  6/30/2013 13:25 NA
## 174  8/6/2013 13:27 NA
## 177  8/7/2013 14:11 NA

# Same rows as subset() above, but via dplyr's filter(); note that the
# row names are renumbered from 1.
filter(stations, dpcapacity==0)

##    id                     name    city latitude longitude dpcapacity
## 1 581 Commercial Ave & 83rd St Chicago 41.74461 -87.55120          0
## 2 582   Phillips Ave & 83rd St Chicago 41.74469 -87.56607          0
##       online_date  X
## 1 5/27/2016 11:33 NA
## 2 5/27/2016 11:34 NA

filter

## function (.data, ...) 
## {
##     UseMethod("filter")
## }
## <bytecode: 0x7ff6b05f20c8>
## <environment: namespace:dplyr>

filter(stations, dpcapacity >= 40)

##    id                         name    city latitude longitude dpcapacity
## 1   3               Shedd Aquarium Chicago 41.86723 -87.61536         55
## 2  35      Streeter Dr & Grand Ave Chicago 41.89228 -87.61204         47
## 3  43 Michigan Ave & Washington St Chicago 41.88389 -87.62465         43
## 4  90              Millennium Park Chicago 41.88103 -87.62408         47
## 5  97                 Field Museum Chicago 41.86531 -87.61787         55
## 6 192          Canal St & Adams St Chicago 41.87926 -87.63990         47
## 7 195    Columbus Dr & Randolph St Chicago 41.88473 -87.61952         47
##       online_date  X
## 1 6/10/2013 10:44 NA
## 2 6/22/2013 21:12 NA
## 3 6/25/2013 10:57 NA
## 4 6/26/2013 19:51 NA
## 5 6/30/2013 13:25 NA
## 6  8/6/2013 13:27 NA
## 7  8/7/2013 14:11 NA

Let’s order the data set from largest to smallest dpcapacity

# Rank the stations by dpcapacity, largest first, then peek at the top rows.
rows <- order(stations[["dpcapacity"]], decreasing = TRUE)
stations2 <- stations[rows, , drop = FALSE]
head(stations2)

##      id                      name    city latitude longitude dpcapacity
## 2     3            Shedd Aquarium Chicago 41.86723 -87.61536         55
## 86   97              Field Museum Chicago 41.86531 -87.61787         55
## 32   35   Streeter Dr & Grand Ave Chicago 41.89228 -87.61204         47
## 79   90           Millennium Park Chicago 41.88103 -87.62408         47
## 174 192       Canal St & Adams St Chicago 41.87926 -87.63990         47
## 177 195 Columbus Dr & Randolph St Chicago 41.88473 -87.61952         47
##         online_date  X
## 2   6/10/2013 10:44 NA
## 86  6/30/2013 13:25 NA
## 32  6/22/2013 21:12 NA
## 79  6/26/2013 19:51 NA
## 174  8/6/2013 13:27 NA
## 177  8/7/2013 14:11 NA

tail(stations2)

##      id                              name     city latitude longitude
## 553 593              Halsted St & 59th St  Chicago 41.78754 -87.64487
## 567 607           Cuyler Ave & Augusta St Oak Park 41.89817 -87.78306
## 568 608         Humphrey Ave & Ontario St Oak Park 41.89025 -87.77828
## 573 613 Wisconsin Ave & Madison St (Temp) Oak Park 41.87990 -87.80391
## 541 581          Commercial Ave & 83rd St  Chicago 41.74461 -87.55120
## 542 582            Phillips Ave & 83rd St  Chicago 41.74469 -87.56607
##     dpcapacity     online_date  X
## 553         11 5/27/2016 11:41 NA
## 567         11 6/23/2016 12:22 NA
## 568         11 6/23/2016 12:23 NA
## 573         11 6/23/2016 12:26 NA
## 541          0 5/27/2016 11:33 NA
## 542          0 5/27/2016 11:34 NA

Take a closer look at the “city” column

x <- stations$city
class(x)

## [1] "character"

summary(x)

##    Length     Class      Mode 
##       585 character character

x <- factor(stations$city)
class(x)

## [1] "factor"

summary(x)

##  Chicago Chicago  Evanston Oak Park 
##      520       41       11       13

Chicago shows up twice, even as a factor, because some entries carry a trailing space (“Chicago ”).

Fix up.

# Select the rows whose city name carries stray leading/trailing
# whitespace (here, the "Chicago " entries); which() also drops any NA.
rows <- which(stations$city != trimws(stations$city))

# Overwrite just those rows with the trimmed name.
stations[rows, "city"] <- trimws(stations[rows, "city"])
summary(stations$city)

##    Length     Class      Mode 
##       585 character character

Now all the entries under “Chicago” are the same.

stations$city <- factor(stations$city) 
summary(stations$city)

##  Chicago Evanston Oak Park 
##      561       11       13

What’s a factor?

x <- stations$city
attributes(x)

## $levels
## [1] "Chicago"  "Evanston" "Oak Park"
## 
## $class
## [1] "factor"

unclass(x)

##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [71] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [106] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [141] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [176] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [211] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [246] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [281] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [316] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [351] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [386] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [421] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [456] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [491] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [526] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2
## [561] 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 2 1
## attr(,"levels")
## [1] "Chicago"  "Evanston" "Oak Park"

Save your environment:

# Write the current workspace to a single .RData file
save.image("divvy_analysis.RData")

# To load it again later, run the following:
load("divvy_analysis.RData")

Load the Divvy trip data:

trips <- read.csv("Divvy_Trips_2017_Q4.csv", stringsAsFactors = FALSE)
nrow(trips)

## [1] 669239

ncol(trips)

## [1] 12

print(object.size(trips), units = "Mb")

## 59.2 Mb

Let’s read the CSV file with the read_csv function from readr, which is faster than read.csv.

trips <- read_csv("Divvy_Trips_2017_Q4.csv")

## Parsed with column specification:
## cols(
##   trip_id = col_integer(),
##   start_time = col_character(),
##   end_time = col_character(),
##   bikeid = col_integer(),
##   tripduration = col_integer(),
##   from_station_id = col_integer(),
##   from_station_name = col_character(),
##   to_station_id = col_integer(),
##   to_station_name = col_character(),
##   usertype = col_character(),
##   gender = col_character(),
##   birthyear = col_integer()
## )

class(trips)

## [1] "tbl_df"     "tbl"        "data.frame"

Converting the tibble trips data set into a data frame.

# Drop the tibble classes with the documented coercion function rather
# than overwriting the class attribute by hand.
trips <- as.data.frame(trips)

vignette(package = "readr")
vignette("readr")

## starting httpd help server ... done

How do I know which packages are already installed?

rownames(installed.packages())

##   [1] "abind"         "assertthat"    "backports"     "base"         
##   [5] "base64enc"     "BH"            "bindr"         "bindrcpp"     
##   [9] "bitops"        "boot"          "broom"         "callr"        
##  [13] "caret"         "caTools"       "cellranger"    "class"        
##  [17] "cli"           "clipr"         "cluster"       "codetools"    
##  [21] "colorspace"    "compiler"      "cowplot"       "crayon"       
##  [25] "curl"          "CVST"          "data.table"    "DataExplorer" 
##  [29] "datasets"      "DBI"           "dbplyr"        "ddalpha"      
##  [33] "DEoptimR"      "dichromat"     "digest"        "dimRed"       
##  [37] "DMwR"          "doParallel"    "dplyr"         "DRR"          
##  [41] "evaluate"      "expsmooth"     "extraDistr"    "fma"          
##  [45] "forcats"       "foreach"       "forecast"      "foreign"      
##  [49] "formatR"       "fpp"           "fracdiff"      "gains"        
##  [53] "gdata"         "geometry"      "geosphere"     "GGally"       
##  [57] "ggfortify"     "ggmap"         "ggplot2"       "ggrepel"      
##  [61] "glue"          "gower"         "gplots"        "graphics"     
##  [65] "grDevices"     "grid"          "gridBase"      "gridExtra"    
##  [69] "gtable"        "gtools"        "haven"         "highr"        
##  [73] "hms"           "htmltools"     "htmlwidgets"   "hts"          
##  [77] "httpuv"        "httr"          "igraph"        "imputeTS"     
##  [81] "inline"        "ipred"         "ISLR"          "iterators"    
##  [85] "jpeg"          "jsonlite"      "kernlab"       "KernSmooth"   
##  [89] "knitr"         "labeling"      "later"         "lattice"      
##  [93] "lava"          "lazyeval"      "leaps"         "lmtest"       
##  [97] "locfit"        "lubridate"     "magic"         "magrittr"     
## [101] "mapdata"       "mapproj"       "maps"          "markdown"     
## [105] "MASS"          "Matrix"        "matrixcalc"    "methods"      
## [109] "mgcv"          "mime"          "mnormt"        "ModelMetrics" 
## [113] "modelr"        "MuMIn"         "munsell"       "NbClust"      
## [117] "NCmisc"        "networkD3"     "nlme"          "nnet"         
## [121] "numDeriv"      "openssl"       "packrat"       "parallel"     
## [125] "pillar"        "pkgconfig"     "PKI"           "plogr"        
## [129] "pls"           "plyr"          "png"           "poLCA"        
## [133] "praise"        "prettyunits"   "pROC"          "processx"     
## [137] "prodlim"       "proftools"     "progress"      "promises"     
## [141] "prophet"       "proto"         "psych"         "purrr"        
## [145] "qqplotr"       "quadprog"      "quantmod"      "R6"           
## [149] "RColorBrewer"  "Rcpp"          "RcppEigen"     "RcppRoll"     
## [153] "RCurl"         "reader"        "readr"         "readxl"       
## [157] "recipes"       "rematch"       "reprex"        "reshape"      
## [161] "reshape2"      "RgoogleMaps"   "rJava"         "rjson"        
## [165] "RJSONIO"       "rlang"         "rmarkdown"     "robustbase"   
## [169] "ROCR"          "rpart"         "rprojroot"     "rsconnect"    
## [173] "rstan"         "rstudioapi"    "rvest"         "scales"       
## [177] "scatterplot3d" "selectr"       "sfsmisc"       "shiny"        
## [181] "sourcetools"   "sp"            "SparseM"       "spatial"      
## [185] "splines"       "SQUAREM"       "StanHeaders"   "stats"        
## [189] "stats4"        "stinepack"     "stringi"       "stringr"      
## [193] "survival"      "tcltk"         "testthat"      "tibble"       
## [197] "tidyr"         "tidyselect"    "tidyverse"     "timeDate"     
## [201] "tools"         "treemap"       "TSA"           "tseries"      
## [205] "TTR"           "urca"          "uroot"         "utf8"         
## [209] "utils"         "varhandle"     "viridisLite"   "whisker"      
## [213] "withr"         "xlsx"          "xlsxjars"      "xml2"         
## [217] "xtable"        "xts"           "yaml"          "zoo"

Where do the packages live?

.libPaths()

## [1] "/Library/Frameworks/R.framework/Versions/3.5/Resources/library"

Reviewing the trip data:

nrow(trips)

## [1] 669239

ncol(trips)

## [1] 12

head(trips)

##    trip_id       start_time         end_time bikeid tripduration
## 1 17536701 12/31/2017 23:58    1/1/2018 0:03   3304          284
## 2 17536700 12/31/2017 23:54    1/1/2018 0:18   5975         1402
## 3 17536699 12/31/2017 23:54    1/1/2018 0:18   4906         1441
## 4 17536698 12/31/2017 23:48 12/31/2017 23:53   5667          315
## 5 17536697 12/31/2017 23:42 12/31/2017 23:47   5353          272
## 6 17536696 12/31/2017 23:41 12/31/2017 23:51   5840          589
##   from_station_id                   from_station_name to_station_id
## 1             159           Claremont Ave & Hirsch St            69
## 2             145 Mies van der Rohe Way & Chestnut St           145
## 3             145 Mies van der Rohe Way & Chestnut St           145
## 4             340           Clark St & Wrightwood Ave           143
## 5             240        Sheridan Rd & Irving Park Rd           245
## 6              93           Sheffield Ave & Willow St           343
##                       to_station_name   usertype gender birthyear
## 1              Damen Ave & Pierce Ave Subscriber   Male      1988
## 2 Mies van der Rohe Way & Chestnut St   Customer   <NA>        NA
## 3 Mies van der Rohe Way & Chestnut St   Customer   <NA>        NA
## 4           Sedgwick St & Webster Ave Subscriber   Male      1963
## 5          Clarendon Ave & Junior Ter Subscriber   Male      1977
## 6         Racine Ave & Wrightwood Ave Subscriber   Male      1988

summary(trips)

##     trip_id          start_time          end_time             bikeid    
##  Min.   :16734066   Length:669239      Length:669239      Min.   :   1  
##  1st Qu.:16932824   Class :character   Class :character   1st Qu.:1966  
##  Median :17130688   Mode  :character   Mode  :character   Median :3905  
##  Mean   :17132520                                         Mean   :3717  
##  3rd Qu.:17334366                                         3rd Qu.:5590  
##  Max.   :17536701                                         Max.   :6471  
##                                                                         
##   tripduration     from_station_id from_station_name  to_station_id  
##  Min.   :   60.0   Min.   :  2     Length:669239      Min.   :  2.0  
##  1st Qu.:  347.0   1st Qu.: 76     Class :character   1st Qu.: 76.0  
##  Median :  567.0   Median :161     Mode  :character   Median :157.0  
##  Mean   :  779.4   Mean   :184                        Mean   :183.3  
##  3rd Qu.:  940.0   3rd Qu.:280                        3rd Qu.:275.0  
##  Max.   :85466.0   Max.   :626                        Max.   :626.0  
##                                                                      
##  to_station_name      usertype            gender            birthyear    
##  Length:669239      Length:669239      Length:669239      Min.   :1918   
##  Class :character   Class :character   Class :character   1st Qu.:1975   
##  Mode  :character   Mode  :character   Mode  :character   Median :1985   
##                                                           Mean   :1982   
##                                                           3rd Qu.:1990   
##                                                           Max.   :2004   
##                                                           NA's   :78827