In this notebook I download and unzip the Ford Go Bike data.
library(tidyverse)
library(tictoc)
library(ggmap)
library(skimr)
library(lubridate)
library(forcats)
Download the files. The first one is not zipped; the remaining ones are zipped.
# Fetch the 2017 trip data, which is published as a plain (unzipped) CSV.
# Create the target directory first so curl has somewhere to write when the
# notebook is run from a fresh checkout.
dir.create("./data", showWarnings = FALSE, recursive = TRUE)
URL <- "https://s3.amazonaws.com/fordgobike-data/2017-fordgobike-tripdata.csv"
status <- download.file(
  URL,
  destfile = "./data/2017-fordgobike-tripdata.csv",
  method = "curl"
)
# download.file() returns 0 on success and a non-zero code on failure, so
# stop here rather than carrying on with a missing or partial file.
if (status != 0) {
  stop("Download failed (status ", status, "): ", URL, call. = FALSE)
}
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0
0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0
0 112M 0 101k 0 0 101k 0 0:18:53 0:00:01 0:18:52 97916
0 112M 0 390k 0 0 195k 0 0:09:49 0:00:02 0:09:47 189k
0 112M 0 764k 0 0 254k 0 0:07:31 0:00:03 0:07:28 249k
1 112M 1 1155k 0 0 288k 0 0:06:38 0:00:04 0:06:34 284k
1 112M 1 1580k 0 0 316k 0 0:06:04 0:00:05 0:05:59 319k
1 112M 1 2039k 0 0 339k 0 0:05:38 0:00:06 0:05:32 387k
2 112M 2 2583k 0 0 369k 0 0:05:12 0:00:07 0:05:05 438k
2 112M 2 3348k 0 0 418k 0 0:04:35 0:00:08 0:04:27 516k
3 112M 3 4487k 0 0 498k 0 0:03:51 0:00:09 0:03:42 666k
5 112M 5 6017k 0 0 601k 0 0:03:11 0:00:10 0:03:01 887k
6 112M 6 7649k 0 0 695k 0 0:02:45 0:00:11 0:02:34 1122k
8 112M 8 9349k 0 0 779k 0 0:02:27 0:00:12 0:02:15 1353k
9 112M 9 10.5M 0 0 827k 0 0:02:19 0:00:13 0:02:06 1482k
10 112M 10 11.9M 0 0 876k 0 0:02:11 0:00:14 0:01:57 1557k
12 112M 12 13.5M 0 0 924k 0 0:02:04 0:00:15 0:01:49 1570k
13 112M 13 15.0M 0 0 962k 0 0:01:59 0:00:16 0:01:43 1550k
14 112M 14 16.6M 0 0 1000k 0 0:01:55 0:00:17 0:01:38 1533k
16 112M 16 18.2M 0 0 1036k 0 0:01:51 0:00:18 0:01:33 1581k
17 112M 17 20.1M 0 0 1086k 0 0:01:46 0:00:19 0:01:27 1672k
19 112M 19 22.4M 0 0 1149k 0 0:01:40 0:00:20 0:01:20 1822k
22 112M 22 25.1M 0 0 1226k 0 0:01:33 0:00:21 0:01:12 2070k
25 112M 25 28.7M 0 0 1336k 0 0:01:26 0:00:22 0:01:04 2475k
29 112M 29 33.4M 0 0 1490k 0 0:01:17 0:00:23 0:00:54 3124k
35 112M 35 39.3M 0 0 1680k 0 0:01:08 0:00:24 0:00:44 3940k
42 112M 42 47.3M 0 0 1939k 0 0:00:59 0:00:25 0:00:34 5101k
50 112M 50 56.9M 0 0 2242k 0 0:00:51 0:00:26 0:00:25 6508k
60 112M 60 68.2M 0 0 2589k 0 0:00:44 0:00:27 0:00:17 8104k
71 112M 71 80.2M 0 0 2935k 0 0:00:39 0:00:28 0:00:11 9582k
80 112M 80 90.0M 0 0 3180k 0 0:00:36 0:00:29 0:00:07 10.1M
89 112M 89 100M 0 0 3439k 0 0:00:33 0:00:30 0:00:03 10.6M
99 112M 99 112M 0 0 3709k 0 0:00:31 0:00:31 --:--:-- 11.0M
100 112M 100 112M 0 0 3715k 0 0:00:31 0:00:31 --:--:-- 11.0M
# Fetch the zipped January 2018 trip data.
URL <- "https://s3.amazonaws.com/fordgobike-data/201801-fordgobike-tripdata.csv.zip"
status <- download.file(
  URL,
  destfile = "./data/201801-fordgobike-tripdata.csv.zip",
  method = "curl"
)
# download.file() returns 0 on success and a non-zero code on failure, so
# stop here rather than trying to unzip a missing or partial archive later.
if (status != 0) {
  stop("Download failed (status ", status, "): ", URL, call. = FALSE)
}
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0
2 3251k 2 86674 0 0 86674 0 0:00:38 --:--:-- 0:00:38 98k
25 3251k 25 815k 0 0 815k 0 0:00:03 0:00:01 0:00:02 438k
100 3251k 100 3251k 0 0 1625k 0 0:00:02 0:00:02 --:--:-- 1162k
# Fetch the zipped February 2018 trip data.
URL <- "https://s3.amazonaws.com/fordgobike-data/201802-fordgobike-tripdata.csv.zip"
status <- download.file(
  URL,
  destfile = "./data/201802-fordgobike-tripdata.csv.zip",
  method = "curl"
)
# download.file() returns 0 on success and a non-zero code on failure, so
# stop here rather than trying to unzip a missing or partial archive later.
if (status != 0) {
  stop("Download failed (status ", status, "): ", URL, call. = FALSE)
}
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0
0 3698k 0 34450 0 0 34450 0 0:01:49 --:--:-- 0:01:49 44053
23 3698k 23 883k 0 0 883k 0 0:00:04 0:00:01 0:00:03 495k
100 3698k 100 3698k 0 0 1849k 0 0:00:02 0:00:02 --:--:-- 1384k
# Fetch the zipped March 2018 trip data.
URL <- "https://s3.amazonaws.com/fordgobike-data/201803-fordgobike-tripdata.csv.zip"
status <- download.file(
  URL,
  destfile = "./data/201803-fordgobike-tripdata.csv.zip",
  method = "curl"
)
# download.file() returns 0 on success and a non-zero code on failure, so
# stop here rather than trying to unzip a missing or partial archive later.
if (status != 0) {
  stop("Download failed (status ", status, "): ", URL, call. = FALSE)
}
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0
4 3901k 4 168k 0 0 168k 0 0:00:23 0:00:01 0:00:22 165k
47 3901k 47 1835k 0 0 1835k 0 0:00:02 0:00:01 0:00:01 924k
100 3901k 100 3901k 0 0 1950k 0 0:00:02 0:00:02 --:--:-- 1486k
# Fetch the zipped April 2018 trip data.
URL <- "https://s3.amazonaws.com/fordgobike-data/201804-fordgobike-tripdata.csv.zip"
status <- download.file(
  URL,
  destfile = "./data/201804-fordgobike-tripdata.csv.zip",
  method = "curl"
)
# download.file() returns 0 on success and a non-zero code on failure, so
# stop here rather than trying to unzip a missing or partial archive later.
if (status != 0) {
  stop("Download failed (status ", status, "): ", URL, call. = FALSE)
}
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0
0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0
8 4613k 8 373k 0 0 373k 0 0:00:12 0:00:01 0:00:11 298k
63 4613k 63 2906k 0 0 1453k 0 0:00:03 0:00:02 0:00:01 1291k
100 4613k 100 4613k 0 0 2306k 0 0:00:02 0:00:02 --:--:-- 1789k
Unzip downloaded files.
# Extract each monthly 2018 archive into the data directory, in month order.
# invisible() keeps the list of extracted paths out of the notebook output.
invisible(lapply(
  c(
    "./data/201801-fordgobike-tripdata.csv.zip",
    "./data/201802-fordgobike-tripdata.csv.zip",
    "./data/201803-fordgobike-tripdata.csv.zip",
    "./data/201804-fordgobike-tripdata.csv.zip"
  ),
  unzip,
  exdir = "./data"
))
Clean up data directory.
# Delete the January 2018 archive; the existence check makes a re-run a no-op.
fn <- "./data/201801-fordgobike-tripdata.csv.zip"
if (file.exists(fn)) {
  file.remove(fn)
}
[1] TRUE
# Delete the February 2018 archive; the existence check makes a re-run a no-op.
fn <- "./data/201802-fordgobike-tripdata.csv.zip"
if (file.exists(fn)) {
  file.remove(fn)
}
[1] TRUE
# Delete the March 2018 archive; the existence check makes a re-run a no-op.
fn <- "./data/201803-fordgobike-tripdata.csv.zip"
if (file.exists(fn)) {
  file.remove(fn)
}
[1] TRUE
# Delete the April 2018 archive; the existence check makes a re-run a no-op.
fn <- "./data/201804-fordgobike-tripdata.csv.zip"
if (file.exists(fn)) {
  file.remove(fn)
}
[1] TRUE
Read the .csv files
# Load the 2017 trips; no col_types are given, so readr guesses the column
# types and prints the specification it chose.
fordgobike2017 <- read_csv("./data/2017-fordgobike-tripdata.csv")
Parsed with column specification:
cols(
duration_sec = col_integer(),
start_time = col_datetime(format = ""),
end_time = col_datetime(format = ""),
start_station_id = col_integer(),
start_station_name = col_character(),
start_station_latitude = col_double(),
start_station_longitude = col_double(),
end_station_id = col_integer(),
end_station_name = col_character(),
end_station_latitude = col_double(),
end_station_longitude = col_double(),
bike_id = col_integer(),
user_type = col_character(),
member_birth_year = col_integer(),
member_gender = col_character()
)
|================== | 18% 21 MB
|================== | 18% 21 MB
|================== | 18% 21 MB
|================== | 19% 21 MB
|================== | 19% 21 MB
|================== | 19% 21 MB
|================== | 19% 21 MB
|=================== | 19% 22 MB
|=================== | 19% 22 MB
|=================== | 19% 22 MB
|=================== | 19% 22 MB
|=================== | 20% 22 MB
|=================== | 20% 22 MB
|=================== | 20% 22 MB
|=================== | 20% 23 MB
|==================== | 20% 23 MB
|==================== | 20% 23 MB
|==================== | 20% 23 MB
|==================== | 21% 23 MB
|==================== | 21% 23 MB
|==================== | 21% 23 MB
|==================== | 21% 24 MB
|==================== | 21% 24 MB
|===================== | 21% 24 MB
|===================== | 21% 24 MB
|===================== | 21% 24 MB
|===================== | 22% 24 MB
|===================== | 22% 24 MB
|===================== | 22% 25 MB
|===================== | 22% 25 MB
|===================== | 22% 25 MB
|===================== | 22% 25 MB
|====================== | 22% 25 MB
|====================== | 22% 25 MB
|====================== | 23% 25 MB
|====================== | 23% 26 MB
|====================== | 23% 26 MB
|====================== | 23% 26 MB
|====================== | 23% 26 MB
|====================== | 23% 26 MB
|======================= | 23% 26 MB
|======================= | 23% 26 MB
|======================= | 24% 27 MB
|======================= | 24% 27 MB
|======================= | 24% 27 MB
|======================= | 24% 27 MB
|======================= | 24% 27 MB
|======================= | 24% 27 MB
|======================== | 24% 27 MB
|======================== | 24% 28 MB
|======================== | 25% 28 MB
|======================== | 25% 28 MB
|======================== | 25% 28 MB
|======================== | 25% 28 MB
|======================== | 25% 28 MB
|======================== | 25% 28 MB
|========================= | 25% 29 MB
|========================= | 26% 29 MB
|========================= | 26% 29 MB
|========================= | 26% 29 MB
|========================= | 26% 29 MB
|========================= | 26% 29 MB
|========================= | 26% 29 MB
|========================= | 26% 30 MB
|========================== | 26% 30 MB
|========================== | 27% 30 MB
|========================== | 27% 30 MB
|========================== | 27% 30 MB
|========================== | 27% 30 MB
|========================== | 27% 30 MB
|========================== | 27% 31 MB
|========================== | 27% 31 MB
|=========================== | 27% 31 MB
|=========================== | 28% 31 MB
|=========================== | 28% 31 MB
|=========================== | 28% 31 MB
|=========================== | 28% 31 MB
|=========================== | 28% 32 MB
|=========================== | 28% 32 MB
|=========================== | 28% 32 MB
|============================ | 28% 32 MB
|============================ | 29% 32 MB
|============================ | 29% 32 MB
|============================ | 29% 32 MB
|============================ | 29% 33 MB
|============================ | 29% 33 MB
|============================ | 29% 33 MB
|============================ | 29% 33 MB
|============================= | 29% 33 MB
|============================= | 30% 33 MB
|============================= | 30% 34 MB
|============================= | 30% 34 MB
|============================= | 30% 34 MB
|============================= | 30% 34 MB
|============================= | 30% 34 MB
|============================= | 30% 34 MB
|============================== | 30% 34 MB
|============================== | 31% 35 MB
|============================== | 31% 35 MB
|============================== | 31% 35 MB
|============================== | 31% 35 MB
|============================== | 31% 35 MB
|============================== | 31% 35 MB
|============================== | 31% 35 MB
|=============================== | 32% 36 MB
|=============================== | 32% 36 MB
|=============================== | 32% 36 MB
|=============================== | 32% 36 MB
|=============================== | 32% 36 MB
|=============================== | 32% 36 MB
|=============================== | 32% 36 MB
|=============================== | 32% 37 MB
|================================ | 33% 37 MB
|================================ | 33% 37 MB
|================================ | 33% 37 MB
|================================ | 33% 37 MB
|================================ | 33% 37 MB
|================================ | 33% 37 MB
|================================ | 33% 38 MB
|================================ | 33% 38 MB
|================================= | 34% 38 MB
|================================= | 34% 38 MB
|================================= | 34% 38 MB
|================================= | 34% 38 MB
|================================= | 34% 38 MB
|================================= | 34% 39 MB
|================================= | 34% 39 MB
|================================= | 34% 39 MB
|================================== | 35% 39 MB
|================================== | 35% 39 MB
|================================== | 35% 39 MB
|================================== | 35% 39 MB
|================================== | 35% 40 MB
|================================== | 35% 40 MB
|================================== | 35% 40 MB
|================================== | 35% 40 MB
|=================================== | 36% 40 MB
|=================================== | 36% 40 MB
|=================================== | 36% 40 MB
|=================================== | 36% 41 MB
|=================================== | 36% 41 MB
|=================================== | 36% 41 MB
|=================================== | 36% 41 MB
|=================================== | 37% 41 MB
|==================================== | 37% 41 MB
|==================================== | 37% 41 MB
|==================================== | 37% 42 MB
|==================================== | 37% 42 MB
|==================================== | 37% 42 MB
|==================================== | 37% 42 MB
|==================================== | 37% 42 MB
|==================================== | 38% 42 MB
|===================================== | 38% 42 MB
|===================================== | 38% 43 MB
|===================================== | 38% 43 MB
|===================================== | 38% 43 MB
|===================================== | 38% 43 MB
|===================================== | 38% 43 MB
|===================================== | 38% 43 MB
|===================================== | 39% 43 MB
|====================================== | 39% 44 MB
|====================================== | 39% 44 MB
|====================================== | 39% 44 MB
|====================================== | 39% 44 MB
|====================================== | 39% 44 MB
|====================================== | 39% 44 MB
|====================================== | 39% 44 MB
|====================================== | 40% 45 MB
|======================================= | 40% 45 MB
|======================================= | 40% 45 MB
|======================================= | 40% 45 MB
|======================================= | 40% 45 MB
|======================================= | 40% 45 MB
|======================================= | 40% 45 MB
|======================================= | 40% 46 MB
|======================================= | 41% 46 MB
|======================================== | 41% 46 MB
|======================================== | 41% 46 MB
|======================================== | 41% 46 MB
|======================================== | 41% 46 MB
|======================================== | 41% 46 MB
|======================================== | 41% 47 MB
|======================================== | 42% 47 MB
|======================================== | 42% 47 MB
|======================================== | 42% 47 MB
|========================================= | 42% 47 MB
|========================================= | 42% 47 MB
|========================================= | 42% 47 MB
|========================================= | 42% 48 MB
|========================================= | 42% 48 MB
|========================================= | 43% 48 MB
|========================================= | 43% 48 MB
|========================================= | 43% 48 MB
|========================================== | 43% 48 MB
|========================================== | 43% 48 MB
|========================================== | 43% 49 MB
|========================================== | 43% 49 MB
|========================================== | 43% 49 MB
|========================================== | 44% 49 MB
|========================================== | 44% 49 MB
|========================================== | 44% 49 MB
|=========================================== | 44% 49 MB
|=========================================== | 44% 50 MB
|=========================================== | 44% 50 MB
|=========================================== | 44% 50 MB
|=========================================== | 44% 50 MB
|=========================================== | 45% 50 MB
|=========================================== | 45% 50 MB
|=========================================== | 45% 51 MB
|============================================ | 45% 51 MB
|============================================ | 45% 51 MB
|============================================ | 45% 51 MB
|============================================ | 45% 51 MB
|============================================ | 45% 51 MB
|============================================ | 46% 51 MB
|============================================ | 46% 52 MB
|============================================ | 46% 52 MB
|============================================= | 46% 52 MB
|============================================= | 46% 52 MB
|============================================= | 46% 52 MB
|============================================= | 46% 52 MB
|============================================= | 46% 52 MB
|============================================= | 47% 53 MB
|============================================= | 47% 53 MB
|============================================= | 47% 53 MB
|============================================== | 47% 53 MB
|============================================== | 47% 53 MB
|============================================== | 47% 53 MB
|============================================== | 47% 53 MB
|============================================== | 48% 54 MB
|============================================== | 48% 54 MB
|============================================== | 48% 54 MB
|============================================== | 48% 54 MB
|=============================================== | 48% 54 MB
|=============================================== | 48% 54 MB
|=============================================== | 48% 54 MB
|=============================================== | 48% 55 MB
|=============================================== | 49% 55 MB
|=============================================== | 49% 55 MB
|=============================================== | 49% 55 MB
|=============================================== | 49% 55 MB
|================================================ | 49% 55 MB
|================================================ | 49% 55 MB
|================================================ | 49% 56 MB
|================================================ | 49% 56 MB
|================================================ | 50% 56 MB
|================================================ | 50% 56 MB
|================================================ | 50% 56 MB
|================================================ | 50% 56 MB
|================================================= | 50% 56 MB
|================================================= | 50% 57 MB
|================================================= | 50% 57 MB
|================================================= | 50% 57 MB
|================================================= | 51% 57 MB
|================================================= | 51% 57 MB
|================================================= | 51% 57 MB
|================================================= | 51% 57 MB
|================================================== | 51% 58 MB
|================================================== | 51% 58 MB
|================================================== | 51% 58 MB
|================================================== | 51% 58 MB
|================================================== | 52% 58 MB
|================================================== | 52% 58 MB
|================================================== | 52% 58 MB
|================================================== | 52% 59 MB
|=================================================== | 52% 59 MB
|=================================================== | 52% 59 MB
|=================================================== | 52% 59 MB
|=================================================== | 53% 59 MB
|=================================================== | 53% 59 MB
|=================================================== | 53% 59 MB
|=================================================== | 53% 60 MB
|=================================================== | 53% 60 MB
|==================================================== | 53% 60 MB
|==================================================== | 53% 60 MB
|==================================================== | 53% 60 MB
|==================================================== | 54% 60 MB
|==================================================== | 54% 60 MB
|==================================================== | 54% 61 MB
|==================================================== | 54% 61 MB
|==================================================== | 54% 61 MB
|===================================================== | 54% 61 MB
|===================================================== | 54% 61 MB
|===================================================== | 54% 61 MB
|===================================================== | 55% 61 MB
|===================================================== | 55% 62 MB
|===================================================== | 55% 62 MB
|===================================================== | 55% 62 MB
|===================================================== | 55% 62 MB
|====================================================== | 55% 62 MB
|====================================================== | 55% 62 MB
|====================================================== | 55% 62 MB
|====================================================== | 56% 63 MB
|====================================================== | 56% 63 MB
|====================================================== | 56% 63 MB
|====================================================== | 56% 63 MB
|====================================================== | 56% 63 MB
|======================================================= | 56% 63 MB
|======================================================= | 56% 63 MB
|======================================================= | 56% 64 MB
|======================================================= | 57% 64 MB
|======================================================= | 57% 64 MB
|======================================================= | 57% 64 MB
|======================================================= | 57% 64 MB
|======================================================= | 57% 64 MB
|======================================================== | 57% 64 MB
|======================================================== | 57% 65 MB
|======================================================== | 58% 65 MB
|======================================================== | 58% 65 MB
|======================================================== | 58% 65 MB
|======================================================== | 58% 65 MB
|======================================================== | 58% 65 MB
|======================================================== | 58% 65 MB
|========================================================= | 58% 66 MB
|========================================================= | 58% 66 MB
|========================================================= | 59% 66 MB
|========================================================= | 59% 66 MB
|========================================================= | 59% 66 MB
|========================================================= | 59% 66 MB
|========================================================= | 59% 66 MB
|========================================================= | 59% 67 MB
|========================================================== | 59% 67 MB
|========================================================== | 59% 67 MB
|========================================================== | 60% 67 MB
|========================================================== | 60% 67 MB
|========================================================== | 60% 67 MB
|========================================================== | 60% 68 MB
|========================================================== | 60% 68 MB
|========================================================== | 60% 68 MB
|=========================================================== | 60% 68 MB
|=========================================================== | 60% 68 MB
|=========================================================== | 61% 68 MB
|=========================================================== | 61% 68 MB
|=========================================================== | 61% 69 MB
|=========================================================== | 61% 69 MB
|=========================================================== | 61% 69 MB
|=========================================================== | 61% 69 MB
|=========================================================== | 61% 69 MB
|============================================================ | 61% 69 MB
|============================================================ | 62% 69 MB
|============================================================ | 62% 70 MB
|============================================================ | 62% 70 MB
|============================================================ | 62% 70 MB
|============================================================ | 62% 70 MB
|============================================================ | 62% 70 MB
|============================================================ | 62% 70 MB
|============================================================= | 63% 70 MB
|============================================================= | 63% 71 MB
|============================================================= | 63% 71 MB
|============================================================= | 63% 71 MB
|============================================================= | 63% 71 MB
|============================================================= | 63% 71 MB
|============================================================= | 63% 71 MB
|============================================================= | 63% 71 MB
|============================================================== | 64% 72 MB
|============================================================== | 64% 72 MB
|============================================================== | 64% 72 MB
|============================================================== | 64% 72 MB
|============================================================== | 64% 72 MB
|============================================================== | 64% 72 MB
|============================================================== | 64% 72 MB
|============================================================== | 64% 73 MB
|=============================================================== | 65% 73 MB
|=============================================================== | 65% 73 MB
|=============================================================== | 65% 73 MB
|=============================================================== | 65% 73 MB
|=============================================================== | 65% 73 MB
|=============================================================== | 65% 73 MB
|=============================================================== | 65% 74 MB
|=============================================================== | 65% 74 MB
|================================================================ | 66% 74 MB
|================================================================ | 66% 74 MB
|================================================================ | 66% 74 MB
|================================================================ | 66% 74 MB
|================================================================ | 66% 74 MB
|================================================================ | 66% 75 MB
|================================================================ | 66% 75 MB
|================================================================ | 66% 75 MB
|================================================================= | 67% 75 MB
|================================================================= | 67% 75 MB
|================================================================= | 67% 75 MB
|================================================================= | 67% 75 MB
|================================================================= | 67% 76 MB
|================================================================= | 67% 76 MB
|================================================================= | 67% 76 MB
|================================================================= | 68% 76 MB
|================================================================== | 68% 76 MB
|================================================================== | 68% 76 MB
|================================================================== | 68% 76 MB
|================================================================== | 68% 77 MB
|================================================================== | 68% 77 MB
|================================================================== | 68% 77 MB
|================================================================== | 68% 77 MB
|================================================================== | 69% 77 MB
|=================================================================== | 69% 77 MB
|=================================================================== | 69% 77 MB
|=================================================================== | 69% 78 MB
|=================================================================== | 69% 78 MB
|=================================================================== | 69% 78 MB
|=================================================================== | 69% 78 MB
|=================================================================== | 69% 78 MB
|=================================================================== | 70% 78 MB
|==================================================================== | 70% 78 MB
|==================================================================== | 70% 79 MB
|==================================================================== | 70% 79 MB
|==================================================================== | 70% 79 MB
|==================================================================== | 70% 79 MB
|==================================================================== | 70% 79 MB
|==================================================================== | 70% 79 MB
|==================================================================== | 71% 79 MB
|===================================================================== | 71% 80 MB
|===================================================================== | 71% 80 MB
|===================================================================== | 71% 80 MB
|===================================================================== | 71% 80 MB
|===================================================================== | 71% 80 MB
|===================================================================== | 71% 80 MB
|===================================================================== | 71% 80 MB
|===================================================================== | 72% 81 MB
|====================================================================== | 72% 81 MB
|====================================================================== | 72% 81 MB
|====================================================================== | 72% 81 MB
|====================================================================== | 72% 81 MB
|====================================================================== | 72% 81 MB
|====================================================================== | 72% 81 MB
|====================================================================== | 73% 82 MB
|====================================================================== | 73% 82 MB
|======================================================================= | 73% 82 MB
|======================================================================= | 73% 82 MB
|======================================================================= | 73% 82 MB
|======================================================================= | 73% 82 MB
|======================================================================= | 73% 82 MB
|======================================================================= | 73% 83 MB
|======================================================================= | 74% 83 MB
|======================================================================= | 74% 83 MB
|======================================================================== | 74% 83 MB
|======================================================================== | 74% 83 MB
|======================================================================== | 74% 83 MB
|======================================================================== | 74% 83 MB
|======================================================================== | 74% 84 MB
|======================================================================== | 74% 84 MB
|======================================================================== | 75% 84 MB
|======================================================================== | 75% 84 MB
|========================================================================= | 75% 84 MB
|========================================================================= | 75% 84 MB
|========================================================================= | 75% 84 MB
|========================================================================= | 75% 85 MB
|========================================================================= | 75% 85 MB
|========================================================================= | 75% 85 MB
|========================================================================= | 76% 85 MB
|========================================================================= | 76% 85 MB
|========================================================================== | 76% 85 MB
|========================================================================== | 76% 86 MB
|========================================================================== | 76% 86 MB
|========================================================================== | 76% 86 MB
|========================================================================== | 76% 86 MB
|========================================================================== | 76% 86 MB
|========================================================================== | 77% 86 MB
|========================================================================== | 77% 86 MB
|=========================================================================== | 77% 87 MB
|=========================================================================== | 77% 87 MB
|=========================================================================== | 77% 87 MB
|=========================================================================== | 77% 87 MB
|=========================================================================== | 77% 87 MB
|=========================================================================== | 77% 87 MB
|=========================================================================== | 78% 87 MB
|=========================================================================== | 78% 88 MB
|============================================================================ | 78% 88 MB
|============================================================================ | 78% 88 MB
|============================================================================ | 78% 88 MB
|============================================================================ | 78% 88 MB
|============================================================================ | 78% 88 MB
|============================================================================ | 79% 88 MB
|============================================================================ | 79% 89 MB
|============================================================================ | 79% 89 MB
|============================================================================= | 79% 89 MB
|============================================================================= | 79% 89 MB
|============================================================================= | 79% 89 MB
|============================================================================= | 79% 89 MB
|============================================================================= | 79% 89 MB
|============================================================================= | 80% 90 MB
|============================================================================= | 80% 90 MB
|============================================================================= | 80% 90 MB
|============================================================================== | 80% 90 MB
|============================================================================== | 80% 90 MB
|============================================================================== | 80% 90 MB
|============================================================================== | 80% 90 MB
|============================================================================== | 80% 91 MB
|============================================================================== | 81% 91 MB
|============================================================================== | 81% 91 MB
|============================================================================== | 81% 91 MB
|=============================================================================== | 81% 91 MB
|=============================================================================== | 81% 91 MB
|=============================================================================== | 81% 91 MB
|=============================================================================== | 81% 92 MB
|=============================================================================== | 81% 92 MB
|=============================================================================== | 82% 92 MB
|=============================================================================== | 82% 92 MB
|=============================================================================== | 82% 92 MB
|================================================================================ | 82% 92 MB
|================================================================================ | 82% 92 MB
|================================================================================ | 82% 93 MB
|================================================================================ | 82% 93 MB
|================================================================================ | 82% 93 MB
|================================================================================ | 83% 93 MB
|================================================================================ | 83% 93 MB
|================================================================================ | 83% 93 MB
|================================================================================ | 83% 93 MB
|================================================================================= | 83% 94 MB
|================================================================================= | 83% 94 MB
|================================================================================= | 83% 94 MB
|================================================================================= | 84% 94 MB
|================================================================================= | 84% 94 MB
|================================================================================= | 84% 94 MB
|================================================================================= | 84% 94 MB
|================================================================================== | 84% 95 MB
|================================================================================== | 84% 95 MB
|================================================================================== | 84% 95 MB
|================================================================================== | 84% 95 MB
|================================================================================== | 85% 95 MB
|================================================================================== | 85% 95 MB
|================================================================================== | 85% 95 MB
|================================================================================== | 85% 96 MB
|================================================================================== | 85% 96 MB
|=================================================================================== | 85% 96 MB
|=================================================================================== | 85% 96 MB
|=================================================================================== | 85% 96 MB
|=================================================================================== | 86% 96 MB
|=================================================================================== | 86% 96 MB
|=================================================================================== | 86% 97 MB
|=================================================================================== | 86% 97 MB
|=================================================================================== | 86% 97 MB
|==================================================================================== | 86% 97 MB
|==================================================================================== | 86% 97 MB
|==================================================================================== | 86% 97 MB
|==================================================================================== | 87% 97 MB
|==================================================================================== | 87% 98 MB
|==================================================================================== | 87% 98 MB
|==================================================================================== | 87% 98 MB
|==================================================================================== | 87% 98 MB
|===================================================================================== | 87% 98 MB
|===================================================================================== | 87% 98 MB
|===================================================================================== | 88% 98 MB
|===================================================================================== | 88% 99 MB
|===================================================================================== | 88% 99 MB
|===================================================================================== | 88% 99 MB
|===================================================================================== | 88% 99 MB
|===================================================================================== | 88% 99 MB
|====================================================================================== | 88% 99 MB
|====================================================================================== | 88% 99 MB
|====================================================================================== | 89% 100 MB
|====================================================================================== | 89% 100 MB
|====================================================================================== | 89% 100 MB
|====================================================================================== | 89% 100 MB
|====================================================================================== | 89% 100 MB
|====================================================================================== | 89% 100 MB
|======================================================================================= | 89% 101 MB
|======================================================================================= | 89% 101 MB
|======================================================================================= | 90% 101 MB
|======================================================================================= | 90% 101 MB
|======================================================================================= | 90% 101 MB
|======================================================================================= | 90% 101 MB
|======================================================================================= | 90% 101 MB
|======================================================================================= | 90% 102 MB
|======================================================================================== | 90% 102 MB
|======================================================================================== | 90% 102 MB
|======================================================================================== | 91% 102 MB
|======================================================================================== | 91% 102 MB
|======================================================================================== | 91% 102 MB
|======================================================================================== | 91% 102 MB
|======================================================================================== | 91% 103 MB
|======================================================================================== | 91% 103 MB
|========================================================================================= | 91% 103 MB
|========================================================================================= | 91% 103 MB
|========================================================================================= | 92% 103 MB
|========================================================================================= | 92% 103 MB
|========================================================================================= | 92% 103 MB
|========================================================================================= | 92% 104 MB
|========================================================================================= | 92% 104 MB
|========================================================================================= | 92% 104 MB
|========================================================================================== | 92% 104 MB
|========================================================================================== | 93% 104 MB
|========================================================================================== | 93% 104 MB
|========================================================================================== | 93% 104 MB
|========================================================================================== | 93% 105 MB
|========================================================================================== | 93% 105 MB
|========================================================================================== | 93% 105 MB
|========================================================================================== | 93% 105 MB
|=========================================================================================== | 93% 105 MB
|=========================================================================================== | 94% 105 MB
|=========================================================================================== | 94% 105 MB
|=========================================================================================== | 94% 106 MB
|=========================================================================================== | 94% 106 MB
|=========================================================================================== | 94% 106 MB
|=========================================================================================== | 94% 106 MB
|=========================================================================================== | 94% 106 MB
|============================================================================================ | 94% 106 MB
|============================================================================================ | 95% 106 MB
|============================================================================================ | 95% 107 MB
|============================================================================================ | 95% 107 MB
|============================================================================================ | 95% 107 MB
|============================================================================================ | 95% 107 MB
|============================================================================================ | 95% 107 MB
|============================================================================================ | 95% 107 MB
|============================================================================================= | 95% 107 MB
|============================================================================================= | 96% 108 MB
|============================================================================================= | 96% 108 MB
|============================================================================================= | 96% 108 MB
|============================================================================================= | 96% 108 MB
|============================================================================================= | 96% 108 MB
|============================================================================================= | 96% 108 MB
|============================================================================================= | 96% 108 MB
|============================================================================================== | 97% 109 MB
|============================================================================================== | 97% 109 MB
|============================================================================================== | 97% 109 MB
|============================================================================================== | 97% 109 MB
|============================================================================================== | 97% 109 MB
|============================================================================================== | 97% 109 MB
|============================================================================================== | 97% 110 MB
|=============================================================================================== | 97% 110 MB
|=============================================================================================== | 98% 110 MB
|=============================================================================================== | 98% 110 MB
|=============================================================================================== | 98% 110 MB
|=============================================================================================== | 98% 110 MB
|=============================================================================================== | 98% 110 MB
|=============================================================================================== | 98% 111 MB
|=============================================================================================== | 98% 111 MB
|================================================================================================| 99% 111 MB
|================================================================================================| 99% 111 MB
|================================================================================================| 99% 111 MB
|================================================================================================| 99% 111 MB
|================================================================================================| 99% 111 MB
|================================================================================================| 99% 112 MB
|================================================================================================| 99% 112 MB
|================================================================================================| 99% 112 MB
|=================================================================================================| 100% 112 MB
# January 2018 trips; readr guesses the column types and reports them below.
fordgobike201801 <- "./data/201801-fordgobike-tripdata.csv" %>%
  read_csv()
Parsed with column specification:
cols(
duration_sec = col_integer(),
start_time = col_datetime(format = ""),
end_time = col_datetime(format = ""),
start_station_id = col_integer(),
start_station_name = col_character(),
start_station_latitude = col_double(),
start_station_longitude = col_double(),
end_station_id = col_integer(),
end_station_name = col_character(),
end_station_latitude = col_double(),
end_station_longitude = col_double(),
bike_id = col_integer(),
user_type = col_character(),
member_birth_year = col_integer(),
member_gender = col_character(),
bike_share_for_all_trip = col_character()
)
# February 2018 trips; readr guesses the column types and reports them below.
fordgobike201802 <- "./data/201802-fordgobike-tripdata.csv" %>%
  read_csv()
Parsed with column specification:
cols(
duration_sec = col_integer(),
start_time = col_datetime(format = ""),
end_time = col_datetime(format = ""),
start_station_id = col_integer(),
start_station_name = col_character(),
start_station_latitude = col_double(),
start_station_longitude = col_double(),
end_station_id = col_integer(),
end_station_name = col_character(),
end_station_latitude = col_double(),
end_station_longitude = col_double(),
bike_id = col_integer(),
user_type = col_character(),
member_birth_year = col_integer(),
member_gender = col_character(),
bike_share_for_all_trip = col_character()
)
# March 2018 trips; readr guesses the column types and reports them below.
fordgobike201803 <- "./data/201803-fordgobike-tripdata.csv" %>%
  read_csv()
Parsed with column specification:
cols(
duration_sec = col_integer(),
start_time = col_datetime(format = ""),
end_time = col_datetime(format = ""),
start_station_id = col_integer(),
start_station_name = col_character(),
start_station_latitude = col_double(),
start_station_longitude = col_double(),
end_station_id = col_integer(),
end_station_name = col_character(),
end_station_latitude = col_double(),
end_station_longitude = col_double(),
bike_id = col_integer(),
user_type = col_character(),
member_birth_year = col_integer(),
member_gender = col_character(),
bike_share_for_all_trip = col_character()
)
# April 2018 trips; readr guesses the column types and reports them below.
fordgobike201804 <- "./data/201804-fordgobike-tripdata.csv" %>%
  read_csv()
Parsed with column specification:
cols(
duration_sec = col_integer(),
start_time = col_datetime(format = ""),
end_time = col_datetime(format = ""),
start_station_id = col_integer(),
start_station_name = col_character(),
start_station_latitude = col_double(),
start_station_longitude = col_double(),
end_station_id = col_integer(),
end_station_name = col_character(),
end_station_latitude = col_double(),
end_station_longitude = col_double(),
bike_id = col_integer(),
user_type = col_character(),
member_birth_year = col_integer(),
member_gender = col_character(),
bike_share_for_all_trip = col_character()
)
Check the head() and tail() of the data.frames that are loaded.
# First rows of every loaded table.
fordgobike2017 %>% head()
fordgobike201801 %>% head()
fordgobike201802 %>% head()
fordgobike201803 %>% head()
fordgobike201804 %>% head()
# Last rows of every loaded table.
fordgobike2017 %>% tail()
fordgobike201801 %>% tail()
fordgobike201802 %>% tail()
fordgobike201803 %>% tail()
fordgobike201804 %>% tail()
# Rows and columns of the 2017 table.
fordgobike2017 %>% dim()
[1] 519700 15
# Row count of the 2017 table.
count(fordgobike2017)
# Combined row count of the four monthly 2018 tables.
sum(
  nrow(fordgobike201801),
  nrow(fordgobike201802),
  nrow(fordgobike201803),
  nrow(fordgobike201804)
)
[1] 444071
# Stack the four monthly 2018 tables into a single table.
fordgobike2018 <- list(
  fordgobike201801,
  fordgobike201802,
  fordgobike201803,
  fordgobike201804
) %>%
  bind_rows()
fordgobike2018 %>% dim()
[1] 444071 16
count(fordgobike2018)
# Append the 2018 rows to the 2017 rows to form the full data set.
fordgobike <- fordgobike2017 %>% bind_rows(fordgobike2018)
fordgobike %>% dim()
[1] 963771 16
# Size check of the combined table: row count, then rows x columns.
count(fordgobike)
fordgobike %>% dim()
[1] 963771 16
# Rider age measured against the fixed reference year 2018
# (not against the year in which the individual ride took place).
fordgobike <- mutate(fordgobike, age = 2018 - member_birth_year)
count(fordgobike)
fordgobike %>% dim()
[1] 963771 17
# Split the ride start timestamp into calendar year, month and day columns.
fordgobike <- mutate(
  fordgobike,
  year = year(start_time),
  month = month(start_time),
  day = day(start_time)
)
count(fordgobike)
fordgobike %>% dim()
[1] 963771 20
# Day of the week of each ride start, as a factor ordered Monday..Sunday.
# wday() returns day numbers; week_start = 1 makes Monday 1 and Sunday 7.
fordgobike <- fordgobike %>% mutate(week_day = wday(start_time, week_start = 1) )
levels <- c("M","T","W","TH","F","SAT","SUN")
# The numbers are the factor's levels and the names are its labels.
# Passing the names as `levels` matched none of the numeric values and
# turned the whole column into NA.
fordgobike$week_day <- factor(fordgobike$week_day, levels = seq_along(levels), labels = levels)
fordgobike %>% count()
dim(fordgobike)
[1] 963771 21
# Current date at the time the notebook is run.
lubridate::today()
[1] "2018-05-31"
# Current date-time at the time the notebook is run.
lubridate::now()
[1] "2018-05-31 06:54:05 PDT"
Age
# Number of rides for each rider age.
count(group_by(fordgobike, age))
# Column-wise summary of the table (called on the age-grouped table).
summary(group_by(fordgobike, age))
duration_sec start_time end_time start_station_id
Min. : 61.0 Min. :2017-06-28 09:47:36 Min. :2017-06-28 09:52:55 Min. : 3.0
1st Qu.: 368.0 1st Qu.:2017-10-04 11:40:04 1st Qu.:2017-10-04 11:56:47 1st Qu.: 27.0
Median : 575.0 Median :2017-12-14 16:35:16 Median :2017-12-14 16:49:41 Median : 75.0
Mean : 996.8 Mean :2017-12-16 02:25:36 Mean :2017-12-16 02:42:13 Mean :102.9
3rd Qu.: 900.0 3rd Qu.:2018-03-01 17:17:44 3rd Qu.:2018-03-01 17:27:42 3rd Qu.:162.0
Max. :86369.0 Max. :2018-04-30 23:58:45 Max. :2018-05-01 18:56:06 Max. :345.0
start_station_name start_station_latitude start_station_longitude end_station_id end_station_name
Length:963771 Min. :37.31 Min. :-122.4 Min. : 3.0 Length:963771
Class :character 1st Qu.:37.77 1st Qu.:-122.4 1st Qu.: 25.0 Class :character
Mode :character Median :37.78 Median :-122.4 Median : 71.0 Mode :character
Mean :37.77 Mean :-122.4 Mean :100.3
3rd Qu.:37.80 3rd Qu.:-122.4 3rd Qu.:158.0
Max. :37.88 Max. :-121.9 Max. :345.0
end_station_latitude end_station_longitude bike_id user_type member_birth_year
Min. :37.31 Min. :-122.4 Min. : 10 Length:963771 Min. :1886
1st Qu.:37.77 1st Qu.:-122.4 1st Qu.: 914 Class :character 1st Qu.:1975
Median :37.78 Median :-122.4 Median :1929 Mode :character Median :1983
Mean :37.77 Mean :-122.4 Mean :1846 Mean :1981
3rd Qu.:37.80 3rd Qu.:-122.4 3rd Qu.:2737 3rd Qu.:1989
Max. :37.88 Max. :-121.9 Max. :4073 Max. :2000
NA's :100885
member_gender bike_share_for_all_trip age year month day
Length:963771 Length:963771 Min. : 18 Min. :2017 Min. : 1.000 Min. : 1.00
Class :character Class :character 1st Qu.: 29 1st Qu.:2017 1st Qu.: 3.000 1st Qu.: 8.00
Mode :character Mode :character Median : 35 Median :2017 Median : 7.000 Median :16.00
Mean : 37 Mean :2017 Mean : 6.459 Mean :15.96
3rd Qu.: 43 3rd Qu.:2018 3rd Qu.:10.000 3rd Qu.:24.00
Max. :132 Max. :2018 Max. :12.000 Max. :31.00
NA's :100885
week_day
M : 0
T : 0
W : 0
TH : 0
F : 0
(Other): 0
NA's :963771
# Per-column summary statistics of the combined table, grouped by column type.
skimr::skim(fordgobike)
Skim summary statistics
n obs: 963771
n variables: 21
Variable type: character
variable missing complete n min max empty n_unique
1 bike_share_for_all_trip 519700 444071 963771 2 3 0 2
2 end_station_name 0 963771 963771 9 63 0 299
3 member_gender 100658 863113 963771 4 6 0 3
4 start_station_name 0 963771 963771 9 63 0 299
5 user_type 0 963771 963771 8 10 0 2
Variable type: factor
variable missing complete n n_unique top_counts ordered
1 week_day 963771 0 963771 0 NA: 963771, M: 0, T: 0, W: 0 FALSE
Variable type: integer
variable missing complete n mean sd min p25 median p75 max
1 bike_id 0 963771 963771 1846.03 1056.45 10 914 1929 2737 4073
2 day 0 963771 963771 15.96 8.79 1 8 16 24 31
3 duration_sec 0 963771 963771 996.78 3092.37 61 368 575 900 86369
4 end_station_id 0 963771 963771 100.33 89.47 3 25 71 158 345
5 member_birth_year 100885 862886 963771 1981 10.61 1886 1975 1983 1989 2000
6 start_station_id 0 963771 963771 102.91 90.23 3 27 75 162 345
hist
1 ▇▆▆▆▇▇▅▂
2 ▇▇▇▇▆▇▇▇
3 ▇▁▁▁▁▁▁▁
4 ▇▅▃▁▂▁▁▁
5 ▁▁▁▁▁▃▇▇
6 ▇▅▃▂▂▁▁▁
Variable type: numeric
variable missing complete n mean sd min p25 median p75 max
1 age 100885 862886 963771 37 10.61 18 29 35 43 132
2 end_station_latitude 0 963771 963771 37.77 0.095 37.31 37.77 37.78 37.8 37.88
3 end_station_longitude 0 963771 963771 -122.36 0.11 -122.44 -122.41 -122.4 -122.39 -121.87
4 month 0 963771 963771 6.46 3.8 1 3 7 10 12
5 start_station_latitude 0 963771 963771 37.77 0.095 37.31 37.77 37.78 37.8 37.88
6 start_station_longitude 0 963771 963771 -122.36 0.11 -122.44 -122.41 -122.4 -122.39 -121.87
7 year 0 963771 963771 2017.46 0.5 2017 2017 2017 2018 2018
hist
1 ▇▇▃▁▁▁▁▁
2 ▁▁▁▁▁▁▇▁
3 ▇▁▂▁▁▁▁▁
4 ▇▅▅▁▂▇▅▇
5 ▁▁▁▁▁▁▇▁
6 ▇▁▂▁▁▁▁▁
7 ▇▁▁▁▁▁▁▇
|==================================================================== | 80% ~1 s remaining
Variable type: POSIXct
variable missing complete n min max median n_unique
1 end_time 0 963771 963771 2017-06-28 2018-05-01 2017-12-14 963733
2 start_time 0 963771 963771 2017-06-28 2018-04-30 2017-12-14 963739
|======================================================================================|100% ~0 s remaining
# Distribution of rider age: all riders, then restricted to different age ranges.
fordgobike %>% ggplot(aes(x=age)) + geom_histogram()
fordgobike %>% filter(age <= 80) %>% ggplot(aes(x=age)) + geom_histogram()
fordgobike %>% filter(age <= 100) %>% ggplot(aes(x=age)) + geom_histogram()
fordgobike %>% filter(age > 100) %>% ggplot(aes(x=age)) + geom_histogram()
# Ride counts for every gender/age combination.
fordgobike %>% group_by( member_gender, age ) %>% count()
# Split the histogram bars by gender. `fill` is an aesthetic that
# geom_histogram() understands; the earlier `class` mapping is not a ggplot2
# aesthetic, so gender had no effect on these two plots.
fordgobike %>% ggplot(aes(x=age, fill=member_gender)) + geom_histogram()
fordgobike %>% ggplot(aes(x=age, fill=member_gender)) + geom_histogram(aes(y=..density..))
fordgobike %>% filter(age <= 80) %>% ggplot(aes(x=age)) + geom_histogram()
# One panel per gender, as counts and then as densities.
fordgobike %>% filter(age <= 80) %>% ggplot(aes(x=age, color=member_gender)) +
  geom_histogram(position="identity") +
  facet_grid(member_gender ~ .)
fordgobike %>% filter(age <= 80) %>% ggplot(aes(x=age, color=member_gender)) +
  geom_histogram(aes(y=..density..),position="identity") +
  facet_grid(member_gender ~ .)
Year and day of week.
# Ride counts per calendar year.
ggplot(fordgobike, aes(x = year)) +
  geom_bar()
# Ride counts per month and per day of month, one panel per year.
ggplot(fordgobike, aes(x = month)) +
  geom_bar() +
  facet_grid(year ~ .)
ggplot(fordgobike, aes(x = day)) +
  geom_bar() +
  facet_grid(year ~ .)
# Ride counts per day of the week.
ggplot(fordgobike, aes(x = as.factor(week_day))) +
  geom_bar()
library(biganalytics)
Loading required package: bigmemory
Loading required package: bigmemory.sri
Loading required package: foreach
Attaching package: ‘foreach’
The following objects are masked from ‘package:purrr’:
accumulate, when
Loading required package: biglm
Loading required package: DBI
# run in parallel; the doMC package does not run on Windows, so doParallel is used instead
library(doParallel)
Loading required package: iterators
Loading required package: parallel
# Register a parallel backend with 8 workers.
# NOTE(review): presumably bigkmeans() uses it for its nstart restarts; confirm.
registerDoParallel(cores = 8)
# Start-station coordinates, selected by name so the code does not depend on
# the column order of the table.
fordgobike_subset <- fordgobike[, c("start_station_latitude", "start_station_longitude")]
fordgobike_subset2 <- as.matrix(fordgobike_subset)
# Partition the start locations into 3 clusters, using 8 random starts.
cl <- bigkmeans(fordgobike_subset2, 3, nstart=8)
# Cluster ids are labels, not quantities: map them as a factor so the points
# get a discrete colour per cluster instead of a continuous gradient.
fordgobike_subset %>% ggplot(aes(x=start_station_longitude, y=start_station_latitude, color=factor(cl$cluster))) +
  geom_point() +
  labs(color = "cluster")
# Start-station coordinates, selected by name so the code does not depend on
# the column order of the table.
fordgobike_subset <- fordgobike[, c("start_station_latitude", "start_station_longitude")]
# kmeans() starts from randomly chosen centres, so the cluster numbering can
# change between runs. Fix the seed so the labels are reproducible.
set.seed(2018)
cl <- kmeans(fordgobike_subset, 3)
# Cluster ids are labels, not quantities: map them as a factor so the points
# get a discrete colour per cluster instead of a continuous gradient.
fordgobike_subset %>% ggplot(aes(x=start_station_longitude, y=start_station_latitude, color=factor(cl$cluster))) +
  geom_point() +
  labs(color = "cluster")
NA
# Coordinates of the fitted cluster centres.
cl[["centers"]]
start_station_latitude start_station_longitude
1 37.33454 -121.8909
2 37.82669 -122.2651
3 37.77905 -122.4070
# Fetch a base map for the location "hayward".
bayarea <- "hayward" %>%
  get_map()
Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=hayward&zoom=10&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=hayward&sensor=false
# Overlay the start stations on the base map, coloured by cluster.
# alpha, size and shape are constants, so they are set outside aes(). Inside
# aes() the 0.1 was treated as a data value to be scaled (with its own legend
# entry) rather than being applied as the transparency.
# Cluster ids are mapped as a factor to get one discrete colour per cluster.
ggmap(bayarea) +
  geom_point(data = fordgobike_subset,
             aes(x = start_station_longitude, y = start_station_latitude, color = factor(cl$cluster)),
             alpha = 0.1, size = 2, shape = 19) + # alternative shape: 21
  labs(color = "cluster")
# Keep each ride's start-station cluster id as the column `city`.
fordgobike <- fordgobike %>% mutate(city = cl$cluster)
fordgobike %>% head()
# Repeat the clustering on the end-station coordinates, selected by name so
# the code does not depend on the column order of the table.
fordgobike_subset <- fordgobike[, c("end_station_latitude", "end_station_longitude")]
cl <- kmeans(fordgobike_subset, 3)
fordgobike_subset %>% ggplot(aes(x=end_station_longitude, y=end_station_latitude, color=factor(cl$cluster))) +
  geom_point() +
  labs(color = "cluster")
cl$centers
end_station_latitude end_station_longitude
1 37.77950 -122.4059
2 37.33461 -121.8909
3 37.82582 -122.2658
# Bars of ride duration per gender, using the raw values (identity stat).
ggplot(fordgobike, aes(x = member_gender, y = duration_sec)) +
  geom_bar(stat = "Identity")
# Ride duration with the x axis limited to 0..10000 seconds:
# histogram on the density scale with a density curve on top.
ggplot(data = fordgobike, mapping = aes(x = duration_sec, y = ..density..)) +
  scale_x_continuous(limits = c(0, 10000)) +
  geom_histogram() +
  geom_density(mapping = aes(y = ..density..))
# Same plot for the logarithm of the duration.
ggplot(data = fordgobike, mapping = aes(x = log(x = duration_sec), y = ..density..)) +
  geom_histogram() +
  geom_density(mapping = aes(y = ..density..))
# Ride duration (x axis limited to 0..10000 seconds), one panel per gender.
ggplot(data = fordgobike, mapping = aes(x = duration_sec, y = ..density..)) +
  scale_x_continuous(limits = c(0, 10000)) +
  geom_histogram() +
  geom_density(mapping = aes(y = ..density..)) +
  facet_grid(member_gender ~ .)
# Logarithm of the ride duration, one panel per gender.
ggplot(data = fordgobike, mapping = aes(x = log(x = duration_sec), y = ..density..)) +
  geom_histogram() +
  geom_density(mapping = aes(y = ..density..)) +
  facet_grid(member_gender ~ .)
# Ride duration (x axis limited to 0..10000 seconds), one panel per city cluster.
ggplot(data = fordgobike, mapping = aes(x = duration_sec, y = ..density..)) +
  scale_x_continuous(limits = c(0, 10000)) +
  geom_histogram() +
  geom_density(mapping = aes(y = ..density..)) +
  facet_grid(city ~ .)
# Logarithm of the ride duration, one panel per city cluster.
ggplot(data = fordgobike, mapping = aes(x = log(x = duration_sec), y = ..density..)) +
  geom_histogram() +
  geom_density(mapping = aes(y = ..density..)) +
  facet_grid(city ~ .)
# The two plots below repeat the two plots above unchanged.
ggplot(data = fordgobike, mapping = aes(x = duration_sec, y = ..density..)) +
  scale_x_continuous(limits = c(0, 10000)) +
  geom_histogram() +
  geom_density(mapping = aes(y = ..density..)) +
  facet_grid(city ~ .)
ggplot(data = fordgobike, mapping = aes(x = log(x = duration_sec), y = ..density..)) +
  geom_histogram() +
  geom_density(mapping = aes(y = ..density..)) +
  facet_grid(city ~ .)