# Read the trips slice into a data frame; the path is relative to the
# working directory.
trips2022 <- read.csv(file = "trips2022_400sliceb.csv")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Column names, types and first values of the raw data.
dplyr::glimpse(trips2022)
## Rows: 400,000
## Columns: 7
## $ rideable_type <chr> "classic_bike", "classic_bike", "classic_bike"…
## $ member_casual <chr> "member", "member", "casual", "member", "membe…
## $ started_at_month <int> 8, 12, 7, 11, 1, 11, 7, 7, 6, 7, 9, 7, 9, 8, 5…
## $ started_at_season <chr> "summer", "winter", "summer", "fall", "winter"…
## $ start_time_hour <int> 19, 17, 14, 11, 10, 22, 11, 19, 14, 15, 22, 21…
## $ started_at_weekday_name <chr> "Tuesday", "Tuesday", "Saturday", "Thursday", …
## $ ride_length_min <int> 19, 5, 184, 2, 2, 4, 6, 14, 10, 8, 15, 9, 29, …
# Per-column summary: quartiles for numeric columns, length/class for text.
summary(object = trips2022)
## rideable_type member_casual started_at_month started_at_season
## Length:400000 Length:400000 Min. : 1.000 Length:400000
## Class :character Class :character 1st Qu.: 5.000 Class :character
## Mode :character Mode :character Median : 7.000 Mode :character
## Mean : 7.076
## 3rd Qu.: 9.000
## Max. :12.000
## start_time_hour started_at_weekday_name ride_length_min
## Min. : 0.0 Length:400000 Min. : 1.00
## 1st Qu.:11.0 Class :character 1st Qu.: 6.00
## Median :15.0 Mode :character Median : 10.00
## Mean :14.2 Mean : 17.01
## 3rd Qu.:18.0 3rd Qu.: 19.00
## Max. :23.0 Max. :3911.00
# Helper column of 1s: summing it inside aggregate() counts trips per group.
trips2022$ones <- 1
# Bin ride duration (minutes) into labelled ranges with cut().
# Intervals are right-closed, i.e. the same boundaries as chained
# `> lower & <= upper` comparisons:
#   <=5, 6-10, 11-20, 21-30, 31-60, 61-240, 241-480, >480
# The result is stored as character (not factor).
trips2022$ride_length_bin <- as.character(cut(
  trips2022$ride_length_min,
  breaks = c(-Inf, 5, 10, 20, 30, 60, 240, 480, Inf),
  labels = c('001-005', '006-010', '011-020', '021-030',
             '031-060', '061-240', '241-480', '>480'),
  right = TRUE
))
# A missing duration falls in no interval; such rows keep the empty-string
# placeholder '' rather than NA.
trips2022$ride_length_bin[is.na(trips2022$ride_length_bin)] <- ''
# Confirm the two new columns were added.
glimpse(trips2022)
## Rows: 400,000
## Columns: 9
## $ rideable_type <chr> "classic_bike", "classic_bike", "classic_bike"…
## $ member_casual <chr> "member", "member", "casual", "member", "membe…
## $ started_at_month <int> 8, 12, 7, 11, 1, 11, 7, 7, 6, 7, 9, 7, 9, 8, 5…
## $ started_at_season <chr> "summer", "winter", "summer", "fall", "winter"…
## $ start_time_hour <int> 19, 17, 14, 11, 10, 22, 11, 19, 14, 15, 22, 21…
## $ started_at_weekday_name <chr> "Tuesday", "Tuesday", "Saturday", "Thursday", …
## $ ride_length_min <int> 19, 5, 184, 2, 2, 4, 6, 14, 10, 8, 15, 9, 29, …
## $ ones <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ ride_length_bin <chr> "011-020", "001-005", "061-240", "001-005", "0…
library(ggplot2)
# Colour-blind-friendly palette (the variant that includes grey), from
# <http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette>
cbPalette <- c(
  "#999999", "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7"
)
# One data frame per rider type; their row counts are the group totals.
trips2022_member <- trips2022 %>% filter(member_casual == 'member')
trips2022_casual <- trips2022 %>% filter(member_casual == 'casual')
# Number of trips per rider type.
member_casual_count <- trips2022 %>% count(member_casual)
print(member_casual_count)
## member_casual n
## 1 casual 161744
## 2 member 238256
# Group totals: number of trips made by members and by casual users.
member_count <- nrow(trips2022_member)
casual_count <- nrow(trips2022_casual)
# Each group's share of all trips, in percent.
pct_member <- member_count / nrow(trips2022) * 100
pct_casual <- casual_count / nrow(trips2022) * 100
percent_member_casual <- c(member = pct_member, casual = pct_casual)
print(percent_member_casual)
## member casual
## 59.564 40.436
# Trips per rider type and season (summing the column of 1s counts rows);
# the aggregate's columns are named in the same step.
user_season_ct <- setNames(
  aggregate(
    trips2022$ones,
    by = list(trips2022$member_casual, trips2022$started_at_season),
    FUN = sum
  ),
  c('member_casual', 'season', 'count')
)
# Percentages are within rider type: each season's trips divided by that
# group's total trips (member_count / casual_count).
# Members
member_season_ct <- user_season_ct %>% filter(member_casual == 'member')
member_season_pct <- member_season_ct
member_season_pct$percent <- member_season_pct$count / member_count * 100
# Casual users
casual_season_ct <- user_season_ct %>% filter(member_casual == 'casual')
casual_season_pct <- casual_season_ct
casual_season_pct$percent <- casual_season_pct$count / casual_count * 100
# Long table (rows stacked) feeds the plots; wide table (columns side by
# side) is for reading the two groups against each other.
casual_member_season <- rbind(casual_season_pct, member_season_pct)
casual_member_season2 <- cbind(casual_season_pct, member_season_pct)
casual_member_season2
## member_casual season count percent member_casual season count percent
## 1 casual fall 40717 25.173731 member fall 68958 28.942818
## 2 casual spring 34983 21.628623 member spring 55955 23.485243
## 3 casual summer 80601 49.832451 member summer 90454 37.965046
## 4 casual winter 5443 3.365194 member winter 22889 9.606893
# Trip counts per season, casual and member bars side by side.
ggplot(casual_member_season, aes(x = season, y = count, fill = member_casual)) +
  geom_col(position = 'dodge') +
  scale_fill_manual(values = cbPalette) +
  scale_x_discrete(limits = c('winter', 'spring', 'summer', 'fall')) +
  labs(caption = 'winter = Dec, Jan, Feb; spring = Mar, Apr, May; summer = Jun, Jul, Aug; fall = Sep, Oct, Nov')
#### 4.b2 Graph seasonal preference percent within casual or member groups.
- There was a stronger preference for summer among casual users (50% of casual rides were in summer) compared to members (38% of member rides were in summer).
# Within-group percentage of trips per season, one panel per rider type.
ggplot(casual_member_season, aes(x = season, y = percent, fill = member_casual)) +
  geom_col(position = 'dodge') +
  facet_wrap(~member_casual) +
  scale_fill_manual(values = cbPalette) +
  scale_x_discrete(limits = c('winter', 'spring', 'summer', 'fall')) +
  labs(caption = 'winter = Dec, Jan, Feb; spring = Mar, Apr, May; summer = Jun, Jul, Aug; fall = Sep, Oct, Nov')
# Trips per rider type and calendar month (summing the column of 1s counts
# rows); the aggregate's columns are named in the same step.
user_month_ct <- setNames(
  aggregate(
    trips2022$ones,
    by = list(trips2022$member_casual, trips2022$started_at_month),
    FUN = sum
  ),
  c('member_casual', 'month', 'count')
)
# Percentages are within rider type: each month's trips divided by that
# group's total trips (member_count / casual_count).
# Members
member_month_ct <- user_month_ct %>% filter(member_casual == 'member')
member_month_pct <- member_month_ct
member_month_pct$percent <- member_month_pct$count / member_count * 100
# Casual users
casual_month_ct <- user_month_ct %>% filter(member_casual == 'casual')
casual_month_pct <- casual_month_ct
casual_month_pct$percent <- casual_month_pct$count / casual_count * 100
# Long table (rows stacked) feeds the plots; wide table (columns side by
# side) is for reading the two groups against each other.
casual_member_monthly <- rbind(casual_month_pct, member_month_pct)
casual_member_monthly2 <- cbind(casual_month_pct, member_month_pct)
# Translate month numbers to full names with the built-in month.name vector.
# The wide table has two 'month' columns; `$month` selects the first one.
casual_member_monthly$month_name <- month.name[casual_member_monthly$month]
casual_member_monthly2$month_name <- month.name[casual_member_monthly2$month]
# Show the wide comparison table.
casual_member_monthly2
## member_casual month count percent member_casual month count percent
## 1 casual 1 1118 0.6912157 member 1 6410 2.690383
## 2 casual 2 1409 0.8711297 member 2 6970 2.925425
## 3 casual 3 6271 3.8771145 member 3 13615 5.714442
## 4 casual 4 8522 5.2688199 member 4 16517 6.932459
## 5 casual 5 20190 12.4826887 member 5 25823 10.838342
## 6 casual 6 27081 16.7431249 member 6 29963 12.575969
## 7 casual 7 28451 17.5901424 member 7 29976 12.581425
## 8 casual 8 25069 15.4991839 member 8 30515 12.807652
## 9 casual 9 20083 12.4165348 member 9 28619 12.011870
## 10 casual 10 13794 8.5282916 member 10 23927 10.042559
## 11 casual 11 6840 4.2289049 member 11 16412 6.888389
## 12 casual 12 2916 1.8028489 member 12 9509 3.991085
## month_name
## 1 January
## 2 February
## 3 March
## 4 April
## 5 May
## 6 June
## 7 July
## 8 August
## 9 September
## 10 October
## 11 November
## 12 December
# Trip counts per month, casual and member bars side by side.
# month.name supplies January..December, fixing the axis to calendar order.
ggplot(casual_member_monthly, aes(x = month_name, y = count, fill = member_casual)) +
  geom_col(position = 'dodge') +
  scale_fill_manual(values = cbPalette) +
  scale_x_discrete(limits = month.name) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.1, hjust = 1))
# Within-group percentage of trips per month, one panel per rider type.
ggplot(casual_member_monthly, aes(x = month_name, y = percent, fill = member_casual)) +
  geom_col(position = 'dodge') +
  facet_wrap(~member_casual) +
  scale_fill_manual(values = cbPalette) +
  scale_x_discrete(limits = month.name) +
  scale_y_continuous(breaks = seq(0, 22, by = 2)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.1, hjust = 1))
# Trips per rider type and weekday (summing the column of 1s counts rows);
# the aggregate's columns are named in the same step.
user_day_ct <- setNames(
  aggregate(
    trips2022$ones,
    by = list(trips2022$member_casual, trips2022$started_at_weekday_name),
    FUN = sum
  ),
  c('member_casual', 'day', 'count')
)
# Percentages are within rider type: each weekday's trips divided by that
# group's total trips (member_count / casual_count).
# Members
member_day_ct <- user_day_ct %>% filter(member_casual == 'member')
member_day_pct <- member_day_ct
member_day_pct$percent <- member_day_pct$count / member_count * 100
# Casual users
casual_day_ct <- user_day_ct %>% filter(member_casual == 'casual')
casual_day_pct <- casual_day_ct
casual_day_pct$percent <- casual_day_pct$count / casual_count * 100
# Long table (rows stacked) feeds the plots; wide table (columns side by
# side) is for reading the two groups against each other.
casual_member_day <- rbind(casual_day_pct, member_day_pct)
casual_member_day2 <- cbind(casual_day_pct, member_day_pct)
casual_member_day2
## member_casual day count percent member_casual day count percent
## 1 casual Friday 22819 14.10810 member Friday 32860 13.79189
## 2 casual Monday 19538 12.07958 member Monday 33948 14.24854
## 3 casual Saturday 33720 20.84776 member Saturday 30878 12.96001
## 4 casual Sunday 27556 17.03680 member Sunday 27292 11.45491
## 5 casual Thursday 21169 13.08797 member Thursday 38093 15.98826
## 6 casual Tuesday 18320 11.32654 member Tuesday 37576 15.77127
## 7 casual Wednesday 18622 11.51326 member Wednesday 37609 15.78512
# Trip counts per weekday, casual and member bars side by side.
# The explicit limits put the axis in Sunday..Saturday order.
ggplot(casual_member_day, aes(x = day, y = count, fill = member_casual)) +
  geom_col(position = 'dodge') +
  scale_fill_manual(values = cbPalette) +
  scale_x_discrete(limits = c('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday')) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.1, hjust = 1))
# Within-group percentage of trips per weekday, one panel per rider type.
ggplot(casual_member_day, aes(x = day, y = percent, fill = member_casual)) +
  geom_col(position = 'dodge') +
  facet_wrap(~member_casual) +
  scale_fill_manual(values = cbPalette) +
  scale_x_discrete(limits = c('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday')) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.1, hjust = 1))
# Trips per rider type and start hour (summing the column of 1s counts rows).
user_hour_ct <- aggregate(
  trips2022$ones,
  by = list(trips2022$member_casual, trips2022$start_time_hour),
  FUN = sum
)
# Name the columns. The grouping variable here is the start hour, so the
# second column is 'hour' in every table derived from this one.
colnames(user_hour_ct) <- c('member_casual', 'hour', 'count')
# Percentages are within rider type: each hour's trips divided by that
# group's total trips (member_count / casual_count).
# Members
member_hour_ct <- filter(user_hour_ct, member_casual == 'member')
member_hour_pct <- cbind(member_hour_ct, ((member_hour_ct$count / member_count) * 100))
colnames(member_hour_pct) <- c('member_casual', 'hour', 'count', 'percent')
# Casual users
casual_hour_ct <- filter(user_hour_ct, member_casual == 'casual')
casual_hour_pct <- cbind(casual_hour_ct, ((casual_hour_ct$count / casual_count) * 100))
colnames(casual_hour_pct) <- c('member_casual', 'hour', 'count', 'percent')
# Long table (rows stacked) feeds the plots; wide table (columns side by
# side) is for reading the two groups against each other.
casual_member_hour <- rbind(casual_hour_pct, member_hour_pct)
casual_member_hour2 <- cbind(casual_hour_pct, member_hour_pct)
casual_member_hour2
## member_casual hour count percent member_casual hour count percent
## 1 casual 0 3026 1.8708577 member 0 2392 1.0039621
## 2 casual 1 1955 1.2087002 member 1 1458 0.6119468
## 3 casual 2 1223 0.7561331 member 2 779 0.3269592
## 4 casual 3 677 0.4185627 member 3 493 0.2069203
## 5 casual 4 454 0.2806905 member 4 522 0.2190921
## 6 casual 5 827 0.5113018 member 5 2448 1.0274663
## 7 casual 6 2051 1.2680532 member 6 7043 2.9560641
## 8 casual 7 3478 2.1503116 member 7 12785 5.3660768
## 9 casual 8 4857 3.0028935 member 8 15195 6.3775938
## 10 casual 9 5126 3.1692057 member 9 10248 4.3012558
## 11 casual 10 6704 4.1448214 member 10 9636 4.0443892
## 12 casual 11 8644 5.3442477 member 11 11463 4.8112115
## 13 casual 12 10272 6.3507765 member 12 13140 5.5150762
## 14 casual 13 10621 6.5665496 member 13 12957 5.4382681
## 15 casual 14 11175 6.9090662 member 14 12670 5.3178094
## 16 casual 15 12344 7.6318132 member 15 15671 6.5773790
## 17 casual 16 13981 8.6439064 member 16 21300 8.9399637
## 18 casual 17 15641 9.6702196 member 17 25564 10.7296354
## 19 casual 18 13822 8.5456029 member 18 20491 8.6004130
## 20 casual 19 10630 6.5721140 member 19 14638 6.1438117
## 21 casual 20 7639 4.7228954 member 20 10087 4.2336814
## 22 casual 21 6390 3.9506875 member 21 7806 3.2763078
## 23 casual 22 5970 3.6910179 member 22 5835 2.4490464
## 24 casual 23 4237 2.6195717 member 23 3635 1.5256699
# Trip counts per start hour, casual and member bars side by side,
# with a tick for every hour 0..23.
ggplot(casual_member_hour, aes(x = hour, y = count, fill = member_casual)) +
  geom_col(position = 'dodge') +
  scale_fill_manual(values = cbPalette) +
  scale_x_continuous(breaks = seq(0, 23, by = 1)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.1, hjust = 1))
# Within-group percentage of trips per start hour, one panel per rider type;
# ticks every second hour keep the narrower panels readable.
ggplot(casual_member_hour, aes(x = hour, y = percent, fill = member_casual)) +
  geom_col(position = 'dodge') +
  facet_wrap(~member_casual) +
  scale_fill_manual(values = cbPalette) +
  scale_x_continuous(breaks = seq(0, 23, by = 2)) +
  scale_y_continuous(breaks = seq(0, 15, by = 1)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.1, hjust = 1))
# Trips per rider type and ride-duration bin (summing the column of 1s
# counts rows); the aggregate's columns are named in the same step.
user_ride_length <- setNames(
  aggregate(
    trips2022$ones,
    by = list(trips2022$member_casual, trips2022$ride_length_bin),
    FUN = sum
  ),
  c('member_casual', 'ride_length_bin', 'count')
)
# Percentages are within rider type: each bin's trips divided by that
# group's total trips (member_count / casual_count).
# Members
member_ride_length_ct <- user_ride_length %>% filter(member_casual == 'member')
member_ride_length_pct <- member_ride_length_ct$count / member_count * 100
member_ride_length <- member_ride_length_ct
member_ride_length$percent <- member_ride_length_pct
# Casual users
casual_ride_length_ct <- user_ride_length %>% filter(member_casual == 'casual')
casual_ride_length_pct <- casual_ride_length_ct$count / casual_count * 100
casual_ride_length <- casual_ride_length_ct
casual_ride_length$percent <- casual_ride_length_pct
# Long table (rows stacked) feeds the plots; wide table (columns side by
# side) is for reading the two groups against each other.
user_ride_length_pct1 <- rbind(casual_ride_length, member_ride_length)
user_ride_length_pct2 <- cbind(casual_ride_length, member_ride_length)
user_ride_length_pct2
## member_casual ride_length_bin count percent member_casual ride_length_bin
## 1 casual >480 255 0.1576565 member >480
## 2 casual 001-005 23221 14.3566376 member 001-005
## 3 casual 006-010 38646 23.8933129 member 006-010
## 4 casual 011-020 46404 28.6897814 member 011-020
## 5 casual 021-030 21213 13.1151697 member 021-030
## 6 casual 031-060 20577 12.7219557 member 031-060
## 7 casual 061-240 11122 6.8762983 member 061-240
## 8 casual 241-480 306 0.1891879 member 241-480
## count percent
## 1 64 0.02686186
## 2 69639 29.22864482
## 3 71716 30.10039621
## 4 61349 25.74919414
## 5 21131 8.86903163
## 6 12968 5.44288496
## 7 1321 0.55444564
## 8 68 0.02854073
# Trip counts per ride-duration bin, casual and member bars side by side.
# The explicit limits order the bins from shortest to longest ('>480' last).
ggplot(user_ride_length_pct1, aes(x = ride_length_bin, y = count, fill = member_casual)) +
  geom_col(position = 'dodge') +
  scale_fill_manual(values = cbPalette) +
  scale_x_discrete(limits = c('001-005', '006-010', '011-020', '021-030', '031-060', '061-240', '241-480', '>480')) +
  labs(x = 'ride_length_bin (mins)') +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.1, hjust = 1))
# Within-group percentage of trips per duration bin, one panel per rider type.
ggplot(user_ride_length_pct1, aes(x = ride_length_bin, y = percent, fill = member_casual)) +
  geom_col(position = 'dodge') +
  facet_wrap(~member_casual) +
  scale_fill_manual(values = cbPalette) +
  scale_x_discrete(limits = c('001-005', '006-010', '011-020', '021-030', '031-060', '061-240', '241-480', '>480')) +
  scale_y_continuous(breaks = seq(0, 35, by = 5)) +
  labs(x = 'ride_length_bin (mins)') +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.1, hjust = 1))
# Trips per rider type, season and weekday (summing the column of 1s counts
# rows); the aggregate's columns are named in the same step.
season_day <- setNames(
  aggregate(
    trips2022$ones,
    by = list(
      trips2022$member_casual,
      trips2022$started_at_season,
      trips2022$started_at_weekday_name
    ),
    FUN = sum
  ),
  c('member_casual', 'season', 'day', 'count')
)
# Weekday counts, one panel per season; the factor levels put the panels in
# winter..fall order and the x limits put the days in Sunday..Saturday order.
ggplot(season_day, aes(x = day, y = count, fill = member_casual)) +
  geom_col(position = 'dodge') +
  facet_wrap(~factor(season, levels = c('winter', 'spring', 'summer', 'fall'))) +
  scale_fill_manual(values = cbPalette) +
  scale_x_discrete(limits = c('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday')) +
  labs(caption = 'winter = Dec, Jan, Feb; spring = Mar, Apr, May; summer = Jun, Jul, Aug; fall = Sep, Oct, Nov') +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.1, hjust = 1))
# Trips per rider type, season, weekday and start hour (summing the column
# of 1s counts rows).
season_hour <- aggregate(
  trips2022$ones,
  by = list(
    trips2022$member_casual,
    trips2022$started_at_season,
    trips2022$started_at_weekday_name,
    trips2022$start_time_hour
  ),
  FUN = sum
)
colnames(season_hour) <- c('member_casual', 'season', 'day', 'hour', 'count')
# Hourly counts in a season (rows) x weekday (columns) grid.
# facet_grid takes a `rows ~ cols` formula; the factor levels fix the panel
# order to winter..fall and Sunday..Saturday.
ggplot(data = season_hour) +
  geom_bar(aes(x = hour, y = count, fill = member_casual), stat = 'identity', position = 'dodge') +
  facet_grid(
    factor(season, levels = c('winter', 'spring', 'summer', 'fall')) ~
      factor(day, levels = c('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'))
  ) +
  scale_fill_manual(values = cbPalette) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.1, hjust = 1))
# Trips per rider type, season, weekday and ride-duration bin (summing the
# column of 1s counts rows).
season_length <- aggregate(
  trips2022$ones,
  by = list(
    trips2022$member_casual,
    trips2022$started_at_season,
    trips2022$started_at_weekday_name,
    trips2022$ride_length_bin
  ),
  FUN = sum
)
colnames(season_length) <- c('member_casual', 'season', 'day', 'length_mins', 'count')
# Duration-bin counts in a season (rows) x weekday (columns) grid.
# facet_grid takes a `rows ~ cols` formula; the factor levels fix the panel
# order to winter..fall and Sunday..Saturday, and the x limits order the
# bins from shortest to longest.
ggplot(data = season_length) +
  geom_bar(aes(x = length_mins, y = count, fill = member_casual), stat = 'identity', position = 'dodge') +
  facet_grid(
    factor(season, levels = c('winter', 'spring', 'summer', 'fall')) ~
      factor(day, levels = c('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'))
  ) +
  scale_fill_manual(values = cbPalette) +
  scale_x_discrete(limits = c('001-005', '006-010', '011-020', '021-030', '031-060', '061-240', '241-480', '>480')) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.1, hjust = 1))
Within this sub-sample, 59.6% of rides were by members and 40.4% were by casual users, which differs by 0.1% from the split found when the full data (after dropping missing values) was analysed (#3).
There is an uptick in bike share use in the summer: 50% of casual use was in the summer, compared to 38% of member use (#4).
The monthly use pattern supports the seasonal variation in bike share use, with highest use in June, July, August for casual users. Member use shows a wider peak, with greater use in June, July, August, September (#5).
Daily patterns indicate a preference for weekend use by casual users, as opposed to a preference for weekday use by members (#6).
Hourly use patterns indicate that casual users prefer to use bike shares later in the day, from the late morning to the early evening. In contrast, members show peaks in use during the morning and the late afternoon-early evening rush hours (#7).
The most popular ride durations for both casual users and members were between 6 and 20 minutes. In addition, there was a high percentage of short-duration use, lasting 1 - 5 minutes, among members (30% for members compared to 14% for casuals) (#8).
A higher percentage of casual bikers compared to members also used the bike shares between 21 - 240 minutes (#8).
The daily preferences of casual users and members were consistent throughout the seasons, as indicated by the similarities in distributions (#9).
The hourly preferences of casual users and members were also consistent throughout the seasons on weekdays (#10). On weekends, the preference for using the bike shares later in the day was observed for both members and casual users. Moreover, on weekends in summer, and on Saturdays in spring, casual users outnumbered member bike share users.
The duration preference overall patterns were consistent with patterns on weekdays (#11). However, on weekends, particularly in the summer - members are less likely to take short-duration trips; and more casual users used the bikes for 11 - 60 minutes than on the weekdays.
Both members and casual users seem to use the bike share service for recreational purposes on weekends, in the summer, in the late morning - early evening. The additional popularity of longer ride duration among casual users suggests that in addition to being mindful of aspects surrounding recreational use, comfort and physical accessibility may become important to this group.
A limitation of this analysis is that over 20% of rows were dropped because of missing start/end station names and/or IDs - as discussed in the cleaning exercise notebook. Further investigation is needed to determine whether the decisions made during the cleaning process were appropriate.
It is also assumed that the patterns in this 400K-row slice of the larger dataset are representative of the whole. The percent distribution between users across the temporal variables agreed to within 0.1% (data not shown).