# Notes: Installing and loading packages
library(tidyverse)## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(here)## here() starts at C:/Users/SWill/Documents/R Studio CITY BIKES
library(skimr)
library(janitor)##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(dplyr)
library(tidyr)
library(readr)
library(magrittr)##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
library(ggplot2)
library(conflicted)

# Report the current working directory (recorded output from the original run follows).
getwd()
## [1] "C:/Users/SWill/Documents/R Studio CITY BIKES"

# Point the session at the project folder so the relative CSV paths used by
# the read_csv() calls resolve.
setwd("C:/Users/SWill/Documents/R Studio CITY BIKES")

# Notes: Upload Divvy data sets (csv files) according to Divvy Exercise R Script NOT THE PREVIOUS 12 MONTHS OF CYCLISTIC TRIP DATA
# Read Divvy_Trips_2020_Q1.csv into q1_2020 (recorded readr output follows).
q1_2020 <- read_csv(file = "Divvy_Trips_2020_Q1.csv")
## Rows: 426887 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): ride_id, rideable_type, start_station_name, end_station_name, memb...
## dbl (6): start_station_id, end_station_id, start_lat, start_lng, end_lat, e...
## dttm (2): started_at, ended_at
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read Divvy_Trips_2019_Q2.csv into q2_2019 (recorded readr output follows).
q2_2019 <- read_csv(file = "Divvy_Trips_2019_Q2.csv")
## Rows: 1108163 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): 03 - Rental Start Station Name, 02 - Rental End Station Name, User...
## dbl (5): 01 - Rental Details Rental ID, 01 - Rental Details Bike ID, 03 - R...
## num (1): 01 - Rental Details Duration In Seconds Uncapped
## dttm (2): 01 - Rental Details Local Start Time, 01 - Rental Details Local En...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read Divvy_Trips_2019_Q3.csv into q3_2019 (recorded readr output follows).
q3_2019 <- read_csv(file = "Divvy_Trips_2019_Q3.csv")
## Rows: 1640718 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): from_station_name, to_station_name, usertype, gender
## dbl (5): trip_id, bikeid, from_station_id, to_station_id, birthyear
## num (1): tripduration
## dttm (2): start_time, end_time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read Divvy_Trips_2019_Q4.csv into q4_2019 (recorded readr output follows).
q4_2019 <- read_csv(file = "Divvy_Trips_2019_Q4.csv")
## Rows: 704054 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): from_station_name, to_station_name, usertype, gender
## dbl (5): trip_id, bikeid, from_station_id, to_station_id, birthyear
## num (1): tripduration
## dttm (2): start_time, end_time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Notes: Use the ‘rename()’ function to rename columns so that they are consistent with q1_2020
# Rename the Q2 2019 columns to the q1_2020 names.
# The outer parentheses print the renamed tibble as well as assigning it.
(q2_2019 <- q2_2019 %>%
  rename(
    ride_id = "01 - Rental Details Rental ID",
    rideable_type = "01 - Rental Details Bike ID",
    started_at = "01 - Rental Details Local Start Time",
    ended_at = "01 - Rental Details Local End Time",
    start_station_name = "03 - Rental Start Station Name",
    start_station_id = "03 - Rental Start Station ID",
    end_station_name = "02 - Rental End Station Name",
    end_station_id = "02 - Rental End Station ID",
    member_casual = "User Type"
  ))
## # A tibble: 1,108,163 × 12
## ride_id started_at ended_at rideable_type
## <dbl> <dttm> <dttm> <dbl>
## 1 22178529 2019-04-01 00:02:22 2019-04-01 00:09:48 6251
## 2 22178530 2019-04-01 00:03:02 2019-04-01 00:20:30 6226
## 3 22178531 2019-04-01 00:11:07 2019-04-01 00:15:19 5649
## 4 22178532 2019-04-01 00:13:01 2019-04-01 00:18:58 4151
## 5 22178533 2019-04-01 00:19:26 2019-04-01 00:36:13 3270
## 6 22178534 2019-04-01 00:19:39 2019-04-01 00:23:56 3123
## 7 22178535 2019-04-01 00:26:33 2019-04-01 00:35:41 6418
## 8 22178536 2019-04-01 00:29:48 2019-04-01 00:36:11 4513
## 9 22178537 2019-04-01 00:32:07 2019-04-01 01:07:44 3280
## 10 22178538 2019-04-01 00:32:19 2019-04-01 01:07:39 5534
## # ℹ 1,108,153 more rows
## # ℹ 8 more variables: `01 - Rental Details Duration In Seconds Uncapped` <dbl>,
## # start_station_id <dbl>, start_station_name <chr>, end_station_id <dbl>,
## # end_station_name <chr>, member_casual <chr>, `Member Gender` <chr>,
## # `05 - Member Details Member Birthday Year` <dbl>
# Rename the Q3 2019 columns to the q1_2020 names.
# The outer parentheses print the renamed tibble as well as assigning it.
(q3_2019 <- q3_2019 %>%
  rename(
    ride_id = trip_id,
    rideable_type = bikeid,
    started_at = start_time,
    ended_at = end_time,
    start_station_name = from_station_name,
    start_station_id = from_station_id,
    end_station_name = to_station_name,
    end_station_id = to_station_id,
    member_casual = usertype
  ))
## # A tibble: 1,640,718 × 12
## ride_id started_at ended_at rideable_type tripduration
## <dbl> <dttm> <dttm> <dbl> <dbl>
## 1 23479388 2019-07-01 00:00:27 2019-07-01 00:20:41 3591 1214
## 2 23479389 2019-07-01 00:01:16 2019-07-01 00:18:44 5353 1048
## 3 23479390 2019-07-01 00:01:48 2019-07-01 00:27:42 6180 1554
## 4 23479391 2019-07-01 00:02:07 2019-07-01 00:27:10 5540 1503
## 5 23479392 2019-07-01 00:02:13 2019-07-01 00:22:26 6014 1213
## 6 23479393 2019-07-01 00:02:21 2019-07-01 00:07:31 4941 310
## 7 23479394 2019-07-01 00:02:24 2019-07-01 00:23:12 3770 1248
## 8 23479395 2019-07-01 00:02:26 2019-07-01 00:28:16 5442 1550
## 9 23479396 2019-07-01 00:02:34 2019-07-01 00:28:57 2957 1583
## 10 23479397 2019-07-01 00:02:45 2019-07-01 00:29:14 6091 1589
## # ℹ 1,640,708 more rows
## # ℹ 7 more variables: start_station_id <dbl>, start_station_name <chr>,
## # end_station_id <dbl>, end_station_name <chr>, member_casual <chr>,
## # gender <chr>, birthyear <dbl>
# Rename the Q4 2019 columns to the q1_2020 names.
# The outer parentheses print the renamed tibble as well as assigning it.
(q4_2019 <- q4_2019 %>%
  rename(
    ride_id = trip_id,
    rideable_type = bikeid,
    started_at = start_time,
    ended_at = end_time,
    start_station_name = from_station_name,
    start_station_id = from_station_id,
    end_station_name = to_station_name,
    end_station_id = to_station_id,
    member_casual = usertype
  ))
## # A tibble: 704,054 × 12
## ride_id started_at ended_at rideable_type tripduration
## <dbl> <dttm> <dttm> <dbl> <dbl>
## 1 25223640 2019-10-01 00:01:39 2019-10-01 00:17:20 2215 940
## 2 25223641 2019-10-01 00:02:16 2019-10-01 00:06:34 6328 258
## 3 25223642 2019-10-01 00:04:32 2019-10-01 00:18:43 3003 850
## 4 25223643 2019-10-01 00:04:32 2019-10-01 00:43:43 3275 2350
## 5 25223644 2019-10-01 00:04:34 2019-10-01 00:35:42 5294 1867
## 6 25223645 2019-10-01 00:04:38 2019-10-01 00:10:51 1891 373
## 7 25223646 2019-10-01 00:04:52 2019-10-01 00:22:45 1061 1072
## 8 25223647 2019-10-01 00:04:57 2019-10-01 00:29:16 1274 1458
## 9 25223648 2019-10-01 00:05:20 2019-10-01 00:29:18 6011 1437
## 10 25223649 2019-10-01 00:05:20 2019-10-01 02:23:46 2957 8306
## # ℹ 704,044 more rows
## # ℹ 7 more variables: start_station_id <dbl>, start_station_name <chr>,
## # end_station_id <dbl>, end_station_name <chr>, member_casual <chr>,
## # gender <chr>, birthyear <dbl>
# Convert ride_id and rideable_type to character in the 2019 frames so they
# stack with q1_2020, where readr parsed both columns as character.
q2_2019 <- mutate(
  q2_2019,
  ride_id = as.character(ride_id),
  rideable_type = as.character(rideable_type)
)
q3_2019 <- mutate(
  q3_2019,
  ride_id = as.character(ride_id),
  rideable_type = as.character(rideable_type)
)
q4_2019 <- mutate(
  q4_2019,
  ride_id = as.character(ride_id),
  rideable_type = as.character(rideable_type)
)

# Stack the four quarterly frames into one table.
all_trips <- bind_rows(q2_2019, q3_2019, q4_2019, q1_2020)

# Drop the coordinate, demographic and pre-computed duration columns, which
# are not present in every quarter.
all_trips <- all_trips %>%
  select(-c(
    start_lat, start_lng, end_lat, end_lng, birthyear, gender,
    "01 - Rental Details Duration In Seconds Uncapped",
    "05 - Member Details Member Birthday Year",
    "Member Gender",
    "tripduration"
  ))

# Notes: Use ‘colnames()’ function to inspect the new table that has been created
# Column names of the combined table.
all_trips %>% colnames()
## [1] "ride_id" "started_at" "ended_at"
## [4] "rideable_type" "start_station_id" "start_station_name"
## [7] "end_station_id" "end_station_name" "member_casual"
# Number of rows.
all_trips %>% nrow()
## [1] 3879822
# Dimensions (rows, columns).
all_trips %>% dim()
## [1] 3879822 9
# First six rows.
all_trips %>% head()
## # A tibble: 6 × 9
## ride_id started_at ended_at rideable_type start_station_id
## <chr> <dttm> <dttm> <chr> <dbl>
## 1 221785… 2019-04-01 00:02:22 2019-04-01 00:09:48 6251 81
## 2 221785… 2019-04-01 00:03:02 2019-04-01 00:20:30 6226 317
## 3 221785… 2019-04-01 00:11:07 2019-04-01 00:15:19 5649 283
## 4 221785… 2019-04-01 00:13:01 2019-04-01 00:18:58 4151 26
## 5 221785… 2019-04-01 00:19:26 2019-04-01 00:36:13 3270 202
## 6 221785… 2019-04-01 00:19:39 2019-04-01 00:23:56 3123 420
## # ℹ 4 more variables: start_station_name <chr>, end_station_id <dbl>,
## # end_station_name <chr>, member_casual <chr>
# Last six rows of the combined table.
all_trips %>% tail()
## # A tibble: 6 × 9
## ride_id started_at ended_at rideable_type start_station_id
## <chr> <dttm> <dttm> <chr> <dbl>
## 1 6F4D22… 2020-03-10 10:40:27 2020-03-10 10:40:29 docked_bike 675
## 2 ADDAA3… 2020-03-10 10:40:06 2020-03-10 10:40:07 docked_bike 675
## 3 82B10F… 2020-03-07 15:25:55 2020-03-07 16:14:03 docked_bike 161
## 4 AA0D5A… 2020-03-01 13:12:38 2020-03-01 13:38:29 docked_bike 141
## 5 329636… 2020-03-07 18:02:45 2020-03-07 18:13:18 docked_bike 672
## 6 064EC7… 2020-03-08 13:03:57 2020-03-08 13:32:27 docked_bike 110
## # ℹ 4 more variables: start_station_name <chr>, end_station_id <dbl>,
## # end_station_name <chr>, member_casual <chr>
# Compact structure listing: one line per column with its type.
all_trips %>% str()
## tibble [3,879,822 × 9] (S3: tbl_df/tbl/data.frame)
## $ ride_id : chr [1:3879822] "22178529" "22178530" "22178531" "22178532" ...
## $ started_at : POSIXct[1:3879822], format: "2019-04-01 00:02:22" "2019-04-01 00:03:02" ...
## $ ended_at : POSIXct[1:3879822], format: "2019-04-01 00:09:48" "2019-04-01 00:20:30" ...
## $ rideable_type : chr [1:3879822] "6251" "6226" "5649" "4151" ...
## $ start_station_id : num [1:3879822] 81 317 283 26 202 420 503 260 211 211 ...
## $ start_station_name: chr [1:3879822] "Daley Center Plaza" "Wood St & Taylor St" "LaSalle St & Jackson Blvd" "McClurg Ct & Illinois St" ...
## $ end_station_id : num [1:3879822] 56 59 174 133 129 426 500 499 211 211 ...
## $ end_station_name : chr [1:3879822] "Desplaines St & Kinzie St" "Wabash Ave & Roosevelt Rd" "Canal St & Madison St" "Kingsbury St & Kinzie St" ...
## $ member_casual : chr [1:3879822] "Subscriber" "Subscriber" "Subscriber" "Subscriber" ...
# Per-column summary of the combined table.
all_trips %>% summary()
## ride_id started_at
## Length:3879822 Min. :2019-04-01 00:02:22.00
## Class :character 1st Qu.:2019-06-23 07:49:09.25
## Mode :character Median :2019-08-14 17:43:38.00
## Mean :2019-08-26 00:49:59.38
## 3rd Qu.:2019-10-12 12:10:21.00
## Max. :2020-03-31 23:51:34.00
##
## ended_at rideable_type start_station_id
## Min. :2019-04-01 00:09:48.00 Length:3879822 Min. : 1.0
## 1st Qu.:2019-06-23 08:20:27.75 Class :character 1st Qu.: 77.0
## Median :2019-08-14 18:02:04.00 Mode :character Median :174.0
## Mean :2019-08-26 01:14:37.06 Mean :202.9
## 3rd Qu.:2019-10-12 12:36:16.75 3rd Qu.:291.0
## Max. :2020-05-19 20:10:34.00 Max. :675.0
##
## start_station_name end_station_id end_station_name member_casual
## Length:3879822 Min. : 1.0 Length:3879822 Length:3879822
## Class :character 1st Qu.: 77.0 Class :character Class :character
## Mode :character Median :174.0 Mode :character Mode :character
## Mean :203.8
## 3rd Qu.:291.0
## Max. :675.0
## NA's :1
# skimr summary of every column in all_trips. The rendered summary tables from
# the original run are kept below as comments so the file stays parseable.
skim_without_charts(all_trips)
## | Name | all_trips |
## | Number of rows | 3879822 |
## | Number of columns | 9 |
## | _______________________ | |
## | Column type frequency: | |
## | character | 5 |
## | numeric | 2 |
## | POSIXct | 2 |
## | ________________________ | |
## | Group variables | None |
## Variable type: character
## | skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
## |---|---|---|---|---|---|---|---|
## | ride_id | 0 | 1 | 7 | 16 | 0 | 3879822 | 0 |
## | rideable_type | 0 | 1 | 1 | 11 | 0 | 6004 | 0 |
## | start_station_name | 0 | 1 | 5 | 43 | 0 | 643 | 0 |
## | end_station_name | 1 | 1 | 5 | 43 | 0 | 644 | 0 |
## | member_casual | 0 | 1 | 6 | 10 | 0 | 4 | 0 |
## Variable type: numeric
## | skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 |
## |---|---|---|---|---|---|---|---|---|---|
## | start_station_id | 0 | 1 | 202.90 | 157.14 | 1 | 77 | 174 | 291 | 675 |
## | end_station_id | 1 | 1 | 203.76 | 157.19 | 1 | 77 | 174 | 291 | 675 |
## Variable type: POSIXct
## | skim_variable | n_missing | complete_rate | min | max | median | n_unique |
## |---|---|---|---|---|---|---|
## | started_at | 0 | 1 | 2019-04-01 00:02:22 | 2020-03-31 23:51:34 | 2019-08-14 17:43:38 | 3362333 |
## | ended_at | 0 | 1 | 2019-04-01 00:09:48 | 2020-05-19 20:10:34 | 2019-08-14 18:02:04 | 3299507 |
# Transposed preview: one row per column with its type and first values.
all_trips %>% glimpse()
## Rows: 3,879,822
## Columns: 9
## $ ride_id <chr> "22178529", "22178530", "22178531", "22178532", "22…
## $ started_at <dttm> 2019-04-01 00:02:22, 2019-04-01 00:03:02, 2019-04-…
## $ ended_at <dttm> 2019-04-01 00:09:48, 2019-04-01 00:20:30, 2019-04-…
## $ rideable_type <chr> "6251", "6226", "5649", "4151", "3270", "3123", "64…
## $ start_station_id <dbl> 81, 317, 283, 26, 202, 420, 503, 260, 211, 211, 304…
## $ start_station_name <chr> "Daley Center Plaza", "Wood St & Taylor St", "LaSal…
## $ end_station_id <dbl> 56, 59, 174, 133, 129, 426, 500, 499, 211, 211, 232…
## $ end_station_name <chr> "Desplaines St & Kinzie St", "Wabash Ave & Roosevel…
## $ member_casual <chr> "Subscriber", "Subscriber", "Subscriber", "Subscrib…
# Derive calendar fields from the ride start time so rides can be aggregated
# by date, month, day, year and weekday. `date` is already a Date, so it is
# formatted directly without a second as.Date() conversion.
all_trips$date <- as.Date(all_trips$started_at)
all_trips$month <- format(all_trips$date, "%m")
all_trips$day <- format(all_trips$date, "%d")
all_trips$year <- format(all_trips$date, "%Y")
all_trips$day_of_week <- format(all_trips$date, "%A")

# Ride duration. The unit is pinned to seconds: the default units = "auto"
# picks the unit from the data, so a different extract could yield minutes.
all_trips$ride_length <- difftime(
  all_trips$ended_at,
  all_trips$started_at,
  units = "secs"
)

# Confirm the new columns and their types.
str(all_trips)
## tibble [3,879,822 × 15] (S3: tbl_df/tbl/data.frame)
## $ ride_id : chr [1:3879822] "22178529" "22178530" "22178531" "22178532" ...
## $ started_at : POSIXct[1:3879822], format: "2019-04-01 00:02:22" "2019-04-01 00:03:02" ...
## $ ended_at : POSIXct[1:3879822], format: "2019-04-01 00:09:48" "2019-04-01 00:20:30" ...
## $ rideable_type : chr [1:3879822] "6251" "6226" "5649" "4151" ...
## $ start_station_id : num [1:3879822] 81 317 283 26 202 420 503 260 211 211 ...
## $ start_station_name: chr [1:3879822] "Daley Center Plaza" "Wood St & Taylor St" "LaSalle St & Jackson Blvd" "McClurg Ct & Illinois St" ...
## $ end_station_id : num [1:3879822] 56 59 174 133 129 426 500 499 211 211 ...
## $ end_station_name : chr [1:3879822] "Desplaines St & Kinzie St" "Wabash Ave & Roosevelt Rd" "Canal St & Madison St" "Kingsbury St & Kinzie St" ...
## $ member_casual : chr [1:3879822] "Subscriber" "Subscriber" "Subscriber" "Subscriber" ...
## $ date : Date[1:3879822], format: "2019-04-01" "2019-04-01" ...
## $ month : chr [1:3879822] "04" "04" "04" "04" ...
## $ day : chr [1:3879822] "01" "01" "01" "01" ...
## $ year : chr [1:3879822] "2019" "2019" "2019" "2019" ...
## $ day_of_week : chr [1:3879822] "Monday" "Monday" "Monday" "Monday" ...
## $ ride_length : 'difftime' num [1:3879822] 446 1048 252 357 ...
## ..- attr(*, "units")= chr "secs"
# Count rides per membership label before harmonising the labels.
table(all_trips$member_casual)
##
## casual Customer member Subscriber
## 48480 857474 378407 2595461

# Collapse the four labels into two: "Subscriber" becomes "member" and
# "Customer" becomes "casual".
all_trips <- all_trips %>%
  mutate(
    member_casual = recode(
      member_casual,
      "Subscriber" = "member",
      "Customer" = "casual"
    )
  )

# Confirm that only the two expected labels remain.
table(all_trips$member_casual)
##
## casual member
## 905954 2973868
# Check the type of ride_length before converting it.
is.factor(all_trips$ride_length)
## [1] FALSE

# Convert the difftime to plain numeric seconds. Asking for the unit
# explicitly avoids the round-trip through character and does not depend on
# which unit the difftime happens to carry.
all_trips$ride_length <- as.numeric(all_trips$ride_length, units = "secs")
is.numeric(all_trips$ride_length)
## [1] TRUE

# Build the cleaned table: drop rides that start at "HQ QR" and rides with a
# negative duration. Both masks are NA-safe (%in% never returns NA, and the
# is.na() guard covers ride_length), so a missing value cannot turn into an
# all-NA row, which a bare logical index containing NA would produce.
hq_ride <- all_trips$start_station_name %in% "HQ QR"
negative_ride <- !is.na(all_trips$ride_length) & all_trips$ride_length < 0
all_trips_v2 <- all_trips[!(hq_ride | negative_ride), ]

mean(all_trips_v2$ride_length)
## [1] 1479.139
# Descriptive statistics for ride_length in the cleaned table.
with(all_trips_v2, median(ride_length))
## [1] 712
with(all_trips_v2, max(ride_length))
## [1] 9387024
with(all_trips_v2, min(ride_length))
## [1] 1
with(all_trips_v2, summary(ride_length))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 412 712 1479 1289 9387024
# Compare ride_length between rider types: mean, median, max and min in turn.
# The formula uses `all_trips_v2$...` terms, so the output columns carry those
# full expressions as their names.
aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual, FUN = mean)## all_trips_v2$member_casual all_trips_v2$ride_length
## 1 casual 3552.7502
## 2 member 850.0662
# Median ride_length per rider type.
aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual, FUN = median)## all_trips_v2$member_casual all_trips_v2$ride_length
## 1 casual 1546
## 2 member 589
# Longest ride per rider type.
aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual, FUN = max)## all_trips_v2$member_casual all_trips_v2$ride_length
## 1 casual 9387024
## 2 member 9056634
# Shortest ride per rider type.
aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual, FUN = min)## all_trips_v2$member_casual all_trips_v2$ride_length
## 1 casual 2
## 2 member 1
# Mean ride_length per rider type and weekday. day_of_week is still a plain
# character column here, so the groups come out in alphabetical order.
aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual + all_trips_v2$day_of_week, FUN = mean)## all_trips_v2$member_casual all_trips_v2$day_of_week all_trips_v2$ride_length
## 1 casual Friday 3773.8351
## 2 member Friday 824.5305
## 3 casual Monday 3372.2869
## 4 member Monday 842.5726
## 5 casual Saturday 3331.9138
## 6 member Saturday 968.9337
## 7 casual Sunday 3581.4054
## 8 member Sunday 919.9746
## 9 casual Thursday 3682.9847
## 10 member Thursday 823.9278
## 11 casual Tuesday 3596.3599
## 12 member Tuesday 826.1427
## 13 casual Wednesday 3718.6619
## 14 member Wednesday 823.9996
# Make day_of_week an ordered factor, Sunday first, so grouped output follows
# the calendar instead of alphabetical order. The levels are English weekday
# names and must match what format(date, "%A") produced.
all_trips_v2$day_of_week <- ordered(
  all_trips_v2$day_of_week,
  levels = c(
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
  )
)

# Mean ride_length per rider type and weekday, now in calendar order.
aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual + all_trips_v2$day_of_week, FUN = mean)
## all_trips_v2$member_casual all_trips_v2$day_of_week all_trips_v2$ride_length
## 1 casual Sunday 3581.4054
## 2 member Sunday 919.9746
## 3 casual Monday 3372.2869
## 4 member Monday 842.5726
## 5 casual Tuesday 3596.3599
## 6 member Tuesday 826.1427
## 7 casual Wednesday 3718.6619
## 8 member Wednesday 823.9996
## 9 casual Thursday 3682.9847
## 10 member Thursday 823.9278
## 11 casual Friday 3773.8351
## 12 member Friday 824.5305
## 13 casual Saturday 3331.9138
## 14 member Saturday 968.9337
# Number of rides and mean ride length for each rider type on each weekday.
# The weekday label is computed from started_at inside group_by().
all_trips_v2 %>%
  group_by(member_casual, weekday = wday(started_at, label = TRUE)) %>%
  summarise(
    number_of_rides = n(),
    average_duration = mean(ride_length)
  ) %>%
  arrange(member_casual, weekday)
## `summarise()` has grouped output by 'member_casual'. You can override using the
## `.groups` argument.
## # A tibble: 14 × 4
## # Groups: member_casual [2]
## member_casual weekday number_of_rides average_duration
## <chr> <ord> <int> <dbl>
## 1 casual Sun 181293 3581.
## 2 casual Mon 103296 3372.
## 3 casual Tue 90510 3596.
## 4 casual Wed 92457 3719.
## 5 casual Thu 102679 3683.
## 6 casual Fri 122404 3774.
## 7 casual Sat 209543 3332.
## 8 member Sun 267965 920.
## 9 member Mon 472196 843.
## 10 member Tue 508445 826.
## 11 member Wed 500329 824.
## 12 member Thu 484177 824.
## 13 member Fri 452790 825.
## 14 member Sat 287958 969.
# Dodged column chart of the number of rides per weekday, split by rider type.
all_trips_v2 %>%
  group_by(member_casual, weekday = wday(started_at, label = TRUE)) %>%
  summarise(
    number_of_rides = n(),
    average_duration = mean(ride_length)
  ) %>%
  arrange(member_casual, weekday) %>%
  ggplot(aes(x = weekday, y = number_of_rides, fill = member_casual)) +
  geom_col(position = "dodge")
## `summarise()` has grouped output by 'member_casual'. You can override using the
## `.groups` argument.
## Visualization for average duration.
# Dodged column chart of the mean ride length per weekday, split by rider type.
all_trips_v2 %>%
  group_by(member_casual, weekday = wday(started_at, label = TRUE)) %>%
  summarise(
    number_of_rides = n(),
    average_duration = mean(ride_length)
  ) %>%
  arrange(member_casual, weekday) %>%
  ggplot(aes(x = weekday, y = average_duration, fill = member_casual)) +
  geom_col(position = "dodge")
## `summarise()` has grouped output by 'member_casual'. You can override using the
## `.groups` argument.
## EXPORT SUMMARY FILE (average ride length by rider type and day of week)
# NOTE: despite its name, `counts` holds the mean ride_length per group, not
# ride counts. The name is kept so any later reference to it still works.
counts <- aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual + all_trips_v2$day_of_week, FUN = mean)

# Write the summary to disk without row names.
write.csv(counts, "C:\\Users\\SWill\\Documents\\R Studio CITY BIKES\\avg_ride_length.csv", row.names = FALSE)

# Record the R version and package versions used for this run.
sessionInfo()
## R version 4.2.3 (2023-03-15 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United Kingdom.utf8
## [2] LC_CTYPE=English_United Kingdom.utf8
## [3] LC_MONETARY=English_United Kingdom.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United Kingdom.utf8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] conflicted_1.2.0 magrittr_2.0.3 janitor_2.2.0 skimr_2.1.5
## [5] here_1.0.1 lubridate_1.9.2 forcats_1.0.0 stringr_1.5.0
## [9] dplyr_1.1.2 purrr_1.0.1 readr_2.1.4 tidyr_1.3.0
## [13] tibble_3.2.1 ggplot2_3.4.2 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.0 xfun_0.38 bslib_0.4.0 repr_1.1.6
## [5] snakecase_0.11.0 colorspace_2.0-3 vctrs_0.6.1 generics_0.1.3
## [9] htmltools_0.5.3 yaml_2.3.5 base64enc_0.1-3 utf8_1.2.2
## [13] rlang_1.1.0 jquerylib_0.1.4 pillar_1.9.0 glue_1.6.2
## [17] withr_2.5.0 bit64_4.0.5 lifecycle_1.0.3 munsell_0.5.0
## [21] gtable_0.3.0 memoise_2.0.1 evaluate_0.16 labeling_0.4.2
## [25] knitr_1.39 tzdb_0.3.0 fastmap_1.1.0 parallel_4.2.3
## [29] fansi_1.0.3 highr_0.9 scales_1.2.1 cachem_1.0.6
## [33] vroom_1.6.1 jsonlite_1.8.4 farver_2.1.1 bit_4.0.4
## [37] hms_1.1.2 digest_0.6.29 stringi_1.7.8 grid_4.2.3
## [41] rprojroot_2.0.3 cli_3.6.1 tools_4.2.3 sass_0.4.2
## [45] crayon_1.5.1 pkgconfig_2.0.3 ellipsis_0.3.2 timechange_0.1.1
## [49] rmarkdown_2.15 rstudioapi_0.14 R6_2.5.1 compiler_4.2.3