All packages required for this analysis are now being loaded
library(lubridate)
library(skimr)
library(readr)
library(ggplot2)
library(dplyr)
library(tidyverse)
library(tidyr)
library(janitor)
library(readxl)
The downloaded datasets from the cyclist archive, from December 2022 to November 2023, were collected and loaded as dataframes.
dec_2022<- read_excel("Cleaned\\202212-divvy-tripdata.xlsx")
jan_2023 <- read_excel("Cleaned\\202301-divvy-tripdata.xlsx")
feb_2023 <- read_excel("Cleaned\\202302-divvy-tripdata.xlsx")
mar_2023 <- read_excel("Cleaned\\202303-divvy-tripdata.xlsx")
april_2023 <- read_excel("Cleaned\\202304-divvy-tripdata.xlsx")
may_2023 <- read_excel("Cleaned\\202305-divvy-tripdata.xlsx")
june_2023 <- read_excel("Cleaned\\202306-divvy-tripdata.xlsx")
july_2023 <- read_excel("Cleaned\\202307-divvy-tripdata.xlsx")
aug_2023 <- read_excel("Cleaned\\202308-divvy-tripdata.xlsx")
sept_2023 <- read_excel("Cleaned\\202309-divvy-tripdata.xlsx")
oct_2023 <- read_excel("Cleaned\\202310-divvy-tripdata.xlsx")
nov_2023 <- read_excel("Cleaned\\202311-divvy-tripdata.xlsx")
The datasets had already been skimmed through in excel to remove duplicate values and ensure they had the same number and column names
compare_df_cols(dec_2022,jan_2023,feb_2023,mar_2023,april_2023,may_2023,june_2023,july_2023,aug_2023,sept_2023,nov_2023)
## column_name dec_2022 jan_2023 feb_2023
## 1 end_station_name character character character
## 2 ended_at POSIXct, POSIXt POSIXct, POSIXt POSIXct, POSIXt
## 3 member_casual character character character
## 4 ride_id character character character
## 5 rideable_type character character character
## 6 start_station_name character character character
## 7 started_at POSIXct, POSIXt POSIXct, POSIXt POSIXct, POSIXt
## mar_2023 april_2023 may_2023 june_2023
## 1 character character character character
## 2 POSIXct, POSIXt POSIXct, POSIXt POSIXct, POSIXt POSIXct, POSIXt
## 3 character character character character
## 4 character character character character
## 5 character character character character
## 6 character character character character
## 7 POSIXct, POSIXt POSIXct, POSIXt POSIXct, POSIXt POSIXct, POSIXt
## july_2023 aug_2023 sept_2023 nov_2023
## 1 character character character character
## 2 POSIXct, POSIXt POSIXct, POSIXt POSIXct, POSIXt POSIXct, POSIXt
## 3 character character character character
## 4 character character character character
## 5 character character character character
## 6 character character character character
## 7 POSIXct, POSIXt POSIXct, POSIXt POSIXct, POSIXt POSIXct, POSIXt
The function ‘rbind()’ was used to combine all the dataframes created from the collected datasets.
#Combining the datasets into one dataframe
all_trips <- rbind(dec_2022,jan_2023,feb_2023,mar_2023,april_2023,may_2023,june_2023,july_2023,aug_2023,sept_2023,nov_2023)
#Checking
skim_without_charts(all_trips)
| Name | all_trips |
| Number of rows | 5140497 |
| Number of columns | 7 |
| _______________________ | |
| Column type frequency: | |
| character | 5 |
| POSIXct | 2 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| ride_id | 0 | 1.00 | 12 | 23 | 0 | 5140497 | 0 |
| rideable_type | 0 | 1.00 | 11 | 13 | 0 | 3 | 0 |
| start_station_name | 784877 | 0.85 | 3 | 64 | 0 | 1586 | 0 |
| end_station_name | 833183 | 0.84 | 3 | 64 | 0 | 1594 | 0 |
| member_casual | 0 | 1.00 | 6 | 6 | 0 | 2 | 0 |
Variable type: POSIXct
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| started_at | 0 | 1 | 2022-12-01 00:01:22 | 2023-11-30 23:59:14 | 2023-06-30 21:20:49 | 4330373 |
| ended_at | 0 | 1 | 2022-12-01 00:03:41 | 2023-12-01 20:42:31 | 2023-06-30 21:41:21 | 4340620 |
From the date column, the weekdays were extracted and put into a new column. The days, months and years were also extracted and converted to date format from the date column, each put into new columns like so:
#adding new columns to separate the date into day, month, year and weekday
all_trips$date <- as.Date(all_trips$started_at)
all_trips$day <- as.Date(all_trips$date, "%d")
all_trips$month <- as.Date(all_trips$date, "%m")
all_trips$year <- as.Date(all_trips$date, "%y")
all_trips$day_of_week <- weekdays(all_trips$date)
#checking
colnames(all_trips)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "end_station_name"
## [7] "member_casual" "date" "day"
## [10] "month" "year" "day_of_week"
str(all_trips)
## tibble [5,140,497 × 12] (S3: tbl_df/tbl/data.frame)
## $ ride_id : chr [1:5140497] "65DBD2F447EC51C2" "0C201AA7EA0EA1AD" "E0B148CCB358A49D" "54C5775D2B7C9188" ...
## $ rideable_type : chr [1:5140497] "electric_bike" "classic_bike" "electric_bike" "classic_bike" ...
## $ started_at : POSIXct[1:5140497], format: "2022-12-05 10:47:18" "2022-12-18 06:42:33" ...
## $ ended_at : POSIXct[1:5140497], format: "2022-12-05 10:56:34" "2022-12-18 07:08:44" ...
## $ start_station_name: chr [1:5140497] "Clifton Ave & Armitage Ave" "Broadway & Belmont Ave" "Sangamon St & Lake St" "Shields Ave & 31st St" ...
## $ end_station_name : chr [1:5140497] "Sedgwick St & Webster Ave" "Sedgwick St & Webster Ave" "St. Clair St & Erie St" "Damen Ave & Madison St" ...
## $ member_casual : chr [1:5140497] "member" "casual" "member" "member" ...
## $ date : Date[1:5140497], format: "2022-12-05" "2022-12-18" ...
## $ day : Date[1:5140497], format: "2022-12-05" "2022-12-18" ...
## $ month : Date[1:5140497], format: "2022-12-05" "2022-12-18" ...
## $ year : Date[1:5140497], format: "2022-12-05" "2022-12-18" ...
## $ day_of_week : chr [1:5140497] "Monday" "Sunday" "Tuesday" "Tuesday" ...
dim(all_trips)
## [1] 5140497 12
Another column, ‘ride_length’ was added to get the length of the rides, using the ‘difftime()’ function. The output was gotten by calculating the difference in time of the rides from the ‘started_at’ column to the ‘ended_at’ column, and was converted as minutes.
# Creating a column to calculate the length of each ride
# Calculating the time difference from started at to ended at in minutes
# First, creating a new column to get the ride length
all_trips$ride_length <- difftime(all_trips$ended_at, all_trips$started_at, units="min")
The ride_length gotten was converted to numeric and rounded off to 2 decimal places.
#converting ride length difftime format to numeric
all_trips$ride_length <- as.numeric (all_trips$ride_length)
#rounding off to 2 decimal places
all_trips$ride_length <- round(all_trips$ride_length, 2)
Note: The datasets had already been skimmed through in excel to remove duplicate values and ensure they had the same number and names of columns.
The data cleaning process began by removing all null values, and the cleaned data was assigned a new data frame, all_trips_clean.
#Cleaning the data, removing nulls
all_trips_clean <- all_trips %>%
#filtering out negative ride lengths or ride lengths less than zero
drop_na %>%
filter(ride_length > 0)
#Checking for number or rows and columns
dim(all_trips_clean)
## [1] 3896073 13
In order to get the count of Member and Casual riders, the dataframe was grouped by the member_casual column and visualized for better understanding like so:
#Analyzing the data
all_trips_clean %>%
group_by(member_casual) %>%
summarise(number_of_rides=n()) %>%
select(member_casual, number_of_rides)
## # A tibble: 2 × 2
## member_casual number_of_rides
## <chr> <int>
## 1 casual 1396265
## 2 member 2499808
The number of rides, average ride length, maximum and minimum ride lengths were summarised, and the percentage of rides for each member type was gotten like so:
#Summarizing the data
all_trips_clean %>%
group_by(member_casual) %>%
summarise(number_of_rides=n(), avg_ride_length=mean(ride_length),
max_ride_length=max(ride_length), min_ride_length = min(ride_length)) %>%
mutate(ride_length_percentage = number_of_rides * 100/sum(number_of_rides))
## # A tibble: 2 × 6
## member_casual number_of_rides avg_ride_length max_ride_length min_ride_length
## <chr> <int> <dbl> <dbl> <dbl>
## 1 casual 1396265 23.1 12136. 0.02
## 2 member 2499808 12.2 1498. 0.02
## # ℹ 1 more variable: ride_length_percentage <dbl>
all_trips_clean %>%
group_by(member_casual,rideable_type) %>%
summarise(number_of_rides=n())
## # A tibble: 5 × 3
## # Groups: member_casual [2]
## member_casual rideable_type number_of_rides
## <chr> <chr> <int>
## 1 casual classic_bike 782631
## 2 casual docked_bike 78112
## 3 casual electric_bike 535522
## 4 member classic_bike 1609387
## 5 member electric_bike 890421
#Checking the average of ride length using the rideable types
all_trips_clean %>%
group_by(member_casual, rideable_type) %>%
summarise(average_ride_length=mean(ride_length), number_of_rides=n())
## # A tibble: 5 × 4
## # Groups: member_casual [2]
## member_casual rideable_type average_ride_length number_of_rides
## <chr> <chr> <dbl> <int>
## 1 casual classic_bike 25.7 782631
## 2 casual docked_bike 53.6 78112
## 3 casual electric_bike 14.8 535522
## 4 member classic_bike 13.1 1609387
## 5 member electric_bike 10.6 890421
#statistical summary of ride length
summary(all_trips_clean$ride_length)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.02 5.65 9.88 16.08 17.65 12136.30
The month column was formatted from the date column as Character, rather than as numeric, and was ordered to see the number of rides of the Member and Causal Riders in each month. The weekdays column was ordered from Sunday to Saturday and arranged to see what days in the week had more rides between the Member and Casual riders.
# ordering rides by the month and day of the week to see which month and day has most rides
# first order the month accordingly, from December 2022 to November 2023
# first order the day of week accordingly, from Sunday to Saturday
all_trips_clean$month <- format(all_trips_clean$date, "%B")
all_trips_clean$month <- ordered(all_trips_clean$month,levels=c("December", "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November"))
all_trips_clean$day_of_week <- ordered(all_trips_clean$day_of_week, levels=c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"))
#Check
str(all_trips_clean)
## tibble [3,896,073 × 13] (S3: tbl_df/tbl/data.frame)
## $ ride_id : chr [1:3896073] "65DBD2F447EC51C2" "0C201AA7EA0EA1AD" "E0B148CCB358A49D" "54C5775D2B7C9188" ...
## $ rideable_type : chr [1:3896073] "electric_bike" "classic_bike" "electric_bike" "classic_bike" ...
## $ started_at : POSIXct[1:3896073], format: "2022-12-05 10:47:18" "2022-12-18 06:42:33" ...
## $ ended_at : POSIXct[1:3896073], format: "2022-12-05 10:56:34" "2022-12-18 07:08:44" ...
## $ start_station_name: chr [1:3896073] "Clifton Ave & Armitage Ave" "Broadway & Belmont Ave" "Sangamon St & Lake St" "Shields Ave & 31st St" ...
## $ end_station_name : chr [1:3896073] "Sedgwick St & Webster Ave" "Sedgwick St & Webster Ave" "St. Clair St & Erie St" "Damen Ave & Madison St" ...
## $ member_casual : chr [1:3896073] "member" "casual" "member" "member" ...
## $ date : Date[1:3896073], format: "2022-12-05" "2022-12-18" ...
## $ day : Date[1:3896073], format: "2022-12-05" "2022-12-18" ...
## $ month : Ord.factor w/ 12 levels "December"<"January"<..: 1 1 1 1 1 1 1 1 1 1 ...
## $ year : Date[1:3896073], format: "2022-12-05" "2022-12-18" ...
## $ day_of_week : Ord.factor w/ 7 levels "Sunday"<"Monday"<..: 2 1 3 3 4 6 3 3 3 4 ...
## $ ride_length : num [1:3896073] 9.27 26.18 12.1 29.02 14.18 ...
#for month
all_trips_clean %>%
group_by(member_casual, month) %>%
summarise(number_of_rides=n(), average_ride_length=mean(ride_length)) %>%
arrange(month)
## # A tibble: 22 × 4
## # Groups: member_casual [2]
## member_casual month number_of_rides average_ride_length
## <chr> <ord> <int> <dbl>
## 1 casual December 31502 14.8
## 2 member December 103891 10.2
## 3 casual January 29618 14.9
## 4 member January 118662 10.0
## 5 casual February 32842 17.7
## 6 member February 116956 10.4
## 7 casual March 46786 16.7
## 8 member March 153647 10.2
## 9 casual April 110526 22.6
## 10 member April 213647 11.6
## # ℹ 12 more rows
#for day of week
all_trips_clean %>%
group_by(member_casual, day_of_week) %>%
summarise(number_of_rides=n(), average_ride_length=mean(ride_length)) %>%
arrange(day_of_week)
## # A tibble: 14 × 4
## # Groups: member_casual [2]
## member_casual day_of_week number_of_rides average_ride_length
## <chr> <ord> <int> <dbl>
## 1 casual Sunday 225343 26.5
## 2 member Sunday 268520 13.7
## 3 casual Monday 157303 22.7
## 4 member Monday 338094 11.6
## 5 casual Tuesday 162031 20.6
## 6 member Tuesday 395443 11.7
## 7 casual Wednesday 165323 19.6
## 8 member Wednesday 404132 11.6
## 9 casual Thursday 185238 20.2
## 10 member Thursday 413455 11.6
## 11 casual Friday 211194 22.6
## 12 member Friday 360950 12.1
## 13 casual Saturday 289833 26.1
## 14 member Saturday 319214 13.7
#Number of rides by day of the week, based on rideable/bike type
all_trips_clean %>%
group_by(member_casual, day_of_week, rideable_type) %>%
summarise(number_of_rides=n(), average_ride_length=mean(ride_length))
## # A tibble: 35 × 5
## # Groups: member_casual, day_of_week [14]
## member_casual day_of_week rideable_type number_of_rides average_ride_length
## <chr> <ord> <chr> <int> <dbl>
## 1 casual Sunday classic_bike 133412 28.7
## 2 casual Sunday docked_bike 14818 54.6
## 3 casual Sunday electric_bike 77113 17.3
## 4 casual Monday classic_bike 84977 25.6
## 5 casual Monday docked_bike 9070 53.9
## 6 casual Monday electric_bike 63256 14.4
## 7 casual Tuesday classic_bike 86655 23.1
## 8 casual Tuesday docked_bike 8633 52.9
## 9 casual Tuesday electric_bike 66743 13.1
## 10 casual Wednesday classic_bike 87070 22.3
## # ℹ 25 more rows
To get the most used Start Stations for casual riders, the data was filtered to only casual riders and summarised to show the number of rides, then this filtered data was assigned to a new data frame, to make visualizing easier.
# Analyzing to see the top start and end stations casual riders use
#Start stations
all_trips_clean %>%
group_by(member_casual, start_station_name) %>%
summarise(number_of_rides=n()) %>%
arrange(desc(number_of_rides)) %>%
filter(member_casual == "casual") %>%
select(start_station_name, number_of_rides)
## # A tibble: 1,459 × 3
## # Groups: member_casual [1]
## member_casual start_station_name number_of_rides
## <chr> <chr> <int>
## 1 casual Streeter Dr & Grand Ave 40348
## 2 casual DuSable Lake Shore Dr & Monroe St 26294
## 3 casual Michigan Ave & Oak St 19689
## 4 casual DuSable Lake Shore Dr & North Blvd 17745
## 5 casual Millennium Park 17405
## 6 casual Shedd Aquarium 15290
## 7 casual Theater on the Lake 14173
## 8 casual Dusable Harbor 13030
## 9 casual Montrose Harbor 10313
## 10 casual Adler Planetarium 10189
## # ℹ 1,449 more rows
#assigning the above to a data frame
#top_ten_start stations for casual riders
casual_start_stations <- all_trips_clean %>%
group_by(member_casual, start_station_name) %>%
summarise(number_of_rides=n()) %>%
arrange(desc(number_of_rides)) %>%
filter(member_casual == "casual") %>%
select(start_station_name, number_of_rides)
casual_top_start_stations <- head(casual_start_stations,10)
View(casual_top_start_stations)
#if need be
#tail(all_trips_clean_casual,10)
To get the top end stations for Casual riders, the process above was repeated, but was done with the ‘end_station_name’ column
casual_end_stations <- all_trips_clean %>%
group_by(member_casual, end_station_name) %>%
summarise(number_of_rides=n()) %>%
arrange(desc(number_of_rides)) %>%
filter(member_casual == "casual") %>%
select(end_station_name, number_of_rides)
casual_top_end_stations <- head(casual_end_stations,10)
View(casual_top_end_stations)
To get the most used Start Stations for Member riders, the data was filtered to only Member riders and summarised to show the number of rides. This filtered data was then assigned to a new data frame, to make visualizing easier.
all_trips_clean %>%
group_by(member_casual, start_station_name) %>%
summarise(number_of_rides=n()) %>%
arrange(desc(number_of_rides)) %>%
filter(member_casual == "member") %>%
select(start_station_name, number_of_rides)
## # A tibble: 1,374 × 3
## # Groups: member_casual [1]
## member_casual start_station_name number_of_rides
## <chr> <chr> <int>
## 1 member Kingsbury St & Kinzie St 21497
## 2 member Clinton St & Washington Blvd 21453
## 3 member Clark St & Elm St 20669
## 4 member Wells St & Concord Ln 17292
## 5 member Clinton St & Madison St 16584
## 6 member Wells St & Elm St 16524
## 7 member Loomis St & Lexington St 15598
## 8 member University Ave & 57th St 15203
## 9 member Streeter Dr & Grand Ave 15109
## 10 member Broadway & Barry Ave 15075
## # ℹ 1,364 more rows
#assigning the above to a data frame
#top_ten_start stations for casual riders
member_start_stations <- all_trips_clean %>%
group_by(member_casual, start_station_name) %>%
summarise(number_of_rides=n()) %>%
arrange(desc(number_of_rides)) %>%
filter(member_casual == "member") %>%
select(start_station_name, number_of_rides)
member_top_start_stations <- head(member_start_stations,10)
To get the top end stations for Member riders, the process above was repeated, but was done with the ‘end_station_name’ column
member_end_stations <- all_trips_clean %>%
group_by(member_casual, end_station_name) %>%
summarise(number_of_rides=n()) %>%
arrange(desc(number_of_rides)) %>%
filter(member_casual == "casual") %>%
select(end_station_name, number_of_rides)
#to get the top 10 end stations for member riders, you can run the 2 lines of code below
member_top_end_stations <- head(member_end_stations,10)
View(member_top_end_stations)
The above can be done in this way as well, to get the Start and End Stations used by Casual and Member Riders jointly.
# Analyzing to see the top start and end stations casual riders use
all_trips_clean %>%
group_by(member_casual, start_station_name, end_station_name) %>%
summarise(number_of_rides=n()) %>%
arrange(desc(number_of_rides)) %>%
filter(member_casual == "casual") %>%
select(start_station_name, end_station_name, number_of_rides)
## # A tibble: 118,373 × 4
## # Groups: member_casual, start_station_name [1,459]
## member_casual start_station_name end_station_name number_of_rides
## <chr> <chr> <chr> <int>
## 1 casual Streeter Dr & Grand Ave Streeter Dr & G… 8099
## 2 casual DuSable Lake Shore Dr & Monro… DuSable Lake Sh… 6224
## 3 casual DuSable Lake Shore Dr & Monro… Streeter Dr & G… 4275
## 4 casual Michigan Ave & Oak St Michigan Ave & … 3986
## 5 casual Millennium Park Millennium Park 3157
## 6 casual Dusable Harbor Dusable Harbor 2521
## 7 casual Montrose Harbor Montrose Harbor 2322
## 8 casual Streeter Dr & Grand Ave DuSable Lake Sh… 2181
## 9 casual DuSable Lake Shore Dr & North… DuSable Lake Sh… 1924
## 10 casual Shedd Aquarium Shedd Aquarium 1841
## # ℹ 118,363 more rows
# Analyzing to see the top start stations member riders use
all_trips_clean %>%
group_by(member_casual, start_station_name,end_station_name) %>%
summarise(number_of_rides=n()) %>%
arrange(desc(number_of_rides)) %>%
filter(member_casual == "member") %>%
select(start_station_name, end_station_name,number_of_rides)
## # A tibble: 139,535 × 4
## # Groups: member_casual, start_station_name [1,374]
## member_casual start_station_name end_station_name number_of_rides
## <chr> <chr> <chr> <int>
## 1 member Ellis Ave & 60th St University Ave & 57th… 4473
## 2 member Ellis Ave & 60th St Ellis Ave & 55th St 4345
## 3 member Calumet Ave & 33rd St State St & 33rd St 4266
## 4 member State St & 33rd St Calumet Ave & 33rd St 4231
## 5 member University Ave & 57th St Ellis Ave & 60th St 4178
## 6 member Ellis Ave & 55th St Ellis Ave & 60th St 4058
## 7 member Loomis St & Lexington St Morgan St & Polk St 3001
## 8 member Morgan St & Polk St Loomis St & Lexington… 2727
## 9 member MLK Jr Dr & 29th St State St & 33rd St 2166
## 10 member State St & 33rd St MLK Jr Dr & 29th St 2045
## # ℹ 139,525 more rows
The above analyses were visualized for clearer view and understanding.
#bar chart showing the count of members and casual riders
ggplot(data = all_trips_clean) +
geom_bar(mapping = aes(x=member_casual, fill=member_casual)) +
labs(title = "Count of Rides (Dec. 2022 - Nov. 2023)",subtitle = "Between Riders and Members", x="Rider type", y="Count of rides")+
theme(plot.title = element_text(size = 13))
all_trips_clean %>%
group_by(member_casual) %>%
summarise(number_of_rides=n()) %>%
select(member_casual,number_of_rides)
## # A tibble: 2 × 2
## member_casual number_of_rides
## <chr> <int>
## 1 casual 1396265
## 2 member 2499808
all_trips_clean %>%
group_by(member_casual) %>%
summarise(number_of_rides=n()) %>%
mutate(ride_length_percentage = number_of_rides * 100/sum(number_of_rides)) %>%
ggplot(mapping = aes(x="",y=ride_length_percentage, fill=member_casual))+
geom_bar(stat = "identity")+
labs(title= "Ride Percentage between Casual and Member riders", x="Rider type")+
coord_polar("y")+
theme_void()
all_trips_clean %>%
group_by(member_casual,rideable_type) %>%
summarise(number_of_rides=n()) %>%
ggplot(aes(x=member_casual, y=number_of_rides, fill=rideable_type)) +
geom_col()+
facet_grid(~rideable_type)+
labs(title= "Number of Rides based on bike type", subtitle="Between Casual and Member riders", x="Rider type")+
theme(axis.title.x = element_text(size = 9), axis.text.x = element_text(size = 8, angle=50, hjust = 1), axis.title.y = element_text(size = 9))
all_trips_clean %>%
group_by(member_casual, rideable_type) %>%
summarise(average_ride_length=mean(ride_length), number_of_rides=n()) %>%
ggplot(aes(x=member_casual, y=average_ride_length, fill=rideable_type))+
geom_col()+
facet_grid(~rideable_type)+
labs(title= "Average ride length between Casual and Member riders", x="Rider type")+
theme(axis.title.x = element_text(size = 9), axis.text.x = element_text(size = 8, angle=50, hjust = 1), axis.title.y = element_text(size = 9))
all_trips_clean %>%
group_by(member_casual, month) %>%
summarise(number_of_rides=n(), average_ride_length=mean(ride_length)) %>%
ggplot(aes(x=month, y=number_of_rides, fill=member_casual)) +
geom_col(position = "dodge2")+
labs(title = "Number of rides per month", x="Month")+
theme(axis.text.x = element_text(angle=50, hjust = 1))
all_trips_clean %>%
group_by(member_casual, day_of_week) %>%
summarise(number_of_rides=n(), average_ride_length=mean(ride_length)) %>%
ggplot(aes(x=day_of_week, y=number_of_rides, fill=member_casual)) +
geom_col(position = "dodge2")+
labs(title = "Number of Rides per day of the week", subtitle = "Based on rider type", x = "Day of the week", y = "Number of rides")+
theme(axis.text.x = element_text(angle = 50, hjust = 1))
all_trips_clean %>%
group_by(member_casual, day_of_week, rideable_type) %>%
summarise(number_of_rides=n(), average_ride_length=mean(ride_length)) %>%
ggplot(aes(x=day_of_week, y=number_of_rides, fill=rideable_type))+
geom_col(position = "dodge2")+
facet_grid(~member_casual)+
theme(axis.text.x = element_text(size = 7, angle = 50, hjust = 1))
casual_top_start_stations %>%
ggplot(aes(x=start_station_name, y=number_of_rides))+
geom_col(fill="darkgreen") +coord_flip()+
labs(title = "Top Start Stations for Casual Riders")
casual_top_end_stations %>%
ggplot(aes(x=end_station_name, y=number_of_rides))+
geom_col(fill="red") +coord_flip()+
labs(title = "Top End Stations for Casual Riders")
member_top_start_stations %>%
ggplot(aes(x=start_station_name, y=number_of_rides))+
geom_col(fill="darkgreen") +coord_flip()+
labs(title = "Top Start Stations for Member Riders")
member_top_end_stations %>%
ggplot(aes(x=end_station_name, y=number_of_rides))+
geom_col(fill="red") +coord_flip()+
labs(title = "Top End Stations for Member Riders")