This file contains the analysis of the bicycle data. The business task is the growth of a virtual bicycle company.

Analysis :

Loading packages

library(ggplot2)
library(dplyr)
library(tidyverse)
library(readr)
# Read the 2019 Q1 rides-per-day sheet into a tibble.
Divvy_Trips_2019_Q1_Sheet5_1_ <- read_csv(
  "C:/project_01/Manipulating_data/findings data/Rides per Day/Divvy_Trips_2019_Q1 - Sheet5 (1).csv"
)
# View() opens a spreadsheet-style viewer, which only makes sense when a person
# is driving the session; skip it when the script runs non-interactively.
if (interactive()) {
  View(Divvy_Trips_2019_Q1_Sheet5_1_)
}


library(readr)
# Read the 2020 Q1 rides-per-day sheet into a tibble.
Divvy_Trips_2020_Q1_Sheet2_1_ <- read_csv(
  "C:/project_01/Manipulating_data/findings data/Rides per Day/Divvy_Trips_2020_Q1 - Sheet2 (1).csv"
)
# View() opens a spreadsheet-style viewer, which only makes sense when a person
# is driving the session; skip it when the script runs non-interactively.
if (interactive()) {
  View(Divvy_Trips_2020_Q1_Sheet2_1_)
}

Glimpse of the Data

# Print the first six rows of the 2019 Q1 sheet (six is head()'s default).
head(Divvy_Trips_2019_Q1_Sheet5_1_, n = 6L)
## # A tibble: 6 × 4
##   Weekdays  Rides Customer Subscriber
##   <chr>     <dbl>    <dbl>      <dbl>
## 1 Monday    50850     2556      34628
## 2 Tuesday   66778     3323      52050
## 3 Wednesday 74960     3790      57446
## 4 Thursday  69911     3348      53539
## 5 Friday    66140     3183      51751
## 6 Saturday  60663     2887      48355

Visualization

# Use the black-and-white theme for every plot drawn after this point.
theme_set(theme_bw())

# Bar chart of the total number of rides per weekday (2019 Q1 sheet).
# The data frame is passed once via `data =`; piping it in as well was redundant.
ggplot(
  data = Divvy_Trips_2019_Q1_Sheet5_1_,
  mapping = aes(x = Weekdays, y = Rides, fill = Weekdays)
) +
  geom_col(alpha = 0.5) +
  # `Weekdays` is a character column, so bars would otherwise be sorted
  # alphabetically. `limits` (unlike `breaks`) sets the order of the axis.
  scale_x_discrete(
    limits = c(
      "Monday", "Tuesday", "Wednesday", "Thursday",
      "Friday", "Saturday", "Sunday"
    )
  ) +
  scale_y_continuous(
    breaks = c(20000, 40000, 60000, 80000),
    labels = c("20k", "40k", "60k", "80k")
  ) +
  labs(
    title = "No. of Rides / Day",
    x = "Weekdays ",
    y = " No. of Rides ",
    # The legend belongs to the `fill` aesthetic; `Color` named no aesthetic,
    # so the legend title was never applied.
    fill = "Week days"
  )

key takeaway

Adding the user factor

# Bar chart of rides per weekday split by user type (2019 Q1 sheet).
# Subscriber counts are drawn as semi-transparent bars; Customer counts are
# drawn on top of them as opaque bars using the same x and fill mapping.
ggplot(
  Divvy_Trips_2019_Q1_Sheet5_1_,
  mapping = aes(x = Weekdays, y = Subscriber, fill = Weekdays)
) +
  geom_col(alpha = 0.5) +
  geom_col(aes(x = Weekdays, y = Customer)) +
  # `Weekdays` is a character column, so bars would otherwise be sorted
  # alphabetically. `limits` (unlike `breaks`) sets the order of the axis.
  scale_x_discrete(
    limits = c(
      "Monday", "Tuesday", "Wednesday", "Thursday",
      "Friday", "Saturday", "Sunday"
    )
  ) +
  scale_y_continuous(
    breaks = c(20000, 40000, 60000, 80000),
    labels = c("20k", "40k", "60k", "80k")
  ) +
  labs(
    title = "Each member Rides / Day",
    x = "Weekdays ",
    # Both Subscriber and Customer counts share this axis, so it is labelled
    # with the measured quantity rather than one of the two user types.
    y = " No. of Rides ",
    # The legend belongs to the `fill` aesthetic; `Color` named no aesthetic,
    # so the legend title was never applied.
    fill = "Week days"
  )

Key takeaway * The denser area shows the daily Customer rides. * The lighter area shows the daily Subscriber rides. * Subscribers use bikes more often than Customers.

Difference in bike ride duration

# Discard every row that contains a missing value, then group what is left by
# weekday and the four duration columns. The grouped tibble is printed because
# the call sits at top level.
group_by(
  drop_na(Divvy_Trips_2020_Q1_Sheet2_1_),
  `Week days`,
  `Duration customer 2019`,
  `Duration subscriber 2019`,
  `Duration Customer 2020`,
  `Duration Subscriber 2020`
)
## # A tibble: 7 × 8
## # Groups:   Week days, Duration customer 2019, Duration subscriber 2019,
## #   Duration Customer 2020, Duration Subscriber 2020 [7]
##   `Week days` Customer Subscriber Rides `Duration Customer 2020`
##   <chr>          <dbl>      <dbl> <dbl> <chr>                   
## 1 Monday          8866      41984 50850 13302:48:14             
## 2 Tuesday         6576      60202 66778 9495:07:28              
## 3 Wednesday       7796      67164 74960 12587:15:45             
## 4 Thursday        7763      62148 69911 7831:11:40              
## 5 Friday          6569      59571 66140 12714:20:40             
## 6 Saturday        6089      54574 60663 10292:14:22             
## 7 Sunday          4821      32764 37585 11169:24:26             
## # ℹ 3 more variables: `Duration Subscriber 2020` <chr>,
## #   `Duration customer 2019` <chr>, `Duration subscriber 2019` <chr>
# Total ride duration per weekday in 2020, by user type, in minutes.
# The customer values are the `Duration Customer 2020` column expressed in
# minutes (e.g. 13302:48:14 -> 798168.2).
# NOTE(review): the subscriber values are assumed to be minutes as well; the
# source column is not visible in the printed output - confirm.
week_days <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")
customer_duration <- c(798168.2, 569707.5, 755235.8, 469871.7, 762860.7, 617534.4, 670164.4)
subscriber_duration <- c(708645.9, 734922.7, 800916.3, 733588.5, 699680.4, 676067.1, 443287.3)

# Convert to a data frame. `week_days` is already in calendar order, so it
# doubles as the factor levels and keeps the x axis from sorting alphabetically.
data <- data.frame(
  Weekday = factor(week_days, levels = week_days),
  Customer = customer_duration,
  Subscriber = subscriber_duration
)

# Line chart of total ride duration per weekday, one line per user type.
# `data` stays in minutes; the division by 60 happens only in the mapping so
# that the axis reads in hours.
ggplot(data, aes(x = Weekday, group = 1)) + # group = 1 joins the points of a discrete x axis
  # `linewidth` replaces `size` for lines (ggplot2 >= 3.4.0).
  geom_line(aes(y = Customer / 60, color = "Customer"), linewidth = 1) +
  geom_line(aes(y = Subscriber / 60, color = "Subscriber"), linewidth = 1) +
  # The previous hand-written breaks (about 326-554) were far outside the
  # plotted range, so the axis had no ticks. ggplot now picks the breaks and
  # each one is labelled in hours.
  scale_y_continuous(
    labels = function(hours) {
      paste0(format(hours, big.mark = ",", trim = TRUE, scientific = FALSE), "hr")
    }
  ) +
  labs(
    title = "Duration of Rides in 2020",
    x = "Week Days",
    y = "Duration (Hours)",
    color = "User type"
  ) +
  scale_color_manual(values = c("Customer" = "blue", "Subscriber" = "red")) +
  theme_minimal()

Conclusion:

  • Subscribers took more bike rides than Customers.
  • There was significant growth in subscribers in 2020.
  • Members use bikes more often than Customers.