Data Preparation

library(tidyverse)
library(openintro)
library(flextable)
library(infer)
library(psych)
library(epiDisplay)
library(summarytools)

# Document-wide chunk defaults: echo the source code and tidy it,
# passing a width cutoff of 50 to the tidying step.
knitr::opts_chunk$set(
  tidy.opts = list(width.cutoff = 50),
  tidy = TRUE,
  echo = TRUE
)

# NOTE(review): fonts_ignore = TRUE presumably lets flextable skip font
# settings for PDF output -- confirm against the flextable docs.
set_flextable_defaults(fonts_ignore = TRUE)

# Project identifier string. It is not referenced in the visible part of
# this document -- NOTE(review): confirm it is still needed.
projectid <- "data-607-project"

# Read the August 2023 trip file (path is relative to the working
# directory) and print a per-column summary of the resulting data frame.
Citibike <- read.csv("202308-citibike-tripdata.csv")
dfSummary(Citibike)
## Data Frame Summary  
## Citibike  
## Dimensions: 4093169 x 13  
## Duplicates: 0  
## 
## -----------------------------------------------------------------------------------------------------------------------------
## No   Variable             Stats / Values                 Freqs (% of Valid)       Graph                  Valid      Missing  
## ---- -------------------- ------------------------------ ------------------------ ---------------------- ---------- ---------
## 1    ride_id              1. 0000021FE140CCC6                  1 (  0.0%)                                4093169    0        
##      [character]          2. 0000026F89F2441F                  1 (  0.0%)                                (100.0%)   (0.0%)   
##                           3. 000004121C91C853                  1 (  0.0%)                                                    
##                           4. 0000048DBE53542F                  1 (  0.0%)                                                    
##                           5. 00000647B60115E9                  1 (  0.0%)                                                    
##                           6. 00000861C5C15BD9                  1 (  0.0%)                                                    
##                           7. 00001955056D2FBC                  1 (  0.0%)                                                    
##                           8. 00001D54D3D085B0                  1 (  0.0%)                                                    
##                           9. 00001D73A3E06D23                  1 (  0.0%)                                                    
##                           10. 00001FA8D064DEA7                 1 (  0.0%)                                                    
##                           [ 4093159 others ]             4093159 (100.0%)         IIIIIIIIIIIIIIIIIII                        
## 
## 2    rideable_type        1. classic_bike                3784828 (92.5%)          IIIIIIIIIIIIIIIIII     4093169    0        
##      [character]          2. electric_bike                308341 ( 7.5%)          I                      (100.0%)   (0.0%)   
## 
## 3    started_at           1. 2023-08-23 18:19:01              17 (  0.0%)                                4093169    0        
##      [character]          2. 2023-08-23 17:19:50              16 (  0.0%)                                (100.0%)   (0.0%)   
##                           3. 2023-08-02 18:10:31              14 (  0.0%)                                                    
##                           4. 2023-08-15 17:23:28              14 (  0.0%)                                                    
##                           5. 2023-08-15 17:24:10              14 (  0.0%)                                                    
##                           6. 2023-08-15 18:05:49              14 (  0.0%)                                                    
##                           7. 2023-08-16 17:07:54              14 (  0.0%)                                                    
##                           8. 2023-08-19 12:16:07              14 (  0.0%)                                                    
##                           9. 2023-08-26 13:54:01              14 (  0.0%)                                                    
##                           10. 2023-08-30 17:14:00             14 (  0.0%)                                                    
##                           [ 1751370 others ]             4093024 (100.0%)         IIIIIIIIIIIIIIIIIII                        
## 
## 4    ended_at             1. 2023-08-24 14:11:35              69 (  0.0%)                                4093169    0        
##      [character]          2. 2023-08-09 15:21:33              62 (  0.0%)                                (100.0%)   (0.0%)   
##                           3. 2023-08-03 09:28:36              42 (  0.0%)                                                    
##                           4. 2023-08-19 18:03:44              42 (  0.0%)                                                    
##                           5. 2023-08-26 11:30:12              37 (  0.0%)                                                    
##                           6. 2023-08-28 12:46:51              35 (  0.0%)                                                    
##                           7. 2023-08-03 22:46:37              34 (  0.0%)                                                    
##                           8. 2023-08-05 16:39:11              32 (  0.0%)                                                    
##                           9. 2023-08-15 16:06:26              32 (  0.0%)                                                    
##                           10. 2023-08-18 18:02:23             29 (  0.0%)                                                    
##                           [ 1757210 others ]             4092755 (100.0%)         IIIIIIIIIIIIIIIIIII                        
## 
## 5    start_station_name   1. West St & Chambers St         14684 ( 0.4%)                                 4093169    0        
##      [character]          2. W 21 St & 6 Ave               14551 ( 0.4%)                                 (100.0%)   (0.0%)   
##                           3. Broadway & W 58 St            13745 ( 0.3%)                                                     
##                           4. 11 Ave & W 41 St              13714 ( 0.3%)                                                     
##                           5. E 17 St & Broadway            12197 ( 0.3%)                                                     
##                           6. 7 Ave & Central Park Sout     11927 ( 0.3%)                                                     
##                           7. 6 Ave & W 34 St               11824 ( 0.3%)                                                     
##                           8. W 30 St & 10 Ave              11753 ( 0.3%)                                                     
##                           9. W 31 St & 7 Ave               11474 ( 0.3%)                                                     
##                           10. W 34 St & Hudson Blvd E      11466 ( 0.3%)                                                     
##                           [ 1945 others ]                3965834 (96.9%)          IIIIIIIIIIIIIIIIIII                        
## 
## 6    start_station_id     1. 5329.03                       14684 ( 0.4%)                                 4093169    0        
##      [character]          2. 6140.05                       14551 ( 0.4%)                                 (100.0%)   (0.0%)   
##                           3. 6948.10                       13745 ( 0.3%)                                                     
##                           4. 6726.01                       13714 ( 0.3%)                                                     
##                           5. 5980.07                       12197 ( 0.3%)                                                     
##                           6. 6912.01                       11927 ( 0.3%)                                                     
##                           7. 6364.10                       11824 ( 0.3%)                                                     
##                           8. 6459.07                       11753 ( 0.3%)                                                     
##                           9. 6331.01                       11474 ( 0.3%)                                                     
##                           10. 6535.04                      11466 ( 0.3%)                                                     
##                           [ 1946 others ]                3965834 (96.9%)          IIIIIIIIIIIIIIIIIII                        
## 
## 7    end_station_name     1. West St & Chambers St         14714 ( 0.4%)                                 4093169    0        
##      [character]          2. W 21 St & 6 Ave               14576 ( 0.4%)                                 (100.0%)   (0.0%)   
##                           3. 11 Ave & W 41 St              13784 ( 0.3%)                                                     
##                           4. Broadway & W 58 St            12992 ( 0.3%)                                                     
##                           5. E 17 St & Broadway            12268 ( 0.3%)                                                     
##                           6. 7 Ave & Central Park Sout     11750 ( 0.3%)                                                     
##                           7. W 30 St & 10 Ave              11743 ( 0.3%)                                                     
##                           8. 6 Ave & W 34 St               11524 ( 0.3%)                                                     
##                           9. W 34 St & Hudson Blvd E       11468 ( 0.3%)                                                     
##                           10. W 31 St & 7 Ave              11454 ( 0.3%)                                                     
##                           [ 1986 others ]                3966896 (96.9%)          IIIIIIIIIIIIIIIIIII                        
## 
## 8    end_station_id       1. 5329.03                       14714 ( 0.4%)                                 4093169    0        
##      [character]          2. 6140.05                       14576 ( 0.4%)                                 (100.0%)   (0.0%)   
##                           3. 6726.01                       13784 ( 0.3%)                                                     
##                           4. 6948.10                       12992 ( 0.3%)                                                     
##                           5. 5980.07                       12268 ( 0.3%)                                                     
##                           6. 6912.01                       11750 ( 0.3%)                                                     
##                           7. 6459.07                       11743 ( 0.3%)                                                     
##                           8. 6364.10                       11524 ( 0.3%)                                                     
##                           9. 6535.04                       11468 ( 0.3%)                                                     
##                           10. 6331.01                      11454 ( 0.3%)                                                     
##                           [ 1987 others ]                3966896 (96.9%)          IIIIIIIIIIIIIIIIIII                        
## 
## 9    start_lat            Mean (sd) : 40.7 (0)           885055 distinct values           :              4093169    0        
##      [numeric]            min < med < max:                                              . : :            (100.0%)   (0.0%)   
##                           40.6 < 40.7 < 40.9                                            : : :                                
##                           IQR (CV) : 0 (0)                                            : : : : .                              
##                                                                                     . : : : : : : .                          
## 
## 10   start_lng            Mean (sd) : -74 (0)            724338 distinct values       :                  4093169    0        
##      [numeric]            min < med < max:                                          . :                  (100.0%)   (0.0%)   
##                           -74 < -74 < -73.8                                         : : .                                    
##                           IQR (CV) : 0 (0)                                          : : : :                                  
##                                                                                   . : : : : : : .                            
## 
## 11   end_lat              Mean (sd) : 40.7 (0.1)         2029 distinct values                       :    4090221    2948     
##      [numeric]            min < med < max:                                                          :    (99.9%)    (0.1%)   
##                           -37.3 < 40.7 < 41                                                         :                        
##                           IQR (CV) : 0 (0)                                                          :                        
##                                                                                                     :                        
## 
## 12   end_lng              Mean (sd) : -74 (0.1)          2025 distinct values               :            4090221    2948     
##      [numeric]            min < med < max:                                                  :            (99.9%)    (0.1%)   
##                           -173.4 < -74 < 0                                                  :                                
##                           IQR (CV) : 0 (0)                                                  :                                
##                                                                                             :                                
## 
## 13   member_casual        1. casual                       907197 (22.2%)          IIII                   4093169    0        
##      [character]          2. member                      3185972 (77.8%)          IIIIIIIIIIIIIII        (100.0%)   (0.0%)   
## -----------------------------------------------------------------------------------------------------------------------------

Research question

Citi Bike has two types of bikes (classic and electric) and two types of riders (annual members and casual riders). Comparing and contrasting casual riders and annual members: do rider type and bike type affect each other?

Cases

The Citi Bike data set contains 4,093,169 rows, each of which is an individual ride. For this analysis I will use a sample of approximately 25% of the data, consisting of 1,023,292 cases, and I will set the confidence level at 0.95 (95%).

Data collection

Citi Bike publishes its trip data as downloadable CSV files in a publicly available database. The data set itself was created in September 2023 for the month of August 2023 and was last updated on October 12, 2023. The Citi Bike database also contains more recent data, through October 2023, and can be viewed or accessed at the [Citi Bike website](https://citibikenyc.com/system-data).

Type of Study

This is an observational study, since I will be using an existing data set from the Citi Bike database.

Data Source

For this assignment I will be using a random sample taken from https://s3.amazonaws.com/tripdata/index.html. These data come from the Citi Bike site: https://citibikenyc.com/system-data

Dependent Variable

The dependent variable is the type of bike rented on a day-to-day basis; it is categorical (qualitative).

Independent Variable

The independent variable is the type of rider (member or casual) renting the bike; it is also categorical (qualitative).

Relevant summary statistics

# Bar chart of the number of rides per bike type.
ggplot(Citibike, aes(x = rideable_type)) +
  geom_bar()

# Frequency table of bike types, most frequent first, without the
# cumulative-percent column. FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned, unlike the reserved FALSE.
tab1(Citibike$rideable_type, sort.group = "decreasing", cum.percent = FALSE)

## Citibike$rideable_type : 
##               Frequency Percent
## classic_bike    3784828    92.5
## electric_bike    308341     7.5
##   Total         4093169   100.0
# Bar chart of the number of rides per rider type (member vs. casual).
ggplot(Citibike, aes(x = member_casual)) +
  geom_bar()

# Frequency table of rider types, most frequent first, without the
# cumulative-percent column. FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned, unlike the reserved FALSE.
tab1(Citibike$member_casual, sort.group = "decreasing", cum.percent = FALSE)

## Citibike$member_casual : 
##         Frequency Percent
## member    3185972    77.8
## casual     907197    22.2
##   Total   4093169   100.0