Loading Packages

# Install lubridate and janitor only when they are not already available.
# Calling install.packages() unconditionally re-downloads the packages on
# every run and fails in non-interactive sessions with no CRAN mirror set.
required_pkgs <- c("lubridate", "janitor")
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1), quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}

# Attach the packages used by the rest of the script.
library("tidyverse")
library("lubridate")
library("janitor")

R Markdown

Data Source: https://divvy-tripdata.s3.amazonaws.com/index.html

Importing the monthly trip data files into named data frames

# Map each data frame name to the CSV file it is read from, then load every
# file with read.csv() and bind it to that name (df1 ... df12).
trip_files <- c(
  df1  = "~/Desktop/CyclistTripData/202004-tripdata.csv",
  df2  = "~/Desktop/CyclistTripData/202005-tripdata.csv",
  df3  = "~/Desktop/CyclistTripData/202006-tripdata.csv",
  df4  = "~/Desktop/CyclistTripData/202007-tripdata.csv",
  df5  = "~/Desktop/CyclistTripData/202008-tripdata.csv",
  df6  = "~/Desktop/CyclistTripData/202009-tripdata.csv",
  df7  = "~/Desktop/CyclistTripData/202010-tripdata.csv",
  df8  = "~/Desktop/CyclistTripData/202011-tripdata.csv",
  df9  = "~/Desktop/CyclistTripData/202012-tripdata.csv",
  df10 = "~/Desktop/CyclistTripData/202101-tripdata.csv",
  df11 = "~/Desktop/CyclistTripData/202102-tripdata.csv",
  df12 = "~/Desktop/CyclistTripData/202103-tripdata.csv"
)
for (frame_name in names(trip_files)) {
  assign(frame_name, read.csv(trip_files[[frame_name]]))
}

Combine twelve data frames into one

# Stack the twelve monthly data frames row-wise into one data frame. The list
# is left unnamed so rbind() builds the same row names as a direct call.
monthly_frames <- list(
  df1, df2, df3, df4, df5, df6,
  df7, df8, df9, df10, df11, df12
)
bike_rides <- do.call(rbind, monthly_frames)

Verify the dimensions of the combined data frame

# Show the row count and column count of bike_rides.
c(nrow(bike_rides), ncol(bike_rides))

Removing any empty rows or columns

# Drop columns that are entirely empty, then rows that are entirely empty.
bike_rides <- bike_rides %>%
  janitor::remove_empty(which = "cols") %>%
  janitor::remove_empty(which = "rows")

Verify the dimensions after removing empty rows and columns

# Show the row count and column count of bike_rides.
c(nrow(bike_rides), ncol(bike_rides))

Check and verify the data types in the data frame

# Print the compact structure (column names, types, sample values).
utils::str(object = bike_rides)

Split the start and end timestamps into separate date and time columns

# Split each timestamp string at its first space into exactly two pieces:
# the part before the space goes to the *_date column and the remainder to
# the *_Time column.
start_parts <- str_split_fixed(bike_rides$started_at, " ", 2)
bike_rides$Start_date <- start_parts[, 1]
bike_rides$Start_Time <- start_parts[, 2]

end_parts <- str_split_fixed(bike_rides$ended_at, " ", 2)
bike_rides$End_date <- end_parts[, 1]
bike_rides$End_Time <- end_parts[, 2]

Using lubridate and base R to convert the date strings to Date values and the time strings to date-time values

# Convert the date strings to Date values.
bike_rides$Start_date <- as_date(bike_rides$Start_date)
bike_rides$End_date <- as_date(bike_rides$End_date)

# Convert the time-of-day strings to date-times. The format has no date
# fields, so strptime() fills in the current date; only the time-of-day part
# of these columns is meaningful.
# - Stored as POSIXct: strptime() returns POSIXlt, a list-based class that is
#   poorly suited to data frame columns.
# - Parsed in UTC: the result does not depend on the session time zone, and a
#   time that does not exist locally on the run date (DST change) cannot
#   become NA.
time_format <- "%H:%M:%OS"
bike_rides$Start_Time <- as.POSIXct(
  strptime(bike_rides$Start_Time, format = time_format, tz = "UTC")
)
bike_rides$End_Time <- as.POSIXct(
  strptime(bike_rides$End_Time, format = time_format, tz = "UTC")
)

Check and verify the data types in the data frame after conversion

# Print the compact structure (column names, types, sample values).
utils::str(object = bike_rides)

Dropping columns that are not needed

# Keep every column except the two original timestamp columns.
cols_to_drop <- c("started_at", "ended_at")
kept_cols <- setdiff(names(bike_rides), cols_to_drop)
bike_rides <- bike_rides[, kept_cols, drop = FALSE]

Calculating trip duration

# Trip duration in minutes.
# Start_Time and End_Time hold only a time of day (their date part is a
# placeholder), so subtracting them directly gives a negative duration for
# any ride that ends on a later calendar day than it started. Combine the
# whole-day difference from the Date columns with the time-of-day difference.
start_lt <- as.POSIXlt(bike_rides$Start_Time)
end_lt <- as.POSIXlt(bike_rides$End_Time)

# Seconds since midnight for each time of day.
start_secs <- start_lt$hour * 3600 + start_lt$min * 60 + start_lt$sec
end_secs <- end_lt$hour * 3600 + end_lt$min * 60 + end_lt$sec

# Whole calendar days between the start and end dates.
day_gap <- as.numeric(
  difftime(bike_rides$End_date, bike_rides$Start_date, units = "days")
)

# 1440 minutes per day; result keeps the original difftime-in-minutes class.
bike_rides$Trip_Duration <- as.difftime(
  day_gap * 1440 + (end_secs - start_secs) / 60,
  units = "mins"
)

Verify the new Trip_Duration column

# Print the compact structure (column names, types, sample values).
utils::str(object = bike_rides)

Saving data frame to a csv file

# Write the cleaned data frame to the Desktop as CSV. The path is relative to
# the home directory ("~"), matching the input paths, instead of hard-coding
# one user's absolute home directory.
# Row names are still written as the first, unnamed column (the write.csv
# default).
write.csv(bike_rides, "~/Desktop/bike1.csv")