Loading Packages
# Install only the packages that are not available yet, then attach them.
# Calling install.packages() unconditionally would re-download the packages on
# every run; tidyverse is included because it is attached below as well.
required_packages <- c("tidyverse", "lubridate", "janitor")
missing_packages <- required_packages[
  !vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
library("tidyverse")
library("lubridate")
library("janitor")
R Markdown
Importing the monthly trip data into named data frames
# Read the twelve monthly trip files (file names 202004 through 202103) in
# order, then bind each result to the df1..df12 names used when combining.
trip_files <- c(
  "~/Desktop/CyclistTripData/202004-tripdata.csv",
  "~/Desktop/CyclistTripData/202005-tripdata.csv",
  "~/Desktop/CyclistTripData/202006-tripdata.csv",
  "~/Desktop/CyclistTripData/202007-tripdata.csv",
  "~/Desktop/CyclistTripData/202008-tripdata.csv",
  "~/Desktop/CyclistTripData/202009-tripdata.csv",
  "~/Desktop/CyclistTripData/202010-tripdata.csv",
  "~/Desktop/CyclistTripData/202011-tripdata.csv",
  "~/Desktop/CyclistTripData/202012-tripdata.csv",
  "~/Desktop/CyclistTripData/202101-tripdata.csv",
  "~/Desktop/CyclistTripData/202102-tripdata.csv",
  "~/Desktop/CyclistTripData/202103-tripdata.csv"
)
monthly_trips <- lapply(trip_files, read.csv)
df1 <- monthly_trips[[1]]
df2 <- monthly_trips[[2]]
df3 <- monthly_trips[[3]]
df4 <- monthly_trips[[4]]
df5 <- monthly_trips[[5]]
df6 <- monthly_trips[[6]]
df7 <- monthly_trips[[7]]
df8 <- monthly_trips[[8]]
df9 <- monthly_trips[[9]]
df10 <- monthly_trips[[10]]
df11 <- monthly_trips[[11]]
df12 <- monthly_trips[[12]]
Combine twelve data frames into one
# Stack the twelve monthly data frames row-wise, in chronological order.
monthly_frames <- list(
  df1, df2, df3, df4, df5, df6,
  df7, df8, df9, df10, df11, df12
)
bike_rides <- do.call(rbind, monthly_frames)
Removing any empty rows or columns
# Drop columns that are entirely empty first, then rows that are entirely
# empty, using janitor::remove_empty() for both passes.
bike_rides <- bike_rides %>%
  janitor::remove_empty(which = "cols") %>%
  janitor::remove_empty(which = "rows")
Check and verify the data types in the data frame
# Print the column names, types and first values of the combined data.
utils::str(object = bike_rides)
Split the dates and times into separate columns
# Split each timestamp string at the first space into a date part (column 1)
# and a time part (column 2), then store the parts as new character columns.
start_parts <- str_split_fixed(bike_rides$started_at, " ", 2)
bike_rides$Start_date <- start_parts[, 1]
bike_rides$Start_Time <- start_parts[, 2]

end_parts <- str_split_fixed(bike_rides$ended_at, " ", 2)
bike_rides$End_date <- end_parts[, 1]
bike_rides$End_Time <- end_parts[, 2]
Using lubridate and base R functions to convert the date and time strings
to Date and date-time values
# Convert the date strings to Date values.
bike_rides$Start_date <- as_date(bike_rides$Start_date)
bike_rides$End_date <- as_date(bike_rides$End_date)

# Convert the time-of-day strings to date-times. Only the time is given in the
# format, so R fills in the current date. The values are stored as POSIXct
# rather than the list-based POSIXlt returned by strptime(), which is awkward
# as a data frame column. Parsing in UTC avoids NA results for times that do
# not exist in the local time zone on the current date (daylight-saving gaps).
bike_rides$Start_Time <- as.POSIXct(
  bike_rides$Start_Time,
  format = "%H:%M:%OS",
  tz = "UTC"
)
bike_rides$End_Time <- as.POSIXct(
  bike_rides$End_Time,
  format = "%H:%M:%OS",
  tz = "UTC"
)
Check and verify the data types in the data frame again
# Print the structure again to confirm the converted column types.
utils::str(object = bike_rides)
Dropping columns that are not needed
# Keep every column except the two raw timestamp columns.
raw_timestamp_cols <- c("started_at", "ended_at")
keep_column <- !(names(bike_rides) %in% raw_timestamp_cols)
bike_rides <- bike_rides[, keep_column, drop = FALSE]
Calculating trip duration
# Trip duration in minutes. Start_Time and End_Time carry only the time of day
# (both parsed onto the same placeholder date), so their difference alone is
# wrong for a trip that ends on a later calendar day, e.g. 23:50 to 00:10
# would give -1420 minutes. The number of whole days between End_date and
# Start_date is therefore added back, converted to minutes.
time_of_day_diff <- difftime(
  bike_rides$End_Time,
  bike_rides$Start_Time,
  units = "mins"
)
calendar_day_diff <- as.numeric(
  difftime(bike_rides$End_date, bike_rides$Start_date, units = "days")
)
bike_rides[c("Trip_Duration")] <- time_of_day_diff +
  calendar_day_diff * 24 * 60
Verify the new Trip_Duration column
# Print the structure once more to check the Trip_Duration column.
utils::str(object = bike_rides)
Saving data frame to a csv file
# Write the cleaned data frame to a CSV file (row names are included, as is
# the default for write.csv()).
utils::write.csv(
  x = bike_rides,
  file = "/Users/jmac/Desktop/bike1.csv"
)