Uber Data Analysis

Let’s load some data.

#Importing Library
library(ggplot2)
library(ggthemes)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(DT)
library(scales)
# Shared palette (seven hex colours) passed to scale_fill_manual() in the plots
colorsData <- c(
  "#CC1011",
  "#665555",
  "#05a399",
  "#cfcaca",
  "#f5e840",
  "#0683c9",
  "#e075b0"
)

#Reading Data
# Directory holding the monthly raw CSV files. The session working directory
# is left untouched; set the UBER_DATA_DIR environment variable to point the
# script at a different location.
dataDir <- Sys.getenv("UBER_DATA_DIR", unset = "/Volumes/gyan1 /uber data")

# Read the raw trip file for one month (e.g. "apr") from dataDir, stopping
# with the full path in the message when the file is not there.
readMonth <- function(monthTag) {
  csvPath <- file.path(dataDir, paste0("uber-raw-data-", monthTag, "14.csv"))
  if (!file.exists(csvPath)) {
    stop("Uber data file not found: ", csvPath, call. = FALSE)
  }
  read.csv(csvPath)
}

aprilData <- readMonth("apr")
mayData <- readMonth("may")
juneData <- readMonth("jun")
julyData <- readMonth("jul")
augustData <- readMonth("aug")
septemberData <- readMonth("sep")

# Stack the six monthly tables, in calendar order, into one data frame
uberData <- do.call(
  rbind,
  list(aprilData, mayData, juneData, julyData, augustData, septemberData)
)
# Peek at the first six rows
head(uberData, n = 6L)
##          Date.Time     Lat      Lon   Base
## 1 4/1/2014 0:11:00 40.7690 -73.9549 B02512
## 2 4/1/2014 0:17:00 40.7267 -74.0345 B02512
## 3 4/1/2014 0:21:00 40.7316 -73.9873 B02512
## 4 4/1/2014 0:28:00 40.7588 -73.9776 B02512
## 5 4/1/2014 0:33:00 40.7594 -73.9722 B02512
## 6 4/1/2014 0:33:00 40.7383 -74.0403 B02512
# Rows and columns of the combined table
dim(uberData)
## [1] 4534327       4
#Formatting Of Date and Time Column
# Parse the raw "m/d/Y H:M:S" strings exactly once. The format string includes
# the space that separates date and time in the data, and the time zone is
# pinned to UTC so each timestamp keeps its recorded clock time regardless of
# the session's local time zone.
uberData$Date.Time <- as.POSIXct(
  uberData$Date.Time,
  format = "%m/%d/%Y %H:%M:%S",
  tz = "UTC"
)
# Clock-time portion ("HH:MM:SS") as a character column
uberData$Time <- format(uberData$Date.Time, format = "%H:%M:%S")

# Create factors: one categorical column per calendar / clock component
# Calendar parts come from the parsed Date.Time column
uberData[["day"]] <- factor(lubridate::day(uberData[["Date.Time"]]))
uberData[["month"]] <- factor(
  lubridate::month(uberData[["Date.Time"]], label = TRUE)
)
uberData[["year"]] <- factor(lubridate::year(uberData[["Date.Time"]]))
uberData[["daysofweek"]] <- factor(
  lubridate::wday(uberData[["Date.Time"]], label = TRUE)
)
# Clock parts come from the "HH:MM:SS" Time strings
uberData[["hour"]] <- factor(
  lubridate::hour(lubridate::hms(uberData[["Time"]]))
)
uberData[["minute"]] <- factor(
  lubridate::minute(lubridate::hms(uberData[["Time"]]))
)
uberData[["second"]] <- factor(
  lubridate::second(lubridate::hms(uberData[["Time"]]))
)

#Plotting the trips by the hours in a day
dataUber <- uberData
# Number of trips for each hour of the day
hourData <- dataUber %>%
  group_by(hour) %>%
  dplyr::summarize(Total = n())
## `summarise()` ungrouping output (override with `.groups` argument)
head(hourData, 10)
## # A tibble: 10 x 2
##    hour   Total
##    <fct>  <int>
##  1 0     103836
##  2 1      67227
##  3 2      45865
##  4 3      48287
##  5 4      55230
##  6 5      83939
##  7 6     143213
##  8 7     193094
##  9 8     190504
## 10 9     159967
#Trips Every Hour
# theme_light() is a complete theme and replaces any theme settings added
# before it, so it goes first and the legend tweak follows it.
ggplot(hourData, aes(hour, Total)) +
  geom_bar(stat = "identity", fill = "red", color = "yellow") +
  ggtitle("Trips Every Hour") +
  theme_light() +
  theme(legend.position = "none") +
  scale_y_continuous(labels = comma) +
  xlab("Hour") +
  ylab("Total Trips")

#Trips by Hour and month
# Trip counts per (month, hour); .groups = "drop" returns an ungrouped table
# so no leftover grouping by month leaks into later operations.
monthHour <- dataUber %>%
  group_by(month, hour) %>%
  dplyr::summarize(Total = n(), .groups = "drop")
# Stacked bars: one bar per hour, segments coloured by month
ggplot(monthHour, aes(hour, Total, fill = month)) +
  geom_bar(stat = "identity") +
  ggtitle("Trips By Hour And Month") +
  theme_light() +
  scale_y_continuous(labels = comma) +
  xlab("Hour")

#Plotting Data By Trips During Every Day of the Month
# Number of trips for each day-of-month value
dayGroup <- dataUber %>%
  group_by(day) %>%
  dplyr::summarize(Total = n())
## `summarise()` ungrouping output (override with `.groups` argument)
head(dayGroup, 10)
## # A tibble: 10 x 2
##    day    Total
##    <fct>  <int>
##  1 1     127430
##  2 2     143201
##  3 3     142983
##  4 4     140923
##  5 5     147054
##  6 6     139886
##  7 7     143503
##  8 8     145984
##  9 9     155135
## 10 10    152500
# theme_light() is a complete theme and replaces any theme settings added
# before it, so it goes first and the legend tweak follows it.
ggplot(dayGroup, aes(day, Total)) +
  geom_bar(stat = "identity", fill = "purple", color = "white") +
  ggtitle("Trips Every Day") +
  theme_light() +
  theme(legend.position = "none") +
  scale_y_continuous(labels = comma) +
  xlab("Day")

#Trips by Date and Month

# Trip counts per (month, day of month, day of week); .groups = "drop"
# returns an ungrouped table so no leftover grouping leaks downstream.
daymg <- dataUber %>%
  group_by(month, day, daysofweek) %>%
  dplyr::summarize(Total = n(), .groups = "drop")
# Dodged bars per month, coloured by day of week.
# NOTE(review): daymg holds one row per calendar day, so several rows share
# each month/weekday pair and their bars look like they are drawn on top of
# each other rather than summed - confirm this is the intended chart.
ggplot(daymg, aes(month, Total, fill = daysofweek)) +
  geom_bar(stat = "identity", position = "dodge") +
  ggtitle("Trips By Day And Month") +
  scale_y_continuous(labels = comma) +
  scale_fill_manual(values = colorsData) +
  xlab("Month") +
  theme_light()

#Number of Trips Taking place During months in A Year
# Number of trips for each month
monthGroup <- dataUber %>%
  group_by(month) %>%
  dplyr::summarize(Total = n())
## `summarise()` ungrouping output (override with `.groups` argument)
head(monthGroup)
## # A tibble: 6 x 2
##   month   Total
##   <ord>   <int>
## 1 Apr    564516
## 2 May    652435
## 3 Jun    663844
## 4 Jul    796121
## 5 Aug    829275
## 6 Sep   1028136
# The fill aesthetic would add a legend that only repeats the x axis.
# theme_light() is a complete theme and replaces any theme settings added
# before it, so the legend is switched off AFTER it for the setting to stick.
ggplot(monthGroup, aes(month, Total, fill = month)) +
  geom_bar(stat = "identity") +
  ggtitle("Trips By Month") +
  theme_light() +
  theme(legend.position = "none") +
  scale_y_continuous(labels = comma) +
  xlab("Month")

# Trip counts per (month, day of week); .groups = "drop" returns an
# ungrouped table so no leftover grouping by month leaks downstream.
monthWeekday <- dataUber %>%
  group_by(month, daysofweek) %>%
  dplyr::summarize(Total = n(), .groups = "drop")
# Dodged bars per month, one bar for each day of the week
ggplot(monthWeekday, aes(month, Total, fill = daysofweek)) +
  geom_bar(stat = "identity", position = "dodge") +
  ggtitle("Trips By Day And Month") +
  scale_y_continuous(labels = comma) +
  scale_fill_manual(values = colorsData) +
  xlab("Month") +
  theme_light()

# Trip counts per dispatching base (geom_bar counts the rows itself)
ggplot(dataUber, aes(x = Base)) +
  geom_bar(fill = "salmon") +
  scale_y_continuous(labels = comma) +
  labs(title = "Trips By Bases", y = "Count") +
  theme_light()

# Same counts, split into side-by-side bars per month
ggplot(dataUber, aes(x = Base, fill = month)) +
  geom_bar(position = "dodge") +
  scale_y_continuous(labels = comma) +
  scale_fill_manual(values = colorsData) +
  labs(title = "Trips By Bases And Month", y = "Count") +
  theme_light()

# Same counts, split into side-by-side bars per day of the week
ggplot(dataUber, aes(x = Base, fill = daysofweek)) +
  geom_bar(position = "dodge") +
  scale_y_continuous(labels = comma) +
  scale_fill_manual(values = colorsData) +
  labs(title = "Trips By Bases And Day Of Week", y = "Count") +
  theme_light()

#Creating A Heatmap Visualisation of Day, hour and month
#.....Day and Hour...........
# Trip counts per (day of month, hour); .groups = "drop" returns an
# ungrouped table so no leftover grouping by day leaks downstream.
dayHour <- dataUber %>%
  group_by(day, hour) %>%
  dplyr::summarize(Total = n(), .groups = "drop")
# One tile per day/hour cell, shaded by the trip total
ggplot(dayHour, aes(day, hour, fill = Total)) +
  geom_tile(color = "White") +
  ggtitle("Heat Map By Hour And Day")

# Heat map: day of month on x, month on y, tiles shaded by trip total (daymg)
ggplot(daymg, aes(x = day, y = month, fill = Total)) +
  geom_tile(color = "white") +
  labs(title = "Heat Map By Month And Day")

# Heat map: day of week on x, month on y, tiles shaded by trip total
ggplot(monthWeekday, aes(x = daysofweek, y = month, fill = Total)) +
  geom_tile(color = "white") +
  labs(title = "Heat Map By Month And Day Of Week")

#Month and Bases
# Trip counts per (base, month); .groups = "drop" returns an ungrouped
# table so no leftover grouping by Base leaks downstream.
monthBase <- dataUber %>%
  group_by(Base, month) %>%
  dplyr::summarize(Total = n(), .groups = "drop")
# One tile per base/month cell, shaded by the trip total
ggplot(monthBase, aes(Base, month, fill = Total)) +
  geom_tile(color = "white") +
  ggtitle("Heat Map By Month And Bases") +
  ylab("Month") +
  theme_light()

#......Days Of Week and Bases...............
# Trip counts per (base, day of week); .groups = "drop" returns an
# ungrouped table so no leftover grouping by Base leaks downstream.
daysBases <- dataUber %>%
  group_by(Base, daysofweek) %>%
  dplyr::summarize(Total = n(), .groups = "drop")
# One tile per base/weekday cell, shaded by the trip total
ggplot(daysBases, aes(Base, daysofweek, fill = Total)) +
  geom_tile(color = "white") +
  ggtitle("Heat Map By Day Of Week And Bases") +
  ylab("Day Of Week")

#Creating A Map Visualization of Rides in NEW YORK
# Latitude / longitude bounding box used to frame both maps
minLat <- 40.5774
maxLat <- 40.9176
minLong <- -74.15
maxLong <- -73.7004

# Drop rows with missing values, then keep only the pickups that fall inside
# the bounding box. Filtering here, rather than letting the axis limits
# discard the out-of-range points, avoids the "Removed ... rows" warnings and
# guarantees both maps are drawn from exactly the same set of rides.
uber <- na.omit(dataUber)
insideBox <- uber$Lon >= minLong & uber$Lon <= maxLong &
  uber$Lat >= minLat & uber$Lat <= maxLat
uberNyc <- uber[insideBox, ]

# All pickups as blue points; the axis limits stay fixed to the bounding box
ggplot(uberNyc, aes(Lon, Lat)) +
  geom_point(size = 1, color = "blue") +
  scale_x_continuous(limits = c(minLong, maxLong)) +
  scale_y_continuous(limits = c(minLat, maxLat)) +
  theme_map() +
  ggtitle("New York Map Based On Uber Rides During 2014 From April To September")

# Same pickups, coloured by dispatching base
ggplot(uberNyc, aes(x = Lon, y = Lat, color = Base)) +
  geom_point(size = 1) +
  scale_x_continuous(limits = c(minLong, maxLong)) +
  scale_y_continuous(limits = c(minLat, maxLat)) +
  theme_map() +
  ggtitle("New York Map Based On Uber Rides During 2014 From April To September By Base")