This dataset gives you an idea about the time taken for food to be delivered based upon the restaurant location, delivery location, type of food, the transport that is used to deliver the food and the delivery person ratings.
# Load the food delivery records from CSV and preview the first six rows.
# NOTE(review): the path is an absolute, machine-specific location.
final <- read.csv(file = "C:/Users/sudhe/Desktop/R language/food delivery.csv")
head(x = final, n = 6L)
## ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings
## 1 4607 INDORES13DEL02 37 4.9
## 2 B379 BANGRES18DEL02 34 4.5
## 3 5D6D BANGRES19DEL01 23 4.4
## 4 7A6A COIMBRES13DEL02 38 4.7
## 5 70A2 CHENRES12DEL01 32 4.6
## 6 9BB4 HYDRES09DEL03 22 4.8
## Restaurant_latitude Restaurant_longitude Delivery_location_latitude
## 1 22.74505 75.89247 22.76505
## 2 12.91304 77.68324 13.04304
## 3 12.91426 77.67840 12.92426
## 4 11.00367 76.97649 11.05367
## 5 12.97279 80.24998 13.01279
## 6 17.43167 78.40832 17.46167
## Delivery_location_longitude Type_of_order Type_of_vehicle Time_taken.min.
## 1 75.91247 Snack motorcycle 24
## 2 77.81324 Snack scooter 33
## 3 77.68840 Drinks motorcycle 26
## 4 77.02649 Buffet motorcycle 21
## 5 80.28998 Snack scooter 30
## 6 78.43832 Buffet motorcycle 26
# Per-column summary statistics (quartiles for numeric columns, length/class
# for character columns).
summary(object = final)
## ID Delivery_person_ID Delivery_person_Age
## Length:45593 Length:45593 Min. :15.00
## Class :character Class :character 1st Qu.:25.00
## Mode :character Mode :character Median :29.00
## Mean :29.54
## 3rd Qu.:34.00
## Max. :50.00
## Delivery_person_Ratings Restaurant_latitude Restaurant_longitude
## Min. :1.000 Min. :-30.91 Min. :-88.37
## 1st Qu.:4.600 1st Qu.: 12.93 1st Qu.: 73.17
## Median :4.700 Median : 18.55 Median : 75.90
## Mean :4.632 Mean : 17.02 Mean : 70.23
## 3rd Qu.:4.800 3rd Qu.: 22.73 3rd Qu.: 78.04
## Max. :6.000 Max. : 30.91 Max. : 88.43
## Delivery_location_latitude Delivery_location_longitude Type_of_order
## Min. : 0.01 Min. : 0.01 Length:45593
## 1st Qu.:12.99 1st Qu.:73.28 Class :character
## Median :18.63 Median :76.00 Mode :character
## Mean :17.47 Mean :70.85
## 3rd Qu.:22.79 3rd Qu.:78.11
## Max. :31.05 Max. :88.56
## Type_of_vehicle Time_taken.min.
## Length:45593 Min. :10.00
## Class :character 1st Qu.:19.00
## Mode :character Median :26.00
## Mean :26.29
## 3rd Qu.:32.00
## Max. :54.00
# Inspect the structure of the food delivery data frame: dimensions, column
# names, column types and the first few values of each column.
str(object = final)
## 'data.frame': 45593 obs. of 11 variables:
## $ ID : chr "4607" "B379" "5D6D" "7A6A" ...
## $ Delivery_person_ID : chr "INDORES13DEL02" "BANGRES18DEL02" "BANGRES19DEL01" "COIMBRES13DEL02" ...
## $ Delivery_person_Age : int 37 34 23 38 32 22 33 35 22 36 ...
## $ Delivery_person_Ratings : num 4.9 4.5 4.4 4.7 4.6 4.8 4.7 4.6 4.8 4.2 ...
## $ Restaurant_latitude : num 22.7 12.9 12.9 11 13 ...
## $ Restaurant_longitude : num 75.9 77.7 77.7 77 80.2 ...
## $ Delivery_location_latitude : num 22.8 13 12.9 11.1 13 ...
## $ Delivery_location_longitude: num 75.9 77.8 77.7 77 80.3 ...
## $ Type_of_order : chr "Snack " "Snack " "Drinks " "Buffet " ...
## $ Type_of_vehicle : chr "motorcycle " "scooter " "motorcycle " "motorcycle " ...
## $ Time_taken.min. : int 24 33 26 21 30 26 40 32 34 46 ...
Data layer: the data layer specifies the source of the information to be visualized, i.e. the food delivery time dataset loaded above.
For the plots, we use the ggplot2 library. ggplot2 is a popular R data visualization package that provides an intuitive and flexible framework for creating a wide range of high-quality, customized graphs and plots for data analysis and presentation.
library(ggplot2)
# Data layer: attach the data frame to a plot that has no geoms yet, so only
# the titled, empty canvas is drawn.
ggplot(final) +
  labs(title = "food delivery time")
Aesthetic layer: here we map variables from the dataset onto certain aesthetics (x position, y position and colour).
library(ggplot2)
# Aesthetic layer: restaurant latitude on x, delivery latitude on y, and one
# colour per vehicle type; the plot is stored in `p` and printed explicitly.
p <- ggplot(
  data = final,
  mapping = aes(
    x = Restaurant_latitude,
    y = Delivery_location_latitude,
    color = Type_of_vehicle
  )
) +
  geom_point() +
  labs(title = "Food Delivery Time Data Plot")
print(p)
# Scatter plot of delivery time against delivery-person rating, drawn with
# purple points.
ggplot(
  data = final,
  mapping = aes(x = Delivery_person_Ratings, y = Time_taken.min.)
) +
  geom_point(color = "purple") +
  labs(
    title = "Scatter Plot ",
    x = "Delivery_person_Ratings",
    y = "Time_taken.min."
  )
# Count the deliveries made with each vehicle type (columns Var1 / Freq) and
# draw the counts as bars ordered by increasing frequency.
Type_of_vehicle <- as.data.frame(table(final$Type_of_vehicle))
ggplot(Type_of_vehicle, aes(x = reorder(Var1, Freq), y = Freq)) +
  geom_bar(stat = "identity", width = 0.5, fill = "purple") +
  theme_minimal() +
  # Applied after theme_minimal() so the bold axis text is not reset.
  theme(axis.text.x = element_text(face = "bold")) +
  labs(
    title = "The most used transport to deliver orders",
    x = "Type_of_vehicle",
    y = "Count"
  )
Motorcycles are the most used mode of transport for delivering food, and bicycles are the least preferred.
# Sum the delivery-person ratings for each vehicle type (one row per type).
# Inside the formula, Type_of_vehicle refers to the column of `final`.
Type_of_vehicle <- aggregate(
  Delivery_person_Ratings ~ Type_of_vehicle,
  data = final,
  FUN = sum
)

# Keep at most the 10 vehicle types with the highest rating totals.
# Sorting followed by head() never returns more rows than exist, whereas
# `[1:10, ]` indexes past the end of a shorter table and produces all-NA rows
# that ggplot2 then has to drop with a warning.
rating_order <- order(Type_of_vehicle$Delivery_person_Ratings, decreasing = TRUE)
top_vehicles <- head(Type_of_vehicle[rating_order, ], 10)

# Horizontal bars, ordered by rating total.
ggplot(
  top_vehicles,
  mapping = aes(
    y = reorder(Type_of_vehicle, Delivery_person_Ratings),
    x = Delivery_person_Ratings
  )
) +
  geom_bar(
    stat = "identity",
    width = 0.5,
    fill = "lightblue"
  ) +
  theme_minimal() +
  theme(axis.text.y = element_text(face = "bold")) +
  labs(
    title = "Popular vehicles used",
    subtitle = "By Delivery_person_Ratings ",
    y = "Type_of_vehicle "
  )
Next, we add further aesthetics (size, shape and color) to the scatter plots; the histogram is plotted further below.
# Adding size
# Scatter plot of restaurant latitude against delivery latitude, with point
# size mapped to the vehicle type. Type_of_vehicle is a discrete (character)
# column, so ggplot2 emits its "size for a discrete variable" warning here.
ggplot(
  final,
  aes(
    x = Restaurant_latitude,
    y = Delivery_location_latitude,
    size = Type_of_vehicle
  )
) +
  geom_point() +
  labs(
    title = "Restaurant_latitude vs Delivery_location_latitude",
    x = "restaurant_latitude ",
    y = "Delivery_location_latitude"
  )
## Warning: Using size for a discrete variable is not advised.
# Adding shape and color
# Colour encodes the rating and shape encodes the vehicle type.
# Shape is mapped to Type_of_vehicle rather than to the delivery person's age:
# ggplot2's default shape palette supports at most 6 discrete values, and any
# point whose level gets no shape is silently dropped from the plot. Age has
# far more distinct values than that, so most rows would disappear.
ggplot(
  data = final,
  mapping = aes(
    x = Delivery_person_Ratings,
    y = Time_taken.min.,
    col = factor(Delivery_person_Ratings),
    shape = Type_of_vehicle
  )
) +
  geom_point() +
  labs(
    title = "Delivery_person_Ratings vs Time_taken.min.",
    x = "Delivery_person_Ratings",
    y = "Time_taken.min."
  )
# Histogram of delivery times in 5-minute bins; the column is coerced to
# integer first.
final$Time_taken.min. <- as.integer(final$Time_taken.min.)
ggplot(final, aes(x = Time_taken.min.)) +
  geom_histogram(
    binwidth = 5,
    fill = "lightblue",
    color = "black"
  ) +
  labs(
    title = "Histogram of Time_taken.min.",
    x = "Time_taken.min.",
    y = "Count"
  )
# Geoms: bar chart – geom_bar(); boxplot – geom_boxplot(); scatter plot – geom_point()
# Bar chart of the number of orders per order type, one fill colour per type.
ggplot(
  final,
  mapping = aes(x = as.factor(Type_of_order), fill = Type_of_order)
) +
  geom_bar()
# Boxplot of delivery time for each order type; the plot is stored in `bx`
# and then displayed.
bx <- ggplot(final, aes(x = factor(Type_of_order), y = Time_taken.min.)) +
  geom_boxplot(fill = "blue") +
  labs(
    title = "Distribution of food deliveries",
    x = "Type_of_order ",
    y = "Time_taken.min."
  )
bx
# Pie chart of order types. Each slice is labelled "<type> <share>%", where
# the share is the rounded percentage of all orders.
Type_of_order <- table(final$Type_of_order)
share <- round(100 * prop.table(Type_of_order))
final.labels <- paste0(names(Type_of_order), " ", share, "%")
pie(
  Type_of_order,
  labels = final.labels,
  clockwise = TRUE,
  col = heat.colors(length(final.labels)),
  main = "Frequency of Type_of_order "
)