This is an R Markdown document for Assignment 1 by Mehreen Ali Gillani. I downloaded the dataset from Kaggle: https://www.kaggle.com/datasets/haseebindata/restaurant-orders. A link to the dataset is also in my GitHub repository: https://github.com/mehreengillani/Assignment1.git. It is a small dataset with 500 entries and 8 variables.
#In the next few code blocks we will check the data types and the unique values of the categorical variables.
In this code block I import the tidyverse library, read the CSV file and show the first rows of the data.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the restaurant orders CSV (path relative to the working directory)
# into a data frame.
data <- read.csv(file = "restaurant_orders.csv")
# Preview the first six rows.
head(data, n = 6L)
## Order.ID Customer.Name Food.Item Category Quantity Price Payment.Method
## 1 2268 Mary Vega DDS Pasta Main 5 16.52 Cash
## 2 3082 Brandon Myers Brownie Dessert 4 17.27 Debit Card
## 3 3160 Margaret Wells Pasta Main 1 3.37 Credit Card
## 4 1272 Michael Matthews Pasta Main 5 2.20 Online Payment
## 5 9447 Connor Williams Soup Starter 1 12.23 Cash
## 6 1587 Matthew Miles Brownie Dessert 5 7.39 Credit Card
## Order.Time
## 1 2025-02-02 14:28:41
## 2 2025-06-08 10:57:47
## 3 2025-03-04 07:41:41
## 4 2025-05-15 12:43:45
## 5 2025-03-15 14:25:56
## 6 2025-04-12 05:49:18
# Check the dimensions of the data set (number of rows, number of columns)
dim(data)
## [1] 500 8
# Open the data set in the data viewer
view(data)
# Display a summary of every column in the data set
summary(data)
## Order.ID Customer.Name Food.Item Category
## Min. :1055 Length:500 Length:500 Length:500
## 1st Qu.:3342 Class :character Class :character Class :character
## Median :5762 Mode :character Mode :character Mode :character
## Mean :5683
## 3rd Qu.:7945
## Max. :9997
## Quantity Price Payment.Method Order.Time
## Min. :1.00 Min. : 2.06 Length:500 Length:500
## 1st Qu.:2.00 1st Qu.: 7.28 Class :character Class :character
## Median :3.00 Median :13.30 Mode :character Mode :character
## Mean :3.03 Mean :13.20
## 3rd Qu.:4.00 3rd Qu.:19.08
## Max. :5.00 Max. :24.99
# List the distinct food items that appear in the data set
unique(data$Food.Item)
## [1] "Pasta" "Brownie" "Soup" "Cake" "Burger" "Ice Cream"
## [7] "Fries" "Pizza" "Salad"
# Count the number of rows (orders) for each food item
table(data$Food.Item)
##
## Brownie Burger Cake Fries Ice Cream Pasta Pizza Salad
## 63 51 53 60 52 48 68 55
## Soup
## 50
# Bar chart of the number of orders per food item, sorted in ascending order.
# las = 2 draws the axis labels perpendicular to the axes so the item names fit.
barplot(
  height = sort(table(data$Food.Item)),
  main = "Food Items order frequency",
  ylab = "Frequency",
  las = 2
)
# List the distinct order categories that appear in the data set
unique(data$Category)
## [1] "Main" "Dessert" "Starter"
# Count the number of rows (orders) in each category
table(data$Category)
##
## Dessert Main Starter
## 168 167 165
# Bar chart of the number of orders per category, sorted in ascending order.
barplot(
  height = sort(table(data$Category)),
  main = "Category order frequency",
  ylab = "Frequency"
)
# List the distinct payment methods in the data set; the counts per method follow
unique(data$Payment.Method)
## [1] "Cash" "Debit Card" "Credit Card" "Online Payment"
# Count the orders per payment method, sorted from least to most frequent
sort(table(data$Payment.Method))
##
## Debit Card Online Payment Credit Card Cash
## 119 121 128 132
# Summarise the Price variable (minimum, quartiles, median, mean, maximum)
summary(data$Price)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.06 7.28 13.30 13.20 19.08 24.99
# Draw a boxplot of the Price variable
boxplot(data$Price)
# Build the cleaned data frame `clean` by dropping every row that contains an NA.
# The summary() output reports no "NA's" entries for the numeric columns, so
# this step is expected to leave the data unchanged.
library(tidyr)
clean <- data %>%
  drop_na()
# Compute the mean order quantity
mean(clean$Quantity)
## [1] 3.03
# Add an order.size factor column that labels each order by its quantity:
# "large" when the quantity is above the mean quantity, "small" otherwise.
clean$order.size <- with(
  clean,
  factor(ifelse(Quantity > mean(Quantity), "large", "small"))
)
# Remove the Customer.Name column from the cleaned data, then preview the
# first six rows of the result.
library(dplyr)
clean <- select(clean, -Customer.Name)
head(clean, n = 6L)
## Order.ID Food.Item Category Quantity Price Payment.Method Order.Time
## 1 2268 Pasta Main 5 16.52 Cash 2025-02-02 14:28:41
## 2 3082 Brownie Dessert 4 17.27 Debit Card 2025-06-08 10:57:47
## 3 3160 Pasta Main 1 3.37 Credit Card 2025-03-04 07:41:41
## 4 1272 Pasta Main 5 2.20 Online Payment 2025-05-15 12:43:45
## 5 9447 Soup Starter 1 12.23 Cash 2025-03-15 14:25:56
## 6 1587 Brownie Dessert 5 7.39 Credit Card 2025-04-12 05:49:18
## order.size
## 1 large
## 2 large
## 3 small
## 4 large
## 5 small
## 6 large
#Conclusion: This is a small dataset about restaurant orders. Food item, order category, order quantity (a whole number from 1 to 5) and payment method are treated as categorical features. In the future, I will fit a linear regression model to predict the order price.