Introduction

This is R Markdown assignment 1 by Mehreen Ali Gillani. I downloaded the dataset from Kaggle: <https://www.kaggle.com/datasets/haseebindata/restaurant-orders>. A link to the dataset is also in my GitHub repository: <https://github.com/mehreengillani/Assignment1.git>. It is a small dataset with 500 entries and 8 variables.

#In the next few code blocks we will check our data types and the unique values of the categorical variables

In this code block I load the tidyverse library, read the CSV file, and show the first rows of the data.

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the restaurant orders CSV into a data frame and preview its first six rows.
data <- read.csv(file = "restaurant_orders.csv")
head(data, n = 6L)
##   Order.ID    Customer.Name Food.Item Category Quantity Price Payment.Method
## 1     2268    Mary Vega DDS     Pasta     Main        5 16.52           Cash
## 2     3082    Brandon Myers   Brownie  Dessert        4 17.27     Debit Card
## 3     3160   Margaret Wells     Pasta     Main        1  3.37    Credit Card
## 4     1272 Michael Matthews     Pasta     Main        5  2.20 Online Payment
## 5     9447  Connor Williams      Soup  Starter        1 12.23           Cash
## 6     1587    Matthew Miles   Brownie  Dessert        5  7.39    Credit Card
##            Order.Time
## 1 2025-02-02 14:28:41
## 2 2025-06-08 10:57:47
## 3 2025-03-04 07:41:41
## 4 2025-05-15 12:43:45
## 5 2025-03-15 14:25:56
## 6 2025-04-12 05:49:18

#lets check this data dimension

# Number of rows and columns in the orders data.
dim(x = data)
## [1] 500   8

#lets view this dataset

# Open the data frame in the data viewer (tibble's view(), attached via tidyverse).
tibble::view(data)

#Lets display the summary of dataset

# Per-column summary: quartiles for numeric columns, length/class for character columns.
summary(object = data)
##     Order.ID    Customer.Name       Food.Item           Category        
##  Min.   :1055   Length:500         Length:500         Length:500        
##  1st Qu.:3342   Class :character   Class :character   Class :character  
##  Median :5762   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :5683                                                           
##  3rd Qu.:7945                                                           
##  Max.   :9997                                                           
##     Quantity        Price       Payment.Method      Order.Time       
##  Min.   :1.00   Min.   : 2.06   Length:500         Length:500        
##  1st Qu.:2.00   1st Qu.: 7.28   Class :character   Class :character  
##  Median :3.00   Median :13.30   Mode  :character   Mode  :character  
##  Mean   :3.03   Mean   :13.20                                        
##  3rd Qu.:4.00   3rd Qu.:19.08                                        
##  Max.   :5.00   Max.   :24.99

#Lets display how many unique food items are in this dataset

# Distinct food items, in order of first appearance.
unique(data[["Food.Item"]])
## [1] "Pasta"     "Brownie"   "Soup"      "Cake"      "Burger"    "Ice Cream"
## [7] "Fries"     "Pizza"     "Salad"
# Number of orders per food item.
table(data[["Food.Item"]])
## 
##   Brownie    Burger      Cake     Fries Ice Cream     Pasta     Pizza     Salad 
##        63        51        53        60        52        48        68        55 
##      Soup 
##        50

#Let's display a bar chart of the food items

# Bar chart of order counts per food item, sorted ascending;
# las = 2 draws the axis labels perpendicular to the axes.
barplot(
  height = sort(table(data[["Food.Item"]])),
  las = 2,
  main = "Food Items order frequency",
  ylab = "Frequency"
)

#Lets display how many unique categories are in this dataset

# Distinct order categories, in order of first appearance.
unique(data[["Category"]])
## [1] "Main"    "Dessert" "Starter"
# Number of orders per category.
table(data[["Category"]])
## 
## Dessert    Main Starter 
##     168     167     165

#Let's display a bar chart of the order categories

# Bar chart of order counts per category, sorted ascending.
barplot(
  height = sort(table(data[["Category"]])),
  main = "Category order frequency",
  ylab = "Frequency"
)

#Let's display the unique payment methods in this dataset, and how many orders used each method

# Distinct payment methods, in order of first appearance.
unique(data[["Payment.Method"]])
## [1] "Cash"           "Debit Card"     "Credit Card"    "Online Payment"
# Number of orders per payment method, from least to most used.
sort(table(data[["Payment.Method"]]))
## 
##     Debit Card Online Payment    Credit Card           Cash 
##            119            121            128            132

#lets see the summary of price variable

# Five-number summary plus mean of the price column.
summary(data[["Price"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.06    7.28   13.30   13.20   19.08   24.99

#lets display the boxplot of price variable

# Box plot showing the distribution of the price column.
boxplot(data[["Price"]])

#Let's create a clean data frame by dropping any rows with missing (NA) values from data

library(tidyr)

# Keep only rows with no NA in any column. No missing values are expected in
# this dataset, so this should leave the data unchanged.
clean <- tidyr::drop_na(data)

#Lets see mean value for order quantity

# Average number of items per order.
mean(clean[["Quantity"]])
## [1] 3.03

#Let's create another column/variable to show whether an order is large or small, depending on the order quantity. If the quantity is above the mean value, the order is large; otherwise it is small

# Label each order "large" when its quantity exceeds the mean quantity and
# "small" otherwise, and store the labels as a factor column.
clean$order.size <- factor(
  with(clean, ifelse(Quantity > mean(Quantity), "large", "small"))
)

#lets drop Customer.Name column from data

library(dplyr)
# Remove the customer name column, then preview the first six rows of the result.
clean <- dplyr::select(clean, -Customer.Name)
head(clean, n = 6L)
##   Order.ID Food.Item Category Quantity Price Payment.Method          Order.Time
## 1     2268     Pasta     Main        5 16.52           Cash 2025-02-02 14:28:41
## 2     3082   Brownie  Dessert        4 17.27     Debit Card 2025-06-08 10:57:47
## 3     3160     Pasta     Main        1  3.37    Credit Card 2025-03-04 07:41:41
## 4     1272     Pasta     Main        5  2.20 Online Payment 2025-05-15 12:43:45
## 5     9447      Soup  Starter        1 12.23           Cash 2025-03-15 14:25:56
## 6     1587   Brownie  Dessert        5  7.39    Credit Card 2025-04-12 05:49:18
##   order.size
## 1      large
## 2      large
## 3      small
## 4      large
## 5      small
## 6      large

#Conclusion: This is a small dataset about restaurant orders. Food item, order category, order quantity, and payment method are categorical features. In the future, I will fit a linear regression model to predict order price.