# Load necessary libraries
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(GGally) # For pair plot
## Warning: package 'GGally' was built under R version 4.4.2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Read the customer-review CSV; the relative path is resolved against the
# current working directory.
dataset <- read.csv(file = "ECommerce_Customer_Reviews.csv", header = TRUE)
# Preview the first six rows to sanity-check the import.
head(dataset, n = 6L)
# The `##` lines below appear to be console output captured when the script
# was rendered; column alignment was lost in the paste.
## Customer_ID Age Gender Order_Amount Order_Rating Delivery_Speed Review_Length
## 1 CUST0001 56 Female 280 1 Average 105
## 2 CUST0002 46 Female 239 3 Average 48
## 3 CUST0003 32 Male 274 4 Slow 88
## 4 CUST0004 60 Male 434 1 Slow 66
## 5 CUST0005 25 Male 426 1 Slow 113
## 6 CUST0006 38 Male 332 2 Fast 163
## Is_First_Purchase Category City
## 1 No Fashion Austin
## 2 No Books New York
## 3 Yes Fashion San Francisco
## 4 Yes Fashion San Francisco
## 5 Yes Books Austin
## 6 No Books Seattle
# Step 2: Perform Data Visualization
# a. Single-variable visualizations
# Bar plot: number of rows in each Category (geom_bar counts rows by default).
dataset %>%
  ggplot(mapping = aes(x = Category)) +
  geom_bar(fill = "skyblue") +
  labs(title = "Bar Plot of Categories")

# Pie chart: share of rows for each Is_First_Purchase value.
# First tally the rows per Is_First_Purchase value.
first_purchase_count <- summarise(
  group_by(dataset, Is_First_Purchase),
  count = n()
)
# A single full-width stacked bar mapped onto polar coordinates (angle = y)
# renders as a pie.
ggplot(
  data = first_purchase_count,
  mapping = aes(x = "", y = count, fill = Is_First_Purchase)
) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  labs(title = "Pie Chart of First Purchase Status")

# Box plot: distribution of Order_Amount across all rows.
ggplot(data = dataset) +
  geom_boxplot(mapping = aes(y = Order_Amount), fill = "orange") +
  labs(title = "Box Plot of Order Amount")

# Density plot: smoothed distribution of Age, drawn semi-transparent.
ggplot(data = dataset) +
  geom_density(mapping = aes(x = Age), fill = "purple", alpha = 0.5) +
  labs(title = "Density Plot of Age")

# Histogram: Review_Length in bins of width 10, with black bin outlines.
dataset %>%
  ggplot(mapping = aes(x = Review_Length)) +
  geom_histogram(binwidth = 10, fill = "green", color = "black") +
  labs(title = "Histogram of Review Length")

# b. Two-variable visualizations
# Bar plot: row count per Category, with one side-by-side bar for each
# Is_First_Purchase value.
ggplot(
  data = dataset,
  mapping = aes(x = Category, fill = Is_First_Purchase)
) +
  geom_bar(position = "dodge") +
  labs(title = "Bar Plot of Categories by First Purchase Status")

# Scatter plot: Review_Length against Order_Amount, points coloured by Gender.
dataset %>%
  ggplot(mapping = aes(x = Order_Amount, y = Review_Length, color = Gender)) +
  geom_point() +
  labs(title = "Scatter Plot of Order Amount vs Review Length")

# Violin plot: distribution of Order_Amount within each Delivery_Speed group;
# fill colour also tracks Delivery_Speed.
ggplot(
  data = dataset,
  mapping = aes(x = Delivery_Speed, y = Order_Amount, fill = Delivery_Speed)
) +
  geom_violin() +
  labs(title = "Violin Plot of Order Amount by Delivery Speed")

# Box plot: distribution of Order_Rating within each Delivery_Speed group;
# fill colour also tracks Delivery_Speed.
dataset %>%
  ggplot(
    mapping = aes(x = Delivery_Speed, y = Order_Rating, fill = Delivery_Speed)
  ) +
  geom_boxplot() +
  labs(title = "Box Plot of Ratings by Delivery Speed")

# c. Multivariable visualizations
# Multivariable bar plot: average Order_Amount by Category and Gender.
#
# Compute the mean Order_Amount for every Category/Gender pair.
# `.groups = "drop"` returns an ungrouped result: without it, summarise()
# keeps the output grouped by Category and prints a
# "`summarise()` has grouped output by 'Category'" message on every run.
avg_order <- dataset %>%
  group_by(Category, Gender) %>%
  summarise(avg_order_amount = mean(Order_Amount), .groups = "drop")
# geom_col() takes bar heights directly from the precomputed averages;
# "dodge" places the Gender bars side by side within each Category.
ggplot(avg_order, aes(x = Category, y = avg_order_amount, fill = Gender)) +
  geom_col(position = "dodge") +
  ggtitle("Bar Plot of Average Order Amount by Category and Gender")

# Pair plot: pairwise relationships among Age, Order_Amount and Review_Length.
# Only these three columns are passed to ggpairs().
ggpairs(
  data = dataset[c("Age", "Order_Amount", "Review_Length")],
  title = "Pair Plot of Numeric Variables"
)
