# Load necessary libraries
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(GGally)  # For pair plot
## Warning: package 'GGally' was built under R version 4.4.2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
# Read the customer-review CSV (path is relative to the working directory).
dataset <- read.csv(file = "ECommerce_Customer_Reviews.csv")
# Preview the first six rows, which is head()'s default.
head(dataset, n = 6L)
##   Customer_ID Age Gender Order_Amount Order_Rating Delivery_Speed Review_Length
## 1    CUST0001  56 Female          280            1        Average           105
## 2    CUST0002  46 Female          239            3        Average            48
## 3    CUST0003  32   Male          274            4           Slow            88
## 4    CUST0004  60   Male          434            1           Slow            66
## 5    CUST0005  25   Male          426            1           Slow           113
## 6    CUST0006  38   Male          332            2           Fast           163
##   Is_First_Purchase Category          City
## 1                No  Fashion        Austin
## 2                No    Books      New York
## 3               Yes  Fashion San Francisco
## 4               Yes  Fashion San Francisco
## 5               Yes    Books        Austin
## 6                No    Books       Seattle
# Step 2: Perform Data Visualization
# a. Single Variable Visualizations

# Bar plot: number of rows in each Category.
dataset %>%
  ggplot(mapping = aes(x = Category)) +
  geom_bar(fill = "skyblue") +
  labs(title = "Bar Plot of Categories")

# Pie chart: share of rows for each Is_First_Purchase value.
# First tally the rows per Is_First_Purchase value.
first_purchase_count <- summarise(
  group_by(dataset, Is_First_Purchase),
  count = n()
)

# A single stacked bar (constant x) bent around the y axis turns each
# segment into a pie slice.
ggplot(
  data = first_purchase_count,
  mapping = aes(x = "", y = count, fill = Is_First_Purchase)
) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  labs(title = "Pie Chart of First Purchase Status")

# Box plot: distribution of Order_Amount across the whole dataset.
dataset %>%
  ggplot(mapping = aes(y = Order_Amount)) +
  geom_boxplot(fill = "orange") +
  labs(title = "Box Plot of Order Amount")

# Density plot: smoothed distribution of Age, drawn half-transparent.
ggplot(data = dataset, mapping = aes(x = Age)) +
  geom_density(alpha = 0.5, fill = "purple") +
  labs(title = "Density Plot of Age")

# Histogram: Review_Length counted in bins 10 units wide.
dataset %>%
  ggplot(mapping = aes(x = Review_Length)) +
  geom_histogram(
    binwidth = 10,
    fill = "green",
    colour = "black"
  ) +
  labs(title = "Histogram of Review Length")

# b. Two-Variable Visualizations
# Bar plot: row counts per Category, with side-by-side bars for each
# Is_First_Purchase value.
dataset %>%
  ggplot(mapping = aes(x = Category, fill = Is_First_Purchase)) +
  geom_bar(position = "dodge") +
  labs(title = "Bar Plot of Categories by First Purchase Status")

# Scatter plot: Review_Length against Order_Amount, points coloured by Gender.
dataset %>%
  ggplot(
    mapping = aes(x = Order_Amount, y = Review_Length, colour = Gender)
  ) +
  geom_point() +
  labs(title = "Scatter Plot of Order Amount vs Review Length")

# Violin plot: distribution of Order_Amount within each Delivery_Speed level,
# with each violin filled by its Delivery_Speed.
ggplot(
  data = dataset,
  mapping = aes(x = Delivery_Speed, y = Order_Amount, fill = Delivery_Speed)
) +
  geom_violin() +
  labs(title = "Violin Plot of Order Amount by Delivery Speed")

# Box plot: distribution of Order_Rating within each Delivery_Speed level,
# with each box filled by its Delivery_Speed.
ggplot(
  data = dataset,
  mapping = aes(x = Delivery_Speed, y = Order_Rating, fill = Delivery_Speed)
) +
  geom_boxplot() +
  labs(title = "Box Plot of Ratings by Delivery Speed")

# c. Multivariable Visualizations

# Multivariable bar plot: average Order_Amount by Category and Gender.
# Compute the mean Order_Amount for every Category/Gender combination.
# `.groups = "drop"` returns an ungrouped result; without it summarise()
# leaves the output grouped by Category and prints a message saying so.
avg_order <- dataset %>%
  group_by(Category, Gender) %>%
  summarise(avg_order_amount = mean(Order_Amount), .groups = "drop")

# The heights are already computed, so geom_col() plots them as-is;
# "dodge" places the Gender bars side by side within each Category.
ggplot(avg_order, aes(x = Category, y = avg_order_amount, fill = Gender)) +
  geom_col(position = "dodge") +
  ggtitle("Bar Plot of Average Order Amount by Category and Gender")

# Pair plot: pairwise relationships among Age, Order_Amount and Review_Length.
# Only these three columns are passed to ggpairs().
dataset[c("Age", "Order_Amount", "Review_Length")] %>%
  ggpairs(title = "Pair Plot of Numeric Variables")