# Load the mall customer data set into a data frame and print a per-column
# summary of it.
# NOTE(review): the path is absolute and specific to one machine; the script
# will not run elsewhere until it is adjusted.
customersatmall <- read.csv(
  file = "C:/Users/fatim/OneDrive/Desktop/Mall_Customers.csv"
)
summary(object = customersatmall)
##    CustomerID        Genre                Age        Annual.Income..k..
##  Min.   :  1.00   Length:200         Min.   :18.00   Min.   : 15.00    
##  1st Qu.: 50.75   Class :character   1st Qu.:28.75   1st Qu.: 41.50    
##  Median :100.50   Mode  :character   Median :36.00   Median : 61.50    
##  Mean   :100.50                      Mean   :38.85   Mean   : 60.56    
##  3rd Qu.:150.25                      3rd Qu.:49.00   3rd Qu.: 78.00    
##  Max.   :200.00                      Max.   :70.00   Max.   :137.00    
##  Spending.Score..1.100.
##  Min.   : 1.00         
##  1st Qu.:34.75         
##  Median :50.00         
##  Mean   :50.20         
##  3rd Qu.:73.00         
##  Max.   :99.00
# Give columns 2, 4 and 5 shorter names in a single assignment, then print
# the summary again so the new headers are visible.
# "Spending Score" contains a space, so later code must refer to it with
# backticks (`Spending Score`).
names(customersatmall)[c(2, 4, 5)] <- c(
  "Gender", "AnnualIncome", "Spending Score"
)
summary(object = customersatmall)
##    CustomerID        Gender               Age         AnnualIncome   
##  Min.   :  1.00   Length:200         Min.   :18.00   Min.   : 15.00  
##  1st Qu.: 50.75   Class :character   1st Qu.:28.75   1st Qu.: 41.50  
##  Median :100.50   Mode  :character   Median :36.00   Median : 61.50  
##  Mean   :100.50                      Mean   :38.85   Mean   : 60.56  
##  3rd Qu.:150.25                      3rd Qu.:49.00   3rd Qu.: 78.00  
##  Max.   :200.00                      Max.   :70.00   Max.   :137.00  
##  Spending Score 
##  Min.   : 1.00  
##  1st Qu.:34.75  
##  Median :50.00  
##  Mean   :50.20  
##  3rd Qu.:73.00  
##  Max.   :99.00
# Histograms of the three numeric customer attributes, one plot per column.

# Age distribution.
hist(
  x = customersatmall[["Age"]],
  main = "Age of Customers",
  xlab = "Age",
  col = "red"
)

# Annual income distribution.
hist(
  x = customersatmall[["AnnualIncome"]],
  main = "Annual Income of Customers",
  xlab = "Annual Income",
  col = "green"
)

# Spending score distribution.
hist(
  x = customersatmall[["Spending Score"]],
  main = "Spending Score of Customers",
  xlab = "Spending Score",
  col = "blue"
)

library(ggplot2)

# Scatter plots of pairs of numeric columns, with points coloured by Gender.

# Annual income against age.
ggplot(
  data = customersatmall,
  mapping = aes(x = Age, y = AnnualIncome, colour = Gender)
) +
  geom_point()

# Spending score against age.
ggplot(
  data = customersatmall,
  mapping = aes(x = Age, y = `Spending Score`, colour = Gender)
) +
  geom_point()

# Spending score against annual income.
ggplot(
  data = customersatmall,
  mapping = aes(x = AnnualIncome, y = `Spending Score`, colour = Gender)
) +
  geom_point()

# K-means analysis

# Partition the customers into three groups with k-means, using the Age and
# AnnualIncome columns (columns 3 and 4) as features. The seed is fixed so the
# randomly chosen starting centres are the same on every run.
set.seed(123)
customersegments <- kmeans(
  x = customersatmall[, c("Age", "AnnualIncome")],
  centers = 3
)
# Cluster label assigned to each row of the data frame.
segmentation <- customersegments[["cluster"]]

# Plot spending score against annual income, coloured by cluster label.
# NOTE(review): the clusters above are fitted on Age and AnnualIncome only;
# Spending Score is not a clustering feature even though it is the y axis
# here. Confirm this is intended.
ggplot(
  data = customersatmall[, 3:5],
  mapping = aes(
    x = AnnualIncome,
    y = `Spending Score`,
    colour = as.factor(segmentation)
  )
) +
  geom_point() +
  scale_colour_discrete(name = "Customer Segments")