# Import the mall customer data set and show summary statistics per column.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file exists.
customersatmall <- read.csv(
  file = "C:/Users/fatim/OneDrive/Desktop/Mall_Customers.csv"
)
summary(object = customersatmall)
## CustomerID Genre Age Annual.Income..k..
## Min. : 1.00 Length:200 Min. :18.00 Min. : 15.00
## 1st Qu.: 50.75 Class :character 1st Qu.:28.75 1st Qu.: 41.50
## Median :100.50 Mode :character Median :36.00 Median : 61.50
## Mean :100.50 Mean :38.85 Mean : 60.56
## 3rd Qu.:150.25 3rd Qu.:49.00 3rd Qu.: 78.00
## Max. :200.00 Max. :70.00 Max. :137.00
## Spending.Score..1.100.
## Min. : 1.00
## 1st Qu.:34.75
## Median :50.00
## Mean :50.20
## 3rd Qu.:73.00
## Max. :99.00
# Give columns 2, 4 and 5 (Genre, Annual.Income..k.., Spending.Score..1.100.)
# shorter, readable names in a single vectorised assignment.
# `Spending Score` contains a space, so it must be quoted with backticks
# wherever it is referenced as a bare name.
colnames(customersatmall)[c(2, 4, 5)] <- c(
  "Gender",
  "AnnualIncome",
  "Spending Score"
)
summary(object = customersatmall)
## CustomerID Gender Age AnnualIncome
## Min. : 1.00 Length:200 Min. :18.00 Min. : 15.00
## 1st Qu.: 50.75 Class :character 1st Qu.:28.75 1st Qu.: 41.50
## Median :100.50 Mode :character Median :36.00 Median : 61.50
## Mean :100.50 Mean :38.85 Mean : 60.56
## 3rd Qu.:150.25 3rd Qu.:49.00 3rd Qu.: 78.00
## Max. :200.00 Max. :70.00 Max. :137.00
## Spending Score
## Min. : 1.00
## 1st Qu.:34.75
## Median :50.00
## Mean :50.20
## 3rd Qu.:73.00
## Max. :99.00
# Histogram of customer ages.
hist(
  x = customersatmall[["Age"]],
  main = "Age of Customers",
  xlab = "Age",
  col = "red"
)

# Histogram of annual income.
hist(
  x = customersatmall[["AnnualIncome"]],
  main = "Annual Income of Customers",
  xlab = "Annual Income",
  col = "green"
)

# Histogram of spending score.
hist(
  x = customersatmall[["Spending Score"]],
  main = "Spending Score of Customers",
  xlab = "Spending Score",
  col = "blue"
)
library(ggplot2)
# Scatter plots coloured by Gender (demographic segmentation).

# Annual income against age.
ggplot(
  data = customersatmall,
  mapping = aes(x = Age, y = AnnualIncome, colour = Gender)
) +
  geom_point()

# Spending score against age.
ggplot(
  data = customersatmall,
  mapping = aes(x = Age, y = `Spending Score`, colour = Gender)
) +
  geom_point()

# Spending score against annual income.
ggplot(
  data = customersatmall,
  mapping = aes(x = AnnualIncome, y = `Spending Score`, colour = Gender)
) +
  geom_point()
# K-means analysis
# Partition the customers into three clusters with k-means on Age and
# AnnualIncome. The seed is fixed so the random initialisation is reproducible.
set.seed(123)
# Features are selected by name rather than by position (3:4) so the
# clustering does not silently change if the column order changes.
# nstart = 25 runs k-means from 25 random initialisations and keeps the best
# fit, instead of relying on a single start that may be a poor local optimum.
customersegments <- kmeans(
  customersatmall[, c("Age", "AnnualIncome")],
  centers = 3,
  nstart = 25
)
# Integer cluster label (1..3) for each customer, in row order.
segmentation <- customersegments$cluster

# Visualization using ggplot - Demographic & Behavioral Segmentation.
# NOTE(review): clusters are fitted on Age and AnnualIncome but displayed on
# AnnualIncome vs. Spending Score - confirm this is the intended view.
ggplot(
  data = customersatmall,
  mapping = aes(
    x = AnnualIncome,
    y = `Spending Score`,
    colour = factor(segmentation)
  )
) +
  geom_point() +
  scale_color_discrete(name = "Customer Segments")