Myra Hallman Mini Project 4

library(dplyr)
soap.df <- read.csv("BathSoap.csv")
  1. Pre-process the data to make it ready for analysis.
vars1.df <- select(soap.df, SEX, AGE, No.of.Brands, Avg.Price, Pur.Vol.No.Promo)
vars1.norm.df <- mutate_all(vars1.df, scale)
vars2.df <- select(soap.df, Pur.Vol.Price.Cat.1,Pur.Vol.Price.Cat.2, 
                   Pur.Vol.Price.Cat.3, Pur.Vol.Price.Cat.4 )
vars2.norm.df <- mutate_all(vars2.df, scale)
vars3.df <- select(soap.df, Brand.Runs, Avg.Price, Pur.Vol.No.Promo, 
                   Pur.Vol.Price.Cat.1, Pur.Vol.Price.Cat.2, 
                   Pur.Vol.Price.Cat.3, Pur.Vol.Price.Cat.4 )
vars3.norm.df <- mutate_all(vars3.df, scale)
  1. Use k-means clustering to identify clusters of households based on: • The variables that describe purchase behavior (including brand loyalty)
set.seed(2)
kmeans1.result <- kmeans(vars1.norm.df, centers = 4)
kmeans1.result$tot.withinss
[1] 1681.148
kmeans1.result$size
[1]  69 162 194 175
soap.df$segment <- kmeans1.result$cluster

• The variables that describe the basis for purchase

set.seed(2)
kmeans2.result <- kmeans(vars2.norm.df, centers = 4)
kmeans2.result$tot.withinss
[1] 517.6087
kmeans2.result$size
[1]  79 130  54 337
soap.df$segment <- kmeans2.result$cluster

• The variables that describe both purchase behavior and basis of purchase In each case, choose the number of segments (k). You may combine the existing variables to create alternative measures of loyalty and include them in the analysis.

Note 1: How should k be chosen? Think about how the clusters would be used. It is likely that the marketing efforts would support two to five different promotional approaches.

Note 2: How should the percentages of total purchases comprised by various brands be treated? Isn’t a customer who buys all brand A just as loyal as a customer who buys all brand B? What will be the effect on any distance measure of using the brand share variables as is? Consider using a single derived variable.

set.seed(2)
kmeans3.result <- kmeans(vars3.norm.df, centers = 4)
kmeans3.result$tot.withinss
[1] 1874.06
kmeans3.result$size
[1]  76 136  54 334
soap.df$segment <- kmeans3.result$cluster
  1. Select what you think is the best segmentation.

• The variables that describe the basis for purchase - Percent of volume purchased under the price category, Age, Sex, Household size.

  1. Create customer persona for each of the segments: Comment on the characteristics (demographic, brand loyalty, and basis for purchase) of these clusters. (These personas would be used to guide the development of advertising and promotional campaigns.)
segment.profiles <- summarize_at(
  group_by(soap.df, segment),
  vars(AGE, SEX, HS, Pur.Vol.Price.Cat.1,Pur.Vol.Price.Cat.2, 
       Pur.Vol.Price.Cat.3, Pur.Vol.Price.Cat.4), 
  mean
)
head(segment.profiles)
LS0tDQp0aXRsZTogIk1pbmkgUHJvamVjdCA0Ig0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCk15cmEgSGFsbG1hbg0KTWluaSBQcm9qZWN0IDQNCg0KYGBge3J9DQpsaWJyYXJ5KGRwbHlyKQ0KDQpzb2FwLmRmIDwtIHJlYWQuY3N2KCJCYXRoU29hcC5jc3YiKQ0KYGBgDQoNCg0KDQoxLiBQcmUtcHJvY2VzcyB0aGUgZGF0YSB0byBtYWtlIGl0IHJlYWR5IGZvciBhbmFseXNpcy4NCg0KYGBge3J9DQp2YXJzMS5kZiA8LSBzZWxlY3Qoc29hcC5kZiwgU0VYLCBBR0UsIE5vLm9mLkJyYW5kcywgQXZnLlByaWNlLCBQdXIuVm9sLk5vLlByb21vKQ0KdmFyczEubm9ybS5kZiA8LSBtdXRhdGVfYWxsKHZhcnMxLmRmLCBzY2FsZSkNCg0KdmFyczIuZGYgPC0gc2VsZWN0KHNvYXAuZGYsIFB1ci5Wb2wuUHJpY2UuQ2F0LjEsUHVyLlZvbC5QcmljZS5DYXQuMiwgDQogICAgICAgICAgICAgICAgICAgUHVyLlZvbC5QcmljZS5DYXQuMywgUHVyLlZvbC5QcmljZS5DYXQuNCApDQp2YXJzMi5ub3JtLmRmIDwtIG11dGF0ZV9hbGwodmFyczIuZGYsIHNjYWxlKQ0KDQp2YXJzMy5kZiA8LSBzZWxlY3Qoc29hcC5kZiwgQnJhbmQuUnVucywgQXZnLlByaWNlLCBQdXIuVm9sLk5vLlByb21vLCANCiAgICAgICAgICAgICAgICAgICBQdXIuVm9sLlByaWNlLkNhdC4xLCBQdXIuVm9sLlByaWNlLkNhdC4yLCANCiAgICAgICAgICAgICAgICAgICBQdXIuVm9sLlByaWNlLkNhdC4zLCBQdXIuVm9sLlByaWNlLkNhdC40ICkNCnZhcnMzLm5vcm0uZGYgPC0gbXV0YXRlX2FsbCh2YXJzMy5kZiwgc2NhbGUpDQpgYGANCg0KDQoyLiBVc2Ugay1tZWFucyBjbHVzdGVyaW5nIHRvIGlkZW50aWZ5IGNsdXN0ZXJzIG9mIGhvdXNlaG9sZHMgYmFzZWQgb246DQrigKIJVGhlIHZhcmlhYmxlcyB0aGF0IGRlc2NyaWJlIHB1cmNoYXNlIGJlaGF2aW9yIChpbmNsdWRpbmcgYnJhbmQgbG95YWx0eSkNCmBgYHtyfQ0Kc2V0LnNlZWQoMikNCmttZWFuczEucmVzdWx0IDwtIGttZWFucyh2YXJzMS5ub3JtLmRmLCBjZW50ZXJzID0gNCkNCmttZWFuczEucmVzdWx0JHRvdC53aXRoaW5zcw0Ka21lYW5zMS5yZXN1bHQkc2l6ZQ0Kc29hcC5kZiRzZWdtZW50IDwtIGttZWFuczEucmVzdWx0JGNsdXN0ZXINCg0KDQpgYGANCg0K4oCiCVRoZSB2YXJpYWJsZXMgdGhhdCBkZXNjcmliZSB0aGUgYmFzaXMgZm9yIHB1cmNoYXNlDQpgYGB7cn0NCnNldC5zZWVkKDIpDQprbWVhbnMyLnJlc3VsdCA8LSBrbWVhbnModmFyczIubm9ybS5kZiwgY2VudGVycyA9IDQpDQprbWVhbnMyLnJlc3VsdCR0b3Qud2l0aGluc3MNCmttZWFuczIucmVzdWx0JHNpemUNCnNvYXAuZGYkc2VnbWVudCA8LSBrbWVhbnMyLnJlc3VsdCRjbHVzdGVyDQpgYGANCg0K4oCiCVRoZSB2YXJpYWJsZXMgdGhhdCBkZXNjcmliZSBib3RoIHB1cmNoYXNlIGJlaGF2aW9yIGFuZCBiYXNpcyBvZiBwdXJjaGFzZQ0KSW4gZWFjaCBjYXNlLCBjaG9vc2UgdGhlIG51bWJlciBvZiBzZWdtZW50cyAoaykuIFlvdSBtYXkgY29tYmluZSB0aGUgZXhpc3RpbmcgdmFyaWFibGVzIHRvIGNyZWF0ZSBhbHRlcm5hdGl2ZSBtZWFzdXJlcyBvZiBsb3lhbHR5IGFuZCBpbmNsdWRlIHRoZW0gaW4gdGhlIGFuYWx5c2lzLg0KDQpOb3RlIDE6IEhvdyBzaG91bGQgayBiZSBjaG9zZW4/IFRoaW5rIGFib3V0IGhvdyB0aGUgY2x1c3RlcnMgd291bGQgYmUgdXNlZC4gSXQgaXMgbGlrZWx5IHRoYXQgdGhlIG1hcmtldGluZyBlZmZvcnRzIHdvdWxkIHN1cHBvcnQgdHdvIHRvIGZpdmUgZGlmZmVyZW50IHByb21vdGlvbmFsIGFwcHJvYWNoZXMuIA0KDQpOb3RlIDI6IEhvdyBzaG91bGQgdGhlIHBlcmNlbnRhZ2VzIG9mIHRvdGFsIHB1cmNoYXNlcyBjb21wcmlzZWQgYnkgdmFyaW91cyBicmFuZHMgYmUgdHJlYXRlZD8gSXNu4oCZdCBhIGN1c3RvbWVyIHdobyBidXlzIGFsbCBicmFuZCBBIGp1c3QgYXMgbG95YWwgYXMgYSBjdXN0b21lciB3aG8gYnV5cyBhbGwgYnJhbmQgQj8gV2hhdCB3aWxsIGJlIHRoZSBlZmZlY3Qgb24gYW55IGRpc3RhbmNlIG1lYXN1cmUgb2YgdXNpbmcgdGhlIGJyYW5kIHNoYXJlIHZhcmlhYmxlcyBhcyBpcz8gQ29uc2lkZXIgdXNpbmcgYSBzaW5nbGUgZGVyaXZlZCB2YXJpYWJsZS4NCg0KYGBge3J9DQpzZXQuc2VlZCgyKQ0Ka21lYW5zMy5yZXN1bHQgPC0ga21lYW5zKHZhcnMzLm5vcm0uZGYsIGNlbnRlcnMgPSA0KQ0Ka21lYW5zMy5yZXN1bHQkdG90LndpdGhpbnNzDQprbWVhbnMzLnJlc3VsdCRzaXplDQpzb2FwLmRmJHNlZ21lbnQgPC0ga21lYW5zMy5yZXN1bHQkY2x1c3Rlcg0KDQpgYGANCg0KDQozLiBTZWxlY3Qgd2hhdCB5b3UgdGhpbmsgaXMgdGhlIGJlc3Qgc2VnbWVudGF0aW9uLg0KDQrigKIJVGhlIHZhcmlhYmxlcyB0aGF0IGRlc2NyaWJlIHRoZSBiYXNpcyBmb3IgcHVyY2hhc2UgLSANCiAgUGVyY2VudCBvZiB2b2x1bWUgcHVyY2hhc2VkIHVuZGVyIHRoZSBwcmljZSBjYXRlZ29yeSwgQWdlLCBTZXgsIEhvdXNlaG9sZCBzaXplLg0KICANCjQuIENyZWF0ZSBjdXN0b21lciBwZXJzb25hIGZvciBlYWNoIG9mIHRoZSBzZWdtZW50czogQ29tbWVudCBvbiB0aGUgY2hhcmFjdGVyaXN0aWNzIA0KKGRlbW9ncmFwaGljLCBicmFuZCBsb3lhbHR5LCBhbmQgYmFzaXMgZm9yIHB1cmNoYXNlKSBvZiB0aGVzZSBjbHVzdGVycy4gKFRoZXNlIHBlcnNvbmFzIA0Kd291bGQgYmUgdXNlZCB0byBndWlkZSB0aGUgZGV2ZWxvcG1lbnQgb2YgYWR2ZXJ0aXNpbmcgYW5kIHByb21vdGlvbmFsIGNhbXBhaWducy4pDQogIA0KDQpgYGB7cn0NCnNlZ21lbnQucHJvZmlsZXMgPC0gc3VtbWFyaXplX2F0KA0KICBncm91cF9ieShzb2FwLmRmLCBzZWdtZW50KSwNCiAgdmFycyhBR0UsIFNFWCwgSFMsIFB1ci5Wb2wuUHJpY2UuQ2F0LjEsUHVyLlZvbC5QcmljZS5DYXQuMiwgDQogICAgICAgUHVyLlZvbC5QcmljZS5DYXQuMywgUHVyLlZvbC5QcmljZS5DYXQuNCksIA0KICBtZWFuDQopDQoNCmhlYWQoc2VnbWVudC5wcm9maWxlcykNCmBgYA0KDQo=