library(reshape)
library(ggplot2)
library(plyr)
library(grid)
library(gridExtra)
library(readxl)
coffee <- read_excel("D:/PG Business Analytics/SMDM/coffee.xlsx")
View(coffee)

Which brands of coffee are more popular? Given a brand, are all variants equally preferred?

count_brand<-count(coffee$brand)
names(count_brand)<- c("Brand","Count")
ggplot(count_brand,aes(Brand,Count))+geom_bar(stat = "Identity")

What is the price of different coffee brands?

cost<-coffee[,c("brand","price_per_packet")]
cost<-count(cost, c("brand", "price_per_packet"))
cost<-cast(cost,brand ~ price_per_packet)
cost

How frequently does a household buy coffee? How many packets of coffee are bought at a time?

summary(coffee$days_between_purchase)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1.00    6.00    9.00   15.14   17.00  741.00 
qplot(coffee$days_between_purchase,data=coffee)

ggplot(coffee,aes(brand,coffee$days_between_purchase))+geom_boxplot()

quantile(coffee$days_between_purchase)
  0%  25%  50%  75% 100% 
   1    6    9   17  741 
q1<-6
q3<-17
iqr<-q3-q1
upper<-q3+(iqr*1.5)
upper
[1] 33.5
coffee_new<-subset(coffee,days_between_purchase<upper)
summary(coffee_new$days_between_purchase)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1.00    6.00    8.00   10.79   14.00   33.00 
ggplot(coffee_new,aes(brand,days_between_purchase))+geom_boxplot()

qplot(coffee_new$days_between_purchase,bins=30)

On an average most households buy coffee in 9-12 days

count(coffee$no_of_packet)$freq/sum(count(coffee$no_of_packet)$freq)
[1] 0.80178034 0.14726001 0.05095964

80% bought one packet at a time, 15% bought two packets while 5% bought three or more packets

What factors have an impact on household’s coffee purchase?

brand<-(count(coffee_new$brand))
edu<-(count(coffee_new$education))
age<-(count(coffee_new$age))
sec<-(count(coffee_new$sec))
income<-(count(coffee_new$income))
price_cons<-(count(coffee_new$price_conscious))
par(mfrow=c(3,2))
barplot(brand$freq,names.arg=brand$x,main="BRAND")
barplot(edu$freq,names.arg=edu$x,main="Education")
barplot(age$freq,names.arg=age$x,main="Age")
barplot(sec$freq,names.arg=sec$x,main="Social Economic Status")
barplot(income$freq,names.arg=income$x,main="Income")
barplot(price_cons$freq,names.arg=price_cons$x,main="Price Conscious")

Hence the factors that impact Coffee sales are Brand, Age, Socio Economic Status and Price Conscious where as Education and Income do not have much impact on Coffee sales

What are the factors that have an impact on a household’s coffee purchase pattern? Does brand preference depend on household size? Does purchase depend on a person’s income or education level?

house<-cast(count(coffee[,c("brand","household_sz")], c("brand", "household_sz"))
     ,household_sz~ brand)
Using freq as value column.  Use the value argument to cast to override this choice
barplot((as.matrix(house)),beside = TRUE,legend.text = TRUE,col = c("red","blue","green","black","yellow"))

NA

Brand preference does not depend on house hold size

ggplot(edu, aes(x=x,y = freq)) +  geom_bar(stat = "identity")

The purchase pattern does not depend on educaction

ggplot(income, aes(x=x,y = freq)) +  geom_bar(stat = "identity")

The purchase patter does not depend on income

Are there any variables/columns you think will be eventually irrelevant for the study? Why? idno which is for each purshase has no releveant information.

Can you possibly identify some outliers? Days_between_Purchase has outliers which could depend on other factors like family vacation, illness and loss of job.

For better analysis do you think a few brand variants needs to be clubbed together?¶ Yes, all sub brand must be clubbed together

brands_club<-coffee_new
brands_club$brand[brands_club$brand=="Jacobs Krönung" | brands_club$brand=="Jacobs other"] <- "Jacobs"
brands_club$brand[brands_club$brand=="Aldi"| brands_club$brand=="Aldi other"] <- "Aldi"
brands_club$brand[brands_club$brand=="Eduscho Gala" | brands_club$brand=="Eduscho other"] <-"Eduscho"
brands_club$brand[brands_club$brand=="Tchibo Feine Milde" | brands_club$brand=="Tchibo other"]<-"Tchibo"
ggplot(count(brands_club$brand),aes(x,freq))+geom_bar(stat = "Identity")

As we see that Andere does not have a huge difference in sales when brands are clubbed together

What is the probability that a randomly chosen household belongs to the top two social levels?

cbind(count(coffee$sec),(count(coffee$sec))/sum(count(coffee$sec)$freq))
㤼㸱/㤼㸲 not meaningful for factors

Probability of top 2 income house holds = 0.5521

What is the probability that households belonging to the top two social levels buy more expensive coffee?

cast(count(coffee[,c("sec","price_per_packet")], c("sec", "price_per_packet"))
     ,sec~ price_per_packet)
Using freq as value column.  Use the value argument to cast to override this choice

Probability of higher income households purchasing expensive coffee = 0.5537

LS0tDQp0aXRsZTogIlNNRE0iDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoNCmBgYHtyfQ0KbGlicmFyeShyZXNoYXBlKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShwbHlyKQ0KbGlicmFyeShncmlkKQ0KbGlicmFyeShncmlkRXh0cmEpDQpgYGANCg0KDQoNCmBgYHtyfQ0KbGlicmFyeShyZWFkeGwpDQpjb2ZmZWUgPC0gcmVhZF9leGNlbCgiRDovUEcgQnVzaW5lc3MgQW5hbHl0aWNzL1NNRE0vY29mZmVlLnhsc3giKQ0KVmlldyhjb2ZmZWUpDQoNCmBgYA0KDQpXaGljaCBicmFuZHMgb2YgY29mZmVlIGFyZSBtb3JlIHBvcHVsYXI/IEdpdmVuIGEgYnJhbmQsIGFyZSBhbGwgdmFyaWFudHMgZXF1YWxseSBwcmVmZXJyZWQ/DQoNCmBgYHtyfQ0KDQpjb3VudF9icmFuZDwtY291bnQoY29mZmVlJGJyYW5kKQ0KbmFtZXMoY291bnRfYnJhbmQpPC0gYygiQnJhbmQiLCJDb3VudCIpDQpnZ3Bsb3QoY291bnRfYnJhbmQsYWVzKEJyYW5kLENvdW50KSkrZ2VvbV9iYXIoc3RhdCA9ICJJZGVudGl0eSIpDQoNCmBgYA0KDQoNCldoYXQgaXMgdGhlIHByaWNlIG9mIGRpZmZlcmVudCBjb2ZmZWUgYnJhbmRzPw0KDQoNCmBgYHtyfQ0KY29zdDwtY29mZmVlWyxjKCJicmFuZCIsInByaWNlX3Blcl9wYWNrZXQiKV0NCmNvc3Q8LWNvdW50KGNvc3QsIGMoImJyYW5kIiwgInByaWNlX3Blcl9wYWNrZXQiKSkNCmNvc3Q8LWNhc3QoY29zdCxicmFuZCB+IHByaWNlX3Blcl9wYWNrZXQpDQpgYGANCg0KYGBge3J9DQpjb3N0DQoNCmBgYA0KDQpIb3cgZnJlcXVlbnRseSBkb2VzIGEgaG91c2Vob2xkIGJ1eSBjb2ZmZWU/IEhvdyBtYW55IHBhY2tldHMgb2YgY29mZmVlIGFyZSBib3VnaHQgYXQgYSB0aW1lPw0KDQpgYGB7cn0NCnN1bW1hcnkoY29mZmVlJGRheXNfYmV0d2Vlbl9wdXJjaGFzZSkNCmBgYA0KDQpgYGB7cn0NCnFwbG90KGNvZmZlZSRkYXlzX2JldHdlZW5fcHVyY2hhc2UsZGF0YT1jb2ZmZWUpDQpgYGANCg0KYGBge3J9DQpnZ3Bsb3QoY29mZmVlLGFlcyhicmFuZCxjb2ZmZWUkZGF5c19iZXR3ZWVuX3B1cmNoYXNlKSkrZ2VvbV9ib3hwbG90KCkNCmBgYA0KDQpgYGB7cn0NCnF1YW50aWxlKGNvZmZlZSRkYXlzX2JldHdlZW5fcHVyY2hhc2UpDQpgYGANCg0KYGBge3J9DQpxMTwtNg0KcTM8LTE3DQppcXI8LXEzLXExDQp1cHBlcjwtcTMrKGlxcioxLjUpDQp1cHBlcg0KYGBgDQoNCg0KYGBge3J9DQpjb2ZmZWVfbmV3PC1zdWJzZXQoY29mZmVlLGRheXNfYmV0d2Vlbl9wdXJjaGFzZTx1cHBlcikNCnN1bW1hcnkoY29mZmVlX25ldyRkYXlzX2JldHdlZW5fcHVyY2hhc2UpDQpgYGANCg0KYGBge3J9DQpnZ3Bsb3QoY29mZmVlX25ldyxhZXMoYnJhbmQsZGF5c19iZXR3ZWVuX3B1cmNoYXNlKSkrZ2VvbV9ib3hwbG90KCkNCmBgYA0KDQoNCmBgYHtyfQ0KcXBsb3QoY29mZmVlX25ldyRkYXlzX2JldHdlZW5fcHVyY2hhc2UsYmlucz0zMCkNCmBgYA0KT24gYW4gYXZlcmFnZSBtb3N0IGhvdXNlaG9sZHMgYnV5IGNvZmZlZSBpbiA5LTEyIGRheXMNCg0KDQpgYGB7cn0NCmNvdW50KGNvZmZlZSRub19vZl9wYWNrZXQpJGZyZXEvc3VtKGNvdW50KGNvZmZlZSRub19vZl9wYWNrZXQpJGZyZXEpDQpgYGANCg0KODAlIGJvdWdodCBvbmUgcGFja2V0IGF0IGEgdGltZSwgMTUlIGJvdWdodCB0d28gcGFja2V0cyB3aGlsZSA1JSBib3VnaHQgdGhyZWUgb3IgbW9yZSBwYWNrZXRzDQoNCg0KV2hhdCBmYWN0b3JzIGhhdmUgYW4gaW1wYWN0IG9uIGhvdXNlaG9sZCdzIGNvZmZlZSBwdXJjaGFzZT8NCmBgYHtyfQ0KYnJhbmQ8LShjb3VudChjb2ZmZWVfbmV3JGJyYW5kKSkNCmVkdTwtKGNvdW50KGNvZmZlZV9uZXckZWR1Y2F0aW9uKSkNCmFnZTwtKGNvdW50KGNvZmZlZV9uZXckYWdlKSkNCnNlYzwtKGNvdW50KGNvZmZlZV9uZXckc2VjKSkNCmluY29tZTwtKGNvdW50KGNvZmZlZV9uZXckaW5jb21lKSkNCnByaWNlX2NvbnM8LShjb3VudChjb2ZmZWVfbmV3JHByaWNlX2NvbnNjaW91cykpDQpgYGANCg0KYGBge3J9DQpwYXIobWZyb3c9YygzLDIpKQ0KDQpiYXJwbG90KGJyYW5kJGZyZXEsbmFtZXMuYXJnPWJyYW5kJHgsbWFpbj0iQlJBTkQiKQ0KYmFycGxvdChlZHUkZnJlcSxuYW1lcy5hcmc9ZWR1JHgsbWFpbj0iRWR1Y2F0aW9uIikNCmJhcnBsb3QoYWdlJGZyZXEsbmFtZXMuYXJnPWFnZSR4LG1haW49IkFnZSIpDQpiYXJwbG90KHNlYyRmcmVxLG5hbWVzLmFyZz1zZWMkeCxtYWluPSJTb2NpYWwgRWNvbm9taWMgU3RhdHVzIikNCmJhcnBsb3QoaW5jb21lJGZyZXEsbmFtZXMuYXJnPWluY29tZSR4LG1haW49IkluY29tZSIpDQpiYXJwbG90KHByaWNlX2NvbnMkZnJlcSxuYW1lcy5hcmc9cHJpY2VfY29ucyR4LG1haW49IlByaWNlIENvbnNjaW91cyIpDQpgYGANCg0KSGVuY2UgdGhlIGZhY3RvcnMgdGhhdCBpbXBhY3QgQ29mZmVlIHNhbGVzIGFyZSBCcmFuZCwgQWdlLCBTb2NpbyBFY29ub21pYyBTdGF0dXMgYW5kIFByaWNlIENvbnNjaW91cw0Kd2hlcmUgYXMgRWR1Y2F0aW9uIGFuZCBJbmNvbWUgZG8gbm90IGhhdmUgbXVjaCBpbXBhY3Qgb24gQ29mZmVlIHNhbGVzDQoNCg0KDQoNCldoYXQgYXJlIHRoZSBmYWN0b3JzIHRoYXQgaGF2ZSBhbiBpbXBhY3Qgb24gYSBob3VzZWhvbGTigJlzIGNvZmZlZSBwdXJjaGFzZSBwYXR0ZXJuPyBEb2VzIGJyYW5kIHByZWZlcmVuY2UgZGVwZW5kIG9uIGhvdXNlaG9sZCBzaXplPyBEb2VzIHB1cmNoYXNlIGRlcGVuZCBvbiBhIHBlcnNvbuKAmXMgaW5jb21lIG9yIGVkdWNhdGlvbiBsZXZlbD8NCg0KYGBge3J9DQpob3VzZTwtY2FzdChjb3VudChjb2ZmZWVbLGMoImJyYW5kIiwiaG91c2Vob2xkX3N6IildLCBjKCJicmFuZCIsICJob3VzZWhvbGRfc3oiKSkNCiAgICAgLGhvdXNlaG9sZF9zen4gYnJhbmQpDQoNCmJhcnBsb3QoKGFzLm1hdHJpeChob3VzZSkpLGJlc2lkZSA9IFRSVUUsbGVnZW5kLnRleHQgPSBUUlVFLGNvbCA9IGMoInJlZCIsImJsdWUiLCJncmVlbiIsImJsYWNrIiwieWVsbG93IikpDQogICAgICAgIA0KYGBgDQoNCg0KQnJhbmQgcHJlZmVyZW5jZSBkb2VzIG5vdCBkZXBlbmQgb24gaG91c2UgaG9sZCBzaXplDQoNCg0KYGBge3J9DQpnZ3Bsb3QoZWR1LCBhZXMoeD14LHkgPSBmcmVxKSkgKyAgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIpDQpgYGANClRoZSBwdXJjaGFzZSBwYXR0ZXJuIGRvZXMgbm90IGRlcGVuZCBvbiBlZHVjYWN0aW9uDQoNCg0KDQpgYGB7cn0NCmdncGxvdChpbmNvbWUsIGFlcyh4PXgseSA9IGZyZXEpKSArICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IikNCmBgYA0KVGhlIHB1cmNoYXNlIHBhdHRlciBkb2VzIG5vdCBkZXBlbmQgb24gaW5jb21lDQoNCg0KDQpBcmUgdGhlcmUgYW55IHZhcmlhYmxlcy9jb2x1bW5zIHlvdSB0aGluayB3aWxsIGJlIGV2ZW50dWFsbHkgaXJyZWxldmFudCBmb3IgdGhlIHN0dWR5PyBXaHk/DQogIGlkbm8gd2hpY2ggaXMgZm9yIGVhY2ggcHVyc2hhc2UgaGFzIG5vIHJlbGV2ZWFudCBpbmZvcm1hdGlvbi4NCg0KQ2FuIHlvdSBwb3NzaWJseSBpZGVudGlmeSBzb21lIG91dGxpZXJzPw0KIERheXNfYmV0d2Vlbl9QdXJjaGFzZSBoYXMgb3V0bGllcnMgd2hpY2ggY291bGQgZGVwZW5kIG9uIG90aGVyIGZhY3RvcnMgbGlrZSBmYW1pbHkgdmFjYXRpb24sIGlsbG5lc3MgYW5kIGxvc3Mgb2Ygam9iLg0KDQoNCkZvciBiZXR0ZXIgYW5hbHlzaXMgZG8geW91IHRoaW5rIGEgZmV3IGJyYW5kIHZhcmlhbnRzIG5lZWRzIHRvIGJlIGNsdWJiZWQgdG9nZXRoZXI/wrYNCiBZZXMsIGFsbCBzdWIgYnJhbmQgbXVzdCBiZSBjbHViYmVkIHRvZ2V0aGVyDQpgYGB7cn0NCmJyYW5kc19jbHViPC1jb2ZmZWVfbmV3DQpicmFuZHNfY2x1YiRicmFuZFticmFuZHNfY2x1YiRicmFuZD09IkphY29icyBLcsO2bnVuZyIgfCBicmFuZHNfY2x1YiRicmFuZD09IkphY29icyBvdGhlciJdIDwtICJKYWNvYnMiDQpicmFuZHNfY2x1YiRicmFuZFticmFuZHNfY2x1YiRicmFuZD09IkFsZGkifCBicmFuZHNfY2x1YiRicmFuZD09IkFsZGkgb3RoZXIiXSA8LSAiQWxkaSINCmJyYW5kc19jbHViJGJyYW5kW2JyYW5kc19jbHViJGJyYW5kPT0iRWR1c2NobyBHYWxhIiB8IGJyYW5kc19jbHViJGJyYW5kPT0iRWR1c2NobyBvdGhlciJdIDwtIkVkdXNjaG8iDQpicmFuZHNfY2x1YiRicmFuZFticmFuZHNfY2x1YiRicmFuZD09IlRjaGlibyBGZWluZSBNaWxkZSIgfCBicmFuZHNfY2x1YiRicmFuZD09IlRjaGlibyBvdGhlciJdPC0iVGNoaWJvIg0KDQoNCmdncGxvdChjb3VudChicmFuZHNfY2x1YiRicmFuZCksYWVzKHgsZnJlcSkpK2dlb21fYmFyKHN0YXQgPSAiSWRlbnRpdHkiKQ0KYGBgDQoNCkFzIHdlIHNlZSB0aGF0IEFuZGVyZSBkb2VzIG5vdCBoYXZlIGEgaHVnZSBkaWZmZXJlbmNlIGluIHNhbGVzIHdoZW4gYnJhbmRzIGFyZSBjbHViYmVkIHRvZ2V0aGVyDQoNCg0KV2hhdCBpcyB0aGUgcHJvYmFiaWxpdHkgdGhhdCBhIHJhbmRvbWx5IGNob3NlbiBob3VzZWhvbGQgYmVsb25ncyB0byB0aGUgdG9wIHR3byBzb2NpYWwgbGV2ZWxzPw0KYGBge3J9DQpjYmluZChjb3VudChjb2ZmZWUkc2VjKSwoY291bnQoY29mZmVlJHNlYykpL3N1bShjb3VudChjb2ZmZWUkc2VjKSRmcmVxKSkNCmBgYA0KDQpQcm9iYWJpbGl0eSBvZiB0b3AgMiBpbmNvbWUgaG91c2UgaG9sZHMgPSAwLjU1MjENCg0KDQpXaGF0IGlzIHRoZSBwcm9iYWJpbGl0eSB0aGF0IGhvdXNlaG9sZHMgYmVsb25naW5nIHRvIHRoZSB0b3AgdHdvIHNvY2lhbCBsZXZlbHMgYnV5IG1vcmUgZXhwZW5zaXZlIGNvZmZlZT8NCmBgYHtyfQ0KY2FzdChjb3VudChjb2ZmZWVbLGMoInNlYyIsInByaWNlX3Blcl9wYWNrZXQiKV0sIGMoInNlYyIsICJwcmljZV9wZXJfcGFja2V0IikpDQogICAgICxzZWN+IHByaWNlX3Blcl9wYWNrZXQpDQpgYGANCg0KUHJvYmFiaWxpdHkgb2YgaGlnaGVyIGluY29tZSBob3VzZWhvbGRzIHB1cmNoYXNpbmcgZXhwZW5zaXZlIGNvZmZlZSA9IDAuNTUzNw0KDQoNCg0K