library(reshape)
library(ggplot2)
library(plyr)
library(grid)
library(gridExtra)
library(readxl)
coffee <- read_excel("D:/PG Business Analytics/SMDM/coffee.xlsx")
View(coffee)
Which brands of coffee are more popular? Given a brand, are all variants equally preferred?
count_brand<-count(coffee$brand)
names(count_brand)<- c("Brand","Count")
ggplot(count_brand,aes(Brand,Count))+geom_bar(stat = "Identity")

What is the price of different coffee brands?
cost<-coffee[,c("brand","price_per_packet")]
cost<-count(cost, c("brand", "price_per_packet"))
cost<-cast(cost,brand ~ price_per_packet)
cost
How frequently does a household buy coffee? How many packets of coffee are bought at a time?
summary(coffee$days_between_purchase)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.00 6.00 9.00 15.14 17.00 741.00
qplot(coffee$days_between_purchase,data=coffee)

ggplot(coffee,aes(brand,coffee$days_between_purchase))+geom_boxplot()

quantile(coffee$days_between_purchase)
0% 25% 50% 75% 100%
1 6 9 17 741
q1<-6
q3<-17
iqr<-q3-q1
upper<-q3+(iqr*1.5)
upper
[1] 33.5
coffee_new<-subset(coffee,days_between_purchase<upper)
summary(coffee_new$days_between_purchase)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.00 6.00 8.00 10.79 14.00 33.00
ggplot(coffee_new,aes(brand,days_between_purchase))+geom_boxplot()

qplot(coffee_new$days_between_purchase,bins=30)

On an average most households buy coffee in 9-12 days
count(coffee$no_of_packet)$freq/sum(count(coffee$no_of_packet)$freq)
[1] 0.80178034 0.14726001 0.05095964
80% bought one packet at a time, 15% bought two packets while 5% bought three or more packets
What factors have an impact on household’s coffee purchase?
brand<-(count(coffee_new$brand))
edu<-(count(coffee_new$education))
age<-(count(coffee_new$age))
sec<-(count(coffee_new$sec))
income<-(count(coffee_new$income))
price_cons<-(count(coffee_new$price_conscious))
par(mfrow=c(3,2))
barplot(brand$freq,names.arg=brand$x,main="BRAND")
barplot(edu$freq,names.arg=edu$x,main="Education")
barplot(age$freq,names.arg=age$x,main="Age")
barplot(sec$freq,names.arg=sec$x,main="Social Economic Status")
barplot(income$freq,names.arg=income$x,main="Income")
barplot(price_cons$freq,names.arg=price_cons$x,main="Price Conscious")

Hence the factors that impact Coffee sales are Brand, Age, Socio Economic Status and Price Conscious where as Education and Income do not have much impact on Coffee sales
What are the factors that have an impact on a household’s coffee purchase pattern? Does brand preference depend on household size? Does purchase depend on a person’s income or education level?
house<-cast(count(coffee[,c("brand","household_sz")], c("brand", "household_sz"))
,household_sz~ brand)
Using freq as value column. Use the value argument to cast to override this choice
barplot((as.matrix(house)),beside = TRUE,legend.text = TRUE,col = c("red","blue","green","black","yellow"))

NA
Brand preference does not depend on house hold size
ggplot(edu, aes(x=x,y = freq)) + geom_bar(stat = "identity")

The purchase pattern does not depend on educaction
ggplot(income, aes(x=x,y = freq)) + geom_bar(stat = "identity")

The purchase patter does not depend on income
Are there any variables/columns you think will be eventually irrelevant for the study? Why? idno which is for each purshase has no releveant information.
Can you possibly identify some outliers? Days_between_Purchase has outliers which could depend on other factors like family vacation, illness and loss of job.
For better analysis do you think a few brand variants needs to be clubbed together?¶ Yes, all sub brand must be clubbed together
brands_club<-coffee_new
brands_club$brand[brands_club$brand=="Jacobs Krönung" | brands_club$brand=="Jacobs other"] <- "Jacobs"
brands_club$brand[brands_club$brand=="Aldi"| brands_club$brand=="Aldi other"] <- "Aldi"
brands_club$brand[brands_club$brand=="Eduscho Gala" | brands_club$brand=="Eduscho other"] <-"Eduscho"
brands_club$brand[brands_club$brand=="Tchibo Feine Milde" | brands_club$brand=="Tchibo other"]<-"Tchibo"
ggplot(count(brands_club$brand),aes(x,freq))+geom_bar(stat = "Identity")

As we see that Andere does not have a huge difference in sales when brands are clubbed together
What is the probability that a randomly chosen household belongs to the top two social levels?
cbind(count(coffee$sec),(count(coffee$sec))/sum(count(coffee$sec)$freq))
㤼㸱/㤼㸲 not meaningful for factors
Probability of top 2 income house holds = 0.5521
What is the probability that households belonging to the top two social levels buy more expensive coffee?
cast(count(coffee[,c("sec","price_per_packet")], c("sec", "price_per_packet"))
,sec~ price_per_packet)
Using freq as value column. Use the value argument to cast to override this choice
Probability of higher income households purchasing expensive coffee = 0.5537
LS0tDQp0aXRsZTogIlNNRE0iDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoNCmBgYHtyfQ0KbGlicmFyeShyZXNoYXBlKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShwbHlyKQ0KbGlicmFyeShncmlkKQ0KbGlicmFyeShncmlkRXh0cmEpDQpgYGANCg0KDQoNCmBgYHtyfQ0KbGlicmFyeShyZWFkeGwpDQpjb2ZmZWUgPC0gcmVhZF9leGNlbCgiRDovUEcgQnVzaW5lc3MgQW5hbHl0aWNzL1NNRE0vY29mZmVlLnhsc3giKQ0KVmlldyhjb2ZmZWUpDQoNCmBgYA0KDQpXaGljaCBicmFuZHMgb2YgY29mZmVlIGFyZSBtb3JlIHBvcHVsYXI/IEdpdmVuIGEgYnJhbmQsIGFyZSBhbGwgdmFyaWFudHMgZXF1YWxseSBwcmVmZXJyZWQ/DQoNCmBgYHtyfQ0KDQpjb3VudF9icmFuZDwtY291bnQoY29mZmVlJGJyYW5kKQ0KbmFtZXMoY291bnRfYnJhbmQpPC0gYygiQnJhbmQiLCJDb3VudCIpDQpnZ3Bsb3QoY291bnRfYnJhbmQsYWVzKEJyYW5kLENvdW50KSkrZ2VvbV9iYXIoc3RhdCA9ICJJZGVudGl0eSIpDQoNCmBgYA0KDQoNCldoYXQgaXMgdGhlIHByaWNlIG9mIGRpZmZlcmVudCBjb2ZmZWUgYnJhbmRzPw0KDQoNCmBgYHtyfQ0KY29zdDwtY29mZmVlWyxjKCJicmFuZCIsInByaWNlX3Blcl9wYWNrZXQiKV0NCmNvc3Q8LWNvdW50KGNvc3QsIGMoImJyYW5kIiwgInByaWNlX3Blcl9wYWNrZXQiKSkNCmNvc3Q8LWNhc3QoY29zdCxicmFuZCB+IHByaWNlX3Blcl9wYWNrZXQpDQpgYGANCg0KYGBge3J9DQpjb3N0DQoNCmBgYA0KDQpIb3cgZnJlcXVlbnRseSBkb2VzIGEgaG91c2Vob2xkIGJ1eSBjb2ZmZWU/IEhvdyBtYW55IHBhY2tldHMgb2YgY29mZmVlIGFyZSBib3VnaHQgYXQgYSB0aW1lPw0KDQpgYGB7cn0NCnN1bW1hcnkoY29mZmVlJGRheXNfYmV0d2Vlbl9wdXJjaGFzZSkNCmBgYA0KDQpgYGB7cn0NCnFwbG90KGNvZmZlZSRkYXlzX2JldHdlZW5fcHVyY2hhc2UsZGF0YT1jb2ZmZWUpDQpgYGANCg0KYGBge3J9DQpnZ3Bsb3QoY29mZmVlLGFlcyhicmFuZCxjb2ZmZWUkZGF5c19iZXR3ZWVuX3B1cmNoYXNlKSkrZ2VvbV9ib3hwbG90KCkNCmBgYA0KDQpgYGB7cn0NCnF1YW50aWxlKGNvZmZlZSRkYXlzX2JldHdlZW5fcHVyY2hhc2UpDQpgYGANCg0KYGBge3J9DQpxMTwtNg0KcTM8LTE3DQppcXI8LXEzLXExDQp1cHBlcjwtcTMrKGlxcioxLjUpDQp1cHBlcg0KYGBgDQoNCg0KYGBge3J9DQpjb2ZmZWVfbmV3PC1zdWJzZXQoY29mZmVlLGRheXNfYmV0d2Vlbl9wdXJjaGFzZTx1cHBlcikNCnN1bW1hcnkoY29mZmVlX25ldyRkYXlzX2JldHdlZW5fcHVyY2hhc2UpDQpgYGANCg0KYGBge3J9DQpnZ3Bsb3QoY29mZmVlX25ldyxhZXMoYnJhbmQsZGF5c19iZXR3ZWVuX3B1cmNoYXNlKSkrZ2VvbV9ib3hwbG90KCkNCmBgYA0KDQoNCmBgYHtyfQ0KcXBsb3QoY29mZmVlX25ldyRkYXlzX2JldHdlZW5fcHVyY2hhc2UsYmlucz0zMCkNCmBgYA0KT24gYW4gYXZlcmFnZSBtb3N0IGhvdXNlaG9sZHMgYnV5IGNvZmZlZSBpbiA5LTEyIGRheXMNCg0KDQpgYGB7cn0NCmNvdW50KGNvZmZlZSRub19vZl9wYWNrZXQpJGZyZXEvc3VtKGNvdW50KGNvZmZlZSRub19vZl9wYWNrZXQpJGZyZXEpDQpgYGANCg0KODAlIGJvdWdodCBvbmUgcGFja2V0IGF0IGEgdGltZSwgMTUlIGJvdWdodCB0d28gcGFja2V0cyB3aGlsZSA1JSBib3VnaHQgdGhyZWUgb3IgbW9yZSBwYWNrZXRzDQoNCg0KV2hhdCBmYWN0b3JzIGhhdmUgYW4gaW1wYWN0IG9uIGhvdXNlaG9sZCdzIGNvZmZlZSBwdXJjaGFzZT8NCmBgYHtyfQ0KYnJhbmQ8LShjb3VudChjb2ZmZWVfbmV3JGJyYW5kKSkNCmVkdTwtKGNvdW50KGNvZmZlZV9uZXckZWR1Y2F0aW9uKSkNCmFnZTwtKGNvdW50KGNvZmZlZV9uZXckYWdlKSkNCnNlYzwtKGNvdW50KGNvZmZlZV9uZXckc2VjKSkNCmluY29tZTwtKGNvdW50KGNvZmZlZV9uZXckaW5jb21lKSkNCnByaWNlX2NvbnM8LShjb3VudChjb2ZmZWVfbmV3JHByaWNlX2NvbnNjaW91cykpDQpgYGANCg0KYGBge3J9DQpwYXIobWZyb3c9YygzLDIpKQ0KDQpiYXJwbG90KGJyYW5kJGZyZXEsbmFtZXMuYXJnPWJyYW5kJHgsbWFpbj0iQlJBTkQiKQ0KYmFycGxvdChlZHUkZnJlcSxuYW1lcy5hcmc9ZWR1JHgsbWFpbj0iRWR1Y2F0aW9uIikNCmJhcnBsb3QoYWdlJGZyZXEsbmFtZXMuYXJnPWFnZSR4LG1haW49IkFnZSIpDQpiYXJwbG90KHNlYyRmcmVxLG5hbWVzLmFyZz1zZWMkeCxtYWluPSJTb2NpYWwgRWNvbm9taWMgU3RhdHVzIikNCmJhcnBsb3QoaW5jb21lJGZyZXEsbmFtZXMuYXJnPWluY29tZSR4LG1haW49IkluY29tZSIpDQpiYXJwbG90KHByaWNlX2NvbnMkZnJlcSxuYW1lcy5hcmc9cHJpY2VfY29ucyR4LG1haW49IlByaWNlIENvbnNjaW91cyIpDQpgYGANCg0KSGVuY2UgdGhlIGZhY3RvcnMgdGhhdCBpbXBhY3QgQ29mZmVlIHNhbGVzIGFyZSBCcmFuZCwgQWdlLCBTb2NpbyBFY29ub21pYyBTdGF0dXMgYW5kIFByaWNlIENvbnNjaW91cw0Kd2hlcmUgYXMgRWR1Y2F0aW9uIGFuZCBJbmNvbWUgZG8gbm90IGhhdmUgbXVjaCBpbXBhY3Qgb24gQ29mZmVlIHNhbGVzDQoNCg0KDQoNCldoYXQgYXJlIHRoZSBmYWN0b3JzIHRoYXQgaGF2ZSBhbiBpbXBhY3Qgb24gYSBob3VzZWhvbGTigJlzIGNvZmZlZSBwdXJjaGFzZSBwYXR0ZXJuPyBEb2VzIGJyYW5kIHByZWZlcmVuY2UgZGVwZW5kIG9uIGhvdXNlaG9sZCBzaXplPyBEb2VzIHB1cmNoYXNlIGRlcGVuZCBvbiBhIHBlcnNvbuKAmXMgaW5jb21lIG9yIGVkdWNhdGlvbiBsZXZlbD8NCg0KYGBge3J9DQpob3VzZTwtY2FzdChjb3VudChjb2ZmZWVbLGMoImJyYW5kIiwiaG91c2Vob2xkX3N6IildLCBjKCJicmFuZCIsICJob3VzZWhvbGRfc3oiKSkNCiAgICAgLGhvdXNlaG9sZF9zen4gYnJhbmQpDQoNCmJhcnBsb3QoKGFzLm1hdHJpeChob3VzZSkpLGJlc2lkZSA9IFRSVUUsbGVnZW5kLnRleHQgPSBUUlVFLGNvbCA9IGMoInJlZCIsImJsdWUiLCJncmVlbiIsImJsYWNrIiwieWVsbG93IikpDQogICAgICAgIA0KYGBgDQoNCg0KQnJhbmQgcHJlZmVyZW5jZSBkb2VzIG5vdCBkZXBlbmQgb24gaG91c2UgaG9sZCBzaXplDQoNCg0KYGBge3J9DQpnZ3Bsb3QoZWR1LCBhZXMoeD14LHkgPSBmcmVxKSkgKyAgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIpDQpgYGANClRoZSBwdXJjaGFzZSBwYXR0ZXJuIGRvZXMgbm90IGRlcGVuZCBvbiBlZHVjYWN0aW9uDQoNCg0KDQpgYGB7cn0NCmdncGxvdChpbmNvbWUsIGFlcyh4PXgseSA9IGZyZXEpKSArICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IikNCmBgYA0KVGhlIHB1cmNoYXNlIHBhdHRlciBkb2VzIG5vdCBkZXBlbmQgb24gaW5jb21lDQoNCg0KDQpBcmUgdGhlcmUgYW55IHZhcmlhYmxlcy9jb2x1bW5zIHlvdSB0aGluayB3aWxsIGJlIGV2ZW50dWFsbHkgaXJyZWxldmFudCBmb3IgdGhlIHN0dWR5PyBXaHk/DQogIGlkbm8gd2hpY2ggaXMgZm9yIGVhY2ggcHVyc2hhc2UgaGFzIG5vIHJlbGV2ZWFudCBpbmZvcm1hdGlvbi4NCg0KQ2FuIHlvdSBwb3NzaWJseSBpZGVudGlmeSBzb21lIG91dGxpZXJzPw0KIERheXNfYmV0d2Vlbl9QdXJjaGFzZSBoYXMgb3V0bGllcnMgd2hpY2ggY291bGQgZGVwZW5kIG9uIG90aGVyIGZhY3RvcnMgbGlrZSBmYW1pbHkgdmFjYXRpb24sIGlsbG5lc3MgYW5kIGxvc3Mgb2Ygam9iLg0KDQoNCkZvciBiZXR0ZXIgYW5hbHlzaXMgZG8geW91IHRoaW5rIGEgZmV3IGJyYW5kIHZhcmlhbnRzIG5lZWRzIHRvIGJlIGNsdWJiZWQgdG9nZXRoZXI/wrYNCiBZZXMsIGFsbCBzdWIgYnJhbmQgbXVzdCBiZSBjbHViYmVkIHRvZ2V0aGVyDQpgYGB7cn0NCmJyYW5kc19jbHViPC1jb2ZmZWVfbmV3DQpicmFuZHNfY2x1YiRicmFuZFticmFuZHNfY2x1YiRicmFuZD09IkphY29icyBLcsO2bnVuZyIgfCBicmFuZHNfY2x1YiRicmFuZD09IkphY29icyBvdGhlciJdIDwtICJKYWNvYnMiDQpicmFuZHNfY2x1YiRicmFuZFticmFuZHNfY2x1YiRicmFuZD09IkFsZGkifCBicmFuZHNfY2x1YiRicmFuZD09IkFsZGkgb3RoZXIiXSA8LSAiQWxkaSINCmJyYW5kc19jbHViJGJyYW5kW2JyYW5kc19jbHViJGJyYW5kPT0iRWR1c2NobyBHYWxhIiB8IGJyYW5kc19jbHViJGJyYW5kPT0iRWR1c2NobyBvdGhlciJdIDwtIkVkdXNjaG8iDQpicmFuZHNfY2x1YiRicmFuZFticmFuZHNfY2x1YiRicmFuZD09IlRjaGlibyBGZWluZSBNaWxkZSIgfCBicmFuZHNfY2x1YiRicmFuZD09IlRjaGlibyBvdGhlciJdPC0iVGNoaWJvIg0KDQoNCmdncGxvdChjb3VudChicmFuZHNfY2x1YiRicmFuZCksYWVzKHgsZnJlcSkpK2dlb21fYmFyKHN0YXQgPSAiSWRlbnRpdHkiKQ0KYGBgDQoNCkFzIHdlIHNlZSB0aGF0IEFuZGVyZSBkb2VzIG5vdCBoYXZlIGEgaHVnZSBkaWZmZXJlbmNlIGluIHNhbGVzIHdoZW4gYnJhbmRzIGFyZSBjbHViYmVkIHRvZ2V0aGVyDQoNCg0KV2hhdCBpcyB0aGUgcHJvYmFiaWxpdHkgdGhhdCBhIHJhbmRvbWx5IGNob3NlbiBob3VzZWhvbGQgYmVsb25ncyB0byB0aGUgdG9wIHR3byBzb2NpYWwgbGV2ZWxzPw0KYGBge3J9DQpjYmluZChjb3VudChjb2ZmZWUkc2VjKSwoY291bnQoY29mZmVlJHNlYykpL3N1bShjb3VudChjb2ZmZWUkc2VjKSRmcmVxKSkNCmBgYA0KDQpQcm9iYWJpbGl0eSBvZiB0b3AgMiBpbmNvbWUgaG91c2UgaG9sZHMgPSAwLjU1MjENCg0KDQpXaGF0IGlzIHRoZSBwcm9iYWJpbGl0eSB0aGF0IGhvdXNlaG9sZHMgYmVsb25naW5nIHRvIHRoZSB0b3AgdHdvIHNvY2lhbCBsZXZlbHMgYnV5IG1vcmUgZXhwZW5zaXZlIGNvZmZlZT8NCmBgYHtyfQ0KY2FzdChjb3VudChjb2ZmZWVbLGMoInNlYyIsInByaWNlX3Blcl9wYWNrZXQiKV0sIGMoInNlYyIsICJwcmljZV9wZXJfcGFja2V0IikpDQogICAgICxzZWN+IHByaWNlX3Blcl9wYWNrZXQpDQpgYGANCg0KUHJvYmFiaWxpdHkgb2YgaGlnaGVyIGluY29tZSBob3VzZWhvbGRzIHB1cmNoYXNpbmcgZXhwZW5zaXZlIGNvZmZlZSA9IDAuNTUzNw0KDQoNCg0K