data = read.table(file='Dominicks_scanner_data-orange_juice.txt', header=TRUE, sep='\t')
summary(data)
## week sales1 sales2 sales3
## Min. : 40.00 Min. : 4608 Min. : 6048 Min. : 704
## 1st Qu.: 73.75 1st Qu.: 7600 1st Qu.: 8736 1st Qu.: 2096
## Median :102.50 Median :10880 Median :10704 Median : 3648
## Mean :101.84 Mean :18879 Mean :11763 Mean : 14898
## 3rd Qu.:131.25 3rd Qu.:17040 3rd Qu.:12984 3rd Qu.: 13088
## Max. :160.00 Max. :98624 Max. :29952 Max. :171264
## sales4 sales5 price1 price2
## Min. : 3072 Min. : 896 Min. :0.02600 Min. :0.03700
## 1st Qu.: 5184 1st Qu.: 3504 1st Qu.:0.03900 1st Qu.:0.04800
## Median : 7872 Median : 7264 Median :0.04700 Median :0.04900
## Mean : 18481 Mean :15359 Mean :0.04534 Mean :0.04984
## 3rd Qu.: 19088 3rd Qu.:18208 3rd Qu.:0.05300 3rd Qu.:0.05300
## Max. :192128 Max. :83008 Max. :0.05700 Max. :0.06000
## price3 price4 price5 disp1
## Min. :0.02300 Min. :0.02000 Min. :0.01500 Min. :0.0000
## 1st Qu.:0.03100 1st Qu.:0.03100 1st Qu.:0.02300 1st Qu.:0.0000
## Median :0.03700 Median :0.03500 Median :0.02600 Median :0.3291
## Mean :0.03588 Mean :0.03541 Mean :0.02681 Mean :0.4527
## 3rd Qu.:0.04000 3rd Qu.:0.03925 3rd Qu.:0.03100 3rd Qu.:1.0000
## Max. :0.04500 Max. :0.04700 Max. :0.03900 Max. :1.0000
## disp2 disp3 disp4 disp5
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.4322 Median :0.0000
## Mean :0.2481 Mean :0.4356 Mean :0.4828 Mean :0.4914
## 3rd Qu.:0.4552 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## feat1 feat2 feat3 feat4
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.1651 Mean :0.1291 Mean :0.3323 Mean :0.273
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## feat5 grmar1 grmar2 grmar3
## Min. :0.00 Min. :0.01566 Min. :0.04803 Min. :0.00079
## 1st Qu.:0.00 1st Qu.:0.22155 1st Qu.:0.27481 1st Qu.:0.22135
## Median :0.00 Median :0.29041 Median :0.29620 Median :0.33048
## Mean :0.25 Mean :0.27933 Mean :0.28558 Mean :0.29456
## 3rd Qu.:0.25 3rd Qu.:0.36721 3rd Qu.:0.31542 3rd Qu.:0.37774
## Max. :1.00 Max. :0.48616 Max. :0.41816 Max. :0.49137
## grmar4 grmar5
## Min. :0.005774 Min. :0.0077
## 1st Qu.:0.173191 1st Qu.:0.2069
## Median :0.261976 Median :0.2922
## Mean :0.253375 Mean :0.2929
## 3rd Qu.:0.334891 3rd Qu.:0.4088
## Max. :0.449935 Max. :0.5365
head(data)
## week sales1 sales2 sales3 sales4 sales5 price1 price2 price3 price4
## 1 40 6528 8448 47040 4224 2432 0.057 0.06 0.030 0.047
## 2 43 6016 8352 3008 5696 2112 0.057 0.06 0.044 0.047
## 3 44 6272 7776 2816 36160 896 0.057 0.06 0.044 0.040
## 4 45 6848 7968 28352 3200 1152 0.057 0.06 0.037 0.047
## 5 46 7424 7296 3712 11904 3072 0.057 0.06 0.044 0.047
## 6 47 6848 7200 11136 21376 4672 0.057 0.06 0.044 0.037
## price5 disp1 disp2 disp3 disp4 disp5 feat1 feat2 feat3 feat4
## 1 0.025 0.2843137 0 0.4965986 0.0000000 0 0 0 1 0
## 2 0.039 0.0000000 0 0.0000000 0.0000000 0 0 0 1 0
## 3 0.039 0.0000000 0 0.0000000 0.0000000 0 0 0 0 1
## 4 0.039 0.0000000 0 0.4492099 0.0000000 0 0 0 0 0
## 5 0.039 0.0000000 0 0.0000000 0.0000000 0 0 0 0 0
## 6 0.033 0.0000000 0 0.0000000 0.4580838 0 0 0 0 1
## feat5 grmar1 grmar2 grmar3 grmar4 grmar5
## 1 1 0.3443569 0.2957 0.1079354 0.3077000 0.0101
## 2 0 0.3421011 0.2957 0.4168021 0.3659202 0.4281
## 3 0 0.3199276 0.2957 0.3725909 0.3348179 0.4281
## 4 0 0.2646710 0.2957 0.2576124 0.4044160 0.3687
## 5 0 0.2609828 0.2957 0.3637621 0.4125806 0.3747
## 6 1 0.2598000 0.2957 0.3653954 0.2076904 0.2550
attach(data)
# market shares by units
sku.sum.total = sum(sum(data$sales1), sum(sales2), sum(sales3), sum(sales4), sum(sales5))
sku.units.sums = c(sum(sales1), sum(sales2), sum(sales3), sum(sales4), sum(sales5))
sku.units.shares = sku.units.sums / sku.sum.total
sku.units.shares
## [1] 0.2378334 0.1481880 0.1876798 0.2328153 0.1934834
# market shares
sales.matrix = matrix(unlist(data[2:6]), ncol = 5)
prices.matrix = matrix(unlist(data[7:11]), ncol = 5)
sku.dollar.total = t(sales.matrix) %*% prices.matrix
sku.dollar.total = sku.dollar.total[,1] # get the first column because only that is price(i) * sales(i)
dim(sales.matrix)
## [1] 116 5
dim(prices.matrix)
## [1] 116 5
head(sku.dollar.total)
## [1] 84863.87 62059.01 79901.89 101024.64 81977.92
head(data)
## week sales1 sales2 sales3 sales4 sales5 price1 price2 price3 price4
## 1 40 6528 8448 47040 4224 2432 0.057 0.06 0.030 0.047
## 2 43 6016 8352 3008 5696 2112 0.057 0.06 0.044 0.047
## 3 44 6272 7776 2816 36160 896 0.057 0.06 0.044 0.040
## 4 45 6848 7968 28352 3200 1152 0.057 0.06 0.037 0.047
## 5 46 7424 7296 3712 11904 3072 0.057 0.06 0.044 0.047
## 6 47 6848 7200 11136 21376 4672 0.057 0.06 0.044 0.037
## price5 disp1 disp2 disp3 disp4 disp5 feat1 feat2 feat3 feat4
## 1 0.025 0.2843137 0 0.4965986 0.0000000 0 0 0 1 0
## 2 0.039 0.0000000 0 0.0000000 0.0000000 0 0 0 1 0
## 3 0.039 0.0000000 0 0.0000000 0.0000000 0 0 0 0 1
## 4 0.039 0.0000000 0 0.4492099 0.0000000 0 0 0 0 0
## 5 0.039 0.0000000 0 0.0000000 0.0000000 0 0 0 0 0
## 6 0.033 0.0000000 0 0.0000000 0.4580838 0 0 0 0 1
## feat5 grmar1 grmar2 grmar3 grmar4 grmar5
## 1 1 0.3443569 0.2957 0.1079354 0.3077000 0.0101
## 2 0 0.3421011 0.2957 0.4168021 0.3659202 0.4281
## 3 0 0.3199276 0.2957 0.3725909 0.3348179 0.4281
## 4 0 0.2646710 0.2957 0.2576124 0.4044160 0.3687
## 5 0 0.2609828 0.2957 0.3637621 0.4125806 0.3747
## 6 1 0.2598000 0.2957 0.3653954 0.2076904 0.2550
sku.dollar.sum = sum(sku.dollar.total)
sku.dollar.share = sku.dollar.total / sku.dollar.sum
sku.dollar.share
## [1] 0.2070723 0.1514272 0.1949648 0.2465054 0.2000304
The ranking of unit market share is : product 1, product 4, prod 5, prod 3, prod 2.
The ranking of dollar market share is : product 4, product 1, prod 5, prod 3, prod 2.
Combining the results, we can say the leading products are product 1 and product 4.
plot(price1, type="l", col="red")
lines(price2, col="blue")
lines(price3, col="green")
lines(price4, col="yellow")
lines(price5, col="purple")
plot(disp1, type='l', col='red')
lines(disp2, col="blue")
lines(disp3, col="green")
lines(disp4, col="yellow")
lines(disp5, col="purple")
???? Brands are not competing in an identical way.
logdata = data
logdata[2:11] = log(data[2:11])
fit.1 = lm(sales1 ~ price1 + price2 + price3 + price4 + price5 + disp1 + feat1, data = logdata)
summary(fit.1)
##
## Call:
## lm(formula = sales1 ~ price1 + price2 + price3 + price4 + price5 +
## disp1 + feat1, data = logdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1627 -0.1754 -0.0023 0.1424 0.9813
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.41606 1.09405 3.122 0.0023 **
## price1 -2.52921 0.23126 -10.936 < 2e-16 ***
## price2 0.02260 0.32100 0.070 0.9440
## price3 0.54954 0.18162 3.026 0.0031 **
## price4 -0.03993 0.19586 -0.204 0.8388
## price5 0.03894 0.15221 0.256 0.7986
## disp1 0.01326 0.09514 0.139 0.8894
## feat1 0.68187 0.09952 6.852 4.68e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3487 on 108 degrees of freedom
## Multiple R-squared: 0.7987, Adjusted R-squared: 0.7857
## F-statistic: 61.23 on 7 and 108 DF, p-value: < 2.2e-16
fit.2 = lm(sales2 ~ price1 + price2 + price3 + price4 + price5 + disp2 + feat2, data = logdata)
summary(fit.2)
##
## Call:
## lm(formula = sales2 ~ price1 + price2 + price3 + price4 + price5 +
## disp2 + feat2, data = logdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.60053 -0.15746 -0.00021 0.14193 0.62937
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.33173 0.74079 7.197 8.56e-11 ***
## price1 0.33922 0.11321 2.996 0.00339 **
## price2 -1.52700 0.25404 -6.011 2.54e-08 ***
## price3 0.04343 0.11943 0.364 0.71685
## price4 -0.03999 0.13252 -0.302 0.76343
## price5 -0.11299 0.09994 -1.131 0.26075
## disp2 0.03794 0.06734 0.563 0.57437
## feat2 0.30790 0.07317 4.208 5.34e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.229 on 108 degrees of freedom
## Multiple R-squared: 0.5522, Adjusted R-squared: 0.5232
## F-statistic: 19.03 on 7 and 108 DF, p-value: 2.362e-16
fit.3 = lm(sales3 ~ price1 + price2 + price3 + price4 + price5 + disp3 + feat3, data = logdata)
summary(fit.3)
##
## Call:
## lm(formula = sales3 ~ price1 + price2 + price3 + price4 + price5 +
## disp3 + feat3, data = logdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.47226 -0.33518 -0.04125 0.31514 2.27551
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.24350 2.41412 2.172 0.03204 *
## price1 1.13033 0.35925 3.146 0.00214 **
## price2 0.96317 0.66058 1.458 0.14773
## price3 -4.24014 0.47208 -8.982 9.56e-15 ***
## price4 1.40581 0.43974 3.197 0.00182 **
## price5 0.03398 0.32332 0.105 0.91649
## disp3 0.31524 0.18307 1.722 0.08794 .
## feat3 0.98729 0.17141 5.760 8.03e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7383 on 108 degrees of freedom
## Multiple R-squared: 0.7203, Adjusted R-squared: 0.7022
## F-statistic: 39.74 on 7 and 108 DF, p-value: < 2.2e-16
fit.4 = lm(sales4 ~ price1 + price2 + price3 + price4 + price5 + disp4 + feat4, data = logdata)
summary(fit.4)
##
## Call:
## lm(formula = sales4 ~ price1 + price2 + price3 + price4 + price5 +
## disp4 + feat4, data = logdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.28313 -0.28904 -0.03792 0.16057 1.61841
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.5763 1.5828 4.155 6.54e-05 ***
## price1 0.5185 0.2405 2.156 0.0333 *
## price2 0.2733 0.4264 0.641 0.5229
## price3 0.5810 0.2571 2.260 0.0258 *
## price4 -2.5892 0.3684 -7.028 1.97e-10 ***
## price5 0.5362 0.2064 2.598 0.0107 *
## disp4 0.2208 0.1343 1.644 0.1032
## feat4 0.8953 0.1294 6.921 3.33e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4731 on 108 degrees of freedom
## Multiple R-squared: 0.756, Adjusted R-squared: 0.7402
## F-statistic: 47.8 on 7 and 108 DF, p-value: < 2.2e-16
fit.5 = lm(sales5 ~ price1 + price2 + price3 + price4 + price5 + disp5 + feat5, data = logdata)
summary(fit.5)
##
## Call:
## lm(formula = sales5 ~ price1 + price2 + price3 + price4 + price5 +
## disp5 + feat5, data = logdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.18031 -0.34812 -0.02469 0.37808 1.81364
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.5227 2.1649 -0.241 0.80967
## price1 0.1219 0.3346 0.364 0.71643
## price2 -1.6634 0.6199 -2.683 0.00843 **
## price3 1.0077 0.3630 2.776 0.00649 **
## price4 1.0951 0.3892 2.813 0.00583 **
## price5 -3.1956 0.3453 -9.256 2.29e-15 ***
## disp5 0.2833 0.1436 1.973 0.05110 .
## feat5 0.6513 0.1550 4.203 5.44e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6943 on 108 degrees of freedom
## Multiple R-squared: 0.6674, Adjusted R-squared: 0.6458
## F-statistic: 30.96 on 7 and 108 DF, p-value: < 2.2e-16
The cross elasticity between product 1 and product 3 is positive, and also significant. So for each 1% increase in the price of product 3, there is a .54% increase in product 1.
Cross elasticity between product 1 and product 2, product 3 and product 4, and product 5 vs product 2, 3, 4, are significant.
# prices below mean are seen as 'price cuts', above mean are normal prices. So separate data into high group and low group, and run an anova test on those two to see if their means are significantly different.
logdata$high1 = 0
logdata[logdata$sales1 > mean(logdata$sales1), "high1"] = 1
logdata$high2 = 0
logdata[logdata$sales2 > mean(logdata$sales2), "high2"] = 1
logdata$high3 = 0
logdata[logdata$sales3 > mean(logdata$sales3), "high3"] = 1
logdata$high4 = 0
logdata[logdata$sales4 > mean(logdata$sales4), "high4"] = 1
logdata$high5 = 0
logdata[logdata$sales5 > mean(logdata$sales5), "high5"] = 1
head(logdata)
## week sales1 sales2 sales3 sales4 sales5 price1 price2
## 1 40 8.783856 9.041685 10.758754 8.348538 7.796469 -2.864704 -2.813411
## 2 43 8.702178 9.030256 8.009031 8.647519 7.655391 -2.864704 -2.813411
## 3 44 8.743851 8.958797 7.943073 10.495709 6.797940 -2.864704 -2.813411
## 4 45 8.831712 8.983189 10.252453 8.070906 7.049255 -2.864704 -2.813411
## 5 46 8.912473 8.895082 8.219326 9.384630 8.030084 -2.864704 -2.813411
## 6 47 8.831712 8.881836 9.317938 9.970024 8.449343 -2.864704 -2.813411
## price3 price4 price5 disp1 disp2 disp3 disp4 disp5
## 1 -3.506558 -3.057608 -3.688879 0.2843137 0 0.4965986 0.0000000 0
## 2 -3.123566 -3.057608 -3.244194 0.0000000 0 0.0000000 0.0000000 0
## 3 -3.123566 -3.218876 -3.244194 0.0000000 0 0.0000000 0.0000000 0
## 4 -3.296837 -3.057608 -3.244194 0.0000000 0 0.4492099 0.0000000 0
## 5 -3.123566 -3.057608 -3.244194 0.0000000 0 0.0000000 0.0000000 0
## 6 -3.123566 -3.296837 -3.411248 0.0000000 0 0.0000000 0.4580838 0
## feat1 feat2 feat3 feat4 feat5 grmar1 grmar2 grmar3 grmar4
## 1 0 0 1 0 1 0.3443569 0.2957 0.1079354 0.3077000
## 2 0 0 1 0 0 0.3421011 0.2957 0.4168021 0.3659202
## 3 0 0 0 1 0 0.3199276 0.2957 0.3725909 0.3348179
## 4 0 0 0 0 0 0.2646710 0.2957 0.2576124 0.4044160
## 5 0 0 0 0 0 0.2609828 0.2957 0.3637621 0.4125806
## 6 0 0 0 1 1 0.2598000 0.2957 0.3653954 0.2076904
## grmar5 high1 high2 high3 high4 high5
## 1 0.0101 0 0 1 0 0
## 2 0.4281 0 0 0 0 0
## 3 0.4281 0 0 0 1 0
## 4 0.3687 0 0 1 0 0
## 5 0.3747 0 0 0 1 0
## 6 0.2550 0 0 1 1 0
pre.cut.1 = lm(logdata[logdata$high1 == 1,"sales1"] ~ logdata[logdata$high1 == 1, "price1"], data=logdata)
summary(pre.cut.1)
##
## Call:
## lm(formula = logdata[logdata$high1 == 1, "sales1"] ~ logdata[logdata$high1 ==
## 1, "price1"], data = logdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.12573 -0.29773 0.08732 0.34727 1.30366
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.9297 1.3595 2.155 0.0376
## logdata[logdata$high1 == 1, "price1"] -2.2396 0.4096 -5.467 3.06e-06
##
## (Intercept) *
## logdata[logdata$high1 == 1, "price1"] ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4793 on 38 degrees of freedom
## Multiple R-squared: 0.4403, Adjusted R-squared: 0.4256
## F-statistic: 29.89 on 1 and 38 DF, p-value: 3.059e-06
post.cut.1 = lm(logdata[logdata$high1 == 0,"sales1"] ~ logdata[logdata$high1 == 0,"price1"],data=logdata)
anova(pre.cut.1, post.cut.1)
## Warning in anova.lmlist(object, ...): models with response
## '"logdata[logdata$high1 == 0, \"sales1\"]"' removed because response
## differs from model 1
## Analysis of Variance Table
##
## Response: logdata[logdata$high1 == 1, "sales1"]
## Df Sum Sq Mean Sq F value Pr(>F)
## logdata[logdata$high1 == 1, "price1"] 1 6.8681 6.8681 29.893 3.059e-06
## Residuals 38 8.7306 0.2298
##
## logdata[logdata$high1 == 1, "price1"] ***
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(logdata$price1, type="l", col="red")
lines(logdata$sales1)
summary.logdata = summary(logdata)
quan.1 = quantile(logdata$sales1)