juice

data = read.table(file='Dominicks_scanner_data-orange_juice.txt', header=TRUE, sep='\t')
summary(data)

##       week            sales1          sales2          sales3      
##  Min.   : 40.00   Min.   : 4608   Min.   : 6048   Min.   :   704  
##  1st Qu.: 73.75   1st Qu.: 7600   1st Qu.: 8736   1st Qu.:  2096  
##  Median :102.50   Median :10880   Median :10704   Median :  3648  
##  Mean   :101.84   Mean   :18879   Mean   :11763   Mean   : 14898  
##  3rd Qu.:131.25   3rd Qu.:17040   3rd Qu.:12984   3rd Qu.: 13088  
##  Max.   :160.00   Max.   :98624   Max.   :29952   Max.   :171264  
##      sales4           sales5          price1            price2       
##  Min.   :  3072   Min.   :  896   Min.   :0.02600   Min.   :0.03700  
##  1st Qu.:  5184   1st Qu.: 3504   1st Qu.:0.03900   1st Qu.:0.04800  
##  Median :  7872   Median : 7264   Median :0.04700   Median :0.04900  
##  Mean   : 18481   Mean   :15359   Mean   :0.04534   Mean   :0.04984  
##  3rd Qu.: 19088   3rd Qu.:18208   3rd Qu.:0.05300   3rd Qu.:0.05300  
##  Max.   :192128   Max.   :83008   Max.   :0.05700   Max.   :0.06000  
##      price3            price4            price5            disp1       
##  Min.   :0.02300   Min.   :0.02000   Min.   :0.01500   Min.   :0.0000  
##  1st Qu.:0.03100   1st Qu.:0.03100   1st Qu.:0.02300   1st Qu.:0.0000  
##  Median :0.03700   Median :0.03500   Median :0.02600   Median :0.3291  
##  Mean   :0.03588   Mean   :0.03541   Mean   :0.02681   Mean   :0.4527  
##  3rd Qu.:0.04000   3rd Qu.:0.03925   3rd Qu.:0.03100   3rd Qu.:1.0000  
##  Max.   :0.04500   Max.   :0.04700   Max.   :0.03900   Max.   :1.0000  
##      disp2            disp3            disp4            disp5       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.4322   Median :0.0000  
##  Mean   :0.2481   Mean   :0.4356   Mean   :0.4828   Mean   :0.4914  
##  3rd Qu.:0.4552   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##      feat1            feat2            feat3            feat4      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.000  
##  Mean   :0.1651   Mean   :0.1291   Mean   :0.3323   Mean   :0.273  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.000  
##      feat5          grmar1            grmar2            grmar3       
##  Min.   :0.00   Min.   :0.01566   Min.   :0.04803   Min.   :0.00079  
##  1st Qu.:0.00   1st Qu.:0.22155   1st Qu.:0.27481   1st Qu.:0.22135  
##  Median :0.00   Median :0.29041   Median :0.29620   Median :0.33048  
##  Mean   :0.25   Mean   :0.27933   Mean   :0.28558   Mean   :0.29456  
##  3rd Qu.:0.25   3rd Qu.:0.36721   3rd Qu.:0.31542   3rd Qu.:0.37774  
##  Max.   :1.00   Max.   :0.48616   Max.   :0.41816   Max.   :0.49137  
##      grmar4             grmar5      
##  Min.   :0.005774   Min.   :0.0077  
##  1st Qu.:0.173191   1st Qu.:0.2069  
##  Median :0.261976   Median :0.2922  
##  Mean   :0.253375   Mean   :0.2929  
##  3rd Qu.:0.334891   3rd Qu.:0.4088  
##  Max.   :0.449935   Max.   :0.5365

head(data)

##   week sales1 sales2 sales3 sales4 sales5 price1 price2 price3 price4
## 1   40   6528   8448  47040   4224   2432  0.057   0.06  0.030  0.047
## 2   43   6016   8352   3008   5696   2112  0.057   0.06  0.044  0.047
## 3   44   6272   7776   2816  36160    896  0.057   0.06  0.044  0.040
## 4   45   6848   7968  28352   3200   1152  0.057   0.06  0.037  0.047
## 5   46   7424   7296   3712  11904   3072  0.057   0.06  0.044  0.047
## 6   47   6848   7200  11136  21376   4672  0.057   0.06  0.044  0.037
##   price5     disp1 disp2     disp3     disp4 disp5 feat1 feat2 feat3 feat4
## 1  0.025 0.2843137     0 0.4965986 0.0000000     0     0     0     1     0
## 2  0.039 0.0000000     0 0.0000000 0.0000000     0     0     0     1     0
## 3  0.039 0.0000000     0 0.0000000 0.0000000     0     0     0     0     1
## 4  0.039 0.0000000     0 0.4492099 0.0000000     0     0     0     0     0
## 5  0.039 0.0000000     0 0.0000000 0.0000000     0     0     0     0     0
## 6  0.033 0.0000000     0 0.0000000 0.4580838     0     0     0     0     1
##   feat5    grmar1 grmar2    grmar3    grmar4 grmar5
## 1     1 0.3443569 0.2957 0.1079354 0.3077000 0.0101
## 2     0 0.3421011 0.2957 0.4168021 0.3659202 0.4281
## 3     0 0.3199276 0.2957 0.3725909 0.3348179 0.4281
## 4     0 0.2646710 0.2957 0.2576124 0.4044160 0.3687
## 5     0 0.2609828 0.2957 0.3637621 0.4125806 0.3747
## 6     1 0.2598000 0.2957 0.3653954 0.2076904 0.2550

attach(data)

# market shares by units
sku.sum.total = sum(sum(data$sales1), sum(sales2), sum(sales3), sum(sales4),  sum(sales5))
sku.units.sums = c(sum(sales1), sum(sales2), sum(sales3), sum(sales4), sum(sales5))
sku.units.shares = sku.units.sums / sku.sum.total
sku.units.shares

## [1] 0.2378334 0.1481880 0.1876798 0.2328153 0.1934834

# market shares 
sales.matrix = matrix(unlist(data[2:6]), ncol = 5)
prices.matrix = matrix(unlist(data[7:11]), ncol = 5)
sku.dollar.total = t(sales.matrix) %*% prices.matrix
sku.dollar.total = sku.dollar.total[,1] # get the first column because only that is price(i) * sales(i)
dim(sales.matrix)

## [1] 116   5

dim(prices.matrix)

## [1] 116   5

head(sku.dollar.total)

## [1]  84863.87  62059.01  79901.89 101024.64  81977.92

head(data)

##   week sales1 sales2 sales3 sales4 sales5 price1 price2 price3 price4
## 1   40   6528   8448  47040   4224   2432  0.057   0.06  0.030  0.047
## 2   43   6016   8352   3008   5696   2112  0.057   0.06  0.044  0.047
## 3   44   6272   7776   2816  36160    896  0.057   0.06  0.044  0.040
## 4   45   6848   7968  28352   3200   1152  0.057   0.06  0.037  0.047
## 5   46   7424   7296   3712  11904   3072  0.057   0.06  0.044  0.047
## 6   47   6848   7200  11136  21376   4672  0.057   0.06  0.044  0.037
##   price5     disp1 disp2     disp3     disp4 disp5 feat1 feat2 feat3 feat4
## 1  0.025 0.2843137     0 0.4965986 0.0000000     0     0     0     1     0
## 2  0.039 0.0000000     0 0.0000000 0.0000000     0     0     0     1     0
## 3  0.039 0.0000000     0 0.0000000 0.0000000     0     0     0     0     1
## 4  0.039 0.0000000     0 0.4492099 0.0000000     0     0     0     0     0
## 5  0.039 0.0000000     0 0.0000000 0.0000000     0     0     0     0     0
## 6  0.033 0.0000000     0 0.0000000 0.4580838     0     0     0     0     1
##   feat5    grmar1 grmar2    grmar3    grmar4 grmar5
## 1     1 0.3443569 0.2957 0.1079354 0.3077000 0.0101
## 2     0 0.3421011 0.2957 0.4168021 0.3659202 0.4281
## 3     0 0.3199276 0.2957 0.3725909 0.3348179 0.4281
## 4     0 0.2646710 0.2957 0.2576124 0.4044160 0.3687
## 5     0 0.2609828 0.2957 0.3637621 0.4125806 0.3747
## 6     1 0.2598000 0.2957 0.3653954 0.2076904 0.2550

sku.dollar.sum = sum(sku.dollar.total)
sku.dollar.share = sku.dollar.total / sku.dollar.sum
sku.dollar.share

## [1] 0.2070723 0.1514272 0.1949648 0.2465054 0.2000304

The ranking of unit market share is : product 1, product 4, prod 5, prod 3, prod 2.
The ranking of dollar market share is : product 4, product 1, prod 5, prod 3, prod 2.
Combining the results, we can say the leading products are product 1 and product 4.

plot(price1, type="l", col="red")
lines(price2, col="blue")
lines(price3, col="green")
lines(price4, col="yellow")
lines(price5, col="purple")

plot(disp1, type='l', col='red')
lines(disp2, col="blue")
lines(disp3, col="green")
lines(disp4, col="yellow")
lines(disp5, col="purple")

???? Brands are not competing in an identical way.

logdata = data
logdata[2:11] = log(data[2:11])
fit.1 = lm(sales1 ~ price1 + price2 + price3 + price4 + price5 + disp1 + feat1, data = logdata)
summary(fit.1)

## 
## Call:
## lm(formula = sales1 ~ price1 + price2 + price3 + price4 + price5 + 
##     disp1 + feat1, data = logdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1627 -0.1754 -0.0023  0.1424  0.9813 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.41606    1.09405   3.122   0.0023 ** 
## price1      -2.52921    0.23126 -10.936  < 2e-16 ***
## price2       0.02260    0.32100   0.070   0.9440    
## price3       0.54954    0.18162   3.026   0.0031 ** 
## price4      -0.03993    0.19586  -0.204   0.8388    
## price5       0.03894    0.15221   0.256   0.7986    
## disp1        0.01326    0.09514   0.139   0.8894    
## feat1        0.68187    0.09952   6.852 4.68e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3487 on 108 degrees of freedom
## Multiple R-squared:  0.7987, Adjusted R-squared:  0.7857 
## F-statistic: 61.23 on 7 and 108 DF,  p-value: < 2.2e-16

fit.2 = lm(sales2 ~ price1 + price2 + price3 + price4 + price5 + disp2 + feat2, data = logdata)
summary(fit.2)

## 
## Call:
## lm(formula = sales2 ~ price1 + price2 + price3 + price4 + price5 + 
##     disp2 + feat2, data = logdata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.60053 -0.15746 -0.00021  0.14193  0.62937 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.33173    0.74079   7.197 8.56e-11 ***
## price1       0.33922    0.11321   2.996  0.00339 ** 
## price2      -1.52700    0.25404  -6.011 2.54e-08 ***
## price3       0.04343    0.11943   0.364  0.71685    
## price4      -0.03999    0.13252  -0.302  0.76343    
## price5      -0.11299    0.09994  -1.131  0.26075    
## disp2        0.03794    0.06734   0.563  0.57437    
## feat2        0.30790    0.07317   4.208 5.34e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.229 on 108 degrees of freedom
## Multiple R-squared:  0.5522, Adjusted R-squared:  0.5232 
## F-statistic: 19.03 on 7 and 108 DF,  p-value: 2.362e-16

fit.3 = lm(sales3 ~ price1 + price2 + price3 + price4 + price5 + disp3 + feat3, data = logdata)
summary(fit.3)

## 
## Call:
## lm(formula = sales3 ~ price1 + price2 + price3 + price4 + price5 + 
##     disp3 + feat3, data = logdata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.47226 -0.33518 -0.04125  0.31514  2.27551 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.24350    2.41412   2.172  0.03204 *  
## price1       1.13033    0.35925   3.146  0.00214 ** 
## price2       0.96317    0.66058   1.458  0.14773    
## price3      -4.24014    0.47208  -8.982 9.56e-15 ***
## price4       1.40581    0.43974   3.197  0.00182 ** 
## price5       0.03398    0.32332   0.105  0.91649    
## disp3        0.31524    0.18307   1.722  0.08794 .  
## feat3        0.98729    0.17141   5.760 8.03e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7383 on 108 degrees of freedom
## Multiple R-squared:  0.7203, Adjusted R-squared:  0.7022 
## F-statistic: 39.74 on 7 and 108 DF,  p-value: < 2.2e-16

fit.4 = lm(sales4 ~ price1 + price2 + price3 + price4 + price5 + disp4 + feat4, data = logdata)
summary(fit.4)

## 
## Call:
## lm(formula = sales4 ~ price1 + price2 + price3 + price4 + price5 + 
##     disp4 + feat4, data = logdata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.28313 -0.28904 -0.03792  0.16057  1.61841 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.5763     1.5828   4.155 6.54e-05 ***
## price1        0.5185     0.2405   2.156   0.0333 *  
## price2        0.2733     0.4264   0.641   0.5229    
## price3        0.5810     0.2571   2.260   0.0258 *  
## price4       -2.5892     0.3684  -7.028 1.97e-10 ***
## price5        0.5362     0.2064   2.598   0.0107 *  
## disp4         0.2208     0.1343   1.644   0.1032    
## feat4         0.8953     0.1294   6.921 3.33e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4731 on 108 degrees of freedom
## Multiple R-squared:  0.756,  Adjusted R-squared:  0.7402 
## F-statistic:  47.8 on 7 and 108 DF,  p-value: < 2.2e-16

fit.5 = lm(sales5 ~ price1 + price2 + price3 + price4 + price5 + disp5 + feat5, data = logdata)
summary(fit.5)

## 
## Call:
## lm(formula = sales5 ~ price1 + price2 + price3 + price4 + price5 + 
##     disp5 + feat5, data = logdata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.18031 -0.34812 -0.02469  0.37808  1.81364 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.5227     2.1649  -0.241  0.80967    
## price1        0.1219     0.3346   0.364  0.71643    
## price2       -1.6634     0.6199  -2.683  0.00843 ** 
## price3        1.0077     0.3630   2.776  0.00649 ** 
## price4        1.0951     0.3892   2.813  0.00583 ** 
## price5       -3.1956     0.3453  -9.256 2.29e-15 ***
## disp5         0.2833     0.1436   1.973  0.05110 .  
## feat5         0.6513     0.1550   4.203 5.44e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6943 on 108 degrees of freedom
## Multiple R-squared:  0.6674, Adjusted R-squared:  0.6458 
## F-statistic: 30.96 on 7 and 108 DF,  p-value: < 2.2e-16

The cross elasticity between product 1 and product 3 is positive, and also significant. So for each 1% increase in the price of product 3, there is a .54% increase in product 1.
Cross elasticity between product 1 and product 2, product 3 and product 4, and product 5 vs product 2, 3, 4, are significant.

# prices below mean are seen as 'price cuts', above mean are normal prices. So separate data into high group and low group, and run an anova test on those two to see if their means are significantly different.
logdata$high1 = 0
logdata[logdata$sales1 > mean(logdata$sales1), "high1"] = 1
logdata$high2 = 0
logdata[logdata$sales2 > mean(logdata$sales2), "high2"] = 1
logdata$high3 = 0
logdata[logdata$sales3 > mean(logdata$sales3), "high3"] = 1
logdata$high4 = 0
logdata[logdata$sales4 > mean(logdata$sales4), "high4"] = 1
logdata$high5 = 0
logdata[logdata$sales5 > mean(logdata$sales5), "high5"] = 1
head(logdata)

##   week   sales1   sales2    sales3    sales4   sales5    price1    price2
## 1   40 8.783856 9.041685 10.758754  8.348538 7.796469 -2.864704 -2.813411
## 2   43 8.702178 9.030256  8.009031  8.647519 7.655391 -2.864704 -2.813411
## 3   44 8.743851 8.958797  7.943073 10.495709 6.797940 -2.864704 -2.813411
## 4   45 8.831712 8.983189 10.252453  8.070906 7.049255 -2.864704 -2.813411
## 5   46 8.912473 8.895082  8.219326  9.384630 8.030084 -2.864704 -2.813411
## 6   47 8.831712 8.881836  9.317938  9.970024 8.449343 -2.864704 -2.813411
##      price3    price4    price5     disp1 disp2     disp3     disp4 disp5
## 1 -3.506558 -3.057608 -3.688879 0.2843137     0 0.4965986 0.0000000     0
## 2 -3.123566 -3.057608 -3.244194 0.0000000     0 0.0000000 0.0000000     0
## 3 -3.123566 -3.218876 -3.244194 0.0000000     0 0.0000000 0.0000000     0
## 4 -3.296837 -3.057608 -3.244194 0.0000000     0 0.4492099 0.0000000     0
## 5 -3.123566 -3.057608 -3.244194 0.0000000     0 0.0000000 0.0000000     0
## 6 -3.123566 -3.296837 -3.411248 0.0000000     0 0.0000000 0.4580838     0
##   feat1 feat2 feat3 feat4 feat5    grmar1 grmar2    grmar3    grmar4
## 1     0     0     1     0     1 0.3443569 0.2957 0.1079354 0.3077000
## 2     0     0     1     0     0 0.3421011 0.2957 0.4168021 0.3659202
## 3     0     0     0     1     0 0.3199276 0.2957 0.3725909 0.3348179
## 4     0     0     0     0     0 0.2646710 0.2957 0.2576124 0.4044160
## 5     0     0     0     0     0 0.2609828 0.2957 0.3637621 0.4125806
## 6     0     0     0     1     1 0.2598000 0.2957 0.3653954 0.2076904
##   grmar5 high1 high2 high3 high4 high5
## 1 0.0101     0     0     1     0     0
## 2 0.4281     0     0     0     0     0
## 3 0.4281     0     0     0     1     0
## 4 0.3687     0     0     1     0     0
## 5 0.3747     0     0     0     1     0
## 6 0.2550     0     0     1     1     0

pre.cut.1 = lm(logdata[logdata$high1 == 1,"sales1"] ~ logdata[logdata$high1 == 1, "price1"], data=logdata)
summary(pre.cut.1)

## 
## Call:
## lm(formula = logdata[logdata$high1 == 1, "sales1"] ~ logdata[logdata$high1 == 
##     1, "price1"], data = logdata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.12573 -0.29773  0.08732  0.34727  1.30366 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             2.9297     1.3595   2.155   0.0376
## logdata[logdata$high1 == 1, "price1"]  -2.2396     0.4096  -5.467 3.06e-06
##                                          
## (Intercept)                           *  
## logdata[logdata$high1 == 1, "price1"] ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4793 on 38 degrees of freedom
## Multiple R-squared:  0.4403, Adjusted R-squared:  0.4256 
## F-statistic: 29.89 on 1 and 38 DF,  p-value: 3.059e-06

post.cut.1 = lm(logdata[logdata$high1 == 0,"sales1"] ~ logdata[logdata$high1 == 0,"price1"],data=logdata)
anova(pre.cut.1, post.cut.1)

## Warning in anova.lmlist(object, ...): models with response
## '"logdata[logdata$high1 == 0, \"sales1\"]"' removed because response
## differs from model 1

## Analysis of Variance Table
## 
## Response: logdata[logdata$high1 == 1, "sales1"]
##                                       Df Sum Sq Mean Sq F value    Pr(>F)
## logdata[logdata$high1 == 1, "price1"]  1 6.8681  6.8681  29.893 3.059e-06
## Residuals                             38 8.7306  0.2298                  
##                                          
## logdata[logdata$high1 == 1, "price1"] ***
## Residuals                                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plot(logdata$price1, type="l", col="red")
lines(logdata$sales1)

summary.logdata = summary(logdata)
quan.1 = quantile(logdata$sales1)

juice

Vicky

February 18, 2016