Discussion 12

Data Building

It’s a sales data of dummy customers, objective is to find the best way to predict the Order Qty in by the Customers inormation. We have Following Cusomer info:
+ Cust: Customer Number + Brand : Category or Brand of the Product + Qty : Quanitiy Sold in the give Quarter + Promo : Promotion Applied to the Order + State : State from USA + Qtr : quarterly Data tag with Yeat + Qtrr : quarterly Data tag + Year : Year

dataMkt <- read_excel(path=paste0("mkt_Data.xlsx")) %>% .[,c(33,9,12,15,20,22,35,34)] 
names(dataMkt) <- c("Cust","Brand","Qty","Promo","State","Qtr","Year","Qtrr")

dataMkt$Year <- as.character(dataMkt$Year)
dataMkt$Cust <- as.character(dataMkt$Cust)
head(dataMkt)

Groupping the SALES DATA by each Quarter Customer.

lm_cust <- dataMkt %>% group_by(Cust,Qtr,Brand,Promo,State,Year,Qtrr) %>% summarise(Qty = sum(Qty))


describe(lm_cust)

## Warning in describe(lm_cust): NAs introduced by coercion

## Warning in describe(lm_cust): NAs introduced by coercion

## Warning in describe(lm_cust): NAs introduced by coercion

## Warning in describe(lm_cust): NAs introduced by coercion

## Warning in describe(lm_cust): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf

Model Creation

Try Predicting Quantity based on Known informaiton . Identifying Best model to predict Quantity .

Data

lm_cust  %>% 
   ggplot(mapping = aes(x=Brand, y = Qty,fill = Qtr)) + 
   geom_col()+facet_grid(Year~ .)+
   theme(axis.text.x = element_text(angle = 70, hjust = 1)) + 
   scale_y_continuous( labels = scales::number)+
   ggtitle("Brand by Year ") +ylab("Ordered Unit")

lm_cust  %>% 
   ggplot(mapping = aes(x=Cust, y = jitter(Qty))) + 
   geom_point()+
   theme(axis.text.x = element_text(angle = 70, hjust = 1)) + 
   scale_y_continuous( labels = scales::number)+
   ggtitle("Brand by Year ") +ylab("Ordered Unit")

lm_cust  %>% 
   ggplot(mapping = aes(x=Cust, y = Qty,fill = Qtr)) + 
   geom_col()+
   theme(axis.text.x = element_text(angle = 70, hjust = 1)) + 
   scale_y_continuous( labels = scales::number)+
   ggtitle("Brand by Year ") +ylab("Ordered Unit")

LM Model 1

Creating a sample of 200 records and then We will see if we can Build a model to predict Qty by other known factors. We will follow backword appraoch.

# Result Table 
result <- data.frame(var = 1:10,pval = 1:10, comment=1:10)

result$var <- "A"
result$pval <- "A"
result$comment <- "A" 

## Ready for Model
set.seed(42672)
 lm_cust_s1 <- lm_cust[sample(nrow(lm_cust),200),]
 

lm1 <- lm(Qty ~ Qtrr + Brand + Promo + State,lm_cust_s1)
summary(lm1)

## 
## Call:
## lm(formula = Qty ~ Qtrr + Brand + Promo + State, data = lm_cust_s1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.819  -7.064  -1.331   4.621 109.723 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   12.1903    25.5678   0.477  0.63428   
## QtrrQ2        -9.6299     5.5472  -1.736  0.08481 . 
## QtrrQ3       -12.5274     8.1055  -1.546  0.12452   
## QtrrQ4       -14.7938     7.9852  -1.853  0.06608 . 
## BrandAX        0.8196    23.9381   0.034  0.97274   
## BrandBB       -9.1260    27.4966  -0.332  0.74047   
## BrandBE        2.6453    22.7250   0.116  0.90750   
## BrandBV      -28.5377    31.3228  -0.911  0.36385   
## BrandCH        7.5324    21.2010   0.355  0.72292   
## BrandDG        4.5916    21.5843   0.213  0.83185   
## BrandDY        9.2029    27.6848   0.332  0.74008   
## BrandEA       -2.8370    22.7429  -0.125  0.90091   
## BrandHC        2.4028    20.0190   0.120  0.90464   
## BrandMK        7.6045    19.8830   0.382  0.70271   
## BrandOJ      -29.6001    23.4077  -1.265  0.20818   
## BrandOO        1.9130    20.0811   0.095  0.92425   
## BrandOX       12.1262    20.5952   0.589  0.55697   
## BrandOY        2.7836    20.7103   0.134  0.89328   
## BrandPH       -6.3724    20.8978  -0.305  0.76088   
## BrandPP       -6.4604    24.0029  -0.269  0.78822   
## BrandPR        4.9872    20.6925   0.241  0.80991   
## BrandPS      -30.4702    28.0328  -1.087  0.27897   
## BrandRA      -11.3147    22.7603  -0.497  0.61990   
## BrandRB       14.1610    19.7887   0.716  0.47545   
## BrandRJ      -18.5538    29.6137  -0.627  0.53201   
## BrandRX       11.5364    19.6492   0.587  0.55809   
## BrandRY       -8.4432    20.3798  -0.414  0.67931   
## BrandTF       10.6164    22.0164   0.482  0.63043   
## BrandTY       15.7957    22.0772   0.715  0.47553   
## BrandVE        2.0631    21.2448   0.097  0.92278   
## BrandVO       -1.5657    22.0143  -0.071  0.94340   
## PromoH15       7.8808    11.0910   0.711  0.47856   
## PromoHC18     47.4700    21.8053   2.177  0.03119 * 
## PromoLUX12     3.3146    11.8530   0.280  0.78018   
## PromoLUX20    11.8567    11.4721   1.034  0.30318   
## PromoLUX24    13.4856    23.0946   0.584  0.56023   
## PromoLUX40    22.3993    11.3079   1.981  0.04961 * 
## PromoNASC75   18.8557    16.6408   1.133  0.25915   
## PromoOOX40    24.8739    18.4787   1.346  0.18050   
## PromoOther    12.5330    11.3643   1.103  0.27203   
## PromoOY12      8.0700    18.2033   0.443  0.65823   
## PromoPB1       1.8279    21.6845   0.084  0.93294   
## PromoPB10      0.3381    20.3775   0.017  0.98679   
## PromoPB2      54.0845    18.2090   2.970  0.00352 **
## PromoPB6      -6.7787    17.1785  -0.395  0.69375   
## PromoR-PB10  -12.7955    21.1503  -0.605  0.54619   
## PromoR-PB15  -11.3565    15.3445  -0.740  0.46051   
## PromoRB15     12.2947    15.4961   0.793  0.42891   
## PromoRBRX30    7.5800    21.2966   0.356  0.72245   
## PromoSY1      17.6224    13.8395   1.273  0.20505   
## PromoSY100    22.6608    12.1525   1.865  0.06436 . 
## PromoSY200    27.6843    17.7794   1.557  0.12175   
## PromoUP1      13.7577    13.5856   1.013  0.31300   
## PromoUP3       6.4307    12.6063   0.510  0.61079   
## PromoVPFP200  11.3610    18.2192   0.624  0.53394   
## PromoWC1      31.3733    13.9749   2.245  0.02637 * 
## StateAL       -4.9313    14.0264  -0.352  0.72570   
## StateAZ       -8.3419    15.4916  -0.538  0.59112   
## StateCA       -7.5266    14.2044  -0.530  0.59705   
## StateFL       -7.2924    14.4786  -0.504  0.61530   
## StateIN       -7.0913    15.4281  -0.460  0.64651   
## StateMS       -1.3027    17.5012  -0.074  0.94077   
## StateNY      -10.0934    14.8678  -0.679  0.49836   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.25 on 137 degrees of freedom
## Multiple R-squared:  0.3341, Adjusted R-squared:  0.03278 
## F-statistic: 1.109 on 62 and 137 DF,  p-value: 0.3064

anova(lm1)

result$var[1] <- "lm(formula = Order_Qty ~ Qtrr + Brand + Promo + State, 
    data = lm_cust_s1)"
result$pval[1] <-  "Multiple R-squared:  0.3341,    Adjusted R-squared:  0.03278 
F-statistic: 1.109 on 62 and 137 DF,  p-value: 0.3064"

result$comment[1] <- "Qtr and some Promo codes are significant,state is not "

As we can see that we with this model moslty all of the Coefficients are irrelevent excpet Quater and Few Promotions. Adjusted R Square is not great at all with only 5% of data explain.

we need to think how we can improve this.

LM Model 2

#Dropping State

lm2 <- lm(Qty ~  Qtrr + Brand + Promo  ,lm_cust_s1)
summary(lm2)

## 
## Call:
## lm(formula = Qty ~ Qtrr + Brand + Promo, data = lm_cust_s1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.691  -6.599  -1.339   4.526 111.590 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    3.7758    20.7932   0.182  0.85616   
## QtrrQ2       -10.5933     5.2931  -2.001  0.04723 * 
## QtrrQ3       -14.4997     7.6925  -1.885  0.06146 . 
## QtrrQ4       -16.2344     7.6284  -2.128  0.03503 * 
## BrandAX        4.2542    22.8316   0.186  0.85245   
## BrandBB       -3.1638    26.1208  -0.121  0.90376   
## BrandBE        5.4900    21.4896   0.255  0.79872   
## BrandBV      -24.8512    29.6184  -0.839  0.40283   
## BrandCH        9.0667    20.6654   0.439  0.66151   
## BrandDG        7.2500    20.5860   0.352  0.72522   
## BrandDY       13.1015    26.1218   0.502  0.61675   
## BrandEA        0.2452    21.5875   0.011  0.99095   
## BrandHC        6.4658    19.1070   0.338  0.73556   
## BrandMK       11.0429    18.9608   0.582  0.56120   
## BrandOJ      -25.4426    22.5096  -1.130  0.26023   
## BrandOO        6.6622    18.8854   0.353  0.72478   
## BrandOX       16.3335    19.4047   0.842  0.40134   
## BrandOY        7.9792    19.4580   0.410  0.68236   
## BrandPH       -1.6450    19.6655  -0.084  0.93345   
## BrandPP       -2.5919    22.8117  -0.114  0.90970   
## BrandPR        8.1317    19.5860   0.415  0.67863   
## BrandPS      -27.9053    27.1089  -1.029  0.30503   
## BrandRA       -6.6589    21.4086  -0.311  0.75622   
## BrandRB       18.2302    18.6905   0.975  0.33101   
## BrandRJ      -11.8320    28.0388  -0.422  0.67366   
## BrandRX       16.0544    18.5665   0.865  0.38864   
## BrandRY       -4.5238    19.3259  -0.234  0.81526   
## BrandTF       15.1949    20.7155   0.734  0.46444   
## BrandTY       22.0103    20.5828   1.069  0.28670   
## BrandVE        5.1585    20.3770   0.253  0.80051   
## BrandVO        2.6885    20.6919   0.130  0.89680   
## PromoH15       5.2247    10.2017   0.512  0.60933   
## PromoHC18     43.6590    20.7815   2.101  0.03740 * 
## PromoLUX12     1.2117    11.3585   0.107  0.91519   
## PromoLUX20     9.4038    10.9043   0.862  0.38990   
## PromoLUX24    10.0293    22.3261   0.449  0.65395   
## PromoLUX40    20.6853    10.9123   1.896  0.06002 . 
## PromoNASC75   18.9071    16.2702   1.162  0.24713   
## PromoOOX40    22.0754    17.6624   1.250  0.21338   
## PromoOther    11.8175    10.8371   1.090  0.27733   
## PromoOY12      5.5416    17.3815   0.319  0.75032   
## PromoPB1       0.5620    21.0517   0.027  0.97874   
## PromoPB10     -0.2416    19.9734  -0.012  0.99037   
## PromoPB2      53.1144    17.6877   3.003  0.00315 **
## PromoPB6      -7.7736    16.6010  -0.468  0.64030   
## PromoR-PB10  -13.8302    20.6794  -0.669  0.50470   
## PromoR-PB15  -13.6727    14.8166  -0.923  0.35766   
## PromoRB15      9.0562    14.6341   0.619  0.53700   
## PromoRBRX30    6.9940    20.8116   0.336  0.73731   
## PromoSY1      17.9077    13.4496   1.331  0.18514   
## PromoSY100    21.6224    11.6372   1.858  0.06520 . 
## PromoSY200    22.6752    16.3075   1.390  0.16653   
## PromoUP1      11.0581    12.6546   0.874  0.38366   
## PromoUP3       5.7187    11.4280   0.500  0.61755   
## PromoVPFP200   7.0523    17.0788   0.413  0.68028   
## PromoWC1      27.1295    12.4482   2.179  0.03093 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.9 on 144 degrees of freedom
## Multiple R-squared:  0.327,  Adjusted R-squared:  0.06998 
## F-statistic: 1.272 on 55 and 144 DF,  p-value: 0.1307

anova(lm2)

result$var[2] <- "lm(formula =Qtrr + Brand + Promo, data = lm_cust_s1)"
result$pval[2] <- "Multiple R-squared:  0.327,  Adjusted R-squared:  0.06998 
F-statistic: 1.272 on 55 and 144 DF,  p-value: 0.1307"
result$comment[2] <- "(Month)Brand is not significant" 

#Promo promoVPFP200 turns out to more significant here.

LM Model 3

Now Grouping the data form DIfferent Brand together to see if that has any pattern in it.

#lm_cust_s12<- mutate(lm_cust_s1,Place = if_else(str_detect(Brand,"/*O|/*R[A-Z]") , 1 , 0))

lm_cust_s12<- mutate(lm_cust_s1,Place = case_when(str_detect(Brand,"/*O[A-Z]") ~ 3,
                                                  str_detect(Brand,"/*R[A-Z]") ~ 1,
                                                  TRUE ~ 2))


head(lm_cust_s12)

#3  Dropping brand info 
lm3 <- lm(Qty ~ Qtrr + Brand + Promo + Place,lm_cust_s12)
summary(lm3)

## 
## Call:
## lm(formula = Qty ~ Qtrr + Brand + Promo + Place, data = lm_cust_s12)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.691  -6.599  -1.339   4.526 111.590 
## 
## Coefficients: (1 not defined because of singularities)
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    3.7758    20.7932   0.182  0.85616   
## QtrrQ2       -10.5933     5.2931  -2.001  0.04723 * 
## QtrrQ3       -14.4997     7.6925  -1.885  0.06146 . 
## QtrrQ4       -16.2344     7.6284  -2.128  0.03503 * 
## BrandAX        4.2542    22.8316   0.186  0.85245   
## BrandBB       -3.1638    26.1208  -0.121  0.90376   
## BrandBE        5.4900    21.4896   0.255  0.79872   
## BrandBV      -24.8512    29.6184  -0.839  0.40283   
## BrandCH        9.0667    20.6654   0.439  0.66151   
## BrandDG        7.2500    20.5860   0.352  0.72522   
## BrandDY       13.1015    26.1218   0.502  0.61675   
## BrandEA        0.2452    21.5875   0.011  0.99095   
## BrandHC        6.4658    19.1070   0.338  0.73556   
## BrandMK       11.0429    18.9608   0.582  0.56120   
## BrandOJ      -25.4426    22.5096  -1.130  0.26023   
## BrandOO        6.6622    18.8854   0.353  0.72478   
## BrandOX       16.3335    19.4047   0.842  0.40134   
## BrandOY        7.9792    19.4580   0.410  0.68236   
## BrandPH       -1.6450    19.6655  -0.084  0.93345   
## BrandPP       -2.5919    22.8117  -0.114  0.90970   
## BrandPR        8.1317    19.5860   0.415  0.67863   
## BrandPS      -27.9053    27.1089  -1.029  0.30503   
## BrandRA       -6.6589    21.4086  -0.311  0.75622   
## BrandRB       18.2302    18.6905   0.975  0.33101   
## BrandRJ      -11.8320    28.0388  -0.422  0.67366   
## BrandRX       16.0544    18.5665   0.865  0.38864   
## BrandRY       -4.5238    19.3259  -0.234  0.81526   
## BrandTF       15.1949    20.7155   0.734  0.46444   
## BrandTY       22.0103    20.5828   1.069  0.28670   
## BrandVE        5.1585    20.3770   0.253  0.80051   
## BrandVO        2.6885    20.6919   0.130  0.89680   
## PromoH15       5.2247    10.2017   0.512  0.60933   
## PromoHC18     43.6590    20.7815   2.101  0.03740 * 
## PromoLUX12     1.2117    11.3585   0.107  0.91519   
## PromoLUX20     9.4038    10.9043   0.862  0.38990   
## PromoLUX24    10.0293    22.3261   0.449  0.65395   
## PromoLUX40    20.6853    10.9123   1.896  0.06002 . 
## PromoNASC75   18.9071    16.2702   1.162  0.24713   
## PromoOOX40    22.0754    17.6624   1.250  0.21338   
## PromoOther    11.8175    10.8371   1.090  0.27733   
## PromoOY12      5.5416    17.3815   0.319  0.75032   
## PromoPB1       0.5620    21.0517   0.027  0.97874   
## PromoPB10     -0.2416    19.9734  -0.012  0.99037   
## PromoPB2      53.1144    17.6877   3.003  0.00315 **
## PromoPB6      -7.7736    16.6010  -0.468  0.64030   
## PromoR-PB10  -13.8302    20.6794  -0.669  0.50470   
## PromoR-PB15  -13.6727    14.8166  -0.923  0.35766   
## PromoRB15      9.0562    14.6341   0.619  0.53700   
## PromoRBRX30    6.9940    20.8116   0.336  0.73731   
## PromoSY1      17.9077    13.4496   1.331  0.18514   
## PromoSY100    21.6224    11.6372   1.858  0.06520 . 
## PromoSY200    22.6752    16.3075   1.390  0.16653   
## PromoUP1      11.0581    12.6546   0.874  0.38366   
## PromoUP3       5.7187    11.4280   0.500  0.61755   
## PromoVPFP200   7.0523    17.0788   0.413  0.68028   
## PromoWC1      27.1295    12.4482   2.179  0.03093 * 
## Place              NA         NA      NA       NA   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.9 on 144 degrees of freedom
## Multiple R-squared:  0.327,  Adjusted R-squared:  0.06998 
## F-statistic: 1.272 on 55 and 144 DF,  p-value: 0.1307

anova(lm3)

result$var[3] <- "lm(formula = Order_Qty ~ Qtrr + Brand + Promo + Place, data = lm_cust_s12)"
result$pval[3] <- "Multiple R-squared:  0.327,  Adjusted R-squared:  0.06998 
F-statistic: 1.272 on 55 and 144 DF,  p-value: 0.1307"
result$comment[3] <- " "