# Load the raw Unilever event data and split it into one data frame per brand.
#
# NOTE(review): the working directory is a hard-coded, machine-specific path.
# It is left in place because code elsewhere in the script may rely on it;
# prefer passing a full path to read.csv() when the script is next reworked.
setwd("/Users/admin/Downloads")

# stringsAsFactors = TRUE is stated explicitly: since R 4.0.0 read.csv() keeps
# text columns as character by default, while the steps further down (levels()
# renaming, tree and boosting fits) operate on factor columns.
unilever <- read.csv("unilever.csv", header = TRUE, stringsAsFactors = TRUE)

# One list element per Brand value.
unilever.new <- split(unilever, unilever$Brand)
class(unilever.new)
## [1] "list"
# Pull six brand subsets out of the per-brand list by their position in the
# split result.
# NOTE(review): the variable names suggest which brand sits at each position,
# but the positions cannot be checked from this script alone -- confirm
# against names(unilever.new) before relying on them.
popsicle   <- unilever.new[[1]]
klondike   <- unilever.new[[2]]
good.humor <- unilever.new[[3]]
breyers    <- unilever.new[[4]]
ben.jerry  <- unilever.new[[5]]
fruttare   <- unilever.new[[12]]

# Stack the six subsets into one data frame (rows appear in the order the
# subsets are listed) and print the resulting column names.
unilever.new <- rbind(
  ben.jerry, popsicle, klondike, breyers, good.humor, fruttare
)
names(unilever.new)
## [1] "Roi.Id"
## [2] "Year"
## [3] "Sector"
## [4] "Division"
## [5] "Planned.Account.Name"
## [6] "MLT"
## [7] "Category"
## [8] "Brand"
## [9] "Product"
## [10] "Ev.."
## [11] "Event.Start.Date"
## [12] "Event.Weeks"
## [13] "Event.Code"
## [14] "Feature.Price"
## [15] "Deal.Structure"
## [16] "Units.per.Case"
## [17] "List.Price.per.Unit"
## [18] "NSV.per.unit"
## [19] "COGS.per.Unit"
## [20] "COGS.Rate.."
## [21] "Everyday.Unit.Cost"
## [22] "Everyday.Price"
## [23] "EDLP.Rate"
## [24] "Ship.Promo.UC"
## [25] "Plan.Scan.Unit.Rate"
## [26] "Plan.Scan.Trade.OI.Unit.Rate"
## [27] "IncSpendexcScanUnitRate"
## [28] "Total.Deal.Per.Unit"
## [29] "Tot.Var.TS.per.Unit"
## [30] "Menu.Fixed.Cost"
## [31] "Other.Fixed.Cost"
## [32] "TTL.Spend.per.Unit"
## [33] "Plan.Scan.Units"
## [34] "Plan.Shipped.Units"
## [35] "Nielsen.Customer.Name"
## [36] "Nielson.Product.Name"
## [37] "Nielsen.Category"
## [38] "Mid.Week.Event"
## [39] "Week.Validation"
## [40] "Nielsen.Event.Week"
## [41] "Nielsen.Feature.Price"
## [42] "Nielsen.Non.Promoted.Unit.Price"
## [43] "Nielsen.AVG.Price"
## [44] "Price.Bands"
## [45] "Base.Unit...Volume"
## [46] "Incr..Unit.Volume"
## [47] "Event.Unit.Volume"
## [48] "Promoted.Units"
## [49] "X..Volume"
## [50] "Eq.Volume"
## [51] "Non.Promoted.Units"
## [52] "Baseline.Any.Promo.U.Vol"
## [53] "Incr..Any.Promo.U.Vol"
## [54] "Baseline.Non.Promo.U.Vol"
## [55] "Incr..Non.Promo.U.Vol"
## [56] "X..ACV.TPR"
## [57] "X..ACV.Ftr"
## [58] "X..ACV..Dsply"
## [59] "X.ACV.Feat...Disp"
## [60] "X.ACV.Any.Promo"
## [61] "Unit.Share.of.Category"
## [62] "X..Share.of.Category"
## [63] "TQMP"
## [64] "Base.COGS"
## [65] "Incremental.COGS"
## [66] "Total.Event.COGS"
## [67] "Base...NSV"
## [68] "Incremental...NSV"
## [69] "Event...NSV"
## [70] "Base.Sales.Contribution"
## [71] "Incremental.Sales.Contribution"
## [72] "Total.Sales.Contribution"
## [73] "Base.Event.Cost"
## [74] "Incremental.Event.Cost"
## [75] "Total.Event.Cost"
## [76] "Var.TS"
## [77] "Tot.TS"
## [78] "Inc.TCI"
## [79] "Total.TCI"
## [80] "Incr.NIP.."
## [81] "Event...Lift"
## [82] "Base.Subsidy"
## [83] "Everyday.Retail.Margin"
## [84] "Event.Retailer.margin"
## [85] "Promo.Efficiency"
## [86] "Incr.Event...ROI"
## [87] "Total.Event...ROI"
## [88] "Total.TCI.ROI"
## [89] "Incr.TCI.ROI"
## [90] "Coverage"
## [91] "Event.Notes"
## [92] "Event.Description"
## [93] "Channel"
## [94] "Nielsen_price_diff"
## [95] "Std_price_diff"
## [96] "ln_incr_unit_volume"
## [97] "ln_incr_unit_cost"
## [98] "ln_event_pct_lift"
## [99] "ln_incr_NIP"
## [100] "ln_total_TCI"
## [101] "ln_inc_TCI"
## [102] "ln_Tot_TS"
## [103] "ln_var_TS"
## [104] "Pred.Formula.Incr.Event...ROI_PAName_Prod_EventCode"
## [105] "ln_feature_price"
## [106] "ln_list_price_per_unit"
## [107] "ln_everyday_unit_cost"
## [108] "ln_everyday_price"
## [109] "ln_EDLP_rate"
## [110] "Predicted._PA_PR_Promo_Incr.Event...ROI"
## [111] "Predicted.Incr.Event...ROI_BJs_only"
## [112] "ln_planned_shipp_units"
## [113] "ln_incspendexunitrate"
## [114] "ln_total_deal_unit"
## [115] "otherfixedcostadd1"
## [116] "ln_ttlspendunit"
## [117] "Predicted_4_var_model_Incr.Event...ROI"
## [118] "Predicted_3_var_model_Incr.Event...ROI"
## [119] "ln_base_unit_volume"
## [120] "ln_event_unit_volume"
## [121] "ln_ACV_any_promo"
## [122] "ln_total_event_cost"
## [123] "ln_baseline_any_promo_U_volume"
## [124] "ln_TQLP"
## [125] "ln_Eq_volume"
## [126] "ln_baseline_non_promo_U_volume"
## [127] "ln_._volume"
## [128] "Predicted.Incr.Event...ROI_non_logged"
## [129] "Predicted.Incr.Event...ROI_logged"
## [130] "Predicted.Incr.Event...ROI_logged_7813"
## [131] "Predicted.Incr.Event...ROI_TTl_spend_level"
## [132] "Predicted.Incr.Event...ROI_ln_TTl_spend_unit"
## [133] "Residual.Incr.Event...ROI_levels"
## [134] "Residual.Incr.Event...ROI_ln"
# Remove 21 columns in one step. Positions refer to the 134-column layout
# printed above:
#   35-37, 40-43   Nielsen customer / product / category / event-week / price
#   94, 95         Nielsen_price_diff, Std_price_diff
#   104, 110, 111, 117, 118, 128-132   stored model-prediction columns
#   133, 134       stored residual columns
unilever.new <- unilever.new[
  ,
  -c(35:37, 40:43, 94, 95, 104, 110, 111, 117, 118, 128:134)
]
names(unilever.new)
## [1] "Roi.Id" "Year"
## [3] "Sector" "Division"
## [5] "Planned.Account.Name" "MLT"
## [7] "Category" "Brand"
## [9] "Product" "Ev.."
## [11] "Event.Start.Date" "Event.Weeks"
## [13] "Event.Code" "Feature.Price"
## [15] "Deal.Structure" "Units.per.Case"
## [17] "List.Price.per.Unit" "NSV.per.unit"
## [19] "COGS.per.Unit" "COGS.Rate.."
## [21] "Everyday.Unit.Cost" "Everyday.Price"
## [23] "EDLP.Rate" "Ship.Promo.UC"
## [25] "Plan.Scan.Unit.Rate" "Plan.Scan.Trade.OI.Unit.Rate"
## [27] "IncSpendexcScanUnitRate" "Total.Deal.Per.Unit"
## [29] "Tot.Var.TS.per.Unit" "Menu.Fixed.Cost"
## [31] "Other.Fixed.Cost" "TTL.Spend.per.Unit"
## [33] "Plan.Scan.Units" "Plan.Shipped.Units"
## [35] "Mid.Week.Event" "Week.Validation"
## [37] "Price.Bands" "Base.Unit...Volume"
## [39] "Incr..Unit.Volume" "Event.Unit.Volume"
## [41] "Promoted.Units" "X..Volume"
## [43] "Eq.Volume" "Non.Promoted.Units"
## [45] "Baseline.Any.Promo.U.Vol" "Incr..Any.Promo.U.Vol"
## [47] "Baseline.Non.Promo.U.Vol" "Incr..Non.Promo.U.Vol"
## [49] "X..ACV.TPR" "X..ACV.Ftr"
## [51] "X..ACV..Dsply" "X.ACV.Feat...Disp"
## [53] "X.ACV.Any.Promo" "Unit.Share.of.Category"
## [55] "X..Share.of.Category" "TQMP"
## [57] "Base.COGS" "Incremental.COGS"
## [59] "Total.Event.COGS" "Base...NSV"
## [61] "Incremental...NSV" "Event...NSV"
## [63] "Base.Sales.Contribution" "Incremental.Sales.Contribution"
## [65] "Total.Sales.Contribution" "Base.Event.Cost"
## [67] "Incremental.Event.Cost" "Total.Event.Cost"
## [69] "Var.TS" "Tot.TS"
## [71] "Inc.TCI" "Total.TCI"
## [73] "Incr.NIP.." "Event...Lift"
## [75] "Base.Subsidy" "Everyday.Retail.Margin"
## [77] "Event.Retailer.margin" "Promo.Efficiency"
## [79] "Incr.Event...ROI" "Total.Event...ROI"
## [81] "Total.TCI.ROI" "Incr.TCI.ROI"
## [83] "Coverage" "Event.Notes"
## [85] "Event.Description" "Channel"
## [87] "ln_incr_unit_volume" "ln_incr_unit_cost"
## [89] "ln_event_pct_lift" "ln_incr_NIP"
## [91] "ln_total_TCI" "ln_inc_TCI"
## [93] "ln_Tot_TS" "ln_var_TS"
## [95] "ln_feature_price" "ln_list_price_per_unit"
## [97] "ln_everyday_unit_cost" "ln_everyday_price"
## [99] "ln_EDLP_rate" "ln_planned_shipp_units"
## [101] "ln_incspendexunitrate" "ln_total_deal_unit"
## [103] "otherfixedcostadd1" "ln_ttlspendunit"
## [105] "ln_base_unit_volume" "ln_event_unit_volume"
## [107] "ln_ACV_any_promo" "ln_total_event_cost"
## [109] "ln_baseline_any_promo_U_volume" "ln_TQLP"
## [111] "ln_Eq_volume" "ln_baseline_non_promo_U_volume"
## [113] "ln_._volume"
# Columns handed to the imputation step, picked by position in the 113-column
# layout printed above: ln_event_pct_lift (89), ln_incr_unit_cost (88),
# ln_incr_NIP (90), IncSpendexcScanUnitRate (27), X..ACV.TPR (49),
# ln_incspendexunitrate (101) and ln_incr_unit_volume (87).
missing.var <- unilever.new[, c(89, 88, 90, 27, 49, 101, 87)]
#MISSING VALUE IMPUTATION
# Install mice only when it is not already available, instead of downloading
# it on every run. library() stops with an error if the package cannot be
# loaded, whereas require() would only return FALSE and let the script go on.
if (!requireNamespace("mice", quietly = TRUE)) {
  install.packages("mice", repos = "http://cran.us.r-project.org")
}
library(mice)
##
## The downloaded binary packages are in
## /var/folders/Vm/VmfZyeujEVu8VuKusEeMw++++TI/-Tmp-//Rtmp0xVnNB/downloaded_packages
## Loading required package: mice
## Warning: package 'mice' was built under R version 3.2.3
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 3.2.2
## mice 2.25 2015-11-09
# Multiple imputation on the selected columns plus column 64 of unilever.new
# (Incremental.Sales.Contribution in the layout printed above), producing
# m = 5 imputed data sets with a fixed seed.
imp <- mice(
  cbind(missing.var, unilever.new[, 64]),
  m = 5,
  seed = 34
)
##
## iter imp variable
## 1 1 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 1 2 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 1 3 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 1 4 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 1 5 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 2 1 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 2 2 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 2 3 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 2 4 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 2 5 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 3 1 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 3 2 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 3 3 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 3 4 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 3 5 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 4 1 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 4 2 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 4 3 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 4 4 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 4 5 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 5 1 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 5 2 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 5 3 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 5 4 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
## 5 5 ln_event_pct_lift ln_incr_NIP IncSpendexcScanUnitRate X..ACV.TPR ln_incspendexunitrate ln_incr_unit_volume
# Column names of the first completed (imputed) data set.
colnames(complete(imp, action = 1))
## [1] "ln_event_pct_lift" "ln_incr_unit_cost"
## [3] "ln_incr_NIP" "IncSpendexcScanUnitRate"
## [5] "X..ACV.TPR" "ln_incspendexunitrate"
## [7] "ln_incr_unit_volume" "unilever.new[, 64]"
#CONCATENATE IMPUTED DATA W/ ORIGINAL DATA
# Swap the seven imputed columns in for their original versions, then discard
# the columns that are not carried forward.
#
# Columns are addressed by name rather than by a chain of positional deletes:
#   * each positional delete depended on the shifts caused by the previous
#     ones, so any upstream change silently removed the wrong columns;
#   * x[, -which(names(x) == "col")] removes EVERY column when "col" is
#     absent, because which() then returns integer(0).
# The names below are the columns the original positions resolved to in the
# layouts printed above.

# First completed data set, restricted to the imputed variables. This leaves
# out the auxiliary "unilever.new[, 64]" column that was only passed to mice().
imputed <- complete(imp)[, names(missing.var), drop = FALSE]

# Remove the un-imputed originals, then put the imputed columns in front.
unilever.new <- unilever.new[
  ,
  !(names(unilever.new) %in% names(missing.var)),
  drop = FALSE
]
unilever.new <- cbind(imputed, unilever.new)

# Columns not carried forward.
discard <- c(
  "Sector", "Units.per.Case", "Week.Validation", "Price.Bands",
  "Total.Sales.Contribution", "Total.TCI.ROI", "Incr.TCI.ROI",
  "Coverage", "Event.Notes", "Event.Description"
)
unilever.new <- unilever.new[
  ,
  !(names(unilever.new) %in% discard),
  drop = FALSE
]
#RENAMING THE LEVELS OF THE PLANNED.ACCOUNT.NAME
# Replace the account labels with short codes. Eight levels are located by
# their current label; the rest are addressed by their position in
# levels(). Assignments are applied one at a time, in the order listed.

# Current label -> short code.
pan.by.name <- c(
  "A&P/SUPER FRESH-NY- IC"     = "Super.F",
  "AMERICAN SALES COMPANY INC" = "ASC.INC",
  "BASHAS INC"                 = "B.INC",
  "BI-LO (ALL)"                = "BI-LO",
  "BJ's"                       = "BJ",
  "DOLLAR GENERAL"             = "DG",
  "FAMILY DOLLAR"              = "FD",
  "FOOD LION INC"              = "FL"
)
for (old.label in names(pan.by.name)) {
  levels(unilever.new$Planned.Account.Name)[
    levels(unilever.new$Planned.Account.Name) == old.label
  ] <- pan.by.name[[old.label]]
}

# Level position -> short code. Positions 26 and 30 are deliberately skipped.
# NOTE(review): which account sits at each position is not visible in this
# script -- confirm against levels() of the loaded data.
pan.positions <- c(10:25, 27:29, 31:32)
pan.codes <- c(
  "GE", "GC", "TEA.CO", "HT.INC", "HY-VEE.INC", "KM", "KROGER", "MEIJER",
  "PUBLIX", "R.INC", "RAC", "ROUND", "SAFE.BELL", "SM.INC", "SB.INC", "S&S",
  "TPC", "TTD", "WFC",
  "WM.INC", "WD.Jack"
)
for (k in seq_along(pan.positions)) {
  levels(unilever.new$Planned.Account.Name)[pan.positions[k]] <- pan.codes[k]
}

# Shorten the column name itself.
names(unilever.new)[names(unilever.new) == "Planned.Account.Name"] <- "P.A.N."
#REGRESSION TREE
# Install MASS only when it is missing (no download on every run), then attach
# it. library() raises an error if loading fails; require() would only return
# FALSE and let the script continue without the package.
if (!requireNamespace("MASS", quietly = TRUE)) {
  install.packages("MASS", repos = "http://cran.us.r-project.org")
}
library(MASS)
##
## The downloaded binary packages are in
## /var/folders/Vm/VmfZyeujEVu8VuKusEeMw++++TI/-Tmp-//Rtmp0xVnNB/downloaded_packages
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 3.2.3
# Install randomForest only when it is missing, then attach it; library()
# fails loudly if the package cannot be loaded, unlike require().
if (!requireNamespace("randomForest", quietly = TRUE)) {
  install.packages("randomForest", repos = "http://cran.us.r-project.org")
}
library(randomForest)
##
## The downloaded binary packages are in
## /var/folders/Vm/VmfZyeujEVu8VuKusEeMw++++TI/-Tmp-//Rtmp0xVnNB/downloaded_packages
## Loading required package: randomForest
## Warning: package 'randomForest' was built under R version 3.2.3
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
# Install tree only when it is missing, then attach it; library() fails loudly
# if the package cannot be loaded, unlike require().
if (!requireNamespace("tree", quietly = TRUE)) {
  install.packages("tree", repos = "http://cran.us.r-project.org")
}
library(tree)
##
## The downloaded binary packages are in
## /var/folders/Vm/VmfZyeujEVu8VuKusEeMw++++TI/-Tmp-//Rtmp0xVnNB/downloaded_packages
## Loading required package: tree
# NOTE(review): attach() puts a copy of the columns on the search path; later
# statements in this script refer to columns by bare name, so it is kept here.
attach(unilever.new)

# Reproducible random split: draw nrow/2 row indices for training.
set.seed(1)
train <- sample(seq_len(nrow(unilever.new)), nrow(unilever.new) / 2)

# Observed response for the rows that were not drawn (the hold-out rows).
unilever.test <- unilever.new[-train, "Incremental.Sales.Contribution"]

library(gbm)
## Loading required package: survival
## Loading required package: lattice
## Loading required package: splines
## Loading required package: parallel
## Loaded gbm 2.1.1
# Boosted regression of Incremental.Sales.Contribution on all remaining
# columns of the training rows (Gaussian loss, 5000 trees, interaction
# depth 4), followed by the relative-influence summary.
set.seed(2)

# Exclude Incr.Event...ROI with a logical mask. The previous
# -which(names(...) == ...) form selects zero columns when the name is absent,
# because which() then returns integer(0).
boost.cols <- names(unilever.new) != "Incr.Event...ROI"
boost.unilev <- gbm(
  Incremental.Sales.Contribution ~ .,
  data = unilever.new[train, boost.cols, drop = FALSE],
  distribution = "gaussian",
  n.trees = 5000,
  interaction.depth = 4
)
summary(boost.unilev)

## var rel.inf
## Event.Start.Date Event.Start.Date 6.007773e+01
## ln_incr_NIP ln_incr_NIP 7.763196e+00
## Base.Event.Cost Base.Event.Cost 5.273924e+00
## Incr.NIP.. Incr.NIP.. 3.751888e+00
## X..Volume X..Volume 3.457312e+00
## Promoted.Units Promoted.Units 2.298867e+00
## Baseline.Any.Promo.U.Vol Baseline.Any.Promo.U.Vol 1.773779e+00
## ln_incr_unit_cost ln_incr_unit_cost 1.561460e+00
## Base.COGS Base.COGS 1.254671e+00
## P.A.N. P.A.N. 1.254139e+00
## Total.Event...ROI Total.Event...ROI 1.251325e+00
## Base.Sales.Contribution Base.Sales.Contribution 1.071434e+00
## TTL.Spend.per.Unit TTL.Spend.per.Unit 1.055010e+00
## Total.Event.Cost Total.Event.Cost 9.569655e-01
## Plan.Scan.Units Plan.Scan.Units 9.027236e-01
## Plan.Shipped.Units Plan.Shipped.Units 7.938950e-01
## Eq.Volume Eq.Volume 5.970907e-01
## X..ACV..Dsply X..ACV..Dsply 4.843398e-01
## Event.Unit.Volume Event.Unit.Volume 4.483635e-01
## Base...NSV Base...NSV 4.190188e-01
## Total.TCI Total.TCI 4.185016e-01
## Base.Unit...Volume Base.Unit...Volume 4.172499e-01
## Total.Event.COGS Total.Event.COGS 3.047761e-01
## Incremental...NSV Incremental...NSV 2.859316e-01
## Deal.Structure Deal.Structure 2.320999e-01
## Incremental.COGS Incremental.COGS 1.789037e-01
## Menu.Fixed.Cost Menu.Fixed.Cost 1.714848e-01
## Event...NSV Event...NSV 1.567543e-01
## ln_incr_unit_volume ln_incr_unit_volume 1.518760e-01
## X.ACV.Feat...Disp X.ACV.Feat...Disp 1.315852e-01
## Baseline.Non.Promo.U.Vol Baseline.Non.Promo.U.Vol 1.052966e-01
## Plan.Scan.Unit.Rate Plan.Scan.Unit.Rate 1.030788e-01
## Ship.Promo.UC Ship.Promo.UC 9.335171e-02
## Var.TS Var.TS 9.152281e-02
## TQMP TQMP 8.521819e-02
## Total.Deal.Per.Unit Total.Deal.Per.Unit 7.866510e-02
## Incr..Any.Promo.U.Vol Incr..Any.Promo.U.Vol 7.525934e-02
## Non.Promoted.Units Non.Promoted.Units 7.175388e-02
## Incr..Non.Promo.U.Vol Incr..Non.Promo.U.Vol 5.237129e-02
## Division Division 4.378566e-02
## Event.Retailer.margin Event.Retailer.margin 4.290784e-02
## Everyday.Unit.Cost Everyday.Unit.Cost 3.765792e-02
## ln_incspendexunitrate ln_incspendexunitrate 3.344987e-02
## Other.Fixed.Cost Other.Fixed.Cost 2.632556e-02
## Inc.TCI Inc.TCI 2.584378e-02
## ln_event_pct_lift ln_event_pct_lift 2.397804e-02
## Channel Channel 2.014501e-02
## Event.Weeks Event.Weeks 1.623824e-02
## IncSpendexcScanUnitRate IncSpendexcScanUnitRate 1.499409e-02
## X..Share.of.Category X..Share.of.Category 1.410072e-02
## Everyday.Retail.Margin Everyday.Retail.Margin 1.121840e-02
## Brand Brand 7.378438e-03
## EDLP.Rate EDLP.Rate 5.745309e-03
## List.Price.per.Unit List.Price.per.Unit 5.652499e-03
## Everyday.Price Everyday.Price 3.859512e-03
## Event.Code Event.Code 3.731649e-03
## Ev.. Ev.. 3.106387e-03
## X..ACV.Ftr X..ACV.Ftr 2.849465e-03
## Feature.Price Feature.Price 2.211836e-03
## Unit.Share.of.Category Unit.Share.of.Category 7.972471e-04
## COGS.Rate.. COGS.Rate.. 6.200115e-04
## X..ACV.TPR X..ACV.TPR 5.863107e-04
## Roi.Id Roi.Id 0.000000e+00
## Year Year 0.000000e+00
## MLT MLT 0.000000e+00
## Category Category 0.000000e+00
## Product Product 0.000000e+00
## NSV.per.unit NSV.per.unit 0.000000e+00
## COGS.per.Unit COGS.per.Unit 0.000000e+00
## Plan.Scan.Trade.OI.Unit.Rate Plan.Scan.Trade.OI.Unit.Rate 0.000000e+00
## Tot.Var.TS.per.Unit Tot.Var.TS.per.Unit 0.000000e+00
## Mid.Week.Event Mid.Week.Event 0.000000e+00
## Incr..Unit.Volume Incr..Unit.Volume 0.000000e+00
## X.ACV.Any.Promo X.ACV.Any.Promo 0.000000e+00
## Incremental.Event.Cost Incremental.Event.Cost 0.000000e+00
## Tot.TS Tot.TS 0.000000e+00
## Event...Lift Event...Lift 0.000000e+00
## Base.Subsidy Base.Subsidy 0.000000e+00
## Promo.Efficiency Promo.Efficiency 0.000000e+00
## ln_total_TCI ln_total_TCI 0.000000e+00
## ln_inc_TCI ln_inc_TCI 0.000000e+00
## ln_Tot_TS ln_Tot_TS 0.000000e+00
## ln_var_TS ln_var_TS 0.000000e+00
## ln_feature_price ln_feature_price 0.000000e+00
## ln_list_price_per_unit ln_list_price_per_unit 0.000000e+00
## ln_everyday_unit_cost ln_everyday_unit_cost 0.000000e+00
## ln_everyday_price ln_everyday_price 0.000000e+00
## ln_EDLP_rate ln_EDLP_rate 0.000000e+00
## ln_planned_shipp_units ln_planned_shipp_units 0.000000e+00
## ln_total_deal_unit ln_total_deal_unit 0.000000e+00
## otherfixedcostadd1 otherfixedcostadd1 0.000000e+00
## ln_ttlspendunit ln_ttlspendunit 0.000000e+00
## ln_base_unit_volume ln_base_unit_volume 0.000000e+00
## ln_event_unit_volume ln_event_unit_volume 0.000000e+00
## ln_ACV_any_promo ln_ACV_any_promo 0.000000e+00
## ln_total_event_cost ln_total_event_cost 0.000000e+00
## ln_baseline_any_promo_U_volume ln_baseline_any_promo_U_volume 0.000000e+00
## ln_TQLP ln_TQLP 0.000000e+00
## ln_Eq_volume ln_Eq_volume 0.000000e+00
## ln_baseline_non_promo_U_volume ln_baseline_non_promo_U_volume 0.000000e+00
## ln_._volume ln_._volume 0.000000e+00
# Predict the hold-out rows with all 5000 trees and report the test mean
# squared error against the observed response.
# Incr.Event...ROI is excluded with a logical mask; -which(...) would select
# zero columns if the name were ever absent (which() returns integer(0)).
eval.cols <- names(unilever.new) != "Incr.Event...ROI"
yhat.boost <- predict(
  boost.unilev,
  newdata = unilever.new[-train, eval.cols, drop = FALSE],
  n.trees = 5000
)
mean((yhat.boost - unilever.test)^2)
## [1] 11657295050
#CLASSIFICATION TREE
# Install rpart only when it is missing, then attach it; library() fails
# loudly if the package cannot be loaded, unlike require().
if (!requireNamespace("rpart", quietly = TRUE)) {
  install.packages("rpart", repos = "http://cran.us.r-project.org")
}
library(rpart)
##
## The downloaded binary packages are in
## /var/folders/Vm/VmfZyeujEVu8VuKusEeMw++++TI/-Tmp-//Rtmp0xVnNB/downloaded_packages
## Loading required package: rpart
# Make sure rattle is installed BEFORE requesting a data set from it: the
# original order called data(..., package = "rattle") first, which cannot find
# the data set on a machine where rattle has not been installed yet.
if (!requireNamespace("rattle", quietly = TRUE)) {
  install.packages("rattle", repos = "http://cran.us.r-project.org")
}

# Load the wine data set from the rattle package into the workspace.
data(wine, package = "rattle")

# Attach rattle; library() fails loudly if loading fails, unlike require().
library(rattle)
##
## The downloaded binary packages are in
## /var/folders/Vm/VmfZyeujEVu8VuKusEeMw++++TI/-Tmp-//Rtmp0xVnNB/downloaded_packages
## Loading required package: rattle
## Warning: package 'rattle' was built under R version 3.2.3
## Warning: Failed to load RGtk2 dynamic library, attempting to install it.
## Please install GTK+ from http://r.research.att.com/libs/GTK_2.24.17-X11.pkg
## If the package still does not load, please ensure that GTK+ is installed and that it is on your PATH environment variable
## IN ANY CASE, RESTART R BEFORE TRYING TO LOAD THE PACKAGE AGAIN
## Rattle: A free graphical interface for data mining with R.
## Version 4.0.5 Copyright (c) 2006-2015 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
# Attach rpart.plot. library() stops with a clear error when the package is
# not installed, whereas require() would return FALSE and let the script fail
# later at the first call that needs it.
library(rpart.plot)
## Loading required package: rpart.plot
# Response used for the classification tree. The column is read directly from
# unilever.new instead of through the search path, so this no longer depends
# on an earlier attach() (or on no global variable shadowing the column name).
ISC <- unilever.new[["Incremental.Sales.Contribution"]]

# Count events with non-positive vs. positive incremental contribution.
summary(ISC <= 0)
## Mode FALSE TRUE NA's
## logical 1134 475 0
# Two-class response: "Neg" when the incremental contribution is <= 0,
# "Pos" otherwise.
ISC.dich <- factor(ifelse(ISC <= 0, "Neg", "Pos"))

# Classification tree on all columns except ten removed by position.
# NOTE(review): tracing the earlier column drops, these positions appear to be
# Product, Ev.., Event.Start.Date, Event.Weeks (15-18),
# Incremental.Sales.Contribution, Base.Event.Cost (65, 66),
# Inc.TCI, Total.TCI (71, 72) and Incr.Event...ROI, Total.Event...ROI
# (79, 80) -- confirm with names(unilever.new).
tree.uni <- rpart(
  ISC.dich ~ .,
  data = unilever.new[, -c(15, 16, 17, 18, 65, 66, 79, 80, 71, 72)],
  method = "class"
)
summary(tree.uni)
## Call:
## rpart(formula = ISC.dich ~ ., data = unilever.new[, -c(15, 16,
## 17, 18, 65, 66, 79, 80, 71, 72)], method = "class")
## n= 1609
##
## CP nsplit rel error xerror xstd
## 1 0.34736842 0 1.0000000 1.0000000 0.03851959
## 2 0.10947368 1 0.6526316 0.6694737 0.03362831
## 3 0.07578947 2 0.5431579 0.5894737 0.03201621
## 4 0.05263158 3 0.4673684 0.5326316 0.03074103
## 5 0.02105263 4 0.4147368 0.4652632 0.02906827
## 6 0.01684211 5 0.3936842 0.4568421 0.02884549
## 7 0.01473684 6 0.3768421 0.4547368 0.02878928
## 8 0.01263158 12 0.2715789 0.4231579 0.02792079
## 9 0.01000000 15 0.2336842 0.4063158 0.02743713
##
## Variable importance
## Event.Retailer.margin Incr.NIP.. COGS.Rate..
## 14 8 7
## Ship.Promo.UC Brand ln_ttlspendunit
## 6 6 6
## TTL.Spend.per.Unit P.A.N. COGS.per.Unit
## 6 4 4
## ln_incr_NIP Incr..Unit.Volume ln_event_pct_lift
## 4 3 2
## Deal.Structure Total.Deal.Per.Unit Incremental.COGS
## 2 2 2
## ln_ACV_any_promo X.ACV.Any.Promo List.Price.per.Unit
## 2 2 2
## ln_list_price_per_unit NSV.per.unit Incremental...NSV
## 2 2 1
## ln_TQLP TQMP Incr..Any.Promo.U.Vol
## 1 1 1
## ln_incr_unit_volume Everyday.Unit.Cost ln_everyday_unit_cost
## 1 1 1
## Category Division Base.Subsidy
## 1 1 1
## Event...Lift Promo.Efficiency ln_total_deal_unit
## 1 1 1
## Tot.Var.TS.per.Unit
## 1
##
## Node number 1: 1609 observations, complexity param=0.3473684
## predicted class=Pos expected loss=0.2952144 P(node) =1
## class counts: 475 1134
## probabilities: 0.295 0.705
## left son=2 (225 obs) right son=3 (1384 obs)
## Primary splits:
## Event.Retailer.margin < 0.407141 to the right, improve=170.84110, (0 missing)
## Incr.NIP.. < 411.3877 to the left, improve=146.77590, (0 missing)
## ln_incr_NIP < 9.971741 to the left, improve=116.22820, (0 missing)
## Ship.Promo.UC < 1.863961 to the left, improve=111.46720, (0 missing)
## P.A.N. splits as R-LRL-LL--R-RL-LRLL-LRRRRL-L--RR, improve= 92.16861, (0 missing)
## Surrogate splits:
## Ship.Promo.UC < 1.397492 to the left, agree=0.896, adj=0.258, (0 split)
## TTL.Spend.per.Unit < 2.000782 to the right, agree=0.893, adj=0.236, (0 split)
## ln_ttlspendunit < 0.6940378 to the right, agree=0.893, adj=0.236, (0 split)
## Incr.NIP.. < -2629.299 to the left, agree=0.877, adj=0.120, (0 split)
## Total.Deal.Per.Unit < 2.04 to the right, agree=0.874, adj=0.102, (0 split)
##
## Node number 2: 225 observations, complexity param=0.01263158
## predicted class=Neg expected loss=0.1333333 P(node) =0.1398384
## class counts: 195 30
## probabilities: 0.867 0.133
## left son=4 (189 obs) right son=5 (36 obs)
## Primary splits:
## TTL.Spend.per.Unit < 0.9814375 to the right, improve=11.523810, (0 missing)
## ln_ttlspendunit < -0.01774234 to the right, improve=11.523810, (0 missing)
## Ship.Promo.UC < 2.14989 to the left, improve= 6.975845, (0 missing)
## P.A.N. splits as R-LL---L--R-LL-LLLR-RRRLRL-L--RL, improve= 5.542510, (0 missing)
## Event.Retailer.margin < 0.4684733 to the right, improve= 5.142857, (0 missing)
## Surrogate splits:
## ln_ttlspendunit < -0.01774234 to the right, agree=1.000, adj=1.000, (0 split)
## Ship.Promo.UC < 2.111831 to the left, agree=0.880, adj=0.250, (0 split)
## Deal.Structure splits as L-R--L-RR-LLLL-LL, agree=0.876, adj=0.222, (0 split)
## Everyday.Retail.Margin < 0.3394822 to the left, agree=0.871, adj=0.194, (0 split)
## ln_incr_unit_cost < 6.97658 to the right, agree=0.862, adj=0.139, (0 split)
##
## Node number 3: 1384 observations, complexity param=0.1094737
## predicted class=Pos expected loss=0.2023121 P(node) =0.8601616
## class counts: 280 1104
## probabilities: 0.202 0.798
## left son=6 (182 obs) right son=7 (1202 obs)
## Primary splits:
## COGS.Rate.. < 0.5068609 to the right, improve=81.34176, (0 missing)
## Incr.NIP.. < 411.264 to the left, improve=78.43316, (0 missing)
## Brand splits as LRRRR------R------------------, improve=74.31874, (0 missing)
## ln_incr_NIP < 8.633838 to the left, improve=63.11573, (0 missing)
## X.ACV.Any.Promo < 0.5376 to the left, improve=58.57972, (0 missing)
## Surrogate splits:
## Brand splits as LRRRR------R------------------, agree=0.993, adj=0.951, (0 split)
## COGS.per.Unit < 1.643001 to the right, agree=0.934, adj=0.495, (0 split)
## X.ACV.Any.Promo < 0.4174 to the left, agree=0.905, adj=0.280, (0 split)
## ln_ACV_any_promo < -0.8713367 to the left, agree=0.905, adj=0.280, (0 split)
## Event.Retailer.margin < -0.2262355 to the left, agree=0.882, adj=0.099, (0 split)
##
## Node number 4: 189 observations
## predicted class=Neg expected loss=0.06349206 P(node) =0.1174643
## class counts: 177 12
## probabilities: 0.937 0.063
##
## Node number 5: 36 observations, complexity param=0.01263158
## predicted class=Neg expected loss=0.5 P(node) =0.02237415
## class counts: 18 18
## probabilities: 0.500 0.500
## left son=10 (12 obs) right son=11 (24 obs)
## Primary splits:
## Event...Lift < 0.5529234 to the left, improve=9.000000, (0 missing)
## Base.Subsidy < 0.6439603 to the right, improve=9.000000, (0 missing)
## Promo.Efficiency < 0.3560397 to the left, improve=9.000000, (0 missing)
## X.ACV.Any.Promo < 0.685 to the left, improve=8.415584, (0 missing)
## ln_ACV_any_promo < -0.380102 to the left, improve=8.415584, (0 missing)
## Surrogate splits:
## Base.Subsidy < 0.6439603 to the right, agree=1.000, adj=1.000, (0 split)
## Promo.Efficiency < 0.3560397 to the left, agree=1.000, adj=1.000, (0 split)
## ln_event_pct_lift < -0.5908115 to the left, agree=0.972, adj=0.917, (0 split)
## Incr.NIP.. < 132.8794 to the left, agree=0.917, adj=0.750, (0 split)
## COGS.per.Unit < 1.298433 to the right, agree=0.806, adj=0.417, (0 split)
##
## Node number 6: 182 observations, complexity param=0.05263158
## predicted class=Neg expected loss=0.3571429 P(node) =0.1131137
## class counts: 117 65
## probabilities: 0.643 0.357
## left son=12 (107 obs) right son=13 (75 obs)
## Primary splits:
## P.A.N. splits as R-LLL-L---L-LL-LLLL-LRRRRL-L--LL, improve=24.44370, (0 missing)
## ln_incr_NIP < 11.11982 to the left, improve=20.06759, (0 missing)
## Incr.NIP.. < 67498.22 to the left, improve=20.06759, (0 missing)
## TQMP < 941.5 to the left, improve=18.46218, (0 missing)
## ln_TQLP < 6.847472 to the left, improve=18.46218, (0 missing)
## Surrogate splits:
## TQMP < 941.5 to the left, agree=0.868, adj=0.680, (0 split)
## ln_TQLP < 6.847472 to the left, agree=0.868, adj=0.680, (0 split)
## Deal.Structure splits as LLLL--LLL-RRLL---, agree=0.863, adj=0.667, (0 split)
## Everyday.Unit.Cost < 2.83 to the right, agree=0.808, adj=0.533, (0 split)
## ln_everyday_unit_cost < 1.040006 to the right, agree=0.808, adj=0.533, (0 split)
##
## Node number 7: 1202 observations, complexity param=0.07578947
## predicted class=Pos expected loss=0.1356073 P(node) =0.7470479
## class counts: 163 1039
## probabilities: 0.136 0.864
## left son=14 (42 obs) right son=15 (1160 obs)
## Primary splits:
## Incr.NIP.. < 407.3582 to the left, improve=54.73093, (0 missing)
## P.A.N. splits as R-LRL-L---R-RL-LRLL-LRRRRL-R--RR, improve=35.16570, (0 missing)
## ln_incr_NIP < 8.574341 to the left, improve=29.52496, (0 missing)
## Division splits as RRL-LRRRL, improve=21.17088, (0 missing)
## Deal.Structure splits as RRRLR--RR-RRRLRRL, improve=19.60157, (0 missing)
## Surrogate splits:
## ln_incr_NIP < 6.064283 to the left, agree=0.983, adj=0.524, (0 split)
## Incr..Unit.Volume < 256 to the left, agree=0.977, adj=0.333, (0 split)
## Incremental.COGS < 264.1492 to the left, agree=0.977, adj=0.333, (0 split)
## Incremental...NSV < 719.0346 to the left, agree=0.977, adj=0.333, (0 split)
## ln_event_pct_lift < -1.37129 to the left, agree=0.974, adj=0.262, (0 split)
##
## Node number 10: 12 observations
## predicted class=Neg expected loss=0 P(node) =0.007458048
## class counts: 12 0
## probabilities: 1.000 0.000
##
## Node number 11: 24 observations
## predicted class=Pos expected loss=0.25 P(node) =0.0149161
## class counts: 6 18
## probabilities: 0.250 0.750
##
## Node number 12: 107 observations
## predicted class=Neg expected loss=0.1401869 P(node) =0.06650093
## class counts: 92 15
## probabilities: 0.860 0.140
##
## Node number 13: 75 observations, complexity param=0.02105263
## predicted class=Pos expected loss=0.3333333 P(node) =0.0466128
## class counts: 25 50
## probabilities: 0.333 0.667
## left son=26 (14 obs) right son=27 (61 obs)
## Primary splits:
## Ship.Promo.UC < 2.068938 to the left, improve=9.445746, (0 missing)
## TTL.Spend.per.Unit < 0.8545179 to the right, improve=9.445746, (0 missing)
## ln_ttlspendunit < -0.1560796 to the right, improve=9.445746, (0 missing)
## Event.Retailer.margin < 0.2699142 to the right, improve=8.653846, (0 missing)
## Total.Deal.Per.Unit < 0.825 to the right, improve=8.546402, (0 missing)
## Surrogate splits:
## TTL.Spend.per.Unit < 0.8545179 to the right, agree=1.00, adj=1.000, (0 split)
## ln_ttlspendunit < -0.1560796 to the right, agree=1.00, adj=1.000, (0 split)
## Total.Deal.Per.Unit < 0.825 to the right, agree=0.96, adj=0.786, (0 split)
## Tot.Var.TS.per.Unit < 0.825 to the right, agree=0.96, adj=0.786, (0 split)
## ln_total_deal_unit < -0.1926467 to the right, agree=0.96, adj=0.786, (0 split)
##
## Node number 14: 42 observations
## predicted class=Neg expected loss=0.07142857 P(node) =0.02610317
## class counts: 39 3
## probabilities: 0.929 0.071
##
## Node number 15: 1160 observations, complexity param=0.01473684
## predicted class=Pos expected loss=0.1068966 P(node) =0.7209447
## class counts: 124 1036
## probabilities: 0.107 0.893
## left son=30 (280 obs) right son=31 (880 obs)
## Primary splits:
## P.A.N. splits as R-LRL-----R-RL-LRLR-LRRRRL-R--RR, improve=28.55362, (0 missing)
## TTL.Spend.per.Unit < 1.869223 to the right, improve=21.11706, (0 missing)
## ln_ttlspendunit < 0.6260574 to the right, improve=21.11706, (0 missing)
## Ship.Promo.UC < 1.881292 to the left, improve=14.89483, (0 missing)
## Tot.Var.TS.per.Unit < 1.965 to the right, improve=14.46967, (0 missing)
## Surrogate splits:
## Eq.Volume < 218613 to the right, agree=0.795, adj=0.150, (0 split)
## ln_Eq_volume < 12.29505 to the right, agree=0.795, adj=0.150, (0 split)
## Division splits as RR--LRRRL, agree=0.793, adj=0.143, (0 split)
## X..Volume < 1366853 to the right, agree=0.793, adj=0.143, (0 split)
## ln_._volume < 14.12802 to the right, agree=0.793, adj=0.143, (0 split)
##
## Node number 26: 14 observations
## predicted class=Neg expected loss=0.1428571 P(node) =0.008701057
## class counts: 12 2
## probabilities: 0.857 0.143
##
## Node number 27: 61 observations, complexity param=0.01684211
## predicted class=Pos expected loss=0.2131148 P(node) =0.03791175
## class counts: 13 48
## probabilities: 0.213 0.787
## left son=54 (12 obs) right son=55 (49 obs)
## Primary splits:
## ln_incr_NIP < 9.425281 to the left, improve=11.493030, (0 missing)
## Incr.NIP.. < 12430.3 to the left, improve=11.493030, (0 missing)
## ln_event_pct_lift < -1.005 to the left, improve= 9.826289, (0 missing)
## Event...Lift < 0.3650475 to the left, improve= 9.826289, (0 missing)
## Base.Subsidy < 0.7325761 to the right, improve= 9.826289, (0 missing)
## Surrogate splits:
## Incr.NIP.. < 12430.3 to the left, agree=1.000, adj=1.000, (0 split)
## Incr..Any.Promo.U.Vol < 7994.5 to the left, agree=0.967, adj=0.833, (0 split)
## ln_event_pct_lift < -1.186195 to the left, agree=0.951, adj=0.750, (0 split)
## ln_incr_unit_volume < 8.9459 to the left, agree=0.951, adj=0.750, (0 split)
## Incr..Unit.Volume < 7686 to the left, agree=0.951, adj=0.750, (0 split)
##
## Node number 30: 280 observations, complexity param=0.01473684
## predicted class=Pos expected loss=0.3035714 P(node) =0.1740211
## class counts: 85 195
## probabilities: 0.304 0.696
## left son=60 (48 obs) right son=61 (232 obs)
## Primary splits:
## Ship.Promo.UC < 1.905941 to the left, improve=13.57245, (0 missing)
## Event.Retailer.margin < 0.2883454 to the right, improve=13.57202, (0 missing)
## Base.Unit...Volume < 2993.333 to the right, improve=12.57165, (0 missing)
## ln_base_unit_volume < 8.004121 to the right, improve=12.57165, (0 missing)
## TTL.Spend.per.Unit < 1.842381 to the right, improve=11.51316, (0 missing)
## Surrogate splits:
## Division splits as -L--RRRRR, agree=0.889, adj=0.354, (0 split)
## P.A.N. splits as --R-R--------R-R-L--R----R------, agree=0.889, adj=0.354, (0 split)
## Plan.Scan.Unit.Rate < 1.285 to the right, agree=0.886, adj=0.333, (0 split)
## TTL.Spend.per.Unit < 1.842381 to the right, agree=0.871, adj=0.250, (0 split)
## ln_ttlspendunit < 0.6114874 to the right, agree=0.871, adj=0.250, (0 split)
##
## Node number 31: 880 observations, complexity param=0.01263158
## predicted class=Pos expected loss=0.04431818 P(node) =0.5469236
## class counts: 39 841
## probabilities: 0.044 0.956
## left son=62 (8 obs) right son=63 (872 obs)
## Primary splits:
## Ship.Promo.UC < 1.542484 to the left, improve=11.141810, (0 missing)
## Other.Fixed.Cost < 125957.2 to the right, improve= 5.444558, (0 missing)
## otherfixedcostadd1 < 125958.2 to the right, improve= 5.444558, (0 missing)
## Event.Retailer.margin < 0.3686428 to the right, improve= 4.776597, (0 missing)
## TTL.Spend.per.Unit < 1.844557 to the right, improve= 4.200653, (0 missing)
##
## Node number 54: 12 observations
## predicted class=Neg expected loss=0.1666667 P(node) =0.007458048
## class counts: 10 2
## probabilities: 0.833 0.167
##
## Node number 55: 49 observations
## predicted class=Pos expected loss=0.06122449 P(node) =0.0304537
## class counts: 3 46
## probabilities: 0.061 0.939
##
## Node number 60: 48 observations, complexity param=0.01473684
## predicted class=Neg expected loss=0.3541667 P(node) =0.02983219
## class counts: 31 17
## probabilities: 0.646 0.354
## left son=120 (34 obs) right son=121 (14 obs)
## Primary splits:
## COGS.per.Unit < 1.260007 to the right, improve=10.000350, (0 missing)
## Event.Retailer.margin < 0.3027855 to the right, improve=10.000350, (0 missing)
## Deal.Structure splits as -------RR-LLL--L-, improve= 9.683201, (0 missing)
## Feature.Price < 2.4 to the right, improve= 7.843551, (0 missing)
## ln_feature_price < 0.8750172 to the right, improve= 7.843551, (0 missing)
## Surrogate splits:
## Roi.Id < 93357 to the right, agree=0.854, adj=0.500, (0 split)
## COGS.Rate.. < 0.4060833 to the right, agree=0.854, adj=0.500, (0 split)
## Event.Retailer.margin < 0.2459036 to the right, agree=0.854, adj=0.500, (0 split)
## Year < 2013.5 to the right, agree=0.833, adj=0.429, (0 split)
## Everyday.Price < 2.901523 to the right, agree=0.833, adj=0.429, (0 split)
##
## Node number 61: 232 observations, complexity param=0.01473684
## predicted class=Pos expected loss=0.2327586 P(node) =0.1441889
## class counts: 54 178
## probabilities: 0.233 0.767
## left son=122 (19 obs) right son=123 (213 obs)
## Primary splits:
## Deal.Structure splits as RRRL---RR-RRRLRRL, improve=8.435580, (0 missing)
## Base.Unit...Volume < 2888.333 to the right, improve=7.262069, (0 missing)
## ln_base_unit_volume < 7.968435 to the right, improve=7.262069, (0 missing)
## ln_event_pct_lift < 1.002115 to the left, improve=6.854347, (0 missing)
## Event...Lift < 2.723043 to the left, improve=6.854347, (0 missing)
## Surrogate splits:
## IncSpendexcScanUnitRate < 0.05 to the right, agree=0.948, adj=0.368, (0 split)
## ln_incspendexunitrate < -4.600195 to the right, agree=0.948, adj=0.368, (0 split)
## Base.Sales.Contribution < 540466.8 to the right, agree=0.940, adj=0.263, (0 split)
## Incr..Non.Promo.U.Vol < -8925 to the left, agree=0.935, adj=0.211, (0 split)
## Division splits as -R--RRRRL, agree=0.922, adj=0.053, (0 split)
##
## Node number 62: 8 observations
## predicted class=Neg expected loss=0.125 P(node) =0.004972032
## class counts: 7 1
## probabilities: 0.875 0.125
##
## Node number 63: 872 observations
## predicted class=Pos expected loss=0.03669725 P(node) =0.5419515
## class counts: 32 840
## probabilities: 0.037 0.963
##
## Node number 120: 34 observations
## predicted class=Neg expected loss=0.1470588 P(node) =0.02113114
## class counts: 29 5
## probabilities: 0.853 0.147
##
## Node number 121: 14 observations
## predicted class=Pos expected loss=0.1428571 P(node) =0.008701057
## class counts: 2 12
## probabilities: 0.143 0.857
##
## Node number 122: 19 observations
## predicted class=Neg expected loss=0.3157895 P(node) =0.01180858
## class counts: 13 6
## probabilities: 0.684 0.316
##
## Node number 123: 213 observations, complexity param=0.01473684
## predicted class=Pos expected loss=0.1924883 P(node) =0.1323804
## class counts: 41 172
## probabilities: 0.192 0.808
## left son=246 (110 obs) right son=247 (103 obs)
## Primary splits:
## ln_incr_NIP < 10.10188 to the left, improve=7.187719, (0 missing)
## X..Share.of.Category < 7.955981 to the left, improve=7.187719, (0 missing)
## Incr.NIP.. < 24390.67 to the left, improve=7.187719, (0 missing)
## Unit.Share.of.Category < 9.315141 to the left, improve=7.076254, (0 missing)
## X.ACV.Feat...Disp < 0.105 to the left, improve=6.011315, (0 missing)
## Surrogate splits:
## Incr.NIP.. < 24390.67 to the left, agree=1.000, adj=1.000, (0 split)
## ln_incr_unit_volume < 9.551299 to the left, agree=0.962, adj=0.922, (0 split)
## Incr..Unit.Volume < 14063 to the left, agree=0.962, adj=0.922, (0 split)
## Incr..Any.Promo.U.Vol < 13965.5 to the left, agree=0.962, adj=0.922, (0 split)
## Incremental.COGS < 22690.43 to the left, agree=0.958, adj=0.913, (0 split)
##
## Node number 246: 110 observations, complexity param=0.01473684
## predicted class=Pos expected loss=0.3181818 P(node) =0.06836544
## class counts: 35 75
## probabilities: 0.318 0.682
## left son=492 (35 obs) right son=493 (75 obs)
## Primary splits:
## List.Price.per.Unit < 3.134654 to the right, improve=21.09108, (0 missing)
## NSV.per.unit < 3.07319 to the right, improve=21.09108, (0 missing)
## ln_list_price_per_unit < 1.142838 to the right, improve=21.09108, (0 missing)
## TTL.Spend.per.Unit < 0.942383 to the right, improve=17.95185, (0 missing)
## ln_ttlspendunit < -0.05829768 to the right, improve=17.95185, (0 missing)
## Surrogate splits:
## NSV.per.unit < 3.07319 to the right, agree=1.000, adj=1.000, (0 split)
## ln_list_price_per_unit < 1.142838 to the right, agree=1.000, adj=1.000, (0 split)
## TTL.Spend.per.Unit < 0.942383 to the right, agree=0.855, adj=0.543, (0 split)
## ln_ttlspendunit < -0.05829768 to the right, agree=0.855, adj=0.543, (0 split)
## Category splits as RLL----------, agree=0.845, adj=0.514, (0 split)
##
## Node number 247: 103 observations
## predicted class=Pos expected loss=0.05825243 P(node) =0.06401492
## class counts: 6 97
## probabilities: 0.058 0.942
##
## Node number 492: 35 observations
## predicted class=Neg expected loss=0.2285714 P(node) =0.02175264
## class counts: 27 8
## probabilities: 0.771 0.229
##
## Node number 493: 75 observations
## predicted class=Pos expected loss=0.1066667 P(node) =0.0466128
## class counts: 8 67
## probabilities: 0.107 0.893
# Draw the fitted tree held in `tree.uni` as a diagram.
# NOTE(review): the Neg/Pos node summary printed above is presumably the
# summary of this same `tree.uni` object -- confirm against the earlier fit.
fancyRpartPlot(model = tree.uni)
