library(ggplot2)
library(readxl)
library(randomForest)
library(caret)
library(lubridate)
library(ModelMetrics)
# Count the missing values in each column of `ret`.
# vapply() pins the result to one integer per column, and sum(is.na(.)) counts
# the NAs directly instead of going through which() and length().
vapply(ret, function(column) sum(is.na(column)), integer(1))
WEEK_END_DATE STORE_NUM UPC UNITS VISITS HHS SPEND
0 0 0 0 0 0 0
PRICE BASE_PRICE FEATURE DISPLAY TPR_ONLY
23 185 0 0 0
# Replace missing prices with 0. The columns are addressed by name rather than
# by position (8 and 9) so the fill still lands on PRICE and BASE_PRICE if the
# column order of `ret` ever changes.
# NOTE(review): 0 is used as the fill value for a missing price; confirm this
# is intended, since both columns are predictors in the models fitted later.
ret$PRICE[is.na(ret$PRICE)] <- 0
ret$BASE_PRICE[is.na(ret$BASE_PRICE)] <- 0

# Re-count missing values per column to confirm that none remain.
vapply(ret, function(column) sum(is.na(column)), integer(1))
WEEK_END_DATE STORE_NUM UPC UNITS VISITS HHS SPEND
0 0 0 0 0 0 0
PRICE BASE_PRICE FEATURE DISPLAY TPR_ONLY
0 0 0 0 0
# Derive calendar features (week of year, month, year) from the week-ending
# date. The date conversion is done once and reused for all three columns.
week_end_date <- as.Date(ret$WEEK_END_DATE)
ret$week <- week(week_end_date)
ret$month <- month(week_end_date)
ret$year <- year(week_end_date)
# Fix the RNG seed so the random train/test split can be reproduced.
set.seed(42)
# Row indices of the training partition: 70% of the rows, drawn by caret based
# on SPEND. With `list = FALSE` the indices come back as a matrix.
# NOTE(review): `t` shares its name with base::t(); the name is kept because
# the modelling code further down indexes `ret` with it.
t <- createDataPartition(y = ret$SPEND, p = 0.7, list = FALSE)
# Linear baseline: regress SPEND on every other column of `ret`, using all
# rows, and print the coefficient table.
summary(
  lm(SPEND ~ ., data = ret[, ])
)
Call:
lm(formula = SPEND ~ ., data = ret[, ])
Residuals:
Min 1Q Median 3Q Max
-868.03 -10.17 0.37 7.08 604.54
Coefficients: (1 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) -5.736e+01 1.636e+00 -35.071 < 2e-16 ***
WEEK_END_DATE 2.776e-08 1.284e-09 21.610 < 2e-16 ***
STORE_NUM -2.551e-05 3.708e-06 -6.880 5.98e-12 ***
UPC 7.433e-11 1.616e-12 45.993 < 2e-16 ***
UNITS 9.070e-01 6.879e-03 131.857 < 2e-16 ***
VISITS 3.931e+00 2.795e-02 140.617 < 2e-16 ***
HHS -2.476e+00 2.676e-02 -92.556 < 2e-16 ***
PRICE -3.967e+00 1.058e-01 -37.480 < 2e-16 ***
BASE_PRICE 1.259e+01 1.002e-01 125.635 < 2e-16 ***
FEATURE 7.808e+00 1.619e-01 48.237 < 2e-16 ***
DISPLAY -5.065e+00 1.247e-01 -40.611 < 2e-16 ***
TPR_ONLY -8.617e+00 1.253e-01 -68.757 < 2e-16 ***
week -1.958e-01 2.636e-02 -7.428 1.10e-13 ***
month 8.018e-01 1.148e-01 6.984 2.88e-12 ***
year NA NA NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 23.87 on 524936 degrees of freedom
Multiple R-squared: 0.8774, Adjusted R-squared: 0.8774
F-statistic: 2.891e+05 on 13 and 524936 DF, p-value: < 2.2e-16
# Fit the linear model of SPEND on all other columns, using every row of
# `ret`, and keep the fitted object for prediction.
fit <- lm(SPEND ~ ., data = ret[, ])
# Predictions for the same rows the model was fitted on.
# NOTE(review): this is an in-sample evaluation, whereas the random forest is
# trained on ret[t, ] and also scored on ret[-t, ]; the resulting error figures
# are not directly comparable.
predl <- predict(fit, newdata = ret)
prediction from a rank-deficient fit may be misleading
# Mean squared error of the linear model's predictions over all rows of `ret`.
ModelMetrics::mse(ret$SPEND, predl)
[1] 569.7657
# Random forest regression of SPEND on all other columns, trained on the
# training partition only. The fit is stochastic, so the seed is fixed here to
# make the reported error figures reproducible.
set.seed(42)
rf <- randomForest(
  SPEND ~ .,
  data = ret[t, ],
  ntree = 101,        # number of trees grown
  mtry = 3,           # variables tried at each split
  importance = TRUE,
  nodesize = 524
)
# Print the fitted forest's summary.
rf
Call:
randomForest(formula = SPEND ~ ., data = ret[t, ], ntree = 101, mtry = 3, importance = TRUE, nodesize = 524)
Type of random forest: regression
Number of trees: 101
No. of variables tried at each split: 3
Mean of squared residuals: 138.8977
% Var explained: 97.02
# Score the forest on the training partition.
pred <- predict(rf, ret[t, ])
# Show the first few actual/predicted pairs. SPEND is selected by name rather
# than by column position (7) so the comparison cannot drift to another column
# if the layout of `ret` changes.
head(cbind(ret[t, "SPEND"], pred))
# Training-set mean squared error.
mse(ret$SPEND[t], pred)
[1] 128.6694
# Score the forest on the held-out rows (everything not in the training
# partition). This overwrites the training-set predictions held in `pred`.
pred <- predict(rf, ret[-t, ])
# Show the first few actual/predicted pairs. SPEND is selected by name rather
# than by column position (7) so the comparison cannot drift to another column
# if the layout of `ret` changes.
head(cbind(ret[-t, "SPEND"], pred))
# Held-out mean squared error.
mse(ret$SPEND[-t], pred)
[1] 147.2189
LS0tDQp0aXRsZTogIkFuYWx5emluZyBFLUNvbW1lcmNlIHNhbGVzIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KDQpgYGB7cn0NCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkocmVhZHhsKQ0KbGlicmFyeShyYW5kb21Gb3Jlc3QpDQpsaWJyYXJ5KGNhcmV0KQ0KbGlicmFyeShsdWJyaWRhdGUpDQpsaWJyYXJ5KE1vZGVsTWV0cmljcykNCmBgYA0KDQpgYGB7cn0NCnJldCA8LSByZWFkX2V4Y2VsKCJDOi9NeSBXb3JrL0RhdGEgQW5hbHl0aWNzL0RhdGEgc2V0cy9kdW5uaHVtYnkgLSBSZXRhaWxUcmFuc2FjdGlvbkRhdGEueGxzeCIsIA0KICAgICAgICAgICAgICAgICAgc2hlZXQgPSAiZGggVHJhbnNhY3Rpb24gRGF0YSIpDQpoZWFkKHJldCkNCmBgYA0KDQoNCmBgYHtyfQ0Kc2FwcGx5KHJldCwgZnVuY3Rpb24oeSkgc3VtKGxlbmd0aCh3aGljaChpcy5uYSh5KSkpKSkNCmBgYA0KDQoNCmBgYHtyfQ0KcmV0W2lzLm5hKHJldCRQUklDRSksOF08LTANCnJldFtpcy5uYShyZXQkQkFTRV9QUklDRSksOV08LTANCnNhcHBseShyZXQsIGZ1bmN0aW9uKHkpIHN1bShsZW5ndGgod2hpY2goaXMubmEoeSkpKSkpDQpgYGANCg0KDQpgYGB7cn0NCnJldCR3ZWVrPC13ZWVrKGFzLkRhdGUocmV0JFdFRUtfRU5EX0RBVEUpKQ0KcmV0JG1vbnRoPC1tb250aChhcy5EYXRlKHJldCRXRUVLX0VORF9EQVRFKSkNCnJldCR5ZWFyPC15ZWFyKGFzLkRhdGUocmV0JFdFRUtfRU5EX0RBVEUpKQ0KYGBgDQoNCmBgYHtyfQ0KdDwtIGNyZWF0ZURhdGFQYXJ0aXRpb24oeSA9IHJldCRTUEVORCxwID0gLjcsbGlzdCA9IEZBTFNFKQ0KYGBgDQoNCg0KDQpgYGB7cn0NCnN1bW1hcnkobG0oU1BFTkR+LixkYXRhPXJldFssXSkpDQpgYGANCg0KDQoNCmBgYHtyfQ0KZml0PC0obG0oU1BFTkR+LixkYXRhPXJldFssXSkpDQpwcmVkbDwtcHJlZGljdChmaXQscmV0KQ0KbXNlKHJldCRTUEVORCxwcmVkbCkNCmBgYA0KDQoNCg0KYGBge3J9DQpyZjwtcmFuZG9tRm9yZXN0KFNQRU5Efi4sZGF0YT1yZXRbdCxdLG50cmVlPTEwMSxtdHJ5PTMsaW1wb3J0YW5jZT1UUlVFLG5vZGVzaXplPTUyNCkNCnJmDQpgYGANCg0KDQoNCmBgYHtyfQ0KcHJlZDwtcHJlZGljdChyZixyZXRbdCxdKQ0KaGVhZChjYmluZChyZXRbdCw3XSxwcmVkKSkNCmBgYA0KDQpgYGB7cn0NCm1zZShyZXQkU1BFTkRbdF0scHJlZCkNCmBgYA0KDQoNCmBgYHtyfQ0KcHJlZDwtcHJlZGljdChyZixyZXRbLXQsXSkNCmhlYWQoY2JpbmQocmV0Wy10LDddLHByZWQpKQ0KYGBgDQoNCmBgYHtyfQ0KbXNlKHJldCRTUEVORFstdF0scHJlZCkNCmBgYA0K