library(ggplot2)
library(readxl)
library(randomForest)
library(caret)
library(lubridate)
library(ModelMetrics)
# Count the missing (NA) values in each column of `ret`.
vapply(ret, function(column) sum(is.na(column)), integer(1))
WEEK_END_DATE     STORE_NUM           UPC         UNITS        VISITS           HHS         SPEND 
            0             0             0             0             0             0             0 
        PRICE    BASE_PRICE       FEATURE       DISPLAY      TPR_ONLY 
           23           185             0             0             0 
# Replace missing prices with 0. The columns are addressed by name rather
# than by position, so a change in the sheet's column order cannot make the
# assignment overwrite a different column.
# NOTE(review): a price of 0 is not a neutral fill value for the models
# below -- confirm that 0 (rather than dropping the rows or imputing from
# the other price column) is really intended.
ret$PRICE[is.na(ret$PRICE)] <- 0
ret$BASE_PRICE[is.na(ret$BASE_PRICE)] <- 0

# Re-count the NAs per column to confirm that none remain.
vapply(ret, function(column) sum(is.na(column)), integer(1))
WEEK_END_DATE     STORE_NUM           UPC         UNITS        VISITS           HHS         SPEND 
            0             0             0             0             0             0             0 
        PRICE    BASE_PRICE       FEATURE       DISPLAY      TPR_ONLY 
            0             0             0             0             0 
# Derive calendar features (week of year, month, year) from the week-end
# date. The date conversion is done once and reused for all three columns.
week_end <- as.Date(ret$WEEK_END_DATE)
ret$week <- week(week_end)
ret$month <- month(week_end)
ret$year <- year(week_end)
# Fix the RNG seed so the train/holdout split -- and therefore every metric
# computed from it further down -- is reproducible between runs.
set.seed(123)

# `t` holds the row indices of the training set: 70% of the rows, sampled by
# caret with respect to the distribution of SPEND. `list = FALSE` returns
# the indices as a matrix rather than a list, so `ret[t, ]` selects the
# training rows and `ret[-t, ]` the holdout rows.
t <- createDataPartition(y = ret$SPEND, p = 0.7, list = FALSE)
# Exploratory OLS fit: regress SPEND on every other column of `ret`, using
# all rows, and print the coefficient table.
summary(
  lm(formula = SPEND ~ ., data = ret[, ])
)

Call:
lm(formula = SPEND ~ ., data = ret[, ])

Residuals:
    Min      1Q  Median      3Q     Max 
-868.03  -10.17    0.37    7.08  604.54 

Coefficients: (1 not defined because of singularities)
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)   -5.736e+01  1.636e+00 -35.071  < 2e-16 ***
WEEK_END_DATE  2.776e-08  1.284e-09  21.610  < 2e-16 ***
STORE_NUM     -2.551e-05  3.708e-06  -6.880 5.98e-12 ***
UPC            7.433e-11  1.616e-12  45.993  < 2e-16 ***
UNITS          9.070e-01  6.879e-03 131.857  < 2e-16 ***
VISITS         3.931e+00  2.795e-02 140.617  < 2e-16 ***
HHS           -2.476e+00  2.676e-02 -92.556  < 2e-16 ***
PRICE         -3.967e+00  1.058e-01 -37.480  < 2e-16 ***
BASE_PRICE     1.259e+01  1.002e-01 125.635  < 2e-16 ***
FEATURE        7.808e+00  1.619e-01  48.237  < 2e-16 ***
DISPLAY       -5.065e+00  1.247e-01 -40.611  < 2e-16 ***
TPR_ONLY      -8.617e+00  1.253e-01 -68.757  < 2e-16 ***
week          -1.958e-01  2.636e-02  -7.428 1.10e-13 ***
month          8.018e-01  1.148e-01   6.984 2.88e-12 ***
year                  NA         NA      NA       NA    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 23.87 on 524936 degrees of freedom
Multiple R-squared:  0.8774,    Adjusted R-squared:  0.8774 
F-statistic: 2.891e+05 on 13 and 524936 DF,  p-value: < 2.2e-16
# Linear-regression baseline.
# Fit on the training rows only and score on the holdout rows, so that the
# reported MSE is an out-of-sample error and can be compared directly with
# the random forest's holdout MSE. Fitting and scoring on the full data set
# gives an in-sample figure that includes the holdout rows in the fit.
fit <- lm(SPEND ~ ., data = ret[t, ])

# NOTE(review): the full-data fit reported one coefficient (`year`) as not
# defined because of singularities, and predict() warned
# "prediction from a rank-deficient fit may be misleading". The same warning
# is likely here; consider dropping the redundant predictor.
predl <- predict(fit, ret[-t, ])

# Holdout mean squared error of the linear baseline.
mse(ret$SPEND[-t], predl)
[1] 569.7657
# Seed the RNG immediately before fitting: the forest's bootstrap samples
# and per-split variable draws are random, so without a seed re-running this
# chunk yields a different model and different error figures.
set.seed(123)

# Random-forest regression of SPEND on all other columns, trained on the
# training rows only (`t` holds the training row indices).
rf <- randomForest(
  SPEND ~ .,
  data = ret[t, ],
  ntree = 101,        # number of trees grown
  mtry = 3,           # variables tried at each split
  importance = TRUE,  # also compute variable-importance measures
  nodesize = 524      # minimum size of terminal nodes
)

# Print the fit summary (mean of squared residuals, % variance explained).
rf

Call:
 randomForest(formula = SPEND ~ ., data = ret[t, ], ntree = 101,      mtry = 3, importance = TRUE, nodesize = 524) 
               Type of random forest: regression
                     Number of trees: 101
No. of variables tried at each split: 3

          Mean of squared residuals: 138.8977
                    % Var explained: 97.02
# Predictions of the random forest on the rows it was trained on.
pred <- predict(rf, ret[t, ])

# Show the first few actual vs. predicted values. SPEND is selected by name
# instead of by column position, so the comparison cannot silently pick up
# another column if the column order changes.
head(cbind(ret[t, "SPEND"], pred))

# Training-set mean squared error.
mse(ret$SPEND[t], pred)
[1] 128.6694
# Predictions of the random forest on the holdout rows (those not in `t`).
# This overwrites the training-set predictions stored in `pred`.
pred <- predict(rf, ret[-t, ])

# Show the first few actual vs. predicted values. SPEND is selected by name
# instead of by column position, so the comparison cannot silently pick up
# another column if the column order changes.
head(cbind(ret[-t, "SPEND"], pred))

# Holdout mean squared error.
mse(ret$SPEND[-t], pred)
[1] 147.2189
LS0tDQp0aXRsZTogIkFuYWx5emluZyBFLUNvbW1lcmNlIHNhbGVzIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KDQpgYGB7cn0NCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkocmVhZHhsKQ0KbGlicmFyeShyYW5kb21Gb3Jlc3QpDQpsaWJyYXJ5KGNhcmV0KQ0KbGlicmFyeShsdWJyaWRhdGUpDQpsaWJyYXJ5KE1vZGVsTWV0cmljcykNCmBgYA0KDQpgYGB7cn0NCnJldCA8LSByZWFkX2V4Y2VsKCJDOi9NeSBXb3JrL0RhdGEgQW5hbHl0aWNzL0RhdGEgc2V0cy9kdW5uaHVtYnkgLSBSZXRhaWxUcmFuc2FjdGlvbkRhdGEueGxzeCIsIA0KICAgICAgICAgICAgICAgICAgc2hlZXQgPSAiZGggVHJhbnNhY3Rpb24gRGF0YSIpDQpoZWFkKHJldCkNCmBgYA0KDQoNCmBgYHtyfQ0Kc2FwcGx5KHJldCwgZnVuY3Rpb24oeSkgc3VtKGxlbmd0aCh3aGljaChpcy5uYSh5KSkpKSkNCmBgYA0KDQoNCmBgYHtyfQ0KcmV0W2lzLm5hKHJldCRQUklDRSksOF08LTANCnJldFtpcy5uYShyZXQkQkFTRV9QUklDRSksOV08LTANCnNhcHBseShyZXQsIGZ1bmN0aW9uKHkpIHN1bShsZW5ndGgod2hpY2goaXMubmEoeSkpKSkpDQpgYGANCg0KDQpgYGB7cn0NCnJldCR3ZWVrPC13ZWVrKGFzLkRhdGUocmV0JFdFRUtfRU5EX0RBVEUpKQ0KcmV0JG1vbnRoPC1tb250aChhcy5EYXRlKHJldCRXRUVLX0VORF9EQVRFKSkNCnJldCR5ZWFyPC15ZWFyKGFzLkRhdGUocmV0JFdFRUtfRU5EX0RBVEUpKQ0KYGBgDQoNCmBgYHtyfQ0KdDwtIGNyZWF0ZURhdGFQYXJ0aXRpb24oeSA9IHJldCRTUEVORCxwID0gLjcsbGlzdCA9IEZBTFNFKQ0KYGBgDQoNCg0KDQpgYGB7cn0NCnN1bW1hcnkobG0oU1BFTkR+LixkYXRhPXJldFssXSkpDQpgYGANCg0KDQoNCmBgYHtyfQ0KZml0PC0obG0oU1BFTkR+LixkYXRhPXJldFssXSkpDQpwcmVkbDwtcHJlZGljdChmaXQscmV0KQ0KbXNlKHJldCRTUEVORCxwcmVkbCkNCmBgYA0KDQoNCg0KYGBge3J9DQpyZjwtcmFuZG9tRm9yZXN0KFNQRU5Efi4sZGF0YT1yZXRbdCxdLG50cmVlPTEwMSxtdHJ5PTMsaW1wb3J0YW5jZT1UUlVFLG5vZGVzaXplPTUyNCkNCnJmDQpgYGANCg0KDQoNCmBgYHtyfQ0KcHJlZDwtcHJlZGljdChyZixyZXRbdCxdKQ0KaGVhZChjYmluZChyZXRbdCw3XSxwcmVkKSkNCmBgYA0KDQpgYGB7cn0NCm1zZShyZXQkU1BFTkRbdF0scHJlZCkNCmBgYA0KDQoNCmBgYHtyfQ0KcHJlZDwtcHJlZGljdChyZixyZXRbLXQsXSkNCmhlYWQoY2JpbmQocmV0Wy10LDddLHByZWQpKQ0KYGBgDQoNCmBgYHtyfQ0KbXNlKHJldCRTUEVORFstdF0scHJlZCkNCmBgYA0K