# knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
library(ggplot2)
library(tidyr)
library(mlbench)
library(tidymodels)
library(DALEX)
library(ranger)
library(Rcpp)
library(corrplot)
library(gridExtra)
library(SHAPforxgboost)
library(xgboost)
library(lubridate)
lst_csv = list.files(pattern = '\\.csv$')

# Read every CSV in the working directory and assign each one to a data
# frame named after the file (minus the .csv extension)
for (i in lst_csv){
  temp_df = read.csv(i)
  assign(gsub('\\.csv$', '', i), temp_df)
}
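
# The loop assumes the competition files (at least train.csv, test.csv and
# solution_example.csv, all used below) sit in the working directory.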

train_df = train %>% 
  mutate(date = as.Date(date)) %>% 
  mutate(weekdays = wday(date)) %>% 
  mutate(date = as.numeric(factor(date))) %>% 
  # keep the columns the test set also has, plus the target; id and the
  # free-text holiday_name are dropped (weekdays is not among colnames(test)
  # at this point, so this select drops it again)
  select(all_of(setdiff(colnames(test), c("id", "holiday_name"))), orders) %>% 
  mutate(warehouse = as.numeric(factor(warehouse)))

test_df = test %>% 
  mutate(weekdays = wday(date)) %>% 
  mutate(date = as.numeric(factor(date))) %>% 
  select(-holiday_name) %>% 
  mutate(warehouse = as.numeric(factor(warehouse)))
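
# Optional sanity check: the DMatrix objects below are built by column
# position, so it is worth seeing which columns the two frames do and do
# not share first.
setdiff(names(train_df), names(test_df))
setdiff(names(test_df), names(train_df))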
# Chronological 80/20 split of the training data (reusing the names train
# and test for the two halves)
n = floor(nrow(train_df)*0.8)
train_df <- train_df[order(train_df$date), ]

train = train_df[c(1:n),] 

test = train_df[c((n+1):nrow(train_df)),] 

# Column 7 is the target (orders); the remaining columns are the features
dtrain <- xgb.DMatrix(data = as.matrix(train[, -7]), label = train[, 7])
dtest <- xgb.DMatrix(data = as.matrix(test[, -7]), label = test[, 7])

# Custom MAPE eval function; pmax(1, abs(labels)) floors the denominator at
# 1 so near-zero labels cannot blow the metric up
mape_eval <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  err <- mean(abs((labels - preds)/pmax(1, abs(labels))))
  return(list(metric = "mape", value = err))
}
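
# Quick sanity check of the metric on toy values (a made-up example): labels
# 100 and 200 with predictions 110 and 180 give the mean of 10/100 and
# 20/200, i.e. a MAPE of 0.1.
toy <- xgb.DMatrix(data = matrix(c(1, 2), ncol = 1), label = c(100, 200))
mape_eval(c(110, 180), toy)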

params <- list(
  booster = "gbtree",
  eta = 0.5,
  max_depth = 10,
  subsample = 0.7,
  colsample_bytree = 0.7,
  objective = "reg:squarederror"
)

num_round <- 100
# Only the training set is watched here, so early stopping tracks training
# MAPE rather than held-out performance
watchlist <- list(train = dtrain)
model <- xgb.train(
  params = params, 
  data = dtrain, 
  nrounds = num_round, 
  watchlist = watchlist,  
  eval_metric = mape_eval,
  maximize = FALSE,
  early_stopping_rounds = 50
)
## [1]  train-mape:0.495361 
## Will train until train_mape hasn't improved in 50 rounds.
## 
## [2]  train-mape:0.267383 
## [3]  train-mape:0.140297 
## [4]  train-mape:0.082433 
## [5]  train-mape:0.062820 
## [6]  train-mape:0.059089 
## [7]  train-mape:0.054445 
## [8]  train-mape:0.055547 
## [9]  train-mape:0.054940 
## [10] train-mape:0.052501 
## [11] train-mape:0.051276 
## [12] train-mape:0.049128 
## [13] train-mape:0.045392 
## [14] train-mape:0.045255 
## [15] train-mape:0.044535 
## [16] train-mape:0.043740 
## [17] train-mape:0.043592 
## [18] train-mape:0.043499 
## [19] train-mape:0.042452 
## [20] train-mape:0.041294 
## [21] train-mape:0.040657 
## [22] train-mape:0.040778 
## [23] train-mape:0.040671 
## [24] train-mape:0.039878 
## [25] train-mape:0.039961 
## [26] train-mape:0.039687 
## [27] train-mape:0.039632 
## [28] train-mape:0.039209 
## [29] train-mape:0.038819 
## [30] train-mape:0.038874 
## [31] train-mape:0.038663 
## [32] train-mape:0.038696 
## [33] train-mape:0.038012 
## [34] train-mape:0.037129 
## [35] train-mape:0.036589 
## [36] train-mape:0.035790 
## [37] train-mape:0.035764 
## [38] train-mape:0.035021 
## [39] train-mape:0.034860 
## [40] train-mape:0.034735 
## [41] train-mape:0.034370 
## [42] train-mape:0.033570 
## [43] train-mape:0.033557 
## [44] train-mape:0.033234 
## [45] train-mape:0.033373 
## [46] train-mape:0.033362 
## [47] train-mape:0.032567 
## [48] train-mape:0.032420 
## [49] train-mape:0.032493 
## [50] train-mape:0.032514 
## [51] train-mape:0.032531 
## [52] train-mape:0.032436 
## [53] train-mape:0.032072 
## [54] train-mape:0.032088 
## [55] train-mape:0.032097 
## [56] train-mape:0.031280 
## [57] train-mape:0.031304 
## [58] train-mape:0.031302 
## [59] train-mape:0.031330 
## [60] train-mape:0.031363 
## [61] train-mape:0.031366 
## [62] train-mape:0.030567 
## [63] train-mape:0.030498 
## [64] train-mape:0.030050 
## [65] train-mape:0.030095 
## [66] train-mape:0.029600 
## [67] train-mape:0.028712 
## [68] train-mape:0.028711 
## [69] train-mape:0.027937 
## [70] train-mape:0.027920 
## [71] train-mape:0.027888 
## [72] train-mape:0.027926 
## [73] train-mape:0.027899 
## [74] train-mape:0.027859 
## [75] train-mape:0.027854 
## [76] train-mape:0.027939 
## [77] train-mape:0.027958 
## [78] train-mape:0.027328 
## [79] train-mape:0.027317 
## [80] train-mape:0.027261 
## [81] train-mape:0.027290 
## [82] train-mape:0.027278 
## [83] train-mape:0.026377 
## [84] train-mape:0.026380 
## [85] train-mape:0.025927 
## [86] train-mape:0.025650 
## [87] train-mape:0.025027 
## [88] train-mape:0.025043 
## [89] train-mape:0.024665 
## [90] train-mape:0.024239 
## [91] train-mape:0.024240 
## [92] train-mape:0.024301 
## [93] train-mape:0.024357 
## [94] train-mape:0.023583 
## [95] train-mape:0.023630 
## [96] train-mape:0.023359 
## [97] train-mape:0.022833 
## [98] train-mape:0.022824 
## [99] train-mape:0.022809 
## [100]    train-mape:0.022879
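# A variant worth trying (a sketch, not run here): the log above shows early
# stopping tracked training MAPE. Putting the hold-out DMatrix last in the
# watchlist makes xgboost stop on validation MAPE instead.
watchlist_val <- list(train = dtrain, eval = dtest)
model_val <- xgb.train(
  params = params,
  data = dtrain,
  nrounds = num_round,
  watchlist = watchlist_val,
  eval_metric = mape_eval,
  maximize = FALSE,
  early_stopping_rounds = 50
)
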
# Predict on the 20% hold-out and reattach warehouse and date (train_df no
# longer carries id, so there is no id to bring back here)
prediction = as.data.frame(predict(model, dtest))
prediction = prediction %>% 
  mutate(warehouse = test$warehouse) %>% 
  mutate(date = test$date) %>% 
  rename(orders = `predict(model, dtest)`)
ggplot(prediction, aes(y = orders, x = date, color = as.factor(warehouse))) +
  geom_line() +
  labs(title = "Prediction on the hold-out split",
       x = "Date",
       y = "Orders") +
  theme_minimal()
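
# An added check (same formula as mape_eval): MAPE on the hold-out split
mean(abs((test$orders - prediction$orders) / pmax(1, abs(test$orders))))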

# Score the real test set with the model trained above
train = train_df
test = test_df %>% 
  select(-id)

dtrain <- xgb.DMatrix(data = as.matrix(train[, -7]), label = train[, 7])
# The real test set has no orders column, so the label passed here is just a
# placeholder taken from column 7 (predict() never reads it); dropping that
# same column keeps the feature matrix the same width as in training
dtest <- xgb.DMatrix(data = as.matrix(test[, -7]), label = test[, 7])

prediction = as.data.frame(predict(model, dtest))
prediction = prediction %>% 
  mutate(id = test_df$id) %>% 
  mutate(warehouse = test$warehouse) %>% 
  mutate(date = test$date) %>% 
  rename(orders = `predict(model, dtest)`)
ggplot(prediction, aes(y = orders, x = date, color = as.factor(warehouse))) +
  geom_line() +
  labs(title = "Prediction on the test set",
       x = "Date",
       y = "Orders") +
  theme_minimal()

# Merge onto the sample solution by id, keep only the predictions, and give
# the column the name the submission format expects
prediction = merge(solution_example, prediction, by = "id") %>% 
  select(-c(orders.x, date, warehouse)) %>% 
  rename(orders = orders.y)
write.csv(prediction, 'prediction_12_xgboost_MAPE.csv', row.names = FALSE)
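
# Optional final check: exactly one prediction per id in the sample solution
stopifnot(nrow(prediction) == nrow(solution_example))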