# Point the session at the project folder; the data files further down are
# opened by bare file name ('train.csv', 'test.csv').
# NOTE(review): absolute, machine-specific path -- this presumably fails on any
# other machine; confirm whether a project-relative setup is possible.
setwd('C:/Users/DellPC/Desktop/Corner/Py_source_code/Project/Store Item Demand Forecasting')

R packages

suppressMessages(library(tidyverse))
suppressMessages(library(zoo))
# Set up for python environment in Rstudio


library(reticulate)

EDA + Prophet + MLP Neural Network Forecasting


Executive Summary:

This tutorial consists of a comprehensive Exploratory Data Analysis together with Prophet and MLP Neural Network forecast modeling for the Store Item Demand Forecasting Challenge competition. The objective of the problem is to forecast 3 months of sales for 50 different items at 10 different stores using 5 years of sales history.

The data contains the following files:

 1. train.csv
 2. test.csv
 3. Sample submission.csv

Data Fields

 - date - Date of the sale data. There are no holiday effects or store closures.
 - store - Store ID
 - item - Item ID
 - sales - Number of items sold at a particular store on a particular date.

Introduction:

What is time series?

A time series differs from an ordinary series in that its observations are ordered by, and depend on, time.

Frequency

Say you have hourly electricity consumption data for Bangalore. The cycle could be a day, a week, or even a year. I will cover what the frequency would be for each type of time series.

Before we proceed, I will reiterate this.

Frequency: is the number of observations per cycle.

Load libraries


import numpy as np 
import pandas as pd

from datetime import datetime

import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
from plotly.colors import n_colors
from plotly.subplots import make_subplots

from sklearn.preprocessing import MinMaxScaler
from fbprophet import Prophet

Load Datasets


# Read the training and test tables from the working directory.
train, test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

Structure & Summary of the Data


# Shape, column dtypes and summary statistics of the training data.
print(f'The train data set has  {train.shape[0]} rows and {train.shape[1]} columns')
## The train data set has  913000 rows and 4 columns
train.dtypes
## date     object
## store     int64
## item      int64
## sales     int64
## dtype: object
print(train.describe())
##                store           item          sales
## count  913000.000000  913000.000000  913000.000000
## mean        5.500000      25.500000      52.250287
## std         2.872283      14.430878      28.801144
## min         1.000000       1.000000       0.000000
## 25%         3.000000      13.000000      30.000000
## 50%         5.500000      25.500000      47.000000
## 75%         8.000000      38.000000      70.000000
## max        10.000000      50.000000     231.000000
# Same overview for the test data. The f prefix is required here: without it
# the braces are printed literally instead of being interpolated.
print(f'The test data set has {test.shape[0]} rows and {test.shape[1]} columns')
## The test data set has 45000 rows and 4 columns
test.dtypes
## id        int64
## date     object
## store     int64
## item      int64
## dtype: object
print(test.describe())
##                  id         store         item
## count  45000.000000  45000.000000  45000.00000
## mean   22499.500000      5.500000     25.50000
## std    12990.525394      2.872313     14.43103
## min        0.000000      1.000000      1.00000
## 25%    11249.750000      3.000000     13.00000
## 50%    22499.500000      5.500000     25.50000
## 75%    33749.250000      8.000000     38.00000
## max    44999.000000     10.000000     50.00000
# Summary statistics of the sales column alone. describe must be called;
# printing the bare attribute shows the bound method and the raw series.
print('The summary of train sales is:')
## The summary of train sales is:
print(train.sales.describe())
## count    913000.000000
## mean         52.250287
## std          28.801144
## min           0.000000
## 25%          30.000000
## 50%          47.000000
## 75%          70.000000
## max         231.000000
## Name: sales, dtype: float64

# Scratch

train.dtypes
## date     object
## store     int64
## item      int64
## sales     int64
## dtype: object
# Parse every date string of the training data into a datetime object.
dates = [
    datetime.strptime(date, '%Y-%m-%d')
    for date in train.date.values.tolist()
]

Extraction of Year and Month of Year:



# Convert the date column from year-month-day strings to pandas datetimes.
parsed_dates = pd.to_datetime(train['date'], format='%Y-%m-%d')
train['date'] = parsed_dates
# Pull the pandas data frame into R through reticulate.
df <- py$train

# Label every row with its calendar month (e.g. "Jan 2013"); the result is a
# one-column data frame whose `date` column is a factor.
yearmonth <- df %>%
  transmute(
    date = date %>%
      as.character() %>%
      as.Date() %>%
      as.yearmon() %>%
      as.factor()
  )

# Derive calendar features from the parsed date column; the year-month label
# is taken from the R session.
date_parts = train['date'].dt
train['year'] = date_parts.year
train['month'] = date_parts.month
train['yearmonth'] = r.yearmonth

train.head(5)
##         date  store  item  sales  year  month yearmonth
## 0 2013-01-01      1     1     13  2013      1  Jan 2013
## 1 2013-01-02      1     1     11  2013      1  Jan 2013
## 2 2013-01-03      1     1     14  2013      1  Jan 2013
## 3 2013-01-04      1     1     13  2013      1  Jan 2013
## 4 2013-01-05      1     1     10  2013      1  Jan 2013

Histogram of Sale Price


# Write plotly's qualitative colour swatches to an HTML file, then list the
# colours of the Pastel palette.
palettes = px.colors.qualitative
fig = palettes.swatches()

pyo.plot(fig, filename='discrete_color_plotly.html', auto_open=False)
## 'discrete_color_plotly.html'
print(palettes.Pastel)
## ['rgb(102, 197, 204)', 'rgb(246, 207, 113)', 'rgb(248, 156, 116)', 'rgb(220, 176, 242)', 'rgb(135, 197, 95)', 'rgb(158, 185, 243)', 'rgb(254, 136, 177)', 'rgb(201, 219, 116)', 'rgb(139, 224, 164)', 'rgb(180, 151, 231)', 'rgb(179, 179, 179)']
# Embed the swatch page that the Python chunk wrote to disk.
htmltools::includeHTML('discrete_color_plotly.html')

# Histogram of the sales column. The figure is built as one chained expression
# assigned to `fig`: add_trace and update_layout return the Figure, and when
# they are written as bare statements its repr is echoed into the document.
fig = (
    go.Figure()
    .add_trace(go.Histogram(x=train['sales'], marker_color='rgb(139, 224, 164)'))
    .update_layout(title='Histogram of Sale Price', xaxis_title='Sales Price')
)
pyo.plot(fig, filename ='histogram_sales.html', auto_open = False)
## 'histogram_sales.html'
# Embed the histogram page that the Python chunk wrote to disk.
htmltools::includeHTML('histogram_sales.html')

Therefore, our Sales Price follows a positively skewed distribution.

Growth by date


# Total sales per calendar date, with the date kept as a regular column.
MSP = train.groupby('date', as_index=False)['sales'].sum()
# Bring the aggregated totals into R.
MSP <- py$MSP

# Percentage change relative to the previous row; the first row has no
# predecessor, so its rate is set to 0.
prev_sales <- head(MSP$sales, -1)
MSP$rate <- c(0, 100 * diff(MSP$sales) / prev_sales)

rate <- MSP$rate



# Attach the percentage change computed in R to the daily totals.
MSP['rate'] = r.rate

# Two stacked panels: total sales per date (top) and the percentage change
# from the previous date (bottom). The figure is built as one chained
# expression assigned to `fig`: add_trace and update_layout return the Figure,
# and when they are written as bare statements its full repr is echoed into
# the rendered document after every call.
fig = (
    make_subplots(
        rows=2, cols=1,
        subplot_titles=('The Growth of Sale Prices by date', 'Change rate of Sale Price'),
    )
    .add_trace(
        go.Scatter(x=MSP['date'], y=MSP['sales'], mode='markers',
                   marker=dict(color='#DA16FF', size=8)),
        row=1, col=1,
    )
    .add_trace(
        go.Scatter(x=MSP['date'], y=MSP['rate'], mode='lines', marker_color='grey'),
        row=2, col=1,
    )
    .update_layout(height=800, width=1000)
)
pyo.plot(fig, filename='growth_sale.html', auto_open=False)
## 'growth_sale.html'
# Embed the growth-by-date page that the Python chunk wrote to disk.
htmltools::includeHTML('growth_sale.html')
  1. The growth of the Sales Price is multiplicative, with an increasing TREND and SEASONALITY.

  2. The rate of change of the Sales Price looks roughly constant at the date level, but the growth rate may only show up clearly at the yearly/monthly level. Let's check it out.

Growth by Month of Different Year


# Total sales per month number (1-12), with the month kept as a regular column.
MSP = train.groupby('month', as_index=False)['sales'].sum()
# Bring the aggregated totals into R.
MSP <- py$MSP

# Percentage change relative to the previous row; the first row has no
# predecessor, so its rate is set to 0.
prev_sales <- head(MSP$sales, -1)
MSP$rate <- c(0, 100 * diff(MSP$sales) / prev_sales)

rate <- MSP$rate



# Attach the percentage change computed in R to the monthly totals.
MSP['rate'] = r.rate

# Two stacked panels: total sales per month (top) and the percentage change
# from the previous month (bottom). The first panel title says "by month"
# because the x axis is the month number, not the date. The figure is built as
# one chained expression assigned to `fig`: add_trace and update_layout return
# the Figure, and when they are written as bare statements its full repr is
# echoed into the rendered document after every call.
fig = (
    make_subplots(
        rows=2, cols=1,
        subplot_titles=('The Growth of Sale Prices by month', 'Change rate of Sale Price'),
    )
    .add_trace(
        go.Scatter(x=MSP['month'], y=MSP['sales'], mode='markers',
                   marker=dict(color='#DA16FF', size=8)),
        row=1, col=1,
    )
    .add_trace(
        go.Scatter(x=MSP['month'], y=MSP['rate'], mode='lines', marker_color='grey'),
        row=2, col=1,
    )
    .update_layout(height=800, width=1000)
)
pyo.plot(fig, filename='growth_sale_month.html', auto_open=False)
## 'growth_sale_month.html'
# Embed the growth-by-month page that the Python chunk wrote to disk.
htmltools::includeHTML('growth_sale_month.html')