install.packages("plyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("dtplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("tidyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("lsr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("ggplot2")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("stats")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
## Warning: package 'stats' is a base package, and should not be updated
install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("forecast")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("lubridate")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(plyr)
library(dtplyr)
library(tidyr)
library(lsr)
library(ggplot2)
library(stats)
#Load packages
library(tidyverse) # for general data wrangling and plotting
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::arrange()   masks plyr::arrange()
## ✖ purrr::compact()   masks plyr::compact()
## ✖ dplyr::count()     masks plyr::count()
## ✖ dplyr::desc()      masks plyr::desc()
## ✖ dplyr::failwith()  masks plyr::failwith()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::id()        masks plyr::id()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ dplyr::mutate()    masks plyr::mutate()
## ✖ dplyr::rename()    masks plyr::rename()
## ✖ dplyr::summarise() masks plyr::summarise()
## ✖ dplyr::summarize() masks plyr::summarize()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(lubridate) # for working with dates

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

df <- read_csv("retail_store_inventory.csv")
## Rows: 73100 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (6): Store ID, Product ID, Category, Region, Weather Condition, Seasona...
## dbl  (8): Inventory Level, Units Sold, Units Ordered, Demand Forecast, Price...
## date (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
print(df)
## # A tibble: 73,100 × 15
##    Date       `Store ID` `Product ID` Category    Region `Inventory Level`
##    <date>     <chr>      <chr>        <chr>       <chr>              <dbl>
##  1 2022-01-01 S001       P0001        Groceries   North                231
##  2 2022-01-01 S001       P0002        Toys        South                204
##  3 2022-01-01 S001       P0003        Toys        West                 102
##  4 2022-01-01 S001       P0004        Toys        North                469
##  5 2022-01-01 S001       P0005        Electronics East                 166
##  6 2022-01-01 S001       P0006        Groceries   South                138
##  7 2022-01-01 S001       P0007        Furniture   East                 359
##  8 2022-01-01 S001       P0008        Clothing    North                380
##  9 2022-01-01 S001       P0009        Electronics West                 183
## 10 2022-01-01 S001       P0010        Toys        South                108
## # ℹ 73,090 more rows
## # ℹ 9 more variables: `Units Sold` <dbl>, `Units Ordered` <dbl>,
## #   `Demand Forecast` <dbl>, Price <dbl>, Discount <dbl>,
## #   `Weather Condition` <chr>, `Holiday/Promotion` <dbl>,
## #   `Competitor Pricing` <dbl>, Seasonality <chr>
##       Date              Store ID          Product ID          Category        
##  Min.   :2022-01-01   Length:73100       Length:73100       Length:73100      
##  1st Qu.:2022-07-02   Class :character   Class :character   Class :character  
##  Median :2023-01-01   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :2023-01-01                                                           
##  3rd Qu.:2023-07-03                                                           
##  Max.   :2024-01-01                                                           
##     Region          Inventory Level   Units Sold    Units Ordered
##  Length:73100       Min.   : 50.0   Min.   :  0.0   Min.   : 20  
##  Class :character   1st Qu.:162.0   1st Qu.: 49.0   1st Qu.: 65  
##  Mode  :character   Median :273.0   Median :107.0   Median :110  
##                     Mean   :274.5   Mean   :136.5   Mean   :110  
##                     3rd Qu.:387.0   3rd Qu.:203.0   3rd Qu.:155  
##                     Max.   :500.0   Max.   :499.0   Max.   :200  
##  Demand Forecast      Price           Discount     Weather Condition 
##  Min.   : -9.99   Min.   : 10.00   Min.   : 0.00   Length:73100      
##  1st Qu.: 53.67   1st Qu.: 32.65   1st Qu.: 5.00   Class :character  
##  Median :113.02   Median : 55.05   Median :10.00   Mode  :character  
##  Mean   :141.49   Mean   : 55.14   Mean   :10.01                     
##  3rd Qu.:208.05   3rd Qu.: 77.86   3rd Qu.:15.00                     
##  Max.   :518.55   Max.   :100.00   Max.   :20.00                     
##  Holiday/Promotion Competitor Pricing Seasonality       
##  Min.   :0.0000    Min.   :  5.03     Length:73100      
##  1st Qu.:0.0000    1st Qu.: 32.68     Class :character  
##  Median :0.0000    Median : 55.01     Mode  :character  
##  Mean   :0.4973    Mean   : 55.15                       
##  3rd Qu.:1.0000    3rd Qu.: 77.82                       
##  Max.   :1.0000    Max.   :104.94
## spc_tbl_ [73,100 × 15] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Date              : Date[1:73100], format: "2022-01-01" "2022-01-01" ...
##  $ Store ID          : chr [1:73100] "S001" "S001" "S001" "S001" ...
##  $ Product ID        : chr [1:73100] "P0001" "P0002" "P0003" "P0004" ...
##  $ Category          : chr [1:73100] "Groceries" "Toys" "Toys" "Toys" ...
##  $ Region            : chr [1:73100] "North" "South" "West" "North" ...
##  $ Inventory Level   : num [1:73100] 231 204 102 469 166 138 359 380 183 108 ...
##  $ Units Sold        : num [1:73100] 127 150 65 61 14 128 97 312 175 28 ...
##  $ Units Ordered     : num [1:73100] 55 66 51 164 135 102 167 54 135 196 ...
##  $ Demand Forecast   : num [1:73100] 135.47 144.04 74.02 62.18 9.26 ...
##  $ Price             : num [1:73100] 33.5 63 28 32.7 73.6 ...
##  $ Discount          : num [1:73100] 20 20 10 10 0 10 10 5 10 0 ...
##  $ Weather Condition : chr [1:73100] "Rainy" "Sunny" "Sunny" "Cloudy" ...
##  $ Holiday/Promotion : num [1:73100] 0 0 1 1 0 1 1 0 0 1 ...
##  $ Competitor Pricing: num [1:73100] 29.7 66.2 31.3 34.7 69 ...
##  $ Seasonality       : chr [1:73100] "Autumn" "Autumn" "Summer" "Autumn" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Date = col_date(format = ""),
##   ..   `Store ID` = col_character(),
##   ..   `Product ID` = col_character(),
##   ..   Category = col_character(),
##   ..   Region = col_character(),
##   ..   `Inventory Level` = col_double(),
##   ..   `Units Sold` = col_double(),
##   ..   `Units Ordered` = col_double(),
##   ..   `Demand Forecast` = col_double(),
##   ..   Price = col_double(),
##   ..   Discount = col_double(),
##   ..   `Weather Condition` = col_character(),
##   ..   `Holiday/Promotion` = col_double(),
##   ..   `Competitor Pricing` = col_double(),
##   ..   Seasonality = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
##  [1] "Date"               "Store ID"           "Product ID"        
##  [4] "Category"           "Region"             "Inventory Level"   
##  [7] "Units Sold"         "Units Ordered"      "Demand Forecast"   
## [10] "Price"              "Discount"           "Weather Condition" 
## [13] "Holiday/Promotion"  "Competitor Pricing" "Seasonality"
## # A tibble: 73,100 × 15
##    date       store_id product_id category    region inventory_level units_sold
##    <date>     <chr>    <chr>      <chr>       <chr>            <dbl>      <dbl>
##  1 2022-01-01 S001     P0001      Groceries   North              231        127
##  2 2022-01-01 S001     P0002      Toys        South              204        150
##  3 2022-01-01 S001     P0003      Toys        West               102         65
##  4 2022-01-01 S001     P0004      Toys        North              469         61
##  5 2022-01-01 S001     P0005      Electronics East               166         14
##  6 2022-01-01 S001     P0006      Groceries   South              138        128
##  7 2022-01-01 S001     P0007      Furniture   East               359         97
##  8 2022-01-01 S001     P0008      Clothing    North              380        312
##  9 2022-01-01 S001     P0009      Electronics West               183        175
## 10 2022-01-01 S001     P0010      Toys        South              108         28
## # ℹ 73,090 more rows
## # ℹ 8 more variables: units_ordered <dbl>, demand_forecast <dbl>, price <dbl>,
## #   discount <dbl>, weather_condition <chr>, holiday_promotion <dbl>,
## #   competitor_pricing <dbl>, seasonality <chr>
new_df <-  filter(df, category == "Toys" & product_id == "P0002") #%>% select(Date, Demand.Forecast)
print(new_df)
## # A tibble: 728 × 15
##    date       store_id product_id category region inventory_level units_sold
##    <date>     <chr>    <chr>      <chr>    <chr>            <dbl>      <dbl>
##  1 2022-01-01 S001     P0002      Toys     South              204        150
##  2 2022-01-05 S001     P0002      Toys     West               270        220
##  3 2022-01-05 S005     P0002      Toys     West                98         59
##  4 2022-01-06 S002     P0002      Toys     North              117         23
##  5 2022-01-06 S004     P0002      Toys     East               162        105
##  6 2022-01-07 S004     P0002      Toys     North               61         46
##  7 2022-01-08 S002     P0002      Toys     North              384        134
##  8 2022-01-08 S004     P0002      Toys     South               74         25
##  9 2022-01-09 S002     P0002      Toys     North               93         29
## 10 2022-01-09 S004     P0002      Toys     North              211         74
## # ℹ 718 more rows
## # ℹ 8 more variables: units_ordered <dbl>, demand_forecast <dbl>, price <dbl>,
## #   discount <dbl>, weather_condition <chr>, holiday_promotion <dbl>,
## #   competitor_pricing <dbl>, seasonality <chr>
new_df$month <- format(as.Date(new_df$date, format = "%Y%d%m"), "%m")
new_df$year <- format(as.Date(new_df$date, format = "%Y%d%m"), "%Y")
print(new_df)
## # A tibble: 728 × 17
##    date       store_id product_id category region inventory_level units_sold
##    <date>     <chr>    <chr>      <chr>    <chr>            <dbl>      <dbl>
##  1 2022-01-01 S001     P0002      Toys     South              204        150
##  2 2022-01-05 S001     P0002      Toys     West               270        220
##  3 2022-01-05 S005     P0002      Toys     West                98         59
##  4 2022-01-06 S002     P0002      Toys     North              117         23
##  5 2022-01-06 S004     P0002      Toys     East               162        105
##  6 2022-01-07 S004     P0002      Toys     North               61         46
##  7 2022-01-08 S002     P0002      Toys     North              384        134
##  8 2022-01-08 S004     P0002      Toys     South               74         25
##  9 2022-01-09 S002     P0002      Toys     North               93         29
## 10 2022-01-09 S004     P0002      Toys     North              211         74
## # ℹ 718 more rows
## # ℹ 10 more variables: units_ordered <dbl>, demand_forecast <dbl>, price <dbl>,
## #   discount <dbl>, weather_condition <chr>, holiday_promotion <dbl>,
## #   competitor_pricing <dbl>, seasonality <chr>, month <chr>, year <chr>
new_df$month_year <- format(as.Date(new_df$date, format = "%Y%d%m"), "%m%Y")
print(new_df)
## # A tibble: 728 × 18
##    date       store_id product_id category region inventory_level units_sold
##    <date>     <chr>    <chr>      <chr>    <chr>            <dbl>      <dbl>
##  1 2022-01-01 S001     P0002      Toys     South              204        150
##  2 2022-01-05 S001     P0002      Toys     West               270        220
##  3 2022-01-05 S005     P0002      Toys     West                98         59
##  4 2022-01-06 S002     P0002      Toys     North              117         23
##  5 2022-01-06 S004     P0002      Toys     East               162        105
##  6 2022-01-07 S004     P0002      Toys     North               61         46
##  7 2022-01-08 S002     P0002      Toys     North              384        134
##  8 2022-01-08 S004     P0002      Toys     South               74         25
##  9 2022-01-09 S002     P0002      Toys     North               93         29
## 10 2022-01-09 S004     P0002      Toys     North              211         74
## # ℹ 718 more rows
## # ℹ 11 more variables: units_ordered <dbl>, demand_forecast <dbl>, price <dbl>,
## #   discount <dbl>, weather_condition <chr>, holiday_promotion <dbl>,
## #   competitor_pricing <dbl>, seasonality <chr>, month <chr>, year <chr>,
## #   month_year <chr>
ts_prep <- 
  aggregate(new_df$demand_forecast, by = list(month= format(new_df$date, "%Y-%m")), FUN = mean) #rename column "x" with values Demand.Forecast
print(ts_prep)
##      month         x
## 1  2022-01 136.72432
## 2  2022-02 128.20333
## 3  2022-03 164.15606
## 4  2022-04 125.45154
## 5  2022-05 121.45081
## 6  2022-06 148.20259
## 7  2022-07 124.45972
## 8  2022-08 147.13080
## 9  2022-09 137.35143
## 10 2022-10 149.70600
## 11 2022-11 148.97438
## 12 2022-12 134.08300
## 13 2023-01 155.78457
## 14 2023-02 169.35143
## 15 2023-03 106.94886
## 16 2023-04 158.58120
## 17 2023-05 166.66160
## 18 2023-06 102.85650
## 19 2023-07 147.86719
## 20 2023-08 119.61794
## 21 2023-09 109.77531
## 22 2023-10  97.87069
## 23 2023-11 119.38152
## 24 2023-12 165.95040
colnames(ts_prep)[2] <- "demand_forecast"
#print(ts_prep)
time_series <- ts(ts_prep$demand_forecast, 
                  start = c(2022, 1), 
                  end = c(2023, 12), 
                  frequency = 12 )
print(time_series)
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2022 136.72432 128.20333 164.15606 125.45154 121.45081 148.20259 124.45972
## 2023 155.78457 169.35143 106.94886 158.58120 166.66160 102.85650 147.86719
##            Aug       Sep       Oct       Nov       Dec
## 2022 147.13080 137.35143 149.70600 148.97438 134.08300
## 2023 119.61794 109.77531  97.87069 119.38152 165.95040
plot(time_series)

acf(time_series)

arima <- select(ts_prep, demand_forecast)
fit <- auto.arima(arima)
forecast_values <- forecast(fit, 1)
print(forecast_values)
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 25       136.9392 109.5001 164.3783 94.97474 178.9037
plot(forecast_values, main = "Graph with forecast", col.main = "darkgreen")