install.packages("plyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("dtplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("tidyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("lsr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("ggplot2")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("stats")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
## Warning: package 'stats' is a base package, and should not be updated
install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("forecast")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("lubridate")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(plyr)
library(dtplyr)
library(tidyr)
library(lsr)
library(ggplot2)
library(stats)
#Load packages
library(tidyverse) # for general data wrangling and plotting
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::arrange() masks plyr::arrange()
## ✖ purrr::compact() masks plyr::compact()
## ✖ dplyr::count() masks plyr::count()
## ✖ dplyr::desc() masks plyr::desc()
## ✖ dplyr::failwith() masks plyr::failwith()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::id() masks plyr::id()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::mutate() masks plyr::mutate()
## ✖ dplyr::rename() masks plyr::rename()
## ✖ dplyr::summarise() masks plyr::summarise()
## ✖ dplyr::summarize() masks plyr::summarize()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(lubridate) # for working with dates
R Markdown
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
df <- read_csv("retail_store_inventory.csv")
## Rows: 73100 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Store ID, Product ID, Category, Region, Weather Condition, Seasona...
## dbl (8): Inventory Level, Units Sold, Units Ordered, Demand Forecast, Price...
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
print(df)
## # A tibble: 73,100 × 15
## Date `Store ID` `Product ID` Category Region `Inventory Level`
## <date> <chr> <chr> <chr> <chr> <dbl>
## 1 2022-01-01 S001 P0001 Groceries North 231
## 2 2022-01-01 S001 P0002 Toys South 204
## 3 2022-01-01 S001 P0003 Toys West 102
## 4 2022-01-01 S001 P0004 Toys North 469
## 5 2022-01-01 S001 P0005 Electronics East 166
## 6 2022-01-01 S001 P0006 Groceries South 138
## 7 2022-01-01 S001 P0007 Furniture East 359
## 8 2022-01-01 S001 P0008 Clothing North 380
## 9 2022-01-01 S001 P0009 Electronics West 183
## 10 2022-01-01 S001 P0010 Toys South 108
## # ℹ 73,090 more rows
## # ℹ 9 more variables: `Units Sold` <dbl>, `Units Ordered` <dbl>,
## # `Demand Forecast` <dbl>, Price <dbl>, Discount <dbl>,
## # `Weather Condition` <chr>, `Holiday/Promotion` <dbl>,
## # `Competitor Pricing` <dbl>, Seasonality <chr>
## Date Store ID Product ID Category
## Min. :2022-01-01 Length:73100 Length:73100 Length:73100
## 1st Qu.:2022-07-02 Class :character Class :character Class :character
## Median :2023-01-01 Mode :character Mode :character Mode :character
## Mean :2023-01-01
## 3rd Qu.:2023-07-03
## Max. :2024-01-01
## Region Inventory Level Units Sold Units Ordered
## Length:73100 Min. : 50.0 Min. : 0.0 Min. : 20
## Class :character 1st Qu.:162.0 1st Qu.: 49.0 1st Qu.: 65
## Mode :character Median :273.0 Median :107.0 Median :110
## Mean :274.5 Mean :136.5 Mean :110
## 3rd Qu.:387.0 3rd Qu.:203.0 3rd Qu.:155
## Max. :500.0 Max. :499.0 Max. :200
## Demand Forecast Price Discount Weather Condition
## Min. : -9.99 Min. : 10.00 Min. : 0.00 Length:73100
## 1st Qu.: 53.67 1st Qu.: 32.65 1st Qu.: 5.00 Class :character
## Median :113.02 Median : 55.05 Median :10.00 Mode :character
## Mean :141.49 Mean : 55.14 Mean :10.01
## 3rd Qu.:208.05 3rd Qu.: 77.86 3rd Qu.:15.00
## Max. :518.55 Max. :100.00 Max. :20.00
## Holiday/Promotion Competitor Pricing Seasonality
## Min. :0.0000 Min. : 5.03 Length:73100
## 1st Qu.:0.0000 1st Qu.: 32.68 Class :character
## Median :0.0000 Median : 55.01 Mode :character
## Mean :0.4973 Mean : 55.15
## 3rd Qu.:1.0000 3rd Qu.: 77.82
## Max. :1.0000 Max. :104.94
## spc_tbl_ [73,100 × 15] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Date : Date[1:73100], format: "2022-01-01" "2022-01-01" ...
## $ Store ID : chr [1:73100] "S001" "S001" "S001" "S001" ...
## $ Product ID : chr [1:73100] "P0001" "P0002" "P0003" "P0004" ...
## $ Category : chr [1:73100] "Groceries" "Toys" "Toys" "Toys" ...
## $ Region : chr [1:73100] "North" "South" "West" "North" ...
## $ Inventory Level : num [1:73100] 231 204 102 469 166 138 359 380 183 108 ...
## $ Units Sold : num [1:73100] 127 150 65 61 14 128 97 312 175 28 ...
## $ Units Ordered : num [1:73100] 55 66 51 164 135 102 167 54 135 196 ...
## $ Demand Forecast : num [1:73100] 135.47 144.04 74.02 62.18 9.26 ...
## $ Price : num [1:73100] 33.5 63 28 32.7 73.6 ...
## $ Discount : num [1:73100] 20 20 10 10 0 10 10 5 10 0 ...
## $ Weather Condition : chr [1:73100] "Rainy" "Sunny" "Sunny" "Cloudy" ...
## $ Holiday/Promotion : num [1:73100] 0 0 1 1 0 1 1 0 0 1 ...
## $ Competitor Pricing: num [1:73100] 29.7 66.2 31.3 34.7 69 ...
## $ Seasonality : chr [1:73100] "Autumn" "Autumn" "Summer" "Autumn" ...
## - attr(*, "spec")=
## .. cols(
## .. Date = col_date(format = ""),
## .. `Store ID` = col_character(),
## .. `Product ID` = col_character(),
## .. Category = col_character(),
## .. Region = col_character(),
## .. `Inventory Level` = col_double(),
## .. `Units Sold` = col_double(),
## .. `Units Ordered` = col_double(),
## .. `Demand Forecast` = col_double(),
## .. Price = col_double(),
## .. Discount = col_double(),
## .. `Weather Condition` = col_character(),
## .. `Holiday/Promotion` = col_double(),
## .. `Competitor Pricing` = col_double(),
## .. Seasonality = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
## [1] "Date" "Store ID" "Product ID"
## [4] "Category" "Region" "Inventory Level"
## [7] "Units Sold" "Units Ordered" "Demand Forecast"
## [10] "Price" "Discount" "Weather Condition"
## [13] "Holiday/Promotion" "Competitor Pricing" "Seasonality"
## # A tibble: 73,100 × 15
## date store_id product_id category region inventory_level units_sold
## <date> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 2022-01-01 S001 P0001 Groceries North 231 127
## 2 2022-01-01 S001 P0002 Toys South 204 150
## 3 2022-01-01 S001 P0003 Toys West 102 65
## 4 2022-01-01 S001 P0004 Toys North 469 61
## 5 2022-01-01 S001 P0005 Electronics East 166 14
## 6 2022-01-01 S001 P0006 Groceries South 138 128
## 7 2022-01-01 S001 P0007 Furniture East 359 97
## 8 2022-01-01 S001 P0008 Clothing North 380 312
## 9 2022-01-01 S001 P0009 Electronics West 183 175
## 10 2022-01-01 S001 P0010 Toys South 108 28
## # ℹ 73,090 more rows
## # ℹ 8 more variables: units_ordered <dbl>, demand_forecast <dbl>, price <dbl>,
## # discount <dbl>, weather_condition <chr>, holiday_promotion <dbl>,
## # competitor_pricing <dbl>, seasonality <chr>
new_df <- filter(df, category == "Toys" & product_id == "P0002") #%>% select(Date, Demand.Forecast)
print(new_df)
## # A tibble: 728 × 15
## date store_id product_id category region inventory_level units_sold
## <date> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 2022-01-01 S001 P0002 Toys South 204 150
## 2 2022-01-05 S001 P0002 Toys West 270 220
## 3 2022-01-05 S005 P0002 Toys West 98 59
## 4 2022-01-06 S002 P0002 Toys North 117 23
## 5 2022-01-06 S004 P0002 Toys East 162 105
## 6 2022-01-07 S004 P0002 Toys North 61 46
## 7 2022-01-08 S002 P0002 Toys North 384 134
## 8 2022-01-08 S004 P0002 Toys South 74 25
## 9 2022-01-09 S002 P0002 Toys North 93 29
## 10 2022-01-09 S004 P0002 Toys North 211 74
## # ℹ 718 more rows
## # ℹ 8 more variables: units_ordered <dbl>, demand_forecast <dbl>, price <dbl>,
## # discount <dbl>, weather_condition <chr>, holiday_promotion <dbl>,
## # competitor_pricing <dbl>, seasonality <chr>
new_df$month <- format(as.Date(new_df$date, format = "%Y%d%m"), "%m")
new_df$year <- format(as.Date(new_df$date, format = "%Y%d%m"), "%Y")
print(new_df)
## # A tibble: 728 × 17
## date store_id product_id category region inventory_level units_sold
## <date> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 2022-01-01 S001 P0002 Toys South 204 150
## 2 2022-01-05 S001 P0002 Toys West 270 220
## 3 2022-01-05 S005 P0002 Toys West 98 59
## 4 2022-01-06 S002 P0002 Toys North 117 23
## 5 2022-01-06 S004 P0002 Toys East 162 105
## 6 2022-01-07 S004 P0002 Toys North 61 46
## 7 2022-01-08 S002 P0002 Toys North 384 134
## 8 2022-01-08 S004 P0002 Toys South 74 25
## 9 2022-01-09 S002 P0002 Toys North 93 29
## 10 2022-01-09 S004 P0002 Toys North 211 74
## # ℹ 718 more rows
## # ℹ 10 more variables: units_ordered <dbl>, demand_forecast <dbl>, price <dbl>,
## # discount <dbl>, weather_condition <chr>, holiday_promotion <dbl>,
## # competitor_pricing <dbl>, seasonality <chr>, month <chr>, year <chr>
new_df$month_year <- format(as.Date(new_df$date, format = "%Y%d%m"), "%m%Y")
print(new_df)
## # A tibble: 728 × 18
## date store_id product_id category region inventory_level units_sold
## <date> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 2022-01-01 S001 P0002 Toys South 204 150
## 2 2022-01-05 S001 P0002 Toys West 270 220
## 3 2022-01-05 S005 P0002 Toys West 98 59
## 4 2022-01-06 S002 P0002 Toys North 117 23
## 5 2022-01-06 S004 P0002 Toys East 162 105
## 6 2022-01-07 S004 P0002 Toys North 61 46
## 7 2022-01-08 S002 P0002 Toys North 384 134
## 8 2022-01-08 S004 P0002 Toys South 74 25
## 9 2022-01-09 S002 P0002 Toys North 93 29
## 10 2022-01-09 S004 P0002 Toys North 211 74
## # ℹ 718 more rows
## # ℹ 11 more variables: units_ordered <dbl>, demand_forecast <dbl>, price <dbl>,
## # discount <dbl>, weather_condition <chr>, holiday_promotion <dbl>,
## # competitor_pricing <dbl>, seasonality <chr>, month <chr>, year <chr>,
## # month_year <chr>
ts_prep <-
aggregate(new_df$demand_forecast, by = list(month= format(new_df$date, "%Y-%m")), FUN = mean) #rename column "x" with values Demand.Forecast
print(ts_prep)
## month x
## 1 2022-01 136.72432
## 2 2022-02 128.20333
## 3 2022-03 164.15606
## 4 2022-04 125.45154
## 5 2022-05 121.45081
## 6 2022-06 148.20259
## 7 2022-07 124.45972
## 8 2022-08 147.13080
## 9 2022-09 137.35143
## 10 2022-10 149.70600
## 11 2022-11 148.97438
## 12 2022-12 134.08300
## 13 2023-01 155.78457
## 14 2023-02 169.35143
## 15 2023-03 106.94886
## 16 2023-04 158.58120
## 17 2023-05 166.66160
## 18 2023-06 102.85650
## 19 2023-07 147.86719
## 20 2023-08 119.61794
## 21 2023-09 109.77531
## 22 2023-10 97.87069
## 23 2023-11 119.38152
## 24 2023-12 165.95040
colnames(ts_prep)[2] <- "demand_forecast"
#print(ts_prep)
time_series <- ts(ts_prep$demand_forecast,
start = c(2022, 1),
end = c(2023, 12),
frequency = 12 )
print(time_series)
## Jan Feb Mar Apr May Jun Jul
## 2022 136.72432 128.20333 164.15606 125.45154 121.45081 148.20259 124.45972
## 2023 155.78457 169.35143 106.94886 158.58120 166.66160 102.85650 147.86719
## Aug Sep Oct Nov Dec
## 2022 147.13080 137.35143 149.70600 148.97438 134.08300
## 2023 119.61794 109.77531 97.87069 119.38152 165.95040
plot(time_series)
acf(time_series)
arima <- select(ts_prep, demand_forecast)
fit <- auto.arima(arima)
forecast_values <- forecast(fit, 1)
print(forecast_values)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 25 136.9392 109.5001 164.3783 94.97474 178.9037
plot(forecast_values, main = "Graph with forecast", col.main = "darkgreen")