UPC Summary of Distinct Monthly UPCs and Prices for each store: Milk

UPC Summary of Distinct Monthly UPCs and Prices for each store: Infant Formula

# One row per distinct UPC in the infant formula data; .keep_all = TRUE
# retains every other column from the row that is kept.
master_infant_std_UPC <- distinct(master_infant_std, UPC, .keep_all = TRUE)

# Interactive table of the de-duplicated rows, 15 per page.
DT::datatable(
  master_infant_std_UPC,
  options = list(pageLength = 15, autoWidth = TRUE)
)

UPC Summary of Distinct Monthly UPCs and Prices for each store: Diapers

# One row per distinct UPC in the diapers data; .keep_all = TRUE retains
# every other column from the row that is kept.
master_diapers_std_UPC <- distinct(master_diapers_std, UPC, .keep_all = TRUE)

# Interactive table of the de-duplicated rows, 15 per page.
DT::datatable(
  master_diapers_std_UPC,
  options = list(pageLength = 15, autoWidth = TRUE)
)

UPC Summary of Distinct Monthly UPCs and Prices for each store: Bread

# One row per distinct UPC in the bread data; .keep_all = TRUE retains
# every other column from the row that is kept.
master_bread_std_UPC <- distinct(master_bread_std, UPC, .keep_all = TRUE)

# Interactive table of the de-duplicated rows, 15 per page.
DT::datatable(
  master_bread_std_UPC,
  options = list(pageLength = 15, autoWidth = TRUE)
)

UPC Summary of Distinct Monthly UPCs and Prices for each store: Rice

# One row per distinct UPC in the rice data; .keep_all = TRUE retains
# every other column from the row that is kept.
master_rice_std_UPC <- distinct(master_rice_std, UPC, .keep_all = TRUE)

# Interactive table of the de-duplicated rows, 15 per page.
DT::datatable(
  master_rice_std_UPC,
  options = list(pageLength = 15, autoWidth = TRUE)
)

UPC Summary of Distinct Monthly UPCs and Prices for each store: Sugar

# One row per distinct UPC in the sugar data; .keep_all = TRUE retains
# every other column from the row that is kept.
master_sugar_std_UPC <- distinct(master_sugar_std, UPC, .keep_all = TRUE)

# Interactive table of the de-duplicated rows, 15 per page.
DT::datatable(
  master_sugar_std_UPC,
  options = list(pageLength = 15, autoWidth = TRUE)
)

UPC Summary of Distinct Monthly UPCs and Prices for each store: Flour

# One row per distinct UPC in the flour data; .keep_all = TRUE retains
# every other column from the row that is kept.
master_flour_std_UPC <- distinct(master_flour_std, UPC, .keep_all = TRUE)

# Interactive table of the de-duplicated rows, 15 per page.
DT::datatable(
  master_flour_std_UPC,
  options = list(pageLength = 15, autoWidth = TRUE)
)

Outliers-Milk

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.03281 0.06062 0.08031 0.08996 0.13542 0.18288

##    pull_date      UPC HOME_STORE_NAME      city   zip
## 1 2023-08-15 20511508         Walmart   Kasilof 99615
## 2 2023-08-15 20511508         Walmart Ketchikan 99901
##                                           SKU_DESCRIPTION  Size price
## 1 Darigold 2% Reduced Fat Milk, Half Gallon Jug, 64 fl oz 64 fl   2.1
## 2 Darigold 2% Reduced Fat Milk, Half Gallon Jug, 64 fl oz 64 fl   2.1
##   internal_prod_code month store_region extracted_text Size_in_Oz Price_per_Oz
## 1                  1   Aug Southcentral           <NA>         64    0.0328125
## 2                  1   Aug    Southeast           <NA>         64    0.0328125

## [1] 179

Outliers-Infant Formula

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.5300  0.8146  2.1363  1.8764  2.4589  4.8752

##    pull_date         UPC HOME_STORE_NAME       city   zip
## 1 2023-02-15 70074569741             ACC Metlakatla 99926
##                       SKU_DESCRIPTION  Size price internal_prod_code month
## 1 Similac Formula Advance Concentrate 13 Oz  6.89                  2   Feb
##   store_region extracted_text Size_in_Oz Price_per_Oz
## 1    Southeast           <NA>         13         0.53

## [1] 226

Outliers-Diapers

# Flag diaper price outliers with the 1.5 * IQR fence rule.
# na.rm = TRUE stops IQR()/quantile() from aborting on a missing price,
# matching the NA handling of the mean line in this section's density plot.
# NOTE: IQR_value, Q1, Q3, lower_bound and upper_bound are plain globals that
# are reassigned by each product section.
IQR_value <- IQR(master_diapers_std$Price_per_diaper, na.rm = TRUE)
Q1 <- quantile(master_diapers_std$Price_per_diaper, 0.25, na.rm = TRUE)
Q3 <- quantile(master_diapers_std$Price_per_diaper, 0.75, na.rm = TRUE)

# Fences sit 1.5 IQRs outside the first and third quartiles.
lower_bound <- Q1 - 1.5 * IQR_value
upper_bound <- Q3 + 1.5 * IQR_value

# Rows strictly outside either fence.
diapers_outliers <- master_diapers_std %>%
  filter(Price_per_diaper < lower_bound | Price_per_diaper > upper_bound)

DT::datatable(
  diapers_outliers,
  options = list(pageLength = 15, autoWidth = TRUE)
)
summary(master_diapers_std$Price_per_diaper)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1513  0.3550  0.4366  0.4134  0.4471  0.7403
# Horizontal boxplot of the per-diaper price distribution.
boxplot(
  master_diapers_std$Price_per_diaper,
  horizontal = TRUE,
  main = "Boxplot of Diaper Prices"
)

# Every row priced at the column minimum. na.rm = TRUE keeps min() from
# returning NA (which would silently filter out all rows) if a price is missing.
master_diapers_std %>%
  filter(Price_per_diaper == min(Price_per_diaper, na.rm = TRUE))
##     pull_date       UPC HOME_STORE_NAME      city   zip
## 1  2023-11-15 960555681           Carrs Anchorage 99507
## 2  2023-11-15 960555681           Carrs Fairbanks 99703
## 3  2023-11-15 960555681           Carrs    Juneau 99801
## 4  2023-11-15 960555681           Carrs     Kenai 99611
## 5  2023-11-15 960555681           Carrs Ketchikan 99901
## 6  2023-11-15 960555681           Carrs   Wasilla 99654
## 7  2023-12-15 960555681           Carrs Anchorage 99507
## 8  2023-12-15 960555681           Carrs Fairbanks 99703
## 9  2023-12-15 960555681           Carrs     Kenai 99611
## 10 2023-12-15 960555681           Carrs   Wasilla 99654
##                                          SKU_DESCRIPTION           Size price
## 1  Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers  5.75
## 2  Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers  5.75
## 3  Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers  5.75
## 4  Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers  5.75
## 5  Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers  5.75
## 6  Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers  5.75
## 7  Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers  5.75
## 8  Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers  5.75
## 9  Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers  5.75
## 10 Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers  5.75
##    internal_prod_code month store_region extracted_text Count Price_per_diaper
## 1                   3   Nov Southcentral           <NA>    38        0.1513158
## 2                   3   Nov     Interior           <NA>    38        0.1513158
## 3                   3   Nov    Southeast           <NA>    38        0.1513158
## 4                   3   Nov Southcentral           <NA>    38        0.1513158
## 5                   3   Nov    Southeast           <NA>    38        0.1513158
## 6                   3   Nov Southcentral           <NA>    38        0.1513158
## 7                   3   Dec Southcentral           <NA>    38        0.1513158
## 8                   3   Dec     Interior           <NA>    38        0.1513158
## 9                   3   Dec Southcentral           <NA>    38        0.1513158
## 10                  3   Dec Southcentral           <NA>    38        0.1513158
# Histogram of per-diaper prices.
hist(
  master_diapers_std$Price_per_diaper,
  main = "Histogram of Diaper Prices",
  xlab = "Price_per_diaper"
)

# Number of observations strictly below the first quartile. Q1 is the global
# set by this section's quartile step; na.rm = TRUE keeps the count from
# becoming NA when a price is missing.
sum(master_diapers_std$Price_per_diaper < Q1, na.rm = TRUE)
## [1] 138
# Density of per-diaper prices; the dashed red vertical line marks the mean.
ggplot(master_diapers_std, aes(x = Price_per_diaper)) +
  geom_density(fill = "blue", alpha = 0.5) +
  geom_vline(
    aes(xintercept = mean(Price_per_diaper, na.rm = TRUE)),
    color = "red",
    linetype = "dashed"
  ) +
  labs(
    title = "Density Plot of Diaper Prices",
    x = "Price_per_diaper",
    y = "Density"
  )

Outliers-Bread

# Flag bread price outliers with the 1.5 * IQR fence rule.
# na.rm = TRUE stops IQR()/quantile() from aborting on a missing price,
# matching the NA handling of the mean line in this section's density plot.
# NOTE: IQR_value, Q1, Q3, lower_bound and upper_bound are plain globals that
# are reassigned by each product section.
IQR_value <- IQR(master_bread_std$Price_per_Oz, na.rm = TRUE)
Q1 <- quantile(master_bread_std$Price_per_Oz, 0.25, na.rm = TRUE)
Q3 <- quantile(master_bread_std$Price_per_Oz, 0.75, na.rm = TRUE)

# Fences sit 1.5 IQRs outside the first and third quartiles.
lower_bound <- Q1 - 1.5 * IQR_value
upper_bound <- Q3 + 1.5 * IQR_value

# Rows strictly outside either fence.
bread_outliers <- master_bread_std %>%
  filter(Price_per_Oz < lower_bound | Price_per_Oz > upper_bound)

DT::datatable(
  bread_outliers,
  options = list(pageLength = 15, autoWidth = TRUE)
)
summary(master_bread_std$Price_per_Oz)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.07955 0.22227 0.30375 0.29084 0.36208 0.48708
# Horizontal boxplot of the per-ounce bread price distribution.
boxplot(
  master_bread_std$Price_per_Oz,
  horizontal = TRUE,
  main = "Boxplot of Bread Prices"
)

# Every row priced at the column minimum. na.rm = TRUE keeps min() from
# returning NA (which would silently filter out all rows) if a price is missing.
master_bread_std %>%
  filter(Price_per_Oz == min(Price_per_Oz, na.rm = TRUE))
##    pull_date       UPC HOME_STORE_NAME      city   zip
## 1 2023-08-15 960018678           Carrs Anchorage 99507
## 2 2023-08-15 960018678           Carrs Fairbanks 99703
## 3 2023-08-15 960018678           Carrs    Juneau 99801
## 4 2023-08-15 960018678           Carrs     Kenai 99611
## 5 2023-08-15 960018678           Carrs Ketchikan 99901
## 6 2023-08-15 960018678           Carrs   Wasilla 99654
##                         SKU_DESCRIPTION  Size price internal_prod_code month
## 1 Oven Joy Bread Enriched Wheat - 22 Oz 22 Oz  1.75                  4   Aug
## 2 Oven Joy Bread Enriched Wheat - 22 Oz 22 Oz  1.75                  4   Aug
## 3 Oven Joy Bread Enriched Wheat - 22 Oz 22 Oz  1.75                  4   Aug
## 4 Oven Joy Bread Enriched Wheat - 22 Oz 22 Oz  1.75                  4   Aug
## 5 Oven Joy Bread Enriched Wheat - 22 Oz 22 Oz  1.75                  4   Aug
## 6 Oven Joy Bread Enriched Wheat - 22 Oz 22 Oz  1.75                  4   Aug
##   store_region extracted_text Size_in_Oz Price_per_Oz
## 1 Southcentral           <NA>         22   0.07954545
## 2     Interior           <NA>         22   0.07954545
## 3    Southeast           <NA>         22   0.07954545
## 4 Southcentral           <NA>         22   0.07954545
## 5    Southeast           <NA>         22   0.07954545
## 6 Southcentral           <NA>         22   0.07954545
# Histogram of per-ounce bread prices.
hist(
  master_bread_std$Price_per_Oz,
  main = "Histogram of Bread Prices",
  xlab = "Price_per_Oz"
)

# Number of observations strictly below the first quartile. Q1 is the global
# set by this section's quartile step; na.rm = TRUE keeps the count from
# becoming NA when a price is missing.
sum(master_bread_std$Price_per_Oz < Q1, na.rm = TRUE)
## [1] 357
# Density of per-ounce bread prices; the dashed red vertical line marks the
# mean.
ggplot(master_bread_std, aes(x = Price_per_Oz)) +
  geom_density(fill = "blue", alpha = 0.5) +
  geom_vline(
    aes(xintercept = mean(Price_per_Oz, na.rm = TRUE)),
    color = "red",
    linetype = "dashed"
  ) +
  labs(
    title = "Density Plot of Bread Prices",
    x = "Price_per_Oz",
    y = "Density"
  )

Outliers-Rice

# Flag rice price outliers with the 1.5 * IQR fence rule.
# na.rm = TRUE stops IQR()/quantile() from aborting on a missing price,
# matching the NA handling of the mean line in this section's density plot.
# NOTE: IQR_value, Q1, Q3, lower_bound and upper_bound are plain globals that
# are reassigned by each product section.
IQR_value <- IQR(master_rice_std$Price_per_lb, na.rm = TRUE)
Q1 <- quantile(master_rice_std$Price_per_lb, 0.25, na.rm = TRUE)
Q3 <- quantile(master_rice_std$Price_per_lb, 0.75, na.rm = TRUE)

# Fences sit 1.5 IQRs outside the first and third quartiles.
lower_bound <- Q1 - 1.5 * IQR_value
upper_bound <- Q3 + 1.5 * IQR_value

# Rows strictly outside either fence.
rice_outliers <- master_rice_std %>%
  filter(Price_per_lb < lower_bound | Price_per_lb > upper_bound)

DT::datatable(
  rice_outliers,
  options = list(pageLength = 15, autoWidth = TRUE)
)
summary(master_rice_std$Price_per_lb)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.558   1.498   2.438   2.444   3.638   7.274
# Horizontal boxplot of the per-pound rice price distribution.
boxplot(
  master_rice_std$Price_per_lb,
  horizontal = TRUE,
  main = "Boxplot of Rice Prices"
)

# Every row priced at the column minimum. na.rm = TRUE keeps min() from
# returning NA (which would silently filter out all rows) if a price is missing.
master_rice_std %>%
  filter(Price_per_lb == min(Price_per_lb, na.rm = TRUE))
##    pull_date       UPC HOME_STORE_NAME      city   zip
## 1 2023-11-15 126150030           Carrs Fairbanks 99703
##                                    SKU_DESCRIPTION Size price
## 1 Signature SELECT Rice Enriched Long Grain - 5 Lb 5 Lb  2.79
##   internal_prod_code month store_region extracted_text Size_in_lbs Price_per_lb
## 1                  5   Nov     Interior           <NA>           5        0.558
# Histogram of per-pound rice prices.
hist(
  master_rice_std$Price_per_lb,
  main = "Histogram of Rice Prices",
  xlab = "Price_per_lb"
)

# Number of observations strictly below the first quartile. Q1 is the global
# set by this section's quartile step; na.rm = TRUE keeps the count from
# becoming NA when a price is missing.
sum(master_rice_std$Price_per_lb < Q1, na.rm = TRUE)
## [1] 342
# Density curve of rice price per pound, with the sample mean overlaid as a
# dashed red vertical line.
ggplot(data = master_rice_std, mapping = aes(x = Price_per_lb)) +
  geom_density(alpha = 0.5, fill = "blue") +
  geom_vline(
    mapping = aes(xintercept = mean(Price_per_lb, na.rm = TRUE)),
    linetype = "dashed",
    color = "red"
  ) +
  xlab("Price_per_lb") +
  ylab("Density") +
  ggtitle("Density Plot of Rice Prices")

Outliers-Sugar

# Flag sugar price outliers with the 1.5 * IQR fence rule.
# (A leftover lookup of the non-existent column `Price_per_lb_per_lb`, which
# only printed NULL, has been dropped.)
# na.rm = TRUE stops IQR()/quantile() from aborting on a missing price,
# matching the NA handling of the mean line in this section's density plot.
# NOTE: IQR_value, Q1, Q3, lower_bound and upper_bound are plain globals that
# are reassigned by each product section.
IQR_value <- IQR(master_sugar_std$Price_per_lb, na.rm = TRUE)
Q1 <- quantile(master_sugar_std$Price_per_lb, 0.25, na.rm = TRUE)
Q3 <- quantile(master_sugar_std$Price_per_lb, 0.75, na.rm = TRUE)

# Fences sit 1.5 IQRs outside the first and third quartiles.
lower_bound <- Q1 - 1.5 * IQR_value
upper_bound <- Q3 + 1.5 * IQR_value

# Rows strictly outside either fence.
sugar_outliers <- master_sugar_std %>%
  filter(Price_per_lb < lower_bound | Price_per_lb > upper_bound)

DT::datatable(
  sugar_outliers,
  options = list(pageLength = 15, autoWidth = TRUE)
)
summary(master_sugar_std$Price_per_lb)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.832   0.979   1.249   1.302   1.349   2.329
# Horizontal boxplot of the per-pound sugar price distribution.
boxplot(
  master_sugar_std$Price_per_lb,
  horizontal = TRUE,
  main = "Boxplot of Sugar Prices"
)

# Every row priced at the column minimum. na.rm = TRUE keeps min() from
# returning NA (which would silently filter out all rows) if a price is missing.
master_sugar_std %>%
  filter(Price_per_lb == min(Price_per_lb, na.rm = TRUE))
##     pull_date      UPC HOME_STORE_NAME      city   zip
## 1  2023-03-09 10314931         Walmart Anchorage 99504
## 2  2023-03-09 10314931         Walmart     Kenai 99611
## 3  2023-03-09 10314931         Walmart   Kasilof 99615
## 4  2023-03-09 10314931         Walmart   Wasilla 99654
## 5  2023-03-09 10314931         Walmart Fairbanks 99701
## 6  2023-03-09 10314931         Walmart Ketchikan 99901
## 7  2023-04-17 10314931         Walmart Anchorage 99504
## 8  2023-04-17 10314931         Walmart     Kenai 99611
## 9  2023-04-17 10314931         Walmart   Kasilof 99615
## 10 2023-04-17 10314931         Walmart   Wasilla 99654
## 11 2023-04-17 10314931         Walmart Fairbanks 99701
## 12 2023-04-17 10314931         Walmart Ketchikan 99901
## 13 2023-05-16 10314931         Walmart Anchorage 99504
## 14 2023-05-16 10314931         Walmart     Kenai 99611
## 15 2023-05-16 10314931         Walmart   Kasilof 99615
## 16 2023-05-16 10314931         Walmart   Wasilla 99654
## 17 2023-05-16 10314931         Walmart Fairbanks 99701
## 18 2023-05-16 10314931         Walmart Ketchikan 99901
## 19 2023-06-15 10314931         Walmart Fairbanks 99701
##                         SKU_DESCRIPTION   Size price internal_prod_code month
## 1  Great Value Granulated Sugar, 10 lbs 10 lbs  8.32                  6   Mar
## 2  Great Value Granulated Sugar, 10 lbs 10 lbs  8.32                  6   Mar
## 3  Great Value Granulated Sugar, 10 lbs 10 lbs  8.32                  6   Mar
## 4  Great Value Granulated Sugar, 10 lbs 10 lbs  8.32                  6   Mar
## 5  Great Value Granulated Sugar, 10 lbs 10 lbs  8.32                  6   Mar
## 6  Great Value Granulated Sugar, 10 lbs 10 lbs  8.32                  6   Mar
## 7  Great Value Granulated Sugar, 10 lbs 10 lbs  8.32                  6   Apr
## 8  Great Value Granulated Sugar, 10 lbs 10 lbs  8.32                  6   Apr
## 9  Great Value Granulated Sugar, 10 lbs 10 lbs  8.32                  6   Apr
## 10 Great Value Granulated Sugar, 10 lbs 10 lbs  8.32                  6   Apr
## 11 Great Value Granulated Sugar, 10 lbs 10 lbs  8.32                  6   Apr
## 12 Great Value Granulated Sugar, 10 lbs 10 lbs  8.32                  6   Apr
## 13 Great Value Granulated Sugar, 10 lbs 10 lbs  8.32                  6   May
## 14 Great Value Granulated Sugar, 10 lbs 10 lbs  8.32                  6   May
## 15 Great Value Granulated Sugar, 10 lbs 10 lbs  8.32                  6   May
## 16 Great Value Granulated Sugar, 10 lbs 10 lbs  8.32                  6   May
## 17 Great Value Granulated Sugar, 10 lbs 10 lbs  8.32                  6   May
## 18 Great Value Granulated Sugar, 10 lbs 10 lbs  8.32                  6   May
## 19 Great Value Granulated Sugar, 10 lbs 10 lbs  8.32                  6   Jun
##    store_region extracted_text Size_in_lbs Price_per_lb
## 1  Southcentral           <NA>          10        0.832
## 2  Southcentral           <NA>          10        0.832
## 3  Southcentral           <NA>          10        0.832
## 4  Southcentral           <NA>          10        0.832
## 5      Interior           <NA>          10        0.832
## 6     Southeast           <NA>          10        0.832
## 7  Southcentral           <NA>          10        0.832
## 8  Southcentral           <NA>          10        0.832
## 9  Southcentral           <NA>          10        0.832
## 10 Southcentral           <NA>          10        0.832
## 11     Interior           <NA>          10        0.832
## 12    Southeast           <NA>          10        0.832
## 13 Southcentral           <NA>          10        0.832
## 14 Southcentral           <NA>          10        0.832
## 15 Southcentral           <NA>          10        0.832
## 16 Southcentral           <NA>          10        0.832
## 17     Interior           <NA>          10        0.832
## 18    Southeast           <NA>          10        0.832
## 19     Interior           <NA>          10        0.832
# Histogram of per-pound sugar prices.
hist(
  master_sugar_std$Price_per_lb,
  main = "Histogram of Sugar Prices",
  xlab = "Price_per_lb"
)

# Number of observations strictly below the first quartile. Q1 is the global
# set by this section's quartile step; na.rm = TRUE keeps the count from
# becoming NA when a price is missing.
sum(master_sugar_std$Price_per_lb < Q1, na.rm = TRUE)
## [1] 65
# Density of per-pound sugar prices; the dashed red vertical line marks the
# mean.
ggplot(master_sugar_std, aes(x = Price_per_lb)) +
  geom_density(fill = "blue", alpha = 0.5) +
  geom_vline(
    aes(xintercept = mean(Price_per_lb, na.rm = TRUE)),
    color = "red",
    linetype = "dashed"
  ) +
  labs(
    title = "Density Plot of Sugar Prices",
    x = "Price_per_lb",
    y = "Density"
  )

Outliers-Flour

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.643   1.229   2.149   1.903   2.399   3.299

##    pull_date      UPC HOME_STORE_NAME      city   zip
## 1 2023-03-09 10311380         Walmart Anchorage 99504
## 2 2023-03-09 10311380         Walmart   Wasilla 99654
## 3 2023-03-09 10311308         Walmart Ketchikan 99901
## 4 2023-04-17 10311380         Walmart Anchorage 99504
## 5 2023-04-17 10311380         Walmart   Wasilla 99654
## 6 2023-06-15 10311380         Walmart Anchorage 99504
## 7 2023-06-15 10311380         Walmart   Wasilla 99654
## 8 2023-06-15 10311308         Walmart Ketchikan 99901
##                                      SKU_DESCRIPTION      Size price
## 1 Gold Medal Unbleached All Purpose Flour, 10 pounds 10 pounds  6.43
## 2 Gold Medal Unbleached All Purpose Flour, 10 pounds 10 pounds  6.43
## 3               Gold Medal All Purpose Flour, 10 lb.     10 lb  6.43
## 4 Gold Medal Unbleached All Purpose Flour, 10 pounds 10 pounds  6.43
## 5 Gold Medal Unbleached All Purpose Flour, 10 pounds 10 pounds  6.43
## 6 Gold Medal Unbleached All Purpose Flour, 10 pounds 10 pounds  6.43
## 7 Gold Medal Unbleached All Purpose Flour, 10 pounds 10 pounds  6.43
## 8               Gold Medal All Purpose Flour, 10 lb.     10 lb  6.43
##   internal_prod_code month store_region extracted_text Size_in_lbs Price_per_lb
## 1                  7   Mar Southcentral           <NA>          10        0.643
## 2                  7   Mar Southcentral           <NA>          10        0.643
## 3                  7   Mar    Southeast           <NA>          10        0.643
## 4                  7   Apr Southcentral           <NA>          10        0.643
## 5                  7   Apr Southcentral           <NA>          10        0.643
## 6                  7   Jun Southcentral           <NA>          10        0.643
## 7                  7   Jun Southcentral           <NA>          10        0.643
## 8                  7   Jun    Southeast           <NA>          10        0.643

## [1] 226