






UPC Summary of Distinct Monthly UPCs and Prices for each store:
Milk
UPC Summary of Distinct Monthly UPCs and Prices for each store:
Diapers
# Reduce the diaper data to one row per UPC; `.keep_all = TRUE` carries every
# other column along with the UPC.
master_diapers_std_UPC <- distinct(master_diapers_std, UPC, .keep_all = TRUE)

# Show the per-UPC table as an interactive widget, 15 rows per page.
DT::datatable(
  master_diapers_std_UPC,
  options = list(pageLength = 15, autoWidth = TRUE)
)
UPC Summary of Distinct Monthly UPCs and Prices for each store:
Bread
# Reduce the bread data to one row per UPC; `.keep_all = TRUE` carries every
# other column along with the UPC.
master_bread_std_UPC <- distinct(master_bread_std, UPC, .keep_all = TRUE)

# Show the per-UPC table as an interactive widget, 15 rows per page.
DT::datatable(
  master_bread_std_UPC,
  options = list(pageLength = 15, autoWidth = TRUE)
)
UPC Summary of Distinct Monthly UPCs and Prices for each store:
Rice
# Reduce the rice data to one row per UPC; `.keep_all = TRUE` carries every
# other column along with the UPC.
master_rice_std_UPC <- distinct(master_rice_std, UPC, .keep_all = TRUE)

# Show the per-UPC table as an interactive widget, 15 rows per page.
DT::datatable(
  master_rice_std_UPC,
  options = list(pageLength = 15, autoWidth = TRUE)
)
UPC Summary of Distinct Monthly UPCs and Prices for each store:
Sugar
# Reduce the sugar data to one row per UPC; `.keep_all = TRUE` carries every
# other column along with the UPC.
master_sugar_std_UPC <- distinct(master_sugar_std, UPC, .keep_all = TRUE)

# Show the per-UPC table as an interactive widget, 15 rows per page.
DT::datatable(
  master_sugar_std_UPC,
  options = list(pageLength = 15, autoWidth = TRUE)
)
UPC Summary of Distinct Monthly UPCs and Prices for each store:
Flour
# Reduce the flour data to one row per UPC; `.keep_all = TRUE` carries every
# other column along with the UPC.
master_flour_std_UPC <- distinct(master_flour_std, UPC, .keep_all = TRUE)

# Show the per-UPC table as an interactive widget, 15 rows per page.
DT::datatable(
  master_flour_std_UPC,
  options = list(pageLength = 15, autoWidth = TRUE)
)
Outliers-Milk
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.03281 0.06062 0.08031 0.08996 0.13542 0.18288

## pull_date UPC HOME_STORE_NAME city zip
## 1 2023-08-15 20511508 Walmart Kasilof 99615
## 2 2023-08-15 20511508 Walmart Ketchikan 99901
## SKU_DESCRIPTION Size price
## 1 Darigold 2% Reduced Fat Milk, Half Gallon Jug, 64 fl oz 64 fl 2.1
## 2 Darigold 2% Reduced Fat Milk, Half Gallon Jug, 64 fl oz 64 fl 2.1
## internal_prod_code month store_region extracted_text Size_in_Oz Price_per_Oz
## 1 1 Aug Southcentral <NA> 64 0.0328125
## 2 1 Aug Southeast <NA> 64 0.0328125

## [1] 179

Outliers-Diapers
# Flag diaper rows whose per-diaper price lies outside the 1.5 * IQR fences,
# then summarise the price column and list the cheapest rows.
# `na.rm = TRUE` mirrors the NA handling already used for the mean in the
# density plot; without it IQR()/quantile() stop with an error on any NA.
IQR_value <- IQR(master_diapers_std$Price_per_diaper, na.rm = TRUE)
Q1 <- quantile(master_diapers_std$Price_per_diaper, 0.25, na.rm = TRUE)
Q3 <- quantile(master_diapers_std$Price_per_diaper, 0.75, na.rm = TRUE)
lower_bound <- Q1 - 1.5 * IQR_value
upper_bound <- Q3 + 1.5 * IQR_value

# Rows priced below the lower fence or above the upper fence.
diapers_outliers <- master_diapers_std %>%
  filter(Price_per_diaper < lower_bound | Price_per_diaper > upper_bound)
DT::datatable(
  diapers_outliers,
  options = list(pageLength = 15, autoWidth = TRUE)
)

summary(master_diapers_std$Price_per_diaper)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1513 0.3550 0.4366 0.4134 0.4471 0.7403
boxplot(
  master_diapers_std$Price_per_diaper,
  horizontal = TRUE,
  main = "Boxplot of Diaper Prices"
)

# Every row priced at the minimum per-diaper price; with na.rm an NA price
# no longer turns the comparison into all-NA (which would return zero rows).
master_diapers_std %>%
  filter(Price_per_diaper == min(Price_per_diaper, na.rm = TRUE))
## pull_date UPC HOME_STORE_NAME city zip
## 1 2023-11-15 960555681 Carrs Anchorage 99507
## 2 2023-11-15 960555681 Carrs Fairbanks 99703
## 3 2023-11-15 960555681 Carrs Juneau 99801
## 4 2023-11-15 960555681 Carrs Kenai 99611
## 5 2023-11-15 960555681 Carrs Ketchikan 99901
## 6 2023-11-15 960555681 Carrs Wasilla 99654
## 7 2023-12-15 960555681 Carrs Anchorage 99507
## 8 2023-12-15 960555681 Carrs Fairbanks 99703
## 9 2023-12-15 960555681 Carrs Kenai 99611
## 10 2023-12-15 960555681 Carrs Wasilla 99654
## SKU_DESCRIPTION Size price
## 1 Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers 5.75
## 2 Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers 5.75
## 3 Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers 5.75
## 4 Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers 5.75
## 5 Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers 5.75
## 6 Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers 5.75
## 7 Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers 5.75
## 8 Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers 5.75
## 9 Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers 5.75
## 10 Huggies Snug and Dry Size 1 Baby Diapers - 38 Count 1 1 Baby Diapers 5.75
## internal_prod_code month store_region extracted_text Count Price_per_diaper
## 1 3 Nov Southcentral <NA> 38 0.1513158
## 2 3 Nov Interior <NA> 38 0.1513158
## 3 3 Nov Southeast <NA> 38 0.1513158
## 4 3 Nov Southcentral <NA> 38 0.1513158
## 5 3 Nov Southeast <NA> 38 0.1513158
## 6 3 Nov Southcentral <NA> 38 0.1513158
## 7 3 Dec Southcentral <NA> 38 0.1513158
## 8 3 Dec Interior <NA> 38 0.1513158
## 9 3 Dec Southcentral <NA> 38 0.1513158
## 10 3 Dec Southcentral <NA> 38 0.1513158
# Distribution views of the per-diaper price: a histogram, the number of rows
# priced below the first quartile (Q1 comes from the outlier step earlier in
# this section), and a density plot with the mean marked in red.
hist(
  master_diapers_std$Price_per_diaper,
  main = "Histogram of Diaper Prices",
  xlab = "Price_per_diaper"
)

# na.rm keeps the count defined even if some prices are missing.
sum(master_diapers_std$Price_per_diaper < Q1, na.rm = TRUE)
## [1] 138

# The mean is passed as a constant rather than through aes(): inside aes() the
# single value is recycled to every data row, so one overlapping line would be
# drawn per row instead of a single line.
ggplot(master_diapers_std, aes(x = Price_per_diaper)) +
  geom_density(fill = "blue", alpha = 0.5) +
  geom_vline(
    xintercept = mean(master_diapers_std$Price_per_diaper, na.rm = TRUE),
    color = "red",
    linetype = "dashed"
  ) +
  labs(
    title = "Density Plot of Diaper Prices",
    x = "Price_per_diaper",
    y = "Density"
  )

Outliers-Bread
# Flag bread rows whose price per ounce lies outside the 1.5 * IQR fences,
# then summarise the price column and list the cheapest rows.
# `na.rm = TRUE` mirrors the NA handling already used for the mean in the
# density plot; without it IQR()/quantile() stop with an error on any NA.
IQR_value <- IQR(master_bread_std$Price_per_Oz, na.rm = TRUE)
Q1 <- quantile(master_bread_std$Price_per_Oz, 0.25, na.rm = TRUE)
Q3 <- quantile(master_bread_std$Price_per_Oz, 0.75, na.rm = TRUE)
lower_bound <- Q1 - 1.5 * IQR_value
upper_bound <- Q3 + 1.5 * IQR_value

# Rows priced below the lower fence or above the upper fence.
bread_outliers <- master_bread_std %>%
  filter(Price_per_Oz < lower_bound | Price_per_Oz > upper_bound)
DT::datatable(
  bread_outliers,
  options = list(pageLength = 15, autoWidth = TRUE)
)

summary(master_bread_std$Price_per_Oz)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.07955 0.22227 0.30375 0.29084 0.36208 0.48708
boxplot(
  master_bread_std$Price_per_Oz,
  horizontal = TRUE,
  main = "Boxplot of Bread Prices"
)

# Every row priced at the minimum price per ounce; with na.rm an NA price
# no longer turns the comparison into all-NA (which would return zero rows).
master_bread_std %>%
  filter(Price_per_Oz == min(Price_per_Oz, na.rm = TRUE))
## pull_date UPC HOME_STORE_NAME city zip
## 1 2023-08-15 960018678 Carrs Anchorage 99507
## 2 2023-08-15 960018678 Carrs Fairbanks 99703
## 3 2023-08-15 960018678 Carrs Juneau 99801
## 4 2023-08-15 960018678 Carrs Kenai 99611
## 5 2023-08-15 960018678 Carrs Ketchikan 99901
## 6 2023-08-15 960018678 Carrs Wasilla 99654
## SKU_DESCRIPTION Size price internal_prod_code month
## 1 Oven Joy Bread Enriched Wheat - 22 Oz 22 Oz 1.75 4 Aug
## 2 Oven Joy Bread Enriched Wheat - 22 Oz 22 Oz 1.75 4 Aug
## 3 Oven Joy Bread Enriched Wheat - 22 Oz 22 Oz 1.75 4 Aug
## 4 Oven Joy Bread Enriched Wheat - 22 Oz 22 Oz 1.75 4 Aug
## 5 Oven Joy Bread Enriched Wheat - 22 Oz 22 Oz 1.75 4 Aug
## 6 Oven Joy Bread Enriched Wheat - 22 Oz 22 Oz 1.75 4 Aug
## store_region extracted_text Size_in_Oz Price_per_Oz
## 1 Southcentral <NA> 22 0.07954545
## 2 Interior <NA> 22 0.07954545
## 3 Southeast <NA> 22 0.07954545
## 4 Southcentral <NA> 22 0.07954545
## 5 Southeast <NA> 22 0.07954545
## 6 Southcentral <NA> 22 0.07954545
# Distribution views of the bread price per ounce: a histogram, the number of
# rows priced below the first quartile (Q1 comes from the outlier step earlier
# in this section), and a density plot with the mean marked in red.
hist(
  master_bread_std$Price_per_Oz,
  main = "Histogram of Bread Prices",
  xlab = "Price_per_Oz"
)

# na.rm keeps the count defined even if some prices are missing.
sum(master_bread_std$Price_per_Oz < Q1, na.rm = TRUE)
## [1] 357

# The mean is passed as a constant rather than through aes(): inside aes() the
# single value is recycled to every data row, so one overlapping line would be
# drawn per row instead of a single line.
ggplot(master_bread_std, aes(x = Price_per_Oz)) +
  geom_density(fill = "blue", alpha = 0.5) +
  geom_vline(
    xintercept = mean(master_bread_std$Price_per_Oz, na.rm = TRUE),
    color = "red",
    linetype = "dashed"
  ) +
  labs(
    title = "Density Plot of Bread Prices",
    x = "Price_per_Oz",
    y = "Density"
  )

Outliers-Rice
# Flag rice rows whose price per pound lies outside the 1.5 * IQR fences,
# then summarise the price column and list the cheapest rows.
# `na.rm = TRUE` mirrors the NA handling already used for the mean in the
# density plot; without it IQR()/quantile() stop with an error on any NA.
IQR_value <- IQR(master_rice_std$Price_per_lb, na.rm = TRUE)
Q1 <- quantile(master_rice_std$Price_per_lb, 0.25, na.rm = TRUE)
Q3 <- quantile(master_rice_std$Price_per_lb, 0.75, na.rm = TRUE)
lower_bound <- Q1 - 1.5 * IQR_value
upper_bound <- Q3 + 1.5 * IQR_value

# Rows priced below the lower fence or above the upper fence.
rice_outliers <- master_rice_std %>%
  filter(Price_per_lb < lower_bound | Price_per_lb > upper_bound)
DT::datatable(
  rice_outliers,
  options = list(pageLength = 15, autoWidth = TRUE)
)

summary(master_rice_std$Price_per_lb)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.558 1.498 2.438 2.444 3.638 7.274
boxplot(
  master_rice_std$Price_per_lb,
  horizontal = TRUE,
  main = "Boxplot of Rice Prices"
)

# Every row priced at the minimum price per pound; with na.rm an NA price
# no longer turns the comparison into all-NA (which would return zero rows).
master_rice_std %>%
  filter(Price_per_lb == min(Price_per_lb, na.rm = TRUE))
## pull_date UPC HOME_STORE_NAME city zip
## 1 2023-11-15 126150030 Carrs Fairbanks 99703
## SKU_DESCRIPTION Size price
## 1 Signature SELECT Rice Enriched Long Grain - 5 Lb 5 Lb 2.79
## internal_prod_code month store_region extracted_text Size_in_lbs Price_per_lb
## 1 5 Nov Interior <NA> 5 0.558
# Distribution views of the rice price per pound: a histogram, the number of
# rows priced below the first quartile (Q1 comes from the outlier step earlier
# in this section), and a density plot with the mean marked in red.
hist(
  master_rice_std$Price_per_lb,
  main = "Histogram of Rice Prices",
  xlab = "Price_per_lb"
)

# na.rm keeps the count defined even if some prices are missing.
sum(master_rice_std$Price_per_lb < Q1, na.rm = TRUE)
## [1] 342

# The mean is passed as a constant rather than through aes(): inside aes() the
# single value is recycled to every data row, so one overlapping line would be
# drawn per row instead of a single line.
ggplot(master_rice_std, aes(x = Price_per_lb)) +
  geom_density(fill = "blue", alpha = 0.5) +
  geom_vline(
    xintercept = mean(master_rice_std$Price_per_lb, na.rm = TRUE),
    color = "red",
    linetype = "dashed"
  ) +
  labs(
    title = "Density Plot of Rice Prices",
    x = "Price_per_lb",
    y = "Density"
  )

Outliers-Sugar
# Flag sugar rows whose price per pound lies outside the 1.5 * IQR fences,
# then summarise the price column and list the cheapest rows.
# NOTE: a stray `master_sugar_std$Price_per_lb_per_lb` lookup used to sit
# here; no such column exists, so it only printed NULL and has been dropped.
# `na.rm = TRUE` mirrors the NA handling already used for the mean in the
# density plot; without it IQR()/quantile() stop with an error on any NA.
IQR_value <- IQR(master_sugar_std$Price_per_lb, na.rm = TRUE)
Q1 <- quantile(master_sugar_std$Price_per_lb, 0.25, na.rm = TRUE)
Q3 <- quantile(master_sugar_std$Price_per_lb, 0.75, na.rm = TRUE)
lower_bound <- Q1 - 1.5 * IQR_value
upper_bound <- Q3 + 1.5 * IQR_value

# Rows priced below the lower fence or above the upper fence.
sugar_outliers <- master_sugar_std %>%
  filter(Price_per_lb < lower_bound | Price_per_lb > upper_bound)
DT::datatable(
  sugar_outliers,
  options = list(pageLength = 15, autoWidth = TRUE)
)

summary(master_sugar_std$Price_per_lb)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.832 0.979 1.249 1.302 1.349 2.329
boxplot(
  master_sugar_std$Price_per_lb,
  horizontal = TRUE,
  main = "Boxplot of Sugar Prices"
)

# Every row priced at the minimum price per pound; with na.rm an NA price
# no longer turns the comparison into all-NA (which would return zero rows).
master_sugar_std %>%
  filter(Price_per_lb == min(Price_per_lb, na.rm = TRUE))
## pull_date UPC HOME_STORE_NAME city zip
## 1 2023-03-09 10314931 Walmart Anchorage 99504
## 2 2023-03-09 10314931 Walmart Kenai 99611
## 3 2023-03-09 10314931 Walmart Kasilof 99615
## 4 2023-03-09 10314931 Walmart Wasilla 99654
## 5 2023-03-09 10314931 Walmart Fairbanks 99701
## 6 2023-03-09 10314931 Walmart Ketchikan 99901
## 7 2023-04-17 10314931 Walmart Anchorage 99504
## 8 2023-04-17 10314931 Walmart Kenai 99611
## 9 2023-04-17 10314931 Walmart Kasilof 99615
## 10 2023-04-17 10314931 Walmart Wasilla 99654
## 11 2023-04-17 10314931 Walmart Fairbanks 99701
## 12 2023-04-17 10314931 Walmart Ketchikan 99901
## 13 2023-05-16 10314931 Walmart Anchorage 99504
## 14 2023-05-16 10314931 Walmart Kenai 99611
## 15 2023-05-16 10314931 Walmart Kasilof 99615
## 16 2023-05-16 10314931 Walmart Wasilla 99654
## 17 2023-05-16 10314931 Walmart Fairbanks 99701
## 18 2023-05-16 10314931 Walmart Ketchikan 99901
## 19 2023-06-15 10314931 Walmart Fairbanks 99701
## SKU_DESCRIPTION Size price internal_prod_code month
## 1 Great Value Granulated Sugar, 10 lbs 10 lbs 8.32 6 Mar
## 2 Great Value Granulated Sugar, 10 lbs 10 lbs 8.32 6 Mar
## 3 Great Value Granulated Sugar, 10 lbs 10 lbs 8.32 6 Mar
## 4 Great Value Granulated Sugar, 10 lbs 10 lbs 8.32 6 Mar
## 5 Great Value Granulated Sugar, 10 lbs 10 lbs 8.32 6 Mar
## 6 Great Value Granulated Sugar, 10 lbs 10 lbs 8.32 6 Mar
## 7 Great Value Granulated Sugar, 10 lbs 10 lbs 8.32 6 Apr
## 8 Great Value Granulated Sugar, 10 lbs 10 lbs 8.32 6 Apr
## 9 Great Value Granulated Sugar, 10 lbs 10 lbs 8.32 6 Apr
## 10 Great Value Granulated Sugar, 10 lbs 10 lbs 8.32 6 Apr
## 11 Great Value Granulated Sugar, 10 lbs 10 lbs 8.32 6 Apr
## 12 Great Value Granulated Sugar, 10 lbs 10 lbs 8.32 6 Apr
## 13 Great Value Granulated Sugar, 10 lbs 10 lbs 8.32 6 May
## 14 Great Value Granulated Sugar, 10 lbs 10 lbs 8.32 6 May
## 15 Great Value Granulated Sugar, 10 lbs 10 lbs 8.32 6 May
## 16 Great Value Granulated Sugar, 10 lbs 10 lbs 8.32 6 May
## 17 Great Value Granulated Sugar, 10 lbs 10 lbs 8.32 6 May
## 18 Great Value Granulated Sugar, 10 lbs 10 lbs 8.32 6 May
## 19 Great Value Granulated Sugar, 10 lbs 10 lbs 8.32 6 Jun
## store_region extracted_text Size_in_lbs Price_per_lb
## 1 Southcentral <NA> 10 0.832
## 2 Southcentral <NA> 10 0.832
## 3 Southcentral <NA> 10 0.832
## 4 Southcentral <NA> 10 0.832
## 5 Interior <NA> 10 0.832
## 6 Southeast <NA> 10 0.832
## 7 Southcentral <NA> 10 0.832
## 8 Southcentral <NA> 10 0.832
## 9 Southcentral <NA> 10 0.832
## 10 Southcentral <NA> 10 0.832
## 11 Interior <NA> 10 0.832
## 12 Southeast <NA> 10 0.832
## 13 Southcentral <NA> 10 0.832
## 14 Southcentral <NA> 10 0.832
## 15 Southcentral <NA> 10 0.832
## 16 Southcentral <NA> 10 0.832
## 17 Interior <NA> 10 0.832
## 18 Southeast <NA> 10 0.832
## 19 Interior <NA> 10 0.832
# Distribution views of the sugar price per pound: a histogram, the number of
# rows priced below the first quartile (Q1 comes from the outlier step earlier
# in this section), and a density plot with the mean marked in red.
hist(
  master_sugar_std$Price_per_lb,
  main = "Histogram of Sugar Prices",
  xlab = "Price_per_lb"
)

# na.rm keeps the count defined even if some prices are missing.
sum(master_sugar_std$Price_per_lb < Q1, na.rm = TRUE)
## [1] 65

# The mean is passed as a constant rather than through aes(): inside aes() the
# single value is recycled to every data row, so one overlapping line would be
# drawn per row instead of a single line.
ggplot(master_sugar_std, aes(x = Price_per_lb)) +
  geom_density(fill = "blue", alpha = 0.5) +
  geom_vline(
    xintercept = mean(master_sugar_std$Price_per_lb, na.rm = TRUE),
    color = "red",
    linetype = "dashed"
  ) +
  labs(
    title = "Density Plot of Sugar Prices",
    x = "Price_per_lb",
    y = "Density"
  )

Outliers-Flour
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.643 1.229 2.149 1.903 2.399 3.299

## pull_date UPC HOME_STORE_NAME city zip
## 1 2023-03-09 10311380 Walmart Anchorage 99504
## 2 2023-03-09 10311380 Walmart Wasilla 99654
## 3 2023-03-09 10311308 Walmart Ketchikan 99901
## 4 2023-04-17 10311380 Walmart Anchorage 99504
## 5 2023-04-17 10311380 Walmart Wasilla 99654
## 6 2023-06-15 10311380 Walmart Anchorage 99504
## 7 2023-06-15 10311380 Walmart Wasilla 99654
## 8 2023-06-15 10311308 Walmart Ketchikan 99901
## SKU_DESCRIPTION Size price
## 1 Gold Medal Unbleached All Purpose Flour, 10 pounds 10 pounds 6.43
## 2 Gold Medal Unbleached All Purpose Flour, 10 pounds 10 pounds 6.43
## 3 Gold Medal All Purpose Flour, 10 lb. 10 lb 6.43
## 4 Gold Medal Unbleached All Purpose Flour, 10 pounds 10 pounds 6.43
## 5 Gold Medal Unbleached All Purpose Flour, 10 pounds 10 pounds 6.43
## 6 Gold Medal Unbleached All Purpose Flour, 10 pounds 10 pounds 6.43
## 7 Gold Medal Unbleached All Purpose Flour, 10 pounds 10 pounds 6.43
## 8 Gold Medal All Purpose Flour, 10 lb. 10 lb 6.43
## internal_prod_code month store_region extracted_text Size_in_lbs Price_per_lb
## 1 7 Mar Southcentral <NA> 10 0.643
## 2 7 Mar Southcentral <NA> 10 0.643
## 3 7 Mar Southeast <NA> 10 0.643
## 4 7 Apr Southcentral <NA> 10 0.643
## 5 7 Apr Southcentral <NA> 10 0.643
## 6 7 Jun Southcentral <NA> 10 0.643
## 7 7 Jun Southcentral <NA> 10 0.643
## 8 7 Jun Southeast <NA> 10 0.643

## [1] 226
