library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
Datasets used are sourced from the U.S. Bureau of Labor Statistics website: https://www.bls.gov/data/
CPIForecast.csv US_Average_Prices_CPI.csv US_Average_Food_Prices.csv
For the purposes of the project, we will be focusing on the Food at Home category of CPI and food items/groupings.
US_Ave_Prices_df <- read.csv("US_Average_Prices_CPI.csv")
US_Ave_Prices_df |>
filter(year %in% c(2005, 2024)) |>
group_by(item_name) |>
summarise(CPI_Difference = round(((value[year == 2024] - value[year == 2005]) / value[year == 2005])*100)) |>
hchart("column", hcaes(x = item_name, y = CPI_Difference, group = item_name)) |>
hc_add_theme(hc_theme_ft()) |>
hc_title(text = "Increase in CPI for Urban US Consumers From the Years 2005 to 2024") |>
hc_xAxis(title = list(text = "")) |>
hc_yAxis(title = list(text = "Increase of Percentage CPI "),
labels = list(formatter = JS("function() { return (this.value).toFixed(0) + '%';}"))) |>
hc_tooltip(
headerFormat = "",
pointFormat = "<span style = 'text-transform:uppercase; color:{series.color}'><strong><b>{point.name}</b></strong></span><br>
<b>CPI: {point.y}</b>",
useHTML = TRUE
) |>
hc_plotOptions(
column = list(
dataLabels = list(enabled = TRUE, format = "{point.y}%", style = list(fontWeight = "bold")),
pointWidth = 50,
pointPadding = 0,
groupPadding = 0.5))
Introduction Graph that shows the difference/change increase of Average CPI for each of these items from the years 2005 to 2024.
This is using the US City Average of these values across the US.
So Food at Home items CPI ,on average, have increased by 62% according to the U.S. Bureau of Labor Statistics.
This represents an inflation of the average cost of items under each of these categories.
Meaning our spending power is decreasing and we have to spend more for the same “basket of goods”.
US_Ave_Prices_df |>
filter(item_name == "Food at home") |>
mutate(CPI = round(value)) |>
select(item_name, year, CPI) |>
hchart("line", hcaes(x = year, y = CPI)) |>
hc_title(text = "<span>📈 </span>Increase in Consumer Price Index of Food At Home", useHTML = TRUE) |>
hc_subtitle(text = "From the Years 2005 to 2024") |>
hc_xAxis(title = list(text = "")) |>
hc_yAxis(title = list(text = "Consumer Price Index")) |>
hc_tooltip(
pointFormat = "<b>CPI: {point.y}</b>",
useHTML = TRUE
) |>
hc_add_theme(hc_theme_smpl()) |>
hc_plotOptions(
line = list(
dataLabels = list(enabled = TRUE, format = '{point.y}'))
)
This is just a more detailed look ar the CPI increases per year from the yearly range of 2005-2024.
These are the actual values of CPI for Food at Home from the U.S. Bureau of Labor Statistics.
There has been a steady increase in its value except for a slight drop from 2015 to 2016. It began to drastically take off from 2021 to present. Maybe this can be attributed to Political/Government policy changes and comeing out of the Pandemic.
Dairy goods (i.e., milk, butter, cheese, eggs, yogurt) Fresh produce (i.e., fresh vegetables, fruit, juices) Snack foods (i.e., chips, cookies, crackers, dried fruit) Bread and pastries Frozen goods (i.e., frozen meals, appetizers, desserts) Breakfast goods (i.e., cereal, oatmeal, pancake mix) Canned goods (i.e., canned vegetables, canned fruit) Pasta and rice Condiments (i.e., hot sauce, peanut butter, oil) Butcher meats (i.e., beef, chicken, pork, ham, turkey) Deli meats and cheeses
eggs, milk, fresh vegetables, fruit, meats.
US_Ave_Food_Prices_df <- read.csv("US_Average_Food_Prices.csv")
str(US_Ave_Food_Prices_df)
## 'data.frame': 193079 obs. of 6 variables:
## $ series_id: chr "APU0000701111 " "APU0000701111 " "APU0000701111 " "APU0000701111 " ...
## $ year : int 1995 1995 1995 1995 1995 1995 1995 1995 1995 1995 ...
## $ period : chr "M01" "M02" "M03" "M04" ...
## $ value : chr "0.238" "0.242" "0.242" "0.236" ...
## $ item_name: chr "Flour, white, all purpose, per lb. (453.6 gm)" "Flour, white, all purpose, per lb. (453.6 gm)" "Flour, white, all purpose, per lb. (453.6 gm)" "Flour, white, all purpose, per lb. (453.6 gm)" ...
## $ area_name: chr "U.S. city average" "U.S. city average" "U.S. city average" "U.S. city average" ...
*This big ole thang calculates the Average price of Distinct Food Groups for the year 2022.
*Note: The Dataset has redundant entries, so the groups chosen are to filter those out.
US_2022_FG_Ave <- US_Ave_Food_Prices_df |>
filter(year == 2022 & area_name == "U.S. city average") |>
mutate(value = as.numeric(value)) |>
group_by(item_name) |>
summarise(item_mean_2022 = round(mean(value, na.rm = TRUE),2)) |>
mutate(
food_group = case_when(
item_name %in% c("Rice, white, long grain, uncooked, per lb. (453.6 gm)") ~ "Grains",
item_name %in% c("Milk, fresh, whole, fortified, per gal. (3.8 lit)","American processed cheese, per lb. (453.6 gm)","Cheddar cheese, natural, per lb. (453.6 gm)","Milk, fresh, low-fat, reduced fat, skim, per gal. (3.8 lit)","Yogurt, per 8 oz. (226.8 gm)","Butter, stick, per lb. (453.6 gm)") ~ "Dairy",
item_name %in% c("Bacon, sliced, per lb. (453.6 gm)","Chicken, fresh, whole, per lb. (453.6 gm)","Chicken legs, bone-in, per lb. (453.6 gm)","Eggs, grade A, large, per doz.","All uncooked ground beef, per lb. (453.6 gm)","Chicken breast, boneless, per lb. (453.6 gm)") ~ "Meats",
item_name %in% c("Cookies, chocolate chip, per lb. (453.6 gm)","Potato chips, per 16 oz.","") ~ "Snacks",
item_name %in% c("Bananas, per lb. (453.6 gm)","Lemons, per lb. (453.6 gm)","Oranges, Navel, per lb. (453.6 gm)","Strawberries, dry pint, per 12 oz. (340.2 gm)","Potatoes, white, per lb. (453.6 gm)","Tomatoes, field grown, per lb. (453.6 gm)","Lettuce, romaine, per lb. (453.6 gm)") ~ "Produce",
item_name %in% c("Bread, white, pan, per lb. (453.6 gm)", "Bread, whole wheat, pan, per lb. (453.6 gm)") ~ "Bread",
item_name %in% c("Coffee, 100%, ground roast, all sizes, per lb. (453.6 gm)") ~ "Coffee",
TRUE ~ "Other"
)
) |>
filter(!food_group %in% c("Other")) |>
group_by(food_group) |>
summarise(food_group_mean = round(mean(item_mean_2022, na.rm = TRUE),2))
library(viridis)
## Loading required package: viridisLite
US_2022_FG_Ave |>
hchart("bar", hcaes(x = food_group, y = food_group_mean, group = food_group)) |>
hc_add_theme(hc_theme_ffx()) |>
hc_colors(viridis(7)) |>
hc_title(text = "<span>🍅 </span> <b><strong>Popular Food Groups Average Prices in US for 2022</strong></b>", useHTML = TRUE) |>
hc_yAxis(title = list(text = "<b><strong>Average Price</strong></b>", useHTML = TRUE),
labels = list(format = "${value:.2f}")) |>
hc_xAxis(title = list(text = "")) |>
hc_plotOptions(
bar = list(
dataLabels = list(enabled = TRUE, format = '${point.y:.2f}'),
pointWidth = 25,
pointPadding = 0,
groupPadding = 0.5)
) |>
hc_legend(enabled = FALSE)
This is a graph that shows the averages prices of items belonging to these popular food groups in US Cities for the year 2022.
The items in this food group are averaged using their average prices at a standard amount of 1 lb or 1 gallon for liquids like milk.
Items in Groups:
Grains: Rice, white, long grain, uncooked, per lb. (453.6 gm)
Dairy: Milk, fresh, whole, fortified, per gal. (3.8 lit) , American processed cheese, per lb. (453.6 gm) , Cheddar cheese, natural, per lb. (453.6 gm) , Milk, fresh, low-fat, reduced fat, skim, per gal. (3.8 lit) , Yogurt, per 8 oz. (226.8 gm) , Butter, stick, per lb. (453.6 gm)
Meats: Bacon, sliced, per lb. (453.6 gm) , Chicken, fresh, whole, per lb. (453.6 gm), Chicken legs, bone-in, per lb. (453.6 gm), Eggs, grade A, large, per doz., All uncooked ground beef, per lb. (453.6 gm), Chicken breast, boneless, per lb. (453.6 gm)
Snacks: Cookies, chocolate chip, per lb. (453.6 gm), Potato chips, per 16 oz.
Produce: Bananas, per lb. (453.6 gm), Lemons, per lb. (453.6 gm), Oranges, Navel, per lb. (453.6 gm), Strawberries, dry pint, per 12 oz. (340.2 gm), Potatoes, white, per lb. (453.6 gm), Tomatoes, field grown, per lb. (453.6 gm), Lettuce, romaine, per lb. (453.6 gm)
Bread: Bread, white, pan, per lb. (453.6 gm), Bread, whole wheat, pan, per lb. (453.6 gm)
Coffee: Coffee, 100%, ground roast, all sizes, per lb. (453.6 gm)
US_2021_FG_Ave <- US_Ave_Food_Prices_df |>
filter(year == 2021 & area_name == "U.S. city average") |>
mutate(value = as.numeric(value)) |>
group_by(item_name) |>
summarise(item_mean_2021 = round(mean(value, na.rm = TRUE),2)) |>
mutate(
food_group = case_when(
item_name %in% c("Rice, white, long grain, uncooked, per lb. (453.6 gm)") ~ "Grains",
item_name %in% c("Milk, fresh, whole, fortified, per gal. (3.8 lit)","American processed cheese, per lb. (453.6 gm)","Cheddar cheese, natural, per lb. (453.6 gm)","Milk, fresh, low-fat, reduced fat, skim, per gal. (3.8 lit)","Yogurt, per 8 oz. (226.8 gm)","Butter, stick, per lb. (453.6 gm)") ~ "Dairy",
item_name %in% c("Bacon, sliced, per lb. (453.6 gm)","Chicken, fresh, whole, per lb. (453.6 gm)","Chicken legs, bone-in, per lb. (453.6 gm)","Eggs, grade A, large, per doz.","All uncooked ground beef, per lb. (453.6 gm)","Chicken breast, boneless, per lb. (453.6 gm)","") ~ "Meats",
item_name %in% c("Cookies, chocolate chip, per lb. (453.6 gm)","Potato chips, per 16 oz.","") ~ "Snacks",
item_name %in% c("Bananas, per lb. (453.6 gm)","Lemons, per lb. (453.6 gm)","Oranges, Navel, per lb. (453.6 gm)","Strawberries, dry pint, per 12 oz. (340.2 gm)","Potatoes, white, per lb. (453.6 gm)","Tomatoes, field grown, per lb. (453.6 gm)","Lettuce, romaine, per lb. (453.6 gm)") ~ "Produce",
item_name %in% c("Bread, white, pan, per lb. (453.6 gm)", "Bread, whole wheat, pan, per lb. (453.6 gm)") ~ "Bread",
item_name %in% c("Coffee, 100%, ground roast, all sizes, per lb. (453.6 gm)") ~ "Coffee",
TRUE ~ "Other"
)
) |>
filter(!food_group %in% c("Other")) |>
group_by(food_group) |>
summarise(food_group_mean = round(mean(item_mean_2021, na.rm = TRUE),2))
US_2021_FG_Ave
## # A tibble: 7 × 2
## food_group food_group_mean
## <chr> <dbl>
## 1 Bread 1.82
## 2 Coffee 4.7
## 3 Dairy 3.49
## 4 Grains 0.79
## 5 Meats 3.3
## 6 Produce 1.79
## 7 Snacks 4.52
US_2021_FG_Ave |>
hchart("bar", hcaes(x = food_group, y = food_group_mean, group = food_group)) |>
hc_add_theme(hc_theme_ffx()) |>
hc_colors(viridis(7)) |>
hc_title(text = "<span>🍅 </span> <b><strong>Popular Food Groups Average Prices in US for 2021</strong></b>", useHTML = TRUE) |>
hc_yAxis(title = list(text = "<b><strong>Average Price</strong></b>", useHTML = TRUE),
labels = list(format = "${value:.2f}")) |>
hc_xAxis(title = list(text = "")) |>
hc_plotOptions(
bar = list(
dataLabels = list(enabled = TRUE, format = '${point.y:.2f}'),
pointWidth = 25,
pointPadding = 0,
groupPadding = 0.5)
) |>
hc_legend(enabled = FALSE)
US_2021_FG_Ave_w_year <- US_2021_FG_Ave |>
mutate(Year = 2021)
US_2022_FG_Ave_w_year <- US_2022_FG_Ave |>
mutate(Year = 2022)
US_2021_2022_FG_Ave <- bind_rows(US_2021_FG_Ave_w_year, US_2022_FG_Ave_w_year)
US_2021_2022_FG_Ave |>
mutate(year_group = paste(Year, food_group, sep = " - ")) |>
hchart("bar", hcaes(x = food_group, y = food_group_mean, group = year_group)) |>
hc_add_theme(hc_theme_ffx()) |>
hc_colors(viridis(7)) |>
hc_title(text = "<span>🍅 </span> <b><strong>Popular Food Groups Average Prices in US for 2021-2022</strong></b>", useHTML = TRUE) |>
hc_yAxis(title = list(text = "<b><strong>Average Price</strong></b>", useHTML = TRUE),
labels = list(format = "${value:.2f}")) |>
hc_xAxis(title = list(text = "")) |>
hc_plotOptions(
bar = list(
dataLabels = list(enabled = TRUE, format = '${point.y:.2f}'),
pointWidth = 15,
pointPadding = 2,
groupPadding = 0.2)
) |>
hc_legend(enabled = FALSE)
This is a side by side comparison of the years 2021 - 2022
US_Ave_Food_Prices_df |>
filter((year >= 2005 & year <= 2022) & area_name == "U.S. city average") |>
mutate(value = as.numeric(value)) |>
group_by(year,item_name) |>
summarise(item_mean = round(mean(value, na.rm = TRUE),2)) |>
mutate(
food_group = case_when(
item_name %in% c("Rice, white, long grain, uncooked, per lb. (453.6 gm)") ~ "Grains",
item_name %in% c("Milk, fresh, whole, fortified, per gal. (3.8 lit)","American processed cheese, per lb. (453.6 gm)","Cheddar cheese, natural, per lb. (453.6 gm)","Milk, fresh, low-fat, reduced fat, skim, per gal. (3.8 lit)","Yogurt, per 8 oz. (226.8 gm)","Butter, stick, per lb. (453.6 gm)") ~ "Dairy",
item_name %in% c("Bacon, sliced, per lb. (453.6 gm)","Chicken, fresh, whole, per lb. (453.6 gm)","Chicken legs, bone-in, per lb. (453.6 gm)","Eggs, grade A, large, per doz.","All uncooked ground beef, per lb. (453.6 gm)","Chicken breast, boneless, per lb. (453.6 gm)","") ~ "Meats",
item_name %in% c("Cookies, chocolate chip, per lb. (453.6 gm)","Potato chips, per 16 oz.","") ~ "Snacks",
item_name %in% c("Bananas, per lb. (453.6 gm)","Lemons, per lb. (453.6 gm)","Oranges, Navel, per lb. (453.6 gm)","Strawberries, dry pint, per 12 oz. (340.2 gm)","Potatoes, white, per lb. (453.6 gm)","Tomatoes, field grown, per lb. (453.6 gm)","Lettuce, romaine, per lb. (453.6 gm)") ~ "Produce",
item_name %in% c("Bread, white, pan, per lb. (453.6 gm)", "Bread, whole wheat, pan, per lb. (453.6 gm)") ~ "Bread",
item_name %in% c("Coffee, 100%, ground roast, all sizes, per lb. (453.6 gm)") ~ "Coffee",
TRUE ~ "Other"
)
) |>
filter(!food_group %in% c("Other")) |>
group_by(year,food_group) |>
summarise(food_group_mean = round(mean(item_mean, na.rm = TRUE),2)) |>
hchart("line", hcaes(x = year, y = food_group_mean, group = food_group)) |>
hc_add_theme(hc_theme_ffx()) |>
hc_colors(viridis(7)) |>
hc_title(text = "<span>🍅 </span> <b><strong>Popular Food Groups Average Prices in US from 2005 to 2022</strong></b>", useHTML = TRUE) |>
hc_yAxis(title = list(text = "<b><strong>Average Price</strong></b>", useHTML = TRUE),
labels = list(format = "${value:.2f}")) |>
hc_xAxis(title = list(text = "<b><strong>Years</strong></b>", useHTML = TRUE)) |>
hc_legend(align = "center")
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `value = as.numeric(value)`.
## Caused by warning:
## ! NAs introduced by coercion
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
This is just an overall line graph to represent the changes in prices as the years passed in the US for US City Average.
Again, using standard units like lbs and gallons for items included in the list.
US_2022_FG_Ave |>
hchart("pie", hcaes(name = food_group, y = food_group_mean)) |>
hc_add_theme(hc_theme_flatdark()) |>
hc_title(text = "<span>🍩 </span> <b><strong>2022 US Average Cost of Basket of Goods by Popular Food Group</strong></b>"
, useHTML = TRUE) |>
hc_plotOptions(
pie = list(
innerSize = '40%', # Creates donut hole
dataLabels = list(
enabled = TRUE,
format = "{point.name}<br>${point.y:.2f}"
)
)
)
This pie chart shows a breakdown of an average basket of food groups/categories for the US consumer based on US City average prices and the year 2022. This is basically showing the expected average cost per item belonging to any of these groups, given the standard weight of 1 lb and 1 gallon. So average cost of a gallon of milk would be about $4.00.
US_2022_Ave_Area <- US_Ave_Food_Prices_df |>
filter((year == 2022)) |>
mutate(value = as.numeric(value)) |>
group_by(year,item_name, area_name) |>
summarise(item_mean = round(mean(value, na.rm = TRUE),2)) |>
mutate(
food_group = case_when(
item_name %in% c("Rice, white, long grain, uncooked, per lb. (453.6 gm)") ~ "Grains",
item_name %in% c("Milk, fresh, whole, fortified, per gal. (3.8 lit)","American processed cheese, per lb. (453.6 gm)","Cheddar cheese, natural, per lb. (453.6 gm)","Milk, fresh, low-fat, reduced fat, skim, per gal. (3.8 lit)","Yogurt, per 8 oz. (226.8 gm)","Butter, stick, per lb. (453.6 gm)") ~ "Dairy",
item_name %in% c("Bacon, sliced, per lb. (453.6 gm)","Chicken, fresh, whole, per lb. (453.6 gm)","Chicken legs, bone-in, per lb. (453.6 gm)","Eggs, grade A, large, per doz.","All uncooked ground beef, per lb. (453.6 gm)","Chicken breast, boneless, per lb. (453.6 gm)","") ~ "Meats",
item_name %in% c("Cookies, chocolate chip, per lb. (453.6 gm)","Potato chips, per 16 oz.","") ~ "Snacks",
item_name %in% c("Bananas, per lb. (453.6 gm)","Lemons, per lb. (453.6 gm)","Oranges, Navel, per lb. (453.6 gm)","Strawberries, dry pint, per 12 oz. (340.2 gm)","Potatoes, white, per lb. (453.6 gm)","Tomatoes, field grown, per lb. (453.6 gm)","Lettuce, romaine, per lb. (453.6 gm)") ~ "Produce",
item_name %in% c("Bread, white, pan, per lb. (453.6 gm)", "Bread, whole wheat, pan, per lb. (453.6 gm)") ~ "Bread",
item_name %in% c("Coffee, 100%, ground roast, all sizes, per lb. (453.6 gm)") ~ "Coffee",
TRUE ~ "Other"
)
) |>
filter(!food_group %in% c("Other")) |>
group_by(year,food_group, area_name) |>
summarise(food_group_mean = round(mean(item_mean, na.rm = TRUE),2)) |>
group_by(year, area_name) |>
summarise(area_food_sum = sum(food_group_mean))
## `summarise()` has grouped output by 'year', 'item_name'. You can override using
## the `.groups` argument.
## `summarise()` has grouped output by 'year', 'food_group'. You can override
## using the `.groups` argument.
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
US_2022_Ave_Area |>
hchart("column", hcaes(x = area_name, y = area_food_sum, group = area_name)) |>
hc_add_theme(hc_theme_darkunica()) |>
hc_title(text = "2022 Average Cost of Basket of Goods Based on US Geography") |>
hc_xAxis(title = list(text = "")) |>
hc_yAxis(title = list(text = "<b><strong>Average Price</strong></b>", useHTML = TRUE),
labels = list(format = "${value:.2f}")) |>
hc_plotOptions(
column = list(
dataLabels = list(enabled = TRUE, format = '${point.y:.2f}'),
pointWidth = 50,
pointPadding = 0,
groupPadding = 0.5)
) |>
hc_legend(align = "center")
Comparing a basket of goods (like the pie chart before, which shows an average price of a basket of goods for the selected food groups, based on the US City Average), but comparing the regions of the US, instead of overall US city average.
US_Forcast_df <- read.csv("CPIForecast.csv")
US_Forcast_df |>
filter(Aggregate == "Food at home" & Attribute %in% c("Mid point of prediction interval 2025", "Mid point of prediction interval 2026") & Low.level == "" & !Mid.level == "") |>
select(Aggregate, Mid.level, Value, Attribute) |>
hchart("column", hcaes(x = Mid.level, y = Value, group = Attribute)) |>
hc_add_theme(hc_theme_google()) |>
hc_plotOptions(
column = list(color = "red",
negativeColor = "green",
dataLabels = list(enabled = TRUE,
format = "{point.y}%",
style = list(fontWeight = "bold")),
pointWidth = 25,
pointPadding = 0.5,
groupPadding = 0.1
)
) |>
hc_title(text = "<span>🌐 </span><b><strong>CPI Forecasted Percentage Changes From 2025 to 2026</strong></b>", useHTML = TRUE) |>
hc_legend(enabled = FALSE) |>
hc_xAxis(title = list(text = "<b><strong>Popular Food Groups</strong></b>", useHTML = TRUE)) |>
hc_yAxis(title = list(text = "<b><strong>Predicted CPI Percentage Change</strong></b>", useHTML = TRUE))
This graph shows the Mid point of prediction interval of food at home food groups for the years 2025 to 2026. These are the predicted increases in CPI in percentages shown side by side.
For example, Eggs were predicted to rise in CPI by around 24.8% (shown in red, because that signifies in inflation and less spending power for the consumer), but are predicted to drop by around 11.1% for the upcoming year of 2026 (shown in green because this is better for consumers in the US).