About the Data

THis is a datset of monthly values of each country’s food prices since 2017. I’m trying to see how food prices have varied in past years.

importing data + libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(ggthemes)
library(ggvis)
## 
## Attaching package: 'ggvis'
## 
## The following object is masked from 'package:ggplot2':
## 
##     resolution
library(reshape2)
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(knitr)
library(shiny)
library(reticulate)
library(formatR)
library(maps)
## 
## Attaching package: 'maps'
## 
## The following object is masked from 'package:purrr':
## 
##     map
library(mapdata)
library(dplyr)
food_ind <- read_csv("WLD_RTFP_country_2023-09-12.csv")
## Rows: 4773 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): country, ISO3
## dbl  (5): Open, High, Low, Close, Inflation
## date (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(food_ind)
## spc_tbl_ [4,773 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Open     : num [1:4773] 0.53 0.53 0.53 0.53 0.56 0.59 0.59 0.6 0.6 0.63 ...
##  $ High     : num [1:4773] 0.54 0.54 0.54 0.55 0.57 0.6 0.6 0.6 0.62 0.64 ...
##  $ Low      : num [1:4773] 0.53 0.53 0.53 0.53 0.55 0.58 0.59 0.59 0.59 0.62 ...
##  $ Close    : num [1:4773] 0.53 0.53 0.53 0.55 0.57 0.59 0.59 0.59 0.62 0.63 ...
##  $ Inflation: num [1:4773] NA NA NA NA NA NA NA NA NA NA ...
##  $ country  : chr [1:4773] "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
##  $ ISO3     : chr [1:4773] "AFG" "AFG" "AFG" "AFG" ...
##  $ date     : Date[1:4773], format: "2007-01-01" "2007-02-01" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Open = col_double(),
##   ..   High = col_double(),
##   ..   Low = col_double(),
##   ..   Close = col_double(),
##   ..   Inflation = col_double(),
##   ..   country = col_character(),
##   ..   ISO3 = col_character(),
##   ..   date = col_date(format = "")
##   .. )
##  - attr(*, "problems")=<externalptr>

Cleaning the dataset

food_ind_clean <- food_ind[!is.na(food_ind$Close), ]

All countries compared on one single graph and listing down the top 10 countries which saw the most change in past years

food_ind_clean$date <- as.Date(food_ind_clean$date)

# Calculate the change in 'Close' price for each country
food_ind_clean <- food_ind_clean %>%
  group_by(country) %>%
  mutate(change = Close - lag(Close))

# Create the scatter plot for 'Close' column with a line connecting points
ggplot(food_ind_clean, aes(x = date, y = Close, color = change, group = country)) +
  geom_point(aes(size = abs(change)), alpha = 0.5) +
  geom_line() +  # Add a line connecting points
  scale_color_gradient(low = "blue", high = "red") +
  scale_size_continuous(range = c(2, 6)) +
  labs(title = "Variation of Food Price Inflation Across Countries",
       y = "Close Price",
       x = "Date",
       color = "Change in Close Price") +
  theme_minimal() +
  theme(legend.position = "top") +
  guides(color = guide_colorbar(title = "Change in Price"))
## Warning: Removed 25 rows containing missing values (`geom_point()`).

country_change <- food_ind %>%
  group_by(country) %>%
  summarize(total_change = sum(Close - lag(Close, default = first(Close))))

# Select the top 10 countries with the highest total change
top_countries <- country_change %>%
  arrange(desc(total_change)) %>%
  head(10)

# Print the names of the top 10 countries
cat("Top 10 Countries with the Most Change in Close Price:\n")
## Top 10 Countries with the Most Change in Close Price:
cat(top_countries$country, sep = "\n")
## Sudan
## Lebanon
## Syrian Arab Republic
## South Sudan
## Haiti
## Yemen, Rep.
## Myanmar
## Nigeria
## Burundi
## Liberia

Generating heatmap of the world map using the geom_polygon to visualize the above top 10 countries.

doing this for past 5 years as per above graph they are the most releveant ones

# Filter the dataset to include data from 2017 to 2023
start_date <- as.Date("2017-01-01")
end_date <- as.Date("2023-12-31")

filtered_data <- food_ind_clean %>%
  filter(date >= start_date & date <= end_date)

# Calculate the change in 'Close' column for each country
filtered_data <- filtered_data %>%
  arrange(country, date) %>%
  group_by(country) %>%
  mutate(Change = Close - lag(Close, default = first(Close)))

# Downloading worldmap dataset 
world_map <- map_data("world")

# Merging dataset 
merged_data <- merge(world_map, filtered_data, by.x = "region", by.y = "country", all.x = TRUE)

# Creating the heatmap
ggplot(merged_data, aes(x = long, y = lat, group = group, fill = Change)) +
  geom_polygon() +
  scale_fill_gradient(low = "maroon", high = "yellow", name = "Change in Price") +
  coord_fixed(ratio = 1.25) +
  labs(title = "Change in  Price Across Countries (2017-2023)",
       fill = "Change in Close Price") +
  theme_void()

This is consistent with the top 10 countries listed as the heatmap is only highlighting those regions/countries