Introduction

For this dataset, we will be using the monthly food price estimate by product and market You can access the data here: https://datacatalog.worldbank.org/search/dataset/0065651/Philippines---Monthly-food-price-estimates-by-product-and-market

The intention is to understand how food prices increase over time by looking at trends and potentially the impact of politics

Loading the Data: Load the libraries, data and clean the columns

# load packages and the dataset
library(tidyverse)
library(ggplot2)
library(readr)
library(ggthemes)
data <- read_csv("PHL_RTFP_mkt_2007_2024-09-11.csv")
## Rows: 23217 Columns: 109
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (10): ISO3, country, adm1_name, adm2_name, mkt_name, geo_id, currency, ...
## dbl  (98): lat, lon, year, month, data_coverage, data_coverage_recent, index...
## date  (1): price_date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
print(paste0("This data has ",ncol(data)," columns. These are the variables:"))
## [1] "This data has 109 columns. These are the variables:"
names(data)
##   [1] "ISO3"                         "country"                     
##   [3] "adm1_name"                    "adm2_name"                   
##   [5] "mkt_name"                     "lat"                         
##   [7] "lon"                          "geo_id"                      
##   [9] "price_date"                   "year"                        
##  [11] "month"                        "currency"                    
##  [13] "components"                   "start_dense_data"            
##  [15] "last_survey_point"            "data_coverage"               
##  [17] "data_coverage_recent"         "index_confidence_score"      
##  [19] "spatially_interpolated"       "beans"                       
##  [21] "cabbage"                      "carrots"                     
##  [23] "eggs"                         "garlic"                      
##  [25] "meat_beef_chops"              "meat_chicken_whole"          
##  [27] "meat_pork"                    "onions"                      
##  [29] "potatoes"                     "rice"                        
##  [31] "tomatoes"                     "o_beans"                     
##  [33] "h_beans"                      "l_beans"                     
##  [35] "c_beans"                      "inflation_beans"             
##  [37] "trust_beans"                  "o_cabbage"                   
##  [39] "h_cabbage"                    "l_cabbage"                   
##  [41] "c_cabbage"                    "inflation_cabbage"           
##  [43] "trust_cabbage"                "o_carrots"                   
##  [45] "h_carrots"                    "l_carrots"                   
##  [47] "c_carrots"                    "inflation_carrots"           
##  [49] "trust_carrots"                "o_eggs"                      
##  [51] "h_eggs"                       "l_eggs"                      
##  [53] "c_eggs"                       "inflation_eggs"              
##  [55] "trust_eggs"                   "o_garlic"                    
##  [57] "h_garlic"                     "l_garlic"                    
##  [59] "c_garlic"                     "inflation_garlic"            
##  [61] "trust_garlic"                 "o_meat_beef_chops"           
##  [63] "h_meat_beef_chops"            "l_meat_beef_chops"           
##  [65] "c_meat_beef_chops"            "inflation_meat_beef_chops"   
##  [67] "trust_meat_beef_chops"        "o_meat_chicken_whole"        
##  [69] "h_meat_chicken_whole"         "l_meat_chicken_whole"        
##  [71] "c_meat_chicken_whole"         "inflation_meat_chicken_whole"
##  [73] "trust_meat_chicken_whole"     "o_meat_pork"                 
##  [75] "h_meat_pork"                  "l_meat_pork"                 
##  [77] "c_meat_pork"                  "inflation_meat_pork"         
##  [79] "trust_meat_pork"              "o_onions"                    
##  [81] "h_onions"                     "l_onions"                    
##  [83] "c_onions"                     "inflation_onions"            
##  [85] "trust_onions"                 "o_potatoes"                  
##  [87] "h_potatoes"                   "l_potatoes"                  
##  [89] "c_potatoes"                   "inflation_potatoes"          
##  [91] "trust_potatoes"               "o_rice"                      
##  [93] "h_rice"                       "l_rice"                      
##  [95] "c_rice"                       "inflation_rice"              
##  [97] "trust_rice"                   "o_tomatoes"                  
##  [99] "h_tomatoes"                   "l_tomatoes"                  
## [101] "c_tomatoes"                   "inflation_tomatoes"          
## [103] "trust_tomatoes"               "o_food_price_index"          
## [105] "h_food_price_index"           "l_food_price_index"          
## [107] "c_food_price_index"           "inflation_food_price_index"  
## [109] "trust_food_price_index"

From here, we are able to determine that there are 109 columns. The only columns we need are columns 104 to 108 which indicate the opening, close, high and low foodprice index along with inflation, and columns 3 to 5 which show the location and market name, and column 9 to 11 which shows the price date

data <- data[,c(3:5,9:11,104:108)]
data[1:5,] # view first 5 rows
## # A tibble: 5 × 11
##   adm1_name         adm2_name mkt_name price_date  year month o_food_price_index
##   <chr>             <chr>     <chr>    <date>     <dbl> <dbl>              <dbl>
## 1 Cordillera Admin… Abra      Abra     2007-01-01  2007     1               0.71
## 2 Cordillera Admin… Abra      Abra     2007-02-01  2007     2               0.71
## 3 Cordillera Admin… Abra      Abra     2007-03-01  2007     3               0.69
## 4 Cordillera Admin… Abra      Abra     2007-04-01  2007     4               0.69
## 5 Cordillera Admin… Abra      Abra     2007-05-01  2007     5               0.7 
## # ℹ 4 more variables: h_food_price_index <dbl>, l_food_price_index <dbl>,
## #   c_food_price_index <dbl>, inflation_food_price_index <dbl>

Now that we have the data, we want to focus on one administrative area, which would be the National Capital Region. This is where the capital of the Philippines is, Metro Manila. We can do this by using dplyr. to subset it only to NCR. I will put it in the dataframe ncr

# start by converting to factors
data$adm1_name <- as.factor(data$adm1_name)
data$adm2_name <- as.factor(data$adm2_name)
data$mkt_name <- as.factor(data$mkt_name)

levels(data$adm1_name) # from here, we see that the fourth level is NCR
##  [1] "Autonomous region in Muslim Mindanao"
##  [2] "Cordillera Administrative region"    
##  [3] "Market Average"                      
##  [4] "National Capital region"             
##  [5] "Region I"                            
##  [6] "Region II"                           
##  [7] "Region III"                          
##  [8] "Region IV-A"                         
##  [9] "Region IV-B"                         
## [10] "Region IX"                           
## [11] "Region V"                            
## [12] "Region VI"                           
## [13] "Region VII"                          
## [14] "Region VIII"                         
## [15] "Region X"                            
## [16] "Region XI"                           
## [17] "Region XII"                          
## [18] "Region XIII"
ncr <- data %>% filter(adm1_name == levels(data$adm1_name)[4])
ncr[1:5,] # get the first 5 rows
## # A tibble: 5 × 11
##   adm1_name         adm2_name mkt_name price_date  year month o_food_price_index
##   <fct>             <fct>     <fct>    <date>     <dbl> <dbl>              <dbl>
## 1 National Capital… Metropol… Metro M… 2007-01-01  2007     1               0.8 
## 2 National Capital… Metropol… Metro M… 2007-02-01  2007     2               0.81
## 3 National Capital… Metropol… Metro M… 2007-03-01  2007     3               0.77
## 4 National Capital… Metropol… Metro M… 2007-04-01  2007     4               0.76
## 5 National Capital… Metropol… Metro M… 2007-05-01  2007     5               0.78
## # ℹ 4 more variables: h_food_price_index <dbl>, l_food_price_index <dbl>,
## #   c_food_price_index <dbl>, inflation_food_price_index <dbl>

Data Cleanup: We start by formatting the data mutating and getting the mean food price and omitting NA rows. Then doing multiple checks to ensure integrity of data. Involves using the count() function.

# we will start by adding a mean using the mutate function of dplyr
ncr <- ncr %>% mutate(mean_price_index = round(((o_food_price_index + h_food_price_index + l_food_price_index + c_food_price_index)/4),2)) # add a monthly mean to the price index

# using dplyr, we will also use the count function to count NA columns.
count(ncr,mpi_na=sum(is.na(mean_price_index)),ifpi_na=sum(is.na(inflation_food_price_index)))
## # A tibble: 1 × 3
##   mpi_na ifpi_na     n
##    <int>   <int> <int>
## 1      0      12   213

We can see here that under the two variales I will be exploring (Mean Price Index and Inflation Food Price Index), Inflation Food Price has NA values. We need to omit this and run another check.

ncr <- na.omit(ncr) # omit NA values

# double check for NA values
count(ncr,mpi_na=sum(is.na(mean_price_index)),ifpi_na=sum(is.na(inflation_food_price_index)))
## # A tibble: 1 × 3
##   mpi_na ifpi_na     n
##    <int>   <int> <int>
## 1      0       0   201

Given this, after the na.omit function, there are no more NA values but total rows is down to 201. We will analyze if 201 is logical:

print(paste0("There are ",nrow(ncr)," rows. Since every row on the data set represents one month, we will check the last row. The latest data is ",max(ncr$price_date)," and the minimum is ",min(ncr$price_date),". Given this, there are ", 2024-2008, " years or ", (12*16)," months and with the additional 9 months of the year given the latest data set, then we should expect ", (12*16)+9," rows. For final confirmation, we can also see ",length(unique(ncr$price_date))," unique values for price date"))
## [1] "There are 201 rows. Since every row on the data set represents one month, we will check the last row. The latest data is 2024-09-01 and the minimum is 2008-01-01. Given this, there are 16 years or 192 months and with the additional 9 months of the year given the latest data set, then we should expect 201 rows. For final confirmation, we can also see 201 unique values for price date"

Question 1: What is the yearly trend of mean price index and food price inflation

We will use Dplyr to analyze this. Involves using summarise and group_by function

year_view <- ncr %>% select(year, mean_price_index,inflation_food_price_index) %>% group_by(year) %>% summarise("Mean Food Price Index" = round(mean(mean_price_index),2),"Mean Inflation Food Price Index"  = round(mean(inflation_food_price_index),2)) %>% rename(Year=year)
year_view
## # A tibble: 17 × 3
##     Year `Mean Food Price Index` `Mean Inflation Food Price Index`
##    <dbl>                   <dbl>                             <dbl>
##  1  2008                    0.78                             -3.53
##  2  2009                    0.81                              4.59
##  3  2010                    0.81                             -0.29
##  4  2011                    0.87                              8.95
##  5  2012                    0.85                             -3.08
##  6  2013                    0.86                              1.33
##  7  2014                    0.94                             10.6 
##  8  2015                    0.92                             -2.8 
##  9  2016                    0.95                              3.44
## 10  2017                    1.03                              8.14
## 11  2018                    1.14                             10.7 
## 12  2019                    1.08                             -5.5 
## 13  2020                    1.24                             17.7 
## 14  2021                    1.39                             10.7 
## 15  2022                    1.44                              4.89
## 16  2023                    1.6                              10.5 
## 17  2024                    1.56                             -1.11

We see here that the mean food price and inflation, as expected has increased since 2008 but there are some years that it’s flat, some years where it declines the following year so it’s not just increasing year on year

Important to note as well that the range (max minus min) of the mean price index from 2008 to 2016 is .17 whereas from 2017 to 2024 it’s .57. Hypothesis include new president (Rodrigo Duterte) and Covid from 2020 onwards.

Mean inflation simply follows increase and decrease of mean food price.

Question 2: Which months tend to have the highest inflation

month_view <- ncr %>% select(month, mean_price_index,inflation_food_price_index) %>% group_by(month) %>% summarise("Mean Food Price Index" = round(mean(mean_price_index),2),"Mean Inflation Food Price Index"  = round(mean(inflation_food_price_index),2)) %>% rename(Month=month)
month_view$Month <- as.factor(month_view$Month)

# change factor levels
levels(month_view$Month) <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
month_view
## # A tibble: 12 × 3
##    Month `Mean Food Price Index` `Mean Inflation Food Price Index`
##    <fct>                   <dbl>                             <dbl>
##  1 Jan                      1.09                              4.55
##  2 Feb                      1.06                              4.52
##  3 Mar                      1.01                              4.41
##  4 Apr                      1.01                              4.25
##  5 May                      1.03                              4.12
##  6 Jun                      1.06                              4.15
##  7 Jul                      1.08                              4.49
##  8 Aug                      1.09                              4.55
##  9 Sep                      1.11                              4.48
## 10 Oct                      1.08                              5   
## 11 Nov                      1.1                               4.88
## 12 Dec                      1.1                               4.75

The mean food price tends to be lowest during the summer season (Mar to May) but start to increase around the time of the rainy season. Food tends to be highest during the end of the year when consumption is up.

Mean inflation is also highest in the last quarter of the year. Higher than any other quarter of the year.

Graphing the Data

We will now use Ggplot to visualize the mean food price over time and visualize the increasing and decreasing trends over the summar season but also to show the year on year experience

ncr %>% select(price_date, mean_price_index) %>% arrange(price_date) %>% ggplot(aes(x=price_date,y=mean_price_index)) + geom_point() + geom_smooth() + scale_x_date(date_breaks = "1 year", minor_breaks = "6 month", date_labels = "%y") + labs(title="Avg Food Price Index in the Philippines", x="Year", y="Mean Price Index") + theme_economist_white()

We also see here the plot and the trend line of the average food price index in the Philippines. Consistent with our findings:

Another one to note is that the mean price index was generally under 1 from 2008 to 2016 only with a range of .28 (min is .73, max is 1.01).

However, The increase from 2017 to 2024 shows a range of .82 (min is .92 and max is 1.76). Hypothesis of the increase:

There was still the similar pattern observed during the steep increase but not as obvious as from 2008 to 2016.