Food Price Prediction

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.2.3

## Warning: package 'ggplot2' was built under R version 4.2.3

## Warning: package 'tibble' was built under R version 4.2.3

## Warning: package 'purrr' was built under R version 4.2.3

## Warning: package 'dplyr' was built under R version 4.2.3

## Warning: package 'lubridate' was built under R version 4.2.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(tidytext)

## Warning: package 'tidytext' was built under R version 4.2.3

library(dplyr)
library(mice)

## Warning: package 'mice' was built under R version 4.2.3

## 
## Attaching package: 'mice'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     cbind, rbind

library(janitor)

## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(tidyverse)
library(magrittr)

## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract

library(DataExplorer)

## Warning: package 'DataExplorer' was built under R version 4.2.3

library(maps)

## Warning: package 'maps' was built under R version 4.2.3

## 
## Attaching package: 'maps'
## 
## The following object is masked from 'package:purrr':
## 
##     map

library(plotly)

## Warning: package 'plotly' was built under R version 4.2.3

## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

library(DT)

## Warning: package 'DT' was built under R version 4.2.3

library(tidytext)
library(gridExtra)

## Warning: package 'gridExtra' was built under R version 4.2.3

## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine

library(factoextra)

## Warning: package 'factoextra' was built under R version 4.2.3

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

library(mice)
library(janitor)
library(forcats)
library(readr)
library(gghighlight)

## Warning: package 'gghighlight' was built under R version 4.2.3

# Load the raw price data from a CSV file in the working directory
food <- read.csv("pk_food_prices.csv")

# Number of rows and columns in the raw data
dim(food)

## [1] 7663   18

# DataExplorer overview table: column types, missing values, memory usage
introduce(food)

##   rows columns discrete_columns continuous_columns all_missing_columns
## 1 7663      18               13                  4                   1
##   total_missing_values complete_rows total_observations memory_usage
## 1                 7666             0             137934      1144600

# DataExplorer plots: the overview metrics, the per-column missing-value
# profile, and bar charts of the discrete columns
plot_intro(food)

plot_missing(food)

plot_bar(food)

## 3 columns ignored with more than 50 categories.
## date: 191 categories
## price: 2989 categories
## sn: 86 categories

# Preview the first rows; note that row 1 holds tag strings such as "#date"
# and "#value" rather than observations
head(food)

##   X_id       date               cmname       unit           category  price
## 1    1      #date           #item+name #item+unit         #item+type #value
## 2    2 2004-01-15 Wheat flour - Retail         KG cereals and tubers   13.0
## 3    3 2004-02-15 Wheat flour - Retail         KG cereals and tubers   13.0
## 4    4 2004-03-15 Wheat flour - Retail         KG cereals and tubers  14.25
## 5    5 2004-04-15 Wheat flour - Retail         KG cereals and tubers   12.5
## 6    6 2004-05-15 Wheat flour - Retail         KG cereals and tubers  13.25
##    currency       country     admname     adm1id      mktname mktid       cmid
## 1 #currency #country+name  #adm1+name #adm1+code #name+market    NA #item+code
## 2       PKR      Pakistan Balochistan       2272       Quetta   295         58
## 3       PKR      Pakistan Balochistan       2272       Quetta   295         58
## 4       PKR      Pakistan Balochistan       2272       Quetta   295         58
## 5       PKR      Pakistan Balochistan       2272       Quetta   295         58
## 6       PKR      Pakistan Balochistan       2272       Quetta   295         58
##   ptid umid           catid          sn default
## 1   NA   NA #item+type+code    #meta+id      NA
## 2   15    5               1 295_58_15_5      NA
## 3   15    5               1 295_58_15_5      NA
## 4   15    5               1 295_58_15_5      NA
## 5   15    5               1 295_58_15_5      NA
## 6   15    5               1 295_58_15_5      NA

# Drop the `default` column and the first row (the tag row, not an
# observation) in a single subsetting step
food <- food[-1, names(food) != "default", drop = FALSE]

# price was read as text because of the tag row; convert it to numeric
food[["price"]] <- as.numeric(food[["price"]])

# Missing-data pattern of the cleaned data (mice). Spell out TRUE: `T` is an
# ordinary variable that can be reassigned.
md.pattern(food, rotate.names = TRUE)

##  /\     /\
## {  `---'  }
## {  O   O  }
## ==>  V <==  No need for mice. This data set is completely observed.
##  \  \|/  /
##   `-----'

##      X_id date cmname unit category price currency country admname adm1id
## 7662    1    1      1    1        1     1        1       1       1      1
##         0    0      0    0        0     0        0       0       0      0
##      mktname mktid cmid ptid umid catid sn  
## 7662       1     1    1    1    1     1  1 0
##            0     0    0    0    0     0  0 0

# Per-column summary statistics of the cleaned data
summary(food)

##       X_id          date              cmname              unit          
##  Min.   :   2   Length:7662        Length:7662        Length:7662       
##  1st Qu.:1917   Class :character   Class :character   Class :character  
##  Median :3832   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :3832                                                           
##  3rd Qu.:5748                                                           
##  Max.   :7663                                                           
##    category             price          currency           country         
##  Length:7662        Min.   :  9.00   Length:7662        Length:7662       
##  Class :character   1st Qu.: 36.92   Class :character   Class :character  
##  Mode  :character   Median : 72.73   Mode  :character   Mode  :character  
##                     Mean   :106.92                                        
##                     3rd Qu.:138.40                                        
##                     Max.   :997.00                                        
##    admname             adm1id            mktname              mktid    
##  Length:7662        Length:7662        Length:7662        Min.   :291  
##  Class :character   Class :character   Class :character   1st Qu.:292  
##  Mode  :character   Mode  :character   Mode  :character   Median :293  
##                                                           Mean   :293  
##                                                           3rd Qu.:294  
##                                                           Max.   :295  
##      cmid                ptid         umid           catid          
##  Length:7662        Min.   :15   Min.   : 5.000   Length:7662       
##  Class :character   1st Qu.:15   1st Qu.: 5.000   Class :character  
##  Mode  :character   Median :15   Median : 5.000   Mode  :character  
##                     Mean   :15   Mean   : 9.745                     
##                     3rd Qu.:15   3rd Qu.: 5.000                     
##                     Max.   :15   Max.   :51.000                     
##       sn           
##  Length:7662       
##  Class :character  
##  Mode  :character  
##                    
##                    
##

# Parse the date column and derive calendar features from it.
# `day` is the day-of-week index returned by wday(), not the day of the month.
food <- food %>%
  mutate(
    date = as.Date(date),
    year = year(date),
    month = month(date),
    day = wday(date)
  )
head(food$day)

## [1] 5 1 2 5 7 3

# Flag each observation as falling on a weekday or a weekend.
# wday(..., week_start = 1) is applied to the numeric day-of-week index in
# `day`; results below 6 are labelled "weekday", 6 and 7 "weekend".
food <- mutate(
  food,
  weekend = ifelse(wday(day, week_start = 1) < 6, "weekday", "weekend")
)

# Month abbreviation as a factor ordered Jan..Dec. month.abb is indexed
# directly by the month number, which is vectorised and always yields a
# character vector (sapply() changes its return type on empty input).
food$monthabb <- factor(month.abb[as.numeric(food$month)], levels = month.abb)

# Map the month number to a season: Mar-May spring, Jun-Aug summer,
# Sep-Nov autumn, everything else winter. A missing month stays missing.
food <- food %>%
  mutate(
    season = case_when(
      is.na(month) ~ NA_character_,
      month >= 3 & month <= 5 ~ "Spring",
      month >= 6 & month <= 8 ~ "Summer",
      month >= 9 & month <= 11 ~ "Autumn",
      TRUE ~ "Winter"
    )
  )

# Preview the data with the derived year/month/day/weekend/season columns
head(food)

##   X_id       date               cmname unit           category  price currency
## 2    2 2004-01-15 Wheat flour - Retail   KG cereals and tubers 13.000      PKR
## 3    3 2004-02-15 Wheat flour - Retail   KG cereals and tubers 13.000      PKR
## 4    4 2004-03-15 Wheat flour - Retail   KG cereals and tubers 14.250      PKR
## 5    5 2004-04-15 Wheat flour - Retail   KG cereals and tubers 12.500      PKR
## 6    6 2004-05-15 Wheat flour - Retail   KG cereals and tubers 13.250      PKR
## 7    7 2004-06-15 Wheat flour - Retail   KG cereals and tubers 13.405      PKR
##    country     admname adm1id mktname mktid cmid ptid umid catid          sn
## 2 Pakistan Balochistan   2272  Quetta   295   58   15    5     1 295_58_15_5
## 3 Pakistan Balochistan   2272  Quetta   295   58   15    5     1 295_58_15_5
## 4 Pakistan Balochistan   2272  Quetta   295   58   15    5     1 295_58_15_5
## 5 Pakistan Balochistan   2272  Quetta   295   58   15    5     1 295_58_15_5
## 6 Pakistan Balochistan   2272  Quetta   295   58   15    5     1 295_58_15_5
## 7 Pakistan Balochistan   2272  Quetta   295   58   15    5     1 295_58_15_5
##   year month day weekend monthabb season
## 2 2004     1   5 weekday      Jan Winter
## 3 2004     2   1 weekend      Feb Winter
## 4 2004     3   2 weekday      Mar Spring
## 5 2004     4   5 weekday      Apr Spring
## 6 2004     5   7 weekend      May Spring
## 7 2004     6   3 weekday      Jun Summer

# List the column names currently in `food`
colnames(food)

##  [1] "X_id"     "date"     "cmname"   "unit"     "category" "price"   
##  [7] "currency" "country"  "admname"  "adm1id"   "mktname"  "mktid"   
## [13] "cmid"     "ptid"     "umid"     "catid"    "sn"       "year"    
## [19] "month"    "day"      "weekend"  "monthabb" "season"

# Count the missing values in every column (named integer vector)
vapply(food, function(column) sum(is.na(column)), integer(1))

##     X_id     date   cmname     unit category    price currency  country 
##        0        0        0        0        0        0        0        0 
##  admname   adm1id  mktname    mktid     cmid     ptid     umid    catid 
##        0        0        0        0        0        0        0        0 
##       sn     year    month      day  weekend monthabb   season 
##        0        0        0        0        0        0        0

# Distinct commodity labels; each one has the form "<item> - Retail"
unique(food$cmname)

##  [1] "Wheat flour - Retail"                                  
##  [2] "Rice (coarse) - Retail"                                
##  [3] "Lentils (masur) - Retail"                              
##  [4] "Milk - Retail"                                         
##  [5] "Oil (cooking) - Retail"                                
##  [6] "Wheat - Retail"                                        
##  [7] "Eggs - Retail"                                         
##  [8] "Sugar - Retail"                                        
##  [9] "Ghee (artificial) - Retail"                            
## [10] "Rice (basmati, broken) - Retail"                       
## [11] "Poultry - Retail"                                      
## [12] "Salt - Retail"                                         
## [13] "Fuel (diesel) - Retail"                                
## [14] "Fuel (petrol-gasoline) - Retail"                       
## [15] "Lentils (moong) - Retail"                              
## [16] "Beans(mash) - Retail"                                  
## [17] "Wage (non-qualified labour, non-agricultural) - Retail"

# Split "<item> - Retail" into the item name and the price type.
# The separator is " - " (with the surrounding spaces): splitting on a bare
# "-" cuts hyphenated names such as "Fuel (petrol-gasoline)" and
# "Wage (non-qualified labour, non-agricultural)" at the wrong place and
# leaves a trailing space on every item name.
food[c("categoryname", "retail")] <- str_split_fixed(food$cmname, " - ", 2)

# Drop the columns that are no longer needed: the raw date (year, month and
# day were already derived), the constant price type and the original label
food$date <- NULL
food$retail <- NULL
food$cmname <- NULL

`cmid` is the item code, `catid` is the item type code, and `sn` is the meta ID.

#load ggally for correlation plot
library(GGally)

## Warning: package 'GGally' was built under R version 4.2.3

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

# Correlation plot restricted to the numeric columns; the plot object is
# stored in `t` and is not printed here
t <- ggcorr(select(food, where(is.numeric)))

## Warning in cor(data, use = method[1], method = method[2]): the standard
## deviation is zero

library(corrplot)

## Warning: package 'corrplot' was built under R version 4.2.3

## corrplot 0.92 loaded

library(RColorBrewer)

# Correlation matrix of the numeric features. A constant column has zero
# standard deviation, so cor() returns NA for it and warns; `ptid` is 15 in
# every row. Such columns are dropped before the matrix is computed.
M <- food %>%
  select(X_id, price, mktid, ptid, umid, month, day, year) %>%
  select(where(function(column) isTRUE(sd(column, na.rm = TRUE) > 0))) %>%
  cor()

corrplot(
  M,
  type = "upper",
  order = "original",
  col = brewer.pal(n = 8, name = "RdYlBu")
)

library(PerformanceAnalytics)

## Warning: package 'PerformanceAnalytics' was built under R version 4.2.3

## Loading required package: xts

## Warning: package 'xts' was built under R version 4.2.3

## Loading required package: zoo

## Warning: package 'zoo' was built under R version 4.2.3

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################

## 
## Attaching package: 'xts'

## The following objects are masked from 'package:dplyr':
## 
##     first, last

## 
## Attaching package: 'PerformanceAnalytics'

## The following object is masked from 'package:graphics':
## 
##     legend

# Scatterplot/correlation matrix of the numeric features. Constant columns
# (zero standard deviation, e.g. `ptid`) are removed first because their
# correlations are undefined and trigger repeated cor() warnings.
food %>%
  select(X_id, price, mktid, ptid, umid, month, day, year) %>%
  select(where(function(column) isTRUE(sd(column, na.rm = TRUE) > 0))) %>%
  chart.Correlation(histogram = TRUE, pch = 19)

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in cor(x, y, use = use, method = method): the standard deviation is
## zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in cor(x, y, use = use, method = method): the standard deviation is
## zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in cor(x, y, use = use, method = method): the standard deviation is
## zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in cor(x, y, use = use, method = method): the standard deviation is
## zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in cor(x, y, use = use, method = method): the standard deviation is
## zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in cor(x, y, use = use, method = method): the standard deviation is
## zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in cor(x, y, use = use, method = method): the standard deviation is
## zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

# Outlier detection: horizontal boxplot of the price.
# Only one plot is drawn, so no multi-panel layout is set; a 3 x 2 grid would
# squeeze the plot into one sixth of the device and leave the layout changed
# for later base-graphics plots.
boxplot(food$price, horizontal = TRUE, main = "Price")

Item, year and price show good correlation.

# Price boxplots, one panel per `admname`, each with its own y scale
ggplot(food, aes(x = admname, y = price, colour = admname)) +
  geom_boxplot(alpha = 0.5) +
  facet_wrap(vars(admname), scales = "free_y") +
  labs(x = NULL) +
  theme_minimal()

Density of the price within each province:

# Price density, one panel per `admname`, with free x and y scales
ggplot(food, aes(x = price, fill = admname)) +
  geom_density(alpha = 0.5) +
  facet_wrap(vars(admname), ncol = 3, scales = "free") +
  theme_minimal() +
  labs(x = NULL, y = NULL)

# Mean price per category and `admname`, highest average first
pun_catg <- food %>%
  group_by(category, admname) %>%
  summarise(avg = mean(price)) %>%
  arrange(-avg)

## `summarise()` has grouped output by 'category'. You can override using the
## `.groups` argument.

# Average price of each food category, one panel per province.
# The bars are filled by province so that the four manual colours are
# actually used; previously no fill aesthetic was mapped, which made both the
# fill scale and the fill guide dead code. The legend is hidden with
# guides(fill = "none") because the panel titles already name the province
# (`FALSE` is deprecated for guides() since ggplot2 3.3.4).
pun_catg %>%
  ggplot(aes(x = fct_reorder(category, avg), y = avg, fill = admname)) +
  geom_col() +
  coord_flip() +
  facet_wrap(~admname) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.background = element_rect(fill = "#F9E79F")
  ) +
  guides(fill = "none") +
  labs(title = "Distribution of Prices of categories by Provinces",
       caption = "AVerage food prices by provinces ",
       y = "Average Price", x = "Food Categories") +
  scale_fill_manual(values = c("#2E64FE", "#40FF00", "#FE642E", "#FE2E2E"))

## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# List the column names currently in `food`
colnames(food)

##  [1] "X_id"         "unit"         "category"     "price"        "currency"    
##  [6] "country"      "admname"      "adm1id"       "mktname"      "mktid"       
## [11] "cmid"         "ptid"         "umid"         "catid"        "sn"          
## [16] "year"         "month"        "day"          "weekend"      "monthabb"    
## [21] "season"       "categoryname"

# Preview the first rows of `food`
head(food)

##   X_id unit           category  price currency  country     admname adm1id
## 2    2   KG cereals and tubers 13.000      PKR Pakistan Balochistan   2272
## 3    3   KG cereals and tubers 13.000      PKR Pakistan Balochistan   2272
## 4    4   KG cereals and tubers 14.250      PKR Pakistan Balochistan   2272
## 5    5   KG cereals and tubers 12.500      PKR Pakistan Balochistan   2272
## 6    6   KG cereals and tubers 13.250      PKR Pakistan Balochistan   2272
## 7    7   KG cereals and tubers 13.405      PKR Pakistan Balochistan   2272
##   mktname mktid cmid ptid umid catid          sn year month day weekend
## 2  Quetta   295   58   15    5     1 295_58_15_5 2004     1   5 weekday
## 3  Quetta   295   58   15    5     1 295_58_15_5 2004     2   1 weekend
## 4  Quetta   295   58   15    5     1 295_58_15_5 2004     3   2 weekday
## 5  Quetta   295   58   15    5     1 295_58_15_5 2004     4   5 weekday
## 6  Quetta   295   58   15    5     1 295_58_15_5 2004     5   7 weekend
## 7  Quetta   295   58   15    5     1 295_58_15_5 2004     6   3 weekday
##   monthabb season categoryname
## 2      Jan Winter Wheat flour 
## 3      Feb Winter Wheat flour 
## 4      Mar Spring Wheat flour 
## 5      Apr Spring Wheat flour 
## 6      May Spring Wheat flour 
## 7      Jun Summer Wheat flour

# Observation count per item. `cmname` was removed from `food` earlier, so
# tabulating it returns an empty table; the item names now live in
# `categoryname`.
table(food$categoryname)

## < table of extent 0 >

# Mean price per item (`categoryname`) and `admname`
pun_cmname <- food %>%
  group_by(categoryname, admname) %>%
  summarise(across(price, list(avg = mean), .names = "{.fn}"))

## `summarise()` has grouped output by 'categoryname'. You can override using the
## `.groups` argument.

# Density of the per-item average prices, one panel per `admname`
pun_cmname %>%
  ggplot(aes(x = avg, fill = admname)) +
  geom_density(alpha = 0.5) +
  theme_minimal() +
  facet_wrap(vars(admname), ncol = 2, scales = "free")

# Mean price per item (`categoryname`) over the whole data set
item_food <- food %>%
  group_by(categoryname) %>%
  summarise(across(price, list(avg = mean), .names = "{.fn}"))


# Average price per item, ordered by price.
# Removed dead code that had no effect on the plot: a `color = admname`
# argument passed outside aes() (item_food has no `admname` column), a first
# labs() call that was overridden by the second one, and a manual fill scale
# with no fill aesthetic mapped.
ggplot(item_food, aes(x = fct_reorder(categoryname, avg), y = avg)) +
  geom_col() +
  coord_flip() +
  theme_minimal() +
  labs(title = "Distribution of Prices by categories",
       caption = "AVerage food prices by Categories ",
       y = "Average Price", x = "Food Categories")

# List the column names currently in `food`
colnames(food)

##  [1] "X_id"         "unit"         "category"     "price"        "currency"    
##  [6] "country"      "admname"      "adm1id"       "mktname"      "mktid"       
## [11] "cmid"         "ptid"         "umid"         "catid"        "sn"          
## [16] "year"         "month"        "day"          "weekend"      "monthabb"    
## [21] "season"       "categoryname"

# Mean price per category and `admname`
cat_food <- food %>%
  group_by(category, admname) %>%
  summarise(across(price, list(avg = mean), .names = "{.fn}"))

## `summarise()` has grouped output by 'category'. You can override using the
## `.groups` argument.

# Average price per food category, one panel per province.
# Removed dead code that had no effect on the plot: a first labs() call that
# was overridden by the second one, and a manual fill scale with no fill
# aesthetic mapped.
ggplot(cat_food, aes(x = fct_reorder(category, avg), y = avg)) +
  facet_wrap(~admname) +
  geom_col() +
  coord_flip() +
  theme_minimal() +
  labs(title = "Distribution of Prices by categories",
       caption = "AVerage food prices by Categories ",
       y = "Average Price", x = "Food Categories")

# List the column names currently in `food`
colnames(food)

##  [1] "X_id"         "unit"         "category"     "price"        "currency"    
##  [6] "country"      "admname"      "adm1id"       "mktname"      "mktid"       
## [11] "cmid"         "ptid"         "umid"         "catid"        "sn"          
## [16] "year"         "month"        "day"          "weekend"      "monthabb"    
## [21] "season"       "categoryname"

# Preview the first rows of `food`
head(food)

##   X_id unit           category  price currency  country     admname adm1id
## 2    2   KG cereals and tubers 13.000      PKR Pakistan Balochistan   2272
## 3    3   KG cereals and tubers 13.000      PKR Pakistan Balochistan   2272
## 4    4   KG cereals and tubers 14.250      PKR Pakistan Balochistan   2272
## 5    5   KG cereals and tubers 12.500      PKR Pakistan Balochistan   2272
## 6    6   KG cereals and tubers 13.250      PKR Pakistan Balochistan   2272
## 7    7   KG cereals and tubers 13.405      PKR Pakistan Balochistan   2272
##   mktname mktid cmid ptid umid catid          sn year month day weekend
## 2  Quetta   295   58   15    5     1 295_58_15_5 2004     1   5 weekday
## 3  Quetta   295   58   15    5     1 295_58_15_5 2004     2   1 weekend
## 4  Quetta   295   58   15    5     1 295_58_15_5 2004     3   2 weekday
## 5  Quetta   295   58   15    5     1 295_58_15_5 2004     4   5 weekday
## 6  Quetta   295   58   15    5     1 295_58_15_5 2004     5   7 weekend
## 7  Quetta   295   58   15    5     1 295_58_15_5 2004     6   3 weekday
##   monthabb season categoryname
## 2      Jan Winter Wheat flour 
## 3      Feb Winter Wheat flour 
## 4      Mar Spring Wheat flour 
## 5      Apr Spring Wheat flour 
## 6      May Spring Wheat flour 
## 7      Jun Summer Wheat flour

# Number of observations per market
table(food$mktname)

## 
##  Karachi   Lahore   Multan Peshawar   Quetta 
##     1554     1550     1550     1555     1453

# Max, min and mean price per category, season and `admname`
season_food <- food %>%
  group_by(category, season, admname) %>%
  summarise(
    across(price, list(max = max, min = min, avg = mean), .names = "{.fn}")
  )

## `summarise()` has grouped output by 'category', 'season'. You can override
## using the `.groups` argument.

# Build one seasonal summary chart.
#   data    : summary table with one row per category x season x province
#   stat    : name of the summary column to plot ("max", "min" or "avg")
#   title   : plot title
#   caption : plot caption
#   bg_fill : plot background colour
# Panels are laid out as province (rows) by season (columns) so that every
# bar corresponds to exactly one summary row. Faceting on only one of the two
# makes geom_col() stack the remaining rows, and the bar then shows the sum
# of several maxima/minima/means instead of the statistic itself.
plot_season_stat <- function(data, stat, title, caption, bg_fill) {
  ggplot(
    data,
    aes(x = fct_reorder(category, .data[[stat]]), y = .data[[stat]])
  ) +
    geom_col() +
    theme_minimal() +
    facet_grid(admname ~ season) +
    coord_flip() +
    labs(title = title, caption = caption,
         y = "Price", x = "Food Categories") +
    theme(
      plot.title = element_text(hjust = 0.5),
      plot.background = element_rect(fill = bg_fill),
      axis.text.x = element_text(angle = 90)
    )
}

max_p <- plot_season_stat(
  season_food, "max",
  title = "Max Priced categories by Seasons",
  caption = "Max food prices by Categories by Seasons ",
  bg_fill = "#adceff"
)

min_p <- plot_season_stat(
  season_food, "min",
  title = "Min Priced categories by Seasons",
  caption = "Min food prices by Categories by Seasons ",
  bg_fill = "#Fed6F7"
)

avg_p <- plot_season_stat(
  season_food, "avg",
  title = "Average Priced categories by Seasons",
  caption = "Average food prices by seasons ",
  bg_fill = "#F4F6F7"
)

grid.arrange(min_p, max_p, avg_p, nrow = 1)

# Bar chart of the maximum price per season for one province.
#   data      : the food data (needs admname, season, price and `group_col`)
#   province  : value of `admname` to keep
#   group_col : name (string) of the column shown on the category axis
#   title     : plot title
#   bg_fill   : plot background colour
# Returns a ggplot object with one panel per season. The grouping is dropped
# after summarising so no grouped-output message is emitted.
plot_province_max <- function(data, province, group_col, title, bg_fill) {
  data %>%
    filter(admname == province) %>%
    group_by(across(all_of(c(group_col, "season")))) %>%
    summarise(max = max(price), .groups = "drop") %>%
    ggplot(aes(x = fct_reorder(.data[[group_col]], max), y = max)) +
    geom_col() +
    theme_minimal() +
    facet_grid(~season) +
    coord_flip() +
    labs(title = title,
         caption = "Categories by Seasons ",
         y = "Price", x = "Food Categories") +
    theme(
      plot.title = element_text(hjust = 0.5),
      plot.background = element_rect(fill = bg_fill),
      axis.text.x = element_text(angle = 90)
    )
}

# Maximum price per food category and season, one chart per province
b <- plot_province_max(food, "Balochistan", "category",
                       "Max Priced in Balochistan", "#adceff")
s <- plot_province_max(food, "Sindh", "category",
                       "Max Priced in Sindh", "#Fed6F7")
kpk <- plot_province_max(food, "Khyber Pakhtunkhwa", "category",
                         "Max Priced in KPK", "#aedce3")
p <- plot_province_max(food, "Punjab", "category",
                       "Max Priced in Punjab", "#affae3")

grid.arrange(b, p, kpk, s, nrow = 2, ncol = 2)

# Same charts at item level (`categoryname`); b, s, kpk and p are reused
b <- plot_province_max(food, "Balochistan", "categoryname",
                       "Max Priced in Balochistan", "#adceff")
s <- plot_province_max(food, "Sindh", "categoryname",
                       "Max Priced in Sindh", "#Fed6F7")
kpk <- plot_province_max(food, "Khyber Pakhtunkhwa", "categoryname",
                         "Max Priced in KPK", "#aedce3")
p <- plot_province_max(food, "Punjab", "categoryname",
                       "Max Priced in Punjab", "#affae3")

grid.arrange(b, p, kpk, s, nrow = 2, ncol = 2)

# List the column names currently in `food`
colnames(food)

##  [1] "X_id"         "unit"         "category"     "price"        "currency"    
##  [6] "country"      "admname"      "adm1id"       "mktname"      "mktid"       
## [11] "cmid"         "ptid"         "umid"         "catid"        "sn"          
## [16] "year"         "month"        "day"          "weekend"      "monthabb"    
## [21] "season"       "categoryname"

# Preview the first rows of `food`
head(food)

##   X_id unit           category  price currency  country     admname adm1id
## 2    2   KG cereals and tubers 13.000      PKR Pakistan Balochistan   2272
## 3    3   KG cereals and tubers 13.000      PKR Pakistan Balochistan   2272
## 4    4   KG cereals and tubers 14.250      PKR Pakistan Balochistan   2272
## 5    5   KG cereals and tubers 12.500      PKR Pakistan Balochistan   2272
## 6    6   KG cereals and tubers 13.250      PKR Pakistan Balochistan   2272
## 7    7   KG cereals and tubers 13.405      PKR Pakistan Balochistan   2272
##   mktname mktid cmid ptid umid catid          sn year month day weekend
## 2  Quetta   295   58   15    5     1 295_58_15_5 2004     1   5 weekday
## 3  Quetta   295   58   15    5     1 295_58_15_5 2004     2   1 weekend
## 4  Quetta   295   58   15    5     1 295_58_15_5 2004     3   2 weekday
## 5  Quetta   295   58   15    5     1 295_58_15_5 2004     4   5 weekday
## 6  Quetta   295   58   15    5     1 295_58_15_5 2004     5   7 weekend
## 7  Quetta   295   58   15    5     1 295_58_15_5 2004     6   3 weekday
##   monthabb season categoryname
## 2      Jan Winter Wheat flour 
## 3      Feb Winter Wheat flour 
## 4      Mar Spring Wheat flour 
## 5      Apr Spring Wheat flour 
## 6      May Spring Wheat flour 
## 7      Jun Summer Wheat flour

# Mean, min, max and standard deviation of the price per season, market and
# month
mkt_food <- food %>%
  group_by(season, mktname, monthabb) %>%
  summarise(
    across(
      price,
      list(avg = mean, min = min, max = max, sd = sd),
      .names = "{.fn}"
    )
  )

## `summarise()` has grouped output by 'season', 'mktname'. You can override using
## the `.groups` argument.

# Highest price per season in each market.
# mkt_food has one row per season x market x month, so the monthly maxima are
# first collapsed to a single seasonal maximum; plotting the monthly rows
# directly makes geom_col() stack them and the bar length becomes their sum.
# Title and axis label now describe what is shown (markets and seasons, not
# Punjab and food categories).
mkt_food %>%
  group_by(season, mktname) %>%
  summarise(season_max = max(max), .groups = "drop") %>%
  ggplot(aes(x = fct_reorder(season, season_max), y = season_max)) +
  geom_col() +
  facet_wrap(~mktname) +
  theme_minimal() +
  coord_flip() +
  labs(title = "Max Price by Market and Season",
       caption = "Maximum price per season in each market",
       y = "Price", x = "Season") +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.background = element_rect(fill = "yellow"),
    axis.text.x = element_text(angle = 90)
  )

# Lowest price per season in each market.
# mkt_food has one row per season x market x month, so the monthly minima are
# first collapsed to a single seasonal minimum; plotting the monthly rows
# directly makes geom_col() stack them and the bar length becomes their sum.
# The title previously read "Max Priced in Punjab" although the chart shows
# minima for every market.
mkt_food %>%
  group_by(season, mktname) %>%
  summarise(season_min = min(min), .groups = "drop") %>%
  ggplot(aes(x = fct_reorder(season, season_min), y = season_min)) +
  geom_col() +
  facet_wrap(~mktname) +
  theme_minimal() +
  coord_flip() +
  labs(title = "Min Price by Market and Season",
       caption = "Minimum price per season in each market",
       y = "Price", x = "Season") +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.background = element_rect(fill = "green"),
    axis.text.x = element_text(angle = 90)
  )

#Karachi

# Karachi only: mean, minimum, maximum and standard deviation of `price` for
# each season / food category. The result stays grouped by season.
kf <- food %>%
  select(
    monthabb, weekend, season, category, categoryname, mktname, price
  ) %>%
  filter(mktname == "Karachi") %>%
  group_by(season, categoryname) %>%
  summarise(
    avg = mean(price),
    min = min(price),
    max = max(price),
    sd = sd(price)
  )

## `summarise()` has grouped output by 'season'. You can override using the
## `.groups` argument.

# Karachi: maximum price per food category, one facet per season.
# Categories are ordered with fct_reorder() on `max`; bars are drawn
# horizontally via coord_flip().
kmax <- kf %>%
  ggplot(aes(x = fct_reorder(categoryname, max), y = max)) +
  geom_col() +
  facet_wrap(~season) +
  coord_flip() +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.background = element_rect(fill = "green"),
    axis.text.x = element_text(angle = 90)
  ) +
  labs(
    x = "Food Categories",
    y = "Price",
    title = "Max Priced in Karachi",
    caption = "Saless by Seasons "
  )

kmax

# Karachi: minimum price per food category, one facet per season.
# Categories are ordered with fct_reorder() on `min`; bars are drawn
# horizontally via coord_flip().
kmin <- kf %>%
  ggplot(aes(x = fct_reorder(categoryname, min), y = min)) +
  geom_col() +
  facet_wrap(~season) +
  coord_flip() +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.background = element_rect(fill = "green"),
    axis.text.x = element_text(angle = 90)
  ) +
  labs(
    x = "Food Categories",
    y = "Price",
    title = "Min Priced in Karachi",
    caption = "Saless by Seasons "
  )

kmin

# Count the number of rows recorded for each market.
table(food[["mktname"]])

## 
##  Karachi   Lahore   Multan Peshawar   Quetta 
##     1554     1550     1550     1555     1453

# Lahore only: mean, minimum, maximum and standard deviation of `price` for
# each season / food category. The result stays grouped by season.
lf <- food %>%
  select(
    monthabb, weekend, season, category, categoryname, mktname, price
  ) %>%
  filter(mktname == "Lahore") %>%
  group_by(season, categoryname) %>%
  summarise(
    avg = mean(price),
    min = min(price),
    max = max(price),
    sd = sd(price)
  )

## `summarise()` has grouped output by 'season'. You can override using the
## `.groups` argument.

# Lahore: maximum price per food category, one facet per season.
# Uses the Lahore summary `lf`; the previous version plotted the Karachi
# table `kf` under a Lahore title.
lmax <- ggplot(lf, aes(x = fct_reorder(categoryname, max), y = max)) +
  geom_col() +
  theme_minimal() +
  coord_flip() +
  facet_wrap(~season) +
  labs(
    title = "Max Priced in Lahore",
    caption = "Saless by Seasons ",
    y = "Price",
    x = "Food Categories"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.background = element_rect(fill = "green"),
    axis.text.x = element_text(angle = 90)
  )

lmax

# Lahore: minimum price per food category, one facet per season.
# Uses the Lahore summary `lf`; the previous version plotted the Karachi
# table `kf` under a Lahore title.
lmin <- ggplot(lf, aes(x = fct_reorder(categoryname, min), y = min)) +
  geom_col() +
  theme_minimal() +
  coord_flip() +
  facet_wrap(~season) +
  labs(
    title = "Min Priced in Lahore",
    caption = "Saless by Seasons ",
    y = "Price",
    x = "Food Categories"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.background = element_rect(fill = "green"),
    axis.text.x = element_text(angle = 90)
  )

lmin

#Multan

# Multan only: mean, minimum, maximum and standard deviation of `price` for
# each season / food category. The result stays grouped by season.
mf <- food %>%
  select(
    monthabb, weekend, season, category, categoryname, mktname, price
  ) %>%
  filter(mktname == "Multan") %>%
  group_by(season, categoryname) %>%
  summarise(
    avg = mean(price),
    min = min(price),
    max = max(price),
    sd = sd(price)
  )

## `summarise()` has grouped output by 'season'. You can override using the
## `.groups` argument.

# Multan: maximum price per food category, one facet per season.
# Uses the Multan summary `mf`; the previous version plotted the Karachi
# table `kf` under a Multan title.
mmax <- ggplot(mf, aes(x = fct_reorder(categoryname, max), y = max)) +
  geom_col() +
  theme_minimal() +
  coord_flip() +
  facet_wrap(~season) +
  labs(
    title = "Max Priced in Multan",
    caption = "Saless by Seasons ",
    y = "Price",
    x = "Food Categories"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.background = element_rect(fill = "green"),
    axis.text.x = element_text(angle = 90)
  )

mmax

# Multan: minimum price per food category, one facet per season.
# Uses the Multan summary `mf`; the previous version plotted the Karachi
# table `kf` under a Multan title.
mmin <- ggplot(mf, aes(x = fct_reorder(categoryname, min), y = min)) +
  geom_col() +
  theme_minimal() +
  coord_flip() +
  facet_wrap(~season) +
  labs(
    title = "Min Priced in Multan",
    caption = "Saless by Seasons ",
    y = "Price",
    x = "Food Categories"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.background = element_rect(fill = "green"),
    axis.text.x = element_text(angle = 90)
  )

mmin

#Peshawar

# Peshawar only: mean, minimum, maximum and standard deviation of `price` for
# each season / food category. The result stays grouped by season.
pf <- food %>%
  select(
    monthabb, weekend, season, category, categoryname, mktname, price
  ) %>%
  filter(mktname == "Peshawar") %>%
  group_by(season, categoryname) %>%
  summarise(
    avg = mean(price),
    min = min(price),
    max = max(price),
    sd = sd(price)
  )

## `summarise()` has grouped output by 'season'. You can override using the
## `.groups` argument.

# Peshawar: maximum price per food category, one facet per season.
# Uses the Peshawar summary `pf`; the previous version plotted the Karachi
# table `kf` under a Peshawar title.
# Note: this assignment masks base::pmax as a variable in the global
# environment; calls like pmax(a, b) still resolve to the function.
pmax <- ggplot(pf, aes(x = fct_reorder(categoryname, max), y = max)) +
  geom_col() +
  theme_minimal() +
  coord_flip() +
  facet_wrap(~season) +
  labs(
    title = "Max Priced in Peshawar",
    caption = "Saless by Seasons ",
    y = "Price",
    x = "Food Categories"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.background = element_rect(fill = "green"),
    axis.text.x = element_text(angle = 90)
  )

pmax

# Peshawar: minimum price per food category, one facet per season.
# Uses the Peshawar summary `pf`; the previous version plotted the Karachi
# table `kf` under a Peshawar title.
# Note: this assignment masks base::pmin as a variable in the global
# environment; calls like pmin(a, b) still resolve to the function.
pmin <- ggplot(pf, aes(x = fct_reorder(categoryname, min), y = min)) +
  geom_col() +
  theme_minimal() +
  coord_flip() +
  facet_wrap(~season) +
  labs(
    title = "Min Priced in Peshawar",
    caption = "Saless by Seasons ",
    y = "Price",
    x = "Food Categories"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.background = element_rect(fill = "green"),
    axis.text.x = element_text(angle = 90)
  )

pmin

#Quetta

# Quetta only: mean, minimum, maximum and standard deviation of `price` for
# each season / food category. The result stays grouped by season, which the
# per-season top_n() calls on `qf` rely on.
qf <- food %>%
  select(
    monthabb, weekend, season, category, categoryname, mktname, price
  ) %>%
  filter(mktname == "Quetta") %>%
  group_by(season, categoryname) %>%
  summarise(
    avg = mean(price),
    min = min(price),
    max = max(price),
    sd = sd(price)
  )

## `summarise()` has grouped output by 'season'. You can override using the
## `.groups` argument.

# Quetta: the three food categories with the highest `max` price in each
# season (qf is grouped by season, so top_n() works per season), drawn as
# horizontal bars with one facet per season.
qmax <- ggplot(
  top_n(qf, 3, max),
  aes(x = fct_reorder(categoryname, max), y = max)
) +
  geom_col() +
  facet_wrap(~season) +
  coord_flip() +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.background = element_rect(fill = "#def231"),
    axis.text.x = element_text(angle = 90)
  ) +
  labs(
    x = "Food Categories",
    y = "Price",
    title = "Max Priced in Quetta",
    caption = "Saless by Seasons "
  )

qmax

# Quetta: per season, the three food categories selected by top_n(3, min),
# drawn as horizontal bars with one facet per season.
# NOTE(review): top_n() keeps the rows with the LARGEST values, so this shows
# the categories whose minimum price is highest. If the cheapest categories
# were intended, the selection needs reversing -- confirm with the author.
qmin <- ggplot(
  top_n(qf, 3, min),
  aes(x = fct_reorder(categoryname, min), y = min)
) +
  geom_col() +
  facet_wrap(~season) +
  coord_flip() +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.background = element_rect(fill = "#abc123"),
    axis.text.x = element_text(angle = 90)
  ) +
  labs(
    x = "Food Categories",
    y = "Price",
    title = "Min Priced in Quetta",
    caption = "Saless by Seasons "
  )

qmin

Food Price Prediction

2024-05-15