library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'purrr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.2.3
library(dplyr)
library(mice)
## Warning: package 'mice' was built under R version 4.2.3
##
## Attaching package: 'mice'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(tidyverse)
library(magrittr)
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
library(DataExplorer)
## Warning: package 'DataExplorer' was built under R version 4.2.3
library(maps)
## Warning: package 'maps' was built under R version 4.2.3
##
## Attaching package: 'maps'
##
## The following object is masked from 'package:purrr':
##
## map
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(DT)
## Warning: package 'DT' was built under R version 4.2.3
library(tidytext)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.2.3
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.2.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(mice)
library(janitor)
library(forcats)
library(readr)
library(gghighlight)
## Warning: package 'gghighlight' was built under R version 4.2.3
# Load the raw price table from the CSV in the working directory and report
# its dimensions (rows, columns).
food <- read.csv("pk_food_prices.csv")
dim(food)
## [1] 7663 18
# DataExplorer overview: row/column counts, column types and missing values.
introduce(food)
## rows columns discrete_columns continuous_columns all_missing_columns
## 1 7663 18 13 4 1
## total_missing_values complete_rows total_observations memory_usage
## 1 7666 0 137934 1144600
# Visual data-quality overview from DataExplorer: summary metrics,
# per-column missingness, and bar charts of the discrete columns.
plot_intro(food)
plot_missing(food)
plot_bar(food)
## 3 columns ignored with more than 50 categories.
## date: 191 categories
## price: 2989 categories
## sn: 86 categories
# Preview the first rows; row 1 holds metadata tags (e.g. "#date", "#value")
# rather than an observation.
head(food)
## X_id date cmname unit category price
## 1 1 #date #item+name #item+unit #item+type #value
## 2 2 2004-01-15 Wheat flour - Retail KG cereals and tubers 13.0
## 3 3 2004-02-15 Wheat flour - Retail KG cereals and tubers 13.0
## 4 4 2004-03-15 Wheat flour - Retail KG cereals and tubers 14.25
## 5 5 2004-04-15 Wheat flour - Retail KG cereals and tubers 12.5
## 6 6 2004-05-15 Wheat flour - Retail KG cereals and tubers 13.25
## currency country admname adm1id mktname mktid cmid
## 1 #currency #country+name #adm1+name #adm1+code #name+market NA #item+code
## 2 PKR Pakistan Balochistan 2272 Quetta 295 58
## 3 PKR Pakistan Balochistan 2272 Quetta 295 58
## 4 PKR Pakistan Balochistan 2272 Quetta 295 58
## 5 PKR Pakistan Balochistan 2272 Quetta 295 58
## 6 PKR Pakistan Balochistan 2272 Quetta 295 58
## ptid umid catid sn default
## 1 NA NA #item+type+code #meta+id NA
## 2 15 5 1 295_58_15_5 NA
## 3 15 5 1 295_58_15_5 NA
## 4 15 5 1 295_58_15_5 NA
## 5 15 5 1 295_58_15_5 NA
## 6 15 5 1 295_58_15_5 NA
# Remove the `default` column (it is NA in the preview of the raw data).
food$default <- NULL
# Drop the first row: it holds metadata tags such as "#date", not an observation.
food <- food[-1, ]
# Convert `price` to numeric; the tag row forced it to be read as text.
food$price <- as.numeric(food$price)
# Tabulate the missing-data pattern. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
md.pattern(food, rotate.names = TRUE)
## /\ /\
## { `---' }
## { O O }
## ==> V <== No need for mice. This data set is completely observed.
## \ \|/ /
## `-----'
## X_id date cmname unit category price currency country admname adm1id
## 7662 1 1 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0 0 0
## mktname mktid cmid ptid umid catid sn
## 7662 1 1 1 1 1 1 1 0
## 0 0 0 0 0 0 0 0
# Per-column summary statistics of the cleaned table.
summary(food)
## X_id date cmname unit
## Min. : 2 Length:7662 Length:7662 Length:7662
## 1st Qu.:1917 Class :character Class :character Class :character
## Median :3832 Mode :character Mode :character Mode :character
## Mean :3832
## 3rd Qu.:5748
## Max. :7663
## category price currency country
## Length:7662 Min. : 9.00 Length:7662 Length:7662
## Class :character 1st Qu.: 36.92 Class :character Class :character
## Mode :character Median : 72.73 Mode :character Mode :character
## Mean :106.92
## 3rd Qu.:138.40
## Max. :997.00
## admname adm1id mktname mktid
## Length:7662 Length:7662 Length:7662 Min. :291
## Class :character Class :character Class :character 1st Qu.:292
## Mode :character Mode :character Mode :character Median :293
## Mean :293
## 3rd Qu.:294
## Max. :295
## cmid ptid umid catid
## Length:7662 Min. :15 Min. : 5.000 Length:7662
## Class :character 1st Qu.:15 1st Qu.: 5.000 Class :character
## Mode :character Median :15 Median : 5.000 Mode :character
## Mean :15 Mean : 9.745
## 3rd Qu.:15 3rd Qu.: 5.000
## Max. :15 Max. :51.000
## sn
## Length:7662
## Class :character
## Mode :character
##
##
##
# Parse the date strings, then derive calendar parts with lubridate.
# Note: `day` stores the day-of-week index returned by wday(), not the
# day of the month.
food[["date"]] <- as.Date(food[["date"]])
food[["year"]] <- year(food[["date"]])
food[["month"]] <- month(food[["date"]])
food[["day"]] <- wday(food[["date"]])
head(food[["day"]])
## [1] 5 1 2 5 7 3
# Weekday vs. weekend flag.
# The weekday is taken straight from `date` using a Monday-based week
# (Mon = 1 ... Sun = 7), so values 6 and 7 are Saturday and Sunday.
# Feeding the already-extracted `day` index back into wday() is only correct
# while that index is Sunday-based, which depends on the
# `lubridate.week.start` option.
food <- food %>%
  mutate(weekend = ifelse(wday(date, week_start = 1) >= 6,
                          "weekend", "weekday"))
# Month abbreviation as a factor whose levels follow calendar order (Jan..Dec).
# Indexing `month.abb` is already vectorised, so no per-element sapply() is
# needed.
food$monthabb <- factor(month.abb[food$month], levels = month.abb)
# Season lookup indexed by month number (1 = Jan ... 12 = Dec):
# Mar-May -> Spring, Jun-Aug -> Summer, Sep-Nov -> Autumn, otherwise Winter.
season_by_month <- c(
  "Winter", "Winter",
  "Spring", "Spring", "Spring",
  "Summer", "Summer", "Summer",
  "Autumn", "Autumn", "Autumn",
  "Winter"
)
food <- food %>%
  mutate(season = season_by_month[month])
head(food)
## X_id date cmname unit category price currency
## 2 2 2004-01-15 Wheat flour - Retail KG cereals and tubers 13.000 PKR
## 3 3 2004-02-15 Wheat flour - Retail KG cereals and tubers 13.000 PKR
## 4 4 2004-03-15 Wheat flour - Retail KG cereals and tubers 14.250 PKR
## 5 5 2004-04-15 Wheat flour - Retail KG cereals and tubers 12.500 PKR
## 6 6 2004-05-15 Wheat flour - Retail KG cereals and tubers 13.250 PKR
## 7 7 2004-06-15 Wheat flour - Retail KG cereals and tubers 13.405 PKR
## country admname adm1id mktname mktid cmid ptid umid catid sn
## 2 Pakistan Balochistan 2272 Quetta 295 58 15 5 1 295_58_15_5
## 3 Pakistan Balochistan 2272 Quetta 295 58 15 5 1 295_58_15_5
## 4 Pakistan Balochistan 2272 Quetta 295 58 15 5 1 295_58_15_5
## 5 Pakistan Balochistan 2272 Quetta 295 58 15 5 1 295_58_15_5
## 6 Pakistan Balochistan 2272 Quetta 295 58 15 5 1 295_58_15_5
## 7 Pakistan Balochistan 2272 Quetta 295 58 15 5 1 295_58_15_5
## year month day weekend monthabb season
## 2 2004 1 5 weekday Jan Winter
## 3 2004 2 1 weekend Feb Winter
## 4 2004 3 2 weekday Mar Spring
## 5 2004 4 5 weekday Apr Spring
## 6 2004 5 7 weekend May Spring
## 7 2004 6 3 weekday Jun Summer
# List the columns, including the derived date features.
colnames(food)
## [1] "X_id" "date" "cmname" "unit" "category" "price"
## [7] "currency" "country" "admname" "adm1id" "mktname" "mktid"
## [13] "cmid" "ptid" "umid" "catid" "sn" "year"
## [19] "month" "day" "weekend" "monthabb" "season"
# Missing values per column. vapply() guarantees exactly one integer per
# column, whereas sapply()'s return type depends on its input.
vapply(food, function(col) sum(is.na(col)), integer(1))
## X_id date cmname unit category price currency country
## 0 0 0 0 0 0 0 0
## admname adm1id mktname mktid cmid ptid umid catid
## 0 0 0 0 0 0 0 0
## sn year month day weekend monthabb season
## 0 0 0 0 0 0 0
# Distinct commodity labels; each has the form "<item> - Retail".
unique(food$cmname)
## [1] "Wheat flour - Retail"
## [2] "Rice (coarse) - Retail"
## [3] "Lentils (masur) - Retail"
## [4] "Milk - Retail"
## [5] "Oil (cooking) - Retail"
## [6] "Wheat - Retail"
## [7] "Eggs - Retail"
## [8] "Sugar - Retail"
## [9] "Ghee (artificial) - Retail"
## [10] "Rice (basmati, broken) - Retail"
## [11] "Poultry - Retail"
## [12] "Salt - Retail"
## [13] "Fuel (diesel) - Retail"
## [14] "Fuel (petrol-gasoline) - Retail"
## [15] "Lentils (moong) - Retail"
## [16] "Beans(mash) - Retail"
## [17] "Wage (non-qualified labour, non-agricultural) - Retail"
# Split "<item> - Retail" into the item name and the price type.
# The separator is " - " (with surrounding spaces): a bare "-" cuts hyphenated
# names such as "Fuel (petrol-gasoline)" or "Wage (non-qualified labour, ...)"
# in the wrong place and leaves a trailing space on every item name.
name_parts <- str_split_fixed(food$cmname, " - ", 2)
food$categoryname <- name_parts[, 1]
# `date` and `cmname` are dropped: the derived columns (year, month, day, ...,
# categoryname) carry their information from here on.
food$date <- NULL
food$cmname <- NULL
Column key: `cmid` is the item code, `catid` is the item-type code, and `sn` is the meta id.
#load ggally for correlation plot
library(GGally)
## Warning: package 'GGally' was built under R version 4.2.3
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Correlation heat map over the numeric columns only.
# select(where(is.numeric)) replaces the superseded select_if().
# NOTE(review): the plot is stored but never printed, and the name `t` shadows
# base::t(); the name is kept so any later use of `t` keeps working.
t <- food %>%
  select(where(is.numeric)) %>%
  ggcorr()
## Warning in cor(data, use = method[1], method = method[2]): the standard
## deviation is zero
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.2.3
## corrplot 0.92 loaded
library(RColorBrewer)
# Correlation matrix of the numeric variables.
# Constant columns (ptid is 15 on every row) have zero standard deviation,
# which makes cor() warn and return NA for them, so any column with a single
# distinct value is dropped before the matrix is computed.
M <- food %>%
  select(X_id, price, mktid, ptid, umid, month, day, year) %>%
  select(where(~ n_distinct(.x) > 1)) %>%
  cor()
corrplot(M, type = "upper", order = "original",
         col = brewer.pal(n = 8, name = "RdYlBu"))
library(PerformanceAnalytics)
## Warning: package 'PerformanceAnalytics' was built under R version 4.2.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.2.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.2.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
# Scatter-plot matrix with histograms and pairwise correlations.
# Columns with a single distinct value are removed first: their zero standard
# deviation makes cor() warn and yield NA.
food %>%
  select(X_id, price, mktid, ptid, umid, month, day, year) %>%
  select(where(~ n_distinct(.x) > 1)) %>%
  chart.Correlation(histogram = TRUE, pch = 19)
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in cor(x, y, use = use, method = method): the standard deviation is
## zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in cor(x, y, use = use, method = method): the standard deviation is
## zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in cor(x, y, use = use, method = method): the standard deviation is
## zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in cor(x, y, use = use, method = method): the standard deviation is
## zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in cor(x, y, use = use, method = method): the standard deviation is
## zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in cor(x, y, use = use, method = method): the standard deviation is
## zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in cor(x, y, use = use, method = method): the standard deviation is
## zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
# Outlier detection: horizontal boxplot of all prices.
# A single plot needs no multi-panel layout, so par(mfrow = ...) is left
# untouched; changing it without restoring it would shrink later base plots.
boxplot(food$price, horizontal = TRUE, main = "Price")
Item, year and price show good correlation.
# Price spread per province: one boxplot panel per admname, each panel with
# its own y-axis range.
ggplot(food, aes(x = admname, y = price, colour = admname)) +
  geom_boxplot(alpha = 0.5) +
  facet_wrap(vars(admname), scales = "free_y") +
  theme_minimal() +
  labs(x = NULL)
Density of the price within each province.
# Price density per province, three panels per row, axes free in each panel.
ggplot(data = food, mapping = aes(x = price, fill = admname)) +
  geom_density(alpha = 0.5) +
  facet_wrap(vars(admname), scales = "free", ncol = 3) +
  theme_minimal() +
  labs(x = NULL, y = NULL)
# Mean price for every category/province pair, highest averages first.
# Grouping and summarising only touch category, admname and price, so no
# explicit column selection is needed beforehand.
pun_catg <- food %>%
  group_by(category, admname) %>%
  summarise(avg = mean(price)) %>%
  arrange(desc(avg))
## `summarise()` has grouped output by 'category'. You can override using the
## `.groups` argument.
#aes( x = fct_rev(fct_reorder(division, mean_production)), y = mean_production)) +
# Average price per category, one panel per province.
# No fill aesthetic is mapped, so a fill guide or manual fill scale would have
# no visible effect (and `guides(fill = F)` triggers a ggplot2 deprecation
# warning); neither is added.
pun_catg %>%
  #top_n(3, avg) %>%
  ggplot(aes(x = fct_reorder(category, avg), y = avg)) +
  geom_col() +
  coord_flip() +
  facet_wrap(~admname) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5),
        plot.background = element_rect(fill = "#F9E79F")) +
  labs(title = "Distribution of Prices of categories by Provinces",
       caption = "AVerage food prices by provinces ",
       y = "Average Price", x = "Food Categories")
# Column names after `date` and `cmname` were dropped and `categoryname` added.
colnames(food)
## [1] "X_id" "unit" "category" "price" "currency"
## [6] "country" "admname" "adm1id" "mktname" "mktid"
## [11] "cmid" "ptid" "umid" "catid" "sn"
## [16] "year" "month" "day" "weekend" "monthabb"
## [21] "season" "categoryname"
# Preview the first rows of the feature-engineered table.
head(food)
## X_id unit category price currency country admname adm1id
## 2 2 KG cereals and tubers 13.000 PKR Pakistan Balochistan 2272
## 3 3 KG cereals and tubers 13.000 PKR Pakistan Balochistan 2272
## 4 4 KG cereals and tubers 14.250 PKR Pakistan Balochistan 2272
## 5 5 KG cereals and tubers 12.500 PKR Pakistan Balochistan 2272
## 6 6 KG cereals and tubers 13.250 PKR Pakistan Balochistan 2272
## 7 7 KG cereals and tubers 13.405 PKR Pakistan Balochistan 2272
## mktname mktid cmid ptid umid catid sn year month day weekend
## 2 Quetta 295 58 15 5 1 295_58_15_5 2004 1 5 weekday
## 3 Quetta 295 58 15 5 1 295_58_15_5 2004 2 1 weekend
## 4 Quetta 295 58 15 5 1 295_58_15_5 2004 3 2 weekday
## 5 Quetta 295 58 15 5 1 295_58_15_5 2004 4 5 weekday
## 6 Quetta 295 58 15 5 1 295_58_15_5 2004 5 7 weekend
## 7 Quetta 295 58 15 5 1 295_58_15_5 2004 6 3 weekday
## monthabb season categoryname
## 2 Jan Winter Wheat flour
## 3 Feb Winter Wheat flour
## 4 Mar Spring Wheat flour
## 5 Apr Spring Wheat flour
## 6 May Spring Wheat flour
## 7 Jun Summer Wheat flour
# Observations per item. `cmname` no longer exists at this point (food$cmname
# is NULL, which tabulates to an empty table), so the derived `categoryname`
# column is counted instead.
table(food$categoryname)
# Mean price of each item within each province.
pun_cmname <- food %>%
  group_by(categoryname, admname) %>%
  summarise(avg = mean(price))
## `summarise()` has grouped output by 'categoryname'. You can override using the
## `.groups` argument.
# Density of the per-item average prices, one panel per province (two per row).
ggplot(data = pun_cmname, aes(x = avg, fill = admname)) +
  geom_density(alpha = 0.5) +
  theme_minimal() +
  facet_wrap(vars(admname), scales = "free", ncol = 2)
#range(food$price)
# Overall mean price of each item, pooled over all provinces.
item_food <- food %>%
  group_by(categoryname) %>%
  summarise(avg = mean(price))
# Average price per item as horizontal bars, ordered by the average.
# `item_food` has no `admname` column and no fill aesthetic is mapped, so no
# colour argument or fill scale is supplied; axis labels are set once.
ggplot(item_food, aes(x = fct_reorder(categoryname, avg), y = avg)) +
  geom_col() +
  coord_flip() +
  theme_minimal() +
  labs(title = "Distribution of Prices by categories",
       caption = "AVerage food prices by Categories ",
       y = "Average Price", x = "Food Categories")
# Re-check the available columns.
colnames(food)
## [1] "X_id" "unit" "category" "price" "currency"
## [6] "country" "admname" "adm1id" "mktname" "mktid"
## [11] "cmid" "ptid" "umid" "catid" "sn"
## [16] "year" "month" "day" "weekend" "monthabb"
## [21] "season" "categoryname"
# Mean price for every category/province pair.
cat_food <- food %>%
  group_by(category, admname) %>%
  summarise(avg = mean(price))
## `summarise()` has grouped output by 'category'. You can override using the
## `.groups` argument.
# Average price per category as horizontal bars, one panel per province.
# No fill aesthetic is mapped, so no manual fill scale is supplied; axis
# labels are set once.
ggplot(cat_food, aes(x = fct_reorder(category, avg), y = avg)) +
  facet_wrap(~admname) +
  geom_col() +
  coord_flip() +
  theme_minimal() +
  labs(title = "Distribution of Prices by categories",
       caption = "AVerage food prices by Categories ",
       y = "Average Price", x = "Food Categories")
# Re-check the available columns.
colnames(food)
## [1] "X_id" "unit" "category" "price" "currency"
## [6] "country" "admname" "adm1id" "mktname" "mktid"
## [11] "cmid" "ptid" "umid" "catid" "sn"
## [16] "year" "month" "day" "weekend" "monthabb"
## [21] "season" "categoryname"
# Preview the first rows again.
head(food)
## X_id unit category price currency country admname adm1id
## 2 2 KG cereals and tubers 13.000 PKR Pakistan Balochistan 2272
## 3 3 KG cereals and tubers 13.000 PKR Pakistan Balochistan 2272
## 4 4 KG cereals and tubers 14.250 PKR Pakistan Balochistan 2272
## 5 5 KG cereals and tubers 12.500 PKR Pakistan Balochistan 2272
## 6 6 KG cereals and tubers 13.250 PKR Pakistan Balochistan 2272
## 7 7 KG cereals and tubers 13.405 PKR Pakistan Balochistan 2272
## mktname mktid cmid ptid umid catid sn year month day weekend
## 2 Quetta 295 58 15 5 1 295_58_15_5 2004 1 5 weekday
## 3 Quetta 295 58 15 5 1 295_58_15_5 2004 2 1 weekend
## 4 Quetta 295 58 15 5 1 295_58_15_5 2004 3 2 weekday
## 5 Quetta 295 58 15 5 1 295_58_15_5 2004 4 5 weekday
## 6 Quetta 295 58 15 5 1 295_58_15_5 2004 5 7 weekend
## 7 Quetta 295 58 15 5 1 295_58_15_5 2004 6 3 weekday
## monthabb season categoryname
## 2 Jan Winter Wheat flour
## 3 Feb Winter Wheat flour
## 4 Mar Spring Wheat flour
## 5 Apr Spring Wheat flour
## 6 May Spring Wheat flour
## 7 Jun Summer Wheat flour
# Number of observations per market.
table(food$mktname)
##
## Karachi Lahore Multan Peshawar Quetta
## 1554 1550 1550 1555 1453
# Highest, lowest and mean price for every category x season x province cell.
season_food <- food %>%
  group_by(category, season, admname) %>%
  summarise(
    max = max(price),
    min = min(price),
    avg = mean(price)
  )
## `summarise()` has grouped output by 'category', 'season'. You can override
## using the `.groups` argument.
# Bar chart of one seasonal price statistic per food category.
#   data       - summary table with category, season, admname and the statistic
#   stat       - name of the statistic column to plot ("max", "min" or "avg")
#   title, caption, background - plot title, caption and background fill colour
# Panels form a province x season grid so that every bar is exactly one summary
# row. Faceting on only one of the two dimensions makes geom_col() stack (sum)
# the rows of the other, and a sum of maxima/minima/averages is not a
# maximum/minimum/average.
season_stat_plot <- function(data, stat, title, caption, background) {
  ggplot(data, aes(x = fct_reorder(category, .data[[stat]]),
                   y = .data[[stat]])) +
    geom_col() +
    theme_minimal() +
    facet_grid(admname ~ season) +
    coord_flip() +
    labs(title = title, caption = caption,
         y = "Price", x = "Food Categories") +
    theme(plot.title = element_text(hjust = 0.5),
          plot.background = element_rect(fill = background),
          axis.text.x = element_text(angle = 90))
}

max_p <- season_stat_plot(
  season_food, "max",
  title = "Max Priced categories by Seasons",
  caption = "Max food prices by Categories by Seasons ",
  background = "#adceff"
)
min_p <- season_stat_plot(
  season_food, "min",
  title = "Min Priced categories by Seasons",
  caption = "Min food prices by Categories by Seasons ",
  background = "#Fed6F7"
)
avg_p <- season_stat_plot(
  season_food, "avg",
  title = "Average Priced categories by Seasons",
  caption = "Average food prices by seasons ",
  background = "#F4F6F7"
)
grid.arrange(min_p, max_p, avg_p, nrow = 1)
# Highest price per food category and season within one province, drawn as
# horizontal bars with one panel per season.
#   data       - price table with admname, category, season and price
#   province   - value of `admname` to keep
#   title      - plot title
#   background - plot background fill colour
# `.groups = "drop"` returns an ungrouped summary, which also silences the
# summarise() grouping message.
province_category_max_plot <- function(data, province, title, background) {
  data %>%
    dplyr::filter(admname == province) %>%
    group_by(category, season) %>%
    summarise(max = max(price), .groups = "drop") %>%
    ggplot(aes(x = fct_reorder(category, max), y = max)) +
    geom_col() +
    theme_minimal() +
    facet_grid(~season) +
    coord_flip() +
    labs(title = title,
         caption = "Categories by Seasons ",
         y = "Price", x = "Food Categories") +
    theme(plot.title = element_text(hjust = 0.5),
          plot.background = element_rect(fill = background),
          axis.text.x = element_text(angle = 90))
}

b <- province_category_max_plot(food, "Balochistan",
                                "Max Priced in Balochistan", "#adceff")
s <- province_category_max_plot(food, "Sindh",
                                "Max Priced in Sindh", "#Fed6F7")
kpk <- province_category_max_plot(food, "Khyber Pakhtunkhwa",
                                  "Max Priced in KPK", "#aedce3")
p <- province_category_max_plot(food, "Punjab",
                                "Max Priced in Punjab", "#affae3")
grid.arrange(b, p, kpk, s, nrow = 2, ncol = 2)
# Highest price per individual item (`categoryname`) and season within one
# province, drawn as horizontal bars with one panel per season.
#   data       - price table with admname, categoryname, season and price
#   province   - value of `admname` to keep
#   title      - plot title
#   background - plot background fill colour
# `.groups = "drop"` returns an ungrouped summary, which also silences the
# summarise() grouping message.
province_item_max_plot <- function(data, province, title, background) {
  data %>%
    dplyr::filter(admname == province) %>%
    group_by(categoryname, season) %>%
    summarise(max = max(price), .groups = "drop") %>%
    ggplot(aes(x = fct_reorder(categoryname, max), y = max)) +
    geom_col() +
    theme_minimal() +
    facet_grid(~season) +
    coord_flip() +
    labs(title = title,
         caption = "Categories by Seasons ",
         y = "Price", x = "Food Categories") +
    theme(plot.title = element_text(hjust = 0.5),
          plot.background = element_rect(fill = background),
          axis.text.x = element_text(angle = 90))
}

# These reuse (overwrite) the plot variables b, s, kpk and p.
b <- province_item_max_plot(food, "Balochistan",
                            "Max Priced in Balochistan", "#adceff")
s <- province_item_max_plot(food, "Sindh",
                            "Max Priced in Sindh", "#Fed6F7")
kpk <- province_item_max_plot(food, "Khyber Pakhtunkhwa",
                              "Max Priced in KPK", "#aedce3")
p <- province_item_max_plot(food, "Punjab",
                            "Max Priced in Punjab", "#affae3")
grid.arrange(b, p, kpk, s, nrow = 2, ncol = 2)
# Re-check the available columns before the market-level analysis.
colnames(food)
## [1] "X_id" "unit" "category" "price" "currency"
## [6] "country" "admname" "adm1id" "mktname" "mktid"
## [11] "cmid" "ptid" "umid" "catid" "sn"
## [16] "year" "month" "day" "weekend" "monthabb"
## [21] "season" "categoryname"
# Preview the first rows again.
head(food)
## X_id unit category price currency country admname adm1id
## 2 2 KG cereals and tubers 13.000 PKR Pakistan Balochistan 2272
## 3 3 KG cereals and tubers 13.000 PKR Pakistan Balochistan 2272
## 4 4 KG cereals and tubers 14.250 PKR Pakistan Balochistan 2272
## 5 5 KG cereals and tubers 12.500 PKR Pakistan Balochistan 2272
## 6 6 KG cereals and tubers 13.250 PKR Pakistan Balochistan 2272
## 7 7 KG cereals and tubers 13.405 PKR Pakistan Balochistan 2272
## mktname mktid cmid ptid umid catid sn year month day weekend
## 2 Quetta 295 58 15 5 1 295_58_15_5 2004 1 5 weekday
## 3 Quetta 295 58 15 5 1 295_58_15_5 2004 2 1 weekend
## 4 Quetta 295 58 15 5 1 295_58_15_5 2004 3 2 weekday
## 5 Quetta 295 58 15 5 1 295_58_15_5 2004 4 5 weekday
## 6 Quetta 295 58 15 5 1 295_58_15_5 2004 5 7 weekend
## 7 Quetta 295 58 15 5 1 295_58_15_5 2004 6 3 weekday
## monthabb season categoryname
## 2 Jan Winter Wheat flour
## 3 Feb Winter Wheat flour
## 4 Mar Spring Wheat flour
## 5 Apr Spring Wheat flour
## 6 May Spring Wheat flour
## 7 Jun Summer Wheat flour
# Price statistics for every season x market x month cell, pooled over items.
mkt_food <- food %>%
  group_by(season, mktname, monthabb) %>%
  summarise(
    avg = mean(price),
    min = min(price),
    max = max(price),
    sd = sd(price)
  )
## `summarise()` has grouped output by 'season', 'mktname'. You can override using
## the `.groups` argument.
# Highest price per season in each market.
# mkt_food holds one row per season x market x month, so the monthly rows are
# collapsed with stat_summary(); a plain geom_col() would stack (sum) them.
# Titles and axis labels describe what is drawn: seasons on the axis, one
# panel per market.
ggplot(mkt_food, aes(x = fct_reorder(season, max), y = max)) +
  stat_summary(fun = base::max, geom = "col") +
  facet_wrap(~mktname) +
  theme_minimal() +
  coord_flip() +
  labs(title = "Max Price by Season in Each Market",
       caption = "Maximum over all items and months of the season",
       y = "Price", x = "Season") +
  theme(plot.title = element_text(hjust = 0.5),
        plot.background = element_rect(fill = "yellow"),
        axis.text.x = element_text(angle = 90))

# Lowest price per season in each market, collapsed over months the same way.
ggplot(mkt_food, aes(x = fct_reorder(season, min), y = min)) +
  stat_summary(fun = base::min, geom = "col") +
  facet_wrap(~mktname) +
  theme_minimal() +
  coord_flip() +
  labs(title = "Min Price by Season in Each Market",
       caption = "Minimum over all items and months of the season",
       y = "Price", x = "Season") +
  theme(plot.title = element_text(hjust = 0.5),
        plot.background = element_rect(fill = "green"),
        axis.text.x = element_text(angle = 90))
# Karachi
# Per-season price statistics of every item in the Karachi market.
kf <- food %>%
  filter(mktname == "Karachi") %>%
  group_by(season, categoryname) %>%
  summarise(
    avg = mean(price),
    min = min(price),
    max = max(price),
    sd = sd(price)
  )
## `summarise()` has grouped output by 'season'. You can override using the
## `.groups` argument.
# Layers shared by both Karachi charts: flipped bars, one panel per season,
# minimal theme with a green background and rotated x-axis text.
karachi_layers <- list(
  geom_col(),
  theme_minimal(),
  coord_flip(),
  facet_wrap(vars(season)),
  theme(plot.title = element_text(hjust = 0.5),
        plot.background = element_rect(fill = "green"),
        axis.text.x = element_text(angle = 90))
)

# Highest price of each item per season in Karachi.
kmax <- ggplot(data = kf,
               mapping = aes(x = fct_reorder(categoryname, max), y = max)) +
  karachi_layers +
  labs(title = "Max Priced in Karachi",
       caption = "Saless by Seasons ",
       y = "Price", x = "Food Categories")
kmax

# Lowest price of each item per season in Karachi.
kmin <- ggplot(data = kf,
               mapping = aes(x = fct_reorder(categoryname, min), y = min)) +
  karachi_layers +
  labs(title = "Min Priced in Karachi",
       caption = "Saless by Seasons ",
       y = "Price", x = "Food Categories")
kmin
# Number of observations per market.
table(food$mktname)
##
## Karachi Lahore Multan Peshawar Quetta
## 1554 1550 1550 1555 1453
# Lahore
# Per-season price statistics of every item in the Lahore market.
lf <- food %>%
  filter(mktname == "Lahore") %>%
  group_by(season, categoryname) %>%
  summarise(
    avg = mean(price),
    min = min(price),
    max = max(price),
    sd = sd(price)
  )
## `summarise()` has grouped output by 'season'. You can override using the
## `.groups` argument.
# Lahore: highest and lowest price of each item per season.
# Both charts read `lf`, the Lahore summary; plotting `kf` would repeat the
# Karachi figures under a Lahore title.
lmax <- ggplot(lf, aes(x = fct_reorder(categoryname, max), y = max)) +
  geom_col() +
  theme_minimal() +
  coord_flip() +
  facet_wrap(~season) +
  labs(title = "Max Priced in Lahore",
       caption = "Saless by Seasons ",
       y = "Price", x = "Food Categories") +
  theme(plot.title = element_text(hjust = 0.5),
        plot.background = element_rect(fill = "green"),
        axis.text.x = element_text(angle = 90))
lmax
lmin <- ggplot(lf, aes(x = fct_reorder(categoryname, min), y = min)) +
  geom_col() +
  theme_minimal() +
  coord_flip() +
  facet_wrap(~season) +
  labs(title = "Min Priced in Lahore",
       caption = "Saless by Seasons ",
       y = "Price", x = "Food Categories") +
  theme(plot.title = element_text(hjust = 0.5),
        plot.background = element_rect(fill = "green"),
        axis.text.x = element_text(angle = 90))
lmin
# Multan
# Per-season price statistics of every item in the Multan market.
mf <- food %>%
  filter(mktname == "Multan") %>%
  group_by(season, categoryname) %>%
  summarise(
    avg = mean(price),
    min = min(price),
    max = max(price),
    sd = sd(price)
  )
## `summarise()` has grouped output by 'season'. You can override using the
## `.groups` argument.
# Multan: highest and lowest price of each item per season.
# Both charts read `mf`, the Multan summary; plotting `kf` would repeat the
# Karachi figures under a Multan title.
mmax <- ggplot(mf, aes(x = fct_reorder(categoryname, max), y = max)) +
  geom_col() +
  theme_minimal() +
  coord_flip() +
  facet_wrap(~season) +
  labs(title = "Max Priced in Multan",
       caption = "Saless by Seasons ",
       y = "Price", x = "Food Categories") +
  theme(plot.title = element_text(hjust = 0.5),
        plot.background = element_rect(fill = "green"),
        axis.text.x = element_text(angle = 90))
mmax
mmin <- ggplot(mf, aes(x = fct_reorder(categoryname, min), y = min)) +
  geom_col() +
  theme_minimal() +
  coord_flip() +
  facet_wrap(~season) +
  labs(title = "Min Priced in Multan",
       caption = "Saless by Seasons ",
       y = "Price", x = "Food Categories") +
  theme(plot.title = element_text(hjust = 0.5),
        plot.background = element_rect(fill = "green"),
        axis.text.x = element_text(angle = 90))
mmin
# Peshawar
# Per-season price statistics of every item in the Peshawar market.
pf <- food %>%
  filter(mktname == "Peshawar") %>%
  group_by(season, categoryname) %>%
  summarise(
    avg = mean(price),
    min = min(price),
    max = max(price),
    sd = sd(price)
  )
## `summarise()` has grouped output by 'season'. You can override using the
## `.groups` argument.
# Peshawar: highest and lowest price of each item per season.
# Both charts read `pf`, the Peshawar summary; plotting `kf` would repeat the
# Karachi figures under a Peshawar title.
# NOTE(review): the names `pmax`/`pmin` shadow base::pmax()/pmin() as
# variables; they are kept so any later reference keeps working.
pmax <- ggplot(pf, aes(x = fct_reorder(categoryname, max), y = max)) +
  geom_col() +
  theme_minimal() +
  coord_flip() +
  facet_wrap(~season) +
  labs(title = "Max Priced in Peshawar",
       caption = "Saless by Seasons ",
       y = "Price", x = "Food Categories") +
  theme(plot.title = element_text(hjust = 0.5),
        plot.background = element_rect(fill = "green"),
        axis.text.x = element_text(angle = 90))
pmax
pmin <- ggplot(pf, aes(x = fct_reorder(categoryname, min), y = min)) +
  geom_col() +
  theme_minimal() +
  coord_flip() +
  facet_wrap(~season) +
  labs(title = "Min Priced in Peshawar",
       caption = "Saless by Seasons ",
       y = "Price", x = "Food Categories") +
  theme(plot.title = element_text(hjust = 0.5),
        plot.background = element_rect(fill = "green"),
        axis.text.x = element_text(angle = 90))
pmin
# Quetta
# Per-season price statistics of every item in the Quetta market.
qf <- food %>%
  filter(mktname == "Quetta") %>%
  group_by(season, categoryname) %>%
  summarise(
    avg = mean(price),
    min = min(price),
    max = max(price),
    sd = sd(price)
  )
## `summarise()` has grouped output by 'season'. You can override using the
## `.groups` argument.
# Quetta: per season, the three items with the largest `max` price.
# `qf` is still grouped by season, so top_n() picks the top three per season.
quetta_top_max <- top_n(qf, 3, max)
qmax <- ggplot(quetta_top_max,
               aes(x = fct_reorder(categoryname, max), y = max)) +
  geom_col() +
  coord_flip() +
  facet_wrap(vars(season)) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5),
        plot.background = element_rect(fill = "#def231"),
        axis.text.x = element_text(angle = 90)) +
  labs(title = "Max Priced in Quetta",
       caption = "Saless by Seasons ",
       y = "Price", x = "Food Categories")
qmax

# Quetta: per season, the three items with the largest `min` price.
quetta_top_min <- top_n(qf, 3, min)
qmin <- ggplot(quetta_top_min,
               aes(x = fct_reorder(categoryname, min), y = min)) +
  geom_col() +
  coord_flip() +
  facet_wrap(vars(season)) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5),
        plot.background = element_rect(fill = "#abc123"),
        axis.text.x = element_text(angle = 90)) +
  labs(title = "Min Priced in Quetta",
       caption = "Saless by Seasons ",
       y = "Price", x = "Food Categories")
qmin