Introduction

The below markdown brings in a data set forn the united nations cateloging the food aide sent to forieng countries over the years. The Goal will be to clean the data, look for possible trends, and ultimately prepare the data to be referenced in another report.

library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.1     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(stringr)
fao <- read.csv("FAO.csv")

We can pivot the date to align all of the year columns under one variable

fao <- fao%>%
  pivot_longer(cols = Y1961:Y2013, names_to = "year", values_to = "amount")

No need for the “Y” in the year variable.

fao$year <- str_sub(fao$year, 2,5)

Aggregate

fao_agg <- fao %>%
  filter(!is.na(amount))%>%
  group_by(Area, Item, latitude, longitude) %>%
  summarise("total_sent"= sum(amount))%>%
  arrange(Area, desc(total_sent ))%>%
  mutate("item_rank" = row_number())
## `summarise()` regrouping output by 'Area', 'Item', 'latitude' (override with `.groups` argument)

Side curiousity involving the top ranking food aide item.

average_item_rank <- fao %>%
  filter(!is.na(amount)) %>%
  group_by(Area, Item) %>%
  summarise("total_sent"= sum(amount)) %>%
  arrange(Area, desc(total_sent )) %>%
  mutate("item_rank" = row_number())%>%
  group_by(Item)%>%
  summarise(average_rank = mean(item_rank))%>%
  arrange(average_rank)
## `summarise()` regrouping output by 'Area' (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
head(average_item_rank)
## # A tibble: 6 x 2
##   Item                     average_rank
##   <chr>                           <dbl>
## 1 Cereals - Excluding Beer         2.13
## 2 Milk - Excluding Butter          4.08
## 3 Fruits - Excluding Wine          6.84
## 4 Vegetables                       7.28
## 5 Starchy Roots                    8.03
## 6 Wheat and products               8.46

Top receivers of food aide

fao_agg %>%
  group_by(Area)%>%
  summarise(area_total_sent = sum(total_sent))%>%
  arrange(desc(area_total_sent))%>%
  slice(1:10)%>%
  ggplot()+
  geom_bar(aes(x = Area, weight = area_total_sent  ))+
  coord_flip()
## `summarise()` ungrouping output (override with `.groups` argument)

Distance relationaship test

Is there a relationship between the distance from the USA and the amount of food aide.

library(geosphere )
usaloc <- c(97,38)
fao_agg %>%
  group_by(Area, latitude, longitude)%>%
  summarise(area_total_sent = sum(total_sent))%>%
  mutate("usa_dist" =  distm(c(longitude,latitude), usaloc, fun = distHaversine))%>%
  ggplot()+
  geom_point(aes(x=usa_dist, y= area_total_sent))+
  labs(x ="Distance from USA", y = "Tonage of Food Aide(1000s)")
## `summarise()` regrouping output by 'Area', 'latitude' (override with `.groups` argument)

We adjust the scales to remove the outlyers

fao_agg %>%
  group_by(Area, latitude, longitude)%>%
  summarise(area_total_sent = sum(total_sent))%>%
  mutate("usa_dist" =  distm(c(longitude,latitude), usaloc, fun = distHaversine))%>%
  ggplot()+
  geom_point(aes(x=usa_dist, y= area_total_sent))+
  labs(x ="Distance from USA", y = "Tonage of Food Aide(1000s)")+
  coord_cartesian(ylim = c(0,1000000))
## `summarise()` regrouping output by 'Area', 'latitude' (override with `.groups` argument)

There does seem to be some sort of multi-modal distribution between distance and amount of food aide.

fao_agg %>%
  group_by(Area, latitude, longitude)%>%
  summarise(area_total_sent = sum(total_sent))%>%
  mutate("usa_dist" =  distm(c(longitude,latitude), usaloc, fun = distHaversine))%>%
  ggplot()+
  geom_histogram(aes(x= usa_dist))
## `summarise()` regrouping output by 'Area', 'latitude' (override with `.groups` argument)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Output

fao_output <- fao%>%
  select(Area, year, amount)%>%
  mutate(year = as.numeric(year), amount = as.numeric(amount))
fao_output$year <- cut(fao_output$year, seq(1985,2019, 5))
filter(fao_output , !is.na(year))
## # A tibble: 601,356 x 3
##    Area        year        amount
##    <chr>       <fct>        <dbl>
##  1 Afghanistan (1985,1990]   1683
##  2 Afghanistan (1985,1990]   2194
##  3 Afghanistan (1985,1990]   1801
##  4 Afghanistan (1985,1990]   1754
##  5 Afghanistan (1985,1990]   1640
##  6 Afghanistan (1990,1995]   1539
##  7 Afghanistan (1990,1995]   1582
##  8 Afghanistan (1990,1995]   1840
##  9 Afghanistan (1990,1995]   1855
## 10 Afghanistan (1990,1995]   1853
## # … with 601,346 more rows
fao_output <- fao_output%>%
  group_by(Area, year)%>%
  summarise(food_amount = sum(amount))
## `summarise()` regrouping output by 'Area' (override with `.groups` argument)
fao_output$year <- str_sub(fao_output$year, 7, 10)
write_csv(fao_output, "fao_out.csv")