The below markdown brings in a data set forn the united nations cateloging the food aide sent to forieng countries over the years. The Goal will be to clean the data, look for possible trends, and ultimately prepare the data to be referenced in another report.
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.1 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(stringr)
fao <- read.csv("FAO.csv")
We can pivot the date to align all of the year columns under one variable
fao <- fao%>%
pivot_longer(cols = Y1961:Y2013, names_to = "year", values_to = "amount")
No need for the “Y” in the year variable.
fao$year <- str_sub(fao$year, 2,5)
fao_agg <- fao %>%
filter(!is.na(amount))%>%
group_by(Area, Item, latitude, longitude) %>%
summarise("total_sent"= sum(amount))%>%
arrange(Area, desc(total_sent ))%>%
mutate("item_rank" = row_number())
## `summarise()` regrouping output by 'Area', 'Item', 'latitude' (override with `.groups` argument)
Side curiousity involving the top ranking food aide item.
average_item_rank <- fao %>%
filter(!is.na(amount)) %>%
group_by(Area, Item) %>%
summarise("total_sent"= sum(amount)) %>%
arrange(Area, desc(total_sent )) %>%
mutate("item_rank" = row_number())%>%
group_by(Item)%>%
summarise(average_rank = mean(item_rank))%>%
arrange(average_rank)
## `summarise()` regrouping output by 'Area' (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
head(average_item_rank)
## # A tibble: 6 x 2
## Item average_rank
## <chr> <dbl>
## 1 Cereals - Excluding Beer 2.13
## 2 Milk - Excluding Butter 4.08
## 3 Fruits - Excluding Wine 6.84
## 4 Vegetables 7.28
## 5 Starchy Roots 8.03
## 6 Wheat and products 8.46
fao_agg %>%
group_by(Area)%>%
summarise(area_total_sent = sum(total_sent))%>%
arrange(desc(area_total_sent))%>%
slice(1:10)%>%
ggplot()+
geom_bar(aes(x = Area, weight = area_total_sent ))+
coord_flip()
## `summarise()` ungrouping output (override with `.groups` argument)
Is there a relationship between the distance from the USA and the amount of food aide.
library(geosphere )
usaloc <- c(97,38)
fao_agg %>%
group_by(Area, latitude, longitude)%>%
summarise(area_total_sent = sum(total_sent))%>%
mutate("usa_dist" = distm(c(longitude,latitude), usaloc, fun = distHaversine))%>%
ggplot()+
geom_point(aes(x=usa_dist, y= area_total_sent))+
labs(x ="Distance from USA", y = "Tonage of Food Aide(1000s)")
## `summarise()` regrouping output by 'Area', 'latitude' (override with `.groups` argument)
We adjust the scales to remove the outlyers
fao_agg %>%
group_by(Area, latitude, longitude)%>%
summarise(area_total_sent = sum(total_sent))%>%
mutate("usa_dist" = distm(c(longitude,latitude), usaloc, fun = distHaversine))%>%
ggplot()+
geom_point(aes(x=usa_dist, y= area_total_sent))+
labs(x ="Distance from USA", y = "Tonage of Food Aide(1000s)")+
coord_cartesian(ylim = c(0,1000000))
## `summarise()` regrouping output by 'Area', 'latitude' (override with `.groups` argument)
There does seem to be some sort of multi-modal distribution between distance and amount of food aide.
fao_agg %>%
group_by(Area, latitude, longitude)%>%
summarise(area_total_sent = sum(total_sent))%>%
mutate("usa_dist" = distm(c(longitude,latitude), usaloc, fun = distHaversine))%>%
ggplot()+
geom_histogram(aes(x= usa_dist))
## `summarise()` regrouping output by 'Area', 'latitude' (override with `.groups` argument)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
fao_output <- fao%>%
select(Area, year, amount)%>%
mutate(year = as.numeric(year), amount = as.numeric(amount))
fao_output$year <- cut(fao_output$year, seq(1985,2019, 5))
filter(fao_output , !is.na(year))
## # A tibble: 601,356 x 3
## Area year amount
## <chr> <fct> <dbl>
## 1 Afghanistan (1985,1990] 1683
## 2 Afghanistan (1985,1990] 2194
## 3 Afghanistan (1985,1990] 1801
## 4 Afghanistan (1985,1990] 1754
## 5 Afghanistan (1985,1990] 1640
## 6 Afghanistan (1990,1995] 1539
## 7 Afghanistan (1990,1995] 1582
## 8 Afghanistan (1990,1995] 1840
## 9 Afghanistan (1990,1995] 1855
## 10 Afghanistan (1990,1995] 1853
## # … with 601,346 more rows
fao_output <- fao_output%>%
group_by(Area, year)%>%
summarise(food_amount = sum(amount))
## `summarise()` regrouping output by 'Area' (override with `.groups` argument)
fao_output$year <- str_sub(fao_output$year, 7, 10)
write_csv(fao_output, "fao_out.csv")