library(tidyverse)
library(kableExtra)
library(knitr)
library(stringr)
library(readr)
library(plyr)
library(dplyr)
library(data.table)
library(readxl)
require(xlsx)
library(rJava)
library(xlsx)

Third Dataset- Gapminder

War or food?

  • Are nations dealing with war more food insecure?
  • How do state expenditures differ on war versus food?

Load in third dataset

delete.na <- function(DF, n=0) {
  DF[rowSums(is.na(DF)) <= n,]
}


War<- read.xlsx("War.xlsx", header=TRUE,1,)
food_security <- read.xlsx("indicator food_consumption.xlsx", header=TRUE,1,)
Agricultural_expend<- read.xlsx("Agricultural_expenditure.xlsx", header=TRUE,1,)
military_expend <- read.xlsx("military_expenditure.xlsx", header=TRUE,1,)

dim(food_security)
## [1] 259  48
dim(military_expend)
## [1] 270  25
dim(Agricultural_expend)
## [1] 270  52
dim(War)
## [1] 192   4

DF Preparation

  • Eliminate rows with substantial NA’s
  • Convert columns to numeric
  • Rename columns to avoid duplication of Column names for eventual Join
  • Filter df’s for common shared year range of 1988-2007
military_expend <- delete.na(military_expend, 20)
Agricultural_expend <- delete.na(Agricultural_expend, 15)
food_security <- delete.na(food_security, 20)
War <- as_data_frame(lapply(War, function(x) if(is.numeric(x)) round(x,2) else x))
military_expend <- as_data_frame(lapply(military_expend, function(x) if(is.numeric(x)) round(x,2) else x))
War <- War[2:4]
colnames(War) <- c("Country", 'war_2002','war_2004')
colnames(military_expend) <-  c("Country",paste(1988:2011,'Military_expend'))
colnames(food_security) <- c("Country",paste (1961:2007,"food_security"))
colnames(Agricultural_expend) <- c("Country",paste(1961:2011,"Agricultural_expend"))
Agricultural_expend <- Agricultural_expend[c(1,29:48)]
food_security <- food_security[c(1,29:48)]
military_expend <- military_expend[c(1:21)]


merged_df_3<- inner_join(War,military_expend) %>%
    inner_join(.,food_security) %>% 
    inner_join(.,Agricultural_expend)  
## Joining, by = "Country"
## Warning: Column `Country` joining factors with different levels, coercing
## to character vector
## Joining, by = "Country"
## Warning: Column `Country` joining character vector and factor, coercing
## into character vector
## Joining, by = "Country"
## Warning: Column `Country` joining character vector and factor, coercing
## into character vector
dim(merged_df_3)
## [1] 83 63

Exploratory Analysis of Death from wars data

  • Explore war death df
merged_df_3$war_2002-merged_df_3$war_2004
##  [1]  14.90   0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.00
## [11]   0.00 154.58   0.00   0.01  -0.20   0.00   0.00   0.34  52.60   0.00
## [21]   8.06  88.66  -0.02   0.01   0.00   0.00   0.00   0.00   0.00   0.00
## [31]   0.00   0.00   0.00   0.00   0.02   2.71   0.12   0.00  -0.02   0.00
## [41]   0.00   4.02   1.41   0.00   0.00   0.00   0.00   0.00   0.00   0.00
## [51]   0.00   0.00   0.18 -19.12  -0.02   0.00   0.01   0.00   0.32   0.00
## [61]   0.03   1.55   0.00   0.88  -0.75   9.93   0.00  -0.44  -0.07 -44.86
## [71]   0.00   0.00   0.00   0.00   0.00   0.16   7.46   0.00  -0.07  -0.33
## [81]   0.00   0.00  33.29
merged_df_3[1:3] %>% 
    mutate(.,average_deaths=(war_2002+war_2004)/2) %>% 
    arrange(.,average_deaths) %>%  
    kable(.)
Country war_2002 war_2004 average_deaths
Argentina 0.00 0.00 0.000
Australia 0.00 0.00 0.000
Austria 0.00 0.00 0.000
Belgium 0.00 0.00 0.000
Benin 0.00 0.00 0.000
Bolivia 0.00 0.00 0.000
Botswana 0.00 0.00 0.000
Brazil 0.00 0.00 0.000
Brunei 0.00 0.00 0.000
Burkina Faso 0.00 0.00 0.000
Cameroon 0.00 0.00 0.000
Chile 0.00 0.00 0.000
China 0.00 0.00 0.000
Cuba 0.00 0.00 0.000
Fiji 0.00 0.00 0.000
Finland 0.00 0.00 0.000
France 0.00 0.00 0.000
Gabon 0.00 0.00 0.000
Gambia 0.00 0.00 0.000
Germany 0.00 0.00 0.000
Ghana 0.00 0.00 0.000
Guyana 0.00 0.00 0.000
Honduras 0.00 0.00 0.000
Ireland 0.00 0.00 0.000
Japan 0.00 0.00 0.000
Lesotho 0.00 0.00 0.000
Madagascar 0.00 0.00 0.000
Malaysia 0.00 0.00 0.000
Mali 0.00 0.00 0.000
Malta 0.00 0.00 0.000
Mauritius 0.00 0.00 0.000
Mexico 0.00 0.00 0.000
New Zealand 0.00 0.00 0.000
Norway 0.00 0.00 0.000
Paraguay 0.00 0.00 0.000
Portugal 0.00 0.00 0.000
Swaziland 0.00 0.00 0.000
Sweden 0.00 0.00 0.000
Togo 0.00 0.00 0.000
Tunisia 0.00 0.00 0.000
United Arab Emirates 0.00 0.00 0.000
Venezuela 0.00 0.00 0.000
Canada 0.01 0.00 0.005
Egypt 0.01 0.00 0.005
Denmark 0.00 0.02 0.010
Hungary 0.01 0.01 0.010
Italy 0.00 0.02 0.010
Netherlands 0.00 0.02 0.010
South Africa 0.01 0.01 0.010
Peru 0.03 0.00 0.015
United Kingdom 0.00 0.07 0.035
Zambia 0.04 0.04 0.040
Niger 0.06 0.05 0.055
Iran 0.12 0.00 0.060
Jordan 0.07 0.07 0.070
Malawi 0.07 0.07 0.070
Turkey 0.16 0.00 0.080
Mauritania 0.09 0.09 0.090
United States 0.01 0.34 0.175
Spain 0.00 0.44 0.220
Saudi Arabia 0.03 0.78 0.405
India 0.53 0.51 0.520
Kuwait 1.41 0.00 0.705
Thailand 0.99 0.99 0.990
Chad 2.03 2.23 2.130
Indonesia 3.98 1.27 2.625
Philippines 4.10 2.55 3.325
Kenya 5.47 1.45 3.460
Pakistan 4.30 3.98 4.140
Sri Lanka 4.88 4.95 4.915
Sierra Leone 9.93 0.00 4.965
Rwanda 9.04 8.16 8.600
Myanmar 9.52 9.34 9.430
Algeria 18.31 3.41 10.860
Nepal 6.25 25.37 15.810
Zimbabwe 33.80 0.51 17.155
Colombia 20.33 19.99 20.160
Congo, Rep. 52.60 0.00 26.300
Cote d’Ivoire 32.25 24.19 28.220
Uganda 43.92 36.46 40.190
Sudan 57.91 102.77 80.340
Congo, Dem. Rep. 130.20 41.54 85.870
Burundi 193.64 39.06 116.350

Tidy dataset

  • Add average deaths from war column
  • Create a categorical factor level of deaths from war
    • 0 No_Deaths

    • 0-1 Low_Deaths

    • 1-10 Moderate_Deaths

    • 10+ High_Deaths

food_war <- merged_df_3 %>% 
    gather(Wars,Values,4:63) %>% 
    separate(.,Wars,into= c("Year", "Type"),sep=" ") %>% 
    spread(Type,Values) %>% 
    mutate(.,average_deaths=(war_2002+war_2004)/2) %>% 
    select(-c(war_2002,war_2004)) %>% 
    mutate(Deaths_from_war=cut(average_deaths, breaks=c(-1,0,1,10,Inf), labels= c("No_Deaths", "Low_Deaths", "Moderate_Deaths", "High_Deaths")))
   
    

kable(food_war[1:50,])
Country Year Agricultural_expend food_security Military_expend average_deaths Deaths_from_war
Algeria 1988 12.167656 2795.00 1.75 10.86 High_Deaths
Algeria 1989 13.039015 2862.84 1.54 10.86 High_Deaths
Algeria 1990 11.358267 2856.21 1.46 10.86 High_Deaths
Algeria 1991 10.167058 2835.47 1.23 10.86 High_Deaths
Algeria 1992 12.126846 2972.77 2.19 10.86 High_Deaths
Algeria 1993 12.097073 2971.61 2.56 10.86 High_Deaths
Algeria 1994 10.058437 2865.20 3.14 10.86 High_Deaths
Algeria 1995 10.497818 2891.30 2.95 10.86 High_Deaths
Algeria 1996 11.766699 2893.72 3.09 10.86 High_Deaths
Algeria 1997 9.482320 2844.07 3.63 10.86 High_Deaths
Algeria 1998 12.533281 2904.07 3.96 10.86 High_Deaths
Algeria 1999 12.201079 2957.91 3.77 10.86 High_Deaths
Algeria 2000 8.879884 2928.84 3.44 10.86 High_Deaths
Algeria 2001 10.407560 3003.63 3.80 10.86 High_Deaths
Algeria 2002 10.003598 3034.33 3.67 10.86 High_Deaths
Algeria 2003 10.491237 3073.26 3.25 10.86 High_Deaths
Algeria 2004 10.188493 3090.13 3.30 10.86 High_Deaths
Algeria 2005 8.221657 3059.24 2.85 10.86 High_Deaths
Algeria 2006 7.988789 3101.20 2.64 10.86 High_Deaths
Algeria 2007 8.025346 3153.38 2.90 10.86 High_Deaths
Argentina 1988 8.977868 3028.90 1.98 0.00 No_Deaths
Argentina 1989 9.616065 2980.35 1.78 0.00 No_Deaths
Argentina 1990 8.123676 2924.80 1.37 0.00 No_Deaths
Argentina 1991 6.716492 3024.96 1.42 0.00 No_Deaths
Argentina 1992 5.990787 3087.95 1.34 0.00 No_Deaths
Argentina 1993 5.492025 3123.27 1.34 0.00 No_Deaths
Argentina 1994 5.440196 3162.52 1.46 0.00 No_Deaths
Argentina 1995 5.698112 3168.17 1.47 0.00 No_Deaths
Argentina 1996 5.997435 3162.69 1.24 0.00 No_Deaths
Argentina 1997 5.599945 3135.69 1.14 0.00 No_Deaths
Argentina 1998 5.617612 3166.62 1.14 0.00 No_Deaths
Argentina 1999 4.735356 3266.75 1.22 0.00 No_Deaths
Argentina 2000 4.971221 3271.70 1.15 0.00 No_Deaths
Argentina 2001 4.814450 3183.34 1.18 0.00 No_Deaths
Argentina 2002 10.683021 2965.90 1.09 0.00 No_Deaths
Argentina 2003 10.986832 3059.74 1.06 0.00 No_Deaths
Argentina 2004 10.412916 3029.91 0.96 0.00 No_Deaths
Argentina 2005 9.401029 3091.79 0.93 0.00 No_Deaths
Argentina 2006 8.392957 2969.12 0.86 0.00 No_Deaths
Argentina 2007 9.394875 2940.98 0.88 0.00 No_Deaths
Australia 1988 5.208421 3121.54 2.28 0.00 No_Deaths
Australia 1989 5.364690 3141.58 2.15 0.00 No_Deaths
Australia 1990 4.923955 3177.79 2.10 0.00 No_Deaths
Australia 1991 3.631461 3109.40 2.15 0.00 No_Deaths
Australia 1992 3.520368 3116.99 2.19 0.00 No_Deaths
Australia 1993 3.710071 3044.14 2.21 0.00 No_Deaths
Australia 1994 3.799483 3038.66 2.17 0.00 No_Deaths
Australia 1995 3.416733 3081.55 2.07 0.00 No_Deaths
Australia 1996 3.819477 3056.62 1.97 0.00 No_Deaths
Australia 1997 3.666767 3091.38 1.91 0.00 No_Deaths

Summary Explorations

food_war <-food_war %>% 
    dplyr::group_by(Deaths_from_war,Year)
agricultural_spending_by_year <- food_war %>% 
    dplyr::summarize(agri_exp_mean=mean(Agricultural_expend, na.rm = TRUE)) 
food_war %>% 
    dplyr::summarize(Military_Spending_Pct_GDP_By_Year=mean(Military_expend, na.rm = TRUE)) %>% 
    kable(.)
Deaths_from_war Year Military_Spending_Pct_GDP_By_Year
No_Deaths 1988 2.323429
No_Deaths 1989 2.268684
No_Deaths 1990 2.363947
No_Deaths 1991 2.218108
No_Deaths 1992 1.989167
No_Deaths 1993 1.972105
No_Deaths 1994 1.914865
No_Deaths 1995 1.834324
No_Deaths 1996 1.737778
No_Deaths 1997 1.873889
No_Deaths 1998 1.905556
No_Deaths 1999 1.811622
No_Deaths 2000 1.734000
No_Deaths 2001 1.732250
No_Deaths 2002 1.673750
No_Deaths 2003 1.719286
No_Deaths 2004 1.628095
No_Deaths 2005 1.590476
No_Deaths 2006 1.501951
No_Deaths 2007 1.528750
Low_Deaths 1988 4.386000
Low_Deaths 1989 4.038095
Low_Deaths 1990 5.793333
Low_Deaths 1991 8.931429
Low_Deaths 1992 4.616191
Low_Deaths 1993 3.623810
Low_Deaths 1994 3.422273
Low_Deaths 1995 3.093182
Low_Deaths 1996 2.920000
Low_Deaths 1997 2.936818
Low_Deaths 1998 3.098095
Low_Deaths 1999 2.916364
Low_Deaths 2000 2.933810
Low_Deaths 2001 3.063810
Low_Deaths 2002 2.793810
Low_Deaths 2003 2.787619
Low_Deaths 2004 2.625455
Low_Deaths 2005 2.495000
Low_Deaths 2006 2.490476
Low_Deaths 2007 2.481500
Moderate_Deaths 1988 2.511250
Moderate_Deaths 1989 2.498750
Moderate_Deaths 1990 2.955000
Moderate_Deaths 1991 3.241250
Moderate_Deaths 1992 3.203750
Moderate_Deaths 1993 3.255556
Moderate_Deaths 1994 2.980000
Moderate_Deaths 1995 3.244444
Moderate_Deaths 1996 3.170000
Moderate_Deaths 1997 2.593333
Moderate_Deaths 1998 2.661250
Moderate_Deaths 1999 2.536250
Moderate_Deaths 2000 2.921250
Moderate_Deaths 2001 2.497778
Moderate_Deaths 2002 2.276667
Moderate_Deaths 2003 2.266250
Moderate_Deaths 2004 2.028750
Moderate_Deaths 2005 1.893750
Moderate_Deaths 2006 2.001429
Moderate_Deaths 2007 2.451250
High_Deaths 1988 2.592500
High_Deaths 1989 2.520000
High_Deaths 1990 2.518750
High_Deaths 1991 2.283750
High_Deaths 1992 2.321250
High_Deaths 1993 2.395000
High_Deaths 1994 2.254286
High_Deaths 1995 2.436667
High_Deaths 1996 2.593333
High_Deaths 1997 2.473333
High_Deaths 1998 2.882857
High_Deaths 1999 3.325000
High_Deaths 2000 3.235000
High_Deaths 2001 3.190000
High_Deaths 2002 4.026250
High_Deaths 2003 2.690000
High_Deaths 2004 3.136000
High_Deaths 2005 2.664000
High_Deaths 2006 2.562000
High_Deaths 2007 2.337778
food_security_by_year <- food_war %>% 
    dplyr::summarize(my_mean=mean(food_security, na.rm = TRUE))

food_war <-food_war %>% 
    dplyr::group_by(Deaths_from_war)
food_war %>% 
    dplyr::summarize(Mean_Agricultural_Spending_Pct_GDP=mean(Agricultural_expend, na.rm = TRUE)) %>% 
    kable(.)
Deaths_from_war Mean_Agricultural_Spending_Pct_GDP
No_Deaths 13.00998
Low_Deaths 11.75719
Moderate_Deaths 31.46913
High_Deaths 29.13588
food_war %>% 
    dplyr::summarize(Mean_Military_Spending_Pct=mean(Military_expend, na.rm = TRUE)) %>% 
    kable(.)
Deaths_from_war Mean_Military_Spending_Pct
No_Deaths 1.856862
Low_Deaths 3.562188
Moderate_Deaths 2.671807
High_Deaths 2.725783
food_war %>% 
    dplyr::summarize(mean_Caloric_consuimption=mean(food_security, na.rm = TRUE)) %>% 
    kable(.)
Deaths_from_war mean_Caloric_consuimption
No_Deaths 2827.043
Low_Deaths 2913.194
Moderate_Deaths 2146.651
High_Deaths 2234.371

Warnings about observations

  • My categories aren’t of even sizes so my clustering could effect my observations and lead to large impact from outliers
  • I don’t have an overall GDP per country, this could make a big difference in relative % GDP expenditures
  • Tentative Observations
    • Areas with the most death from wars seem to spend more of their GDP on agricultural expenditures
    • Areas with the most deaths don’t spend much on military expenditures, and low death areas actually spend more than high death areas
    • High and moderate death areas are significantly more food insecure. Calorie consumption is about 30% lower in these areas

Data Visualization

food_security_by_year
## # A tibble: 80 x 3
## # Groups: Deaths_from_war [?]
##    Deaths_from_war Year  my_mean
##    <fctr>          <chr>   <dbl>
##  1 No_Deaths       1988     2744
##  2 No_Deaths       1989     2741
##  3 No_Deaths       1990     2734
##  4 No_Deaths       1991     2745
##  5 No_Deaths       1992     2766
##  6 No_Deaths       1993     2758
##  7 No_Deaths       1994     2772
##  8 No_Deaths       1995     2780
##  9 No_Deaths       1996     2801
## 10 No_Deaths       1997     2809
## # ... with 70 more rows
food_security_by_year %>% 
    ggplot(.,aes(x=Year,y=my_mean))+
    geom_bar(aes(fill = Deaths_from_war), position = "dodge", stat = "identity")+
    theme(axis.text.x=element_text(angle=45,hjust=1))

  • Caloric consumption seems rather Flat in all the categories over the last three decades
agricultural_spending_by_year %>% 
    ggplot(.,aes(x=Year,y=agri_exp_mean))+
    geom_bar(aes(fill = Deaths_from_war), position = "dodge", stat = "identity")+
    theme(axis.text.x=element_text(angle=45,hjust=1))

  • As a percent of GDP agricultural expenditure has decreased across all categories over last three decades
  • Area’s with higher casualties of war seem to have made large cuts to expenditures

Most interesting graphs and observations

  • A handful of outliers were removed for better visualization
food_war %>% 
    ggplot(.,aes(x=Agricultural_expend,y=food_security,shape=Deaths_from_war,color=Deaths_from_war))+
    geom_point()
## Warning: Removed 15 rows containing missing values (geom_point).

Graph 1 Agricultural Exp vs Food Security

  • Two Clear Clusters develop in the first graph
    • Areas with high death rates seem to be the least food secure and simultaneously spend the most on agricultural subsidy
    • Areas with low death rates spends the least on food subsidy, and are the most food secure
food_war %>% 
    filter(.,Military_expend <20) %>% 
    ggplot(.,aes(x=Military_expend,y=food_security,shape=Deaths_from_war,color=Deaths_from_war))+
    geom_point()

Graph 2 Military Exp vs Food Security

  • Areas with low death rates seem to spend the most on the Military