library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------------------------------ tidyverse 1.2.1 --
## v ggplot2 3.2.0     v purrr   0.3.2
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   0.8.3     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts --------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggthemes)
library(ggrepel)
library(readr)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
library(RColorBrewer)
library(timetk)
## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose

#Data cleaning First thing I did was pull up my first data set on military expenditure by country and get rid of countries with too little data or ones that did not exist anymore. These were: Austria-Hungary, Czechoslovakia, Republic of Vietnam, Saint Kitts, Saint Vincent, Sao Tome and Principe. I did not get rid of East and West Germany since there was good data and I believe there is value in including them in a project on military expenditure, especially since there is data for unified Germany right after the fall of the Berlin Wall. After this I added a new column called region, and since I planned on joining the data set with another data set named “Battle related deaths in state based conflicts”, I assigned the countries to regions that were contained in the Battle death data set. These were: Africa (Inc. North Africa), Americas, Asia and Oceania, Europe, Middle East.

#Set data frames I decided to set the two datasets

expense <- read_csv("C:/Users/chat5/OneDrive/Desktop/DATA 110/Final project data/Mil-expen-Final-csv.csv")
## Parsed with column specification:
## cols(
##   Country = col_character(),
##   Region = col_character(),
##   Year = col_double(),
##   militaryexpenditure = col_double()
## )
expense
## # A tibble: 9,210 x 4
##    Country Region  Year militaryexpenditure
##    <chr>   <chr>  <dbl>               <dbl>
##  1 Algeria Africa  1962          297532141.
##  2 Algeria Africa  1963          359428745.
##  3 Algeria Africa  1964          442442264 
##  4 Algeria Africa  1965          434634459.
##  5 Algeria Africa  1966          422655454.
##  6 Algeria Africa  1967          410695522.
##  7 Algeria Africa  1968          393973068.
##  8 Algeria Africa  1969          375585231.
##  9 Algeria Africa  1970          356632926.
## 10 Algeria Africa  1971          340250556.
## # ... with 9,200 more rows
deaths <- read_csv("C:/Users/chat5/OneDrive/Desktop/DATA 110/Final project data/battle-related-deaths-in-state-Final-csv.csv")
## Parsed with column specification:
## cols(
##   Region = col_character(),
##   Year = col_double(),
##   Battledeaths = col_double()
## )
deaths
## # A tibble: 355 x 3
##    Region  Year Battledeaths
##    <chr>  <dbl>        <dbl>
##  1 Africa  1946            0
##  2 Africa  1947            0
##  3 Africa  1948            0
##  4 Africa  1949            0
##  5 Africa  1950            0
##  6 Africa  1951            0
##  7 Africa  1952         2591
##  8 Africa  1953         2791
##  9 Africa  1954        23871
## 10 Africa  1955        23271
## # ... with 345 more rows

#Join datasets First, I attempted to join the two cleaned data sets by Region using the merge Function. Below is the code for that attempt. Unfortunately, when it was merged it would create two-year columns for the two sets of data and would omit certain data for reasons unknown to me. This problem was fixed with the full join function which gave values of zero for where data was not present all the while keeping one column for year.

#df1 <- merge(expenses, deaths, by = “Region”)

df1 <- full_join(deaths, expense )
## Joining, by = c("Region", "Year")

#Print Now let’s see the combined data set. As you can see the battle death data set only provided deaths for the region, so we don’t have individual country data for that. But we can see the battle death data with expense per year for each country on the same table.

df1
## # A tibble: 9,255 x 5
##    Region  Year Battledeaths Country      militaryexpenditure
##    <chr>  <dbl>        <dbl> <chr>                      <dbl>
##  1 Africa  1946            0 South Africa          679933657.
##  2 Africa  1947            0 South Africa          427752955 
##  3 Africa  1948            0 South Africa          408859910.
##  4 Africa  1949            0 South Africa          265859081 
##  5 Africa  1950            0 South Africa          174095162.
##  6 Africa  1951            0 South Africa          317496209.
##  7 Africa  1952         2591 South Africa          356393814.
##  8 Africa  1953         2791 South Africa          317302143.
##  9 Africa  1954        23871 South Africa          299514013.
## 10 Africa  1955        23271 South Africa          306571740.
## # ... with 9,245 more rows

#Filter table for values Now from our combined table we’re going to pull three columns to make a plot for military expenditure per region per year.

expen1 <- df1 %>%
  select("Region", "Year", "militaryexpenditure", "Battledeaths")

#Print Now print table to check for accuracy.

expen1
## # A tibble: 9,255 x 4
##    Region  Year militaryexpenditure Battledeaths
##    <chr>  <dbl>               <dbl>        <dbl>
##  1 Africa  1946          679933657.            0
##  2 Africa  1947          427752955             0
##  3 Africa  1948          408859910.            0
##  4 Africa  1949          265859081             0
##  5 Africa  1950          174095162.            0
##  6 Africa  1951          317496209.            0
##  7 Africa  1952          356393814.         2591
##  8 Africa  1953          317302143.         2791
##  9 Africa  1954          299514013.        23871
## 10 Africa  1955          306571740.        23271
## # ... with 9,245 more rows

#Filter for Regions Now we’re going to filter to isolate one region, and then use summarize to add up all the expenditures per region for one year

expense_region_Africa <- expen1 %>%
  group_by(Region, Year) %>%
  filter(Region == 'Africa') %>%
  summarise(sum(militaryexpenditure,na.rm = TRUE)) 

Print

Print again to check for accuracy

expense_region_Africa
## # A tibble: 97 x 3
## # Groups:   Region [1]
##    Region  Year `sum(militaryexpenditure, na.rm = TRUE)`
##    <chr>  <dbl>                                    <dbl>
##  1 Africa  1920                                38244880.
##  2 Africa  1921                                52148896.
##  3 Africa  1922                                35635276.
##  4 Africa  1923                                35845411.
##  5 Africa  1924                                40411297.
##  6 Africa  1925                                46407063 
##  7 Africa  1926                                46972561.
##  8 Africa  1927                                48072772.
##  9 Africa  1928                                44923462.
## 10 Africa  1929                                43459602.
## # ... with 87 more rows

#Rename Rename “sum(militaryexpenditure, na.rm = TRUE)” to militaryexpenditure for clarity

colnames(expense_region_Africa)[colnames(expense_region_Africa)=="sum(militaryexpenditure, na.rm = TRUE)"] <- "militaryexpenditure"

#Check Check to make sure table is accurate

expense_region_Africa
## # A tibble: 97 x 3
## # Groups:   Region [1]
##    Region  Year militaryexpenditure
##    <chr>  <dbl>               <dbl>
##  1 Africa  1920           38244880.
##  2 Africa  1921           52148896.
##  3 Africa  1922           35635276.
##  4 Africa  1923           35845411.
##  5 Africa  1924           40411297.
##  6 Africa  1925           46407063 
##  7 Africa  1926           46972561.
##  8 Africa  1927           48072772.
##  9 Africa  1928           44923462.
## 10 Africa  1929           43459602.
## # ... with 87 more rows

#Repeat Now repeat steps for all other regions starting with the Americas

expense_region_Americas <- expen1 %>%
  group_by(Region, Year) %>%
  filter(Region == 'Americas') %>%
  summarise(sum(militaryexpenditure,na.rm = TRUE))
expense_region_Americas
## # A tibble: 103 x 3
## # Groups:   Region [1]
##    Region    Year `sum(militaryexpenditure, na.rm = TRUE)`
##    <chr>    <dbl>                                    <dbl>
##  1 Americas  1914                              4715162389.
##  2 Americas  1915                              4667239950.
##  3 Americas  1916                              3930628503.
##  4 Americas  1917                              7205578075.
##  5 Americas  1918                             56908042922.
##  6 Americas  1919                             88343576865.
##  7 Americas  1920                             12423149386.
##  8 Americas  1921                             10579988073.
##  9 Americas  1922                              9060565347.
## 10 Americas  1923                              7301350297.
## # ... with 93 more rows
colnames(expense_region_Americas)[colnames(expense_region_Americas)=="sum(militaryexpenditure, na.rm = TRUE)"] <- "militaryexpenditure"

expense_region_Americas
## # A tibble: 103 x 3
## # Groups:   Region [1]
##    Region    Year militaryexpenditure
##    <chr>    <dbl>               <dbl>
##  1 Americas  1914         4715162389.
##  2 Americas  1915         4667239950.
##  3 Americas  1916         3930628503.
##  4 Americas  1917         7205578075.
##  5 Americas  1918        56908042922.
##  6 Americas  1919        88343576865.
##  7 Americas  1920        12423149386.
##  8 Americas  1921        10579988073.
##  9 Americas  1922         9060565347.
## 10 Americas  1923         7301350297.
## # ... with 93 more rows

#Repeat Now repeat for Asia and Oceania

expense_region_AsiaandOceania <- expen1 %>%
  group_by(Region, Year) %>%
  filter(Region == 'AsiaandOceania') %>%
  summarise(sum(militaryexpenditure,na.rm = TRUE))
colnames(expense_region_AsiaandOceania)[colnames(expense_region_AsiaandOceania)=="sum(militaryexpenditure, na.rm = TRUE)"] <- "militaryexpenditure"

expense_region_AsiaandOceania
## # A tibble: 103 x 3
## # Groups:   Region [1]
##    Region          Year militaryexpenditure
##    <chr>          <dbl>               <dbl>
##  1 AsiaandOceania  1914         2429102007.
##  2 AsiaandOceania  1915         2060532945.
##  3 AsiaandOceania  1916         2484183474.
##  4 AsiaandOceania  1917         1682723574.
##  5 AsiaandOceania  1918         4363137399.
##  6 AsiaandOceania  1919         5363554147.
##  7 AsiaandOceania  1920         3218154267.
##  8 AsiaandOceania  1921         3410718500.
##  9 AsiaandOceania  1922         3205887645.
## 10 AsiaandOceania  1923         2399335808.
## # ... with 93 more rows

#Repeat Now repeat for Europe

expense_region_Europe <- expen1 %>%
  group_by(Region, Year) %>%
  filter(Region == 'Europe') %>%
  summarise(sum(militaryexpenditure,na.rm = TRUE))
colnames(expense_region_Europe)[colnames(expense_region_Europe)=="sum(militaryexpenditure, na.rm = TRUE)"] <- "militaryexpenditure"

expense_region_Europe
## # A tibble: 103 x 3
## # Groups:   Region [1]
##    Region  Year militaryexpenditure
##    <chr>  <dbl>               <dbl>
##  1 Europe  1914        77581269908.
##  2 Europe  1915       232683174521.
##  3 Europe  1916       235393296804.
##  4 Europe  1917       233278531335.
##  5 Europe  1918       203020892186.
##  6 Europe  1919        27933766721.
##  7 Europe  1920        26479080364.
##  8 Europe  1921        29661645600.
##  9 Europe  1922        31197540993.
## 10 Europe  1923        29292003570.
## # ... with 93 more rows

#Repeat Finally repeat for the Middle East

expense_region_MiddleEast <- expen1 %>%
  group_by(Region, Year) %>%
  filter(Region == 'MiddleEast') %>%
  summarise(sum(militaryexpenditure,na.rm = TRUE))
colnames(expense_region_MiddleEast)[colnames(expense_region_MiddleEast)=="sum(militaryexpenditure, na.rm = TRUE)"] <- "militaryexpenditure"

expense_region_MiddleEast
## # A tibble: 93 x 3
## # Groups:   Region [1]
##    Region      Year militaryexpenditure
##    <chr>      <dbl>               <dbl>
##  1 MiddleEast  1923           74055253.
##  2 MiddleEast  1924           89845505.
##  3 MiddleEast  1925           88097262 
##  4 MiddleEast  1926           80250569.
##  5 MiddleEast  1927           79750833.
##  6 MiddleEast  1928           84782673.
##  7 MiddleEast  1930           98791615.
##  8 MiddleEast  1931           18580889.
##  9 MiddleEast  1932          241439921.
## 10 MiddleEast  1933          107236905.
## # ... with 83 more rows

#Now join all the tables For some reason I was not able to join them together at the same time, hence it was done in chunks…

df2 <- full_join(expense_region_Africa, expense_region_Americas)
## Joining, by = c("Region", "Year", "militaryexpenditure")
df3 <- full_join(df2, expense_region_AsiaandOceania)
## Joining, by = c("Region", "Year", "militaryexpenditure")
df4 <- full_join(df3, expense_region_Europe)
## Joining, by = c("Region", "Year", "militaryexpenditure")
regionsexpenses <- full_join(df4, expense_region_MiddleEast)
## Joining, by = c("Region", "Year", "militaryexpenditure")
regionsexpenses
## # A tibble: 499 x 3
## # Groups:   Region [5]
##    Region  Year militaryexpenditure
##    <chr>  <dbl>               <dbl>
##  1 Africa  1920           38244880.
##  2 Africa  1921           52148896.
##  3 Africa  1922           35635276.
##  4 Africa  1923           35845411.
##  5 Africa  1924           40411297.
##  6 Africa  1925           46407063 
##  7 Africa  1926           46972561.
##  8 Africa  1927           48072772.
##  9 Africa  1928           44923462.
## 10 Africa  1929           43459602.
## # ... with 489 more rows

#Plot Now we’re going to plot military expenditure for regions and use stacking so we can clearly see all of them.

cols <- brewer.pal(7, "Set2")
highchart () %>%
  hc_add_series(data = regionsexpenses,
                   type = "area",
                   hcaes(x = Year,
                   y = militaryexpenditure, 
                   group = Region)) %>%
  hc_colors(cols) %>% 
  hc_chart(style = list(fontFamily = "Georgia",
                        fontWeight = "bold")) %>%
  hc_plotOptions(series = list(stacking = "normal",
                               marker = list(enabled = FALSE,
                               states = list(hover = list(enabled = TRUE))),
                               lineWidth = 0.2,
                               lineColor = "white")) %>%
  hc_xAxis(title = list(text="Year")) %>%
  hc_yAxis(title = list(text="Military Expenditure")) %>%
  hc_title(text = "Military Expenditure per Region 1914-2007") %>%
  hc_legend(align = "right", verticalAlign = "top",
            layout = "vertical") %>%
  hc_tooltip(enabled = TRUE)
## Warning: `parse_quosure()` is deprecated as of rlang 0.2.0.
## Please use `parse_quo()` instead.
## This warning is displayed once per session.

#Filter for Regions Now we’re going to repeat the process and filter to isolate one region, and then use summarize to add up all the deaths per region for one year

death_region_Africa <- expen1 %>%
  group_by(Region, Year) %>%
  filter(Region == 'Africa') %>%
  summarise(sum(Battledeaths,na.rm = TRUE))