library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------------------------------ tidyverse 1.2.1 --
## v ggplot2 3.2.0 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 0.8.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts --------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggthemes)
library(ggrepel)
library(readr)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(RColorBrewer)
library(timetk)
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
#Data cleaning First thing I did was pull up my first data set on military expenditure by country and get rid of countries with too little data or ones that did not exist anymore. These were: Austria-Hungary, Czechoslovakia, Republic of Vietnam, Saint Kitts, Saint Vincent, Sao Tome and Principe. I did not get rid of East and West Germany since there was good data and I believe there is value in including them in a project on military expenditure, especially since there is data for unified Germany right after the fall of the Berlin Wall. After this I added a new column called region, and since I planned on joining the data set with another data set named “Battle related deaths in state based conflicts”, I assigned the countries to regions that were contained in the Battle death data set. These were: Africa (Inc. North Africa), Americas, Asia and Oceania, Europe, Middle East.
#Set data frames I decided to set the two datasets
expense <- read_csv("C:/Users/chat5/OneDrive/Desktop/DATA 110/Final project data/Mil-expen-Final-csv.csv")
## Parsed with column specification:
## cols(
## Country = col_character(),
## Region = col_character(),
## Year = col_double(),
## militaryexpenditure = col_double()
## )
expense
## # A tibble: 9,210 x 4
## Country Region Year militaryexpenditure
## <chr> <chr> <dbl> <dbl>
## 1 Algeria Africa 1962 297532141.
## 2 Algeria Africa 1963 359428745.
## 3 Algeria Africa 1964 442442264
## 4 Algeria Africa 1965 434634459.
## 5 Algeria Africa 1966 422655454.
## 6 Algeria Africa 1967 410695522.
## 7 Algeria Africa 1968 393973068.
## 8 Algeria Africa 1969 375585231.
## 9 Algeria Africa 1970 356632926.
## 10 Algeria Africa 1971 340250556.
## # ... with 9,200 more rows
deaths <- read_csv("C:/Users/chat5/OneDrive/Desktop/DATA 110/Final project data/battle-related-deaths-in-state-Final-csv.csv")
## Parsed with column specification:
## cols(
## Region = col_character(),
## Year = col_double(),
## Battledeaths = col_double()
## )
deaths
## # A tibble: 355 x 3
## Region Year Battledeaths
## <chr> <dbl> <dbl>
## 1 Africa 1946 0
## 2 Africa 1947 0
## 3 Africa 1948 0
## 4 Africa 1949 0
## 5 Africa 1950 0
## 6 Africa 1951 0
## 7 Africa 1952 2591
## 8 Africa 1953 2791
## 9 Africa 1954 23871
## 10 Africa 1955 23271
## # ... with 345 more rows
#Join datasets First, I attempted to join the two cleaned data sets by Region using the merge Function. Below is the code for that attempt. Unfortunately, when it was merged it would create two-year columns for the two sets of data and would omit certain data for reasons unknown to me. This problem was fixed with the full join function which gave values of zero for where data was not present all the while keeping one column for year.
#df1 <- merge(expenses, deaths, by = “Region”)
df1 <- full_join(deaths, expense )
## Joining, by = c("Region", "Year")
#Print Now let’s see the combined data set. As you can see the battle death data set only provided deaths for the region, so we don’t have individual country data for that. But we can see the battle death data with expense per year for each country on the same table.
df1
## # A tibble: 9,255 x 5
## Region Year Battledeaths Country militaryexpenditure
## <chr> <dbl> <dbl> <chr> <dbl>
## 1 Africa 1946 0 South Africa 679933657.
## 2 Africa 1947 0 South Africa 427752955
## 3 Africa 1948 0 South Africa 408859910.
## 4 Africa 1949 0 South Africa 265859081
## 5 Africa 1950 0 South Africa 174095162.
## 6 Africa 1951 0 South Africa 317496209.
## 7 Africa 1952 2591 South Africa 356393814.
## 8 Africa 1953 2791 South Africa 317302143.
## 9 Africa 1954 23871 South Africa 299514013.
## 10 Africa 1955 23271 South Africa 306571740.
## # ... with 9,245 more rows
#Filter table for values Now from our combined table we’re going to pull three columns to make a plot for military expenditure per region per year.
expen1 <- df1 %>%
select("Region", "Year", "militaryexpenditure", "Battledeaths")
#Print Now print table to check for accuracy.
expen1
## # A tibble: 9,255 x 4
## Region Year militaryexpenditure Battledeaths
## <chr> <dbl> <dbl> <dbl>
## 1 Africa 1946 679933657. 0
## 2 Africa 1947 427752955 0
## 3 Africa 1948 408859910. 0
## 4 Africa 1949 265859081 0
## 5 Africa 1950 174095162. 0
## 6 Africa 1951 317496209. 0
## 7 Africa 1952 356393814. 2591
## 8 Africa 1953 317302143. 2791
## 9 Africa 1954 299514013. 23871
## 10 Africa 1955 306571740. 23271
## # ... with 9,245 more rows
#Filter for Regions Now we’re going to filter to isolate one region, and then use summarize to add up all the expenditures per region for one year
expense_region_Africa <- expen1 %>%
group_by(Region, Year) %>%
filter(Region == 'Africa') %>%
summarise(sum(militaryexpenditure,na.rm = TRUE))
Print again to check for accuracy
expense_region_Africa
## # A tibble: 97 x 3
## # Groups: Region [1]
## Region Year `sum(militaryexpenditure, na.rm = TRUE)`
## <chr> <dbl> <dbl>
## 1 Africa 1920 38244880.
## 2 Africa 1921 52148896.
## 3 Africa 1922 35635276.
## 4 Africa 1923 35845411.
## 5 Africa 1924 40411297.
## 6 Africa 1925 46407063
## 7 Africa 1926 46972561.
## 8 Africa 1927 48072772.
## 9 Africa 1928 44923462.
## 10 Africa 1929 43459602.
## # ... with 87 more rows
#Rename Rename “sum(militaryexpenditure, na.rm = TRUE)” to militaryexpenditure for clarity
colnames(expense_region_Africa)[colnames(expense_region_Africa)=="sum(militaryexpenditure, na.rm = TRUE)"] <- "militaryexpenditure"
#Check Check to make sure table is accurate
expense_region_Africa
## # A tibble: 97 x 3
## # Groups: Region [1]
## Region Year militaryexpenditure
## <chr> <dbl> <dbl>
## 1 Africa 1920 38244880.
## 2 Africa 1921 52148896.
## 3 Africa 1922 35635276.
## 4 Africa 1923 35845411.
## 5 Africa 1924 40411297.
## 6 Africa 1925 46407063
## 7 Africa 1926 46972561.
## 8 Africa 1927 48072772.
## 9 Africa 1928 44923462.
## 10 Africa 1929 43459602.
## # ... with 87 more rows
#Repeat Now repeat steps for all other regions starting with the Americas
expense_region_Americas <- expen1 %>%
group_by(Region, Year) %>%
filter(Region == 'Americas') %>%
summarise(sum(militaryexpenditure,na.rm = TRUE))
expense_region_Americas
## # A tibble: 103 x 3
## # Groups: Region [1]
## Region Year `sum(militaryexpenditure, na.rm = TRUE)`
## <chr> <dbl> <dbl>
## 1 Americas 1914 4715162389.
## 2 Americas 1915 4667239950.
## 3 Americas 1916 3930628503.
## 4 Americas 1917 7205578075.
## 5 Americas 1918 56908042922.
## 6 Americas 1919 88343576865.
## 7 Americas 1920 12423149386.
## 8 Americas 1921 10579988073.
## 9 Americas 1922 9060565347.
## 10 Americas 1923 7301350297.
## # ... with 93 more rows
colnames(expense_region_Americas)[colnames(expense_region_Americas)=="sum(militaryexpenditure, na.rm = TRUE)"] <- "militaryexpenditure"
expense_region_Americas
## # A tibble: 103 x 3
## # Groups: Region [1]
## Region Year militaryexpenditure
## <chr> <dbl> <dbl>
## 1 Americas 1914 4715162389.
## 2 Americas 1915 4667239950.
## 3 Americas 1916 3930628503.
## 4 Americas 1917 7205578075.
## 5 Americas 1918 56908042922.
## 6 Americas 1919 88343576865.
## 7 Americas 1920 12423149386.
## 8 Americas 1921 10579988073.
## 9 Americas 1922 9060565347.
## 10 Americas 1923 7301350297.
## # ... with 93 more rows
#Repeat Now repeat for Asia and Oceania
expense_region_AsiaandOceania <- expen1 %>%
group_by(Region, Year) %>%
filter(Region == 'AsiaandOceania') %>%
summarise(sum(militaryexpenditure,na.rm = TRUE))
colnames(expense_region_AsiaandOceania)[colnames(expense_region_AsiaandOceania)=="sum(militaryexpenditure, na.rm = TRUE)"] <- "militaryexpenditure"
expense_region_AsiaandOceania
## # A tibble: 103 x 3
## # Groups: Region [1]
## Region Year militaryexpenditure
## <chr> <dbl> <dbl>
## 1 AsiaandOceania 1914 2429102007.
## 2 AsiaandOceania 1915 2060532945.
## 3 AsiaandOceania 1916 2484183474.
## 4 AsiaandOceania 1917 1682723574.
## 5 AsiaandOceania 1918 4363137399.
## 6 AsiaandOceania 1919 5363554147.
## 7 AsiaandOceania 1920 3218154267.
## 8 AsiaandOceania 1921 3410718500.
## 9 AsiaandOceania 1922 3205887645.
## 10 AsiaandOceania 1923 2399335808.
## # ... with 93 more rows
#Repeat Now repeat for Europe
expense_region_Europe <- expen1 %>%
group_by(Region, Year) %>%
filter(Region == 'Europe') %>%
summarise(sum(militaryexpenditure,na.rm = TRUE))
colnames(expense_region_Europe)[colnames(expense_region_Europe)=="sum(militaryexpenditure, na.rm = TRUE)"] <- "militaryexpenditure"
expense_region_Europe
## # A tibble: 103 x 3
## # Groups: Region [1]
## Region Year militaryexpenditure
## <chr> <dbl> <dbl>
## 1 Europe 1914 77581269908.
## 2 Europe 1915 232683174521.
## 3 Europe 1916 235393296804.
## 4 Europe 1917 233278531335.
## 5 Europe 1918 203020892186.
## 6 Europe 1919 27933766721.
## 7 Europe 1920 26479080364.
## 8 Europe 1921 29661645600.
## 9 Europe 1922 31197540993.
## 10 Europe 1923 29292003570.
## # ... with 93 more rows
#Repeat Finally repeat for the Middle East
expense_region_MiddleEast <- expen1 %>%
group_by(Region, Year) %>%
filter(Region == 'MiddleEast') %>%
summarise(sum(militaryexpenditure,na.rm = TRUE))
colnames(expense_region_MiddleEast)[colnames(expense_region_MiddleEast)=="sum(militaryexpenditure, na.rm = TRUE)"] <- "militaryexpenditure"
expense_region_MiddleEast
## # A tibble: 93 x 3
## # Groups: Region [1]
## Region Year militaryexpenditure
## <chr> <dbl> <dbl>
## 1 MiddleEast 1923 74055253.
## 2 MiddleEast 1924 89845505.
## 3 MiddleEast 1925 88097262
## 4 MiddleEast 1926 80250569.
## 5 MiddleEast 1927 79750833.
## 6 MiddleEast 1928 84782673.
## 7 MiddleEast 1930 98791615.
## 8 MiddleEast 1931 18580889.
## 9 MiddleEast 1932 241439921.
## 10 MiddleEast 1933 107236905.
## # ... with 83 more rows
#Now join all the tables For some reason I was not able to join them together at the same time, hence it was done in chunks…
df2 <- full_join(expense_region_Africa, expense_region_Americas)
## Joining, by = c("Region", "Year", "militaryexpenditure")
df3 <- full_join(df2, expense_region_AsiaandOceania)
## Joining, by = c("Region", "Year", "militaryexpenditure")
df4 <- full_join(df3, expense_region_Europe)
## Joining, by = c("Region", "Year", "militaryexpenditure")
regionsexpenses <- full_join(df4, expense_region_MiddleEast)
## Joining, by = c("Region", "Year", "militaryexpenditure")
regionsexpenses
## # A tibble: 499 x 3
## # Groups: Region [5]
## Region Year militaryexpenditure
## <chr> <dbl> <dbl>
## 1 Africa 1920 38244880.
## 2 Africa 1921 52148896.
## 3 Africa 1922 35635276.
## 4 Africa 1923 35845411.
## 5 Africa 1924 40411297.
## 6 Africa 1925 46407063
## 7 Africa 1926 46972561.
## 8 Africa 1927 48072772.
## 9 Africa 1928 44923462.
## 10 Africa 1929 43459602.
## # ... with 489 more rows
#Plot Now we’re going to plot military expenditure for regions and use stacking so we can clearly see all of them.
cols <- brewer.pal(7, "Set2")
highchart () %>%
hc_add_series(data = regionsexpenses,
type = "area",
hcaes(x = Year,
y = militaryexpenditure,
group = Region)) %>%
hc_colors(cols) %>%
hc_chart(style = list(fontFamily = "Georgia",
fontWeight = "bold")) %>%
hc_plotOptions(series = list(stacking = "normal",
marker = list(enabled = FALSE,
states = list(hover = list(enabled = TRUE))),
lineWidth = 0.2,
lineColor = "white")) %>%
hc_xAxis(title = list(text="Year")) %>%
hc_yAxis(title = list(text="Military Expenditure")) %>%
hc_title(text = "Military Expenditure per Region 1914-2007") %>%
hc_legend(align = "right", verticalAlign = "top",
layout = "vertical") %>%
hc_tooltip(enabled = TRUE)
## Warning: `parse_quosure()` is deprecated as of rlang 0.2.0.
## Please use `parse_quo()` instead.
## This warning is displayed once per session.
#Filter for Regions Now we’re going to repeat the process and filter to isolate one region, and then use summarize to add up all the deaths per region for one year
death_region_Africa <- expen1 %>%
group_by(Region, Year) %>%
filter(Region == 'Africa') %>%
summarise(sum(Battledeaths,na.rm = TRUE))
Print again to check for accuracy
death_region_Africa
## # A tibble: 97 x 3
## # Groups: Region [1]
## Region Year `sum(Battledeaths, na.rm = TRUE)`
## <chr> <dbl> <dbl>
## 1 Africa 1920 0
## 2 Africa 1921 0
## 3 Africa 1922 0
## 4 Africa 1923 0
## 5 Africa 1924 0
## 6 Africa 1925 0
## 7 Africa 1926 0
## 8 Africa 1927 0
## 9 Africa 1928 0
## 10 Africa 1929 0
## # ... with 87 more rows
#Rename Rename “sum(Battledeaths, na.rm = TRUE)” to Battledeaths for clarity
colnames(death_region_Africa)[colnames(death_region_Africa)=="sum(sum(Battledeaths, na.rm = TRUE))"] <- "Battledeaths"
#Check Check to make sure table is accurate
death_region_Africa
## # A tibble: 97 x 3
## # Groups: Region [1]
## Region Year `sum(Battledeaths, na.rm = TRUE)`
## <chr> <dbl> <dbl>
## 1 Africa 1920 0
## 2 Africa 1921 0
## 3 Africa 1922 0
## 4 Africa 1923 0
## 5 Africa 1924 0
## 6 Africa 1925 0
## 7 Africa 1926 0
## 8 Africa 1927 0
## 9 Africa 1928 0
## 10 Africa 1929 0
## # ... with 87 more rows
#Repeat Now repeat steps for all other regions starting with the Americas
death_region_Americas <- expen1 %>%
group_by(Region, Year) %>%
filter(Region == 'Americas') %>%
summarise(sum(Battledeaths,na.rm = TRUE))
death_region_Americas
## # A tibble: 103 x 3
## # Groups: Region [1]
## Region Year `sum(Battledeaths, na.rm = TRUE)`
## <chr> <dbl> <dbl>
## 1 Americas 1914 0
## 2 Americas 1915 0
## 3 Americas 1916 0
## 4 Americas 1917 0
## 5 Americas 1918 0
## 6 Americas 1919 0
## 7 Americas 1920 0
## 8 Americas 1921 0
## 9 Americas 1922 0
## 10 Americas 1923 0
## # ... with 93 more rows
colnames(death_region_Americas)[colnames(death_region_Americas)=="sum(sum(Battledeaths, na.rm = TRUE)
)"] <- "Battledeaths"
death_region_Americas
## # A tibble: 103 x 3
## # Groups: Region [1]
## Region Year `sum(Battledeaths, na.rm = TRUE)`
## <chr> <dbl> <dbl>
## 1 Americas 1914 0
## 2 Americas 1915 0
## 3 Americas 1916 0
## 4 Americas 1917 0
## 5 Americas 1918 0
## 6 Americas 1919 0
## 7 Americas 1920 0
## 8 Americas 1921 0
## 9 Americas 1922 0
## 10 Americas 1923 0
## # ... with 93 more rows
#Repeat Now repeat for Asia and Oceania
death_region_AsiaandOceania <- expen1 %>%
group_by(Region, Year) %>%
filter(Region == 'AsiaandOceania') %>%
summarise(sum(Battledeaths,na.rm = TRUE))
colnames(death_region_AsiaandOceania)[colnames(death_region_AsiaandOceania)=="sum(Battledeaths, na.rm = TRUE)
"] <- "Battledeaths"
death_region_AsiaandOceania
## # A tibble: 103 x 3
## # Groups: Region [1]
## Region Year `sum(Battledeaths, na.rm = TRUE)`
## <chr> <dbl> <dbl>
## 1 AsiaandOceania 1914 0
## 2 AsiaandOceania 1915 0
## 3 AsiaandOceania 1916 0
## 4 AsiaandOceania 1917 0
## 5 AsiaandOceania 1918 0
## 6 AsiaandOceania 1919 0
## 7 AsiaandOceania 1920 0
## 8 AsiaandOceania 1921 0
## 9 AsiaandOceania 1922 0
## 10 AsiaandOceania 1923 0
## # ... with 93 more rows
#Repeat Now repeat for Europe
death_region_Europe <- expen1 %>%
group_by(Region, Year) %>%
filter(Region == 'Europe') %>%
summarise(sum(Battledeaths,na.rm = TRUE))
colnames(death_region_Europe)[colnames(death_region_Europe)=="sum(Battledeaths, na.rm = TRUE)
"] <- "Battledeaths"
death_region_Europe
## # A tibble: 103 x 3
## # Groups: Region [1]
## Region Year `sum(Battledeaths, na.rm = TRUE)`
## <chr> <dbl> <dbl>
## 1 Europe 1914 0
## 2 Europe 1915 0
## 3 Europe 1916 0
## 4 Europe 1917 0
## 5 Europe 1918 0
## 6 Europe 1919 0
## 7 Europe 1920 0
## 8 Europe 1921 0
## 9 Europe 1922 0
## 10 Europe 1923 0
## # ... with 93 more rows
#Repeat Finally repeat for the Middle East
death_region_MiddleEast <- expen1 %>%
group_by(Region, Year) %>%
filter(Region == 'MiddleEast') %>%
summarise(sum(Battledeaths,na.rm = TRUE))
colnames(death_region_MiddleEast)[colnames(death_region_MiddleEast)=="sum(Battledeaths, na.rm = TRUE)
"] <- "Battledeaths"
death_region_MiddleEast
## # A tibble: 93 x 3
## # Groups: Region [1]
## Region Year `sum(Battledeaths, na.rm = TRUE)`
## <chr> <dbl> <dbl>
## 1 MiddleEast 1923 0
## 2 MiddleEast 1924 0
## 3 MiddleEast 1925 0
## 4 MiddleEast 1926 0
## 5 MiddleEast 1927 0
## 6 MiddleEast 1928 0
## 7 MiddleEast 1930 0
## 8 MiddleEast 1931 0
## 9 MiddleEast 1932 0
## 10 MiddleEast 1933 0
## # ... with 83 more rows
#Renaming issues For some reason my renaming of battledeaths did not work out even though the same code worked for militaryexpenditure. I copied the code on to a new markdown, restarted R all to no avail.
#Now join all the tables For some reason I was not able to join them together at the same time, hence it was done in chunks…
df5 <- full_join(death_region_Africa, death_region_Americas)
## Joining, by = c("Region", "Year", "sum(Battledeaths, na.rm = TRUE)")
df6 <- full_join(df5, death_region_AsiaandOceania)
## Joining, by = c("Region", "Year", "sum(Battledeaths, na.rm = TRUE)")
df7 <- full_join(df6, death_region_Europe)
## Joining, by = c("Region", "Year", "sum(Battledeaths, na.rm = TRUE)")
regionsdeaths <- full_join(df7, death_region_MiddleEast)
## Joining, by = c("Region", "Year", "sum(Battledeaths, na.rm = TRUE)")
#Now lets check our finished table
setnames(regionsdeaths, old=c("sum(Battledeaths, na.rm = TRUE)"), new=c("Battledeaths"))
regionsdeaths
## # A tibble: 499 x 3
## # Groups: Region [5]
## Region Year Battledeaths
## <chr> <dbl> <dbl>
## 1 Africa 1920 0
## 2 Africa 1921 0
## 3 Africa 1922 0
## 4 Africa 1923 0
## 5 Africa 1924 0
## 6 Africa 1925 0
## 7 Africa 1926 0
## 8 Africa 1927 0
## 9 Africa 1928 0
## 10 Africa 1929 0
## # ... with 489 more rows
#Filter Now I decided to filter out all the death values that were zero so my chart would start where my data started. Before that I realized my plot would have a lot of white space at the beginning
regionsdeaths1 <- filter(regionsdeaths, Battledeaths != 0)
#Now lets see the deaths per region
cols <- brewer.pal(7, "Set2")
highchart () %>%
hc_add_series(data = regionsdeaths1,
type = "area",
hcaes(x = Year,
y = Battledeaths,
group = Region)) %>%
hc_colors(cols) %>%
hc_chart(style = list(fontFamily = "Georgia",
fontWeight = "bold")) %>%
hc_plotOptions(series = list(stacking = "percent",
marker = list(enabled = FALSE,
states = list(hover = list(enabled = TRUE))),
lineWidth = 0.2,
lineColor = "white")) %>%
hc_xAxis(title = list(text="Year")) %>%
hc_yAxis(title = list(text="Battlefield Deaths")) %>%
hc_title(text = "Battlefield Deaths per Region 1945- 2016") %>%
hc_legend(align = "right", verticalAlign = "top",
layout = "vertical") %>%
hc_tooltip(enabled = TRUE)
#Combine Now combine both sum of deaths per region for each year and sum of military expenditure per region for each year
combinedregions <- full_join(regionsdeaths, regionsexpenses)
## Joining, by = c("Region", "Year")
#Change column I eventually found a different code that lets you change column name and decided to apply it to the last dataset
#setnames(combinedregions, old=c("sum(Battledeaths, na.rm = TRUE)"), new=c("Battledeaths"))
#Print Print to check for accuracy
combinedregions
## # A tibble: 499 x 4
## # Groups: Region [5]
## Region Year Battledeaths militaryexpenditure
## <chr> <dbl> <dbl> <dbl>
## 1 Africa 1920 0 38244880.
## 2 Africa 1921 0 52148896.
## 3 Africa 1922 0 35635276.
## 4 Africa 1923 0 35845411.
## 5 Africa 1924 0 40411297.
## 6 Africa 1925 0 46407063
## 7 Africa 1926 0 46972561.
## 8 Africa 1927 0 48072772.
## 9 Africa 1928 0 44923462.
## 10 Africa 1929 0 43459602.
## # ... with 489 more rows
#Bubble charts I decided to do bubble charts, but I realized there were too many data points to display my data set as a bubble chart. It looked really messy and unreadable, as every region has a data point per year.
radius <- sqrt( combinedregions$Battledeaths/ pi )
symbols(combinedregions$Year, combinedregions$militaryexpenditure, circles=combinedregions$Battledeaths)
symbols(combinedregions$Year, combinedregions$militaryexpenditure, circles=radius, inches=0.35, fg="white", bg="red", xlab="Year", ylab="Military Expenditure")
#Scatterplot I decided to do a scatterplot to see the correlation between battlefield deaths and military expenditure, to see if there was any correlation.
p1 <- ggplot(combinedregions, aes(x = Battledeaths, y = militaryexpenditure)) +
xlab("Battlefield Deaths per Region") +
ylab("Military Expenditure per Region") +
theme_minimal(base_size = 12) +
ggtitle("Military Expenditure vs Battlefield Deaths")
p1 + geom_point() + theme_dark()
p2 <- p1 + geom_point() + geom_smooth(method='lm',formula=y~x) + theme_dark()
ggplotly(p2)