# Load packages and read the data set.
# dplyr provides %>%, rename(), filter(), select(), group_by(), summarise()
# and n(), all of which are used further down in this script; attach it up
# front so the script runs in a fresh R session.
library(dplyr)

# The path is relative: run the script from the directory that contains the
# CSV file (no working directory is set here).
military_data <- read.csv("military-spending-as-a-share-of-gdp-sipri.csv")

# Preview the first 10 rows to check the column names and value formats.
head(military_data, 10)
## Entity Code Year Military.expenditure....of.GDP.
## 1 Afghanistan AFG 1970 1.629606
## 2 Afghanistan AFG 1973 1.868910
## 3 Afghanistan AFG 1974 1.610825
## 4 Afghanistan AFG 1975 1.722066
## 5 Afghanistan AFG 1976 2.046087
## 6 Afghanistan AFG 1977 2.011475
## 7 Afghanistan AFG 2004 2.431254
## 8 Afghanistan AFG 2005 1.992066
## 9 Afghanistan AFG 2006 1.896234
## 10 Afghanistan AFG 2007 2.566267
## World.regions.according.to.OWID
## 1
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
# Rename columns for clarity: give the auto-generated CSV headers short,
# readable names (new_name = old_name).
military_rename <- rename(
  military_data,
  Country = Entity,
  Mil_Spend_GDP = Military.expenditure....of.GDP.,
  Region = World.regions.according.to.OWID
)
# Build the comparison table: US and China rows for 2020-2023 only, keeping
# just the country, year and military spending (% of GDP) columns.
us_china_covid <- military_rename %>%
  filter(
    Country %in% c("United States", "China"),
    Year >= 2020,
    Year <= 2023
  ) %>%
  select(Country, Year, Mil_Spend_GDP)

print(us_china_covid)
## Country Year Mil_Spend_GDP
## 1 China 2020 1.756699
## 2 China 2021 1.605260
## 3 China 2022 1.630534
## 4 China 2023 1.664849
## 5 United States 2020 3.645188
## 6 United States 2021 3.404520
## 7 United States 2022 3.309477
## 8 United States 2023 3.304442
# Count missing spending values in the filtered table (0 means none).
sum(is.na(us_china_covid[["Mil_Spend_GDP"]]))
## [1] 0
# Preview the first six rows of the filtered table.
head(us_china_covid, n = 6)
## Country Year Mil_Spend_GDP
## 1 China 2020 1.756699
## 2 China 2021 1.605260
## 3 China 2022 1.630534
## 4 China 2023 1.664849
## 5 United States 2020 3.645188
## 6 United States 2021 3.404520
# Per-country descriptive statistics of military spending (% of GDP):
# mean, standard deviation, minimum, maximum and the number of rows (years).
us_china_summary <- summarise(
  group_by(us_china_covid, Country),
  Mean_Spend = mean(Mil_Spend_GDP, na.rm = TRUE),
  SD_Spend = sd(Mil_Spend_GDP, na.rm = TRUE),
  Min_Spend = min(Mil_Spend_GDP, na.rm = TRUE),
  Max_Spend = max(Mil_Spend_GDP, na.rm = TRUE),
  n_years = n()
)
print(us_china_summary)
## # A tibble: 2 × 6
## Country Mean_Spend SD_Spend Min_Spend Max_Spend n_years
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 China 1.66 0.0662 1.61 1.76 4
## 2 United States 3.42 0.160 3.30 3.65 4
# Exploratory data analysis (EDA): summary statistics per country, used to
# understand how the % of GDP spent on the military differs between the two.
us_china_summary <-
  us_china_covid %>%
  group_by(Country) %>%
  summarise(
    Mean_Spend = mean(Mil_Spend_GDP, na.rm = TRUE), # average share of GDP
    SD_Spend   = sd(Mil_Spend_GDP, na.rm = TRUE),   # year-to-year spread
    Min_Spend  = min(Mil_Spend_GDP, na.rm = TRUE),
    Max_Spend  = max(Mil_Spend_GDP, na.rm = TRUE),
    n_years    = n()                                # rows (years) per country
  )

print(us_china_summary)
## # A tibble: 2 × 6
## Country Mean_Spend SD_Spend Min_Spend Max_Spend n_years
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 China 1.66 0.0662 1.61 1.76 4
## 2 United States 3.42 0.160 3.30 3.65 4
# Two-sample t-test: China's mean military spending (% of GDP) versus the
# U.S. mean for the years 2020-2023.
# var.equal = FALSE requests the Welch test (no equal-variance assumption);
# the alternative is two-sided (the means differ in either direction).
t_res <- t.test(
  Mil_Spend_GDP ~ Country,
  data = us_china_covid,
  var.equal = FALSE,
  alternative = "two.sided"
)
print(t_res)
##
## Welch Two Sample t-test
##
## data: Mil_Spend_GDP by Country
## t = -20.269, df = 4.0034, p-value = 3.475e-05
## alternative hypothesis: true difference in means between group China and group United States is not equal to 0
## 95 percent confidence interval:
## -1.991424 -1.511719
## sample estimates:
## mean in group China mean in group United States
## 1.664335 3.415907
# Create the table used to compare military spending (% of GDP), 2020-2023:
# filter to the two countries and the year window, then keep three columns.
us_china_covid <- select(
  filter(
    military_rename,
    Country %in% c("United States", "China"),
    Year >= 2020 & Year <= 2023
  ),
  Country, Year, Mil_Spend_GDP
)
print(us_china_covid)
## Country Year Mil_Spend_GDP
## 1 China 2020 1.756699
## 2 China 2021 1.605260
## 3 China 2022 1.630534
## 4 China 2023 1.664849
## 5 United States 2020 3.645188
## 6 United States 2021 3.404520
## 7 United States 2022 3.309477
## 8 United States 2023 3.304442
# Check the number of missing values in the spending column.
sum(is.na(us_china_covid[["Mil_Spend_GDP"]]))
## [1] 0
# Average military spending (% of GDP) per country over the period.
summarise(
  group_by(us_china_covid, Country),
  Average_Mil_Spend = mean(Mil_Spend_GDP, na.rm = TRUE)
)
## # A tibble: 2 × 2
## Country Average_Mil_Spend
## <chr> <dbl>
## 1 China 1.66
## 2 United States 3.42
head(us_china_covid, n = 6) # preview the first six rows
## Country Year Mil_Spend_GDP
## 1 China 2020 1.756699
## 2 China 2021 1.605260
## 3 China 2022 1.630534
## 4 China 2023 1.664849
## 5 United States 2020 3.645188
## 6 United States 2021 3.404520
sum(is.na(us_china_covid[["Mil_Spend_GDP"]])) # number of missing values in this column
## [1] 0
# Line graph to visualize the difference in spending trends.
library(ggplot2)

# One line (plus point markers) per country across the plotted years.
# Line thickness is set with `linewidth`: using `size` for lines was
# deprecated in ggplot2 3.4.0 and raises a lifecycle warning. `size` is still
# the correct aesthetic for the point markers.
ggplot(us_china_covid, aes(x = Year, y = Mil_Spend_GDP, color = Country)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  labs(
    title = "Military Spending as % of GDP (2020–2023)",
    y = "% of GDP",
    x = "Year"
  ) +
  theme_minimal()
#**Key findings of the line graph:**
#China stays at around 1.61%–1.76% of GDP.
#The U.S. stays at around 3.30%–3.65% of GDP.
## The United States spends a much larger share of its GDP on the military than China.
#The U.S. share of GDP spent on the military is roughly double China's.
## Neither country shows a dramatic increase or decrease during this period.
# Box plot visualization: one box per country summarizing the yearly
# military spending values (% of GDP), filled by country.
ggplot(us_china_covid, aes(x = Country, y = Mil_Spend_GDP, fill = Country)) +
  geom_boxplot() +
  ggtitle("Distribution of Military Spending (% of GDP)") +
  xlab("Country") +
  ylab("Military Spending (% of GDP)") +
  theme_minimal()
#Key findings of the box plot
#The box plot illustrates how the distribution of military spending (% of GDP) differs between China and the US from 2020-2023.
#The entire US box sits above 3% of GDP, while China's box stays below 2%, so the two distributions do not overlap.
#The United States' interquartile range (IQR) is a little wider than China's, which suggests US spending fluctuates more from year to year than China's.
#China's box is very narrow, suggesting tight and consistent spending habits.
#China median ≈ 1.65%
#U.S. median ≈ 3.36%
#Finally, there are no outliers on either plot, suggesting stable values across the years.
#Welch Two Sample t-test
#data: Mil_Spend_GDP by Country
#t = -20.269, df = 4.0034, p-value = 3.475e-05
#alternative hypothesis: true difference in means between group China and group United States is not equal to 0
#95 percent confidence interval:
# -1.991424 -1.511719
#sample estimates:
#mean in group China 1.664335
#mean in group United States 3.415907
#Conclusion
# Welch T-test variables
#t = −20.269
#df ≈ 4.00
#p-value = 3.475e-05 ≈ 0.00003475
#Means:
#China: 1.6643
#US: 3.4159
#Assume your significance level is α = 0.05
#p-value=0.00003475<0.05
#*We reject the Null Hypothesis as there is strong statistical evidence that the average military spending as a percentage of GDP from 2020–2023 is significantly different between the US and China.*
#Thoughts on findings and future direction
#First, regarding the line graph: both lines are relatively flat, and China's stagnant trend suggests that the Covid-19 pandemic did not cause large fluctuations in China's military spending as a % of GDP during 2020–2023. Over this period China averaged about 1.66% of GDP on military spending, while the US averaged about 3.42%.
#Both countries' shares dropped in a similar fashion in 2021, which suggests that the pandemic affected both countries economically (for example through trade) and that the US had to cut spending.
#While China conservatively increased its military expenditure share from 2021 to 2022, the US continued to cut back on spending, suggesting that the pandemic caused the two economies to react in different ways after both took a similar drop in 2021.
#Future direction the data could illustrate
#Topic: Comparing absolute military spending rather than percentages
#Context: GDP percentage does not show the real spending power.
#Future work could compare total military budgets (in billions) and growth rates of military spending.