# Load dplyr and read the data set.
# dplyr supplies the pipe (%>%) and the data-manipulation verbs used throughout
# this script (rename(), filter(), select(), group_by(), summarise()); without
# attaching it, the first pipeline below fails with "could not find function".
# The CSV path is relative, so the file must be in the current working directory.

library(dplyr)

military_data <- read.csv("military-spending-as-a-share-of-gdp-sipri.csv")

# Preview the first 10 rows to check the column names and contents
head(military_data, 10)
##         Entity Code Year Military.expenditure....of.GDP.
## 1  Afghanistan  AFG 1970                        1.629606
## 2  Afghanistan  AFG 1973                        1.868910
## 3  Afghanistan  AFG 1974                        1.610825
## 4  Afghanistan  AFG 1975                        1.722066
## 5  Afghanistan  AFG 1976                        2.046087
## 6  Afghanistan  AFG 1977                        2.011475
## 7  Afghanistan  AFG 2004                        2.431254
## 8  Afghanistan  AFG 2005                        1.992066
## 9  Afghanistan  AFG 2006                        1.896234
## 10 Afghanistan  AFG 2007                        2.566267
##    World.regions.according.to.OWID
## 1                                 
## 2                                 
## 3                                 
## 4                                 
## 5                                 
## 6                                 
## 7                                 
## 8                                 
## 9                                 
## 10

# Rename columns for clarity: the names generated by read.csv() are replaced
# with shorter ones (new_name = old_name).

military_rename <- rename(
  military_data,
  Country = Entity,
  Mil_Spend_GDP = Military.expenditure....of.GDP.,
  Region = World.regions.according.to.OWID
)

# Filter and create a table to compare military spending (% of GDP) for
# 2020-2023 between the US and China

us_china_covid <- military_rename %>%
  # keep only the two countries being compared
  filter(Country %in% c("United States", "China")) %>%
  # keep only the years 2020 through 2023 (both bounds inclusive)
  filter(Year >= 2020, Year <= 2023) %>%
  select(Country, Year, Mil_Spend_GDP)

print(us_china_covid)
##         Country Year Mil_Spend_GDP
## 1         China 2020      1.756699
## 2         China 2021      1.605260
## 3         China 2022      1.630534
## 4         China 2023      1.664849
## 5 United States 2020      3.645188
## 6 United States 2021      3.404520
## 7 United States 2022      3.309477
## 8 United States 2023      3.304442
# Count the missing values in the spending column
sum(is.na(us_china_covid[["Mil_Spend_GDP"]]))
## [1] 0
# Preview the first rows of the filtered table
head(us_china_covid, n = 6)
##         Country Year Mil_Spend_GDP
## 1         China 2020      1.756699
## 2         China 2021      1.605260
## 3         China 2022      1.630534
## 4         China 2023      1.664849
## 5 United States 2020      3.645188
## 6 United States 2021      3.404520
# Per-country summary statistics of military spending (% of GDP):
# mean, standard deviation, minimum, maximum and the number of rows (years).
us_china_summary <- summarise(
  group_by(us_china_covid, Country),
  Mean_Spend = mean(Mil_Spend_GDP, na.rm = TRUE),
  SD_Spend = sd(Mil_Spend_GDP, na.rm = TRUE),
  Min_Spend = min(Mil_Spend_GDP, na.rm = TRUE),
  Max_Spend = max(Mil_Spend_GDP, na.rm = TRUE),
  n_years = n()
)

us_china_summary
## # A tibble: 2 × 6
##   Country       Mean_Spend SD_Spend Min_Spend Max_Spend n_years
##   <chr>              <dbl>    <dbl>     <dbl>     <dbl>   <int>
## 1 China               1.66   0.0662      1.61      1.76       4
## 2 United States       3.42   0.160       3.30      3.65       4

# Clean the dataset and conduct exploratory data analysis (EDA) to better
# understand the differences in % of GDP and get the summary statistics.
#
# The per-country summary table (us_china_summary: mean, SD, min, max and
# number of years) was already computed earlier in this script from the same
# us_china_covid data, which has not changed since. It is displayed again here
# instead of being recomputed with a duplicated pipeline.

us_china_summary
## # A tibble: 2 × 6
##   Country       Mean_Spend SD_Spend Min_Spend Max_Spend n_years
##   <chr>              <dbl>    <dbl>     <dbl>     <dbl>   <int>
## 1 China               1.66   0.0662      1.61      1.76       4
## 2 United States       3.42   0.160       3.30      3.65       4

# Two-sample t-test comparing China's mean military spending (% of GDP) with
# the U.S. mean military spending (% of GDP) for the years 2020-2023.
# var.equal = FALSE requests the Welch test, which does not assume that the
# two countries have equal variances; the alternative is two-sided.

t_res <- t.test(
  Mil_Spend_GDP ~ Country,
  data = us_china_covid,
  var.equal = FALSE,
  alternative = "two.sided"
)

t_res
## 
##  Welch Two Sample t-test
## 
## data:  Mil_Spend_GDP by Country
## t = -20.269, df = 4.0034, p-value = 3.475e-05
## alternative hypothesis: true difference in means between group China and group United States is not equal to 0
## 95 percent confidence interval:
##  -1.991424 -1.511719
## sample estimates:
##         mean in group China mean in group United States 
##                    1.664335                    3.415907

# Table comparing military spending (% of GDP) for 2020-2023.
# us_china_covid was already built earlier in this script with exactly this
# filter (United States and China, 2020-2023) and has not been modified since,
# so the existing table is printed rather than rebuilt.

print(us_china_covid)
##         Country Year Mil_Spend_GDP
## 1         China 2020      1.756699
## 2         China 2021      1.605260
## 3         China 2022      1.630534
## 4         China 2023      1.664849
## 5 United States 2020      3.645188
## 6 United States 2021      3.404520
## 7 United States 2022      3.309477
## 8 United States 2023      3.304442
# Check the number of missing values in this column
sum(is.na(us_china_covid[["Mil_Spend_GDP"]]))
## [1] 0

#Create a table to summarise the average military spending (% of GDP) across 2020-2023 for the US and China.

# Average military spending (% of GDP) per country; the result is printed
# directly and not stored.
summarise(
  group_by(us_china_covid, Country),
  Average_Mil_Spend = mean(Mil_Spend_GDP, na.rm = TRUE)
)
## # A tibble: 2 × 2
##   Country       Average_Mil_Spend
##   <chr>                     <dbl>
## 1 China                      1.66
## 2 United States              3.42
# Preview the first rows of the filtered table
head(us_china_covid, n = 6)
##         Country Year Mil_Spend_GDP
## 1         China 2020      1.756699
## 2         China 2021      1.605260
## 3         China 2022      1.630534
## 4         China 2023      1.664849
## 5 United States 2020      3.645188
## 6 United States 2021      3.404520
# Check the number of missing values in this column
sum(is.na(us_china_covid[["Mil_Spend_GDP"]]))
## [1] 0

#Line graph to visualize the difference in spending trends

library(ggplot2)

# Line chart of military spending (% of GDP) by year, one coloured line per
# country, with a point marking each yearly observation.
# Line thickness is set with `linewidth`: using `size` for lines was deprecated
# in ggplot2 3.4.0 and triggers a deprecation warning. `size` is still the
# correct argument for the points.
ggplot(us_china_covid, aes(x = Year, y = Mil_Spend_GDP, color = Country)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  labs(title = "Military Spending as % of GDP (2020–2023)",
       y = "% of GDP",
       x = "Year") +
  theme_minimal()

#**Key findings of the line graph:**

#China stays around 1.61%–1.76% of GDP.

#The U.S. stays around 3.30%–3.65% of GDP.

## The United States spends much more of its GDP on the military than China.

#The US devotes about double the share of its GDP to military expenditures that China does.

## Neither country shows a dramatic increase or decrease during this period.

# Boxplot visualization: distribution of yearly military spending (% of GDP)
# for each country, one filled box per country.

ggplot(
  us_china_covid,
  aes(x = Country, y = Mil_Spend_GDP, fill = Country)
) +
  geom_boxplot() +
  labs(
    title = "Distribution of Military Spending (% of GDP)",
    x = "Country",
    y = "Military Spending (% of GDP)"
  ) +
  theme_minimal()

#Key findings of the box plot

#The box plot illustrates the difference in the distribution of military spending (% of GDP) between China and the US from 2020-2023.

#The entire US box plot sits above 3% of GDP, well above China's box plot, which stays below 1.8% of GDP.

#The United States' interquartile range (IQR) is a little wider than China's, which suggests that US spending fluctuates more from year to year than China's.

#China's box plot is very narrow, suggesting tight and consistent spending habits.

#China median ≈ 1.65%

#U.S. median ≈ 3.36%

#Finally, there are no outliers on either plot, suggesting stable values across the years.

#Two-sample test findings

#Welch Two Sample t-test

#data:  Mil_Spend_GDP by Country
#t = -20.269, df = 4.0034, p-value = 3.475e-05
#alternative hypothesis: true difference in means between group China and group United States is not equal to 0
#95 percent confidence interval:
# -1.991424 -1.511719
#sample estimates:
#mean in group China           1.664335
#mean in group United States   3.415907

#Conclusion

# Welch t-test results
#t = −20.269

#df ≈ 4.00

#p-value = 3.475e-05 ≈ 0.00003475

#Means:

#China: 1.6643

#US: 3.4159

#Assume a significance level of α = 0.05

#p-value = 0.00003475 < 0.05

#*We reject the null hypothesis: there is strong statistical evidence that the average military spending as a percentage of GDP from 2020–2023 is significantly different between the US and China.*

#Thoughts on findings and future direction

#Firstly, in reference to the line graph: while both lines are relatively flat, China's stagnant trend suggests that the Covid-19 pandemic did not cause large fluctuations in China's military spending as a % of GDP during 2020–2023. China averaged military spending of about 1.66% of GDP, while the US averaged about 3.42% of GDP.

#Both spending trends dropped in a similar fashion in 2021, which suggests that the US had to cut spending and that the pandemic economically affected both countries, including through economic trade.

#While China conservatively increased its military expenditure % from 2021-2022, the US continued to cut back on spending. This suggests the pandemic caused the two economies to react in different ways after both took a similar drop in 2021.

#Future direction the data could illustrate

#Topic: Comparing absolute military spending rather than percentages

#Context: The percentage of GDP does not show real spending power.

#Future work could compare: total military budget (in billions) and growth rates of military spending.