Introduction

This project analyzes global war and conflict data from 1950 to 2024. The objective is to understand patterns in conflicts, casualties, economic impact, and global involvement using R programming.

Dataset Description

The dataset contains information about global conflicts, including:

- Countries involved
- Conflict type
- Duration
- Casualties
- Economic loss
- UN involvement

It helps in analyzing patterns and impact of conflicts worldwide.

# Load the conflicts dataset into `data`, the data frame used by every later
# chunk.
# NOTE(review): the path is absolute and machine-specific; a project-relative
# path would make the report reproducible on other machines.
data <- read.csv("C:/Users/User/Downloads/global_conflicts_dataset.csv")

# View() opens a data viewer, which is only meaningful in an interactive
# session; skip it when the report is rendered non-interactively.
if (interactive()) {
  View(data)
}

Inference: Dataset successfully loaded and ready for analysis.

Q1: How many total conflicts are recorded in the dataset?

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# One row per conflict, so the row count is the number of recorded conflicts.
# Kept as a one-row data frame with an integer `total` column.
total_conflicts <- data.frame(total = nrow(data))

print(total_conflicts)
##   total
## 1  3000

Inference: The dataset contains 3,000 recorded conflicts.

Q2: Which climate zones have the highest number of conflicts?

# Number of conflicts per climate zone, most frequent zone first.
# Rows with a missing Climate_Zone are excluded before counting.
climate_data <- data |>
  filter(!is.na(Climate_Zone)) |>
  group_by(Climate_Zone) |>
  tally(name = "total_conflicts", sort = TRUE)

print(climate_data)
## # A tibble: 4 × 2
##   Climate_Zone total_conflicts
##   <chr>                  <int>
## 1 Polar                    790
## 2 Tropical                 771
## 3 Arid                     737
## 4 Temperate                702

Inference: Polar zones have the most recorded conflicts (790) and Temperate zones the fewest (702); the counts are broadly similar across the four zones.

Q3: Which countries are most frequently involved as Country_A?

# Ten countries that appear most often in the Country_A column.
# Rows with a missing Country_A are excluded before counting.
countryA_data <- data |>
  filter(!is.na(Country_A)) |>
  group_by(Country_A) |>
  tally(name = "total_conflicts", sort = TRUE) |>
  slice_head(n = 10)

print(countryA_data)
## # A tibble: 10 × 2
##    Country_A total_conflicts
##    <chr>               <int>
##  1 USA                   224
##  2 Ukraine               216
##  3 Canada                209
##  4 UK                    208
##  5 Israel                205
##  6 Russia                205
##  7 Turkey                201
##  8 Brazil                200
##  9 Iran                  199
## 10 China                 196

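Inference: The countries listed above appear most often as Country_A. Whether Country_A denotes the initiator of a conflict is not stated in the dataset description, so these counts show involvement rather than initiation.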

Q4: Visualize top 5 countries involved in conflicts

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
# Five countries that appear most often in the Country_A column.
top5 <- data %>%
  filter(!is.na(Country_A)) %>%
  group_by(Country_A) %>%
  summarise(total = n()) %>%
  arrange(desc(total)) %>%
  head(5)

# Horizontal bar chart ordered by count. The fill legend would only repeat
# the axis labels, so it is hidden, and the axes get readable titles instead
# of the raw `reorder(Country_A, total)` expression.
ggplot(top5, aes(x = reorder(Country_A, total), y = total, fill = Country_A)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(
    title = "Top 5 Countries in Conflicts",
    x = "Country (Country_A)",
    y = "Number of conflicts"
  )

Inference: The top five countries have similar counts (between 205 and 224 conflicts each), so no small group of countries dominates conflict participation.

Q5: Which conflict types are most common?

# Frequency of each conflict type, most common first.
# Rows with a missing Conflict_Type are excluded before counting.
conflict_type <- data |>
  filter(!is.na(Conflict_Type)) |>
  group_by(Conflict_Type) |>
  tally(name = "total", sort = TRUE)

print(conflict_type)
## # A tibble: 5 × 2
##   Conflict_Type total
##   <chr>         <int>
## 1 Skirmish        627
## 2 Proxy War       621
## 3 Civil War       597
## 4 Cold Conflict   588
## 5 War             567

Inference: Some conflict types occur more frequently than others.

Q6: What is the average number of casualties per conflict?

# Add a per-conflict casualty total: military deaths on both sides plus
# civilian deaths. Later chunks read `data$total_casualties`.
data[["total_casualties"]] <-
  data[["Military_Deaths_A"]] +
  data[["Military_Deaths_B"]] +
  data[["Civilian_Deaths"]]

# Mean of the casualty totals, ignoring missing values.
avg_casualties <- data.frame(
  avg = mean(data[["total_casualties"]], na.rm = TRUE)
)

print(avg_casualties)
##        avg
## 1 149534.7

Inference: Average casualties indicate the overall human impact of conflicts.

Q7: Visualize the distribution of total casualties

# Histogram of total casualties in bins of 5,000, with comma-formatted
# x-axis labels.
ggplot(data = data, mapping = aes(x = total_casualties)) +
  geom_histogram(binwidth = 5000, fill = "skyblue", colour = "black") +
  scale_x_continuous(labels = scales::comma) +
  ggtitle("Distribution of Total Casualties")

Inference: Most conflicts fall within a moderate casualty range, with a few extreme cases.

Q8: Which conflicts have above-average casualties?

# Mean casualty total across all conflicts (missing values ignored).
avg_val <- mean(data[["total_casualties"]], na.rm = TRUE)

# Conflicts whose casualty total is strictly above that mean.
high_casualties <- filter(data, total_casualties > avg_val)

# How many such conflicts there are, as a one-row data frame.
count_high <- data.frame(total = nrow(high_casualties))

print(count_high)
##   total
## 1  1532

Inference: 1,532 of the 3,000 conflicts (about 51%) have casualties above the average.

Q9: What is the average economic loss per conflict?

# Mean economic loss per conflict (column is in USD billions), ignoring
# missing values.
avg_loss <- data.frame(
  avg_loss = mean(data[["Economic_Loss_USD_Billions"]], na.rm = TRUE)
)

print(avg_loss)
##   avg_loss
## 1 253.6844

Inference: Conflicts lead to substantial economic damage globally.

Q10: Visualize economic loss by conflict type

# Mean economic loss (USD billions) for each conflict type.
loss_data <- data %>%
  group_by(Conflict_Type) %>%
  summarise(avg_loss = mean(Economic_Loss_USD_Billions, na.rm = TRUE))

# The bars show per-type averages, so the title and y-axis say so and carry
# the unit; the fill legend only repeats the x-axis and is hidden.
ggplot(loss_data, aes(x = Conflict_Type, y = avg_loss, fill = Conflict_Type)) +
  geom_col(show.legend = FALSE) +
  labs(
    title = "Average Economic Loss by Conflict Type",
    x = "Conflict type",
    y = "Average economic loss (USD billions)"
  )

Inference: Certain conflict types are more economically destructive.

Q11: How does UN involvement vary across conflicts?

# Number of conflicts for each value of UN_Involvement.
un_data <- data |>
  group_by(UN_Involvement) |>
  tally(name = "total")

print(un_data)
## # A tibble: 2 × 2
##   UN_Involvement total
##   <chr>          <int>
## 1 No              1490
## 2 Yes             1510

Inference: Shows the proportion of conflicts with and without UN involvement.

Q12: Compare casualties with and without UN involvement

# Mean casualty total for each value of UN_Involvement.
un_cas <- data |>
  group_by(UN_Involvement) |>
  summarise(avg_casualties = mean(total_casualties, na.rm = TRUE))

# One bar per UN_Involvement value, bar height = mean casualties.
ggplot(
  data = un_cas,
  mapping = aes(x = UN_Involvement, y = avg_casualties, fill = UN_Involvement)
) +
  geom_col() +
  ggtitle("Casualties vs UN Involvement")

Inference: UN involvement may influence the severity of conflicts.

Q13: Which resource disputes are most common?

# Frequency of each resource-dispute category, most common first.
# Rows with a missing Resource_Dispute are excluded before counting.
resource_data <- data |>
  filter(!is.na(Resource_Dispute)) |>
  group_by(Resource_Dispute) |>
  tally(name = "total", sort = TRUE)

print(resource_data)
## # A tibble: 4 × 2
##   Resource_Dispute total
##   <chr>            <int>
## 1 Oil                767
## 2 Land               745
## 3 None               745
## 4 Water              743

Inference: Certain resource types are more frequently linked to conflicts.

Q14: Visualize resource dispute distribution

# Bar chart of the per-category counts held in `resource_data`.
ggplot(
  data = resource_data,
  mapping = aes(x = Resource_Dispute, y = total, fill = Resource_Dispute)
) +
  geom_col() +
  ggtitle("Resource Dispute Distribution")

Inference: Resource-based conflicts are unevenly distributed across categories.

Q15: Which climate zones have highest average casualties?

# Mean casualty total per climate zone, highest average first.
climate_cas <- data |>
  group_by(Climate_Zone) |>
  summarise(avg_casualties = mean(total_casualties, na.rm = TRUE)) |>
  arrange(desc(avg_casualties))

print(climate_cas)
## # A tibble: 4 × 2
##   Climate_Zone avg_casualties
##   <chr>                 <dbl>
## 1 Arid                150984.
## 2 Temperate           149829.
## 3 Tropical            148968.
## 4 Polar               148475.

Inference: Average casualties differ only slightly across climate zones (about 148,000 to 151,000 per conflict), with Arid zones highest and Polar zones lowest.

Q16: Visualize casualties across climate zones

# Bar chart of the per-zone mean casualties held in `climate_cas`.
ggplot(
  data = climate_cas,
  mapping = aes(x = Climate_Zone, y = avg_casualties, fill = Climate_Zone)
) +
  geom_col() +
  ggtitle("Average Casualties by Climate Zone")

Inference: The chart highlights regions with higher conflict severity.

Q17: What is the average duration of conflicts?

# Mean conflict duration in days, ignoring missing values, as a one-row
# data frame.
avg_duration <- data.frame(
  avg_duration = mean(data[["Duration_Days"]], na.rm = TRUE)
)

print(avg_duration)
##   avg_duration
## 1     1003.735

Inference: Average duration reflects how long conflicts typically last.

Q18: Visualize distribution of conflict duration

# Histogram of conflict duration in 50-day bins.
ggplot(data = data, mapping = aes(x = Duration_Days)) +
  geom_histogram(binwidth = 50, fill = "orange", colour = "black") +
  ggtitle("Distribution of Conflict Duration")

Inference: Short conflicts do not dominate: roughly half of the conflicts (1,494 of 3,000, see Q19) last longer than the average of about 1,004 days.

Q19: How many conflicts are long-duration (above average)?

# Mean conflict duration in days (missing values ignored).
avg_dur <- mean(data[["Duration_Days"]], na.rm = TRUE)

# Conflicts that lasted strictly longer than the mean.
long_conflicts <- filter(data, Duration_Days > avg_dur)

# How many such conflicts there are, as a one-row data frame.
count_long <- data.frame(total = nrow(long_conflicts))

print(count_long)
##   total
## 1  1494

Inference: 1,494 of the 3,000 conflicts (about 50%) last longer than the average duration.

Q20: Compare economic loss across climate zones

# Mean economic loss (column is in USD billions) per climate zone.
loss_climate <- data |>
  group_by(Climate_Zone) |>
  summarise(avg_loss = mean(Economic_Loss_USD_Billions, na.rm = TRUE))

# One bar per climate zone, bar height = mean economic loss.
ggplot(
  data = loss_climate,
  mapping = aes(x = Climate_Zone, y = avg_loss, fill = Climate_Zone)
) +
  geom_col() +
  ggtitle("Economic Loss by Climate Zone")

Inference: Economic impact varies across different climate regions.

Q21: What is the average duration of conflicts?

# Mean conflict duration in days (missing values ignored), stored as a
# one-row data frame. Repeats the calculation from Q17.
avg_duration <- data.frame(
  avg_duration = mean(data[["Duration_Days"]], na.rm = TRUE)
)

print(avg_duration)
##   avg_duration
## 1     1003.735

Inference:
Conflicts last about 1,004 days (roughly 2.7 years) on average.

Q22: How many conflicts are longer than average duration?

# Mean conflict duration in days (missing values ignored).
avg_dur <- mean(data[["Duration_Days"]], na.rm = TRUE)

# Count of conflicts lasting strictly longer than the mean. Note that this
# stores a one-row count in `long_conflicts`, not the matching rows.
long_conflicts <- data |>
  filter(Duration_Days > avg_dur) |>
  tally(name = "total")

print(long_conflicts)
##   total
## 1  1494

Inference:
About half of the conflicts (1,494 of 3,000) exceed the average duration.

Q23: What is the average economic loss per conflict?

# Mean economic loss per conflict (column is in USD billions), ignoring
# missing values. Repeats the calculation from Q9.
avg_loss <- data.frame(
  avg_loss = mean(data[["Economic_Loss_USD_Billions"]], na.rm = TRUE)
)

print(avg_loss)
##   avg_loss
## 1 253.6844

Inference:
Conflicts cause significant economic damage on average.

Q24: Which year has the highest number of conflicts?

# Conflicts per year, busiest year first.
year_data <- data |>
  group_by(Year) |>
  tally(name = "total", sort = TRUE)

# The first row after sorting is the year with the most conflicts.
top_year <- slice_head(year_data, n = 1)
print(top_year)
## # A tibble: 1 × 2
##    Year total
##   <int> <int>
## 1  1982    56

Inference:
1982 recorded the highest number of conflicts (56).

Q25: What is the average casualties in long-duration conflicts?

# Mean conflict duration in days (missing values ignored).
avg_dur <- mean(data[["Duration_Days"]], na.rm = TRUE)

# Conflicts that lasted strictly longer than the mean.
long_data <- filter(data, Duration_Days > avg_dur)

# Mean casualty total within those long-duration conflicts.
long_cas <- data.frame(
  avg_casualties = mean(long_data[["total_casualties"]], na.rm = TRUE)
)

print(long_cas)
##   avg_casualties
## 1       147381.7

Q26: How do conflicts change over years?

# Number of conflicts recorded in each year.
year_trend <- data |>
  group_by(Year) |>
  tally(name = "total_conflicts")

# Line chart of the yearly conflict counts.
ggplot(data = year_trend, mapping = aes(x = Year, y = total_conflicts)) +
  geom_line(colour = "blue") +
  ggtitle("Conflicts Over Time")

Inference:
Conflict frequency shows variation over different years.

Q27: How does average economic loss change over years?

# Mean economic loss (column is in USD billions) for each year.
year_loss <- data |>
  group_by(Year) |>
  summarise(avg_loss = mean(Economic_Loss_USD_Billions, na.rm = TRUE))

# Line chart of the yearly means, with a red point marking each year.
ggplot(data = year_loss, mapping = aes(x = Year, y = avg_loss)) +
  geom_line(colour = "darkgreen") +
  geom_point(colour = "red") +
  ggtitle("Economic Loss Over Time")

Inference:
Economic impact of conflicts fluctuates over time.

Q28: Relationship between duration and casualties

# Plot a random subset of conflicts so the scatter stays readable.
# The seed is fixed so that the same rows, and therefore the same plot, are
# produced on every render. slice_sample() replaces the superseded
# sample_n() and returns all rows if fewer than 300 are available.
set.seed(123)
sample_data <- data %>% slice_sample(n = 300)

# Duration (days) against total casualties, comma-formatted y-axis labels.
ggplot(sample_data, aes(x = Duration_Days, y = total_casualties)) +
  geom_point(color = "blue") +
  scale_y_continuous(labels = scales::comma) +
  labs(title = "Duration vs Casualties")

Inference:
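The data do not show longer conflicts producing higher casualties: conflicts of above-average duration average about 147,382 casualties (Q25), slightly below the overall average of about 149,535 (Q6).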

Q29: Relationship between economic loss and casualties

# Plot a random subset of conflicts so the scatter stays readable.
# The seed is fixed so that the same rows, and therefore the same plot, are
# produced on every render. slice_sample() replaces the superseded
# sample_n() and returns all rows if fewer than 300 are available.
set.seed(123)
sample_data2 <- data %>% slice_sample(n = 300)

# Economic loss (USD billions) against total casualties, comma-formatted
# y-axis labels.
ggplot(sample_data2, aes(x = Economic_Loss_USD_Billions, y = total_casualties)) +
  geom_point(color = "red") +
  scale_y_continuous(labels = scales::comma) +
  labs(title = "Economic Loss vs Casualties")

Inference:
Higher casualties are associated with higher economic losses.

Q30: Conflict type distribution across climate zones

# Number of conflicts for every climate zone / conflict type combination.
# `.groups = "drop"` returns an ungrouped result, so no leftover grouping by
# Climate_Zone leaks into later operations and summarise() no longer emits
# its regrouping message.
facet_data <- data %>%
  group_by(Climate_Zone, Conflict_Type) %>%
  summarise(count = n(), .groups = "drop")

# One panel per climate zone, one bar per conflict type within each panel.
ggplot(facet_data, aes(x = Conflict_Type, y = count, fill = Conflict_Type)) +
  geom_col() +
  facet_wrap(~Climate_Zone) +
  labs(title = "Conflict Type across Climate Zones")

Inference:
Conflict patterns vary significantly across different climate zones.