This project analyzes global war and conflict data from 1950 to 2024. The objective is to understand patterns in conflicts, casualties, economic impact, and global involvement using R programming.
The dataset contains information about global conflicts, including the countries involved, conflict type, duration, casualties, economic loss, and UN involvement.
It helps in analyzing patterns and impact of conflicts worldwide.
# Load the global conflicts dataset from CSV into a data frame.
# NOTE(review): the path below is machine-specific; adjust it before running
# this script on another computer.
data <- read.csv("C:/Users/User/Downloads/global_conflicts_dataset.csv")

# Open the spreadsheet-style viewer only when a person is driving the session;
# in a non-interactive run (e.g. Rscript or a rendered report) there is no
# viewer to show, so the call is skipped instead of failing or doing nothing.
if (interactive()) {
  View(data)
}
Inference: Dataset successfully loaded and ready for analysis.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Count all rows of the dataset (one row per recorded conflict) and show the
# result as a one-row summary with a single `total` column.
total_conflicts <- summarise(data, total = n())
print(total_conflicts)
## total
## 1 3000
Inference: The dataset contains 3,000 recorded conflicts.
# Conflicts per climate zone, sorted from most to fewest.
# Rows with a missing Climate_Zone are dropped before counting.
climate_data <- filter(data, !is.na(Climate_Zone)) %>%
  group_by(Climate_Zone) %>%
  summarise(total_conflicts = n()) %>%
  arrange(desc(total_conflicts))

print(climate_data)
## # A tibble: 4 × 2
## Climate_Zone total_conflicts
## <chr> <int>
## 1 Polar 790
## 2 Tropical 771
## 3 Arid 737
## 4 Temperate 702
Inference: Conflicts are spread fairly evenly across climate zones (702–790 each), with Polar and Tropical slightly ahead of Arid and Temperate.
# Ten values of Country_A that occur most often, largest count first.
# Rows with a missing Country_A are dropped before counting.
countryA_data <- filter(data, !is.na(Country_A)) %>%
  group_by(Country_A) %>%
  summarise(total_conflicts = n()) %>%
  arrange(desc(total_conflicts)) %>%
  head(10)

print(countryA_data)
## # A tibble: 10 × 2
## Country_A total_conflicts
## <chr> <int>
## 1 USA 224
## 2 Ukraine 216
## 3 Canada 209
## 4 UK 208
## 5 Israel 205
## 6 Russia 205
## 7 Turkey 201
## 8 Brazil 200
## 9 Iran 199
## 10 China 196
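Inference: The countries listed above appear most often as Country_A, with very similar counts (196–224 conflicts each). Whether Country_A denotes the initiating side depends on how the dataset defines that column.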
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
# Five most frequent Country_A values (missing values excluded).
top5 <- filter(data, !is.na(Country_A)) %>%
  group_by(Country_A) %>%
  summarise(total = n()) %>%
  arrange(desc(total)) %>%
  head(5)

# Horizontal bar chart of those counts; reorder() sorts the bars by `total`
# and coord_flip() turns the chart sideways so country names sit on the y axis.
ggplot(top5, aes(x = reorder(Country_A, total), y = total, fill = Country_A)) +
  geom_col() +
  coord_flip() +
  ggtitle("Top 5 Countries in Conflicts")
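Inference: The five most frequent countries have very similar counts (205–224 conflicts each, roughly 7% of the 3,000 records apiece), so no single country dominates conflict participation.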
# Conflicts per conflict type, most frequent first.
# Rows with a missing Conflict_Type are dropped before counting.
conflict_type <- filter(data, !is.na(Conflict_Type)) %>%
  group_by(Conflict_Type) %>%
  summarise(total = n()) %>%
  arrange(desc(total))

print(conflict_type)
## # A tibble: 5 × 2
## Conflict_Type total
## <chr> <int>
## 1 Skirmish 627
## 2 Proxy War 621
## 3 Civil War 597
## 4 Cold Conflict 588
## 5 War 567
Inference: Some conflict types occur more frequently than others.
# Add a per-conflict casualty total: military deaths on both sides plus
# civilian deaths. The sum is NA whenever any of the three inputs is NA.
data <- mutate(
  data,
  total_casualties = Military_Deaths_A + Military_Deaths_B + Civilian_Deaths
)

# Mean of the new column, ignoring rows where the total is NA.
avg_casualties <- summarise(data, avg = mean(total_casualties, na.rm = TRUE))
print(avg_casualties)
## avg
## 1 149534.7
Inference: Average casualties indicate the overall human impact of conflicts.
# Histogram of total casualties per conflict, in bins 5,000 wide.
# The x axis uses comma-formatted labels so large counts stay readable.
ggplot(data, aes(x = total_casualties)) +
  geom_histogram(binwidth = 5000, fill = "skyblue", color = "black") +
  scale_x_continuous(labels = scales::comma) +
  ggtitle("Distribution of Total Casualties")
Inference: Most conflicts fall within a moderate casualty range, with a few extreme cases.
# Mean casualties across all conflicts (NA totals ignored); used as threshold.
avg_val <- mean(data[["total_casualties"]], na.rm = TRUE)

# Conflicts whose casualties are strictly above that mean.
high_casualties <- filter(data, total_casualties > avg_val)

# How many such conflicts there are.
count_high <- summarise(high_casualties, total = n())
print(count_high)
## total
## 1 1532
Inference: 1,532 of the 3,000 conflicts (about 51%) have casualties above the average, i.e. roughly half of all conflicts rather than a small subset.
# Mean of the Economic_Loss_USD_Billions column over all conflicts,
# ignoring missing values.
avg_loss <- summarise(
  data,
  avg_loss = mean(Economic_Loss_USD_Billions, na.rm = TRUE)
)
print(avg_loss)
## avg_loss
## 1 253.6844
Inference: Conflicts lead to substantial economic damage globally.
# Mean economic loss for each conflict type (missing losses ignored).
loss_data <- group_by(data, Conflict_Type) %>%
  summarise(avg_loss = mean(Economic_Loss_USD_Billions, na.rm = TRUE))

# Bar chart of those means, one bar per conflict type.
ggplot(loss_data, aes(x = Conflict_Type, y = avg_loss, fill = Conflict_Type)) +
  geom_col() +
  ggtitle("Economic Loss by Conflict Type")
Inference: Certain conflict types are more economically destructive.
# Number of conflicts for each value of UN_Involvement.
un_data <- group_by(data, UN_Involvement) %>%
  summarise(total = n())

print(un_data)
## # A tibble: 2 × 2
## UN_Involvement total
## <chr> <int>
## 1 No 1490
## 2 Yes 1510
Inference: Conflicts are split almost evenly: 1,510 with UN involvement and 1,490 without.
# Mean total casualties for each value of UN_Involvement (NA totals ignored).
un_cas <- group_by(data, UN_Involvement) %>%
  summarise(avg_casualties = mean(total_casualties, na.rm = TRUE))

# Bar chart comparing those means.
ggplot(un_cas, aes(x = UN_Involvement, y = avg_casualties, fill = UN_Involvement)) +
  geom_col() +
  ggtitle("Casualties vs UN Involvement")
Inference: UN involvement may influence the severity of conflicts.
# Conflicts per Resource_Dispute category, most frequent first.
# Rows with a missing Resource_Dispute are dropped before counting.
resource_data <- filter(data, !is.na(Resource_Dispute)) %>%
  group_by(Resource_Dispute) %>%
  summarise(total = n()) %>%
  arrange(desc(total))

print(resource_data)
## # A tibble: 4 × 2
## Resource_Dispute total
## <chr> <int>
## 1 Oil 767
## 2 Land 745
## 3 None 745
## 4 Water 743
Inference: Certain resource types are more frequently linked to conflicts.
# Bar chart of the per-category conflict counts held in resource_data.
ggplot(resource_data, aes(x = Resource_Dispute, y = total, fill = Resource_Dispute)) +
  geom_col() +
  ggtitle("Resource Dispute Distribution")
Inference: Conflicts are distributed almost evenly across the resource-dispute categories (743–767 each), with Oil only slightly ahead.
# Mean total casualties per climate zone (NA totals ignored),
# sorted from the highest mean to the lowest.
climate_cas <- group_by(data, Climate_Zone) %>%
  summarise(avg_casualties = mean(total_casualties, na.rm = TRUE)) %>%
  arrange(desc(avg_casualties))

print(climate_cas)
## # A tibble: 4 × 2
## Climate_Zone avg_casualties
## <chr> <dbl>
## 1 Arid 150984.
## 2 Temperate 149829.
## 3 Tropical 148968.
## 4 Polar 148475.
Inference: Average casualties are nearly identical across climate zones (about 148,500–151,000), with Arid marginally highest; the differences are under 2%.
# Bar chart of the mean casualties per climate zone held in climate_cas.
ggplot(climate_cas, aes(x = Climate_Zone, y = avg_casualties, fill = Climate_Zone)) +
  geom_col() +
  ggtitle("Average Casualties by Climate Zone")
Inference: The chart highlights regions with higher conflict severity.
# Mean of Duration_Days over all conflicts, ignoring missing values.
avg_duration <- summarise(data, avg_duration = mean(Duration_Days, na.rm = TRUE))
print(avg_duration)
## avg_duration
## 1 1003.735
Inference: Average duration reflects how long conflicts typically last.
# Histogram of conflict duration in days, in bins 50 days wide.
ggplot(data, aes(x = Duration_Days)) +
  geom_histogram(binwidth = 50, fill = "orange", color = "black") +
  ggtitle("Distribution of Conflict Duration")
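Inference: The histogram shows how conflict durations are spread. With a mean of about 1,004 days and roughly half of the conflicts lasting longer than that, the data do not indicate that most conflicts are short-term.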
# Mean duration in days (missing values ignored); used as the threshold.
avg_dur <- mean(data[["Duration_Days"]], na.rm = TRUE)

# Conflicts that lasted strictly longer than the mean duration.
long_conflicts <- filter(data, Duration_Days > avg_dur)

# How many such conflicts there are.
count_long <- summarise(long_conflicts, total = n())
print(count_long)
## total
## 1 1494
Inference: 1,494 of the 3,000 conflicts (about 50%) last longer than the average duration.
# Mean economic loss for each climate zone (missing losses ignored).
loss_climate <- group_by(data, Climate_Zone) %>%
  summarise(avg_loss = mean(Economic_Loss_USD_Billions, na.rm = TRUE))

# Bar chart of those means, one bar per climate zone.
ggplot(loss_climate, aes(x = Climate_Zone, y = avg_loss, fill = Climate_Zone)) +
  geom_col() +
  ggtitle("Economic Loss by Climate Zone")
Inference: Economic impact varies across different climate regions.
# Recompute the overall mean of Duration_Days (missing values ignored)
# and print it as a one-row summary.
avg_duration <- summarise(data, avg_duration = mean(Duration_Days, na.rm = TRUE))
print(avg_duration)
## avg_duration
## 1 1003.735
Inference:
Conflicts last about 1,004 days (roughly 2.75 years) on average.
# Mean duration in days (missing values ignored); used as the threshold.
avg_dur <- mean(data[["Duration_Days"]], na.rm = TRUE)

# Count of conflicts lasting strictly longer than the mean. Note that
# long_conflicts ends up holding this one-row count, not the filtered rows.
long_conflicts <- filter(data, Duration_Days > avg_dur) %>%
  summarise(total = n())

print(long_conflicts)
## total
## 1 1494
Inference:
About half of the conflicts (1,494 of 3,000) exceed the average duration.
# Recompute the overall mean of Economic_Loss_USD_Billions
# (missing values ignored) and print it as a one-row summary.
avg_loss <- summarise(
  data,
  avg_loss = mean(Economic_Loss_USD_Billions, na.rm = TRUE)
)
print(avg_loss)
## avg_loss
## 1 253.6844
Inference:
Conflicts cause significant economic damage on average.
# Conflicts per year, sorted so the busiest year comes first.
year_data <- group_by(data, Year) %>%
  summarise(total = n()) %>%
  arrange(desc(total))

# Keep only the first row, i.e. the year with the largest count
# (if several years tie, only the first one after sorting is kept).
top_year <- year_data %>% head(1)
print(top_year)
## # A tibble: 1 × 2
## Year total
## <int> <int>
## 1 1982 56
Inference:
1982 recorded the highest number of conflicts (56).
# Mean duration in days (missing values ignored); used as the threshold.
avg_dur <- mean(data[["Duration_Days"]], na.rm = TRUE)

# Conflicts lasting strictly longer than the mean duration.
long_data <- filter(data, Duration_Days > avg_dur)

# Mean total casualties within that longer-than-average subset.
long_cas <- summarise(
  long_data,
  avg_casualties = mean(total_casualties, na.rm = TRUE)
)
print(long_cas)
## avg_casualties
## 1 147381.7
# Number of conflicts recorded in each year.
year_trend <- group_by(data, Year) %>%
  summarise(total_conflicts = n())

# Line chart of the yearly counts.
ggplot(year_trend, aes(x = Year, y = total_conflicts)) +
  geom_line(color = "blue") +
  ggtitle("Conflicts Over Time")
Inference:
Conflict frequency shows variation over different years.
# Mean economic loss per year (missing losses ignored).
year_loss <- group_by(data, Year) %>%
  summarise(avg_loss = mean(Economic_Loss_USD_Billions, na.rm = TRUE))

# Line chart of the yearly means with a red point marking each year.
ggplot(year_loss, aes(x = Year, y = avg_loss)) +
  geom_line(color = "darkgreen") +
  geom_point(color = "red") +
  ggtitle("Economic Loss Over Time")
Inference:
Economic impact of conflicts fluctuates over time.
# Draw a random subset of up to 300 conflicts so the scatter plot stays legible.
# The seed is fixed so the same rows (and therefore the same plot) come back on
# every run. slice_sample() replaces the superseded sample_n() and simply
# returns all rows if the data has fewer than 300.
set.seed(123)
sample_data <- data %>%
  slice_sample(n = 300)

# Scatter plot of duration (days) against total casualties for the sample;
# the y axis uses comma-formatted labels.
ggplot(sample_data, aes(x = Duration_Days, y = total_casualties)) +
  geom_point(color = "blue") +
  scale_y_continuous(labels = scales::comma) +
  labs(title = "Duration vs Casualties")
Inference:
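The data do not show that longer conflicts produce higher casualties: conflicts lasting longer than the average duration have slightly lower mean casualties (about 147,382) than the overall mean (about 149,535).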
# Draw a second random subset of up to 300 conflicts for this scatter plot.
# The seed is fixed so the same rows (and therefore the same plot) come back on
# every run. slice_sample() replaces the superseded sample_n() and simply
# returns all rows if the data has fewer than 300.
set.seed(456)
sample_data2 <- data %>%
  slice_sample(n = 300)

# Scatter plot of economic loss against total casualties for the sample;
# the y axis uses comma-formatted labels.
ggplot(sample_data2, aes(x = Economic_Loss_USD_Billions, y = total_casualties)) +
  geom_point(color = "red") +
  scale_y_continuous(labels = scales::comma) +
  labs(title = "Economic Loss vs Casualties")
Inference:
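The scatter plot is meant to show whether higher casualties go together with higher economic losses; no correlation statistic is computed here, so any apparent association should be confirmed (for example with cor()) before drawing a conclusion.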
# Count conflicts for every Climate_Zone / Conflict_Type combination.
# `.groups = "drop"` returns an ungrouped result, so no leftover grouping by
# Climate_Zone leaks into later steps and summarise() no longer prints its
# "has regrouped the output" message.
facet_data <- data %>%
  group_by(Climate_Zone, Conflict_Type) %>%
  summarise(count = n(), .groups = "drop")
# Bar chart of conflict counts by type, drawn as one panel per climate zone.
ggplot(facet_data, aes(x = Conflict_Type, y = count, fill = Conflict_Type)) +
  geom_col() +
  facet_wrap(~Climate_Zone) +
  ggtitle("Conflict Type across Climate Zones")
Inference:
Conflict patterns vary significantly across different climate zones.