# Import the dataset

library(readxl)
## Warning: package 'readxl' was built under R version 4.5.3
# Read the conflicts workbook into `data`; every later section reads from it.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file exists -- consider a project-relative path.
data <- read_excel("C:/Users/vishu/Downloads/global_conflicts_dataset.xlsx")
# View() opens a GUI data viewer, so only call it when a person is at the
# console; batch or knitted runs skip it.
if (interactive()) {
  View(data)
}
# Q1 Total number of conflicts recorded in the dataset
# The answer is taken as the number of rows in `data`.

total_conflicts <- NROW(data)
total_conflicts
## [1] 3000
# Q2 Most frequent Country_A value

# Tabulate Country_A, then take the label attached to the largest count.
countryA_freq <- table(data$Country_A)
most_countryA <- countryA_freq |>
  which.max() |>
  names()
most_countryA
## [1] "USA"
# Q3 Most frequent Country_B value

# Tabulate Country_B, then take the label attached to the largest count.
countryB_freq <- table(data$Country_B)
most_countryB <- countryB_freq |>
  which.max() |>
  names()
most_countryB
## [1] "India"
# Q4 Climate zone with the most conflicts

# Count rows per Climate_Zone and keep the name of the largest count.
climate_freq <- table(data$Climate_Zone)
top_climate <- climate_freq |>
  which.max() |>
  names()
top_climate
## [1] "Polar"
# Q5 Most common conflict type

# Count rows per Conflict_Type and keep the name of the largest count.
conflict_freq <- table(data$Conflict_Type)
top_conflict <- conflict_freq |>
  which.max() |>
  names()
top_conflict
## [1] "Skirmish"
# Q6 Average casualties

# Total casualties per conflict: military deaths on both sides plus
# civilian deaths.
total_casualties <- with(
  data,
  Military_Deaths_A + Military_Deaths_B + Civilian_Deaths
)
# na.rm = TRUE stops a single incomplete record from turning the mean into NA
# and matches how the same mean is computed in Q12 and Q15.
avg_casualties <- mean(total_casualties, na.rm = TRUE)
avg_casualties
## [1] 149534.7
# Q7 Conflict with the maximum total casualties

total_casualties <- with(
  data,
  Military_Deaths_A + Military_Deaths_B + Civilian_Deaths
)
# Row position of the largest total; that row is then shown in full.
max_index <- which.max(total_casualties)
data[max_index, ]
## # A tibble: 1 × 30
##   Country_A Country_B Conflict_Type  Year Duration_Days Military_Deaths_A
##   <chr>     <chr>     <chr>         <dbl>         <dbl>             <dbl>
## 1 India     France    Civil War      1980           725             46381
## # ℹ 24 more variables: Military_Deaths_B <dbl>, Civilian_Deaths <dbl>,
## #   Economic_Loss_USD_Billions <dbl>, Temperature_Avg_C <dbl>,
## #   Rainfall_mm <dbl>, Terrain_Type <chr>, Population_A_Millions <dbl>,
## #   Population_B_Millions <dbl>, GDP_A_Billions <dbl>, GDP_B_Billions <dbl>,
## #   Alliance_A <chr>, Alliance_B <chr>, Weapons_Used <chr>, Air_Strikes <dbl>,
## #   Naval_Battles <dbl>, Sanctions <chr>, Refugees_Millions <dbl>,
## #   Ceasefire <chr>, Outcome <chr>, Latitude <dbl>, Longitude <dbl>, …
# Q8 Mean of Economic_Loss_USD_Billions, skipping missing values

avg_loss <- data$Economic_Loss_USD_Billions |>
  mean(na.rm = TRUE)
avg_loss
## [1] 253.6844
# Q9 Climate zone with the highest average casualties

total_casualties <- with(
  data,
  Military_Deaths_A + Military_Deaths_B + Civilian_Deaths
)
# Mean total casualties within each Climate_Zone, skipping missing totals.
avg_by_climate <- tapply(
  X = total_casualties,
  INDEX = data$Climate_Zone,
  FUN = mean,
  na.rm = TRUE
)
# Note: this reuses (and overwrites) the `top_climate` name assigned in Q4.
top_climate <- avg_by_climate |>
  which.max() |>
  names()
top_climate
## [1] "Arid"
# Q10 Mean of Duration_Days, skipping missing values

avg_duration <- data$Duration_Days |>
  mean(na.rm = TRUE)
avg_duration
## [1] 1003.735
# Q11 Average casualties by conflict type

total_casualties <- with(
  data,
  Military_Deaths_A + Military_Deaths_B + Civilian_Deaths
)
# Mean total casualties within each Conflict_Type, skipping missing totals.
avg_by_type <- tapply(
  X = total_casualties,
  INDEX = data$Conflict_Type,
  FUN = mean,
  na.rm = TRUE
)
avg_by_type
##     Civil War Cold Conflict     Proxy War      Skirmish           War 
##      147421.4      152010.0      146420.9      151162.6      150802.7
# Q12 Country with most high-casualty conflicts

total_casualties <- with(
  data,
  Military_Deaths_A + Military_Deaths_B + Civilian_Deaths
)
avg_casualties <- mean(total_casualties, na.rm = TRUE)
# "High casualty" means strictly above the overall mean. which() discards NA
# comparisons; a bare logical index would add an all-NA row for every
# conflict whose casualty total is missing.
high_conflicts <- data[which(total_casualties > avg_casualties), ]
# Only the Country_A side of each conflict is tallied here.
country_freq <- table(high_conflicts$Country_A)
top_country <- names(which.max(country_freq))

top_country
## [1] "Ukraine"
# Q13 Conflicts with UN involvement

# %in% never returns NA, so rows with a missing UN_Involvement are simply
# excluded; `==` would return NA for them and the subset would then contain
# all-NA rows.
un_conflicts <- data[data$UN_Involvement %in% "Yes", ]
head(un_conflicts)
## # A tibble: 6 × 30
##   Country_A Country_B Conflict_Type  Year Duration_Days Military_Deaths_A
##   <chr>     <chr>     <chr>         <dbl>         <dbl>             <dbl>
## 1 France    France    Cold Conflict  2020          1829             33197
## 2 Brazil    Brazil    Civil War      2013           886              9915
## 3 UK        Iran      Civil War      1957           712             34429
## 4 France    Russia    Cold Conflict  2007           728             33641
## 5 Australia Canada    Proxy War      1964          1240              9390
## 6 Brazil    Brazil    Proxy War      1986           891             22344
## # ℹ 24 more variables: Military_Deaths_B <dbl>, Civilian_Deaths <dbl>,
## #   Economic_Loss_USD_Billions <dbl>, Temperature_Avg_C <dbl>,
## #   Rainfall_mm <dbl>, Terrain_Type <chr>, Population_A_Millions <dbl>,
## #   Population_B_Millions <dbl>, GDP_A_Billions <dbl>, GDP_B_Billions <dbl>,
## #   Alliance_A <chr>, Alliance_B <chr>, Weapons_Used <chr>, Air_Strikes <dbl>,
## #   Naval_Battles <dbl>, Sanctions <chr>, Refugees_Millions <dbl>,
## #   Ceasefire <chr>, Outcome <chr>, Latitude <dbl>, Longitude <dbl>, …
# Q14 Water resource conflicts

# %in% never returns NA, so rows with a missing Resource_Dispute are simply
# excluded; `==` would return NA for them and the subset would then contain
# all-NA rows.
water_conflicts <- data[data$Resource_Dispute %in% "Water", ]
head(water_conflicts)
## # A tibble: 6 × 30
##   Country_A Country_B Conflict_Type  Year Duration_Days Military_Deaths_A
##   <chr>     <chr>     <chr>         <dbl>         <dbl>             <dbl>
## 1 India     Japan     Cold Conflict  2013          1234             26773
## 2 Israel    USA       Civil War      1970          1982             17256
## 3 Turkey    Australia Proxy War      2021          1754              1745
## 4 Russia    Canada    War            1980          1949             21663
## 5 Australia France    Skirmish       1975            80             34918
## 6 Brazil    Brazil    Proxy War      1986           891             22344
## # ℹ 24 more variables: Military_Deaths_B <dbl>, Civilian_Deaths <dbl>,
## #   Economic_Loss_USD_Billions <dbl>, Temperature_Avg_C <dbl>,
## #   Rainfall_mm <dbl>, Terrain_Type <chr>, Population_A_Millions <dbl>,
## #   Population_B_Millions <dbl>, GDP_A_Billions <dbl>, GDP_B_Billions <dbl>,
## #   Alliance_A <chr>, Alliance_B <chr>, Weapons_Used <chr>, Air_Strikes <dbl>,
## #   Naval_Battles <dbl>, Sanctions <chr>, Refugees_Millions <dbl>,
## #   Ceasefire <chr>, Outcome <chr>, Latitude <dbl>, Longitude <dbl>, …
# Q15 Above-average casualty conflicts

total_casualties <- with(
  data,
  Military_Deaths_A + Military_Deaths_B + Civilian_Deaths
)
avg_casualties <- mean(total_casualties, na.rm = TRUE)
# Keep conflicts strictly above the mean. which() discards NA comparisons so
# conflicts with a missing total do not show up as all-NA rows.
high_casualty_conflicts <- data[which(total_casualties > avg_casualties), ]

head(high_casualty_conflicts)
## # A tibble: 6 × 30
##   Country_A Country_B Conflict_Type  Year Duration_Days Military_Deaths_A
##   <chr>     <chr>     <chr>         <dbl>         <dbl>             <dbl>
## 1 India     Japan     Cold Conflict  2013          1234             26773
## 2 Brazil    Brazil    Civil War      2013           886              9915
## 3 Israel    Ukraine   Civil War      1969           464              1948
## 4 UK        Iran      Civil War      1957           712             34429
## 5 France    Russia    Cold Conflict  2007           728             33641
## 6 Russia    Canada    War            1980          1949             21663
## # ℹ 24 more variables: Military_Deaths_B <dbl>, Civilian_Deaths <dbl>,
## #   Economic_Loss_USD_Billions <dbl>, Temperature_Avg_C <dbl>,
## #   Rainfall_mm <dbl>, Terrain_Type <chr>, Population_A_Millions <dbl>,
## #   Population_B_Millions <dbl>, GDP_A_Billions <dbl>, GDP_B_Billions <dbl>,
## #   Alliance_A <chr>, Alliance_B <chr>, Weapons_Used <chr>, Air_Strikes <dbl>,
## #   Naval_Battles <dbl>, Sanctions <chr>, Refugees_Millions <dbl>,
## #   Ceasefire <chr>, Outcome <chr>, Latitude <dbl>, Longitude <dbl>, …
# Q16 Long-duration conflicts

avg_duration <- mean(data$Duration_Days, na.rm = TRUE)
# "Long" means strictly longer than the mean duration. which() discards NA
# comparisons so conflicts with a missing duration do not show up as all-NA
# rows.
long_conflicts <- data[which(data$Duration_Days > avg_duration), ]
head(long_conflicts)
## # A tibble: 6 × 30
##   Country_A Country_B Conflict_Type  Year Duration_Days Military_Deaths_A
##   <chr>     <chr>     <chr>         <dbl>         <dbl>             <dbl>
## 1 France    France    Cold Conflict  2020          1829             33197
## 2 India     Japan     Cold Conflict  2013          1234             26773
## 3 Israel    USA       Civil War      1970          1982             17256
## 4 Turkey    Australia Proxy War      2021          1754              1745
## 5 Russia    Canada    War            1980          1949             21663
## 6 France    Germany   Proxy War      1958          1440             15303
## # ℹ 24 more variables: Military_Deaths_B <dbl>, Civilian_Deaths <dbl>,
## #   Economic_Loss_USD_Billions <dbl>, Temperature_Avg_C <dbl>,
## #   Rainfall_mm <dbl>, Terrain_Type <chr>, Population_A_Millions <dbl>,
## #   Population_B_Millions <dbl>, GDP_A_Billions <dbl>, GDP_B_Billions <dbl>,
## #   Alliance_A <chr>, Alliance_B <chr>, Weapons_Used <chr>, Air_Strikes <dbl>,
## #   Naval_Battles <dbl>, Sanctions <chr>, Refugees_Millions <dbl>,
## #   Ceasefire <chr>, Outcome <chr>, Latitude <dbl>, Longitude <dbl>, …
# Q17 UN vs Non-UN conflicts: number of rows per UN_Involvement value

un_comparison <- data$UN_Involvement |>
  table()
un_comparison
## 
##   No  Yes 
## 1490 1510
# Q18 Top 5 conflicts by economic loss

# Negating the loss makes order()'s ascending sort put the largest loss first.
sorted_data <- data[order(-data$Economic_Loss_USD_Billions), ]
top5_loss <- sorted_data |>
  head(n = 5)

top5_loss
## # A tibble: 5 × 30
##   Country_A Country_B Conflict_Type  Year Duration_Days Military_Deaths_A
##   <chr>     <chr>     <chr>         <dbl>         <dbl>             <dbl>
## 1 Brazil    Brazil    Civil War      2013           886              9915
## 2 USA       UK        Cold Conflict  2022          1812             13447
## 3 China     Australia War            2003          1295             28993
## 4 Russia    Iran      Cold Conflict  1998           233              2706
## 5 China     UK        Skirmish       1972          1166             22806
## # ℹ 24 more variables: Military_Deaths_B <dbl>, Civilian_Deaths <dbl>,
## #   Economic_Loss_USD_Billions <dbl>, Temperature_Avg_C <dbl>,
## #   Rainfall_mm <dbl>, Terrain_Type <chr>, Population_A_Millions <dbl>,
## #   Population_B_Millions <dbl>, GDP_A_Billions <dbl>, GDP_B_Billions <dbl>,
## #   Alliance_A <chr>, Alliance_B <chr>, Weapons_Used <chr>, Air_Strikes <dbl>,
## #   Naval_Battles <dbl>, Sanctions <chr>, Refugees_Millions <dbl>,
## #   Ceasefire <chr>, Outcome <chr>, Latitude <dbl>, Longitude <dbl>, …
# Q19 Create Casualty_Level column

total_casualties <- with(
  data,
  Military_Deaths_A + Military_Deaths_B + Civilian_Deaths
)

# Band each total: below 50000 -> "Low"; 50000 up to but not including
# 100000 -> "Medium"; 100000 and above -> "High". findInterval() yields 0, 1
# or 2 for those bands, so adding 1 indexes the label vector. A missing total
# stays NA.
data$Casualty_Level <- c("Low", "Medium", "High")[
  findInterval(total_casualties, c(50000, 100000)) + 1L
]

head(data)
## # A tibble: 6 × 31
##   Country_A Country_B Conflict_Type  Year Duration_Days Military_Deaths_A
##   <chr>     <chr>     <chr>         <dbl>         <dbl>             <dbl>
## 1 France    France    Cold Conflict  2020          1829             33197
## 2 India     Japan     Cold Conflict  2013          1234             26773
## 3 Israel    USA       Civil War      1970          1982             17256
## 4 Turkey    Australia Proxy War      2021          1754              1745
## 5 Australia France    War            2012           753             29149
## 6 Brazil    Brazil    Civil War      2013           886              9915
## # ℹ 25 more variables: Military_Deaths_B <dbl>, Civilian_Deaths <dbl>,
## #   Economic_Loss_USD_Billions <dbl>, Temperature_Avg_C <dbl>,
## #   Rainfall_mm <dbl>, Terrain_Type <chr>, Population_A_Millions <dbl>,
## #   Population_B_Millions <dbl>, GDP_A_Billions <dbl>, GDP_B_Billions <dbl>,
## #   Alliance_A <chr>, Alliance_B <chr>, Weapons_Used <chr>, Air_Strikes <dbl>,
## #   Naval_Battles <dbl>, Sanctions <chr>, Refugees_Millions <dbl>,
## #   Ceasefire <chr>, Outcome <chr>, Latitude <dbl>, Longitude <dbl>, …
# Q20 Number of conflicts at each Casualty_Level

# Tabulate the levels, then convert the table to a two-column data frame.
level_table <- table(data$Casualty_Level) |>
  as.data.frame()
level_table
##     Var1 Freq
## 1   High 2222
## 2    Low  132
## 3 Medium  646
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
# Q21 Create a bar plot showing average casualties for each Conflict_Type

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Per-conflict total: military deaths on both sides plus civilian deaths.
# Stored as a column because the grouped summaries and plots below read it.
data$total_casualties <- with(
  data,
  Military_Deaths_A + Military_Deaths_B + Civilian_Deaths
)

# Mean total casualties for each Conflict_Type (missing totals skipped).
avg_type <- summarise(
  group_by(data, Conflict_Type),
  avg_casualties = mean(total_casualties, na.rm = TRUE)
)

# One filled bar per conflict type; bar heights are the precomputed means.
ggplot(
  avg_type,
  aes(x = Conflict_Type, y = avg_casualties, fill = Conflict_Type)
) +
  geom_col() +
  labs(
    title = "Average Casualties by Conflict Type",
    x = "Conflict Type",
    y = "Average Casualties"
  )

# Q22 Bar plot of average casualties for each Climate_Zone

# Mean total casualties for each Climate_Zone (missing totals skipped).
avg_climate <- summarise(
  group_by(data, Climate_Zone),
  avg_casualties = mean(total_casualties, na.rm = TRUE)
)

# One filled bar per climate zone; bar heights are the precomputed means.
ggplot(
  avg_climate,
  aes(x = Climate_Zone, y = avg_casualties, fill = Climate_Zone)
) +
  geom_col() +
  labs(
    title = "Average Casualties by Climate Zone",
    x = "Climate Zone",
    y = "Average Casualties"
  )

# Q23 Average casualties with vs without UN involvement

# Mean total casualties for each UN_Involvement value (missing totals skipped).
avg_un <- summarise(
  group_by(data, UN_Involvement),
  avg_casualties = mean(total_casualties, na.rm = TRUE)
)

# One filled bar per UN_Involvement value; heights are the precomputed means.
ggplot(
  avg_un,
  aes(x = UN_Involvement, y = avg_casualties, fill = UN_Involvement)
) +
  geom_col() +
  labs(
    title = "UN Involvement vs Casualties",
    x = "UN Involvement",
    y = "Average Casualties"
  )

# Q24 Distribution of total casualties

# Histogram of the per-conflict totals using fixed bins 2000 wide.
ggplot(data, aes(x = total_casualties)) +
  geom_histogram(binwidth = 2000, fill = "skyblue", color = "black") +
  labs(
    title = "Distribution of Total Casualties",
    x = "Casualties",
    y = "Frequency"
  )

# Q25 Distribution of Duration_Days

# Histogram of conflict durations using fixed bins 20 wide; axis labels are
# left at their ggplot defaults.
ggplot(data, aes(x = Duration_Days)) +
  geom_histogram(binwidth = 20, fill = "orange", color = "black") +
  labs(title = "Distribution of Conflict Duration")

# Q26 Show number of conflicts per year

# One row per Year holding the number of conflicts recorded for it.
year_data <- data %>%
  group_by(Year) %>%
  summarise(count = n())

# Line thickness is set with `linewidth`: using `size` for lines has been
# deprecated since ggplot2 3.4.0 and raises a warning.
ggplot(year_data, aes(x = Year, y = count)) +
  geom_line(color = "blue", linewidth = 1) +
  ggtitle("Number of Conflicts Over Years") +
  labs(x = "Year", y = "Number of Conflicts")

# Q27 Show trend of average casualties over time

# One row per Year holding the mean total casualties (missing totals skipped).
year_casualties <- data %>%
  group_by(Year) %>%
  summarise(avg_casualties = mean(total_casualties, na.rm = TRUE))

# The line uses `linewidth` (see above); the points keep `size`, which is
# still the aesthetic that controls point size.
ggplot(year_casualties, aes(x = Year, y = avg_casualties)) +
  geom_line(color = "darkgreen", linewidth = 1) +
  geom_point(color = "red", size = 2) +
  ggtitle("Average Casualties Over Years") +
  labs(x = "Year", y = "Average Casualties")

# Q28 Relationship between duration and casualties, aggregated by year

# One row per Year: mean Duration_Days and mean total casualties, each
# computed with missing values skipped.
year_scatter <- summarise(
  group_by(data, Year),
  avg_duration = mean(Duration_Days, na.rm = TRUE),
  avg_casualties = mean(total_casualties, na.rm = TRUE)
)

# Each point is one year.
ggplot(year_scatter, aes(x = avg_duration, y = avg_casualties)) +
  geom_point(color = "red", size = 3) +
  labs(
    title = "Avg Duration vs Casualties (Year-wise)",
    x = "Average Duration",
    y = "Average Casualties"
  )

# Q29 Scatter plot of average duration vs average casualties per year,
# with point color mapped to Year

ggplot(
  year_scatter,
  aes(x = avg_duration, y = avg_casualties, color = Year)
) +
  geom_point(size = 3) +
  labs(
    title = "Year-wise Scatter (Duration vs Casualties)",
    color = "Year"
  )

# Q30 Compare conflict types across different climate zones

# Count conflicts for every Climate_Zone / Conflict_Type pair.
# .groups = "drop" returns an ungrouped tibble; without it the result stays
# grouped by Climate_Zone and dplyr prints a regrouping message.
facet_data <- data %>%
  group_by(Climate_Zone, Conflict_Type) %>%
  summarise(count = n(), .groups = "drop")

# One panel per climate zone, one bar per conflict type inside each panel.
ggplot(facet_data, aes(x = Conflict_Type, y = count, fill = Conflict_Type)) +
  geom_col() +
  facet_wrap(~Climate_Zone) +
  labs(
    title = "Conflict Type across Climate Zones",
    x = "Conflict Type",
    y = "Count"
  )