# Load necessary libraries
pacman::p_load(pacman, readr, dplyr, ggplot2, ggmap, sf, patchwork)

# Load the data
# Reads the processed Hum map records from the current working directory.
data <- read.csv("hummap_processed.csv")

# Fail early, with a clear message, if a column used by the plots in this
# script is absent; otherwise the problem only surfaces when a plot is drawn.
required_cols <- c("gender", "longitude", "latitude", "age")
missing_cols <- setdiff(required_cols, names(data))
if (length(missing_cols) > 0) {
  stop(
    "hummap_processed.csv is missing required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# List unique values in the gender column
# `[[` matches the column name exactly, whereas `$` allows partial matching.
unique_genders <- unique(data[["gender"]])
print(unique_genders)
# Output recorded from a previous run of the print() call above:
## [1] "Female"     "Male"       "Non Binary"
# Country outlines for the base layer of the map.
# map_data() converts data from the `maps` package into a data frame, so that
# package must be installed for this call to succeed.
world_map <- ggplot2::map_data(map = "world")

# Geospatial plot
# Draws the world outline as a gray base layer and overlays one semi-transparent
# point per row of `data`, positioned by longitude/latitude and colored by gender.
map_plot <- ggplot() +
  geom_polygon(data = world_map, aes(x = long, y = lat, group = group),
               fill = "lightgray", color = "white") +
  geom_point(data = data, aes(x = longitude, y = latitude, color = gender),
             alpha = 0.7, size = 2) +
  # Colors are named by gender value so the assignment does not depend on the
  # order (or number) of categories that happen to be present in the data.
  scale_color_manual(
    values = c("Female" = "pink", "Male" = "blue", "Non Binary" = "gray")
  ) +
  theme_minimal() +
  labs(title = "Geographical Distribution of World Hum by Gender",
       subtitle = "Global data on individuals reporting the mysterious 'Hum' phenomenon",
       caption = "R Visualisation by Patrick Ford",
       x = "Longitude",
       y = "Latitude",
       color = "Gender")

# Bar plot for gender distribution
# One bar per gender value; bar height is the number of rows with that value.
gender_bar_plot <- ggplot(data, aes(x = gender, fill = gender)) +
  geom_bar() +
  # Fill colors are named by gender value so each category keeps its color
  # regardless of which categories are present or how they are ordered.
  scale_fill_manual(
    values = c("Female" = "pink", "Male" = "blue", "Non Binary" = "gray")
  ) +
  theme_minimal() +
  labs(title = "Gender Distribution",
       x = "Gender",
       y = "Count",
       fill = "Gender")

# Bar plot for age and gender distribution
# Histogram of age in fixed-width bins, with one dodged bar per gender per bin.
age_bin_width <- 5

# Ignore missing ages when computing the axis range; without na.rm, a single
# NA makes min()/max() return NA and seq() fails.
age_range <- range(data$age, na.rm = TRUE)

# Snap the tick positions to multiples of the bin width. When only `binwidth`
# is supplied, geom_histogram() centers its bins on multiples of that width, so
# these ticks label the bins instead of starting at the raw minimum age.
age_breaks <- seq(
  floor(age_range[1] / age_bin_width) * age_bin_width,
  ceiling(age_range[2] / age_bin_width) * age_bin_width,
  by = age_bin_width
)

age_gender_plot <- ggplot(data, aes(x = age, fill = gender)) +
  geom_histogram(binwidth = age_bin_width, position = "dodge") +
  # Fill colors are named by gender value so each category keeps its color
  # regardless of which categories are present or how they are ordered.
  scale_fill_manual(
    values = c("Female" = "pink", "Male" = "blue", "Non Binary" = "gray")
  ) +
  scale_x_continuous(breaks = age_breaks) + # One tick per age group
  theme_minimal() +
  labs(title = "Age and Gender Distribution",
       x = "Age",
       y = "Count",
       fill = "Gender")


# Final figure: the map on top, with the gender bar chart and the age
# histogram side by side underneath. The top row is given twice the height
# of the bottom row.
map_plot /
  (gender_bar_plot | age_gender_plot) +
  patchwork::plot_layout(heights = c(2, 1))