# Load necessary libraries
pacman::p_load(pacman, readr, dplyr, ggplot2, ggmap, sf, patchwork)
# Read the processed survey file into a data frame
data <- read.csv(file = "hummap_processed.csv")
# Collect the distinct labels found in the gender column and show them
unique_genders <- unique(data[["gender"]])
print(unique_genders)
# Recorded output (apparently from an earlier run):
## [1] "Female" "Male" "Non Binary"
# World basemap outlines as a data frame of polygon vertices.
# NOTE: map_data() draws on the 'maps' package, which must be installed.
world_map <- map_data("world")
# Geospatial plot: one point per row of `data`, coloured by gender and drawn
# on top of the light-gray country polygons.
map_plot <- ggplot() +
  geom_polygon(
    data = world_map,
    aes(x = long, y = lat, group = group),
    fill = "lightgray", color = "white"
  ) +
  geom_point(
    data = data,
    aes(x = longitude, y = latitude, color = gender),
    alpha = 0.7, size = 2
  ) +
  # Named values pin each colour to its category explicitly, so the pairing
  # does not depend on how the gender levels sort or which ones are present.
  scale_color_manual(
    values = c("Female" = "pink", "Male" = "blue", "Non Binary" = "gray")
  ) +
  theme_minimal() +
  labs(
    title = "Geographical Distribution of World Hum by Gender",
    subtitle = "Global data on individuals reporting the mysterious 'Hum' phenomenon",
    caption = "R Visualisation by Patrick Ford",
    x = "Longitude",
    y = "Latitude",
    color = "Gender"
  )
# Bar plot for gender distribution: one bar per gender, height = row count.
gender_bar_plot <- ggplot(data, aes(x = gender, fill = gender)) +
  geom_bar() +
  # Named values pin each colour to its category explicitly, so the pairing
  # does not depend on how the gender levels sort or which ones are present.
  scale_fill_manual(
    values = c("Female" = "pink", "Male" = "blue", "Non Binary" = "gray")
  ) +
  theme_minimal() +
  labs(
    title = "Gender Distribution",
    x = "Gender",
    y = "Count",
    fill = "Gender"
  )
# Histogram of age split by gender: 5-year-wide bins, with the genders drawn
# side by side within each bin.
age_gender_plot <- ggplot(data, aes(x = age, fill = gender)) +
  geom_histogram(binwidth = 5, position = "dodge") +
  # Named values pin each colour to its category explicitly, so the pairing
  # does not depend on how the gender levels sort or which ones are present.
  scale_fill_manual(
    values = c("Female" = "pink", "Male" = "blue", "Non Binary" = "gray")
  ) +
  # Tick every 5 years, starting at the youngest observed age. na.rm = TRUE
  # keeps a single missing age from turning min()/max() into NA, which would
  # make seq() fail.
  scale_x_continuous(
    breaks = seq(
      min(data$age, na.rm = TRUE),
      max(data$age, na.rm = TRUE),
      by = 5
    )
  ) +
  theme_minimal() +
  labs(
    title = "Age and Gender Distribution",
    x = "Age",
    y = "Count",
    fill = "Gender"
  )
# Combine the plots: the map on top, the two bar charts side by side below,
# with the top row given twice the height of the bottom row.
combined_plot <- (map_plot / (gender_bar_plot | age_gender_plot)) +
  plot_layout(heights = c(2, 1))
# Print explicitly so the figure is also drawn when the script is run through
# source(), which does not auto-print top-level values by default.
print(combined_plot)
