# Load necessary libraries
pacman::p_load(pacman, ggplot2, dplyr, scales, gridExtra, packcircles)

# Read the heat-level table and replace its header with four fixed names.
# The names are assigned by position, so this assumes the CSV columns arrive
# in the order name / lower / middle / upper -- TODO confirm against the file.
chilli_data <- setNames(
  read.csv("chile-variety-heat-levels.csv"),
  c("Chilli_Name", "Lower_Bound", "Middle", "Upper_Bound")
)

# p1: heat range of every variety on a linear axis.
# Each variety gets a thin line from Lower_Bound to Upper_Bound and a point at
# Middle; varieties are ordered along the axis by their Middle value.
p1 <- ggplot(chilli_data, aes(x = reorder(Chilli_Name, Middle))) +
  geom_linerange(
    aes(ymin = Lower_Bound, ymax = Upper_Bound),
    color = "orange",
    linewidth = .5
  ) +
  geom_point(aes(y = Middle), color = "red2", size = 3) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Heat Levels of Chilli Varieties",
    x = "Chilli Variety",
    y = "Scoville Heat Units (SHU)"
  ) +
  coord_flip() +  # variety names run down the side, SHU along the bottom
  theme()         # empty call: no theme overrides are applied

# p2: heat range of every variety on a log-like axis, so that mild varieties
# are not squashed against zero by the hottest ones.
#
# A plain log10 scale maps a value of 0 to -Inf (ggplot2 reports this as
# "log-10 transformation introduced infinite values"), so zero-heat entries
# have no real position on it. The pseudo-log transform behaves like log10 for
# large values but is linear near zero, so 0 is placed at a finite position.
# Breaks are given explicitly because the transform's default breaks are
# evenly spaced on the raw scale and bunch up at the top end.
#
# NOTE: `trans =` is used for compatibility with ggplot2 3.4.x; from ggplot2
# 3.5.0 the preferred spelling of this argument is `transform =`.
p2 <- ggplot(chilli_data, aes(x = reorder(Chilli_Name, Middle), y = Middle)) +
  geom_errorbar(
    aes(ymin = Lower_Bound, ymax = Upper_Bound),
    width = .2,
    color = "orange"
  ) +
  geom_point(color = "red2", size = 3) +  # drawn last so it sits on the bar
  coord_flip() +
  scale_y_continuous(
    trans = scales::pseudo_log_trans(sigma = 1, base = 10),
    breaks = c(0, 10^(1:7)),
    labels = scales::comma
  ) +
  labs(title = "Chilli Varieties by Heat Level (Log Scale)",
       x = "Chilli Variety",
       y = "Median Heat Level Scoville Heat Units (SHU)")

# The ten rows with the largest Middle value, each with a comma-formatted
# text version of that value for use as a slice label.
top_10_chillies <- chilli_data %>%
  arrange(desc(Middle)) %>%
  head(10) %>%
  mutate(label = scales::comma(Middle))

# p3: pie chart of the top 10. A single stacked column (x = "") is bent into a
# circle by the polar coordinate system; slice size follows Middle.
p3 <- ggplot(top_10_chillies, aes(x = "", y = Middle, fill = Chilli_Name)) +
  geom_col(width = 1, color = "white", linewidth = 1) +  # white slice borders
  geom_text(
    aes(label = label),
    position = position_stack(vjust = 0.5),  # centre each label in its slice
    color = "black",
    size = 3
  ) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Paired") +
  labs(
    title = "Top 10 Hottest Chilli Varieties by Median Heat Level (SHU)",
    fill = "Chilli Variety"
  ) +
  theme_void()

# The seven rows with the smallest Middle value, each with a comma-formatted
# text version of that value for use as a slice label.
lowest_7_chillies <- chilli_data %>%
  arrange(Middle) %>%
  head(7) %>%
  mutate(label = scales::comma(Middle))

# p4: pie chart of the lowest 7, built the same way as the top-10 pie: one
# stacked column (x = "") wrapped into a circle, slice size following Middle.
p4 <- ggplot(lowest_7_chillies, aes(x = "", y = Middle, fill = Chilli_Name)) +
  geom_col(width = 1, color = "white", linewidth = 1) +  # white slice borders
  geom_text(
    aes(label = label),
    position = position_stack(vjust = 0.5),  # centre each label in its slice
    color = "black",
    size = 3
  ) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Paired") +
  labs(
    title = "Lowest 7 Chilli Varieties by Median Heat Level (SHU)",
    fill = "Chilli Variety"
  ) +
  theme_void()

# The ten rows with the smallest Middle value, plus a comma-formatted text
# version of that value in a `label` column.
lowest_10_chillies <- chilli_data %>%
  arrange(Middle) %>%
  head(10) %>%
  mutate(label = scales::comma(Middle))

# Build a horizontal error-bar plot of the heat range for a set of chillies.
#
# Arguments:
#   data  - data frame with columns Chilli_Name, Lower_Bound, Middle and
#           Upper_Bound.
#   title - plot title (single string).
#
# Returns a ggplot object: one error bar per variety spanning Lower_Bound to
# Upper_Bound with a point at Middle, varieties ordered by Middle, axes
# flipped so the variety names read horizontally.
plot_heat_range <- function(data, title) {
  ggplot(data, aes(x = reorder(Chilli_Name, Middle), y = Middle)) +
    geom_errorbar(
      aes(ymin = Lower_Bound, ymax = Upper_Bound),
      width = 0.2,
      color = "orange"
    ) +
    geom_point(color = "red2", size = 3) +
    scale_y_continuous(labels = scales::comma) +
    labs(title = title,
         x = "Chilli Variety",
         y = "Heat Level (Scoville Heat Units)") +
    theme(legend.position = "none") +
    coord_flip()
}

# Error bar plot for the top 10 chillies showing full heat range
p5 <- plot_heat_range(top_10_chillies, "Heat Range for Top 10 Hottest Chillies")

# Error bar plot for the bottom 10 chillies showing full heat range
p6 <- plot_heat_range(lowest_10_chillies, "Heat Range for Lowest 10 Chillies")

# Draw the plots in side-by-side pairs, one page per pair.

# All varieties: linear axis next to the log-style axis
grid.arrange(grobs = list(p1, p2), ncol = 2)
# Console output recorded from an earlier run of the call above:
## Warning in scale_y_log10(labels = scales::comma): log-10 transformation
## introduced infinite values.

# Pie charts: top 10 next to lowest 7
grid.arrange(grobs = list(p3, p4), ncol = 2)

# Heat ranges: top 10 next to lowest 10
grid.arrange(grobs = list(p5, p6), ncol = 2)

# Bubble chart of the hottest varieties ----

# Keep only rows whose Middle value is at least 225,000.
# Note: this overwrites `chilli_data`, so everything below (and any later
# code) sees the filtered table only.
chilli_data <- filter(chilli_data, Middle >= 225000)

# Circle-packing layout with one circle per remaining row; with
# sizetype = "area" the Middle value is treated as the circle's area.
packing <- circleProgressiveLayout(chilli_data$Middle, sizetype = "area")

# Polygon outline of every circle (50 points each) for geom_polygon
dat.gg <- circleLayoutVertices(packing, npoints = 50)

# Attach the layout columns to the data (the plot reads x and y from them)
# and build a three-line label: name, formatted value, unit.
chilli_data <- mutate(
  cbind(chilli_data, packing),
  label = paste(Chilli_Name, scales::comma(Middle), "SHU", sep = "\n")
)

# Draw the circles, then put each label at its circle's centre
ggplot() +
  geom_polygon(
    data = dat.gg,
    mapping = aes(x, y, group = id, fill = as.factor(id)),
    colour = "black"
  ) +
  geom_text(
    data = chilli_data,
    mapping = aes(x, y, label = label),
    size = 3,
    color = "black"
  ) +
  # One distinct hue per circle; the legend is suppressed
  scale_fill_manual(
    values = scales::hue_pal()(nrow(chilli_data)),
    guide = "none"
  ) +
  coord_equal() +  # equal axis units so the circles stay round
  theme_void() +
  labs(title = "Chillies by Median Heat Level in Scoville Heat Units (SHUs): >= 225,000 (SHU)")