# Load necessary libraries
pacman::p_load(pacman, ggplot2, dplyr, scales, gridExtra, packcircles)
# Read the heat-level table and rename its columns positionally to the short
# names that the rest of the script refers to.
chilli_data <- setNames(
  read.csv("chile-variety-heat-levels.csv"),
  c("Chilli_Name", "Lower_Bound", "Middle", "Upper_Bound")
)
# Heat range of every variety on a linear axis: an orange line spans
# Lower_Bound..Upper_Bound and a red point marks Middle. Varieties are ordered
# by Middle, and the axes are flipped so the varieties run along the vertical
# axis.
p1 <- ggplot(
  chilli_data,
  aes(x = reorder(Chilli_Name, Middle), ymin = Lower_Bound, ymax = Upper_Bound)
) +
  # ymin / ymax are inherited from the plot-level mapping above.
  geom_linerange(colour = "orange", linewidth = 0.5) +
  geom_point(aes(y = Middle), colour = "red2", size = 3) +
  scale_y_continuous(labels = scales::comma) +
  coord_flip() +
  labs(
    title = "Heat Levels of Chilli Varieties",
    x = "Chilli Variety",
    y = "Scoville Heat Units (SHU)"
  )
# Heat range of every variety on a logarithmic-style axis, so that the mild
# varieties are not squashed together by the hottest ones.
#
# A true log10 scale cannot represent 0 SHU: log10(0) is -Inf, which is what
# makes ggplot2 warn that the "log-10 transformation introduced infinite
# values". The pseudo-log transform is defined at zero and follows log10 for
# large values, so every point and error bar gets a finite position.
p2 <- ggplot(chilli_data, aes(x = reorder(Chilli_Name, Middle), y = Middle)) +
  geom_point(color = "red2", size = 3) +
  geom_errorbar(
    aes(ymin = Lower_Bound, ymax = Upper_Bound),
    width = 0.2,
    color = "orange"
  ) +
  coord_flip() +
  scale_y_continuous(
    # `trans` is spelled `transform` from ggplot2 3.5.0 onwards; the older
    # spelling is used here because it is accepted by earlier releases too.
    trans = scales::pseudo_log_trans(base = 10),
    # Decade breaks are set explicitly so the axis reads like a log axis,
    # with an extra tick at zero.
    breaks = c(0, 10^(1:7)),
    labels = scales::comma
  ) +
  labs(
    title = "Chilli Varieties by Heat Level (Log Scale)",
    x = "Chilli Variety",
    y = "Median Heat Level Scoville Heat Units (SHU)"
  )
# The ten varieties with the highest Middle value, plus a comma-formatted
# text label of that value for each one.
top_10_chillies <- chilli_data %>%
  arrange(desc(Middle)) %>%
  slice(1:10) %>%
  mutate(label = scales::comma(Middle))

# Pie chart of the ten hottest varieties: a single stacked column wrapped onto
# polar coordinates, with each slice labelled at its midpoint.
p3 <- ggplot(top_10_chillies, aes(x = "", y = Middle, fill = Chilli_Name)) +
  geom_col(width = 1, colour = "white", linewidth = 1) + # white slice borders
  geom_text(
    aes(label = label),
    position = position_stack(vjust = 0.5),
    colour = "black",
    size = 3
  ) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Paired") +
  labs(
    title = "Top 10 Hottest Chilli Varieties by Median Heat Level (SHU)",
    fill = "Chilli Variety"
  ) +
  theme_void()
# The seven varieties with the lowest Middle value, plus a comma-formatted
# text label of that value for each one.
lowest_7_chillies <- chilli_data %>%
  arrange(Middle) %>%
  slice(1:7) %>%
  mutate(label = scales::comma(Middle))

# Pie chart of the seven mildest varieties: a single stacked column wrapped
# onto polar coordinates, with each slice labelled at its midpoint.
p4 <- ggplot(lowest_7_chillies, aes(x = "", y = Middle, fill = Chilli_Name)) +
  geom_col(width = 1, colour = "white", linewidth = 1) + # white slice borders
  geom_text(
    aes(label = label),
    position = position_stack(vjust = 0.5),
    colour = "black",
    size = 3
  ) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Paired") +
  labs(
    title = "Lowest 7 Chilli Varieties by Median Heat Level (SHU)",
    fill = "Chilli Variety"
  ) +
  theme_void()
# The ten varieties with the lowest Middle value, plus a comma-formatted text
# label of that value for each one.
lowest_10_chillies <- chilli_data %>%
  arrange(Middle) %>%
  slice(1:10) %>%
  mutate(label = scales::comma(Middle))
# Full heat range of the ten hottest varieties: orange error bars span
# Lower_Bound..Upper_Bound, with a red point drawn on top at Middle.
p5 <- ggplot(
  top_10_chillies,
  aes(x = reorder(Chilli_Name, Middle), y = Middle)
) +
  geom_errorbar(
    aes(ymin = Lower_Bound, ymax = Upper_Bound),
    colour = "orange",
    width = 0.2
  ) +
  geom_point(colour = "red2", size = 3) +
  scale_y_continuous(labels = scales::comma) +
  coord_flip() +
  labs(
    title = "Heat Range for Top 10 Hottest Chillies",
    x = "Chilli Variety",
    y = "Heat Level (Scoville Heat Units)"
  ) +
  theme(legend.position = "none")
# Full heat range of the ten mildest varieties: orange error bars span
# Lower_Bound..Upper_Bound, with a red point drawn on top at Middle.
p6 <- ggplot(
  lowest_10_chillies,
  aes(x = reorder(Chilli_Name, Middle), y = Middle)
) +
  geom_errorbar(
    aes(ymin = Lower_Bound, ymax = Upper_Bound),
    colour = "orange",
    width = 0.2
  ) +
  geom_point(colour = "red2", size = 3) +
  scale_y_continuous(labels = scales::comma) +
  coord_flip() +
  labs(
    title = "Heat Range for Lowest 10 Chillies",
    x = "Chilli Variety",
    y = "Heat Level (Scoville Heat Units)"
  ) +
  theme(legend.position = "none")
# Draw the six plots as three side-by-side pairs.

# All varieties: linear axis (p1) next to the log-style axis (p2).
# When p2 is built with scale_y_log10(), drawing it warns that the log-10
# transformation introduced infinite values, which happens for values of 0.
grid.arrange(grobs = list(p1, p2), ncol = 2)

# Pie charts: ten hottest (p3) next to seven mildest (p4).
grid.arrange(grobs = list(p3, p4), ncol = 2)

# Heat ranges: ten hottest (p5) next to ten mildest (p6).
grid.arrange(grobs = list(p5, p6), ncol = 2)

# Keep only the varieties whose Middle heat level is at least 225,000 SHU.
# Note: this replaces chilli_data with the filtered subset.
chilli_data <- chilli_data %>%
  filter(Middle >= 225000)

# Circle-packing layout with one circle per remaining variety; Middle is
# passed as the circle size and interpreted as an area.
packing <- circleProgressiveLayout(chilli_data$Middle, sizetype = "area")

# Polygon outline (50 points per circle) used to draw each circle.
dat.gg <- circleLayoutVertices(packing, npoints = 50)

# Append the layout columns to the data (x and y position the labels) and
# build a three-line label: variety name, formatted heat value, "SHU".
chilli_data <- mutate(
  cbind(chilli_data, packing),
  label = paste(Chilli_Name, scales::comma(Middle), "SHU", sep = "\n")
)

# Bubble chart: filled circle outlines with the label centred in each circle.
ggplot() +
  geom_polygon(
    data = dat.gg,
    mapping = aes(x, y, group = id, fill = as.factor(id)),
    colour = "black"
  ) +
  geom_text(
    data = chilli_data,
    mapping = aes(x, y, label = label),
    colour = "black",
    size = 3
  ) +
  # One distinct hue per circle; the legend is suppressed.
  scale_fill_manual(
    values = scales::hue_pal()(nrow(chilli_data)),
    guide = "none"
  ) +
  coord_equal() +
  theme_void() +
  labs(title = "Chillies by Median Heat Level in Scoville Heat Units (SHUs): >= 225,000 (SHU)")
