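Load libraries (the code below calls functions from ggplot2, colorspace, tidyr and cowplot, plus `dplyr::filter`; the loading chunk itself is not shown here)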

Challenge 1: Drug Efficacy Comparison

Working dataset:

# Efficacy rate (%) for each drug; one row per drug.
efficacy_data <- data.frame(
  drug = c("Drug A", "Drug B", "Placebo"),
  efficacy = c(84, 82, 80)
)
efficacy_data
##      drug efficacy
## 1  Drug A       84
## 2  Drug B       82
## 3 Placebo       80

Initial visualisation

# Baseline chart for Challenge 1: one filled bar per drug, with the visible
# y-range restricted to 75-85 via coord_cartesian() and the legend hidden.
p1 <- ggplot(
  data = efficacy_data,
  mapping = aes(x = drug, y = efficacy, fill = drug)
) +
  geom_col() +
  coord_cartesian(ylim = c(75, 85)) +
  labs(
    title = "Drug Efficacy Comparison",
    subtitle = "Phase III Clinical Trial Results",
    y = "Efficacy Rate (%)"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 14)
  )

p1

Improved visualisation

  1. Start Y-axis at 0 instead of 75
  2. Added a numerical value as a label
  3. Added an X-axis label
# Improved Challenge 1 chart: bars drawn from a zero baseline, outlined in
# black, coloured with the "Harmonic" qualitative palette, and labelled with
# the efficacy value 3 units above each bar top.
p1_improv <- ggplot(
  data = efficacy_data,
  mapping = aes(x = drug, y = efficacy, fill = drug)
) +
  geom_col(colour = "black") +
  scale_fill_discrete_qualitative(palette = "Harmonic") +
  geom_text(
    mapping = aes(label = paste0(efficacy, "%")),
    hjust = 0.5,
    nudge_y = 3
  ) +
  coord_cartesian(ylim = c(0, 85)) +
  labs(
    title = "Drug Efficacy Comparison",
    subtitle = "Phase III Clinical Trial Results",
    x = "Drug",
    y = "Efficacy Rate (%)"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 14)
  )

p1_improv

Challenge 2: Adverse Events Distribution

Working dataset:

# Share (%) of each adverse event type; the seven values sum to 100.
adverse_events <- data.frame(
  event = c(
    "Headache", "Nausea", "Dizziness", "Fatigue", "Insomnia", "Rash", "Other"
  ),
  percentage = c(15, 12, 10, 8, 7, 5, 43)
)
adverse_events
##       event percentage
## 1  Headache         15
## 2    Nausea         12
## 3 Dizziness         10
## 4   Fatigue          8
## 5  Insomnia          7
## 6      Rash          5
## 7     Other         43
# Seven hex colours, one per adverse event slice of the pie chart.
pie_colors <- c(
  "#FF6B6B", "#4ECDC4", "#45B7D1", "#F9CA24",
  "#6C5CE7", "#A29BFE", "#95A5A6"
)

Initial visualisation

# Baseline chart for Challenge 2: a single stacked bar wrapped into a pie by
# mapping y onto the polar angle, with the legend on the right.
p2 <- ggplot(
  data = adverse_events,
  mapping = aes(x = "", y = percentage, fill = event)
) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  scale_fill_manual(values = pie_colors) +
  labs(
    title = "Adverse Events Distribution",
    subtitle = "Safety Profile Analysis"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.position = "right"
  )

p2

Improved visualisation

Order: from biggest to smallest clockwise, placing Other at the end

# Convert `event` to a factor with an explicit level order that keeps
# "Other" as the last level.
adverse_events$event <- factor(
  adverse_events$event,
  levels = c(
    "Headache", "Nausea", "Dizziness", "Fatigue", "Insomnia", "Rash",
    "Other"
  )
)
  1. Add a legend label
  2. Add numerical values to the chart
  3. Convert to a bar chart, sort by descending frequency of the adverse events
# Improved Challenge 2 chart: horizontal bars with a percentage label next to
# each bar. The category order on the axis comes from
# reorder(event, -percentage), not from the factor levels of `event`.
p2_improv <- ggplot(
  data = adverse_events,
  mapping = aes(x = reorder(event, -percentage), y = percentage)
) +
  geom_col(
    position = "dodge",
    fill = "darksalmon",
    colour = "black",
    alpha = 0.85
  ) +
  geom_text(
    mapping = aes(label = paste0(percentage, "%")),
    vjust = 0.5,
    nudge_y = 1.5
  ) +
  # Flip so the event names read horizontally along the vertical axis.
  coord_flip() +
  labs(
    title = "Adverse Events Distribution",
    subtitle = "Safety Profile Analysis",
    x = "Adverse Event"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.position = "bottom",
    axis.title.x = element_blank()
  )

p2_improv

Challenge 3: Sales vs Costs Analysis

Working dataset:

# Monthly sales and costs. `month` is a factor with explicit levels so the
# months keep calendar order instead of alphabetical order.
sales_data <- data.frame(
  month = factor(
    c("Jan", "Feb", "Mar", "Apr"),
    levels = c("Jan", "Feb", "Mar", "Apr")
  ),
  Sales = c(100, 110, 105, 115),
  Costs = c(2.0, 2.1, 2.2, 2.3)
)

Initial visualisation

# Baseline chart for Challenge 3: Sales as bars and Costs as a line on a
# shared panel. Costs are multiplied by 50 to sit on the Sales scale, and the
# secondary axis divides by 50 to show the original Costs values.
p3 <- ggplot(sales_data, aes(x = month)) +
  geom_bar(aes(y = Sales, fill = "Sales"), stat = "identity", alpha = 0.7) +
  # Line thickness is set with `linewidth`; `size` for lines was deprecated
  # in ggplot2 3.4.0. Point size below still uses `size`.
  geom_line(aes(y = Costs * 50, group = 1, color = "Costs"), linewidth = 2) +
  geom_point(aes(y = Costs * 50, color = "Costs"), size = 4) +
  scale_y_continuous(
    name = "Sales ($M)",
    sec.axis = sec_axis(~ . / 50, name = "Costs ($M)")
  ) +
  scale_fill_manual(values = c("Sales" = "#82ca9d")) +
  scale_color_manual(values = c("Costs" = "#ff7300")) +
  labs(
    title = "Sales vs Costs Correlation",
    subtitle = "Quarterly Performance Review",
    x = "Month"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    legend.title = element_blank(),
    axis.title.y.left = element_text(color = "#82ca9d"),
    axis.title.y.right = element_text(color = "#ff7300")
  )

p3

Improved visualisation

# Reshape to long format: one row per month and measure, with the measure
# name in `group` and its value in `value`.
sales_data_transposed <- pivot_longer(
  sales_data,
  cols = c(Costs, Sales),
  names_to = "group",
  values_to = "value"
)

# Sales-only panel: points joined by a line, on its own y-axis.
p3_sales <- ggplot(
  data = sales_data_transposed[sales_data_transposed$group == "Sales", ],
  mapping = aes(x = month, y = value)
) +
  geom_point(colour = "forestgreen") +
  # group = 1 joins the points across the discrete month axis.
  geom_line(mapping = aes(group = 1), colour = "forestgreen") +
  labs(
    x = "Month",
    y = "Sales (M$)"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    legend.title = element_blank(),
    legend.position = "bottom",
    axis.title.y.left = element_text(color = "black"),
    axis.title.y.right = element_text(color = "black")
  )
  
# Costs-only panel: points joined by a line, on its own y-axis.
p3_costs <- ggplot(
  data = sales_data_transposed[sales_data_transposed$group == "Costs", ],
  mapping = aes(x = month, y = value)
) +
  geom_point(colour = "darksalmon") +
  # group = 1 joins the points across the discrete month axis.
  geom_line(mapping = aes(group = 1), colour = "darksalmon") +
  labs(
    x = "Month",
    y = "Costs (M$)"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    legend.title = element_blank(),
    legend.position = "bottom",
    axis.title.y.left = element_text(color = "black"),
    axis.title.y.right = element_text(color = "black")
  )

# Shared title drawn on an empty canvas so it can be stacked above the panels.
p3_title <- ggdraw() +
  draw_label(
    "Sales vs Costs Analysis",
    fontface = 'bold',
    size = 15,
    x = 0.4,
    y = 0.5,
    hjust = 1,
    vjust = 1
  )

# Title row on top; below it, Costs and Sales side by side with a narrow
# empty spacer (the NULL entry) between them.
p3_improv <- plot_grid(
  p3_title,
  plot_grid(
    p3_costs, NULL, p3_sales,
    ncol = 3,
    rel_widths = c(1, 0.03, 1)
  ),
  nrow = 2,
  rel_heights = c(0.5, 5)
)

p3_improv

Challenge 4: Treatment Response Over Time

Working dataset:

# Response value and its standard error (`se`) for each of weeks 1 to 12.
trial_data <- data.frame(
  week = 1:12,
  response = c(40, 45, 44, 52, 50, 48, 55, 68, 65, 66, 69, 71),
  se = c(3, 3, 2.5, 3, 2.8, 3.2, 2.9, 2.7, 2.8, 2.6, 2.5, 2.4)
)

Initial visualisation

# Baseline chart for Challenge 4: response over time using only weeks 2, 4, 8
# and 12, joined by a line.
p4 <- ggplot(
  trial_data |> dplyr::filter(week %in% c(2, 4, 8, 12)),
  aes(x = week, y = response)
) +
  # Line thickness is set with `linewidth`; `size` for lines was deprecated
  # in ggplot2 3.4.0. Point size below still uses `size`.
  geom_line(color = "blue", linewidth = 1.5) +
  geom_point(color = "blue", size = 3) +
  scale_x_continuous(breaks = c(2, 4, 8, 12)) +
  labs(
    title = "Treatment Response Over Time",
    subtitle = "Selected measurement timepoints",
    x = "Week",
    y = "Response Rate (%)"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", size = 14))

p4

Improved visualisation (2 options)

Flag visits of greater interest

# 1 for the weeks of greater interest (2, 4, 8, 12), 0 for all other weeks.
trial_data$select <- as.numeric(trial_data$week %in% c(2, 4, 8, 12))

Option 1

  1. Add +- SE error bars
  2. Update Y-axis title and X-axis labels
  3. Present data across all visits, and highlight selected visits using colour
  4. Add numerical values
# Option 1: all twelve weeks with +/- SE error bars. Flagged weeks
# (select == 1) are drawn in blue with filled points and a value label;
# the remaining weeks are grey with open points.
p4_improv <- ggplot(trial_data, aes(x = week, y = response)) +
  # Line thickness is set with `linewidth`; `size` for lines was deprecated
  # in ggplot2 3.4.0.
  geom_line(color = "dimgray", linewidth = 0.5) +
  geom_errorbar(
    aes(ymin = response - se, ymax = response + se),
    width = 0.2,
    colour = ifelse(trial_data$select == 0, "dimgray", "blue")
  ) +
  # Labels only for the flagged weeks, placed 2 * se above the point so they
  # clear the upper error bar.
  geom_label(
    data = trial_data[trial_data$select == 1, ],
    aes(label = paste0(response, " (±", se, ")"), y = response + 2 * se),
    color = "blue"
  ) +
  geom_point(
    color = ifelse(trial_data$select == 0, "dimgray", "blue"),
    size = 3,
    shape = ifelse(trial_data$select == 0, 1, 19)
  ) +
  scale_y_continuous(limits = c(0, 80)) +
  scale_x_continuous(
    breaks = 1:12,
    labels = paste("Week", 1:12),
    expand = expansion(add = 1)
  ) +
  labs(
    title = "Treatment Response Over Time",
    x = "Week",
    y = "Response Rate (%)"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", size = 14))

p4_improv

Option 2

  1. Add +- SE error bands
  2. Update Y-axis title and X-axis labels
  3. Present data across all visits
# Option 2: all twelve weeks with a shaded +/- SE band around the response
# line instead of individual error bars.
p4_improv_2 <- ggplot(trial_data, aes(x = week, y = response)) +
  # Line thickness is set with `linewidth`; `size` for lines was deprecated
  # in ggplot2 3.4.0.
  geom_line(color = "dimgray", linewidth = 0.5) +
  geom_ribbon(
    aes(ymin = response - se, ymax = response + se),
    alpha = 0.25,
    fill = "forestgreen"
  ) +
  geom_point(color = "dimgray") +
  scale_y_continuous(limits = c(0, 80)) +
  scale_x_continuous(
    breaks = 1:12,
    labels = paste("Week", 1:12),
    expand = expansion(add = 1)
  ) +
  labs(
    title = "Treatment Response Over Time",
    x = "Week",
    y = "Response Rate (%)"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", size = 14))

p4_improv_2

Challenge 5: Adverse Event Severity

Working dataset:

# Number of adverse events per severity category; the factor levels run
# Severe, Moderate, Mild.
safety_data <- data.frame(
  category = factor(
    c("Severe", "Moderate", "Mild"),
    levels = c("Severe", "Moderate", "Mild")
  ),
  count = c(3, 15, 45)
)

Initial visualisation

# Baseline chart for Challenge 5: one bar per severity category, using the
# fill colours of the original exercise and no legend.
p5 <- ggplot(
  data = safety_data,
  mapping = aes(x = category, y = count, fill = category)
) +
  geom_col() +
  scale_fill_manual(
    values = c(
      "Severe" = "#90EE90",
      "Moderate" = "#FFB6C1",
      "Mild" = "#8B0000"
    )
  ) +
  labs(
    title = "Adverse Event Severity Distribution",
    subtitle = "Safety monitoring report",
    x = "Severity",
    y = "Number of Events"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 14)
  )

p5

Improved visualisation

Re-order the categories in order of increasing AE intensity (mild - moderate - severe)

# Re-level `category` so the axis runs Mild -> Moderate -> Severe. The level
# order is stated explicitly: subsetting a factor does not reorder its
# levels, and factor() on a character vector sorts levels alphabetically, so
# the order must not be left to either of those.
safety_data$category <- factor(
  safety_data$category,
  levels = c("Mild", "Moderate", "Severe")
)

# Total number of events, used as the denominator for the percentages.
N <- sum(safety_data$count)

# Share of each category in percent, rounded to one decimal place.
safety_data$percent <- round(100 * safety_data$count / N, 1)

Update colours - use red for severe, orange for moderate, and green for mild

# Improved Challenge 5 chart: one outlined bar per severity category,
# labelled with "count (percent%)" above the bar, with green / orange / red
# fills for Mild / Moderate / Severe.
p5_improv <- ggplot(safety_data, aes(x = category, y = count, fill = category)) +
  geom_bar(stat = "identity", colour = "black", alpha = 0.9) +
  # nudge_y is a parameter of geom_text(), not an aesthetic: inside aes() it
  # is ignored with a warning and the labels are not offset. The y position
  # is inherited from the plot-level mapping.
  geom_text(
    aes(label = paste0(count, " (", percent, "%)")),
    nudge_y = 1.5
  ) +
  scale_fill_manual(
    values = c("Mild" = "#90EE90", "Moderate" = "#EAB123", "Severe" = "#B30000")
  ) +
  labs(
    title = "Adverse Event Severity Distribution",
    subtitle = "Safety monitoring report",
    x = "Severity",
    y = "Number of Events"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 14)
  )

p5_improv