Analysis of Ronan’s Huckleberry Data

Courtney Russ

2024-01-07


This is an analysis of data from the Huckleberry App, an app used by parents to record information about their baby including sleep, feeding, and growth.

Set Up

Overview of Data

raw <- read_csv("ronan.csv") %>%
  dplyr::select(Type, Start, End, `Start Condition`, `Start Location`, `End Condition`, Notes) %>%
  rename(
    Start_Condition = `Start Condition`,
    Start_Location = `Start Location`,
    End_Condition = `End Condition`) %>%
  mutate(
    Start = strptime(Start, format = "%d/%m/%y %H:%M"),
    End = strptime(End, format = "%d/%m/%y %H:%M"),
    age_days = as.numeric(difftime(as.Date(Start), as.Date("2023-03-02"), units = "days")),
    age_weeks = floor(age_days/7)
  )


skim(raw)
Data summary
Name raw
Number of rows 3356
Number of columns 9
_______________________
Column type frequency:
character 7
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Type 0 1.00 4 6 0 7 0
Start 0 1.00 10089 26902 0 3294 0
End 1842 0.45 11934 19519 0 1513 0
Start_Condition 1596 0.52 3 24 0 108 0
Start_Location 2570 0.23 4 13 0 13 0
End_Condition 2490 0.26 4 27 0 73 0
Notes 3230 0.04 5 156 0 124 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
age_days 0 1 96.95 67.12 0 43 82 143 283 ▇▇▃▃▁
age_weeks 0 1 13.42 9.58 0 6 11 20 40 ▇▆▃▂▁


Columns with a complete rate <1 need to be inspected further.

Inspect Column Types

end_table <- raw %>%
  filter(is.na(End)) %>%
  head(3)

sc <- raw %>%
  filter(!is.na(Start_Condition)) %>%
  head(3)

sl <- raw %>%
  filter(!is.na(Start_Location)) %>%
  head(3)

ec <- raw %>%
  filter(!is.na(End_Condition)) %>%
  head(3)
When ‘end’ column is NA
Type Start End Start_Condition Start_Location End_Condition Notes age_days age_weeks
Pump 2023-11-22 12:00:00 NA 10ml NA NA NA 265 37
Growth 2023-11-20 15:11:00 NA 9.02kg 71.9cm NA NA 263 37
Pump 2023-11-13 10:01:00 NA 40ml NA NA NA 256 36
When ‘Start_Condition’ column is not NA
Type Start End Start_Condition Start_Location End_Condition Notes age_days age_weeks
Pump 2023-11-22 12:00:00 NA 10ml NA NA NA 265 37
Growth 2023-11-20 15:11:00 NA 9.02kg 71.9cm NA NA 263 37
Pump 2023-11-13 10:01:00 NA 40ml NA NA NA 256 36
When ‘Start_Location’ column is not NA
Type Start End Start_Condition Start_Location End_Condition Notes age_days age_weeks
Growth 2023-11-20 15:11:00 NA 9.02kg 71.9cm NA NA 263 37
Growth 2023-11-01 09:33:00 NA 9.02kg 69cm 45cm NA 244 34
Feed 2023-09-27 18:50:00 NA Breast Milk Bottle 60ml NA 209 29
When ‘End_Condition’ column is not NA
Type Start End Start_Condition Start_Location End_Condition Notes age_days age_weeks
Growth 2023-11-01 09:33:00 NA 9.02kg 69cm 45cm NA 244 34
Feed 2023-09-27 18:50:00 NA Breast Milk Bottle 60ml NA 209 29
Feed 2023-09-27 12:47:00 NA Breast Milk Bottle 100ml NA 209 29

Start Condition, Start Location, and End condition seem to be used inconsistently depending on the Type so this will need to be assessed as the data is split into groups for analysis. The End only appears for events that have a distinct Start and End e.g. sleep so an incomplete column is acceptable.

Sleep

Longest and Shortest Sleeps

# Filter for rows recording sleep
sleep <- raw %>% 
  filter(Type == "Sleep" & !is.na(End)) %>%
  dplyr::select(Start, End, age_weeks) %>%
  mutate(diff_minutes = as.numeric(round(difftime(End, Start, units = "mins"))))
# Get longest periods of sleep
longest_sleep <- sleep %>%
  arrange(desc(diff_minutes)) %>%
  head(5) %>%
  mutate(
    Start = as.character(Start),
    Length = round(diff_minutes/60, 1)
  ) %>%
  dplyr::select(Start, Length) %>%
  arrange(Start)

# Get shortest periods of sleep
shortest_sleep <- sleep %>%
  arrange(diff_minutes) %>%
  head(5) %>%
  mutate(
    Start = as.character(Start)) %>%
  dplyr::select(Start, diff_minutes) %>%
  arrange(Start)


kable(longest_sleep, caption = "Longest Periods of Sleep", col.names = c("Start", "Length (Hours)"))
Longest Periods of Sleep
Start Length (Hours)
2023-06-11 18:40:00 12.7
2023-06-13 18:29:00 13.0
2023-08-10 19:07:00 12.4
2023-08-17 18:54:00 12.7
2023-08-26 19:19:00 12.5
kable(shortest_sleep, caption = "Shortest Periods of Sleep", col.names = c("Start", "Length (Minutes)"))
Shortest Periods of Sleep
Start Length (Minutes)
2023-04-19 15:05:00 1
2023-05-03 16:38:00 1
2023-05-05 18:54:00 1
2023-09-26 09:45:00 1
2023-10-11 07:05:00 1

There seem to have been some ‘false start’ sleeps of 1 minute. A more useful measure may be the frequency of the lowest sleep lengths.

shortest_sleep <- sleep %>%
  group_by(diff_minutes) %>%
  summarise(frequency = n()) %>%
  arrange(diff_minutes) %>%
  head(5)

kable(shortest_sleep, caption = "Shortest Periods of Sleep", col.names = c("Length (Minutes)", "Count"))
Shortest Periods of Sleep
Length (Minutes) Count
1 6
2 7
3 5
4 9
5 15

Hours of Sleep

Per Day

sleep_day <- raw %>% 
  filter(Type == "Sleep" & !is.na(End)) %>%
  dplyr::select(Start, End, age_weeks) %>%
  mutate(sleep_length = round(difftime(End, Start, units = "mins")),
         date = as.Date(Start)) %>%
  group_by(date) %>%
  summarise(total_sleep = sum(sleep_length, na.rm = TRUE)) %>%
  mutate(rolling_average = zoo::rollmean(total_sleep, k = 10, fill = NA)) %>%
  dplyr::select(date, total_sleep, rolling_average)

p1 <- ggplot(sleep_day, aes(x = date)) +
  geom_line(aes(y = total_sleep), size = 1, alpha = 0.9, color = "lightgrey") +
  geom_line(aes(y = rolling_average, color = "10-Day Rolling Average"), size = 1, alpha = 1) +
  labs(title = "Daily Sleep Over Time",
       x = "Date",
       y = "Minutes") +
  scale_color_manual(values = brewer.pal(n = 2, name = "Set2")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "bottom") +
  guides(
    linetype = "none",
    color = guide_legend(title = NULL),
    fill = guide_legend(title = NULL)
  ) +
  scale_x_date(date_labels = "%b %Y", date_breaks = "1 month", expand = c(0, 0))

p1

Mum confirmed that they stopped recording overnight sleep in around September.

sleep_day_filtered <- sleep_day %>%
  filter(date < as.Date("2023-09-01"))

p2 <- ggplot(sleep_day_filtered, aes(x = date)) +
  geom_line(aes(y = total_sleep), size = 1, alpha = 0.9, color = "lightgrey") +
  geom_line(aes(y = rolling_average, color = "10-Day Rolling Average"), size = 1, alpha = 1) +
  labs(title = "Daily Sleep Over Time - Birth to 6 Months",
       x = "Date",
       y = "Minutes") +
  scale_color_manual(values = brewer.pal(n = 2, name = "Set2")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "bottom") +
  guides(
    linetype = "none",
    color = guide_legend(title = NULL),
    fill = guide_legend(title = NULL)
  ) +
  scale_x_date(date_labels = "%b %Y", date_breaks = "1 month", expand = c(0, 0))

p2
## Don't know how to automatically pick scale for object of type <difftime>.
## Defaulting to continuous.

Feeds

Volume per Day

feeds <- raw %>% 
  filter(Type == "Feed") %>%
  dplyr::select(Start, End_Condition) %>%
  mutate(Volume = as.numeric(str_extract(End_Condition, "\\d+"))) %>%
  rename(date = Start) %>%
  dplyr::select(-End_Condition)

feeds_day <- feeds %>%
  mutate(date = as.Date(date),
         volume = as.numeric(Volume)) %>%
  group_by(date) %>%
  summarise(vol_day = sum(volume)) %>%
  mutate(rolling_average = zoo::rollmean(vol_day, k = 10, fill = NA))


p5 <- ggplot(feeds_day, aes(x = date)) +
  geom_line(aes(y = vol_day, color = "Volume"), size = 1, alpha = 0.7, linetype = "solid") +
  geom_line(aes(y = rolling_average, color = "10-Day Rolling Average"), size = 1, alpha = 0.9) +
  labs(title = "Daily Volume of Breast Milk Consumed",
       x = "Date",
       y = "Millilitres") +
  scale_color_manual(values = c("10-Day Rolling Average" = "#66C2A5", "Volume" = "lightgrey")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +  # Rotate labels by 45 degrees
  guides(
    linetype = "none",
    color = guide_legend(title = NULL),
    fill = guide_legend(title = NULL)
  ) +
  scale_x_date(date_labels = "%b %Y", date_breaks = "1 month", expand = c(0, 0))

p5


Mum confirmed they stopped recording feeds midway through June.

feeds_day_filtered <- feeds_day %>%
  filter(date < as.Date("2023-07-01"))

p6 <- ggplot(feeds_day_filtered, aes(x = date)) +
  geom_line(aes(y = vol_day, color = "Volume"), size = 1, alpha = 0.7, linetype = "solid") +
  geom_line(aes(y = rolling_average, color = "10-Day Rolling Average"), size = 1, alpha = 0.9) +
  labs(title = "Daily Volume of Milk Consumed - Birth to 3 Months",
       y = "Millilitres") +
  scale_color_manual(values = c("10-Day Rolling Average" = "#66C2A5", "Volume" = "lightgrey")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +  # Rotate labels by 45 degrees
  guides(
    linetype = "none",
    color = guide_legend(title = NULL),
    fill = guide_legend(title = NULL)
  ) +
  scale_x_date(date_labels = "%b %Y", date_breaks = "1 month", expand = c(0, 0))

p6

Pumping

Overall Pumping Statistics

pump <- raw %>%
  filter(Type == "Pump") %>%
  select(Start, Start_Condition) %>%
mutate(volume = as.numeric(str_extract(Start_Condition, "\\d+"))) %>%
  select(Start, volume)

pump_statistics <- pump %>%
  summarise(
    total_pumps = n(),
    total_volume = sum(volume, na.rm = TRUE)/1000,
    mean_volume = round(mean(volume, na.rm = TRUE), 1),
    median_volume = round(median(volume, na.rm = TRUE), 1),
    min_volume = round(min(volume, na.rm = TRUE), 1),
    max_volume = round(max(volume, na.rm = TRUE), 1),
    sd_volume = round(sd(volume, na.rm = TRUE), 1)
  )

kable(pump_statistics, caption = "Pumping Statistics", col.names = c("Total Pumps", "Total Volume (L)", "Mean Volume (mL)", "Median Volume (mL)", "Smallest Volume (mL)", "Largest Volume (mL)", "Standard Deviation (mL)"))
Pumping Statistics
Total Pumps Total Volume (L) Mean Volume (mL) Median Volume (mL) Smallest Volume (mL) Largest Volume (mL) Standard Deviation (mL)
961 211.1 219.7 180 10 570 109.3

Total Daily Volume Pumped Statistics

pump_day <- raw %>%
  filter(Type == "Pump") %>%
  dplyr::select(Start, Start_Condition) %>%
  mutate(date = as.Date(Start),
         volume = as.numeric(str_extract(Start_Condition, "\\d+"))) %>%
  dplyr::select(date, volume) %>%
  group_by(date) %>%
  summarise(total_volume = sum(volume)) %>%
  mutate(rolling_average = zoo::rollmean(total_volume, k = 10, fill = NA))


pump_day_statistics <- pump_day %>%
  summarise(
    mean_volume = mean(total_volume, na.rm = TRUE),
    median_volume = median(total_volume, na.rm = TRUE),
    min_volume = min(total_volume, na.rm = TRUE),
    max_volume = max(total_volume, na.rm = TRUE),
    sd_volume = sd(total_volume, na.rm = TRUE)
  )

kable(pump_day_statistics, caption = "Daily Pumping Statistics", col.names = c("Mean Volume (mL)", "Median Volume (mL)", "Smallest Volume (mL)", "Largest Volume (mL)", "Standard Deviation (mL)"))
Daily Pumping Statistics
Mean Volume (mL) Median Volume (mL) Smallest Volume (mL) Largest Volume (mL) Standard Deviation (mL)
872.314 930 10 1230 217.5086

Plot of Volume over Time

p7 <- ggplot(data = pump_day, aes(x = date)) +
  geom_line(aes(y = total_volume, color = "Volume"), size = 1, alpha = 0.7) +
  geom_line(aes(y = rolling_average, color = "10-Day Rolling Average"), size = 1, alpha = 0.9) +
  labs(title = "Daily Volume of Breast Milk Pumped",
       x = "Date",
       y = "Millilitres") +
  scale_color_manual(name = "Legend", 
                     values = c("10-Day Rolling Average" = "#66C2A5", "Volume" = "lightgrey"),
                     labels = c("10-Day Rolling Average", "Volume")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(
    linetype = "none",
    color = guide_legend(title = NULL),
    fill = guide_legend(title = NULL)
  ) +
  scale_x_date(date_labels = "%b %Y", date_breaks = "1 month", expand = c(0, 0))

p7
## Warning: Removed 9 rows containing missing values (`geom_line()`).

Distribution by Month

p8 <- pump_day %>%
  mutate(month = month(date, label = TRUE)) %>%
  ggplot(aes(x = month, y = total_volume, fill = month)) +
  geom_boxplot() +
  labs(title = "Pumped Milk Volumes by Month",
       x = "Month",
       y = "Volume") +
  scale_fill_brewer(palette = "Pastel1") +  # Choose a color palette
  theme_minimal() +
  theme(legend.position = "none")
p8

Mum confirmed that she stopped pumping by early November. This explains the decrease in volume and change in distribution - while decreasing supply, daily pumps throughout October decreased in total volume and this trend continued into November until stopping. We can also see that while establishing pumping in March (baby was born early March) volumes were quite varied and the median was lower than in April.

Feeding & Pumping Comparison

pump <- pump %>%
  mutate(date = as.Date(Start)) %>%
  select(date, volume)

avg_pump <- pump %>%
  group_by(date) %>%
  summarise(count = n(),
            vol = sum(volume)) %>%
  filter(!is.na(count))

feeds <- feeds %>%
  mutate(date = as.Date(date))

avg_feeds <- feeds %>%
  group_by(date) %>%
  summarise(count = n(),
            Vol = sum(Volume)) %>%
  filter(!is.na(count))
Feeding vs Pump Counts
Mean Pumps per Day Mean Feeds per Day Pump/Feed Ratio
3.97 7.33 1.846348

The baby fed 80% more times than mum pumped.

mean_vol_pumped <- round(mean(pump$volume), 0)

mean_vol_fed <- round(mean(feeds$Volume), 0)

prop = round(mean_vol_pumped/mean_vol_fed, 2)

mean_vol_data <- data.frame("Mean Volume Pumped" = mean_vol_pumped, "Mean Volume Fed" = mean_vol_fed, "Pump/Feed Ratio" = prop)

kable(mean_vol_data, caption = "Feeding vs Pump Volumes", col.names = c("Mean Pumped Volume per Day", "Mean Volume Fed per Day", "Pump/Feed Ratio"))
Feeding vs Pump Volumes
Mean Pumped Volume per Day Mean Volume Fed per Day Pump/Feed Ratio
220 106 2.08

On average each pump produced more than twice as much milk as baby ate at each feed.

The density plot of number of pumps and number of feeds per day has quite wide spacing between the two. The feeds per day has an interesting multimodal distribution but still with an overall negative skew.

combined_plot <- bind_rows(
  mutate(pump, source = "pump", volume = volume),
  mutate(feeds, source = "feeds", volume = Volume)
)

ggplot(combined_plot, aes(x = volume, fill = source)) +
  geom_density(alpha = 0.5) +
  labs(title = "Density Plot of Feed and Pump Volumes",
       x = "Volume",
       y = "Density") +
  scale_fill_manual(values = c("feeds" = "#66C2A5", "pump" = "lightgray"))

The density plot of volumes is quite different to the countse, with the pump volumes having a bimodal distributon with the lower peak much closer to the peak of the volume fed and significant overlap.

Pumping Linear Model

This model attempts to explain the variation in volume of milk pumped during each session.

A correlation matrix and VIF calculations are used to see if the predictors (baby’s age, minutes since last pump, hour of the day) are multicollinear.

Correlation Matrix

The correlation matrix identifies strong relationships between two predictor variables.

# data prep
pump_dist <- raw %>%
  filter(Type == "Pump") %>%
  dplyr::select(Start, Start_Condition) %>%
  mutate(
    volume = as.numeric(str_extract(Start_Condition, "\\d+")),
    hour = hour(Start),
    age_days = as.numeric((as.Date(Start)) - as.Date("2023-03-02"))
  ) %>%
  dplyr::select(Start, volume, hour, age_days) %>%
  arrange(Start) %>%  # Arrange the data by timestamp
  mutate(min_since_last_pump = as.numeric(difftime(Start, lag(Start), units = "mins"))) %>%
  arrange(desc(min_since_last_pump)) %>%
  slice(-c(1, 2)) %>%
  # Normalize the variables
  mutate(
    volume_norm = scale(volume),
    hour_norm = scale(hour),
    age_days_norm = scale(age_days),
    min_pump_norm = scale(min_since_last_pump)
  )

# Normalisation
dist_norm <- pump_dist %>%
  dplyr::select(volume_norm, hour_norm, age_days_norm, min_pump_norm) %>%
  rename(vol = volume_norm,
         hour = hour_norm,
         age = age_days_norm,
         mins = min_pump_norm) %>%
  na.omit()

# Visualize correlation matrix (heatmap)
cor_matrix <- cor(dist_norm[, c("hour", "age", "mins")], use = "complete.obs")
cor_matrix
##            hour        age       mins
## hour  1.0000000 -0.0631607 -0.5138272
## age  -0.0631607  1.0000000  0.2990544
## mins -0.5138272  0.2990544  1.0000000
# Getting absolute values for visualisation
abs_cor_matrix <- abs(cor_matrix)


pheatmap(abs_cor_matrix, display_numbers = T, color = colorRampPalette(c('white','#66C2A5'))(100), fontsize_number = 15)

The correlation matrix suggests that there is a moderate relationship between hour of the day and minutes since last pump, and a small relationship between the age of the baby and minutes since last pump. Next I will calculate the VIF (Variance Inflation Factor) values to establish the degree of multicollinearity between them.

Variance Inflation Factor

model <- lm(vol ~ hour + age + mins, data = dist_norm)

# Calculate VIF
vif_values <- vif(model)

# Create a data frame with Variable and VIF columns
vif_df <- data.frame(Variable = names(vif_values), VIF = as.vector(vif_values))

# Display VIF values using knitr::kable
knitr::kable(vif_df, col.names = c("Variable", "VIF"))
Variable VIF
hour 1.375541
age 1.111806
mins 1.504617


The low VIF values indicate that there is not a high level of correlation between these variables. This means there is no strong evidence to suggest the need for removing any of these variables due to multicollinearity despite the apparent correlation between the hour of the day and minutes since the last pump in the correlation matrix.

Linearity Test

Here I’m plotting the distribution of residuals for each of the explanatory variables to assess whether the linear model is appropriate.

Age seems to have a bimodal distribution of residuals so I’m going to exclude it from this model. The polynomial model of minutes since last pump has residuals that appear to be more noramlly distributed than the linear model so I will also use this in the final model.

Final Model

lm <- lm(vol ~ poly(mins, 2) + hour, data = dist_norm)

residuals <- residuals(lm)
ggplot(data.frame(residuals), aes(x = residuals)) +
  geom_histogram(binwidth = 0.1, fill = "skyblue", color = "black", alpha = 0.7) +
  labs(title = "Histogram of Residuals", x = "Residuals", y = "Frequency")

summary(lm)
## 
## Call:
## lm(formula = vol ~ poly(mins, 2) + hour, data = dist_norm)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.72712 -0.20733  0.01066  0.26070  2.10912 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     6.987e-04  1.417e-02   0.049    0.961    
## poly(mins, 2)1  1.707e+01  5.265e-01  32.420   <2e-16 ***
## poly(mins, 2)2 -1.502e+01  4.863e-01 -30.888   <2e-16 ***
## hour           -2.320e-01  1.832e-02 -12.661   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4386 on 954 degrees of freedom
## Multiple R-squared:  0.8084, Adjusted R-squared:  0.8078 
## F-statistic:  1342 on 3 and 954 DF,  p-value: < 2.2e-16


The hour of the day and the minutes since the last pump explain about 81% of the variation in volume pumped in this linear model. The residuals are approximately normally distributed which supports this model being a good fit for the data. Age of the baby may also have some explanatory power, however the residuals were not normally distributed and thus has been excluded as beyond the scope of this project.

Growth

growth <- raw %>%
  filter(Type == "Growth") %>%
  select(Start, Start_Condition, Start_Location, End_Condition) %>%
  rename(weight = Start_Condition,
         length = Start_Location,
         head = End_Condition) %>%
  mutate(date = as.Date(Start),
         weight = as.numeric(sub("kg", "", weight)),
         head = as.numeric(sub("cm", "", head)),
         length = as.numeric(sub("cm", "", length)),
         age_days = as.numeric(difftime(date, "2023-03-02", units = "days")),
         age_weeks = round(age_days / 7, 1),
         age_days = round(age_days))

# Process the growth charts data
growth_charts <- read_excel("growth_chart.xlsx")%>%
  rename(age_days = Age) %>%
  select(age_days, P10, P25, P50, P75)

# Filter and select data for plotting
weight_growth <- growth %>%
  filter(!is.na(weight)) %>%
  select(weight, age_days)

# Define the color palette
colors <- brewer.pal(6, "Dark2")
# Plotting with adjusted x-axis limit and percentile labels
ggplot() +
  geom_line(data = weight_growth, 
            aes(x = age_days, y = weight), 
            color = colors[3], linetype = "solid", size = 1.5, alpha = 0.8) +  # Increased line width and decreased alpha
  geom_line(data = growth_charts, 
            aes(x = age_days, y = P25), 
            color = colors[1], linetype = "dashed", size = 1, alpha = 0.6) +  # Decreased line width and alpha
  geom_line(data = growth_charts, 
            aes(x = age_days, y = P50), 
            color = colors[2], linetype = "dashed", size = 1, alpha = 0.6) +  # Decreased line width and alpha
  geom_line(data = growth_charts, 
            aes(x = age_days, y = P75), 
            color = colors[6], linetype = "dashed", size = 1, alpha = 0.6) +  # Decreased line width and alpha
  labs(title = "Weight by Age with Percentiles",
       x = "Age (days)",
       y = "Weight (kg)") +
  theme_minimal() +
  theme(legend.position = "bottom") + 
  scale_x_continuous(limits = c(0, 275), breaks = seq(0, 275, by = 20)) +  # Adjusted x-axis limit
  scale_y_continuous(limits = c(2.5, 10), breaks = seq(2.5, 10, by = 2)) +
  scale_color_manual(values = c(colors[3], colors[1], colors[2], colors[6])) +  # Match label text colors to line colors
  geom_text(data = growth_charts %>% filter(age_days == 270), 
            aes(x = age_days, y = P25, label = "P25"), 
            vjust = -0.5, hjust = -0.5, color = colors[1]) +
  geom_text(data = growth_charts %>% filter(age_days == 270), 
            aes(x = age_days, y = P50, label = "P50"), 
            vjust = -0.5, hjust = -0.5, color = colors[2]) +
  geom_text(data = growth_charts %>% filter(age_days == 270), 
            aes(x = age_days, y = P75, label = "P75"), 
            vjust = -0.5, hjust = -0.5, color = colors[6])