Analysis of Ronan’s Huckleberry Data

Courtney Russ

2024-01-07

This is an analysis of data from the Huckleberry App, an app used by parents to record information about their baby including sleep, feeding, and growth.

Set Up

Overview of Data

raw <- read_csv("ronan.csv") %>%
  dplyr::select(Type, Start, End, `Start Condition`, `Start Location`, `End Condition`, Notes) %>%
  rename(
    Start_Condition = `Start Condition`,
    Start_Location = `Start Location`,
    End_Condition = `End Condition`) %>%
  mutate(
    Start = strptime(Start, format = "%d/%m/%y %H:%M"),
    End = strptime(End, format = "%d/%m/%y %H:%M"),
    age_days = as.numeric(difftime(as.Date(Start), as.Date("2023-03-02"), units = "days")),
    age_weeks = floor(age_days/7)
  )


skim(raw)

Data summary
Name	raw
Number of rows	3356
Number of columns	9
_______________________
Column type frequency:
character	7
numeric	2
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	n_unique
Type	0	1.00	4	6	7
Start	0	1.00	10089	26902	3294
End	1842	0.45	11934	19519	1513
Start_Condition	1596	0.52	3	24	108
Start_Location	2570	0.23	4	13	13
End_Condition	2490	0.26	4	27	73
Notes	3230	0.04	5	156	124

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
age_days	0	1	96.95	67.12	0	43	82	143	283	▇▇▃▃▁
age_weeks	0	1	13.42	9.58	0	6	11	20	40	▇▆▃▂▁

Columns with a complete rate <1 need to be inspected further.

Inspect Column Types

end_table <- raw %>%
  filter(is.na(End)) %>%
  head(3)

sc <- raw %>%
  filter(!is.na(Start_Condition)) %>%
  head(3)

sl <- raw %>%
  filter(!is.na(Start_Location)) %>%
  head(3)

ec <- raw %>%
  filter(!is.na(End_Condition)) %>%
  head(3)

When ‘end’ column is NA
Type	Start	End	Start_Condition	Start_Location	End_Condition	Notes	age_days	age_weeks
Pump	2023-11-22 12:00:00	NA	10ml	NA	NA	NA	265	37
Growth	2023-11-20 15:11:00	NA	9.02kg	71.9cm	NA	NA	263	37
Pump	2023-11-13 10:01:00	NA	40ml	NA	NA	NA	256	36

When ‘Start_Condition’ column is not NA
Type	Start	End	Start_Condition	Start_Location	End_Condition	Notes	age_days	age_weeks
Pump	2023-11-22 12:00:00	NA	10ml	NA	NA	NA	265	37
Growth	2023-11-20 15:11:00	NA	9.02kg	71.9cm	NA	NA	263	37
Pump	2023-11-13 10:01:00	NA	40ml	NA	NA	NA	256	36

When ‘Start_Location’ column is not NA
Type	Start	End	Start_Condition	Start_Location	End_Condition	Notes	age_days	age_weeks
Growth	2023-11-20 15:11:00	NA	9.02kg	71.9cm	NA	NA	263	37
Growth	2023-11-01 09:33:00	NA	9.02kg	69cm	45cm	NA	244	34
Feed	2023-09-27 18:50:00	NA	Breast Milk	Bottle	60ml	NA	209	29

When ‘End_Condition’ column is not NA
Type	Start	End	Start_Condition	Start_Location	End_Condition	Notes	age_days	age_weeks
Growth	2023-11-01 09:33:00	NA	9.02kg	69cm	45cm	NA	244	34
Feed	2023-09-27 18:50:00	NA	Breast Milk	Bottle	60ml	NA	209	29
Feed	2023-09-27 12:47:00	NA	Breast Milk	Bottle	100ml	NA	209	29

Start Condition, Start Location, and End condition seem to be used inconsistently depending on the Type so this will need to be assessed as the data is split into groups for analysis. The End only appears for events that have a distinct Start and End e.g. sleep so an incomplete column is acceptable.

Sleep

Longest and Shortest Sleeps

# Filter for rows recording sleep
sleep <- raw %>% 
  filter(Type == "Sleep" & !is.na(End)) %>%
  dplyr::select(Start, End, age_weeks) %>%
  mutate(diff_minutes = as.numeric(round(difftime(End, Start, units = "mins"))))

# Get longest periods of sleep
longest_sleep <- sleep %>%
  arrange(desc(diff_minutes)) %>%
  head(5) %>%
  mutate(
    Start = as.character(Start),
    Length = round(diff_minutes/60, 1)
  ) %>%
  dplyr::select(Start, Length) %>%
  arrange(Start)

# Get shortest periods of sleep
shortest_sleep <- sleep %>%
  arrange(diff_minutes) %>%
  head(5) %>%
  mutate(
    Start = as.character(Start)) %>%
  dplyr::select(Start, diff_minutes) %>%
  arrange(Start)


kable(longest_sleep, caption = "Longest Periods of Sleep", col.names = c("Start", "Length (Hours)"))

Longest Periods of Sleep
Start	Length (Hours)
2023-06-11 18:40:00	12.7
2023-06-13 18:29:00	13.0
2023-08-10 19:07:00	12.4
2023-08-17 18:54:00	12.7
2023-08-26 19:19:00	12.5

kable(shortest_sleep, caption = "Shortest Periods of Sleep", col.names = c("Start", "Length (Minutes)"))

Shortest Periods of Sleep
Start	Length (Minutes)
2023-04-19 15:05:00	1
2023-05-03 16:38:00	1
2023-05-05 18:54:00	1
2023-09-26 09:45:00	1
2023-10-11 07:05:00	1

There seem to have been some ‘false start’ sleeps of 1 minute. A more useful measure may be the frequency of the lowest sleep lengths.

shortest_sleep <- sleep %>%
  group_by(diff_minutes) %>%
  summarise(frequency = n()) %>%
  arrange(diff_minutes) %>%
  head(5)

kable(shortest_sleep, caption = "Shortest Periods of Sleep", col.names = c("Length (Minutes)", "Count"))

Shortest Periods of Sleep
Length (Minutes)	Count
1	6
2	7
3	5
4	9
5	15

Hours of Sleep

Per Day

sleep_day <- raw %>% 
  filter(Type == "Sleep" & !is.na(End)) %>%
  dplyr::select(Start, End, age_weeks) %>%
  mutate(sleep_length = round(difftime(End, Start, units = "mins")),
         date = as.Date(Start)) %>%
  group_by(date) %>%
  summarise(total_sleep = sum(sleep_length, na.rm = TRUE)) %>%
  mutate(rolling_average = zoo::rollmean(total_sleep, k = 10, fill = NA)) %>%
  dplyr::select(date, total_sleep, rolling_average)

p1 <- ggplot(sleep_day, aes(x = date)) +
  geom_line(aes(y = total_sleep), size = 1, alpha = 0.9, color = "lightgrey") +
  geom_line(aes(y = rolling_average, color = "10-Day Rolling Average"), size = 1, alpha = 1) +
  labs(title = "Daily Sleep Over Time",
       x = "Date",
       y = "Minutes") +
  scale_color_manual(values = brewer.pal(n = 2, name = "Set2")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "bottom") +
  guides(
    linetype = "none",
    color = guide_legend(title = NULL),
    fill = guide_legend(title = NULL)
  ) +
  scale_x_date(date_labels = "%b %Y", date_breaks = "1 month", expand = c(0, 0))

p1

Mum confirmed that they stopped recording overnight sleep in around September.

sleep_day_filtered <- sleep_day %>%
  filter(date < as.Date("2023-09-01"))

p2 <- ggplot(sleep_day_filtered, aes(x = date)) +
  geom_line(aes(y = total_sleep), size = 1, alpha = 0.9, color = "lightgrey") +
  geom_line(aes(y = rolling_average, color = "10-Day Rolling Average"), size = 1, alpha = 1) +
  labs(title = "Daily Sleep Over Time - Birth to 6 Months",
       x = "Date",
       y = "Minutes") +
  scale_color_manual(values = brewer.pal(n = 2, name = "Set2")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "bottom") +
  guides(
    linetype = "none",
    color = guide_legend(title = NULL),
    fill = guide_legend(title = NULL)
  ) +
  scale_x_date(date_labels = "%b %Y", date_breaks = "1 month", expand = c(0, 0))

p2

## Don't know how to automatically pick scale for object of type <difftime>.
## Defaulting to continuous.

Feeds

Volume per Day

feeds <- raw %>% 
  filter(Type == "Feed") %>%
  dplyr::select(Start, End_Condition) %>%
  mutate(Volume = as.numeric(str_extract(End_Condition, "\\d+"))) %>%
  rename(date = Start) %>%
  dplyr::select(-End_Condition)

feeds_day <- feeds %>%
  mutate(date = as.Date(date),
         volume = as.numeric(Volume)) %>%
  group_by(date) %>%
  summarise(vol_day = sum(volume)) %>%
  mutate(rolling_average = zoo::rollmean(vol_day, k = 10, fill = NA))


p5 <- ggplot(feeds_day, aes(x = date)) +
  geom_line(aes(y = vol_day, color = "Volume"), size = 1, alpha = 0.7, linetype = "solid") +
  geom_line(aes(y = rolling_average, color = "10-Day Rolling Average"), size = 1, alpha = 0.9) +
  labs(title = "Daily Volume of Breast Milk Consumed",
       x = "Date",
       y = "Millilitres") +
  scale_color_manual(values = c("10-Day Rolling Average" = "#66C2A5", "Volume" = "lightgrey")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +  # Rotate labels by 45 degrees
  guides(
    linetype = "none",
    color = guide_legend(title = NULL),
    fill = guide_legend(title = NULL)
  ) +
  scale_x_date(date_labels = "%b %Y", date_breaks = "1 month", expand = c(0, 0))

p5

Mum confirmed they stopped recording feeds midway through June.

feeds_day_filtered <- feeds_day %>%
  filter(date < as.Date("2023-07-01"))

p6 <- ggplot(feeds_day_filtered, aes(x = date)) +
  geom_line(aes(y = vol_day, color = "Volume"), size = 1, alpha = 0.7, linetype = "solid") +
  geom_line(aes(y = rolling_average, color = "10-Day Rolling Average"), size = 1, alpha = 0.9) +
  labs(title = "Daily Volume of Milk Consumed - Birth to 3 Months",
       y = "Millilitres") +
  scale_color_manual(values = c("10-Day Rolling Average" = "#66C2A5", "Volume" = "lightgrey")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +  # Rotate labels by 45 degrees
  guides(
    linetype = "none",
    color = guide_legend(title = NULL),
    fill = guide_legend(title = NULL)
  ) +
  scale_x_date(date_labels = "%b %Y", date_breaks = "1 month", expand = c(0, 0))

p6

Pumping

Overall Pumping Statistics

pump <- raw %>%
  filter(Type == "Pump") %>%
  select(Start, Start_Condition) %>%
mutate(volume = as.numeric(str_extract(Start_Condition, "\\d+"))) %>%
  select(Start, volume)

pump_statistics <- pump %>%
  summarise(
    total_pumps = n(),
    total_volume = sum(volume, na.rm = TRUE)/1000,
    mean_volume = round(mean(volume, na.rm = TRUE), 1),
    median_volume = round(median(volume, na.rm = TRUE), 1),
    min_volume = round(min(volume, na.rm = TRUE), 1),
    max_volume = round(max(volume, na.rm = TRUE), 1),
    sd_volume = round(sd(volume, na.rm = TRUE), 1)
  )

kable(pump_statistics, caption = "Pumping Statistics", col.names = c("Total Pumps", "Total Volume (L)", "Mean Volume (mL)", "Median Volume (mL)", "Smallest Volume (mL)", "Largest Volume (mL)", "Standard Deviation (mL)"))

Pumping Statistics
Total Pumps	Total Volume (L)	Mean Volume (mL)	Median Volume (mL)	Smallest Volume (mL)	Largest Volume (mL)	Standard Deviation (mL)
961	211.1	219.7	180	10	570	109.3

Total Daily Volume Pumped Statistics

pump_day <- raw %>%
  filter(Type == "Pump") %>%
  dplyr::select(Start, Start_Condition) %>%
  mutate(date = as.Date(Start),
         volume = as.numeric(str_extract(Start_Condition, "\\d+"))) %>%
  dplyr::select(date, volume) %>%
  group_by(date) %>%
  summarise(total_volume = sum(volume)) %>%
  mutate(rolling_average = zoo::rollmean(total_volume, k = 10, fill = NA))


pump_day_statistics <- pump_day %>%
  summarise(
    mean_volume = mean(total_volume, na.rm = TRUE),
    median_volume = median(total_volume, na.rm = TRUE),
    min_volume = min(total_volume, na.rm = TRUE),
    max_volume = max(total_volume, na.rm = TRUE),
    sd_volume = sd(total_volume, na.rm = TRUE)
  )

kable(pump_day_statistics, caption = "Daily Pumping Statistics", col.names = c("Mean Volume (mL)", "Median Volume (mL)", "Smallest Volume (mL)", "Largest Volume (mL)", "Standard Deviation (mL)"))

Daily Pumping Statistics
Mean Volume (mL)	Median Volume (mL)	Smallest Volume (mL)	Largest Volume (mL)	Standard Deviation (mL)
872.314	930	10	1230	217.5086

Plot of Volume over Time

p7 <- ggplot(data = pump_day, aes(x = date)) +
  geom_line(aes(y = total_volume, color = "Volume"), size = 1, alpha = 0.7) +
  geom_line(aes(y = rolling_average, color = "10-Day Rolling Average"), size = 1, alpha = 0.9) +
  labs(title = "Daily Volume of Breast Milk Pumped",
       x = "Date",
       y = "Millilitres") +
  scale_color_manual(name = "Legend", 
                     values = c("10-Day Rolling Average" = "#66C2A5", "Volume" = "lightgrey"),
                     labels = c("10-Day Rolling Average", "Volume")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(
    linetype = "none",
    color = guide_legend(title = NULL),
    fill = guide_legend(title = NULL)
  ) +
  scale_x_date(date_labels = "%b %Y", date_breaks = "1 month", expand = c(0, 0))

p7

## Warning: Removed 9 rows containing missing values (`geom_line()`).

Distribution by Month

p8 <- pump_day %>%
  mutate(month = month(date, label = TRUE)) %>%
  ggplot(aes(x = month, y = total_volume, fill = month)) +
  geom_boxplot() +
  labs(title = "Pumped Milk Volumes by Month",
       x = "Month",
       y = "Volume") +
  scale_fill_brewer(palette = "Pastel1") +  # Choose a color palette
  theme_minimal() +
  theme(legend.position = "none")
p8

Mum confirmed that she stopped pumping by early November. This explains the decrease in volume and change in distribution - while decreasing supply, daily pumps throughout October decreased in total volume and this trend continued into November until stopping. We can also see that while establishing pumping in March (baby was born early March) volumes were quite varied and the median was lower than in April.

Feeding & Pumping Comparison

pump <- pump %>%
  mutate(date = as.Date(Start)) %>%
  select(date, volume)

avg_pump <- pump %>%
  group_by(date) %>%
  summarise(count = n(),
            vol = sum(volume)) %>%
  filter(!is.na(count))

feeds <- feeds %>%
  mutate(date = as.Date(date))

avg_feeds <- feeds %>%
  group_by(date) %>%
  summarise(count = n(),
            Vol = sum(Volume)) %>%
  filter(!is.na(count))

Feeding vs Pump Counts
Mean Pumps per Day	Mean Feeds per Day	Pump/Feed Ratio
3.97	7.33	1.846348

The baby fed 80% more times than mum pumped.

mean_vol_pumped <- round(mean(pump$volume), 0)

mean_vol_fed <- round(mean(feeds$Volume), 0)

prop = round(mean_vol_pumped/mean_vol_fed, 2)

mean_vol_data <- data.frame("Mean Volume Pumped" = mean_vol_pumped, "Mean Volume Fed" = mean_vol_fed, "Pump/Feed Ratio" = prop)

kable(mean_vol_data, caption = "Feeding vs Pump Volumes", col.names = c("Mean Pumped Volume per Day", "Mean Volume Fed per Day", "Pump/Feed Ratio"))

Feeding vs Pump Volumes
Mean Pumped Volume per Day	Mean Volume Fed per Day	Pump/Feed Ratio
220	106	2.08

On average each pump produced more than twice as much milk as baby ate at each feed.

The density plot of number of pumps and number of feeds per day has quite wide spacing between the two. The feeds per day has an interesting multimodal distribution but still with an overall negative skew.

combined_plot <- bind_rows(
  mutate(pump, source = "pump", volume = volume),
  mutate(feeds, source = "feeds", volume = Volume)
)

ggplot(combined_plot, aes(x = volume, fill = source)) +
  geom_density(alpha = 0.5) +
  labs(title = "Density Plot of Feed and Pump Volumes",
       x = "Volume",
       y = "Density") +
  scale_fill_manual(values = c("feeds" = "#66C2A5", "pump" = "lightgray"))

The density plot of volumes is quite different to the countse, with the pump volumes having a bimodal distributon with the lower peak much closer to the peak of the volume fed and significant overlap.

Pumping Linear Model

This model attempts to explain the variation in volume of milk pumped during each session.

A correlation matrix and VIF calculations are used to see if the predictors (baby’s age, minutes since last pump, hour of the day) are multicollinear.

Correlation Matrix

The correlation matrix identifies strong relationships between two predictor variables.

# data prep
pump_dist <- raw %>%
  filter(Type == "Pump") %>%
  dplyr::select(Start, Start_Condition) %>%
  mutate(
    volume = as.numeric(str_extract(Start_Condition, "\\d+")),
    hour = hour(Start),
    age_days = as.numeric((as.Date(Start)) - as.Date("2023-03-02"))
  ) %>%
  dplyr::select(Start, volume, hour, age_days) %>%
  arrange(Start) %>%  # Arrange the data by timestamp
  mutate(min_since_last_pump = as.numeric(difftime(Start, lag(Start), units = "mins"))) %>%
  arrange(desc(min_since_last_pump)) %>%
  slice(-c(1, 2)) %>%
  # Normalize the variables
  mutate(
    volume_norm = scale(volume),
    hour_norm = scale(hour),
    age_days_norm = scale(age_days),
    min_pump_norm = scale(min_since_last_pump)
  )

# Normalisation
dist_norm <- pump_dist %>%
  dplyr::select(volume_norm, hour_norm, age_days_norm, min_pump_norm) %>%
  rename(vol = volume_norm,
         hour = hour_norm,
         age = age_days_norm,
         mins = min_pump_norm) %>%
  na.omit()

# Visualize correlation matrix (heatmap)
cor_matrix <- cor(dist_norm[, c("hour", "age", "mins")], use = "complete.obs")
cor_matrix

##            hour        age       mins
## hour  1.0000000 -0.0631607 -0.5138272
## age  -0.0631607  1.0000000  0.2990544
## mins -0.5138272  0.2990544  1.0000000

# Getting absolute values for visualisation
abs_cor_matrix <- abs(cor_matrix)


pheatmap(abs_cor_matrix, display_numbers = T, color = colorRampPalette(c('white','#66C2A5'))(100), fontsize_number = 15)

The correlation matrix suggests that there is a moderate relationship between hour of the day and minutes since last pump, and a small relationship between the age of the baby and minutes since last pump. Next I will calculate the VIF (Variance Inflation Factor) values to establish the degree of multicollinearity between them.

Variance Inflation Factor

model <- lm(vol ~ hour + age + mins, data = dist_norm)

# Calculate VIF
vif_values <- vif(model)

# Create a data frame with Variable and VIF columns
vif_df <- data.frame(Variable = names(vif_values), VIF = as.vector(vif_values))

# Display VIF values using knitr::kable
knitr::kable(vif_df, col.names = c("Variable", "VIF"))

Variable	VIF
hour	1.375541
age	1.111806
mins	1.504617

The low VIF values indicate that there is not a high level of correlation between these variables. This means there is no strong evidence to suggest the need for removing any of these variables due to multicollinearity despite the apparent correlation between the hour of the day and minutes since the last pump in the correlation matrix.

Linearity Test

Here I’m plotting the distribution of residuals for each of the explanatory variables to assess whether the linear model is appropriate.

Age seems to have a bimodal distribution of residuals so I’m going to exclude it from this model. The polynomial model of minutes since last pump has residuals that appear to be more noramlly distributed than the linear model so I will also use this in the final model.

Final Model

lm <- lm(vol ~ poly(mins, 2) + hour, data = dist_norm)

residuals <- residuals(lm)
ggplot(data.frame(residuals), aes(x = residuals)) +
  geom_histogram(binwidth = 0.1, fill = "skyblue", color = "black", alpha = 0.7) +
  labs(title = "Histogram of Residuals", x = "Residuals", y = "Frequency")

summary(lm)

## 
## Call:
## lm(formula = vol ~ poly(mins, 2) + hour, data = dist_norm)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.72712 -0.20733  0.01066  0.26070  2.10912 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     6.987e-04  1.417e-02   0.049    0.961    
## poly(mins, 2)1  1.707e+01  5.265e-01  32.420   <2e-16 ***
## poly(mins, 2)2 -1.502e+01  4.863e-01 -30.888   <2e-16 ***
## hour           -2.320e-01  1.832e-02 -12.661   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4386 on 954 degrees of freedom
## Multiple R-squared:  0.8084, Adjusted R-squared:  0.8078 
## F-statistic:  1342 on 3 and 954 DF,  p-value: < 2.2e-16

The hour of the day and the minutes since the last pump explain about 81% of the variation in volume pumped in this linear model. The residuals are approximately normally distributed which supports this model being a good fit for the data. Age of the baby may also have some explanatory power, however the residuals were not normally distributed and thus has been excluded as beyond the scope of this project.

Growth

growth <- raw %>%
  filter(Type == "Growth") %>%
  select(Start, Start_Condition, Start_Location, End_Condition) %>%
  rename(weight = Start_Condition,
         length = Start_Location,
         head = End_Condition) %>%
  mutate(date = as.Date(Start),
         weight = as.numeric(sub("kg", "", weight)),
         head = as.numeric(sub("cm", "", head)),
         length = as.numeric(sub("cm", "", length)),
         age_days = as.numeric(difftime(date, "2023-03-02", units = "days")),
         age_weeks = round(age_days / 7, 1),
         age_days = round(age_days))

# Process the growth charts data
growth_charts <- read_excel("growth_chart.xlsx")%>%
  rename(age_days = Age) %>%
  select(age_days, P10, P25, P50, P75)

# Filter and select data for plotting
weight_growth <- growth %>%
  filter(!is.na(weight)) %>%
  select(weight, age_days)

# Define the color palette
colors <- brewer.pal(6, "Dark2")
# Plotting with adjusted x-axis limit and percentile labels
ggplot() +
  geom_line(data = weight_growth, 
            aes(x = age_days, y = weight), 
            color = colors[3], linetype = "solid", size = 1.5, alpha = 0.8) +  # Increased line width and decreased alpha
  geom_line(data = growth_charts, 
            aes(x = age_days, y = P25), 
            color = colors[1], linetype = "dashed", size = 1, alpha = 0.6) +  # Decreased line width and alpha
  geom_line(data = growth_charts, 
            aes(x = age_days, y = P50), 
            color = colors[2], linetype = "dashed", size = 1, alpha = 0.6) +  # Decreased line width and alpha
  geom_line(data = growth_charts, 
            aes(x = age_days, y = P75), 
            color = colors[6], linetype = "dashed", size = 1, alpha = 0.6) +  # Decreased line width and alpha
  labs(title = "Weight by Age with Percentiles",
       x = "Age (days)",
       y = "Weight (kg)") +
  theme_minimal() +
  theme(legend.position = "bottom") + 
  scale_x_continuous(limits = c(0, 275), breaks = seq(0, 275, by = 20)) +  # Adjusted x-axis limit
  scale_y_continuous(limits = c(2.5, 10), breaks = seq(2.5, 10, by = 2)) +
  scale_color_manual(values = c(colors[3], colors[1], colors[2], colors[6])) +  # Match label text colors to line colors
  geom_text(data = growth_charts %>% filter(age_days == 270), 
            aes(x = age_days, y = P25, label = "P25"), 
            vjust = -0.5, hjust = -0.5, color = colors[1]) +
  geom_text(data = growth_charts %>% filter(age_days == 270), 
            aes(x = age_days, y = P50, label = "P50"), 
            vjust = -0.5, hjust = -0.5, color = colors[2]) +
  geom_text(data = growth_charts %>% filter(age_days == 270), 
            aes(x = age_days, y = P75, label = "P75"), 
            vjust = -0.5, hjust = -0.5, color = colors[6])