This is a quick forecast of the next 10 menstrual period start dates (roughly the next nine months) for vacation planning purposes.

This raw data was copy-pasted from a text note on mobile.

# Import and tidy
# Read the exported note. `sep` is named explicitly instead of relying on
# positional matching after `header`.
menses <- read.table("period.txt", header = FALSE, sep = ",")

# Keep only the first column and parse it as m/d/yy period start dates.
menses <- data.frame(date = as.Date(menses$V1, format = "%m/%d/%y"))

# An unparseable entry would silently turn every downstream gap, max and
# forecast date into NA, so fail here with a clear message instead.
if (anyNA(menses$date)) {
  stop("period.txt contains entries that are not m/d/yy dates", call. = FALSE)
}

# diff() assumes chronological order; enforce it so that an out-of-order
# paste cannot produce negative gaps.
menses <- menses[order(menses$date), , drop = FALSE]
rownames(menses) <- NULL

# Gap between consecutive period starts, in days.
gaps <- diff(menses$date)
gaps_num <- as.numeric(gaps, units = "days")

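Some data exploration to check for missing entries and consistency. The largest gap (71 days) is well above the next largest (49 days), which suggests a missed entry, so it is dropped.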

# Drop max outlier
# Inspect the six largest gaps before filtering.
descending <- sort(gaps_num, decreasing = TRUE)
head(descending, n = 6L)
## [1] 71 49 46 42 39 39
# Flag every gap equal to the maximum and keep the rest.
is_longest <- gaps_num == max(gaps_num)
gaps_num <- gaps_num[!is_longest]
# Re-inspect the largest gaps after the outlier is removed.
descending <- sort(gaps_num, decreasing = TRUE)
head(descending, n = 6L)
## [1] 49 46 42 39 39 38

Convert the data to a time series to use the FPP2 package.

# Calculate gaps as a time series value
# One observation per cycle, indexed 1, 2, 3, ... (the ts() defaults, spelled
# out here for clarity).
gaps_ts <- ts(data = gaps_num, start = 1, frequency = 1)

# Visualize
# Line plot of gap length (days) against cycle index.
plot(gaps_ts)

Forecast Models

Forecast based on historical median

# Number of future cycles to forecast (shared by all models below).
h_preds <- 10

# Median baseline gap
median_gap <- median(as.numeric(gaps_ts))
# Flat forecast in the same `$mean` shape the model forecasts expose.
fc_median <- list(mean = rep(median_gap, h_preds))

# Calculate dates
# Most recent recorded period start; all predictions are offsets from it.
last_date <- max(menses$date)

# Round the cumulative offset to whole days. With an even number of gaps the
# median can be x.5, and stepping a Date sequence by a fractional amount would
# yield fractional dates. For an integer median this gives the same dates as
# stepping by `median_gap`.
median_pred <- last_date + round(median_gap * seq_len(h_preds))

ARIMA model

# ARIMA model
# Let auto.arima() choose the model order, then forecast h_preds cycles ahead.
fit_arima <- auto.arima(gaps_ts)
fc_arima <- fit_arima |> forecast(h = h_preds)
fc_arima[["mean"]]  # next gaps in days
## Time Series:
## Start = 132 
## End = 141 
## Frequency = 1 
##  [1] 26.57973 26.40921 26.92612 27.36988 27.45287 27.31248 27.17547 27.14123
##  [9] 27.17845 27.22027
# Turn forecast gaps into calendar dates: whole-day gaps, accumulated from the
# last recorded start.
# NOTE(review): rounding each gap before cumsum() lets per-step rounding error
# accumulate; round(cumsum(...)) would avoid that. Left as is so the dates
# match the printed prediction table.
arima_gap_days <- round(fc_arima[["mean"]])
arima_pred <- last_date + cumsum(arima_gap_days)
autoplot(fc_arima)

ETS(M,N,N)

# ETS model
# Let ets() choose the exponential smoothing form, then forecast h_preds
# cycles ahead.
fit_ets <- ets(gaps_ts)
fc_ets <- fit_ets |> forecast(h = h_preds)
fc_ets[["mean"]]
## Time Series:
## Start = 132 
## End = 141 
## Frequency = 1 
##  [1] 26.69554 26.69554 26.69554 26.69554 26.69554 26.69554 26.69554 26.69554
##  [9] 26.69554 26.69554
# Turn forecast gaps into calendar dates: whole-day gaps, accumulated from the
# last recorded start.
# NOTE(review): each 26.69554-day gap rounds up to 27, so ten steps land at
# 270 days rather than ~267; round(cumsum(...)) would avoid the drift. Left as
# is so the dates match the printed prediction table.
ets_gap_days <- round(fc_ets[["mean"]])
ets_pred <- last_date + cumsum(ets_gap_days)
autoplot(fc_ets)

Model Evaluation

Cross Validation

# Forecast wrappers sharing the (y, h) signature that tsCV() is given below.
# Each returns an object whose `$mean` holds the h-step-ahead point forecasts.

# Fit an automatically selected ARIMA model to `y` and forecast `h` steps.
f_arima_fun <- function(y, h) {
  forecast(auto.arima(y), h = h)
}

# Fit an automatically selected ETS model to `y` and forecast `h` steps.
f_ets_fun <- function(y, h) {
  forecast(ets(y), h = h)
}

# Baseline: repeat the median of `y` for all `h` steps.
f_median_fun <- function(y, h) {
  flat <- rep(median(y), times = h)
  list(mean = ts(flat))
}

# Cross-validation with rolling origin
# One-step-ahead forecast errors for each model, refitting as the training
# window grows.
e_arima <- tsCV(gaps_ts, f_arima_fun, h = 1)
e_ets <- tsCV(gaps_ts, f_ets_fun, h = 1)

# Median baseline
# Reuse f_median_fun instead of repeating its body in an inline closure, so
# the baseline is defined in one place.
e_median <- tsCV(gaps_ts, f_median_fun, h = 1)

RMSE

Root mean squared errors for each:

# Compare RMSE
# Apply the same root-mean-squared-error formula to each model's
# cross-validation errors, skipping the NA entries.
cv_errors <- list(ARIMA = e_arima, ETS = e_ets, Median = e_median)
rmse <- vapply(
  cv_errors,
  function(e) sqrt(mean(e^2, na.rm = TRUE)),
  numeric(1)
)

rmse
##    ARIMA      ETS   Median 
## 3.553696 3.310761 3.830294

Residuals

Confirming that the residuals for our models look like white noise:

# Residual diagnostics for the ARIMA fit; prints the Ljung-Box test shown
# below (a large p-value means no significant residual autocorrelation).
checkresiduals(fit_arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,2)
## Q* = 3.4375, df = 6, p-value = 0.7523
## 
## Model df: 4.   Total lags used: 10
# Residual diagnostics for the ETS fit; prints the Ljung-Box test shown
# below (a large p-value means no significant residual autocorrelation).
checkresiduals(fit_ets)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(M,N,N)
## Q* = 14.948, df = 10, p-value = 0.134
## 
## Model df: 0.   Total lags used: 10

Prediction table

The final predictions:

# One row per forecast cycle, one column of predicted start dates per model.
results <- data.frame(
  arima = arima_pred,
  ets = ets_pred,
  median = median_pred
)
results
##         arima        ets     median
## 1  2025-12-27 2025-12-27 2025-12-27
## 2  2026-01-22 2026-01-23 2026-01-23
## 3  2026-02-18 2026-02-19 2026-02-19
## 4  2026-03-17 2026-03-18 2026-03-18
## 5  2026-04-13 2026-04-14 2026-04-14
## 6  2026-05-10 2026-05-11 2026-05-11
## 7  2026-06-06 2026-06-07 2026-06-07
## 8  2026-07-03 2026-07-04 2026-07-04
## 9  2026-07-30 2026-07-31 2026-07-31
## 10 2026-08-26 2026-08-27 2026-08-27

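Both the ARIMA and ETS(M,N,N) models forecast gaps of roughly 26.4–27.5 days and 26.7 days respectively, close to the 27-day historical median rather than meaningfully shorter. After rounding to whole days, the ETS dates match the median baseline exactly and the ARIMA dates are one day earlier from the second cycle onward. Both models also had a lower cross-validated RMSE than the median baseline, and their residuals show no significant autocorrelation (Ljung-Box p > 0.05), so the predictions should hold up well for travel planning.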