1 Create data frame

# Create the data frame
df_hz <- data.frame(Sample = 1:10, Date = as.Date(c("2024-07-24", "2024-09-30", "2024-10-05", "2025-01-01", "2025-01-31", "2025-02-08", "2025-02-20", "2025-03-16", "2025-04-15", "2025-05-13")), Time_on_HRT = c(126, 198, 203, 291, 321, 329, 341, 365, 395, 423), Average = c(180.3, 178.5, 153.8, 171.7, 116.3, 138.3, 139.5, 134.4, 126.5, 123.8), Median = c(175.9, 175.8, 148.2, 154.3, 113.3, 134.9, 137.6, 134.0, 120.6, 120.5), High = c(202.5, 211.7, 176.9, 244.5, 141.8, 176.6, 177.9, 157.7, 161.3, 154.0), Low = c(157.8, 152.4, 127.9, 132.4, 100.5, 114.4, 111.0, 113.1, 103.6, 104.9))

2 Create table

# Set CRAN mirror explicitly (e.g., Cloud mirror)
options(repos = c(CRAN = "https://cloud.r-project.org"))
# Then install your package
install.packages("knitr")

## Installing package into 'C:/Users/miede/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)

## package 'knitr' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\miede\AppData\Local\Temp\Rtmp4MHJAu\downloaded_packages

library(knitr)
# Display as table
kable(df_hz, caption = "HRT Data Summary Table")

HRT Data Summary Table
Sample	Date	Time_on_HRT	Average	Median	High	Low
1	2024-07-24	126	180.3	175.9	202.5	157.8
2	2024-09-30	198	178.5	175.8	211.7	152.4
3	2024-10-05	203	153.8	148.2	176.9	127.9
4	2025-01-01	291	171.7	154.3	244.5	132.4
5	2025-01-31	321	116.3	113.3	141.8	100.5
6	2025-02-08	329	138.3	134.9	176.6	114.4
7	2025-02-20	341	139.5	137.6	177.9	111.0
8	2025-03-16	365	134.4	134.0	157.7	113.1
9	2025-04-15	395	126.5	120.6	161.3	103.6
10	2025-05-13	423	123.8	120.5	154.0	104.9

3 Linear regression

# Linear regression: Average vs Time_on_HRT
summary(lm(Average ~ Time_on_HRT, data = df_hz))

## 
## Call:
## lm(formula = Average ~ Time_on_HRT, data = df_hz)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.5382  -1.8075   0.7145   2.6049  23.7079 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 207.6848    15.0633  13.787 7.39e-07 ***
## Time_on_HRT  -0.2051     0.0482  -4.256  0.00278 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.76 on 8 degrees of freedom
## Multiple R-squared:  0.6936, Adjusted R-squared:  0.6553 
## F-statistic: 18.11 on 1 and 8 DF,  p-value: 0.002777

plot(df_hz$Time_on_HRT, df_hz$Average)

library(car)

## Loading required package: carData

ncvTest(lm(Average ~ Time_on_HRT, data=df_hz))

## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 0.0773448, Df = 1, p = 0.78093

durbinWatsonTest(lm(Average ~ Time_on_HRT, data=df_hz))

##  lag Autocorrelation D-W Statistic p-value
##    1      -0.6641694      3.321281   0.024
##  Alternative hypothesis: rho != 0

The p-value is 0.028, so we can reject the \(H_0\) and accept the \(H_a\) that there is autocorrelation, which is obvious, since average Hz is literally based on time on HRT.

4 Correlation between time on HRT and average

cor(df_hz$Time_on_HRT, df_hz$Average)

## [1] -0.8328484

5 Correlation between time on HRT and median

cor(df_hz$Time_on_HRT, df_hz$Median)

## [1] -0.8622817

6 t-test

# Example: paired t-test between Average and Median
t.test(df_hz$Average, df_hz$Median, paired = TRUE)

## 
##  Paired t-test
## 
## data:  df_hz$Average and df_hz$Median
## t = 3.2167, df = 9, p-value = 0.01054
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  1.424405 8.175595
## sample estimates:
## mean difference 
##             4.8

7 Is average consistently higher than median?

df_hz$AvgMinusMed <- df_hz$Average - df_hz$Median
summary(df_hz$AvgMinusMed)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.400   2.775   3.350   4.800   5.300  17.400

8 Visualize the relationship

plot(df_hz$Average, df_hz$Median, main = "Average vs Median", xlab = "Average", ylab = "Median")
abline(0, 1, col = "red")  # 45-degree line for reference

9 Correlation between average and median

cor(df_hz$Average, df_hz$Median)

## [1] 0.9804134

10 1-way ANOVA

# Categorize HRT time
df_hz$Group <- cut(df_hz$Time_on_HRT, breaks = 3, labels = c("Early", "Mid", "Late"))
# Run ANOVA
summary(aov(Average ~ Group, data = df_hz))

##             Df Sum Sq Mean Sq F value Pr(>F)  
## Group        2   2773  1386.7   4.471 0.0561 .
## Residuals    7   2171   310.1                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

11 Variability over time

Are high-low ranges shrinking or expanding?

df_hz$Range <- df_hz$High - df_hz$Low
plot(df_hz$Time_on_HRT, df_hz$Range)

cor.test(df_hz$Time_on_HRT, df_hz$Range)

## 
##  Pearson's product-moment correlation
## 
## data:  df_hz$Time_on_HRT and df_hz$Range
## t = 0.11476, df = 8, p-value = 0.9115
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.6045152  0.6534873
## sample estimates:
##        cor 
## 0.04054219

12 Smoothing/LOESS regression

library(ggplot2)
ggplot(df_hz, aes(Time_on_HRT, Average)) +
  geom_point() +
  geom_smooth(method = "loess") +
  labs(title = "Average Hormone Level over Time on HRT")

## `geom_smooth()` using formula = 'y ~ x'

13 Slope comparison

Are lows dropping faster than highs?

lm_high <- lm(High ~ Time_on_HRT, data = df_hz)
lm_low  <- lm(Low ~ Time_on_HRT, data = df_hz)

summary(lm_high)

## 
## Call:
## lm(formula = High ~ Time_on_HRT, data = df_hz)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.722 -10.489  -2.855   4.147  62.517 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 234.94866   29.92063   7.852 4.99e-05 ***
## Time_on_HRT  -0.18201    0.09574  -1.901   0.0938 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.33 on 8 degrees of freedom
## Multiple R-squared:  0.3112, Adjusted R-squared:  0.2251 
## F-statistic: 3.614 on 1 and 8 DF,  p-value: 0.0938

summary(lm_low)

## 
## Call:
## lm(formula = Low ~ Time_on_HRT, data = df_hz)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.141  -2.547   1.516   6.003  11.292 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 178.88344   10.47691  17.074 1.41e-07 ***
## Time_on_HRT  -0.19079    0.03352  -5.691 0.000459 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.571 on 8 degrees of freedom
## Multiple R-squared:  0.8019, Adjusted R-squared:  0.7772 
## F-statistic: 32.39 on 1 and 8 DF,  p-value: 0.0004591

Voice Samples

RJ