1 Create data frame

# Create the data frame
df_hz <- data.frame(Sample = 1:10, Date = as.Date(c("2024-07-24", "2024-09-30", "2024-10-05", "2025-01-01", "2025-01-31", "2025-02-08", "2025-02-20", "2025-03-16", "2025-04-15", "2025-05-13")), Time_on_HRT = c(126, 198, 203, 291, 321, 329, 341, 365, 395, 423), Average = c(180.3, 178.5, 153.8, 171.7, 116.3, 138.3, 139.5, 134.4, 126.5, 123.8), Median = c(175.9, 175.8, 148.2, 154.3, 113.3, 134.9, 137.6, 134.0, 120.6, 120.5), High = c(202.5, 211.7, 176.9, 244.5, 141.8, 176.6, 177.9, 157.7, 161.3, 154.0), Low = c(157.8, 152.4, 127.9, 132.4, 100.5, 114.4, 111.0, 113.1, 103.6, 104.9))

2 Create table

# Set CRAN mirror explicitly (e.g., Cloud mirror)
options(repos = c(CRAN = "https://cloud.r-project.org"))
# Then install your package
install.packages("knitr")
## Installing package into 'C:/Users/miede/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)
## package 'knitr' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\miede\AppData\Local\Temp\Rtmp4MHJAu\downloaded_packages
library(knitr)
# Display as table
kable(df_hz, caption = "HRT Data Summary Table")
HRT Data Summary Table
Sample Date Time_on_HRT Average Median High Low
1 2024-07-24 126 180.3 175.9 202.5 157.8
2 2024-09-30 198 178.5 175.8 211.7 152.4
3 2024-10-05 203 153.8 148.2 176.9 127.9
4 2025-01-01 291 171.7 154.3 244.5 132.4
5 2025-01-31 321 116.3 113.3 141.8 100.5
6 2025-02-08 329 138.3 134.9 176.6 114.4
7 2025-02-20 341 139.5 137.6 177.9 111.0
8 2025-03-16 365 134.4 134.0 157.7 113.1
9 2025-04-15 395 126.5 120.6 161.3 103.6
10 2025-05-13 423 123.8 120.5 154.0 104.9

3 Linear regression

# Linear regression: Average vs Time_on_HRT
summary(lm(Average ~ Time_on_HRT, data = df_hz))
## 
## Call:
## lm(formula = Average ~ Time_on_HRT, data = df_hz)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.5382  -1.8075   0.7145   2.6049  23.7079 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 207.6848    15.0633  13.787 7.39e-07 ***
## Time_on_HRT  -0.2051     0.0482  -4.256  0.00278 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.76 on 8 degrees of freedom
## Multiple R-squared:  0.6936, Adjusted R-squared:  0.6553 
## F-statistic: 18.11 on 1 and 8 DF,  p-value: 0.002777
plot(df_hz$Time_on_HRT, df_hz$Average)

library(car)
## Loading required package: carData
ncvTest(lm(Average ~ Time_on_HRT, data=df_hz))
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 0.0773448, Df = 1, p = 0.78093
durbinWatsonTest(lm(Average ~ Time_on_HRT, data=df_hz))
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.6641694      3.321281   0.024
##  Alternative hypothesis: rho != 0

The p-value is 0.028, so we can reject the \(H_0\) and accept the \(H_a\) that there is autocorrelation, which is obvious, since average Hz is literally based on time on HRT.

4 Correlation between time on HRT and average

cor(df_hz$Time_on_HRT, df_hz$Average)
## [1] -0.8328484

5 Correlation between time on HRT and median

cor(df_hz$Time_on_HRT, df_hz$Median)
## [1] -0.8622817

6 t-test

# Example: paired t-test between Average and Median
t.test(df_hz$Average, df_hz$Median, paired = TRUE)
## 
##  Paired t-test
## 
## data:  df_hz$Average and df_hz$Median
## t = 3.2167, df = 9, p-value = 0.01054
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  1.424405 8.175595
## sample estimates:
## mean difference 
##             4.8

7 Is average consistently higher than median?

df_hz$AvgMinusMed <- df_hz$Average - df_hz$Median
summary(df_hz$AvgMinusMed)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.400   2.775   3.350   4.800   5.300  17.400

8 Visualize the relationship

plot(df_hz$Average, df_hz$Median, main = "Average vs Median", xlab = "Average", ylab = "Median")
abline(0, 1, col = "red")  # 45-degree line for reference

9 Correlation between average and median

cor(df_hz$Average, df_hz$Median)
## [1] 0.9804134

10 1-way ANOVA

# Categorize HRT time
df_hz$Group <- cut(df_hz$Time_on_HRT, breaks = 3, labels = c("Early", "Mid", "Late"))
# Run ANOVA
summary(aov(Average ~ Group, data = df_hz))
##             Df Sum Sq Mean Sq F value Pr(>F)  
## Group        2   2773  1386.7   4.471 0.0561 .
## Residuals    7   2171   310.1                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

11 Variability over time

Are high-low ranges shrinking or expanding?

df_hz$Range <- df_hz$High - df_hz$Low
plot(df_hz$Time_on_HRT, df_hz$Range)

cor.test(df_hz$Time_on_HRT, df_hz$Range)
## 
##  Pearson's product-moment correlation
## 
## data:  df_hz$Time_on_HRT and df_hz$Range
## t = 0.11476, df = 8, p-value = 0.9115
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.6045152  0.6534873
## sample estimates:
##        cor 
## 0.04054219

12 Smoothing/LOESS regression

library(ggplot2)
ggplot(df_hz, aes(Time_on_HRT, Average)) +
  geom_point() +
  geom_smooth(method = "loess") +
  labs(title = "Average Hormone Level over Time on HRT")
## `geom_smooth()` using formula = 'y ~ x'

13 Slope comparison

Are lows dropping faster than highs?

lm_high <- lm(High ~ Time_on_HRT, data = df_hz)
lm_low  <- lm(Low ~ Time_on_HRT, data = df_hz)

summary(lm_high)
## 
## Call:
## lm(formula = High ~ Time_on_HRT, data = df_hz)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.722 -10.489  -2.855   4.147  62.517 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 234.94866   29.92063   7.852 4.99e-05 ***
## Time_on_HRT  -0.18201    0.09574  -1.901   0.0938 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.33 on 8 degrees of freedom
## Multiple R-squared:  0.3112, Adjusted R-squared:  0.2251 
## F-statistic: 3.614 on 1 and 8 DF,  p-value: 0.0938
summary(lm_low)
## 
## Call:
## lm(formula = Low ~ Time_on_HRT, data = df_hz)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.141  -2.547   1.516   6.003  11.292 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 178.88344   10.47691  17.074 1.41e-07 ***
## Time_on_HRT  -0.19079    0.03352  -5.691 0.000459 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.571 on 8 degrees of freedom
## Multiple R-squared:  0.8019, Adjusted R-squared:  0.7772 
## F-statistic: 32.39 on 1 and 8 DF,  p-value: 0.0004591