#Load packages

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(medicaldata)
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(rstatix)
## 
## Attaching package: 'rstatix'
## 
## The following object is masked from 'package:janitor':
## 
##     make_clean_names
## 
## The following object is masked from 'package:stats':
## 
##     filter
# Interactive exploration only: open the data viewer and the help page for the
# cytomegalovirus dataset. Guarded with interactive() so that sourcing or
# knitting this script does not try to open a viewer or launch the help
# server as a side effect.
if (interactive()) {
  view(cytomegalovirus)
  # print() is explicit because the help page is only displayed when the
  # object returned by help() is printed.
  print(help("cytomegalovirus"))
}
## starting httpd help server ... done

# Clean the column names. clean_names() returns a new data frame rather than
# changing the existing one, so the result has to be assigned back for the
# cleaned names (e.g. donor_cmv, time_to_cmv) to be available below.

cytomegalovirus <- janitor::clean_names(cytomegalovirus)

# Donor CMV status vs time to development of CMV:
# one row per donor_cmv value, holding the mean of time_to_cmv in that group.

summarise(
  group_by(cytomegalovirus, donor_cmv),
  mean_time = mean(time_to_cmv)
)
## # A tibble: 2 × 2
##   donor_cmv mean_time
##       <dbl>     <dbl>
## 1         0      17.6
## 2         1      10.2

# Is the difference in mean time_to_cmv between the two donor groups
# significant?
# donor_cmv is stored as a number (0 / 1) but represents a two-level category,
# so it is converted to a factor first. This makes the grouping explicit
# instead of passing a numeric column as the grouping variable; the groups
# compared are still "0" and "1".
# detailed = TRUE adds the estimates, confidence interval, method and
# alternative columns to the result.

cytomegalovirus |>
  mutate(donor_cmv = factor(donor_cmv)) |>
  t_test(time_to_cmv ~ donor_cmv, detailed = TRUE)
## # A tibble: 1 × 15
##   estimate estimate1 estimate2 .y.     group1 group2    n1    n2 statistic     p
## *    <dbl>     <dbl>     <dbl> <chr>   <chr>  <chr>  <int> <int>     <dbl> <dbl>
## 1     7.45      17.6      10.2 time_t… 0      1         26    38      1.64 0.106
## # ℹ 5 more variables: df <dbl>, conf.low <dbl>, conf.high <dbl>, method <chr>,
## #   alternative <chr>

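# This example might be misleading: the code runs, but donor_cmv is categorical
# data stored as a number (0/1). t_test() treats it as the grouping variable
# here (group1 = 0, group2 = 1 in the output above).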