# install.packages("janitor")
# install.packages("rstatix")
# install.packages("remotes")
# tidyverse and medicaldata are already installed.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(rstatix)
##
## Attaching package: 'rstatix'
##
## The following object is masked from 'package:janitor':
##
## make_clean_names
##
## The following object is masked from 'package:stats':
##
## filter
library(remotes)
# List the datasets that ship with the medicaldata package.
data(package = "medicaldata")

# Take the blood_storage dataset from medicaldata and run its column names
# through clean_names(); the result is stored as `prostate`.
prostate <- clean_names(medicaldata::blood_storage)
# The columns aa and fam_hx are stored as numbers, but they are actually
# categorical variables, so we convert them to labelled factors with mutate().
# Recode the 0/1 indicator columns of `prostate` as labelled factors and
# store the result as `prostate_factors`:
#   aa     : 0 -> "White",             1 -> "African-American"
#   fam_hx : 0 -> "No family history", 1 -> "Fhx of Prostate Cancer"
# The assignment is written with a leading `<-` so the name being created is
# visible at the start of the pipeline rather than hidden at its end.
prostate_factors <- prostate |>
  mutate(
    aa = factor(
      aa,
      levels = c(0, 1),
      labels = c("White", "African-American")
    ),
    fam_hx = factor(
      fam_hx,
      levels = c(0, 1),
      labels = c("No family history", "Fhx of Prostate Cancer")
    )
  )
# Mean of age, p_vol and preop_psa for every combination of aa and fam_hx.
# Missing values are dropped before averaging (na.rm = TRUE).
prostate_factors |>
  select(age, p_vol, preop_psa, aa, fam_hx) |>
  group_by(aa, fam_hx) |>
  summarise(
    across(
      c(age, p_vol, preop_psa),
      function(values) mean(values, na.rm = TRUE)
    )
  )
## `summarise()` has grouped output by 'aa'. You can override using the `.groups`
## argument.
## # A tibble: 4 × 5
## # Groups: aa [2]
## aa fam_hx age p_vol preop_psa
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 White No family history 61.8 56.9 8.06
## 2 White Fhx of Prostate Cancer 59.5 57.3 7.22
## 3 African-American No family history 60.7 54.3 9.90
## 4 African-American Fhx of Prostate Cancer 60.1 51.4 8.71
# Scatter plot of preop_psa (y) against p_vol (x), coloured by aa, with a
# method = "lm" smooth drawn over the points. Panels are arranged with aa
# on the rows and fam_hx on the columns; the legend sits below the plot.
ggplot(
  data = prostate_factors,
  mapping = aes(x = p_vol, y = preop_psa, colour = aa)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_grid(rows = vars(aa), cols = vars(fam_hx)) +
  labs(
    title = "Relationship Between Prostate Volume and Preop PSA,\nSubdivided by Family History and Race",
    x = "Prostate Volume",
    y = "Preoperative PSA"
  ) +
  theme(legend.position = "bottom")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 11 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Two-sample t-test of preop_psa between the two aa groups.
# detailed = TRUE requests the extended result (group estimates, confidence
# limits, method and alternative) in addition to the statistic and p-value.
t_test(prostate_factors, preop_psa ~ aa, detailed = TRUE)
## # A tibble: 1 × 15
## estimate estimate1 estimate2 .y. group1 group2 n1 n2 statistic p
## * <dbl> <dbl> <dbl> <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 -1.89 7.86 9.75 preop… White Afric… 259 54 -1.96 0.0534
## # ℹ 5 more variables: df <dbl>, conf.low <dbl>, conf.high <dbl>, method <chr>,
## # alternative <chr>