r_workflow

Packages

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)

Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout
library(gt)

Load some clinical data

library(haven)
# Read the SAS dataset directly from the remote URL into a tibble.
# No format catalog is supplied, so `catalog_file` is left as NULL.
dmae <- read_sas(
  data_file = "https://github.com/philbowsher/Foundation-of-the-R-Workflow-SAS-to-R/raw/refs/heads/master/Examples/data/dmae.sas7bdat",
  catalog_file = NULL
)

# Show the first rows and column types.
dmae
# A tibble: 3,380 × 63
   STUDYID DOMAIN USUBJID AESEQ AESPID AETERM AELLT AELLTCD AEDECOD AEPTCD AEHLT
   <chr>   <chr>  <chr>   <dbl> <chr>  <chr>  <chr>   <dbl> <chr>    <dbl> <chr>
 1 STUDY_E AE     STUDY_…     1 RAVE-… XXXXX… Hern…  1.00e7 Interv… 1.01e7 Inte…
 2 STUDY_E AE     STUDY_…     1 RAVE-… XXXXX… Acut…  1.00e7 Bronch… 1.00e7 Lowe…
 3 STUDY_E AE     STUDY_…     1 RAVE-… XXXXX… Peni…  1.00e7 Balani… 1.00e7 Cand…
 4 STUDY_E AE     STUDY_…     2 RAVE-… XXXXX… Infl…  1.00e7 Influe… 1.00e7 Infl…
 5 STUDY_E AE     STUDY_…     3 RAVE-… XXXXX… Pneu…  1.00e7 Pneumo… 1.00e7 Lowe…
 6 STUDY_E AE     STUDY_…     1 RAVE-… XXXXX… Hype…  1.00e7 Hypert… 1.00e7 Vasc…
 7 STUDY_E AE     STUDY_…     1 RAVE-… XXXXX… Geni…  1.00e7 Genita… 1.00e7 Cand…
 8 STUDY_E AE     STUDY_…     2 RAVE-… XXXXX… Bala…  1.00e7 Balano… 1.00e7 Peni…
 9 STUDY_E AE     STUDY_…     1 RAVE-… XXXXX… Loss…  1.00e7 Diabet… 1.00e7 Diab…
10 STUDY_E AE     STUDY_…     2 RAVE-… XXXXX… Non-…  1.01e7 Non-ca… 1.01e7 Pain…
# ℹ 3,370 more rows
# ℹ 52 more variables: AEHLTCD <dbl>, AEHLGT <chr>, AEHLGTCD <dbl>,
#   AEBODSYS <chr>, AEBDSYCD <dbl>, AESOC <chr>, AESOCCD <dbl>, AESEV <chr>,
#   AESER <chr>, AEACN <chr>, AEREL <chr>, AEOUT <chr>, AESCONG <chr>,
#   AESDISAB <chr>, AESDTH <chr>, AESHOSP <chr>, AESLIFE <chr>, AESMIE <chr>,
#   AECONTRT <chr>, EPOCH <chr>, AESTDTC <chr>, AEENDTC <chr>, AESTDY <dbl>,
#   AEENDY <dbl>, SUBJID <chr>, RFSTDTC <chr>, RFENDTC <chr>, RFXSTDTC <chr>, …

Data wrangling

# Keep only rows whose AGE lies in the closed interval [50, 85].
# Comma-separated predicates in filter() are combined with a logical AND;
# rows where AGE is NA are dropped, exactly as with `&`.
dmae50 <- dmae |>
  dplyr::filter(
    AGE >= 50L,
    AGE <= 85L
  )

# Show the filtered tibble.
dmae50
# A tibble: 3,291 × 63
   STUDYID DOMAIN USUBJID AESEQ AESPID AETERM AELLT AELLTCD AEDECOD AEPTCD AEHLT
   <chr>   <chr>  <chr>   <dbl> <chr>  <chr>  <chr>   <dbl> <chr>    <dbl> <chr>
 1 STUDY_E AE     STUDY_…     1 RAVE-… XXXXX… Hern…  1.00e7 Interv… 1.01e7 Inte…
 2 STUDY_E AE     STUDY_…     1 RAVE-… XXXXX… Acut…  1.00e7 Bronch… 1.00e7 Lowe…
 3 STUDY_E AE     STUDY_…     1 RAVE-… XXXXX… Peni…  1.00e7 Balani… 1.00e7 Cand…
 4 STUDY_E AE     STUDY_…     2 RAVE-… XXXXX… Infl…  1.00e7 Influe… 1.00e7 Infl…
 5 STUDY_E AE     STUDY_…     3 RAVE-… XXXXX… Pneu…  1.00e7 Pneumo… 1.00e7 Lowe…
 6 STUDY_E AE     STUDY_…     1 RAVE-… XXXXX… Hype…  1.00e7 Hypert… 1.00e7 Vasc…
 7 STUDY_E AE     STUDY_…     1 RAVE-… XXXXX… Geni…  1.00e7 Genita… 1.00e7 Cand…
 8 STUDY_E AE     STUDY_…     2 RAVE-… XXXXX… Bala…  1.00e7 Balano… 1.00e7 Peni…
 9 STUDY_E AE     STUDY_…     1 RAVE-… XXXXX… Loss…  1.00e7 Diabet… 1.00e7 Diab…
10 STUDY_E AE     STUDY_…     1 RAVE-… XXXXX… Hype…  1.00e7 Hyperg… 1.00e7 Hype…
# ℹ 3,281 more rows
# ℹ 52 more variables: AEHLTCD <dbl>, AEHLGT <chr>, AEHLGTCD <dbl>,
#   AEBODSYS <chr>, AEBDSYCD <dbl>, AESOC <chr>, AESOCCD <dbl>, AESEV <chr>,
#   AESER <chr>, AEACN <chr>, AEREL <chr>, AEOUT <chr>, AESCONG <chr>,
#   AESDISAB <chr>, AESDTH <chr>, AESHOSP <chr>, AESLIFE <chr>, AESMIE <chr>,
#   AECONTRT <chr>, EPOCH <chr>, AESTDTC <chr>, AEENDTC <chr>, AESTDY <dbl>,
#   AEENDY <dbl>, SUBJID <chr>, RFSTDTC <chr>, RFENDTC <chr>, RFXSTDTC <chr>, …

Clinical data visualization

# Bar chart counting the rows of `dmae` for each value of AESEV.
# The plot object is built once and reused for both renderings below.
dv1 <- ggplot(dmae, aes(x = AESEV)) +
  geom_bar(fill = "#112446") +
  theme_minimal()

# Static ggplot2 rendering.
dv1

# Interactive plotly rendering of the same chart.
ggplotly(dv1)
# S&P 500 example
# Inclusive date window used for both the row filter and the table subtitle
start_date <- "2010-06-07"
end_date <- "2010-06-14"

# Restrict `sp500` (presumably the dataset shipped with gt -- confirm) to the
# window and drop the `adj_close` column, which is not displayed.
sp500_window <- sp500 |>
  dplyr::filter(date >= start_date, date <= end_date) |>
  dplyr::select(-adj_close)

# Build the gt table: header first, then formatters. fmt_currency() is applied
# before the column-specific formatters so that `date` and `volume` receive
# their own formats afterwards.
sp500_window |>
  gt() |>
  tab_header(
    title = "S&P 500",
    subtitle = glue::glue("{start_date} to {end_date}")
  ) |>
  fmt_currency() |>
  fmt_date(
    columns = date,
    date_style = "wd_m_day_year"
  ) |>
  fmt_number(
    columns = volume,
    suffixing = TRUE
  )
S&P 500
2010-06-07 to 2010-06-14
date open high low close volume
Mon, Jun 14, 2010 $1,095.00 $1,105.91 $1,089.03 $1,089.63 4.43B
Fri, Jun 11, 2010 $1,082.65 $1,092.25 $1,077.12 $1,091.60 4.06B
Thu, Jun 10, 2010 $1,058.77 $1,087.85 $1,058.77 $1,086.84 5.14B
Wed, Jun 9, 2010 $1,062.75 $1,077.74 $1,052.25 $1,055.69 5.98B
Tue, Jun 8, 2010 $1,050.81 $1,063.15 $1,042.17 $1,062.00 6.19B
Mon, Jun 7, 2010 $1,065.84 $1,071.36 $1,049.86 $1,050.47 5.47B