Show the R version and home directory

R version

R.version
##                _                           
## platform       aarch64-apple-darwin20      
## arch           aarch64                     
## os             darwin20                    
## system         aarch64, darwin20           
## status                                     
## major          4                           
## minor          5.2                         
## year           2025                        
## month          10                          
## day            31                          
## svn rev        88974                       
## language       R                           
## version.string R version 4.5.2 (2025-10-31)
## nickname       [Not] Part in a Rumble

Show all environment variables (the home directory is the `HOME` entry)

Sys.getenv()
## __CF_USER_TEXT_ENCODING
##                         0x1F5:0x0:0x0
## __CFBundleIdentifier    com.rstudio.desktop
## CLICOLOR_FORCE          1
## COMMAND_MODE            unix2003
## DISPLAY                 :0
## DYLD_FALLBACK_LIBRARY_PATH
##                         /Library/Frameworks/R.framework/Resources/lib:/Library/Java/JavaVirtualMachines/jdk-11.0.18+10/Contents/Home/lib/server
## EDITOR                  vi
## GIT_ASKPASS             rpostback-askpass
## HOME                    /Users/fauzan
## LANG                    en_US.UTF-8
## LC_CTYPE                en_US.UTF-8
## LN_S                    ln -s
## LOGNAME                 fauzan
## MAKE                    make
## MallocNanoZone          0
## MPLENGINE               tkAgg
## NOT_CRAN                true
## ORIGINAL_XDG_CURRENT_DESKTOP
##                         undefined
## OSLogRateLimit          64
## PAGER                   /usr/bin/less
## PATH                    /usr/local/bin:/System/Cryptexes/App/usr/bin:/usr/bin:/bin:/usr/sbin:/sbin:/var/run/com.apple.security.cryptexd/codex.system/bootstrap/usr/local/bin:/var/run/com.apple.security.cryptexd/codex.system/bootstrap/usr/bin:/var/run/com.apple.security.cryptexd/codex.system/bootstrap/usr/appleinternal/bin:/Applications/quarto/bin:/Library/TeX/texbin:/usr/texbin:/private/var/folders/qb/_8p5wt4948x_q3ygct_d64mw0000gn/T/AppTranslocation/A009C335-F6B2-453D-A1DD-793EFECE87C0/d/RStudio.app/Contents/Resources/app/quarto/bin:/private/var/folders/qb/_8p5wt4948x_q3ygct_d64mw0000gn/T/AppTranslocation/A009C335-F6B2-453D-A1DD-793EFECE87C0/d/RStudio.app/Contents/Resources/app/bin/postback
## PWD                     /Users/fauzan
## PYTHONIOENCODING        utf-8
## R_ARCH                  
## R_BROWSER               /usr/bin/open
## R_BZIPCMD               /usr/bin/bzip2
## R_CLI_HAS_HYPERLINK_IDE_HELP
##                         true
## R_CLI_HAS_HYPERLINK_IDE_RUN
##                         true
## R_CLI_HAS_HYPERLINK_IDE_VIGNETTE
##                         true
## R_DOC_DIR               /Library/Frameworks/R.framework/Resources/doc
## R_GZIPCMD               /usr/bin/gzip
## R_HOME                  /Library/Frameworks/R.framework/Resources
## R_INCLUDE_DIR           /Library/Frameworks/R.framework/Resources/include
## R_LIBS                  /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library
## R_LIBS_SITE             /Library/Frameworks/R.framework/Resources/site-library
## R_LIBS_USER             /Users/fauzan/Library/R/arm64/4.5/library
## R_PAPERSIZE             a4
## R_PAPERSIZE_USER        a4
## R_PDFVIEWER             /usr/bin/open
## R_PLATFORM              aarch64-apple-darwin20
## R_PRINTCMD              lpr
## R_QPDF                  /Library/Frameworks/R.framework/Resources/bin/qpdf
## R_RD4PDF                times,inconsolata,hyper
## R_RUNTIME               
## R_SESSION_TMPDIR        /var/folders/qb/_8p5wt4948x_q3ygct_d64mw0000gn/T//Rtmp3cAuWb
## R_SHARE_DIR             /Library/Frameworks/R.framework/Resources/share
## R_STRIP_SHARED_LIB      strip -x
## R_STRIP_STATIC_LIB      strip -S
## R_TEXI2DVICMD           /opt/R/arm64/bin/texi2dvi
## R_UNZIPCMD              /usr/bin/unzip
## R_ZIPCMD                /usr/bin/zip
## RMARKDOWN_MATHJAX_PATH
##                         /private/var/folders/qb/_8p5wt4948x_q3ygct_d64mw0000gn/T/AppTranslocation/A009C335-F6B2-453D-A1DD-793EFECE87C0/d/RStudio.app/Contents/Resources/app/resources/mathjax-27
## RMARKDOWN_PREVIEW_DIR   /var/folders/qb/_8p5wt4948x_q3ygct_d64mw0000gn/T//RtmpS4zL4f
## RS_LOG_LEVEL            WARN
## RS_RPOSTBACK_PATH       /private/var/folders/qb/_8p5wt4948x_q3ygct_d64mw0000gn/T/AppTranslocation/A009C335-F6B2-453D-A1DD-793EFECE87C0/d/RStudio.app/Contents/Resources/app/bin/rpostback
## RS_SHARED_SECRET        bd08d9e1-91ce-493e-ac63-3158599c870f
## RSTUDIO                 1
## RSTUDIO_CHILD_PROCESS_PANE
##                         render
## RSTUDIO_CLI_HYPERLINKS
##                         true
## RSTUDIO_CONSOLE_COLOR   256
## RSTUDIO_CONSOLE_WIDTH   144
## RSTUDIO_DESKTOP_EXE     /private/var/folders/qb/_8p5wt4948x_q3ygct_d64mw0000gn/T/AppTranslocation/A009C335-F6B2-453D-A1DD-793EFECE87C0/d/RStudio.app/Contents/MacOS/RStudio
## RSTUDIO_FALLBACK_LIBRARY_PATH
##                         /var/folders/qb/_8p5wt4948x_q3ygct_d64mw0000gn/T/rstudio-fallback-library-path-859225681
## RSTUDIO_LONG_VERSION    2025.09.2+418
## RSTUDIO_PANDOC          /private/var/folders/qb/_8p5wt4948x_q3ygct_d64mw0000gn/T/AppTranslocation/A009C335-F6B2-453D-A1DD-793EFECE87C0/d/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64
## RSTUDIO_PROGRAM_MODE    desktop
## RSTUDIO_SESSION_PID     96394
## RSTUDIO_SESSION_PORT    39990
## RSTUDIO_USER_IDENTITY   fauzan
## RSTUDIO_VERSION         2025.09.2.418
## RSTUDIOAPI_IPC_REQUESTS_FILE
##                         /var/folders/qb/_8p5wt4948x_q3ygct_d64mw0000gn/T/RtmpS4zL4f/rstudio-ipc-requests-1788a7836c1fa.rds
## RSTUDIOAPI_IPC_RESPONSE_FILE
##                         /var/folders/qb/_8p5wt4948x_q3ygct_d64mw0000gn/T/RtmpS4zL4f/rstudio-ipc-response-1788a52f941be.rds
## RSTUDIOAPI_IPC_SHARED_SECRET
##                         86e50a56-5c4f-43f9-9d1e-5fb0013770dc
## SED                     /usr/bin/sed
## SF_PARTNER              posit_rstudio
## SHELL                   /bin/zsh
## SHLVL                   0
## SPARK_CONNECT_USER_AGENT
##                         posit-rstudio
## SSH_ASKPASS             rpostback-askpass
## SSH_AUTH_SOCK           /private/tmp/com.apple.launchd.hVLGjCKiJT/Listeners
## TAR                     /usr/bin/tar
## TERM                    xterm-256color
## TMPDIR                  /var/folders/qb/_8p5wt4948x_q3ygct_d64mw0000gn/T/
## TZDIR                   /usr/share/zoneinfo
## USER                    fauzan
## XPC_FLAGS               0x0
## XPC_SERVICE_NAME        application.com.rstudio.desktop.17.22

Run basic functions

Read the CSV data

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Build the path from the user's home directory instead of hard-coding
# "/Users/fauzan", so the document also knits under other accounts/machines.
SLSII <- read_csv(path.expand("~/SLSII_PFT_clin_baseline.csv"))
## Rows: 97 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): ScreenID, Treatment
## dbl (10): FVC_predicted, TLC_liter, DLCO_predicted, Male, age, mrss_n1, BDI,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(SLSII)
## # A tibble: 6 × 12
##   ScreenID  Treatment FVC_predicted TLC_liter DLCO_predicted  Male   age mrss_n1
##   <chr>     <chr>             <dbl>     <dbl>          <dbl> <dbl> <dbl>   <dbl>
## 1 01-J-S-S… MMF               0.805      3.71          0.501     0  47.4      27
## 2 02-AGR-S… MMF               0.661      4.11          0.686     0  50.0       7
## 3 02-AML-S… CYC               0.811      4.01          0.801     0  54.9      32
## 4 02-CAB-S… MMF               0.610      2.92          0.462     0  40.0      10
## 5 02-ELT-S… MMF               0.525      3.52          0.466     0  70.4      39
## 6 02-ETM-S… CYC               0.680      2.94          0.867     0  55.0       2
## # ℹ 4 more variables: BDI <dbl>, disease_duration_yr <dbl>,
## #   Raynaud_Diag_yn <dbl>, Ray_duration_yr <dbl>

Get an overview of the data structure

str(SLSII)
## spc_tbl_ [97 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ ScreenID           : chr [1:97] "01-J-S-S103" "02-AGR-S103" "02-AML-S122" "02-CAB-S104" ...
##  $ Treatment          : chr [1:97] "MMF" "MMF" "CYC" "MMF" ...
##  $ FVC_predicted      : num [1:97] 0.805 0.661 0.811 0.61 0.525 ...
##  $ TLC_liter          : num [1:97] 3.71 4.11 4.01 2.92 3.52 ...
##  $ DLCO_predicted     : num [1:97] 0.501 0.686 0.801 0.462 0.466 ...
##  $ Male               : num [1:97] 0 0 0 0 0 0 0 1 1 0 ...
##  $ age                : num [1:97] 47.4 50 54.9 40 70.4 ...
##  $ mrss_n1            : num [1:97] 27 7 32 10 39 2 5 5 41 19 ...
##  $ BDI                : num [1:97] 9 9 12 7 8 9 6 6 10 8 ...
##  $ disease_duration_yr: num [1:97] 2.58 2.25 1.02 1.85 4.92 ...
##  $ Raynaud_Diag_yn    : num [1:97] 1 1 1 1 1 1 1 1 1 1 ...
##  $ Ray_duration_yr    : num [1:97] 2.58 6.67 1.02 1.6 4.66 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   ScreenID = col_character(),
##   ..   Treatment = col_character(),
##   ..   FVC_predicted = col_double(),
##   ..   TLC_liter = col_double(),
##   ..   DLCO_predicted = col_double(),
##   ..   Male = col_double(),
##   ..   age = col_double(),
##   ..   mrss_n1 = col_double(),
##   ..   BDI = col_double(),
##   ..   disease_duration_yr = col_double(),
##   ..   Raynaud_Diag_yn = col_double(),
##   ..   Ray_duration_yr = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

Another way to get an overview of the data structure

glimpse(SLSII)
## Rows: 97
## Columns: 12
## $ ScreenID            <chr> "01-J-S-S103", "02-AGR-S103", "02-AML-S122", "02-C…
## $ Treatment           <chr> "MMF", "MMF", "CYC", "MMF", "MMF", "CYC", "CYC", "…
## $ FVC_predicted       <dbl> 0.8049215, 0.6609544, 0.8114863, 0.6095501, 0.5254…
## $ TLC_liter           <dbl> 3.710000, 4.113333, 4.006667, 2.923333, 3.523333, …
## $ DLCO_predicted      <dbl> 0.5008026, 0.6855451, 0.8005001, 0.4623120, 0.4659…
## $ Male                <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0,…
## $ age                 <dbl> 47.37534, 49.95617, 54.90411, 40.00548, 70.42740, …
## $ mrss_n1             <dbl> 27, 7, 32, 10, 39, 2, 5, 5, 41, 19, 6, 17, 5, 20, …
## $ BDI                 <dbl> 9, 9, 12, 7, 8, 9, 6, 6, 10, 8, 8, 7, 6, 10, 1, 9,…
## $ disease_duration_yr <dbl> 2.5753420, 2.2493150, 1.0191780, 1.8547950, 4.9150…
## $ Raynaud_Diag_yn     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ Ray_duration_yr     <dbl> 2.5753420, 6.6712330, 1.0191780, 1.6027400, 4.6630…

Get the number of rows and columns

dim(SLSII)
## [1] 97 12

Get the number of rows

nrow(SLSII)
## [1] 97

Get the number of columns

ncol(SLSII)
## [1] 12

Select rows 1 to 3 and columns 1 to 5 of the data

SLSII[1:3,1:5]
## # A tibble: 3 × 5
##   ScreenID    Treatment FVC_predicted TLC_liter DLCO_predicted
##   <chr>       <chr>             <dbl>     <dbl>          <dbl>
## 1 01-J-S-S103 MMF               0.805      3.71          0.501
## 2 02-AGR-S103 MMF               0.661      4.11          0.686
## 3 02-AML-S122 CYC               0.811      4.01          0.801

Show the mean of the age column

mean(SLSII$age)
## [1] 51.58783

Show the variance of the age column

var(SLSII$age)
## [1] 81.71442

Show the summary statistics of the age column

summary(SLSII$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   29.99   44.43   51.88   51.59   58.39   70.43

Show the mean of the age column after removing NA (missing) values

# na.rm = TRUE drops missing values before averaging; TRUE is spelled out
# because the shorthand T is an ordinary variable that can be reassigned.
mean(SLSII$age, na.rm = TRUE)
## [1] 51.58783

Show the distribution of a numeric variable with a stem-and-leaf plot; here each stem is the tens digit and each leaf is the units digit of one (rounded) observation

stem(SLSII$age)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   2 | 
##   3 | 04
##   3 | 55567799
##   4 | 000012233344444
##   4 | 55677777899
##   5 | 0000111111222223333444
##   5 | 555555666777888999
##   6 | 0000001123334
##   6 | 5567778
##   7 | 0

Show the data distribution as a boxplot, whose box marks Q1, the median, and Q3

boxplot(SLSII$age)

Show the data distribution as a histogram

hist(SLSII$age)

Show the frequency of each value in a single categorical column

table(SLSII$Male)
## 
##  0  1 
## 72 25

Show the frequency of each combination of values across two categorical columns (a cross-tabulation)

table(SLSII$Male, SLSII$Treatment)
##    
##     CYC MMF
##   0  39  33
##   1   8  17

Show the row proportions of the cross-tabulation (each row sums to 1)

# Proportions within each row of the Male-by-Treatment table:
# margin = 1 divides every count by its row total.
prop.table(table(SLSII$Male, SLSII$Treatment), margin = 1)
##    
##           CYC       MMF
##   0 0.5416667 0.4583333
##   1 0.3200000 0.6800000

Show the column proportions of the cross-tabulation (each column sums to 1)

# Proportions within each column of the Male-by-Treatment table:
# margin = 2 divides every count by its column total.
prop.table(table(SLSII$Male, SLSII$Treatment), margin = 2)
##    
##           CYC       MMF
##   0 0.8297872 0.6600000
##   1 0.1702128 0.3400000

Show each cell of the cross-tabulation as a proportion of the grand total (all cells sum to 1)

prop.table(table(SLSII$Male, SLSII$Treatment))
##    
##            CYC        MMF
##   0 0.40206186 0.34020619
##   1 0.08247423 0.17525773