R Markdown

##install packages

# Install only the required packages that are not already present in the
# library. Re-running this script then no longer re-downloads and reinstalls
# every package (and needs no network access once everything is installed).
options(repos = c(CRAN = "https://cloud.r-project.org"))
required_packages <- c("tidyverse", "broom", "performance", "car", "see")
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

##load library

# Attach the packages used by the analysis. Lines starting with `## ` are the
# startup messages that were printed when each package was attached.
# Attaching the tidyverse makes dplyr::filter() and dplyr::lag() mask the
# stats versions (see the conflicts listed below).
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom)
library(performance)
# car is attached after the tidyverse, so (per the messages below) an
# unqualified recode() resolves to car::recode and some() to car::some.
# Use dplyr::recode() / purrr::some() explicitly if those versions are wanted.
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(see)

##Read in SomaScan Data

# SomaScan metadata table (tab-delimited); its columns include seqID, SOMAID,
# TargetName and UniProt according to the column specification printed below.
soma_meta <- read_delim(
  file = "C:/Users/lisar/OneDrive/COPD Research/manuscripts in progress/exposure/spiromics_soma_7_meta_Jan23.txt",
  delim = "\t",
  escape_double = FALSE,
  trim_ws = TRUE
)
## Rows: 7288 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (9): seqID, SOMAID, TargetName, TargetFullName, UniProt, EntrezGeneSymbo...
## dbl (5): SeqIDVersion, Serum_Scalar_v4_1_to_v4_0, Serum_Lins_CCC, Plasma_Sca...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# SomaScan measurement table (tab-delimited): three character identifier
# columns (Lab_Adat, SUBJID, VISIT) followed by the numeric marker columns.
spiromics_somascan7_log_24Juneoct24 <- read_delim(
  file = "C:/Users/lisar/OneDrive/COPD Research/manuscripts in progress/exposure/spiromics_somascan7_log_24Juneoct24.txt",
  delim = "\t",
  escape_double = FALSE,
  trim_ws = TRUE
)
## Rows: 5132 Columns: 7292
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr    (3): Lab_Adat, SUBJID, VISIT
## dbl (7289): X10000_28, X10001_7, X10003_15, X10006_25, X10008_43, X10010_10,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Sample-level metadata (tab-delimited). readr reported parsing issues for
# this file (warning below); inspect them with problems(sample_meta).
sample_meta <- read_delim(
  file = "C:/Users/lisar/OneDrive/COPD Research/manuscripts in progress/exposure/spiromics_soma_7_smpmeta_10oct23.txt",
  delim = "\t",
  escape_double = FALSE,
  trim_ws = TRUE
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 5159 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (12): SubjID, Lab_ADAT, VISIT, PlateId, ScannerID, PlatePosition, Sampl...
## dbl  (10): SlideId, Subarray, PercentDilution, HybControlNormScale, NormScal...
## lgl   (2): SampleNotes, SampleDescription
## date  (1): PlateRunDate
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

##get somascan marker sequence IDs

# Every column of the measurement table other than the three identifier
# columns is treated as a SomaScan marker; column order is preserved.
markers <- base::setdiff(
  colnames(spiromics_somascan7_log_24Juneoct24),
  c("SUBJID", "VISIT", "Lab_Adat")
)
length(markers)
## [1] 7289

##Read in clinical data

# Clinical data (CSV). readr reported parsing issues for this file (warning
# below); inspect them with problems(core6_7_clin) before the column subset.
core6_7_clin <- read_csv(
  file = "C:/Users/lisar/OneDrive/COPD Research/manuscripts in progress/exposure/core 6.7/core6_7_clinical_250611.csv"
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 2973 Columns: 715
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (204): SUBJID, SITE, DATE_V1, DATE_V2, DATE_V3, DATE_V4, ACCEPTABILITY_S...
## dbl (501): AGE_DERV_V1, GENDER, RACE, ETHNICITY, ADVAIR_DOSE_V1, ADVAIR_PUFF...
## lgl  (10): AEROBID_PUFFSDAY_V1, AZMACORT_PUFFSDAY_V1, BECLOVENT_PUFFSDAY_V1,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# keep only the ten columns needed downstream, in this order
core6_7_clin <- core6_7_clin %>%
  dplyr::select(
    SUBJID, SITE, AGE_DERV_V1, GENDER, RACE, BMI_CM_V1,
    SMOKING_PACK_YEARS_V1, VGDF_EVER_V1, COT_URN_V1, DEM04_V1
  )

##create new group column for race and income

# race_group column <- 1 = white, 2 = non-white (NA where RACE is missing)
# RACE is referenced by name with `$` so the comparison runs on a plain vector;
# `core6_7_clin[, 5]` on a tibble is a one-column tibble, which made Race_group
# a one-column matrix instead of an ordinary column.
core6_7_clin$Race_group <- ifelse(core6_7_clin$RACE == 1, 1, 2)

# income column <- 1 = income is 49,000 or less, 2 = income > 50,000,
# NA for any other or missing response.
# The source column is referenced by name: once Race_group has been added,
# position 11 is Race_group itself, and `%in%` applied to a one-column tibble
# returns a single FALSE rather than one value per row, so every income_group
# value came out as NA.
# NOTE(review): assumes DEM04_V1 is the income item with codes 1-3 / 4-6 --
# confirm against the data dictionary.
core6_7_clin$income_group <- ifelse(
  core6_7_clin$DEM04_V1 %in% c(1, 2, 3),
  1,
  ifelse(core6_7_clin$DEM04_V1 %in% c(4, 5, 6), 2, NA)
)

##add in exposure data and adi

# Exposure table (CSV): SUBJID, six numeric PM2.5 / NO2 columns and two
# character ADI rank columns, per the column specification printed below.
pm25_no2_exposure_and_adi <- read_csv(
  file = "C:/Users/lisar/OneDrive/COPD Research/manuscripts in progress/exposure/pm25 no2 exposure and adi.csv"
)
## Rows: 2973 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): SUBJID, adi_natrank, adi_staternk
## dbl (6): pm25in_1, pm25_1yr_1, pm25_1mo, no2in_1, no2_1yr_1, no2_1mo
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# attach adi_natrank to the clinical table, matching rows on SUBJID
core6_7_clin <- left_join(
  core6_7_clin,
  dplyr::select(pm25_no2_exposure_and_adi, SUBJID, adi_natrank),
  by = "SUBJID"
)

# print the column names to confirm the clinical table layout
colnames(core6_7_clin)
##  [1] "SUBJID"                "SITE"                  "AGE_DERV_V1"          
##  [4] "GENDER"                "RACE"                  "BMI_CM_V1"            
##  [7] "SMOKING_PACK_YEARS_V1" "VGDF_EVER_V1"          "COT_URN_V1"           
## [10] "DEM04_V1"              "Race_group"            "income_group"         
## [13] "adi_natrank"

##combine clinical and exposure into table

# Full outer join of the clinical and exposure tables on SUBJID (rows that
# appear in only one table are kept).
# NOTE(review): adi_natrank is present in both inputs, so merge() returns it
# twice as adi_natrank.x / adi_natrank.y -- confirm downstream code expects
# the suffixed names.
clin_exposure <- merge(
  x = core6_7_clin,
  y = pm25_no2_exposure_and_adi,
  by = "SUBJID",
  all.x = TRUE,
  all.y = TRUE
)

##change somainput year names to align with SPIROMICS Visit names

# Relabel the SomaScan visit codes in one pass using a lookup table.
# Values that are not in the lookup (including NA) are left unchanged.
# local() keeps the helper variables out of the global environment.
spiromics_somascan7_log_24Juneoct24$VISIT <- local({
  visit_labels <- c(
    BASELINE = "VISIT_1",
    YEAR1 = "VISIT_2",
    YEAR3 = "VISIT_4",
    VISIT5 = "VISIT_5"
  )
  visits <- spiromics_somascan7_log_24Juneoct24$VISIT
  relabelled <- unname(visit_labels[visits])
  # a lookup miss yields NA, in which case the original value is kept
  ifelse(is.na(relabelled), visits, relabelled)
})

##prepare somascan marker data

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.