## install packages
# Use the CRAN cloud mirror so install.packages() has a repository to pull
# from without prompting for one.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Packages this script attaches with library() further down.
required_packages <- c("tidyverse", "broom", "performance", "car", "see")

# Install only what is missing, so re-running the script does not reinstall
# every package (slow, and needs network access) each time.
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
##load library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom)
library(performance)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(see)
## Read in SomaScan data (three tab-delimited text files)
# Marker-level metadata: seqID, SOMAID, TargetName, ... (see column spec below).
soma_meta <- read_delim(
  "C:/Users/lisar/OneDrive/COPD Research/manuscripts in progress/exposure/spiromics_soma_7_meta_Jan23.txt",
  delim = "\t",
  trim_ws = TRUE,
  escape_double = FALSE
)
## Rows: 7288 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (9): seqID, SOMAID, TargetName, TargetFullName, UniProt, EntrezGeneSymbo...
## dbl (5): SeqIDVersion, Serum_Scalar_v4_1_to_v4_0, Serum_Lins_CCC, Plasma_Sca...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Measurement table: three ID columns (Lab_Adat, SUBJID, VISIT) plus the
# numeric marker columns (see column spec below).
spiromics_somascan7_log_24Juneoct24 <- read_delim(
  "C:/Users/lisar/OneDrive/COPD Research/manuscripts in progress/exposure/spiromics_somascan7_log_24Juneoct24.txt",
  delim = "\t",
  trim_ws = TRUE,
  escape_double = FALSE
)
## Rows: 5132 Columns: 7292
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): Lab_Adat, SUBJID, VISIT
## dbl (7289): X10000_28, X10001_7, X10003_15, X10006_25, X10008_43, X10010_10,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Sample-level metadata (plate, scanner, normalisation columns per spec below).
# NOTE(review): this read reports parsing issues. SampleNotes and
# SampleDescription were guessed as logical, which may be the cause --
# inspect with problems(sample_meta) before relying on those columns.
sample_meta <- read_delim(
  "C:/Users/lisar/OneDrive/COPD Research/manuscripts in progress/exposure/spiromics_soma_7_smpmeta_10oct23.txt",
  delim = "\t",
  trim_ws = TRUE,
  escape_double = FALSE
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 5159 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (12): SubjID, Lab_ADAT, VISIT, PlateId, ScannerID, PlatePosition, Sampl...
## dbl (10): SlideId, Subarray, PercentDilution, HybControlNormScale, NormScal...
## lgl (2): SampleNotes, SampleDescription
## date (1): PlateRunDate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## get SomaScan marker sequence IDs
# Start from every column name of the measurement table, then drop the three
# identifier columns; whatever remains is treated as a marker column.
markers <- colnames(spiromics_somascan7_log_24Juneoct24)
markers <- markers[!(markers %in% c("SUBJID", "VISIT", "Lab_Adat"))]
length(markers)
## [1] 7289
## Read in clinical data
# NOTE(review): this read reports parsing issues; check
# problems(core6_7_clin) right after the read to see which columns/rows are
# affected (the subset below replaces the object).
core6_7_clin <- read_csv(
  "C:/Users/lisar/OneDrive/COPD Research/manuscripts in progress/exposure/core 6.7/core6_7_clinical_250611.csv"
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 2973 Columns: 715
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (204): SUBJID, SITE, DATE_V1, DATE_V2, DATE_V3, DATE_V4, ACCEPTABILITY_S...
## dbl (501): AGE_DERV_V1, GENDER, RACE, ETHNICITY, ADVAIR_DOSE_V1, ADVAIR_PUFF...
## lgl (10): AEROBID_PUFFSDAY_V1, AZMACORT_PUFFSDAY_V1, BECLOVENT_PUFFSDAY_V1,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the columns needed for this analysis. Keep the order stable:
# RACE is column 5 and DEM04_V1 is column 10, in case later code indexes
# by position.
core6_7_clin <- core6_7_clin[, c(
  "SUBJID",
  "SITE",
  "AGE_DERV_V1",
  "GENDER",
  "RACE",
  "BMI_CM_V1",
  "SMOKING_PACK_YEARS_V1",
  "VGDF_EVER_V1",
  "COT_URN_V1",
  "DEM04_V1"
)]
## create new group columns for race and income
# Both source columns are pulled out by name as plain vectors. Indexing a
# tibble as `[, j]` keeps it a one-column tibble, which `==` turns into a
# matrix and `%in%` does not compare row by row; positional indexes also
# shift as soon as a new column is appended.

# Race_group: 1 = white (RACE == 1), 2 = non-white (any other value);
# a missing RACE stays NA.
core6_7_clin$Race_group <- ifelse(core6_7_clin$RACE == 1, 1, 2)

# income_group: 1 = income is 49,000 or less (codes 1-3),
#               2 = income > 50,000 (codes 4-6),
#               NA = any other code or missing.
# NOTE(review): DEM04_V1 is assumed to be the income item -- it is the only
# selected column not otherwise accounted for. The previous positional index
# (column 11) pointed at the freshly added Race_group column. Confirm against
# the data dictionary, including which band the 49,000-50,000 range falls in.
core6_7_clin$income_group <- ifelse(
  core6_7_clin$DEM04_V1 %in% c(1, 2, 3),
  1,
  ifelse(core6_7_clin$DEM04_V1 %in% c(4, 5, 6), 2, NA)
)
## add in exposure data and ADI
pm25_no2_exposure_and_adi <- read_csv(
  "C:/Users/lisar/OneDrive/COPD Research/manuscripts in progress/exposure/pm25 no2 exposure and adi.csv"
)
## Rows: 2973 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): SUBJID, adi_natrank, adi_staternk
## dbl (6): pm25in_1, pm25_1yr_1, pm25_1mo, no2in_1, no2_1yr_1, no2_1mo
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## add ADI to the clinical data
# Only SUBJID (the key) and adi_natrank are taken from the exposure table;
# every clinical row is kept.
core6_7_clin <- left_join(
  core6_7_clin,
  pm25_no2_exposure_and_adi[, c("SUBJID", "adi_natrank")],
  by = "SUBJID"
)
names(core6_7_clin)
## [1] "SUBJID" "SITE" "AGE_DERV_V1"
## [4] "GENDER" "RACE" "BMI_CM_V1"
## [7] "SMOKING_PACK_YEARS_V1" "VGDF_EVER_V1" "COT_URN_V1"
## [10] "DEM04_V1" "Race_group" "income_group"
## [13] "adi_natrank"
## combine clinical and exposure data into one table
# all = TRUE keeps subjects that appear in only one of the two tables.
# NOTE(review): adi_natrank is now present in both inputs (it was joined into
# core6_7_clin just above), so merge() will return two suffixed copies,
# adi_natrank.x and adi_natrank.y -- confirm downstream code expects that.
clin_exposure <- merge(
  x = core6_7_clin,
  y = pm25_no2_exposure_and_adi,
  by = "SUBJID",
  all = TRUE
)
## change SomaScan visit labels to align with SPIROMICS visit names
# Recode through a lookup table in a single pass. Labels that are not listed
# (and NA) are left unchanged. local() keeps the helper objects out of the
# global workspace.
spiromics_somascan7_log_24Juneoct24$VISIT <- local({
  visit_map <- c(
    BASELINE = "VISIT_1",
    YEAR1 = "VISIT_2",
    YEAR3 = "VISIT_4",
    VISIT5 = "VISIT_5"
  )
  visit <- spiromics_somascan7_log_24Juneoct24$VISIT
  needs_recode <- visit %in% names(visit_map)
  visit[needs_recode] <- unname(visit_map[visit[needs_recode]])
  visit
})
##prepare somascan marker data
# Note that the `echo = FALSE` parameter was added to the code chunk to
# prevent printing of the R code that generated the plot.