This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
ob <- read.delim("~/Downloads/observation(in).csv", header = TRUE, stringsAsFactors = FALSE)
# If ob is already loaded as a dataframe with one column
ob_split <- ob %>%
separate(col = 1,
into = c("observation_id","person_id","observation_concept_id",
"observation_date","observation_datetime","observation_type_concept_id",
"value_as_number","value_as_string","value_as_concept_id",
"qualifier_concept_id","unit_concept_id","provider_id",
"visit_occurrence_id","visit_detail_id","observation_source_value",
"observation_source_concept_id","unit_source_value","value_source_value",
"value_source_concept_id","some_extra1","some_extra2"),
sep = ",", fill = "right")
## Warning: Expected 21 pieces. Additional pieces discarded in 272928 rows [1, 3, 4, 5, 6,
## 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, ...].
unique(ob_split$observation_source_value)
## [1] "\"pxrd3"
## [2] "years_of_education"
## [3] "\"pxed1"
## [4] "\"pxrd15"
## [5] "\"mhoccur_oa"
## [6] "\"brthyy"
## [7] "\"dmledu"
## [8] "\"dri2"
## [9] "\"dmlhex"
## [10] "\"pxne4"
## [11] "\"viaomthd"
## [12] "0"
## [13] "\"mhoccur_fall"
## [14] "\"susmkncf"
## [15] "\"pxdhc4"
## [16] "\"pxrd51"
## [17] "\"cmtrt_glcs"
## [18] "\"pxrd50"
## [19] "\"pxrd11"
## [20] "\"pxhi1"
## [21] "moca_total_score_time"
## [22] "\"diet1"
## [23] "\"sumrjceddur"
## [24] "\"pxrd5"
## [25] "\"pxnestartts"
## [26] "\"pxrd10"
## [27] "\"ces5"
## [28] "\"pxahc10"
## [29] "\"pxfi4"
## [30] "\"cm_ibp"
## [31] "\"pxrd47"
## [32] "\"pxrd45"
## [33] "\"pxrdcmpdat"
## [34] "\"lbdattim1"
## [35] "\"cm_dcg"
## [36] "\"paidstartts"
## [37] "\"pxsficmpts"
## [38] "\"cesmpdat"
## [39] "\"ces3"
## [40] "\"diet3"
## [41] "\"mhoccur_ca"
## [42] "\"dmlfeet"
## [43] "\"diet9"
## [44] "\"pxhic3"
## [45] "\"mhoccur_circ"
## [46] "\"pxrd25"
## [47] "\"mhoccur_ms"
## [48] "\"via2"
## [49] "\"mhoccur_ear"
## [50] "\"cm_slp"
## [51] "\"pxrd33"
## [52] "\"pxrd6"
## [53] "\"pxfi5"
## [54] "\"mocacmpdattim"
## [55] "\"mhoccur_pdr"
## [56] "\"pxrd23"
## [57] "\"mhoccur_pd"
## [58] "\"pxsdohstartts"
## [59] "\"cage"
## [60] "\"pxahc9"
## [61] "\"pxne11"
## [62] "\"pxsficmpdat"
## [63] "\"pxji2"
## [64] "\"mhoccur_ra"
## [65] "\"visdat"
## [66] "\"dmlgrain"
## [67] "\"mhoccur_cogn"
## [68] "\"sualcdrv"
## [69] "\"pxnecmpts"
## [70] "\"mlcscmpdat"
## [71] "\"pxne13"
## [72] "\"diet8"
## [73] "\"diet2"
## [74] "\"pxrd16"
## [75] "\"pxsdohcmpdat"
## [76] "\"dri1"
## [77] "\"reccmpts"
## [78] "\"preconcmpts"
## [79] "\"pxrd17"
## [80] "\"sualckncf"
## [81] "\"pxrd14"
## [82] "\"dmgstartts"
## [83] "\"cm_act"
## [84] "\"paate"
## [85] "\"pxrdcmpts"
## [86] "\"mhoccur_yn"
## [87] "\"pxrd26"
## [88] "\"mhoccur_clsh"
## [89] "\"sucmpts"
## [90] "\"pxne3"
## [91] "\"pxrd18"
## [92] "\"sumrjcdur"
## [93] "\"diet6"
## [94] "\"mhoccur_rnl"
## [95] "test_upload_date"
## [96] "\"pxrd1"
## [97] "\"mh_a1c"
## [98] "\"pxahc2"
## [99] "\"pxne8"
## [100] "\"dmlcmpts"
## [101] "\"pxne14"
## [102] "clock_visuospatial_executive"
## [103] "\"pxrd30"
## [104] "\"ces2"
## [105] "\"mhterm_dm1"
## [106] "\"mhoccur_obs"
## [107] "\"pxrd43"
## [108] "\"px281401_metadata"
## [109] "\"paid_dpr"
## [110] "\"cmcmpdat"
## [111] "\"susmkstoage"
## [112] "\"pdrx9"
## [113] "\"pxfi1"
## [114] "\"px281501_metadata"
## [115] "\"scrcmpdat"
## [116] "\"pxdhc5"
## [117] "\"pxrd28"
## [118] "\"diet4"
## [119] "\"pxahc7"
## [120] "\"preconstartts"
## [121] "\"viaocmpdat"
## [122] "\"pxne16"
## [123] "\"pxhic4"
## [124] "\"paid_cml"
## [125] "\"dmlmd"
## [126] "\"fh_dm2sb"
## [127] "\"dricmpdat"
## [128] "\"pxrd53"
## [129] "\"pxahc8"
## [130] "\"pxdhc7"
## [131] "\"pxhic5"
## [132] "\"recstartts"
## [133] "\"via4"
## [134] "\"suvptoyr"
## [135] "\"scrstartts"
## [136] "\"eos_ds"
## [137] "\"viacmpdat"
## [138] "\"studyid"
## [139] "\"pxrd29"
## [140] "\"paid_eng"
## [141] "\"dmlvex"
## [142] "\"pxrd44"
## [143] "\"via6"
## [144] "\"sustartts"
## [145] "\"pxne15"
## [146] "\"ces7"
## [147] "\"dietscore"
## [148] "\"viastartts"
## [149] "\"pxhic2"
## [150] "\"via3"
## [151] "\"pxji1"
## [152] "\"diet5"
## [153] "\"pxrd49"
## [154] "\"pxrd32"
## [155] "\"pxdhc2"
## [156] "digitspan"
## [157] "\"pacmpdat"
## [158] "\"paid_scrd"
## [159] "\"mhoccur_ad"
## [160] "\"pxne5"
## [161] "\"mhoccur_plm"
## [162] "\"pxhic7"
## [163] "\"mhoccur_rvo"
## [164] "\"diet7"
## [165] "\"ces10"
## [166] "\"sulqkncf"
## [167] "\"ces8"
## [168] "\"pxrd37"
## [169] "\"dietstartts"
## [170] "\"msscmpdat"
## [171] "\"mhcmpts"
## [172] "\"pxrd52"
## [173] "\"pxahc5"
## [174] "\"mhterm_dm2"
## [175] "\"ces4"
## [176] "\"sucmpdat"
## [177] "\"pxrd13"
## [178] "\"dmlpor"
## [179] "\"pxrd39"
## [180] "\"mhoccur_glc"
## [181] "\"mhterm_predm"
## [182] "\"cm_asp"
## [183] "\"pxne9"
## [184] "\"dmlsugar"
## [185] "\"dmlact"
## [186] "\"mhoccur_strk"
## [187] "\"pxrd46"
## [188] "\"pxep1"
## [189] "\"pxne2"
## [190] "\"paidscore"
## [191] "\"pxdhc3"
## [192] "\"dmlcmpdat"
## [193] "\"reccmpdat"
## [194] "\"pxne10"
## [195] "\"mhstartts"
## [196] "\"mhoccur_hbp"
## [197] "\"pxne12"
## [198] "\"pxdhc6"
## [199] "\"viacmpts"
## [200] "\"dmgcmpdat"
## [201] "\"dmgcmpts"
## [202] "\"pxpa3"
## [203] "\"pxrd22"
## [204] "\"paid_wr"
## [205] "\"pxep3"
## [206] "\"pxpa1"
## [207] "\"px280301_metadata"
## [208] "age_years_at_interview"
## [209] "\"mhcmpdat"
## [210] "\"mhoccur_crt"
## [211] "\"px280801_metadata"
## [212] "\"pxrd31"
## [213] "\"pxrd40"
## [214] "\"cmtrt_insln"
## [215] "\"mhoccur_amd"
## [216] "\"pxrd8"
## [217] "\"mhoccur_lbp"
## [218] "\"via1"
## [219] "\"pxrd35"
## [220] "\"fh_dm2pt"
## [221] "\"dmlfrveg"
## [222] "\"pxahc1"
## [223] "\"eos_dsstdat"
## [224] "\"pxahc4"
## [225] "\"mhoccur_ded"
## [226] "\"pxrdstartts"
## [227] "\"mhoccur_cns"
## [228] "\"pxrd20"
## [229] "\"px280901_metadata"
## [230] "\"scrcmpts"
## [231] "\"cl_maristat"
## [232] "\"pxrd48"
## [233] "\"sulqdosfr"
## [234] "\"dvenvstdat"
## [235] "\"pxpa4"
## [236] "\"cestl"
## [237] "\"pxne6"
## [238] "\"pxfi3"
## [239] "\"mhoccur_cvdot"
## [240] "\"pxne17"
## [241] "\"cm_ant"
## [242] "\"ces1"
## [243] "\"pxhic1"
## [244] "\"pxrd2"
## [245] "\"pxpa2"
## [246] "\"pxne1"
## [247] "\"pxfistartts"
## [248] "\"cesstartts"
## [249] "\"pxrd24"
## [250] "\"sumrjstage"
## [251] "\"plcscmpdat"
## [252] "\"ces9"
## [253] "\"pxrd41"
## [254] "\"pxrd12"
## [255] "\"dmlstartts"
## [256] "\"faqcmpts"
## [257] "\"subrkncf"
## [258] "\"sualcage"
## [259] "\"pxrd4"
## [260] "\"mhoccur_gi"
## [261] "\"subrdosfr"
## [262] "\"suvpkncf1"
## [263] "\"pxrd21"
## [264] "\"fh_dm2sbsp"
## [265] "\"mh_dm_age"
## [266] "\"suvptoage"
## [267] "\"mhoccur_mi"
## [268] "\"dvamwstdat"
## [269] "\"ces6"
## [270] "\"sumrjkncf"
## [271] "\"sumrjtrt"
## [272] "\"pxrd34"
## [273] "\"pxrd19"
## [274] "\"pxrd7"
## [275] "\"cmtrt_lfst"
## [276] "\"pxrd38"
## [277] "\"pxne7"
## [278] "\"mhoccur_ua"
## [279] "\"paidcmpts"
## [280] "\"susmkdosfr"
## [281] "\"preconcmpdat"
## [282] "\"sumrjfr"
## [283] "\"pxrd36"
## [284] "\"px011002_metadata"
## [285] "\"cmtrt_a1c"
## [286] "\"susmkcdur"
## [287] "\"suwndosfr"
## [288] "\"pxrd27"
## [289] "\"via5"
## [290] "\"pxfi2"
## [291] "\"pxrd54"
## [292] "\"pxdhc1"
## [293] "\"susmkstaage"
## [294] "\"suwnkncf"
## [295] "\"pxep2"
## [296] "\"suvpstage"
## [297] "\"food_insecurity_change_diet_frequency"
## [298] "\"mhoccur_fallot"
## [299] "\"pxrd42"
## [300] "\"faqstartts"
## [301] "\"suvpdosfr"
## [302] "\"pxahc3"
## [303] "\"c184390_dat"
## [304] "\"pxhic8"
## [305] "\"suvpcdur"
## [306] "\"eos_dsdecod"
## [307] "\"eos_dsvst"
## [308] "\"mhcat_adot"
## [309] "\"pxahc6"
## [310] "\"icfsvyts"
## [311] "\"icfcmpts"
paate_rows <- ob_split %>%
filter(grepl("paate", observation_source_value, ignore.case = TRUE))
paate_rows = paate_rows %>% dplyr::select(value_as_string, person_id)
paate_rows$value_as_string = as.numeric(paate_rows$value_as_string)
# Convert value_as_string to numeric if it isn't already
paate_rows$value_as_string <- as.numeric(paate_rows$value_as_string)
# Filter for fasting 8 hours or more
fasting_rows <- paate_rows %>%
filter(value_as_string >= 8)
# View the result
head(fasting_rows)
## value_as_string person_id
## 1 15 1118
## 2 12 1183
## 3 13 1271
## 4 12 4066
## 5 15 7018
## 6 12 4189
# Optional: check how many people meet fasting criteria
nrow(fasting_rows)
## [1] 300
hba1c <- read.delim("~/Downloads/participants.tsv", header = TRUE, stringsAsFactors = FALSE)
biom <- read.csv("~/Downloads/biomarker_data.csv", header = TRUE, stringsAsFactors = FALSE)
df <- merge(hba1c, biom, by.x = "participant_id", by.y = "person_id")
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.