suppressPackageStartupMessages({
library(dplyr)
library(dbgap2x)
})
Study Disease/Focus: Atrial Fibrillation, Cardiomyopathies, Cardiomyopathy (Dilated), Cardiovascular Disease, Cardiovascular Diseases, Coronary Artery Disease, Coronary Disease, Heart Conduction System, Heart Defects (Congenital), Heart Diseases, Heart Septal Defects, Hypertension, Hypertension (Pulmonary), Myocardial Infarction, Myocardial Revascularization, Stroke, Venous Thromboembolism, Venous Thrombosis
Study Molecular Data type: SNP/CNV Genotypes (NGS), SNP Genotypes (Array), SNP Genotypes (NGS), SNP Genotypes (imputed), SNP Genotypes (PCR), Legacy Genotypes
# Downloaded
# Read the exported search results and pull the study accession ("phs" plus
# the six characters that follow it) out of the `Study` column.
x = read.csv("~/data2/BioDataCatalyst/genotype_171.csv")
study = stringr::str_extract(x$Study, "phs.{6}")
x$studyId = study

# Mark the 'parent' study
# Create the column up front: it then exists even when the export has no rows,
# and the first assignment is no longer silently recycled across every row.
x$parent = rep(NA_character_, nrow(x))
# Look each distinct accession up once rather than once per row (assumes the
# dbgap2x lookups depend only on the accession). Rows whose `Study` text
# yielded no accession keep NA.
for (id in unique(study[!is.na(study)])) {
  # A study that is itself a parent keeps its own accession; otherwise use the
  # first element returned by parent.study().
  parent = if (is.parent(id)) id else parent.study(id)[1]
  x$parent[x$studyId %in% id] = parent
}
# Save
# Persist the table, including the added `studyId` and `parent` columns,
# without writing row names.
write.csv(
  x,
  file = "~/data2/BioDataCatalyst/genotype_171_annotated.csv",
  row.names = FALSE
)
# Keep one row per distinct (studyId, parent) pair.
y = unique(x[, c("studyId", "parent")]) # merge multiple studyId
# Drop single-quote characters from the parent identifiers.
y$parent = gsub("'", "", y$parent)
# Count parents by accession, not by raw string: `parent` holds a bare
# accession when the study is its own parent and whatever parent.study()
# returned otherwise, so the same study could appear in two spellings
# (e.g. "phs000280" and "phs000280.v6.p1") and be counted twice.
length(unique(stringr::str_extract(y$parent, "phs.{6}"))) # the number of unique parent studies
## [1] 77
Query detail:
The same query as above, with one additional filter: Study Subject Count = More than 10000.
# Downloaded
# Read the exported search results and pull the study accession ("phs" plus
# the six characters that follow it) out of the `Study` column.
x = read.csv("~/data2/BioDataCatalyst/genotype_21.csv")
study = stringr::str_extract(x$Study, "phs.{6}")
x$studyId = study

# Mark the 'parent' study
# Create the column up front: it then exists even when the export has no rows,
# and the first assignment is no longer silently recycled across every row.
x$parent = rep(NA_character_, nrow(x))
# Look each distinct accession up once rather than once per row (assumes the
# dbgap2x lookups depend only on the accession). Rows whose `Study` text
# yielded no accession keep NA.
for (id in unique(study[!is.na(study)])) {
  # A study that is itself a parent keeps its own accession; otherwise use the
  # first element returned by parent.study().
  parent = if (is.parent(id)) id else parent.study(id)[1]
  x$parent[x$studyId %in% id] = parent
}
# Keep one row per distinct (studyId, parent) pair.
y = unique(x[, c("studyId", "parent")]) # merge multiple studyId
# NOTE(review): this strips a space followed by a single quote, whereas the
# 171-study query above strips a bare single quote -- confirm which form
# parent.study() actually produces.
y$parent = gsub(" '", "", y$parent)
# Count parents by accession, not by raw string: `parent` holds a bare
# accession when the study is its own parent and whatever parent.study()
# returned otherwise, so the same study could appear in two spellings
# (e.g. "phs000280" and "phs000280.v6.p1") and be counted twice.
length(unique(stringr::str_extract(y$parent, "phs.{6}"))) # the number of unique parent studies
## [1] 7
# Show the parent identifiers exactly as stored (version suffixes included).
unique(y$parent)
## [1] "phs000280.v6.p1" "phs000007.v30.p11" "phs000810.v1.p1"
## [4] "phs000925" "phs001211" "phs001237"
## [7] "phs001644"
The number of participants included in each consent group (183,077 in total, summing the TOTAL rows below)
# Reduce every parent identifier to its accession first and de-duplicate
# afterwards; de-duplicating before the extraction would keep two entries for
# one study that appears both with and without a version suffix.
a = unique(stringr::str_extract(unique(y$parent), "phs.{6}"))
# For each parent accession, print the accession followed by the table
# returned by n.pop().
for (i in seq_along(a)) {
  pop = n.pop(a[i])
  print(a[i])
  print(pop)
}
## [1] "phs000280"
## consent_group male female total
## 1 HMB-IRB 7055 8621 15682
## 2 HMB-IRB 7018 8586 15610
## 3 DS-CVD-IRB 35 33 68
## 4 TOTAL 14108 17240 31360
## [1] "phs000007"
## consent_group male female total
## 1 HMB-IRB-MDS 8964 9292 18267
## 2 HMB-IRB-MDS 6200 6926 13126
## 3 HMB-IRB-NPU-MDS 842 1169 2011
## 4 TOTAL 16006 17387 33404
## [1] "phs000810"
## consent_group male female total
## 1 HMB 5466 7780 13296
## 2 HMB-NPU 1448 2233 3681
## 3 HMB 3839 5375 9214
## 4 TOTAL 10753 15388 26191
## [1] "phs000925"
## consent_group male female total
## 1 GRU 4949 7955 13245
## 2 GRU 4864 7881 13067
## 3 TOTAL 9813 15836 26312
## [1] "phs001211"
## consent_group male female total
## 1 HMB-IRB 6110 7436 13546
## 2 HMB-IRB 6083 7403 13486
## 3 DS-CVD-IRB 27 33 60
## 4 TOTAL 12220 14872 27092
## [1] "phs001237"
## consent_group female total
## 1 HMB-IRB 11357 11357
## 2 HMB-IRB 9271 9271
## 3 HMB-IRB-NPU 2086 2086
## 4 TOTAL 22714 22714
## [1] "phs001644"
## consent_group male female total
## 1 HMB-NPU 6751 9244 16004
## 2 TOTAL 6751 9244 16004
# For each parent accession in `a`, fetch its consent groups, then print the
# accession followed by the consent-group table.
for (accession in a) {
  consent = consent.groups(accession)
  print(accession)
  print(consent)
}
## [1] "phs000280"
## shortName
## 0 NRUP
## 1 HMB-IRB
## 2 DS-CVD-IRB
## longName
## 0 Subjects did not participate in the study, did not complete a consent document and are included only for the pedigree structure and/or genotype controls, such as HapMap subjects
## 1 Health/Medical/Biomedical (IRB)
## 2 Disease-Specific (Cardiovascular Disease, IRB)
## [1] "phs000007"
## shortName
## 0 NRUP
## 1 HMB-IRB-MDS
## 2 HMB-IRB-NPU-MDS
## longName
## 0 Subjects did not participate in the study, did not complete a consent document and are included only for the pedigree structure and/or genotype controls, such as HapMap subjects
## 1 Health/Medical/Biomedical (IRB, MDS)
## 2 Health/Medical/Biomedical (IRB, NPU, MDS)
## [1] "phs000810"
## shortName
## 0 NRUP
## 1 HMB-NPU
## 2 HMB
## longName
## 0 Subjects did not participate in the study, did not complete a consent document and are included only for the pedigree structure and/or genotype controls, such as HapMap subjects
## 1 Health/Medical/Biomedical (NPU)
## 2 Health/Medical/Biomedical
## [1] "phs000925"
## shortName
## 0 NRUP
## 1 GRU
## longName
## 0 Subjects did not participate in the study, did not complete a consent document and are included only for the pedigree structure and/or genotype controls, such as HapMap subjects
## 1 General Research Use
## [1] "phs001211"
## shortName longName
## 1 HMB-IRB Health/Medical/Biomedical (IRB)
## 2 DS-CVD-IRB Disease-Specific (Cardiovascular Disease, IRB)
## [1] "phs001237"
## shortName longName
## 1 HMB-IRB Health/Medical/Biomedical (IRB)
## 2 HMB-IRB-NPU Health/Medical/Biomedical (IRB, NPU)
## [1] "phs001644"
## shortName longName
## 1 HMB-NPU Health/Medical/Biomedical (NPU)