Read in all the ThermoFisher order QC files
qc1 <- read_delim("ThermoFisher_QC/Assay_Info_TaqMan_SNP_SalesOrder_76879698_RackID_7755886_1.txt",
delim = "\t", escape_double = FALSE,
comment = "*", trim_ws = TRUE)
qc2 <- read_delim("ThermoFisher_QC/Assay_Info_TaqMan_SNP_SalesOrder_76879698_RackID_7755886_2.txt",
delim = "\t", escape_double = FALSE,
comment = "*", trim_ws = TRUE)
qc3 <- read_delim("ThermoFisher_QC/Assay_Info_Custom_TaqMan_SNP_SalesOrder_76879698_RackID_7755886_3.txt",
delim = "\t", escape_double = FALSE,
comment = "*", trim_ws = TRUE)
qc5 <- read_delim("ThermoFisher_QC/Assay_Info_TaqMan_SNP_SalesOrder_76879698_RackID_7755886_5.txt",
delim = "\t", escape_double = FALSE,
comment = "*", trim_ws = TRUE)
qc6 <- read_delim("ThermoFisher_QC/Assay_Info_Custom_TaqMan_SNP_SalesOrder_76879698_RackID_7755886_6.txt",
delim = "\t", escape_double = FALSE,
comment = "*", trim_ws = TRUE)
qc7 <- read_delim("ThermoFisher_QC/Assay_Info_TaqMan_SNP_SalesOrder_76879698_RackID_7755886_7.txt",
delim = "\t", escape_double = FALSE,
comment = "*", trim_ws = TRUE)
qc8 <- read_delim("ThermoFisher_QC/Assay_Info_Custom_TaqMan_SNP_SalesOrder_76879698_RackID_7755886_8.txt",
delim = "\t", escape_double = FALSE,
comment = "*", trim_ws = TRUE)
qc9 <- read_delim("ThermoFisher_QC/Assay_Info_TaqMan_SNP_SalesOrder_76879698_RackID_7755886_9.txt",
delim = "\t", escape_double = FALSE,
comment = "*", trim_ws = TRUE)
qc10 <- read_delim("ThermoFisher_QC/Assay_Info_TaqMan_SNP_SalesOrder_76879698_RackID_7755886_10.txt",
delim = "\t", escape_double = FALSE,
comment = "*", trim_ws = TRUE)
qc11 <- read_delim("ThermoFisher_QC/Assay_Info_TaqMan_SNP_SalesOrder_76879698_RackID_7755886_11.txt",
delim = "\t", escape_double = FALSE,
comment = "*", trim_ws = TRUE)
qc12 <- read_delim("ThermoFisher_QC/Assay_Info_Custom_TaqMan_SNP_SalesOrder_76929576_RackID_7765174_1.txt",
delim = "\t", escape_double = FALSE,
comment = "*", trim_ws = TRUE)
q1 <- qc1 %>% rbind(qc2) %>% rbind(qc5) %>% rbind(qc7) %>% rbind(qc9) %>% rbind(qc10) %>% rbind(qc11) %>%
dplyr::rename(rsID=`NCBI SNP Reference`) %>% select(-`Celera ID`, -`...56`)
q2 <- qc3 %>% rbind(qc6) %>% rbind(qc8) %>% rbind(qc12) %>%
dplyr::rename(rsID=`Assay Name`) %>% select(-`NCBI SNP Reference`, -`...56`)
qc <- q1 %>% rbind(q2) %>%
mutate(x = as.data.frame(str_match(`Context Sequence`, regex("\\w+\\[(\\w+)/(\\w+)\\]")))) %>%
unnest_wider(x) %>%
dplyr::select(-V1) %>%
dplyr::rename(ref_allele_QC = V2, alt_allele_QC = V3) %>%
mutate(chrpos_QC = paste0(`Chromosome`, ':',`Location on NCBI Assembly` )) %>%
dplyr::select(`Assay ID`, `rsID`, `ref_allele_QC`, `alt_allele_QC`, `chrpos_QC`)
qc[139:149,2] <- c('rs1372420', 'rs8064765', 'rs3806760', 'rs10448130', 'rs62075803', 'rs7515370', 'rs9330264', 'rs79531911', 'rs11097213', 'rs536718528', 'rs1859223')
#`Chromosome`, `Location on NCBI Assembly`, `Forward Primer Seq.`, `Reverse Primer Seq.`, `Context Sequence`, `Design Strand`)
datatable(qc, options=list(pageLength=10))
dbSNP QC
snps_dbsnp <- snps %>% as_tibble() %>%
mutate(ref = as.data.frame(str_match(SPDI, regex("\\w+:\\d+:(\\w+):"))),
alt_alleles_dbsnp = map_chr(str_match_all(SPDI, regex("\\w+:\\d+:\\w+:(\\w+)")), ~paste(.x[,2], collapse=","))) %>%
unnest_wider(ref) %>% rename(ref_allele_dbsnp = V2) %>% select(-V1) %>%
dplyr::select('rsID', 'ref_allele_dbsnp', 'alt_alleles_dbsnp', 'ALLELE', 'CHRPOS') %>%
rename(chrpos_dbsnp = CHRPOS, allele_dbsnp= ALLELE)
snps_dbsnp
WGS QC
qc_wgs <- read_tsv("48samples_149SNPs_genotypes_by_rsID.txt") %>%
select('chrom', 'chromEnd', 'rsID', 'ref', 'alt') %>% unique() %>%
mutate(chrpos_wgs=paste0(chrom, ':', chromEnd)) %>%
select(-chrom, -chromEnd) %>%
rename(ref_wgs=ref, alt_wgs=alt)
qc_wgs
TaqMan vs dbSNP vs WGS
qc_all <- qc %>%
inner_join(snps_dbsnp, by='rsID') %>%
inner_join(qc_wgs) %>%
relocate(chrpos_QC, .after = last_col()) %>%
relocate(chrpos_dbsnp, .after = last_col()) %>%
relocate(chrpos_wgs, .after = last_col())
write_excel_csv(qc_all, "QC_all.csv")
qc_all