#Step 1: Download First100SNPs.csv file from Canvas and upload to a new
#Posit Workspace
#Step 2: Use read.csv() to get data from a spreadsheet, get one column
#Note: this is the format used by companies like 23andMe and Ancestry to report
#genotypes
#Read the genotype spreadsheet into a data frame (one row per SNP)
mySNPs <- read.csv("First100SNPs.csv")
#Fail early, before the network query, if the rsid column is missing:
#mySNPs$rsid would silently be NULL and the merge in Step 4 needs this column
if (!"rsid" %in% names(mySNPs)) {
  stop("First100SNPs.csv must contain an 'rsid' column", call. = FALSE)
}
#Make a new variable that is just the rsid column data
myRSIDs <- mySNPs$rsid
#Step 3: Install bioconductor and biomaRt tools
#(one-time setup; uncomment the lines below if biomaRt is not installed yet)
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#
# BiocManager::install("biomaRt")
library(biomaRt)
#Run a Biomart search to download minor allele frequency for each SNP
#Connect to the ENSEMBL_MART_SNP mart using the hsapiens_snp dataset
ensembl <- useMart("ENSEMBL_MART_SNP", dataset = "hsapiens_snp")
#Ask for the SNP ID and minor allele frequency of every rsid in myRSIDs
searchResults <- getBM(
  attributes = c("refsnp_id", "minor_allele_freq"),
  filters = "snp_filter",
  values = myRSIDs,
  mart = ensembl
)
#Step 4: Merge mySNPs and searchResults
#Rows are matched on the SNP ID (rsid in mySNPs, refsnp_id in searchResults);
#merge() keeps only SNPs present in both data frames by default
mergedDFs <- merge(mySNPs, searchResults, by.x = "rsid", by.y = "refsnp_id")
#Step 5: Find rare mutations (MAF < 0.01)
#subset() evaluates the condition inside mergedDFs and drops rows where the
#frequency is NA
rareDF <- subset(mergedDFs, minor_allele_freq < 0.01)
#Step 6: Output as .csv file
#No row names and no quoting, so the file holds only the plain column values
write.csv(rareDF, "rare.csv", row.names = FALSE, quote = FALSE)