#Load required libraries
library(biomaRt, quietly = TRUE)
library(dplyr, quietly = TRUE)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:biomaRt':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#Import data
gData <- read.csv('FormattedSNPS.csv')
#Find options for ensembl mart
#Look at options for mart
myMarts <- listEnsembl()
#select Variation Dataset
ensembl <- useEnsembl(biomart = "snp")
## Ensembl site unresponsive, trying asia mirror
## Ensembl site unresponsive, trying useast mirror
#Look at options for dataset in that mart
myDatasets <- listDatasets(ensembl)
#Reset ensembl object to the dataset
ensembl <- useEnsembl(biomart = "snp",dataset="hsapiens_snp")
#Find options for filters
myFilters <- listFilters(ensembl)
myPhenotypes <- read.csv('phenotype_options.csv', header=FALSE)
projectPheno <- "Matrix metalloproteinase levels"
#Search with phenotype, select 6+ columns
myAttributes <- listAttributes(ensembl)
#Select attributes (choose at least 6 related to your search term/phenotype)
#There are some here that are empty and shouldn't be included
att1 <- c('refsnp_id',
'allele_1',
'minor_allele',
'minor_allele_freq',
'phenotype_name',
'phenotype_description',
'clinical_significance',
'associated_variant_risk_allele',
'p_value',
'associated_gene',
'validated')
searchResults <-getBM(att1,
filters= 'phenotype_description',
values= projectPheno,
mart=ensembl)
#Merge data
myMerge<- merge(gData,searchResults, by.x="rsid", by.y="refsnp_id")
#Use function from learnr lesson
my_search_fxn <- function(allele, genotype){
#Check if allele letter is empty
if (allele == ""){
# if it is, return FALSE to drop the row.
FALSE
} else {
# if the allele is not empty,
# use grepl() to search for the allele somewhere
# in the two genotype letters.
grepl(allele, genotype)
}
}
#Filter data
#Filter only rows where associated_variant_risk_allele is in genotype
#This example results in only one SNP and one row of data
#Most phenotypes will have to add an additional filter to get between 8-12 SNPs
# Storage logical vector
our_logic_vector<-logical(nrow(myMerge))
# Run the loop
for (i in 1:nrow(myMerge)){
our_logic_vector[i]<-my_search_fxn(myMerge[i,"associated_variant_risk_allele"],
myMerge[i,"genotype"])
}
myMerge %>%
filter(our_logic_vector) -> filteredSNPs