# install.packages("devtools")
# install.packages("BiocManager")
# BiocManager::install(c('BiocGenerics', 'GenomicRanges', 'VariantAnnotation', 'Biostrings', 'IRanges'))
# devtools::install_github("jamesdiao/clinvaR")
# BiocManager::install("ensemblVEP")
#install.packages("myvariant")
#BiocManager::install("igvR")
#install.packages('BSgenome.Hsapiens.UCSC.hg19')
#BiocManager::install("variants")
#BiocManager::install("BioPred")
library(ensemblVEP)
## Loading required package: BiocGenerics
## Loading required package: generics
##
## Attaching package: 'generics'
## The following objects are masked from 'package:base':
##
## as.difftime, as.factor, as.ordered, intersect, is.element, setdiff,
## setequal, union
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, saveRDS, table, tapply, unique,
## unsplit, which.max, which.min
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: VariantAnnotation
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
## Loading required package: Rsamtools
## Loading required package: Biostrings
## Loading required package: XVector
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
##
## Attaching package: 'VariantAnnotation'
## The following object is masked from 'package:base':
##
## tabulate
## variant_effect_predictor.pl or vep script not found. Ensembl VEP is not installed in your path.
##
## Attaching package: 'ensemblVEP'
## The following object is masked from 'package:Biobase':
##
## cache
library(clinvaR)
## Loading required package: ggplot2
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:VariantAnnotation':
##
## select
## The following objects are masked from 'package:Biostrings':
##
## collapse, intersect, setdiff, setequal, union
## The following object is masked from 'package:XVector':
##
## slice
## The following object is masked from 'package:Biobase':
##
## combine
## The following object is masked from 'package:matrixStats':
##
## count
## The following objects are masked from 'package:GenomicRanges':
##
## intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, setequal, union
## The following object is masked from 'package:generics':
##
## explain
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: tidyr
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:VariantAnnotation':
##
## expand
## The following object is masked from 'package:S4Vectors':
##
## expand
## Loading required package: seqminer
## Loading required package: stringr
##
## Attaching package: 'stringr'
## The following object is masked from 'package:VariantAnnotation':
##
## fixed
## Loading required package: RCurl
##
## Attaching package: 'RCurl'
## The following object is masked from 'package:tidyr':
##
## complete
## Loading required package: knitr
library(httr)
##
## Attaching package: 'httr'
## The following object is masked from 'package:Biobase':
##
## content
library(jsonlite)
#library(myvariant)
# Define the variant (using HGVS notation)
#variant <- "PTPN11:c.188A>G"
#library(variants)
#library(BioPred)
# Download ClinVar data
cvdata <- get_clinvar()
ptpngene <- cvdata[which(cvdata$GENE == "PTPN11") ,]
# Search for the specific variant c.188A>G, and get dbSNP ID
# Look for the variant in the ClinVar data
# https://www.ncbi.nlm.nih.gov/clinvar/variation/13333/?oq=%22PTPN11%22[GENE]+c.188A%3EG+(p.Tyr63Cys)&m=NM_002834.5(PTPN11):c.188A%3EG%20(p.Tyr63Cys)%3Fterm=PTPN11%20c.188A%3EG%20(p.Tyr63Cys)
noonvar <- ptpngene[which(ptpngene$ID == "rs121918459"),]
nvepvar <- function(chromosome, position, ref_allele, alt_allele,
species = "human", genome = "GRCh38") {
if (genome == "GRCh37" || genome == "hg19") {
server <- "https://grch37.rest.ensembl.org"
} else if (genome == "GRCh38" || genome == "hg38") {
server <- "https://rest.ensembl.org"
} else {
stop("Genome must be either 'GRCh37' or 'GRCh38'")
}
region <- paste0(chromosome, ":", position, "-", position, "/", alt_allele)
endpoint <- paste0("/vep/", species, "/region/", region)
response <- GET(paste0(server, endpoint),
content_type("application/json"),
accept("application/json"))
if (status_code(response) == 200) {
res <- fromJSON(toJSON(content(response)))
return(res)
} else {
stop("request failed with status: ", status_code(response))
}
}
# noonan variant consequences
noonan_variant <- nvepvar(noonvar$CHROM, noonvar$POS, noonvar$REF, noonvar$ALT, genome = "GRCh37")
conseq <- noonan_variant$transcript_consequences
print(conseq)
## [[1]]
## hgnc_id cds_end cdna_start gene_id impact cds_start consequence_terms
## 1 9644 188 386 ENSG0000.... MODERATE 188 missense....
## 2 9644 188 391 ENSG0000.... MODERATE 188 missense....
## 3 9644 ENSG0000.... MODIFIER upstream....
## strand gene_symbol codons gene_symbol_source protein_end polyphen_prediction
## 1 1 PTPN11 tAt/tGt HGNC 63 probably....
## 2 1 PTPN11 tAt/tGt HGNC 63 probably....
## 3 1 PTPN11 HGNC
## polyphen_score variant_allele protein_start sift_score amino_acids cdna_end
## 1 0.998 G 63 0 Y/C 386
## 2 0.999 G 63 0 Y/C 391
## 3 G
## biotype sift_prediction transcript_id flags distance
## 1 protein_.... deleterious ENST0000....
## 2 protein_.... deleterious ENST0000....
## 3 protein_.... ENST0000.... cds_start_NF 2961