# install.packages("devtools")
# install.packages("BiocManager")
# BiocManager::install(c('BiocGenerics', 'GenomicRanges', 'VariantAnnotation', 'Biostrings', 'IRanges'))
# devtools::install_github("jamesdiao/clinvaR")
# BiocManager::install("ensemblVEP")
#install.packages("myvariant")
#BiocManager::install("igvR")
#install.packages('BSgenome.Hsapiens.UCSC.hg19')
#BiocManager::install("variants")
#BiocManager::install("BioPred")
library(ensemblVEP)
## Loading required package: BiocGenerics
## Loading required package: generics
## 
## Attaching package: 'generics'
## The following objects are masked from 'package:base':
## 
##     as.difftime, as.factor, as.ordered, intersect, is.element, setdiff,
##     setequal, union
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, saveRDS, table, tapply, unique,
##     unsplit, which.max, which.min
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: VariantAnnotation
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
## Loading required package: Rsamtools
## Loading required package: Biostrings
## Loading required package: XVector
## 
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
## 
##     strsplit
## 
## Attaching package: 'VariantAnnotation'
## The following object is masked from 'package:base':
## 
##     tabulate
## variant_effect_predictor.pl or vep script not found. Ensembl VEP is not installed in your path.
## 
## Attaching package: 'ensemblVEP'
## The following object is masked from 'package:Biobase':
## 
##     cache
library(clinvaR)
## Loading required package: ggplot2
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:VariantAnnotation':
## 
##     select
## The following objects are masked from 'package:Biostrings':
## 
##     collapse, intersect, setdiff, setequal, union
## The following object is masked from 'package:XVector':
## 
##     slice
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:matrixStats':
## 
##     count
## The following objects are masked from 'package:GenomicRanges':
## 
##     intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, setequal, union
## The following object is masked from 'package:generics':
## 
##     explain
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: tidyr
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:VariantAnnotation':
## 
##     expand
## The following object is masked from 'package:S4Vectors':
## 
##     expand
## Loading required package: seqminer
## Loading required package: stringr
## 
## Attaching package: 'stringr'
## The following object is masked from 'package:VariantAnnotation':
## 
##     fixed
## Loading required package: RCurl
## 
## Attaching package: 'RCurl'
## The following object is masked from 'package:tidyr':
## 
##     complete
## Loading required package: knitr
library(httr)
## 
## Attaching package: 'httr'
## The following object is masked from 'package:Biobase':
## 
##     content
library(jsonlite)
#library(myvariant)
# Define the variant (using HGVS notation)
#variant <- "PTPN11:c.188A>G"
#library(variants)
#library(BioPred)


# Download ClinVar data
cvdata <- get_clinvar()
ptpngene <- cvdata[which(cvdata$GENE == "PTPN11") ,]

# Search for the specific variant c.188A>G, and get dbSNP ID
# Look for the variant in the ClinVar data 
# https://www.ncbi.nlm.nih.gov/clinvar/variation/13333/?oq=%22PTPN11%22[GENE]+c.188A%3EG+(p.Tyr63Cys)&m=NM_002834.5(PTPN11):c.188A%3EG%20(p.Tyr63Cys)%3Fterm=PTPN11%20c.188A%3EG%20(p.Tyr63Cys)

noonvar <- ptpngene[which(ptpngene$ID == "rs121918459"),]


nvepvar <- function(chromosome, position, ref_allele, alt_allele, 
                            species = "human", genome = "GRCh38") {
  if (genome == "GRCh37" || genome == "hg19") {
    server <- "https://grch37.rest.ensembl.org"
  } else if (genome == "GRCh38" || genome == "hg38") {
    server <- "https://rest.ensembl.org"
  } else {
    stop("Genome must be either 'GRCh37' or 'GRCh38'")
  }
  
  region <- paste0(chromosome, ":", position, "-", position, "/", alt_allele)
  endpoint <- paste0("/vep/", species, "/region/", region)
  response <- GET(paste0(server, endpoint),
                  content_type("application/json"),
                  accept("application/json"))
  
  if (status_code(response) == 200) {
    res <- fromJSON(toJSON(content(response)))
    return(res)
  } else {
    stop("request failed with status: ", status_code(response))
  }
}


# noonan variant consequences
noonan_variant <- nvepvar(noonvar$CHROM, noonvar$POS, noonvar$REF, noonvar$ALT, genome = "GRCh37")
conseq <- noonan_variant$transcript_consequences
print(conseq)
## [[1]]
##   hgnc_id cds_end cdna_start      gene_id   impact cds_start consequence_terms
## 1    9644     188        386 ENSG0000.... MODERATE       188      missense....
## 2    9644     188        391 ENSG0000.... MODERATE       188      missense....
## 3    9644                    ENSG0000.... MODIFIER                upstream....
##   strand gene_symbol  codons gene_symbol_source protein_end polyphen_prediction
## 1      1      PTPN11 tAt/tGt               HGNC          63        probably....
## 2      1      PTPN11 tAt/tGt               HGNC          63        probably....
## 3      1      PTPN11                       HGNC                                
##   polyphen_score variant_allele protein_start sift_score amino_acids cdna_end
## 1          0.998              G            63          0         Y/C      386
## 2          0.999              G            63          0         Y/C      391
## 3                             G                                              
##        biotype sift_prediction transcript_id        flags distance
## 1 protein_....     deleterious  ENST0000....                      
## 2 protein_....     deleterious  ENST0000....                      
## 3 protein_....                  ENST0000.... cds_start_NF     2961