Install the development branch of taxize from GitHub

# install.packages('devtools')
# library(devtools)
# install_github('taxize_', 'ropensci', 'local_sql')
library(taxize)
## Loading required package: RSQLite
## Loading required package: DBI
library(plyr)

Download the SQLite database from Dropbox

https://www.dropbox.com/s/gz1vvsu2d0qps19/itis2_sqlite.zip

Set path to database

Just set the path to your sqlite database once, then choose locally = TRUE in your ITIS function calls to use the local database. This creates an object in your options named “conn”, which the ITIS functions call when locally = TRUE.

taxize:::taxize_options(localpath = "~/github/ropensci/sql/itis2.sqlite")  # change to your path

Some examples

Search by scientific name

Single search

Using the web API

searchbyscientificname("Tardigrada")
##           combinedname    tsn
## 1   Rotaria tardigrada  58274
## 2 Notommata tardigrada  58898
## 3  Pilargis tardigrada  65562
## 4           Tardigrada 155166
## 5     Heterotardigrada 155167
## 6     Arthrotardigrada 155168
## 7       Mesotardigrada 155358
## 8         Eutardigrada 155362
## 9  Scytodes tardigrada 866744

Using local search

searchbyscientificname("Tardigrada", locally = TRUE)
##           combinedname    tsn
## 1   Rotaria tardigrada  58274
## 2 Notommata tardigrada  58898
## 3  Pilargis tardigrada  65562
## 4           Tardigrada 155166
## 5     Heterotardigrada 155167
## 6     Arthrotardigrada 155168
## 7       Mesotardigrada 155358
## 8         Eutardigrada 155362
## 9  Scytodes tardigrada 866744

As you can see, there is some performance improvement even with single queries

system.time(searchbyscientificname("Tardigrada"))
##    user  system elapsed 
##   0.015   0.001   1.549
system.time(searchbyscientificname("Tardigrada", locally = TRUE))
##    user  system elapsed 
##   0.478   0.072   0.550

Search for multiple names

Using the web API, you have to submit names one at a time, e.g., with an lapply-style function (here, plyr's ldply)

spnames <- c("oryza sativa", "Chironomus riparius", "Helianthus annuus", "Quercus lobata", 
    "Poa annua", "Lampetra tridentata", "Mordacia lapicida", "Bathyraja abyssicola", 
    "Arhynchobatis asperrimus", "Alytes obstetricans")
head(ldply(spnames, searchbyscientificname))
##                     combinedname    tsn
## 1                   Oryza sativa  41976
## 2        Oryza sativa var. fatua 566528
## 3    Oryza sativa ssp. rufipogon 797955
## 4     Oryza sativa var. elongata 801263
## 5 Oryza sativa var. grandiglumis 801264
## 6    Oryza sativa var. latifolia 801265

Using local search, you can submit many names in one vector

head(searchbyscientificname(srchkey = spnames, locally = TRUE))
##                     combinedname    tsn
## 1            Alytes obstetricans 662327
## 2       Arhynchobatis asperrimus 564294
## 3           Bathyraja abyssicola 564114
## 4            Chironomus riparius 129313
## 5              Helianthus annuus  36616
## 6 Helianthus annuus ssp. jaegeri 525928

Searches with many queries are where we see the major time savings

system.time(ldply(spnames, searchbyscientificname))  # web API
##    user  system elapsed 
##   0.106   0.008  14.682
system.time(searchbyscientificname(srchkey = spnames, locally = TRUE))  # local sql search
##    user  system elapsed 
##   2.620   0.089   2.715

Here's what one of the functions looks like with the SQL syntax

#' Get TSNs and common names for one or more vernacular languages
#'
#' With `locally = TRUE` the lookup is a single SQL query against the
#' connection stored in `getOption("conn")`; otherwise the ITIS
#' `getTsnByVernacularLanguage` web service is called.
#'
#' @param language Language name(s). In the local branch every element is
#'   matched against the `language` column with SQL `LIKE`. In the web branch
#'   it is sent as the `language` form parameter unless it is empty or
#'   contains `NA`.
#' @param locally If `TRUE`, query the local database instead of the web API.
#' @param ... Further arguments passed on to `getForm()` (web branch only).
#' @param curl Curl handle passed on to `getForm()` (web branch only).
#' @return A data.frame. Local branch: columns `language`, `comname`, `tsn`.
#'   Web branch: columns `comname`, `tsn`.
gettsnbyvernacularlanguage <- function(language = NA, locally = FALSE, ...,
    curl = getCurlHandle()) {
    if (locally) {
        if (length(language) == 0L) {
            stop("'language' must contain at least one value", call. = FALSE)
        }
        sqlconn <- getOption("conn")
        # Double embedded single quotes so a value such as "O'odham" stays
        # inside its SQL string literal instead of terminating it.
        quoted <- paste0("'", gsub("'", "''", language, fixed = TRUE), "'")
        # The CASE expression reports, per row, which requested language matched.
        case_clause <- paste("CASE", paste0("WHEN language LIKE ", quoted,
            " THEN ", quoted, collapse = " "), "END AS querystring,")
        where_clause <- paste0("language like ", quoted, collapse = " OR ")
        query_TSNS_BY_LANGUAGE <- paste("select", case_clause,
            "tsn, vernacular_name from vernaculars where", where_clause,
            "order by tsn, vernacular_name;")
        temp <- dbGetQuery(conn = sqlconn, query_TSNS_BY_LANGUAGE)
        data.frame(language = temp$querystring, comname = temp$vernacular_name,
            tsn = temp$tsn)
    } else {
        url <- "http://www.itis.gov/ITISWebService/services/ITISService/getTsnByVernacularLanguage"
        args <- list()
        # anyNA() keeps the condition scalar even when `language` is a vector.
        if (length(language) > 0L && !anyNA(language)) {
            args$language <- language
        }
        tt <- getForm(url, .params = args, ..., curl = curl)
        out <- xmlParse(tt)
        namespaces <- c(ax21 = "http://data.itis_service.itis.usgs.gov/xsd")
        nodes <- getNodeSet(out, "//ax21:commonName", namespaces = namespaces)
        comname <- sapply(nodes, xmlValue)
        nodes <- getNodeSet(out, "//ax21:tsn", namespaces = namespaces)
        tsn <- sapply(nodes, xmlValue)
        data.frame(comname = comname, tsn = tsn)
    }
}

Note:

Not all functions that use ITIS data have a local SQL option yet, but many do. Send any bug reports here