Preamble

Following is just some fooling around

Load packages

require(dplyr)

Generate sqlite database

mar <- src_sqlite("mar.sqlite3", create = T)

Load some data

attach("/net/hafkaldi/export/u2/reikn/Splus5/SMB/.RData")
stodvar <- plyr::ldply(STODVAR)[,-1]
copy_to(mar, stodvar,
        temporary = FALSE, 
        indexes = list("synis.id","ar"))
#backup
stodvar <- tbl(mar,"stodvar") %>% collect()
save(stodvar,file="stodvar_backup.rda")

id <- mar %>%
  tbl("stodvar") %>%
  select(synis.id) %>%
  collect()

numer <- all.nu %>%
  filter(synis.id %in% id$synis.id)
copy_to(mar, numer,
        temporary = FALSE, 
        indexes = list("synis.id","tegund"))

lengdir <- all.le %>%
  filter(synis.id %in% id$synis.id)
copy_to(mar, lengdir,
        temporary = FALSE, 
        indexes = list("synis.id","tegund"))

kvarnir <- all.kv %>%
  filter(synis.id %in% id$synis.id)
copy_to(mar, kvarnir,
        temporary = FALSE, 
        indexes = list("synis.id","tegund"))

rm(list=ls())

Some example of usage

mar <- src_sqlite("mar.sqlite3")
mar
## src:  sqlite 3.8.6 [mar.sqlite3]
## tbls: kvarnir, lengdir, numer, sqlite_stat1, stodvar
db_list_tables(mar$con)
## [1] "kvarnir"      "lengdir"      "numer"        "sqlite_stat1"
## [5] "stodvar"
db_has_table(mar$con, 'stodvar')
## [1] TRUE
db_data_type(mar$con, 'stodvar') # expected overview of type
## stodvar 
##  "TEXT"
db_query_fields(mar$con, sql('stodvar'))
##  [1] "synis.id"        "leidangur"       "skip"           
##  [4] "stod"            "kastad.n.breidd" "kastad.v.lengd" 
##  [7] "hift.n.breidd"   "hift.v.lengd"    "dypi.kastad"    
## [10] "dypi.hift"       "veidarfaeri"     "moskvastaerd"   
## [13] "reitur"          "smareitur"       "heildarafli"    
## [16] "vindhradi"       "vindatt"         "vedur"          
## [19] "sky"             "sjor"            "botnhiti"       
## [22] "yfirbordshiti"   "lofthiti"        "loftvog"        
## [25] "hafis"           "straumstefna"    "straumhradi"    
## [28] "togdypi.kastad"  "togdypi.hift"    "toglengd"       
## [31] "ar"              "man"             "dags"           
## [34] "kl.kastad"       "kl.hift"         "lon"            
## [37] "lat"             "dypi"            "tognumer"       
## [40] "net.nr"          "index"           "newstrata"      
## [43] "strata"          "area"            "lodrett.opnun"  
## [46] "larett.opnun"
st <- tbl(mar,"stodvar") 
explain(st)
## <SQL>
## SELECT "synis.id", "leidangur", "skip", "stod", "kastad.n.breidd", "kastad.v.lengd", "hift.n.breidd", "hift.v.lengd", "dypi.kastad", "dypi.hift", "veidarfaeri", "moskvastaerd", "reitur", "smareitur", "heildarafli", "vindhradi", "vindatt", "vedur", "sky", "sjor", "botnhiti", "yfirbordshiti", "lofthiti", "loftvog", "hafis", "straumstefna", "straumhradi", "togdypi.kastad", "togdypi.hift", "toglengd", "ar", "man", "dags", "kl.kastad", "kl.hift", "lon", "lat", "dypi", "tognumer", "net.nr", "index", "newstrata", "strata", "area", "lodrett.opnun", "larett.opnun"
## FROM "stodvar"
## 
## 
## <PLAN>
##   selectid order from             detail
## 1        0     0    0 SCAN TABLE stodvar
dim(st)
## [1] 17506    46
names(st)
## [1] "src"       "from"      "select"    "summarise" "mutate"    "where"    
## [7] "group_by"  "order_by"  "query"
st
## Source: sqlite 3.8.6 [mar.sqlite3]
## From: stodvar [17,506 x 46]
## 
##    synis.id leidangur skip stod kastad.n.breidd kastad.v.lengd
## 1     30182    TA1-85 1307  133           66.87         -23.15
## 2     30180    TA1-85 1307  135           66.97         -23.50
## 3     30179    TA1-85 1307  136           67.00         -23.68
## 4     30178    TA1-85 1307  137           67.02         -23.45
## 5     30177    TA1-85 1307  138           67.10         -23.53
## 6     30175    TA1-85 1307  140           67.10         -23.23
## 7     30174    TA1-85 1307  141           67.00         -23.23
## 8     30184    TA1-85 1307  131           66.85         -23.05
## 9     30183    TA1-85 1307  132           66.93         -23.00
## 10    30193    TA1-85 1307  122           66.75         -22.95
## ..      ...       ...  ...  ...             ...            ...
## Variables not shown: hift.n.breidd (dbl), hift.v.lengd (dbl), dypi.kastad
##   (dbl), dypi.hift (dbl), veidarfaeri (dbl), moskvastaerd (dbl), reitur
##   (dbl), smareitur (dbl), heildarafli (dbl), vindhradi (dbl), vindatt
##   (dbl), vedur (dbl), sky (dbl), sjor (dbl), botnhiti (dbl), yfirbordshiti
##   (dbl), lofthiti (dbl), loftvog (dbl), hafis (dbl), straumstefna (dbl),
##   straumhradi (dbl), togdypi.kastad (dbl), togdypi.hift (dbl), toglengd
##   (dbl), ar (dbl), man (dbl), dags (dbl), kl.kastad (dbl), kl.hift (dbl),
##   lon (dbl), lat (dbl), dypi (dbl), tognumer (dbl), net.nr (dbl), index
##   (dbl), newstrata (dbl), strata (dbl), area (dbl), lodrett.opnun (dbl),
##   larett.opnun (dbl)
st <- st %>% arrange(ar)
explain(st)
## <SQL>
## SELECT "synis.id", "leidangur", "skip", "stod", "kastad.n.breidd", "kastad.v.lengd", "hift.n.breidd", "hift.v.lengd", "dypi.kastad", "dypi.hift", "veidarfaeri", "moskvastaerd", "reitur", "smareitur", "heildarafli", "vindhradi", "vindatt", "vedur", "sky", "sjor", "botnhiti", "yfirbordshiti", "lofthiti", "loftvog", "hafis", "straumstefna", "straumhradi", "togdypi.kastad", "togdypi.hift", "toglengd", "ar", "man", "dags", "kl.kastad", "kl.hift", "lon", "lat", "dypi", "tognumer", "net.nr", "index", "newstrata", "strata", "area", "lodrett.opnun", "larett.opnun"
## FROM "stodvar"
## ORDER BY "ar"
## 
## 
## <PLAN>
##   selectid order from                                    detail
## 1        0     0    0 SCAN TABLE stodvar USING INDEX stodvar_ar
d <- tbl(mar,"stodvar") %>%
  group_by(ar) %>%
  summarise(n=n(),
            toglengd=mean(toglengd),
            toglengd.sdev=sqrt(var(toglengd))/n(),
            dypi=mean(dypi),
            dypi.sdev=var(dypi)/n())
explain(d)
## <SQL>
## SELECT "ar", "n", "toglengd", "toglengd.sdev", "dypi", "dypi.sdev"
## FROM (SELECT "ar", COUNT() AS "n", AVG("toglengd") AS "toglengd", SQRT(VARIANCE("toglengd")) / COUNT() AS "toglengd.sdev", AVG("dypi") AS "dypi", VARIANCE("dypi") / COUNT() AS "dypi.sdev"
## FROM "stodvar"
## GROUP BY "ar") AS "_W1"
## 
## 
## <PLAN>
##   selectid order from                                    detail
## 1        0     0    0 SCAN TABLE stodvar USING INDEX stodvar_ar
d
## Source: sqlite 3.8.6 [mar.sqlite3]
## From: <derived table> [?? x 6]
## 
##      ar   n toglengd toglengd.sdev  dypi dypi.sdev
## 1  1985 593    3.938     0.0004929 197.6     13.83
## 2  1986 585    3.944     0.0004651 198.6     14.08
## 3  1987 566    3.939     0.0005266 201.1     13.94
## 4  1988 545    3.912     0.0006333 197.6     14.83
## 5  1989 568    3.897     0.0006824 198.8     13.53
## 6  1990 567    3.915     0.0005663 200.5     13.79
## 7  1991 570    3.754     0.0008122 201.8     14.65
## 8  1992 574    3.903     0.0006569 201.9     14.83
## 9  1993 597    3.893     0.0006538 192.4     13.32
## 10 1994 596    3.876     0.0007266 191.8     12.98
## ..  ... ...      ...           ...   ...       ...
d <- tbl(mar,'stodvar') %>%
  filter(ar %in% c(2015, -9)) %>% # -9 is a dummy because of but
  select(synis.id,ar)
explain(d)
## <SQL>
## SELECT "synis.id" AS "synis.id", "ar" AS "ar"
## FROM "stodvar"
## WHERE "ar" IN ('2015', ' - 9.0')
## 
## 
## <PLAN>
##   selectid order from                                             detail
## 1        0     0    0 SEARCH TABLE stodvar USING INDEX stodvar_ar (ar=?)
## 2        0     0    0                            EXECUTE LIST SUBQUERY 1
d
## Source: sqlite 3.8.6 [mar.sqlite3]
## From: stodvar [586 x 2]
## Filter: ar %in% c(2015, -9) 
## 
##    synis.id   ar
## 1    425921 2015
## 2    425922 2015
## 3    425923 2015
## 4    425924 2015
## 5    425925 2015
## 6    425926 2015
## 7    425927 2015
## 8    425928 2015
## 9    425929 2015
## 10   425930 2015
## ..      ...  ...
st <- mar %>%
  tbl('stodvar') %>%
  filter(ar == 2014) %>%
  select(kastad.v.lengd, kastad.n.breidd,
         hift.v.lengd, hift.n.breidd) %>%
  rename(lonS=kastad.v.lengd, latS=kastad.n.breidd,
         lonE=hift.v.lengd, latE=hift.n.breidd)
explain(st)
require(ggplot2)
ggplot(collect(st)) +
  theme_bw() +
  geom_segment(aes(lonS,latS,xend=lonE,yend=latE), col="red") +
  labs(x="", y="", title="2015 survey stations") +
  coord_map()

plot of chunk unnamed-chunk-9