Load Required Packages

Install the twitter anomaly detection package.

#pacman::p_install_gh("twitter/AnomalyDetection")


#devtools::install_github("twitter/AnomalyDetection")

library(readr)
library(plotly)
library(lubridate)
library(rvest)
library(dplyr)
library(tidyr)
library(foreach)
library(doParallel)
library(ggfortify)
library(readxl)
library(tidyquant)  
library(DT)
library(reticulate)
library(DBI)
pacman::p_load("rio","tidyverse","data.table")


library(ggpubr)
theme_set(theme_pubclean())

# Calculate the number of cores
no_cores <- detectCores() - 1

cl<-makeCluster(no_cores)
registerDoParallel(cl)

SQLite

dplyr supports a couple of databases such as sqlite, mysql and postgresql.

library(DBI)
# Create an ephemeral in-memory RSQLite database
con <- dbConnect(RSQLite::SQLite(), ":memory:")

dbListTables(con)
## character(0)
dbWriteTable(con, "mtcars", mtcars)
dbListTables(con)
## [1] "mtcars"
dbListFields(con, "mtcars")
##  [1] "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear"
## [11] "carb"
dbReadTable(con, "mtcars")
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1  21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## 2  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## 3  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## 4  21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## 5  18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## 6  18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## 7  14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## 8  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## 9  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## 10 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## 11 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## 12 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## 13 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## 14 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## 15 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## 16 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## 17 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## 18 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## 19 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## 20 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## 21 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## 22 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## 23 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## 24 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## 25 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
## 26 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## 27 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## 28 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## 29 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## 30 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## 31 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## 32 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
# You can fetch all results:
res <- dbSendQuery(con, "SELECT * FROM mtcars WHERE cyl = 4 LIMIT 5")
dbFetch(res)
##    mpg cyl  disp hp drat    wt  qsec vs am gear carb
## 1 22.8   4 108.0 93 3.85 2.320 18.61  1  1    4    1
## 2 24.4   4 146.7 62 3.69 3.190 20.00  1  0    4    2
## 3 22.8   4 140.8 95 3.92 3.150 22.90  1  0    4    2
## 4 32.4   4  78.7 66 4.08 2.200 19.47  1  1    4    1
## 5 30.4   4  75.7 52 4.93 1.615 18.52  1  1    4    2
dbClearResult(res)

# Or a chunk at a time
res <- dbSendQuery(con, "SELECT * FROM mtcars WHERE cyl = 4")
while(!dbHasCompleted(res)){
  chunk <- dbFetch(res, n = 5)
  print(nrow(chunk))
}
## [1] 5
## [1] 5
## [1] 1
# Clear the result
dbClearResult(res)

# Disconnect from the database
dbDisconnect(con)
Create a database

To create SQLite database in R, we specify the path. We use src_sqlite to connect to an existing sqlite database, and tbl to connect to tables within that database. We can also use src_sqlite to create new SQlite database at the specified path. If we do not specify a path, it will be created in our working directory.

# create =TRUE creates a new database
allconcatenated<-fread("/Users/nanaakwasiabayieboateng/Desktop/df2.csv")
## 
Read 0.0% of 16715601 rows
Read 1.1% of 16715601 rows
Read 1.7% of 16715601 rows
Read 2.5% of 16715601 rows
Read 3.4% of 16715601 rows
Read 4.5% of 16715601 rows
Read 5.9% of 16715601 rows
Read 6.7% of 16715601 rows
Read 7.6% of 16715601 rows
Read 9.7% of 16715601 rows
Read 9.8% of 16715601 rows
Read 11.8% of 16715601 rows
Read 12.4% of 16715601 rows
Read 13.8% of 16715601 rows
Read 15.8% of 16715601 rows
Read 18.0% of 16715601 rows
Read 19.9% of 16715601 rows
Read 20.2% of 16715601 rows
Read 22.0% of 16715601 rows
Read 23.9% of 16715601 rows
Read 25.7% of 16715601 rows
Read 25.9% of 16715601 rows
Read 27.8% of 16715601 rows
Read 29.6% of 16715601 rows
Read 31.3% of 16715601 rows
Read 31.5% of 16715601 rows
Read 33.0% of 16715601 rows
Read 34.9% of 16715601 rows
Read 37.0% of 16715601 rows
Read 39.1% of 16715601 rows
Read 41.0% of 16715601 rows
Read 41.9% of 16715601 rows
Read 43.9% of 16715601 rows
Read 45.5% of 16715601 rows
Read 47.3% of 16715601 rows
Read 49.1% of 16715601 rows
Read 50.9% of 16715601 rows
Read 52.7% of 16715601 rows
Read 53.1% of 16715601 rows
Read 54.7% of 16715601 rows
Read 56.4% of 16715601 rows
Read 58.0% of 16715601 rows
Read 59.7% of 16715601 rows
Read 61.3% of 16715601 rows
Read 62.9% of 16715601 rows
Read 64.5% of 16715601 rows
Read 66.1% of 16715601 rows
Read 67.2% of 16715601 rows
Read 69.0% of 16715601 rows
Read 70.8% of 16715601 rows
Read 72.0% of 16715601 rows
Read 72.9% of 16715601 rows
Read 73.8% of 16715601 rows
Read 75.9% of 16715601 rows
Read 77.9% of 16715601 rows
Read 80.0% of 16715601 rows
Read 82.0% of 16715601 rows
Read 83.3% of 16715601 rows
Read 84.7% of 16715601 rows
Read 86.5% of 16715601 rows
Read 88.4% of 16715601 rows
Read 90.2% of 16715601 rows
Read 91.8% of 16715601 rows
Read 93.5% of 16715601 rows
Read 95.1% of 16715601 rows
Read 96.8% of 16715601 rows
Read 98.4% of 16715601 rows
Read 16715601 rows and 10 (of 10) columns from 2.930 GB file in 00:03:28
allconcatenated$VIN=as.factor(allconcatenated$VIN)
my_database<- src_sqlite(path="ActiveEngineMount", create = TRUE)
Put data in the database

To upload data to the database, we use the dplyr function copy_to. According to the documentation, wherever possible, the new object will be temporary, limited to the current connection to the source. So, we have to change temporary to false to make it permanent.

 # uploading  data

copy_to(my_database,allconcatenated,temporary = FALSE)

The datasets “allconcatenated” is now in the database. It can be queried now.

# create is false now because I am connecting to an existing databaseCopy
#my_db <- src_sqlite("allconcatenated", create = FALSE)

List the tables in the database

src_tbls(my_database)
## [1] "allconcatenated" "sqlite_stat1"    "sqlite_stat4"
Querying the database

We use the same dplyr verbs that we use in data manipulation to work with databases. dplyr translates the R code we write to SQL code. We use tbl to connect to tables within the database.

allconcatenated = tbl(my_database,"allconcatenated" )

class(allconcatenated)
## [1] "tbl_dbi"  "tbl_sql"  "tbl_lazy" "tbl"
head(allconcatenated,3)
## # Source:   lazy query [?? x 10]
## # Database: sqlite 3.22.0 [ActiveEngineMount]
##   id     time_stamp Time_Vector `time_vector_ze… Veh_Speed VIN   TimeStamp
##   <chr>  <chr>            <int>            <int>     <dbl> <chr> <chr>    
## 1 1C4RD… 2017-05-1…       12895            11891        0. 1C4R… 32:12.302
## 2 1C4RD… 2017-05-1…       13894            12890        0. 1C4R… 32:13.301
## 3 1C4RD… 2017-05-1…       14896            13892        0. 1C4R… 32:14.303
## # ... with 3 more variables: TimeStamp1 <chr>, TimeStamp2 <chr>,
## #   TimeStamp3 <chr>

Get all the first VIN’s

FirstVIN = filter(allconcatenated, VIN=='1C4RDJAG1HC623554')  # Filtering demography of patients from the US

We can see the query dplyr has generated:

FirstVIN%>% 
  show_query()

We can also see how the database plans to execute the query:

explain(FirstVIN)

Find the top five VIN’s with the highest number of counts in the data

allconcatenated%>%group_by(VINTYPE= VIN)%>% 
           summarize(Total=n())%>%      
           arrange(desc(Total))%>%       
           filter(VINTYPE!='')%>% head(5)
## # Source:     lazy query [?? x 2]
## # Database:   sqlite 3.22.0 [ActiveEngineMount]
## # Ordered by: desc(Total)
##   VINTYPE             Total
##   <chr>               <int>
## 1 1C4RJFBGXGC382063 1699543
## 2 1C4RJFAG7GC324302 1660480
## 3 1C4RJFAG2GC366957 1298967
## 4 1C4RJFAG1GC378520 1280370
## 5 1C4RJFBM0EC481168 1148244

We can also include ggplot in the chain:

library(forcats)

allconcatenated%>%group_by(VINTYPE= VIN)%>% #grouped by VIN
           summarize(Total=n())%>%    # found the count for each VIN
           arrange(desc(Total))%>%    # sorted them in descending order
           filter(VINTYPE!='')%>%     # removed reports that does not have VIN information
           head(10)%>%                   # took the top ten

 #mutate(VINTYPE = fct_reorder(VINTYPE, Total)) %>%
  
  #mutate(VINTYPE = factor(VINTYPE,levels = VINTYPE[order(Total,decreasing =F)]))%>%
        
        ggplot(aes(x=VINTYPE,y=Total))+geom_bar(stat='identity',color='skyblue',fill='#b35900')+
           xlab("")+ggtitle('Top ten VINTYPE with highest number counts')+
           coord_flip()+ylab('Total number of VINs')
Fig. 30

Fig. 30

RSQLite

RSQLite package contains SQLite; no external software is needed.

RSQLite is a DBI-compatible interface which means you primarily use functions defined in the DBI package, so you should always start by loading DBI, not RSQLite:

Creating a new database

To create a new SQLite database, you simply supply the filename to dbConnect():

library(DBI)
mydb <- dbConnect(RSQLite::SQLite(), "my-db.sqlite")
dbDisconnect(mydb)
unlink("my-db.sqlite")
Loading data

You can easily copy an R data frame into a SQLite database with dbWriteTable():

allconcatenated<-fread("/Users/nanaakwasiabayieboateng/Desktop/df2.csv")
## 
Read 0.0% of 16715601 rows
Read 0.2% of 16715601 rows
Read 0.7% of 16715601 rows
Read 1.4% of 16715601 rows
Read 2.3% of 16715601 rows
Read 3.5% of 16715601 rows
Read 5.0% of 16715601 rows
Read 6.6% of 16715601 rows
Read 8.1% of 16715601 rows
Read 9.5% of 16715601 rows
Read 10.8% of 16715601 rows
Read 12.0% of 16715601 rows
Read 12.8% of 16715601 rows
Read 13.5% of 16715601 rows
Read 14.4% of 16715601 rows
Read 15.4% of 16715601 rows
Read 16.4% of 16715601 rows
Read 17.5% of 16715601 rows
Read 18.5% of 16715601 rows
Read 19.6% of 16715601 rows
Read 20.8% of 16715601 rows
Read 22.0% of 16715601 rows
Read 23.2% of 16715601 rows
Read 24.1% of 16715601 rows
Read 24.9% of 16715601 rows
Read 25.8% of 16715601 rows
Read 26.7% of 16715601 rows
Read 27.8% of 16715601 rows
Read 29.0% of 16715601 rows
Read 30.1% of 16715601 rows
Read 31.2% of 16715601 rows
Read 31.8% of 16715601 rows
Read 32.0% of 16715601 rows
Read 32.4% of 16715601 rows
Read 32.6% of 16715601 rows
Read 32.9% of 16715601 rows
Read 33.2% of 16715601 rows
Read 33.6% of 16715601 rows
Read 33.9% of 16715601 rows
Read 34.2% of 16715601 rows
Read 34.5% of 16715601 rows
Read 34.8% of 16715601 rows
Read 35.1% of 16715601 rows
Read 35.4% of 16715601 rows
Read 35.7% of 16715601 rows
Read 36.0% of 16715601 rows
Read 36.3% of 16715601 rows
Read 36.6% of 16715601 rows
Read 36.9% of 16715601 rows
Read 37.2% of 16715601 rows
Read 37.6% of 16715601 rows
Read 38.0% of 16715601 rows
Read 38.3% of 16715601 rows
Read 38.6% of 16715601 rows
Read 38.9% of 16715601 rows
Read 39.2% of 16715601 rows
Read 39.5% of 16715601 rows
Read 39.8% of 16715601 rows
Read 40.1% of 16715601 rows
Read 40.4% of 16715601 rows
Read 40.8% of 16715601 rows
Read 41.2% of 16715601 rows
Read 41.5% of 16715601 rows
Read 41.9% of 16715601 rows
Read 42.2% of 16715601 rows
Read 42.7% of 16715601 rows
Read 43.1% of 16715601 rows
Read 43.4% of 16715601 rows
Read 43.9% of 16715601 rows
Read 44.3% of 16715601 rows
Read 44.7% of 16715601 rows
Read 45.0% of 16715601 rows
Read 45.3% of 16715601 rows
Read 45.7% of 16715601 rows
Read 46.2% of 16715601 rows
Read 46.6% of 16715601 rows
Read 47.0% of 16715601 rows
Read 47.4% of 16715601 rows
Read 47.9% of 16715601 rows
Read 48.3% of 16715601 rows
Read 48.9% of 16715601 rows
Read 49.4% of 16715601 rows
Read 49.8% of 16715601 rows
Read 50.2% of 16715601 rows
Read 50.6% of 16715601 rows
Read 51.0% of 16715601 rows
Read 51.4% of 16715601 rows
Read 51.8% of 16715601 rows
Read 52.2% of 16715601 rows
Read 52.5% of 16715601 rows
Read 52.8% of 16715601 rows
Read 53.1% of 16715601 rows
Read 53.5% of 16715601 rows
Read 53.8% of 16715601 rows
Read 54.2% of 16715601 rows
Read 54.6% of 16715601 rows
Read 54.9% of 16715601 rows
Read 55.3% of 16715601 rows
Read 55.6% of 16715601 rows
Read 56.1% of 16715601 rows
Read 56.4% of 16715601 rows
Read 56.7% of 16715601 rows
Read 57.0% of 16715601 rows
Read 57.3% of 16715601 rows
Read 57.6% of 16715601 rows
Read 57.8% of 16715601 rows
Read 58.0% of 16715601 rows
Read 58.3% of 16715601 rows
Read 58.6% of 16715601 rows
Read 58.9% of 16715601 rows
Read 59.1% of 16715601 rows
Read 59.3% of 16715601 rows
Read 59.6% of 16715601 rows
Read 59.8% of 16715601 rows
Read 60.1% of 16715601 rows
Read 60.3% of 16715601 rows
Read 60.5% of 16715601 rows
Read 60.8% of 16715601 rows
Read 61.0% of 16715601 rows
Read 61.3% of 16715601 rows
Read 61.5% of 16715601 rows
Read 61.7% of 16715601 rows
Read 62.0% of 16715601 rows
Read 62.2% of 16715601 rows
Read 62.5% of 16715601 rows
Read 62.6% of 16715601 rows
Read 62.9% of 16715601 rows
Read 63.1% of 16715601 rows
Read 63.3% of 16715601 rows
Read 63.6% of 16715601 rows
Read 63.9% of 16715601 rows
Read 64.1% of 16715601 rows
Read 64.3% of 16715601 rows
Read 64.6% of 16715601 rows
Read 64.8% of 16715601 rows
Read 65.0% of 16715601 rows
Read 65.3% of 16715601 rows
Read 65.5% of 16715601 rows
Read 65.7% of 16715601 rows
Read 65.9% of 16715601 rows
Read 66.1% of 16715601 rows
Read 66.3% of 16715601 rows
Read 66.5% of 16715601 rows
Read 66.7% of 16715601 rows
Read 66.9% of 16715601 rows
Read 67.2% of 16715601 rows
Read 67.4% of 16715601 rows
Read 67.7% of 16715601 rows
Read 67.8% of 16715601 rows
Read 68.1% of 16715601 rows
Read 68.3% of 16715601 rows
Read 68.5% of 16715601 rows
Read 68.7% of 16715601 rows
Read 68.9% of 16715601 rows
Read 69.2% of 16715601 rows
Read 69.4% of 16715601 rows
Read 69.6% of 16715601 rows
Read 69.9% of 16715601 rows
Read 70.1% of 16715601 rows
Read 70.4% of 16715601 rows
Read 70.5% of 16715601 rows
Read 70.8% of 16715601 rows
Read 71.0% of 16715601 rows
Read 71.3% of 16715601 rows
Read 71.5% of 16715601 rows
Read 71.7% of 16715601 rows
Read 72.0% of 16715601 rows
Read 72.2% of 16715601 rows
Read 72.4% of 16715601 rows
Read 72.7% of 16715601 rows
Read 72.9% of 16715601 rows
Read 73.2% of 16715601 rows
Read 73.5% of 16715601 rows
Read 73.8% of 16715601 rows
Read 74.3% of 16715601 rows
Read 74.9% of 16715601 rows
Read 75.5% of 16715601 rows
Read 76.1% of 16715601 rows
Read 76.9% of 16715601 rows
Read 77.8% of 16715601 rows
Read 78.8% of 16715601 rows
Read 79.8% of 16715601 rows
Read 80.7% of 16715601 rows
Read 81.5% of 16715601 rows
Read 82.3% of 16715601 rows
Read 82.8% of 16715601 rows
Read 83.2% of 16715601 rows
Read 83.5% of 16715601 rows
Read 83.9% of 16715601 rows
Read 84.5% of 16715601 rows
Read 85.2% of 16715601 rows
Read 85.8% of 16715601 rows
Read 86.5% of 16715601 rows
Read 87.0% of 16715601 rows
Read 87.6% of 16715601 rows
Read 88.1% of 16715601 rows
Read 88.5% of 16715601 rows
Read 88.9% of 16715601 rows
Read 89.4% of 16715601 rows
Read 89.9% of 16715601 rows
Read 90.2% of 16715601 rows
Read 90.5% of 16715601 rows
Read 90.9% of 16715601 rows
Read 91.2% of 16715601 rows
Read 91.6% of 16715601 rows
Read 92.0% of 16715601 rows
Read 92.4% of 16715601 rows
Read 92.8% of 16715601 rows
Read 93.3% of 16715601 rows
Read 93.8% of 16715601 rows
Read 94.2% of 16715601 rows
Read 94.6% of 16715601 rows
Read 95.0% of 16715601 rows
Read 95.4% of 16715601 rows
Read 95.8% of 16715601 rows
Read 96.2% of 16715601 rows
Read 96.7% of 16715601 rows
Read 97.2% of 16715601 rows
Read 97.6% of 16715601 rows
Read 98.0% of 16715601 rows
Read 98.4% of 16715601 rows
Read 98.9% of 16715601 rows
Read 99.4% of 16715601 rows
Read 99.8% of 16715601 rows
Read 16715601 rows and 10 (of 10) columns from 2.930 GB file in 00:04:19
mydb <- dbConnect(RSQLite::SQLite(), "")
dbWriteTable(mydb, "allconcatenated", allconcatenated)
dbWriteTable(mydb, "iris", iris)
dbListTables(mydb)
## [1] "allconcatenated" "iris"
Queries

Issue a query with dbGetQuery():

dbGetQuery(mydb, 'SELECT * FROM allconcatenated LIMIT 5')
##                                                      id
## 1 1C4RDJAG1HC623554_Trip-Detail_2017-05-17 16-31-59.csv
## 2 1C4RDJAG1HC623554_Trip-Detail_2017-05-17 16-31-59.csv
## 3 1C4RDJAG1HC623554_Trip-Detail_2017-05-17 16-31-59.csv
## 4 1C4RDJAG1HC623554_Trip-Detail_2017-05-17 16-31-59.csv
## 5 1C4RDJAG1HC623554_Trip-Detail_2017-05-17 16-31-59.csv
##                time_stamp Time_Vector time_vector_zerobased (ms) Veh_Speed
## 1 2017-05-17 16:32:12.302       12895                      11891         0
## 2 2017-05-17 16:32:13.301       13894                      12890         0
## 3 2017-05-17 16:32:14.303       14896                      13892         0
## 4 2017-05-17 16:32:15.305       15898                      14894         0
## 5 2017-05-17 16:32:16.301       16894                      15890         0
##                 VIN TimeStamp TimeStamp1               TimeStamp2
## 1 1C4RDJAG1HC623554 32:12.302  32:12.302 2017-05-17T20:32:12.302Z
## 2 1C4RDJAG1HC623554 32:13.301  32:13.301 2017-05-17T20:32:13.301Z
## 3 1C4RDJAG1HC623554 32:14.303  32:14.303 2017-05-17T20:32:14.303Z
## 4 1C4RDJAG1HC623554 32:15.305  32:15.305 2017-05-17T20:32:15.305Z
## 5 1C4RDJAG1HC623554 32:16.301  32:16.301 2017-05-17T20:32:16.301Z
##                 TimeStamp3
## 1 2018-04-04T04:32:12.302Z
## 2 2018-04-04T04:32:13.301Z
## 3 2018-04-04T04:32:14.303Z
## 4 2018-04-04T04:32:15.305Z
## 5 2018-04-04T04:32:16.301Z

Not all R variable names are valid SQL variable names, so you may need to escape them with “:

dbGetQuery(mydb, 'SELECT * FROM iris WHERE "Sepal.Length" < 4.6')
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          4.4         2.9          1.4         0.2  setosa
## 2          4.3         3.0          1.1         0.1  setosa
## 3          4.4         3.0          1.3         0.2  setosa
## 4          4.5         2.3          1.3         0.3  setosa
## 5          4.4         3.2          1.3         0.2  setosa
Batched queries

If you run a query and the results don’t fit in memory, you can use dbSendQuery(), dbFetch() and dbClearResults() to retrieve the results in batches. By default dbFetch() will retrieve all available rows: use n to set the maximum number of rows to return.

rs <- dbSendQuery(mydb, 'SELECT * FROM allconcatenated')
while (!dbHasCompleted(rs)) {
  df <- dbFetch(rs, n = 10^6)
  print(nrow(df))
}
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 715601
dbClearResult(rs)

Use dplyr with database

iris_shorth<-dbGetQuery(mydb, 'SELECT * FROM iris LIMIT 75')

iris_1<-iris_shorth%>%group_by(SPECIES= Species)%>% 
           summarize(Total=n())%>%      
           arrange(desc(Total))%>%       
           filter(SPECIES!='')%>% head(5)

iris_1
## # A tibble: 2 x 2
##   SPECIES    Total
##   <chr>      <int>
## 1 setosa        50
## 2 versicolor    25
Multiple parameterised queries

You can use the same approach to run the same parameterised query with different parameters. Call dbBind() to set the parameters:

rs <- dbSendQuery(mydb, 'SELECT * FROM iris WHERE "Sepal.Length" < :x')
dbBind(rs, param = list(x = 4.5))
nrow(dbFetch(rs))
## [1] 4
#> [1] 4
dbBind(rs, param = list(x = 4))
nrow(dbFetch(rs))
## [1] 0
#> [1] 0
dbClearResult(rs)

You can also pass multiple parameters in one call to dbBind():

rs <- dbSendQuery(mydb, 'SELECT * FROM iris WHERE "Sepal.Length" = :x')
dbBind(rs, param = list(x = seq(4, 4.4, by = 0.1)))
nrow(dbFetch(rs))
## [1] 4
#> [1] 4
dbClearResult(rs)

Statements

DBI has new functions dbSendStatement() and dbExecute(), which are the counterparts of dbSendQuery() and dbGetQuery() for SQL statements that do not return a tabular result, such as inserting records into a table, updating a table, or setting engine parameters.

dbExecute(mydb, 'DELETE FROM iris WHERE "Sepal.Length" < 4')
## [1] 0
#> [1] 0
rs <- dbSendStatement(mydb, 'DELETE FROM iris WHERE "Sepal.Length" < :x')
dbBind(rs, param = list(x = 4.5))
dbGetRowsAffected(rs)
## [1] 4
#> [1] 4
dbClearResult(rs)
dbListFields(mydb, "allconcatenated")
##  [1] "id"                         "time_stamp"                
##  [3] "Time_Vector"                "time_vector_zerobased (ms)"
##  [5] "Veh_Speed"                  "VIN"                       
##  [7] "TimeStamp"                  "TimeStamp1"                
##  [9] "TimeStamp2"                 "TimeStamp3"
# Clear the result
dbClearResult(res)

# Disconnect from the database
dbDisconnect(con)