Install the Twitter AnomalyDetection package (the install commands are commented out below; run one of them once if needed).
#pacman::p_install_gh("twitter/AnomalyDetection")
#devtools::install_github("twitter/AnomalyDetection")
library(readr)
library(plotly)
library(lubridate)
library(rvest)
library(dplyr)
library(tidyr)
library(foreach)
library(doParallel)
library(ggfortify)
library(readxl)
library(tidyquant)
library(DT)
library(reticulate)
library(DBI)
pacman::p_load("rio","tidyverse","data.table")
library(ggpubr)
theme_set(theme_pubclean())
# Calculate the number of cores and register a parallel backend
no_cores <- detectCores() - 1
cl <- makeCluster(no_cores)
registerDoParallel(cl)
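The cluster is registered but nothing uses it yet; as a minimal sketch (the toy computation below is illustrative only, not part of the analysis), any foreach loop with %dopar% now runs on the workers:
# toy example: square four numbers in parallel on the registered cluster
squares <- foreach(i = 1:4, .combine = c) %dopar% i^2
squares
## [1]  1  4  9 16
# when finished with parallel work, release the workers:
# stopCluster(cl)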
dplyr supports several database backends, including SQLite, MySQL, and PostgreSQL.
library(DBI)
# Create an ephemeral in-memory RSQLite database
con <- dbConnect(RSQLite::SQLite(), ":memory:")
dbListTables(con)
## character(0)
dbWriteTable(con, "mtcars", mtcars)
dbListTables(con)
## [1] "mtcars"
dbListFields(con, "mtcars")
## [1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear"
## [11] "carb"
dbReadTable(con, "mtcars")
## mpg cyl disp hp drat wt qsec vs am gear carb
## 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## 6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## 7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## 8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## 11 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## 12 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## 13 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## 14 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## 15 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## 16 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## 17 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## 18 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## 19 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## 20 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## 21 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## 22 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## 23 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## 24 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## 25 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## 26 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## 27 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## 28 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## 29 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## 30 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## 31 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## 32 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
# You can fetch all results:
res <- dbSendQuery(con, "SELECT * FROM mtcars WHERE cyl = 4 LIMIT 5")
dbFetch(res)
## mpg cyl disp hp drat wt qsec vs am gear carb
## 1 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 2 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 3 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 4 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## 5 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
dbClearResult(res)
# Or a chunk at a time
res <- dbSendQuery(con, "SELECT * FROM mtcars WHERE cyl = 4")
while (!dbHasCompleted(res)) {
  chunk <- dbFetch(res, n = 5)
  print(nrow(chunk))
}
## [1] 5
## [1] 5
## [1] 1
# Clear the result
dbClearResult(res)
# Disconnect from the database
dbDisconnect(con)
To create an SQLite database in R, we specify the path. We use src_sqlite() to connect to an existing SQLite database, and tbl() to connect to tables within that database. We can also use src_sqlite() to create a new SQLite database at the specified path; if we do not specify a path, it is created in our working directory.
# read the raw CSV with data.table::fread
allconcatenated <- fread("/Users/nanaakwasiabayieboateng/Desktop/df2.csv")
## Read 16715601 rows and 10 (of 10) columns from 2.930 GB file in 00:03:28
allconcatenated$VIN <- as.factor(allconcatenated$VIN)
# create = TRUE creates a new database
my_database <- src_sqlite(path = "ActiveEngineMount", create = TRUE)
To upload data to the database, we use the dplyr function copy_to(). According to the documentation, wherever possible the new object is temporary, limited to the current connection to the source, so we set temporary = FALSE to make it permanent.
# uploading data
copy_to(my_database,allconcatenated,temporary = FALSE)
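As a hedged aside (not in the original workflow), copy_to() can also build indexes at upload time via its indexes argument, which speeds up the VIN filters used later; overwrite = TRUE is required here because the table already exists:
# sketch only: re-upload with an index on VIN
copy_to(my_database, allconcatenated, temporary = FALSE,
        overwrite = TRUE, indexes = list("VIN"))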
The dataset “allconcatenated” is now in the database and can be queried.
# create = FALSE because we are connecting to an existing database
#my_db <- src_sqlite("allconcatenated", create = FALSE)
List the tables in the database (sqlite_stat1 and sqlite_stat4 are internal SQLite statistics tables, created because copy_to() runs ANALYZE by default):
src_tbls(my_database)
## [1] "allconcatenated" "sqlite_stat1" "sqlite_stat4"
We use the same dplyr verbs that we use in data manipulation to work with databases. dplyr translates the R code we write to SQL code. We use tbl to connect to tables within the database.
allconcatenated <- tbl(my_database, "allconcatenated")
class(allconcatenated)
## [1] "tbl_dbi" "tbl_sql" "tbl_lazy" "tbl"
head(allconcatenated,3)
## # Source: lazy query [?? x 10]
## # Database: sqlite 3.22.0 [ActiveEngineMount]
## id time_stamp Time_Vector `time_vector_ze… Veh_Speed VIN TimeStamp
## <chr> <chr> <int> <int> <dbl> <chr> <chr>
## 1 1C4RD… 2017-05-1… 12895 11891 0. 1C4R… 32:12.302
## 2 1C4RD… 2017-05-1… 13894 12890 0. 1C4R… 32:13.301
## 3 1C4RD… 2017-05-1… 14896 13892 0. 1C4R… 32:14.303
## # ... with 3 more variables: TimeStamp1 <chr>, TimeStamp2 <chr>,
## # TimeStamp3 <chr>
Get all rows for the first VIN:
FirstVIN <- filter(allconcatenated, VIN == '1C4RDJAG1HC623554')  # filter rows for a single VIN
We can see the query dplyr has generated:
FirstVIN%>%
show_query()
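The output is not reproduced above; for this filter the translated SQL looks roughly like the following (exact quoting and formatting vary with the dbplyr version):
## <SQL>
## SELECT *
## FROM `allconcatenated`
## WHERE (`VIN` = '1C4RDJAG1HC623554')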
We can also see how the database plans to execute the query:
explain(FirstVIN)
Find the five VINs with the highest row counts in the data:
allconcatenated %>%
  group_by(VINTYPE = VIN) %>%
  summarize(Total = n()) %>%
  arrange(desc(Total)) %>%
  filter(VINTYPE != '') %>%
  head(5)
## # Source: lazy query [?? x 2]
## # Database: sqlite 3.22.0 [ActiveEngineMount]
## # Ordered by: desc(Total)
## VINTYPE Total
## <chr> <int>
## 1 1C4RJFBGXGC382063 1699543
## 2 1C4RJFAG7GC324302 1660480
## 3 1C4RJFAG2GC366957 1298967
## 4 1C4RJFAG1GC378520 1280370
## 5 1C4RJFBM0EC481168 1148244
We can also include ggplot in the chain:
library(forcats)
allconcatenated %>%
  group_by(VINTYPE = VIN) %>%       # group by VIN
  summarize(Total = n()) %>%        # count rows for each VIN
  arrange(desc(Total)) %>%          # sort in descending order
  filter(VINTYPE != '') %>%         # drop records with no VIN information
  head(10) %>%                      # take the top ten
  #mutate(VINTYPE = fct_reorder(VINTYPE, Total)) %>%
  ggplot(aes(x = VINTYPE, y = Total)) +
  geom_bar(stat = 'identity', color = 'skyblue', fill = '#b35900') +
  xlab("") + ggtitle('Top ten VINs by row count') +
  coord_flip() + ylab('Total rows per VIN')
Fig. 30: Top ten VINs by row count.
The RSQLite package bundles SQLite itself, so no external software is needed.
RSQLite is a DBI-compatible interface, which means you primarily use functions defined in the DBI package; so you should always start by loading DBI, not RSQLite.
To create a new SQLite database, you simply supply the filename to dbConnect():
library(DBI)
mydb <- dbConnect(RSQLite::SQLite(), "my-db.sqlite")  # creates the file if it does not exist
dbDisconnect(mydb)
unlink("my-db.sqlite")  # remove the database file
You can easily copy an R data frame into a SQLite database with dbWriteTable():
allconcatenated<-fread("/Users/nanaakwasiabayieboateng/Desktop/df2.csv")
## Read 16715601 rows and 10 (of 10) columns from 2.930 GB file in 00:04:19
mydb <- dbConnect(RSQLite::SQLite(), "")  # "" creates a temporary on-disk database
dbWriteTable(mydb, "allconcatenated", allconcatenated)
dbWriteTable(mydb, "iris", iris)
dbListTables(mydb)
## [1] "allconcatenated" "iris"
Issue a query with dbGetQuery():
dbGetQuery(mydb, 'SELECT * FROM allconcatenated LIMIT 5')
## id
## 1 1C4RDJAG1HC623554_Trip-Detail_2017-05-17 16-31-59.csv
## 2 1C4RDJAG1HC623554_Trip-Detail_2017-05-17 16-31-59.csv
## 3 1C4RDJAG1HC623554_Trip-Detail_2017-05-17 16-31-59.csv
## 4 1C4RDJAG1HC623554_Trip-Detail_2017-05-17 16-31-59.csv
## 5 1C4RDJAG1HC623554_Trip-Detail_2017-05-17 16-31-59.csv
## time_stamp Time_Vector time_vector_zerobased (ms) Veh_Speed
## 1 2017-05-17 16:32:12.302 12895 11891 0
## 2 2017-05-17 16:32:13.301 13894 12890 0
## 3 2017-05-17 16:32:14.303 14896 13892 0
## 4 2017-05-17 16:32:15.305 15898 14894 0
## 5 2017-05-17 16:32:16.301 16894 15890 0
## VIN TimeStamp TimeStamp1 TimeStamp2
## 1 1C4RDJAG1HC623554 32:12.302 32:12.302 2017-05-17T20:32:12.302Z
## 2 1C4RDJAG1HC623554 32:13.301 32:13.301 2017-05-17T20:32:13.301Z
## 3 1C4RDJAG1HC623554 32:14.303 32:14.303 2017-05-17T20:32:14.303Z
## 4 1C4RDJAG1HC623554 32:15.305 32:15.305 2017-05-17T20:32:15.305Z
## 5 1C4RDJAG1HC623554 32:16.301 32:16.301 2017-05-17T20:32:16.301Z
## TimeStamp3
## 1 2018-04-04T04:32:12.302Z
## 2 2018-04-04T04:32:13.301Z
## 3 2018-04-04T04:32:14.303Z
## 4 2018-04-04T04:32:15.305Z
## 5 2018-04-04T04:32:16.301Z
Not all R variable names are valid SQL variable names, so you may need to escape them with double quotes ("):
dbGetQuery(mydb, 'SELECT * FROM iris WHERE "Sepal.Length" < 4.6')
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 4.4 2.9 1.4 0.2 setosa
## 2 4.3 3.0 1.1 0.1 setosa
## 3 4.4 3.0 1.3 0.2 setosa
## 4 4.5 2.3 1.3 0.3 setosa
## 5 4.4 3.2 1.3 0.2 setosa
If you run a query and the results don’t fit in memory, you can use dbSendQuery(), dbFetch() and dbClearResult() to retrieve the results in batches. By default dbFetch() retrieves all available rows; use n to set the maximum number of rows to return.
rs <- dbSendQuery(mydb, 'SELECT * FROM allconcatenated')
while (!dbHasCompleted(rs)) {
  df <- dbFetch(rs, n = 10^6)
  print(nrow(df))
}
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 1000000
## [1] 715601
dbClearResult(rs)
Use dplyr with the database:
iris_short <- dbGetQuery(mydb, 'SELECT * FROM iris LIMIT 75')
iris_1 <- iris_short %>%
  group_by(SPECIES = Species) %>%
  summarize(Total = n()) %>%
  arrange(desc(Total)) %>%
  filter(SPECIES != '') %>%
  head(5)
iris_1
## # A tibble: 2 x 2
## SPECIES Total
## <chr> <int>
## 1 setosa 50
## 2 versicolor 25
Queries can be parameterised: placeholders such as :x are filled in later, which lets you run the same query with different parameters. Call dbBind() to set the parameters:
rs <- dbSendQuery(mydb, 'SELECT * FROM iris WHERE "Sepal.Length" < :x')
dbBind(rs, param = list(x = 4.5))
nrow(dbFetch(rs))
## [1] 4
dbBind(rs, param = list(x = 4))
nrow(dbFetch(rs))
## [1] 0
dbClearResult(rs)
You can also pass multiple parameters in one call to dbBind():
rs <- dbSendQuery(mydb, 'SELECT * FROM iris WHERE "Sepal.Length" = :x')
dbBind(rs, param = list(x = seq(4, 4.4, by = 0.1)))
nrow(dbFetch(rs))
## [1] 4
dbClearResult(rs)
Statements
DBI provides dbSendStatement() and dbExecute(), the counterparts of dbSendQuery() and dbGetQuery() for SQL statements that do not return a tabular result, such as inserting records into a table, updating a table, or setting engine parameters.
dbExecute(mydb, 'DELETE FROM iris WHERE "Sepal.Length" < 4')
## [1] 0
rs <- dbSendStatement(mydb, 'DELETE FROM iris WHERE "Sepal.Length" < :x')
dbBind(rs, param = list(x = 4.5))
dbGetRowsAffected(rs)
## [1] 4
dbClearResult(rs)
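The statements above only delete rows. As a minimal sketch (the values are invented for illustration), the same mechanism handles parameterised inserts:
# sketch: insert one made-up record into iris with named parameters
dbExecute(mydb,
          'INSERT INTO iris VALUES (:sl, :sw, :pl, :pw, :sp)',
          params = list(sl = 5.0, sw = 3.5, pl = 1.4, pw = 0.2, sp = 'setosa'))
## [1] 1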
dbListFields(mydb, "allconcatenated")
## [1] "id" "time_stamp"
## [3] "Time_Vector" "time_vector_zerobased (ms)"
## [5] "Veh_Speed" "VIN"
## [7] "TimeStamp" "TimeStamp1"
## [9] "TimeStamp2" "TimeStamp3"
# Disconnect from the database (res and con belong to the earlier,
# already-closed example; the open connection here is mydb)
dbDisconnect(mydb)