suppressPackageStartupMessages(library(dplyr))

pg_wrds <- src_postgres(host = "wrds-pgdata.wharton.upenn.edu",
             port = 9737,
             dbname = "wrds",
             user = "iangow")

dsf <- tbl(pg_wrds, sql("SELECT * FROM crsp.dsf"))

Perfect, I only care about CUSIP, PERMNO, DATE, BIDLO, and ASKHI. Let’s see what the data looks like if I grab the first hundred rows filtering for only these variables:

dsf %>%
    select(cusip, permno, date, bidlo, askhi) %>%
    collect(n=100)
## # A tibble: 100 × 5
##       cusip permno       date  bidlo  askhi
## *     <chr>  <dbl>     <date>  <dbl>  <dbl>
## 1  20836810  71765 1996-08-13 68.875 69.875
## 2  20836810  71765 1996-08-14 68.375 69.250
## 3  20836810  71765 1996-08-15 67.750 68.750
## 4  20836810  71765 1996-08-16 67.875 68.750
## 5  20836810  71765 1996-08-19 68.375 69.125
## 6  20836810  71765 1996-08-20 68.375 69.000
## 7  20836810  71765 1996-08-21 69.000 69.750
## 8  20836810  71765 1996-08-22 69.250 69.750
## 9  20836810  71765 1996-08-23 69.000 69.500
## 10 20836810  71765 1996-08-26 68.375 69.000
## # ... with 90 more rows

That’s not very helpful, and will probably return an awful lot of data. Let’s filter it by a few PERMNOs I’m interested in and limit to one day:

dsf %>%
    select(cusip, permno, date, bidlo, askhi) %>%
    filter(permno %in% c(14593, 90319, 12490, 17778)) %>%
    filter(date == '2013-01-04') 
## Source:     lazy query [?? x 5]
## Database:   postgres 9.5.5 [iangow@wrds-pgdata.wharton.upenn.edu:9737/wrds]
## 
##      cusip permno       date       bidlo       askhi
##      <chr>  <dbl>     <date>       <dbl>       <dbl>
## 1 45920010  12490 2013-01-04    192.7800    194.4600
## 2 03783310  14593 2013-01-04    525.8286    538.6299
## 3 08467010  17778 2013-01-04 140047.0000 141003.7969
## 4 02079K30  90319 2013-01-04    727.6801    741.4700

That’s more like it. I’m interested in high Asks; let’s get a list of dates where these PERMNOs posted an Ask Price over $2000 between 2010 and 2013:

dsf %>%
    select(cusip, permno, date, bidlo, askhi) %>%
    filter(permno %in% c(14593, 90319, 12490, 17778),
           askhi >  2000,
           between(date, '2010-01-01', '2013-12-31'))
## Source:     lazy query [?? x 5]
## Database:   postgres 9.5.5 [iangow@wrds-pgdata.wharton.upenn.edu:9737/wrds]
## 
##       cusip permno       date bidlo     askhi
##       <chr>  <dbl>     <date> <dbl>     <dbl>
## 1  08467010  17778 2010-01-04 99201  99910.00
## 2  08467010  17778 2010-01-05 99550 100001.00
## 3  08467010  17778 2010-01-06 99500 100000.00
## 4  08467010  17778 2010-01-07 99594 100000.00
## 5  08467010  17778 2010-01-08 99700 100300.00
## 6  08467010  17778 2010-01-11 99320 100750.01
## 7  08467010  17778 2010-01-12 99350  99948.99
## 8  08467010  17778 2010-01-13 99150  99948.99
## 9  08467010  17778 2010-01-14 98920  99480.00
## 10 08467010  17778 2010-01-15 97205  99362.50
## # ... with more rows

How about all PERMNOs that have ever posted an Ask Price over $2000 in any date range:

hi_ask_permnos <-
    dsf %>%
    filter(askhi >  2000) %>%
    select(permno) %>%
    distinct() 

hi_ask_permnos %>%
    count()
## Source:     lazy query [?? x 1]
## Database:   postgres 9.5.5 [iangow@wrds-pgdata.wharton.upenn.edu:9737/wrds]
## 
##       n
##   <dbl>
## 1    31
hi_ask_permnos %>%
    head(10)
## Source:     lazy query [?? x 1]
## Database:   postgres 9.5.5 [iangow@wrds-pgdata.wharton.upenn.edu:9737/wrds]
## 
##    permno
##     <dbl>
## 1   10225
## 2   11498
## 3   12132
## 4   12343
## 5   12490
## 6   13370
## 7   14533
## 8   14752
## 9   14883
## 10  15675
## # ... with more rows

Lastly, let’s retrieve all the dates for which an Ask Price over $2000 was posted, along with the PERMNOs that posted them:

hi_ask_permno_dates <-
    dsf %>%
    filter(askhi >  2000) %>%
    select(permno, date) %>%
    distinct() 

hi_ask_permno_dates %>%
    count()
## Source:     lazy query [?? x 1]
## Database:   postgres 9.5.5 [iangow@wrds-pgdata.wharton.upenn.edu:9737/wrds]
## 
##       n
##   <dbl>
## 1 12296
hi_ask_permno_dates %>%
    head(10)
## Source:     lazy query [?? x 2]
## Database:   postgres 9.5.5 [iangow@wrds-pgdata.wharton.upenn.edu:9737/wrds]
## 
##    permno       date
##     <dbl>     <date>
## 1   17778 1988-08-15
## 2   36281 2015-12-11
## 3   17778 2012-12-19
## 4   83443 2006-01-19
## 5   17778 2010-12-16
## 6   17778 1992-04-27
## 7   17778 1995-11-16
## 8   83443 2008-12-26
## 9   17778 2005-10-10
## 10  36281 2015-06-12
## # ... with more rows