suppressPackageStartupMessages(library(dplyr))
pg_wrds <- src_postgres(host = "wrds-pgdata.wharton.upenn.edu",
port = 9737,
dbname = "wrds",
user = "iangow")
dsf <- tbl(pg_wrds, sql("SELECT * FROM crsp.dsf"))
Perfect, I only care about CUSIP, PERMNO, DATE, BIDLO, and ASKHI. Let’s see what the data looks like if I grab the first hundred rows filtering for only these variables:
dsf %>%
select(cusip, permno, date, bidlo, askhi) %>%
collect(n=100)
## # A tibble: 100 × 5
## cusip permno date bidlo askhi
## * <chr> <dbl> <date> <dbl> <dbl>
## 1 20836810 71765 1996-08-13 68.875 69.875
## 2 20836810 71765 1996-08-14 68.375 69.250
## 3 20836810 71765 1996-08-15 67.750 68.750
## 4 20836810 71765 1996-08-16 67.875 68.750
## 5 20836810 71765 1996-08-19 68.375 69.125
## 6 20836810 71765 1996-08-20 68.375 69.000
## 7 20836810 71765 1996-08-21 69.000 69.750
## 8 20836810 71765 1996-08-22 69.250 69.750
## 9 20836810 71765 1996-08-23 69.000 69.500
## 10 20836810 71765 1996-08-26 68.375 69.000
## # ... with 90 more rows
That’s not very helpful, and will probably return an awful lot of data. Let’s filter it by a few PERMNOs I’m interested in and limit to one day:
dsf %>%
select(cusip, permno, date, bidlo, askhi) %>%
filter(permno %in% c(14593, 90319, 12490, 17778)) %>%
filter(date == '2013-01-04')
## Source: lazy query [?? x 5]
## Database: postgres 9.5.5 [iangow@wrds-pgdata.wharton.upenn.edu:9737/wrds]
##
## cusip permno date bidlo askhi
## <chr> <dbl> <date> <dbl> <dbl>
## 1 45920010 12490 2013-01-04 192.7800 194.4600
## 2 03783310 14593 2013-01-04 525.8286 538.6299
## 3 08467010 17778 2013-01-04 140047.0000 141003.7969
## 4 02079K30 90319 2013-01-04 727.6801 741.4700
That’s more like it. I’m interested in high Asks; let’s get a list of dates where these PERMNOs posted an Ask Price over $2000 between 2010 and 2013:
dsf %>%
select(cusip, permno, date, bidlo, askhi) %>%
filter(permno %in% c(14593, 90319, 12490, 17778),
askhi > 2000,
between(date, '2010-01-01', '2013-12-31'))
## Source: lazy query [?? x 5]
## Database: postgres 9.5.5 [iangow@wrds-pgdata.wharton.upenn.edu:9737/wrds]
##
## cusip permno date bidlo askhi
## <chr> <dbl> <date> <dbl> <dbl>
## 1 08467010 17778 2010-01-04 99201 99910.00
## 2 08467010 17778 2010-01-05 99550 100001.00
## 3 08467010 17778 2010-01-06 99500 100000.00
## 4 08467010 17778 2010-01-07 99594 100000.00
## 5 08467010 17778 2010-01-08 99700 100300.00
## 6 08467010 17778 2010-01-11 99320 100750.01
## 7 08467010 17778 2010-01-12 99350 99948.99
## 8 08467010 17778 2010-01-13 99150 99948.99
## 9 08467010 17778 2010-01-14 98920 99480.00
## 10 08467010 17778 2010-01-15 97205 99362.50
## # ... with more rows
How about all PERMNOs that have ever posted an Ask Price over $2000 in any date range:
hi_ask_permnos <-
dsf %>%
filter(askhi > 2000) %>%
select(permno) %>%
distinct()
hi_ask_permnos %>%
count()
## Source: lazy query [?? x 1]
## Database: postgres 9.5.5 [iangow@wrds-pgdata.wharton.upenn.edu:9737/wrds]
##
## n
## <dbl>
## 1 31
hi_ask_permnos %>%
head(10)
## Source: lazy query [?? x 1]
## Database: postgres 9.5.5 [iangow@wrds-pgdata.wharton.upenn.edu:9737/wrds]
##
## permno
## <dbl>
## 1 10225
## 2 11498
## 3 12132
## 4 12343
## 5 12490
## 6 13370
## 7 14533
## 8 14752
## 9 14883
## 10 15675
## # ... with more rows
Lastly, let’s retrieve all the dates for which an Ask Price over $2000 was posted, along with the PERMNOs that posted them:
hi_ask_permno_dates <-
dsf %>%
filter(askhi > 2000) %>%
select(permno, date) %>%
distinct()
hi_ask_permno_dates %>%
count()
## Source: lazy query [?? x 1]
## Database: postgres 9.5.5 [iangow@wrds-pgdata.wharton.upenn.edu:9737/wrds]
##
## n
## <dbl>
## 1 12296
hi_ask_permno_dates %>%
head(10)
## Source: lazy query [?? x 2]
## Database: postgres 9.5.5 [iangow@wrds-pgdata.wharton.upenn.edu:9737/wrds]
##
## permno date
## <dbl> <date>
## 1 17778 1988-08-15
## 2 36281 2015-12-11
## 3 17778 2012-12-19
## 4 83443 2006-01-19
## 5 17778 2010-12-16
## 6 17778 1992-04-27
## 7 17778 1995-11-16
## 8 83443 2008-12-26
## 9 17778 2005-10-10
## 10 36281 2015-06-12
## # ... with more rows