Please note that when trying to Knit a report to PDF I encountered an error due to memory constraints. This file contains the full code, while the PDF Report contains only the plots and summary statistics for illustrative purposes.
Load Commands
I loaded each file individually into R from the web to create a combined file of SEC data.
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2012_q10.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2012_q10.zip'
Content type 'application/octet-stream' length 12002626 bytes (11.4 MB)
downloaded 11.4 MB
unzip(temp, "q1_2012_all.csv")
q1_2012 <- read.csv("q1_2012_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2012_q2.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2012_q2.zip'
Content type 'application/octet-stream' length 12257834 bytes (11.7 MB)
downloaded 11.7 MB
unzip(temp, "q2_2012_all.csv")
q2_2012 <- read.csv("q2_2012_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2012_q3.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2012_q3.zip'
Content type 'application/octet-stream' length 11813209 bytes (11.3 MB)
downloaded 11.3 MB
unzip(temp, "q3_2012_all.csv")
q3_2012 <- read.csv("q3_2012_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2012_q4.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2012_q4.zip'
Content type 'application/octet-stream' length 11759021 bytes (11.2 MB)
downloaded 10.3 MB
downloaded length 10761477 != reported length 11759021
unzip(temp, "q4_2012_all.csv")
error 1 in extracting from zip file
q4_2012 <- read.csv("q4_2012_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2013_q1.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2013_q1.zip'
Content type 'application/octet-stream' length 11426842 bytes (10.9 MB)
downloaded 10.9 MB
unzip(temp, "q1_2013_all.csv")
q1_2013 <- read.csv("q1_2013_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2013_q2.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2013_q2.zip'
Content type 'application/octet-stream' length 12294800 bytes (11.7 MB)
downloaded 11.7 MB
unzip(temp, "q2_2013_all.csv")
q2_2013 <- read.csv("q2_2013_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2013_q3.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2013_q3.zip'
Content type 'application/octet-stream' length 12331275 bytes (11.8 MB)
downloaded 11.8 MB
unzip(temp, "q3_2013_all.csv")
q3_2013 <- read.csv("q3_2013_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2013_q4.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2013_q4.zip'
Content type 'application/octet-stream' length 12489295 bytes (11.9 MB)
downloaded 11.9 MB
unzip(temp, "q4_2013_all.csv")
q4_2013 <- read.csv("q4_2013_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2014_q1.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2014_q1.zip'
Content type 'application/octet-stream' length 12621169 bytes (12.0 MB)
downloaded 12.0 MB
unzip(temp, "q1_2014_all.csv")
q1_2014 <- read.csv("q1_2014_all.csv", sep=",", skipNul=T, header=T)
setwd("~/Georgia Tech/Managing FIs/Assign1")
The working directory was changed to C:/Users/joev2/OneDrive/Documents/Georgia Tech/Managing FIs/Assign1 inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
q2_2014 <- read.csv("~/Georgia Tech/Managing FIs/Assign1/q2_2014_all.csv")
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2014_q3.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2014_q3.zip'
Content type 'application/octet-stream' length 13239788 bytes (12.6 MB)
downloaded 12.6 MB
unzip(temp, "q3_2014_all.csv")
q3_2014 <- read.csv("q3_2014_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2014_q4.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2014_q4.zip'
Content type 'application/octet-stream' length 13436977 bytes (12.8 MB)
downloaded 12.8 MB
unzip(temp, "q4_2014_all.csv")
q4_2014 <- read.csv("q4_2014_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2015_q1.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2015_q1.zip'
Content type 'application/octet-stream' length 12806276 bytes (12.2 MB)
downloaded 12.2 MB
unzip(temp, "q1_2015_all.csv")
q1_2015 <- read.csv("q1_2015_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2015_q2.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2015_q2.zip'
Content type 'application/octet-stream' length 13183979 bytes (12.6 MB)
downloaded 12.6 MB
unzip(temp, "q2_2015_all.csv")
q2_2015 <- read.csv("q2_2015_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2015_q3.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2015_q3.zip'
Content type 'application/octet-stream' length 13707984 bytes (13.1 MB)
downloaded 13.1 MB
unzip(temp, "q3_2015_all.csv")
q3_2015 <- read.csv("q3_2015_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2015_q4.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2015_q4.zip'
Content type 'application/octet-stream' length 13672831 bytes (13.0 MB)
downloaded 13.0 MB
unzip(temp, "q4_2015_all.csv")
q4_2015 <- read.csv("q4_2015_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2016_q1.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2016_q1.zip'
Content type 'application/octet-stream' length 15177551 bytes (14.5 MB)
downloaded 14.5 MB
unzip(temp, "q1_2016_all.csv")
q1_2016 <- read.csv("q1_2016_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2016_q2.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2016_q2.zip'
Content type 'application/octet-stream' length 15643843 bytes (14.9 MB)
downloaded 14.9 MB
unzip(temp, "q2_2016_all.csv")
q2_2016 <- read.csv("q2_2016_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2016_q3.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2016_q3.zip'
Content type 'application/octet-stream' length 15566189 bytes (14.8 MB)
downloaded 14.8 MB
unzip(temp, "q3_2016_all.csv")
q3_2016 <- read.csv("q3_2016_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2016_q4.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2016_q4.zip'
Content type 'application/octet-stream' length 15371263 bytes (14.7 MB)
downloaded 14.7 MB
unzip(temp, "q4_2016_all.csv")
q4_2016 <- read.csv("q4_2016_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2017_q1.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2017_q1.zip'
Content type 'application/octet-stream' length 15146559 bytes (14.4 MB)
downloaded 14.4 MB
unzip(temp, "q1_2017_all.csv")
q1_2017 <- read.csv("q1_2017_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2017_q2.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2017_q2.zip'
Content type 'application/octet-stream' length 15231558 bytes (14.5 MB)
downloaded 14.5 MB
unzip(temp, "q2_2017_all.csv")
q2_2017 <- read.csv("q2_2017_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2017_q3.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2017_q3.zip'
Content type 'application/octet-stream' length 15255963 bytes (14.5 MB)
downloaded 14.5 MB
unzip(temp, "q3_2017_all.csv")
q3_2017 <- read.csv("q3_2017_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2017_q4.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2017_q4.zip'
Content type 'application/octet-stream' length 15448988 bytes (14.7 MB)
downloaded 14.7 MB
unzip(temp, "q4_2017_all.csv")
q4_2017 <- read.csv("q4_2017_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2018_q1.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2018_q1.zip'
Content type 'application/octet-stream' length 15320197 bytes (14.6 MB)
downloaded 14.6 MB
unzip(temp, "q1_2018_all.csv")
q1_2018 <- read.csv("q1_2018_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2018_q2.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2018_q2.zip'
Content type 'application/octet-stream' length 16136876 bytes (15.4 MB)
downloaded 15.4 MB
unzip(temp, "q2_2018_all.csv")
q2_2018 <- read.csv("q2_2018_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2018_q3.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2018_q3.zip'
Content type 'application/octet-stream' length 16309710 bytes (15.6 MB)
downloaded 15.6 MB
unzip(temp, "q3_2018_all.csv")
q3_2018 <- read.csv("q3_2018_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2018_q4.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2018_q4.zip'
Content type 'application/octet-stream' length 16832740 bytes (16.1 MB)
downloaded 16.1 MB
unzip(temp, "q4_2018_all.csv")
q4_2018 <- read.csv("q4_2018_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2019_q1.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2019_q1.zip'
Content type 'application/octet-stream' length 16220248 bytes (15.5 MB)
downloaded 15.5 MB
unzip(temp, "q1_2019_all.csv")
q1_2019 <- read.csv("q1_2019_all.csv", sep=",", skipNul=T, header=T)
temp <- tempfile()
download.file("https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2019_q2.zip",temp, mode="wb")
trying URL 'https://www.sec.gov/files/opa/data/market-structure/metrics-individual-security/individual_security_2019_q2.zip'
Content type 'application/octet-stream' length 16783561 bytes (16.0 MB)
downloaded 16.0 MB
unzip(temp, "q2_2019_all.csv")
q2_2019 <- read.csv("q2_2019_all.csv", sep=",", skipNul=T, header=T)
Build the Plots
Given the complex matrix of data (100 stocks and 100 ETFs for 12 variables across 5 statistics plotted on a time series), I elected to create a separate plot for each of the 12 variables, plotting the 5 summary statistics on a time series. The plots are shown below.
Stock LitVol(000s) Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of
# LitVol..000. for the tickers in stock_list_sample, reshaped to long format
# (columns Date, type, value).
stock_LitVol <- SEC_comb_STOCK %>%
  filter(Ticker %in% stock_list_sample) %>%
  select(Date, LitVol..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(LitVol..000.),
    p25 = quantile(LitVol..000., 0.25),
    p50 = quantile(LitVol..000., 0.5),
    p75 = quantile(LitVol..000., 0.75),
    sd = sd(LitVol..000.)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
stock_LitVol %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

Stock OrderVol(000s) Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of
# OrderVol..000. for the tickers in stock_list_sample, reshaped to long
# format (columns Date, type, value).
stock_OrderVol <- SEC_comb_STOCK %>%
  filter(Ticker %in% stock_list_sample) %>%
  select(Date, OrderVol..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(OrderVol..000.),
    p25 = quantile(OrderVol..000., 0.25),
    p50 = quantile(OrderVol..000., 0.5),
    p75 = quantile(OrderVol..000., 0.75),
    sd = sd(OrderVol..000.)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
stock_OrderVol %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

Stock Hidden Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of Hidden
# for the tickers in stock_list_sample, reshaped to long format
# (columns Date, type, value).
stock_Hidden <- SEC_comb_STOCK %>%
  filter(Ticker %in% stock_list_sample) %>%
  select(Date, Hidden) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(Hidden),
    p25 = quantile(Hidden, 0.25),
    p50 = quantile(Hidden, 0.5),
    p75 = quantile(Hidden, 0.75),
    sd = sd(Hidden)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
stock_Hidden %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

Stock TradesForHidden Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of
# TradesForHidden for the tickers in stock_list_sample, reshaped to long
# format (columns Date, type, value).
stock_TradesForHidden <- SEC_comb_STOCK %>%
  filter(Ticker %in% stock_list_sample) %>%
  select(Date, TradesForHidden) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(TradesForHidden),
    p25 = quantile(TradesForHidden, 0.25),
    p50 = quantile(TradesForHidden, 0.5),
    p75 = quantile(TradesForHidden, 0.75),
    sd = sd(TradesForHidden)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
stock_TradesForHidden %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

Stock HiddenVol (000s) Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of
# HiddenVol..000. for the tickers in stock_list_sample, reshaped to long
# format (columns Date, type, value).
stock_HiddenVol <- SEC_comb_STOCK %>%
  filter(Ticker %in% stock_list_sample) %>%
  select(Date, HiddenVol..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(HiddenVol..000.),
    p25 = quantile(HiddenVol..000., 0.25),
    p50 = quantile(HiddenVol..000., 0.5),
    p75 = quantile(HiddenVol..000., 0.75),
    sd = sd(HiddenVol..000.)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
stock_HiddenVol %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

Stock TradeVolForHidden (000s) Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of
# TradeVolForHidden..000. for the tickers in stock_list_sample, reshaped to
# long format (columns Date, type, value).
stock_TradeVolForHidden <- SEC_comb_STOCK %>%
  filter(Ticker %in% stock_list_sample) %>%
  select(Date, TradeVolForHidden..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(TradeVolForHidden..000.),
    p25 = quantile(TradeVolForHidden..000., 0.25),
    p50 = quantile(TradeVolForHidden..000., 0.5),
    p75 = quantile(TradeVolForHidden..000., 0.75),
    sd = sd(TradeVolForHidden..000.)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
stock_TradeVolForHidden %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

Stock Cancels Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of Cancels
# for the tickers in stock_list_sample, reshaped to long format
# (columns Date, type, value).
stock_Cancels <- SEC_comb_STOCK %>%
  filter(Ticker %in% stock_list_sample) %>%
  select(Date, Cancels) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(Cancels),
    p25 = quantile(Cancels, 0.25),
    p50 = quantile(Cancels, 0.5),
    p75 = quantile(Cancels, 0.75),
    sd = sd(Cancels)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
stock_Cancels %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

Stock LitTrades Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of LitTrades
# for the tickers in stock_list_sample, reshaped to long format
# (columns Date, type, value).
stock_LitTrades <- SEC_comb_STOCK %>%
  filter(Ticker %in% stock_list_sample) %>%
  select(Date, LitTrades) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(LitTrades),
    p25 = quantile(LitTrades, 0.25),
    p50 = quantile(LitTrades, 0.5),
    p75 = quantile(LitTrades, 0.75),
    sd = sd(LitTrades)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
stock_LitTrades %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

Stock OddLots Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of OddLots
# for the tickers in stock_list_sample, reshaped to long format
# (columns Date, type, value).
stock_OddLots <- SEC_comb_STOCK %>%
  filter(Ticker %in% stock_list_sample) %>%
  select(Date, OddLots) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(OddLots),
    p25 = quantile(OddLots, 0.25),
    p50 = quantile(OddLots, 0.5),
    p75 = quantile(OddLots, 0.75),
    sd = sd(OddLots)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
stock_OddLots %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

Stock TradesForOddLots Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of
# TradesForOddLots for the tickers in stock_list_sample, reshaped to long
# format (columns Date, type, value).
stock_TradeForOddLots <- SEC_comb_STOCK %>%
  filter(Ticker %in% stock_list_sample) %>%
  select(Date, TradesForOddLots) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(TradesForOddLots),
    p25 = quantile(TradesForOddLots, 0.25),
    p50 = quantile(TradesForOddLots, 0.5),
    p75 = quantile(TradesForOddLots, 0.75),
    sd = sd(TradesForOddLots)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
stock_TradeForOddLots %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

Stock OddLotVol (000s) Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of
# OddLotVol..000. for the tickers in stock_list_sample, reshaped to long
# format (columns Date, type, value).
stock_OddLotVol <- SEC_comb_STOCK %>%
  filter(Ticker %in% stock_list_sample) %>%
  select(Date, OddLotVol..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(OddLotVol..000.),
    p25 = quantile(OddLotVol..000., 0.25),
    p50 = quantile(OddLotVol..000., 0.5),
    p75 = quantile(OddLotVol..000., 0.75),
    sd = sd(OddLotVol..000.)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
stock_OddLotVol %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

Stock TradeVolForOddLots (000s) Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of
# TradeVolForOddLots..000. for the tickers in stock_list_sample, reshaped to
# long format (columns Date, type, value).
# NOTE(review): the object name contains "Vold" (probably a typo for "Vol");
# it is kept unchanged so that any later reference still resolves.
stock_TradeVoldForOddLots <- SEC_comb_STOCK %>%
  filter(Ticker %in% stock_list_sample) %>%
  select(Date, TradeVolForOddLots..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(TradeVolForOddLots..000.),
    p25 = quantile(TradeVolForOddLots..000., 0.25),
    p50 = quantile(TradeVolForOddLots..000., 0.5),
    p75 = quantile(TradeVolForOddLots..000., 0.75),
    sd = sd(TradeVolForOddLots..000.)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
stock_TradeVoldForOddLots %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

ETF LitVol(000s) Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of
# LitVol..000. for the tickers in ETF_list_sample, reshaped to long format
# (columns Date, type, value).
ETF_LitVol <- SEC_comb_ETF %>%
  filter(Ticker %in% ETF_list_sample) %>%
  select(Date, LitVol..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(LitVol..000.),
    p25 = quantile(LitVol..000., 0.25),
    p50 = quantile(LitVol..000., 0.5),
    p75 = quantile(LitVol..000., 0.75),
    sd = sd(LitVol..000.)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ETF_LitVol %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

ETF OrderVol(000s) Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of
# OrderVol..000. for the tickers in ETF_list_sample, reshaped to long format
# (columns Date, type, value).
ETF_OrderVol <- SEC_comb_ETF %>%
  filter(Ticker %in% ETF_list_sample) %>%
  select(Date, OrderVol..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(OrderVol..000.),
    p25 = quantile(OrderVol..000., 0.25),
    p50 = quantile(OrderVol..000., 0.5),
    p75 = quantile(OrderVol..000., 0.75),
    sd = sd(OrderVol..000.)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ETF_OrderVol %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

ETF Hidden Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of Hidden
# for the tickers in ETF_list_sample, reshaped to long format
# (columns Date, type, value).
ETF_Hidden <- SEC_comb_ETF %>%
  filter(Ticker %in% ETF_list_sample) %>%
  select(Date, Hidden) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(Hidden),
    p25 = quantile(Hidden, 0.25),
    p50 = quantile(Hidden, 0.5),
    p75 = quantile(Hidden, 0.75),
    sd = sd(Hidden)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ETF_Hidden %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

ETF TradesForHidden Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of
# TradesForHidden for the tickers in ETF_list_sample, reshaped to long format
# (columns Date, type, value).
ETF_TradesForHidden <- SEC_comb_ETF %>%
  filter(Ticker %in% ETF_list_sample) %>%
  select(Date, TradesForHidden) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(TradesForHidden),
    p25 = quantile(TradesForHidden, 0.25),
    p50 = quantile(TradesForHidden, 0.5),
    p75 = quantile(TradesForHidden, 0.75),
    sd = sd(TradesForHidden)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ETF_TradesForHidden %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

ETF HiddenVol (000s) Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of
# HiddenVol..000. for the tickers in ETF_list_sample, reshaped to long format
# (columns Date, type, value).
ETF_HiddenVol <- SEC_comb_ETF %>%
  filter(Ticker %in% ETF_list_sample) %>%
  select(Date, HiddenVol..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(HiddenVol..000.),
    p25 = quantile(HiddenVol..000., 0.25),
    p50 = quantile(HiddenVol..000., 0.5),
    p75 = quantile(HiddenVol..000., 0.75),
    sd = sd(HiddenVol..000.)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ETF_HiddenVol %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

ETF TradeVolForHidden (000s) Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of
# TradeVolForHidden..000. for the tickers in ETF_list_sample, reshaped to
# long format (columns Date, type, value).
ETF_TradeVolForHidden <- SEC_comb_ETF %>%
  filter(Ticker %in% ETF_list_sample) %>%
  select(Date, TradeVolForHidden..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(TradeVolForHidden..000.),
    p25 = quantile(TradeVolForHidden..000., 0.25),
    p50 = quantile(TradeVolForHidden..000., 0.5),
    p75 = quantile(TradeVolForHidden..000., 0.75),
    sd = sd(TradeVolForHidden..000.)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ETF_TradeVolForHidden %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

ETF Cancels Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of Cancels
# for the tickers in ETF_list_sample, reshaped to long format
# (columns Date, type, value).
ETF_Cancels <- SEC_comb_ETF %>%
  filter(Ticker %in% ETF_list_sample) %>%
  select(Date, Cancels) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(Cancels),
    p25 = quantile(Cancels, 0.25),
    p50 = quantile(Cancels, 0.5),
    p75 = quantile(Cancels, 0.75),
    sd = sd(Cancels)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ETF_Cancels %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

ETF LitTrades Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of LitTrades
# for the tickers in ETF_list_sample, reshaped to long format
# (columns Date, type, value).
ETF_LitTrades <- SEC_comb_ETF %>%
  filter(Ticker %in% ETF_list_sample) %>%
  select(Date, LitTrades) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(LitTrades),
    p25 = quantile(LitTrades, 0.25),
    p50 = quantile(LitTrades, 0.5),
    p75 = quantile(LitTrades, 0.75),
    sd = sd(LitTrades)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ETF_LitTrades %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

ETF OddLots Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of OddLots
# for the tickers in ETF_list_sample, reshaped to long format
# (columns Date, type, value).
ETF_OddLots <- SEC_comb_ETF %>%
  filter(Ticker %in% ETF_list_sample) %>%
  select(Date, OddLots) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(OddLots),
    p25 = quantile(OddLots, 0.25),
    p50 = quantile(OddLots, 0.5),
    p75 = quantile(OddLots, 0.75),
    sd = sd(OddLots)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ETF_OddLots %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

ETF TradesForOddLots Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of
# TradesForOddLots for the tickers in ETF_list_sample, reshaped to long
# format (columns Date, type, value).
ETF_TradeForOddLots <- SEC_comb_ETF %>%
  filter(Ticker %in% ETF_list_sample) %>%
  select(Date, TradesForOddLots) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(TradesForOddLots),
    p25 = quantile(TradesForOddLots, 0.25),
    p50 = quantile(TradesForOddLots, 0.5),
    p75 = quantile(TradesForOddLots, 0.75),
    sd = sd(TradesForOddLots)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ETF_TradeForOddLots %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

ETF OddLotVol (000s) Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of
# OddLotVol..000. for the tickers in ETF_list_sample, reshaped to long format
# (columns Date, type, value).
ETF_OddLotVol <- SEC_comb_ETF %>%
  filter(Ticker %in% ETF_list_sample) %>%
  select(Date, OddLotVol..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(OddLotVol..000.),
    p25 = quantile(OddLotVol..000., 0.25),
    p50 = quantile(OddLotVol..000., 0.5),
    p75 = quantile(OddLotVol..000., 0.75),
    sd = sd(OddLotVol..000.)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ETF_OddLotVol %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

ETF TradeVolForOddLots (000s) Plot
# Per-date mean, quartiles (p25/p50/p75) and standard deviation of
# TradeVolForOddLots..000. for the tickers in ETF_list_sample, reshaped to
# long format (columns Date, type, value).
# NOTE(review): the object name contains "Vold" (probably a typo for "Vol");
# it is kept unchanged so that any later reference still resolves.
ETF_TradeVoldForOddLots <- SEC_comb_ETF %>%
  filter(Ticker %in% ETF_list_sample) %>%
  select(Date, TradeVolForOddLots..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  # Explicit summarise() replaces the deprecated summarise_each(funs(...));
  # the output columns keep the names mean, p25, p50, p75 and sd.
  summarise(
    mean = mean(TradeVolForOddLots..000.),
    p25 = quantile(TradeVolForOddLots..000., 0.25),
    p50 = quantile(TradeVolForOddLots..000., 0.5),
    p75 = quantile(TradeVolForOddLots..000., 0.75),
    sd = sd(TradeVolForOddLots..000.)
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ETF_TradeVoldForOddLots %>%
  ggplot(aes(x = Date, y = value, color = type)) +
  geom_line()

The individual plots for ETFs and Stocks in Q1 2012 are in line with the overall SEC combined file summary statistics. Standard deviations are much higher than the mean across the board, and ETF order volumes are also much higher than those of stocks. Also, interestingly, many of the Stock graphs show a significant spike in standard deviation at different periods throughout the time series, suggesting that there are one-off events that dramatically influence trading activity over the period. The OddLotVol and OddLot graphs for both stocks and ETFs also tell an interesting story, which is that over time, the trade batches got smaller relative to the overall number of trades. The random sampling we performed shows that we have selected a pool of stocks and ETFs with a broad level of trading activity, and perhaps if we converted the stocks into bins with trading thresholds, we would be able to perform more informative analysis on stocks and ETFs that fall within a certain range of trading volume.
---
output:
  html_notebook: default
  html_document:
    df_print: paged
  pdf_document: default
  word_document: default
---
---
title: "Vall-Llobera Assignment 1"
output:
  html_notebook: default
  pdf_document: default
---

```{r setup, include=FALSE}
# Set the default chunk option echo = TRUE so that each chunk's source code is
# shown in the rendered output.
knitr::opts_chunk$set(echo = TRUE)
```

Please note that when trying to Knit a report to PDF I encountered an error due to memory constraints. This file contains the full code, while the PDF Report contains only the plots and summary statistics for illustrative purposes.

## Load Commands

I loaded each file individually into R from the web to create a combined file of SEC data.

```{r}
# Download the SEC individual-security market-structure metrics, one zip
# archive per quarter, and load each quarter into its own data frame named
# q<quarter>_<year> (q1_2012 through q2_2019).

sec_base_url <- paste0(
  "https://www.sec.gov/files/opa/data/market-structure/",
  "metrics-individual-security/"
)

# Download one quarterly archive, extract the requested CSV and read it.
#
# zip_url   URL of the quarterly zip archive.
# csv_name  Name of the CSV file inside the archive.
# exdir     Directory the CSV is extracted into; defaults to the working
#           directory.
#
# Returns the CSV contents as a data frame. Stops with an error when the
# download reports a problem or the CSV cannot be extracted, instead of
# carrying on and reading a missing (or stale) CSV file.
fetch_sec_quarter <- function(zip_url, csv_name, exdir = ".") {
  zip_path <- tempfile(fileext = ".zip")
  on.exit(unlink(zip_path), add = TRUE)

  # download.file() is bounded by getOption("timeout") (60 seconds by
  # default), which can cut a multi-megabyte archive short on a slow
  # connection. Raise it for this call only.
  old_opts <- options(timeout = max(600, getOption("timeout")))
  on.exit(options(old_opts), add = TRUE)

  # A truncated transfer is only signalled as a warning ("downloaded length
  # != reported length"); promote any download warning to an error.
  status <- tryCatch(
    download.file(zip_url, zip_path, mode = "wb"),
    warning = function(w) {
      stop(
        "Download of ", zip_url, " failed: ", conditionMessage(w),
        call. = FALSE
      )
    }
  )
  if (status != 0) {
    stop(
      "Download of ", zip_url, " returned status ", status,
      call. = FALSE
    )
  }

  # unzip() returns the paths it extracted; nothing comes back when the
  # archive is damaged or does not contain csv_name.
  csv_path <- unzip(zip_path, files = csv_name, exdir = exdir)
  if (length(csv_path) != 1L || !file.exists(csv_path)) {
    stop("Could not extract ", csv_name, " from ", zip_url, call. = FALSE)
  }

  read.csv(csv_path, sep = ",", skipNul = TRUE, header = TRUE)
}

# Every quarter from Q1 2012 to Q2 2019, in chronological order.
quarter_grid <- expand.grid(quarter = 1:4, year = 2012:2019)
keep_rows <- !(quarter_grid$year == 2019 & quarter_grid$quarter > 2)
quarter_grid <- quarter_grid[keep_rows, ]

quarter_ids <- sprintf("q%d_%d", quarter_grid$quarter, quarter_grid$year)
zip_names <- sprintf(
  "individual_security_%d_q%d.zip",
  quarter_grid$year, quarter_grid$quarter
)
names(zip_names) <- quarter_ids

# The Q1 2012 archive name ends in "q10" rather than "q1"; the URL is kept
# exactly as it was originally requested.
zip_names[["q1_2012"]] <- "individual_security_2012_q10.zip"

# Q2 2014 is read from a local copy of the CSV rather than downloaded. The
# former setwd() call is dropped: this path is already absolute, and changing
# the working directory inside a chunk only redirected where the later
# archives were extracted.
local_q2_2014 <- "~/Georgia Tech/Managing FIs/Assign1/q2_2014_all.csv"

for (quarter_id in quarter_ids) {
  use_local <- quarter_id == "q2_2014" &&
    file.exists(path.expand(local_q2_2014))
  if (use_local) {
    quarter_data <- read.csv(local_q2_2014)
  } else {
    # NOTE(review): when the local Q2 2014 copy is absent this falls back to
    # the regular URL pattern for that quarter - confirm that archive exists.
    quarter_data <- fetch_sec_quarter(
      paste0(sec_base_url, zip_names[[quarter_id]]),
      paste0(quarter_id, "_all.csv")
    )
  }
  # Expose the quarter under its original variable name (e.g. q1_2012).
  assign(quarter_id, quarter_data)
}

# Drop the loop temporaries so no extra reference to the last data frame is
# kept around.
rm(quarter_data, use_local)
```

## Load Packages

I loaded the dplyr, tidyr, and ggplot2 packages to execute my R commands.

```{r}
library(dplyr)
library(tidyr)
library(ggplot2)
```

## Combine files

I used "rbind" to combine each file into a single dataframe. Reading this dataframe was not possible when knitting the report due to memory constraints; I attempted rbind with all data frames in a single comma-separated call, separate rbinds chained with pipes (shown below), and dplyr's bind_rows, but each required more memory than the knit-to-PDF function could handle.

```{r}
# Stack the thirty quarterly extracts (2012 Q1 through 2019 Q2) in
# chronological order, using one piped rbind() per calendar year.
SEC_comb <-
  rbind(q1_2012, q2_2012, q3_2012, q4_2012) %>%
  rbind(q1_2013, q2_2013, q3_2013, q4_2013) %>%
  rbind(q1_2014, q2_2014, q3_2014, q4_2014) %>%
  rbind(q1_2015, q2_2015, q3_2015, q4_2015) %>%
  rbind(q1_2016, q2_2016, q3_2016, q4_2016) %>%
  rbind(q1_2017, q2_2017, q3_2017, q4_2017) %>%
  rbind(q1_2018, q2_2018, q3_2018, q4_2018) %>%
  rbind(q1_2019, q2_2019)

# Re-parse the Date column as a Date using the %Y%m%d format.
SEC_comb$Date <- as.Date(as.character(SEC_comb$Date), format = "%Y%m%d")
```

## Summary Statistics for SEC data

To run summary statistics on the combined dataset, I created a new data frame that grouped the rows by security type (ETF and Stock) and computed summary statistics for each of the variables requested.

While the SEC MIDAS website states that there are variables for Trades and TradeVol, these don’t actually appear to exist within the CSV files on the site.

When reviewing the summary statistics we can see that for both ETFs and Stocks across all metrics, the standard deviations are far greater than the means of each variable, showing that there is significant variability amongst both individual Stocks and ETFs. Additionally, the medians are quite low compared to the averages of many of the variables, indicating that some stocks/ETFs with a lot more trading activity drive the numbers up.

ETFs appear to have a much higher order volume relative to Stocks, with an average order volume of 122K for ETFs and only 16K for Stocks.


```{r}
# Printing preferences: penalise scientific notation heavily and show three
# significant digits.
options(scipen = 999, digits = 3)

# For each Security value, compute the mean, quartiles and standard deviation
# of every selected metric (rows with any NA in these columns are dropped
# first), then reshape the result to long (type, value) form.
Sum_stats <-
  select(
    SEC_comb,
    Security,
    LitVol..000.,
    OrderVol..000.,
    Hidden,
    TradesForHidden,
    HiddenVol..000.,
    TradeVolForHidden..000.,
    Cancels,
    LitTrades,
    OddLots,
    TradesForOddLots,
    OddLotVol..000.,
    TradeVolForOddLots..000.
  ) %>%
  group_by(Security) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = median,
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Security)
Sum_stats
```

## Select a sample from Q1_2012

In order to select a random sample of 100 Stocks and ETFs, I first split the Q1_2012 data into two data frames by applying a filter: one for Stocks and one for ETFs. I then pulled 100 random tickers from each data frame, and stored each sample as an object to be queried each time I create a data frame to be plotted.

```{r}
# Fix the RNG seed so the same 100 stocks and 100 ETFs are drawn on every
# run; otherwise the sample, and every plot built from it, changes each knit.
set.seed(2012)

# Convert Date only once: re-parsing an already converted Date column with
# the %Y%m%d format would turn every value into NA if this chunk is re-run.
if (!inherits(q1_2012$Date, "Date")) {
  q1_2012 <- transform(q1_2012, Date = as.Date(as.character(Date), "%Y%m%d"))
}

# Split the first quarter by security type.
q1_2012_STOCK <- filter(q1_2012, Security == "Stock")
q1_2012_ETF <- filter(q1_2012, Security == "ETF")

# Draw 100 distinct tickers of each type (sampling without replacement).
stock_list <- unique(q1_2012_STOCK$Ticker)
stock_list_sample <- sample(stock_list, 100)
ETF_list <- unique(q1_2012_ETF$Ticker)
ETF_list_sample <- sample(ETF_list, 100)
stock_list_sample
ETF_list_sample

# Type-specific views of the combined data, used by the plot chunks.
SEC_comb_STOCK <- filter(SEC_comb, Security == "Stock")
SEC_comb_ETF <- filter(SEC_comb, Security == "ETF")
```

## Build the Plots

Given the complex matrix of data (100 stocks and 100 ETFs for 12 variables across 5 statistics plotted on a time series), I elected to create a separate plot for each of the 12 variables, plotting the 5 summary statistics on a time series. The plots are shown below.

### Stock LitVol(000s) Plot

```{r}
# Per-date mean, quartiles and standard deviation of LitVol..000. over the
# sampled stock tickers, gathered into long (type, value) form.
stock_LitVol <-
  filter(SEC_comb_STOCK, Ticker %in% stock_list_sample) %>%
  select(Date, LitVol..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(stock_LitVol, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### Stock OrderVol(000s) Plot

```{r}
# Per-date mean, quartiles and standard deviation of OrderVol..000. over the
# sampled stock tickers, gathered into long (type, value) form.
stock_OrderVol <-
  filter(SEC_comb_STOCK, Ticker %in% stock_list_sample) %>%
  select(Date, OrderVol..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(stock_OrderVol, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### Stock Hidden Plot

```{r}
# Per-date mean, quartiles and standard deviation of Hidden over the sampled
# stock tickers, gathered into long (type, value) form.
stock_Hidden <-
  filter(SEC_comb_STOCK, Ticker %in% stock_list_sample) %>%
  select(Date, Hidden) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(stock_Hidden, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### Stock TradesForHidden Plot

```{r}
# Per-date mean, quartiles and standard deviation of TradesForHidden over the
# sampled stock tickers, gathered into long (type, value) form.
stock_TradesForHidden <-
  filter(SEC_comb_STOCK, Ticker %in% stock_list_sample) %>%
  select(Date, TradesForHidden) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(stock_TradesForHidden, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### Stock HiddenVol (000s) Plot

```{r}
# Per-date mean, quartiles and standard deviation of HiddenVol..000. over the
# sampled stock tickers, gathered into long (type, value) form.
stock_HiddenVol <-
  filter(SEC_comb_STOCK, Ticker %in% stock_list_sample) %>%
  select(Date, HiddenVol..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(stock_HiddenVol, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### Stock TradeVolForHidden (000s) Plot

```{r}
# Per-date mean, quartiles and standard deviation of TradeVolForHidden..000.
# over the sampled stock tickers, gathered into long (type, value) form.
stock_TradeVolForHidden <-
  filter(SEC_comb_STOCK, Ticker %in% stock_list_sample) %>%
  select(Date, TradeVolForHidden..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(stock_TradeVolForHidden, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### Stock Cancels Plot

```{r}
# Per-date mean, quartiles and standard deviation of Cancels over the sampled
# stock tickers, gathered into long (type, value) form.
stock_Cancels <-
  filter(SEC_comb_STOCK, Ticker %in% stock_list_sample) %>%
  select(Date, Cancels) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(stock_Cancels, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### Stock LitTrades Plot

```{r}
# Per-date mean, quartiles and standard deviation of LitTrades over the
# sampled stock tickers, gathered into long (type, value) form.
stock_LitTrades <-
  filter(SEC_comb_STOCK, Ticker %in% stock_list_sample) %>%
  select(Date, LitTrades) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(stock_LitTrades, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### Stock OddLots Plot

```{r}
# Per-date mean, quartiles and standard deviation of OddLots over the sampled
# stock tickers, gathered into long (type, value) form.
stock_OddLots <-
  filter(SEC_comb_STOCK, Ticker %in% stock_list_sample) %>%
  select(Date, OddLots) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(stock_OddLots, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### Stock TradesForOddLots Plot

```{r}
# Per-date mean, quartiles and standard deviation of TradesForOddLots over the
# sampled stock tickers, gathered into long (type, value) form.
stock_TradeForOddLots <-
  filter(SEC_comb_STOCK, Ticker %in% stock_list_sample) %>%
  select(Date, TradesForOddLots) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(stock_TradeForOddLots, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### Stock OddLotVol (000s) Plot

```{r}
# Per-date mean, quartiles and standard deviation of OddLotVol..000. over the
# sampled stock tickers, gathered into long (type, value) form.
stock_OddLotVol <-
  filter(SEC_comb_STOCK, Ticker %in% stock_list_sample) %>%
  select(Date, OddLotVol..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(stock_OddLotVol, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### Stock TradeVolForOddLots (000s) Plot

```{r}
# Per-date mean, quartiles and standard deviation of TradeVolForOddLots..000.
# over the sampled stock tickers, gathered into long (type, value) form.
stock_TradeVoldForOddLots <-
  filter(SEC_comb_STOCK, Ticker %in% stock_list_sample) %>%
  select(Date, TradeVolForOddLots..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(stock_TradeVoldForOddLots, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### ETF LitVol(000s) Plot

```{r}
# Per-date mean, quartiles and standard deviation of LitVol..000. over the
# sampled ETF tickers, gathered into long (type, value) form.
ETF_LitVol <-
  filter(SEC_comb_ETF, Ticker %in% ETF_list_sample) %>%
  select(Date, LitVol..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(ETF_LitVol, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### ETF OrderVol(000s) Plot

```{r}
# Per-date mean, quartiles and standard deviation of OrderVol..000. over the
# sampled ETF tickers, gathered into long (type, value) form.
ETF_OrderVol <-
  filter(SEC_comb_ETF, Ticker %in% ETF_list_sample) %>%
  select(Date, OrderVol..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(ETF_OrderVol, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### ETF Hidden Plot

```{r}
# Per-date mean, quartiles and standard deviation of Hidden over the sampled
# ETF tickers, gathered into long (type, value) form.
ETF_Hidden <-
  filter(SEC_comb_ETF, Ticker %in% ETF_list_sample) %>%
  select(Date, Hidden) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(ETF_Hidden, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### ETF TradesForHidden Plot

```{r}
# Per-date mean, quartiles and standard deviation of TradesForHidden over the
# sampled ETF tickers, gathered into long (type, value) form.
ETF_TradesForHidden <-
  filter(SEC_comb_ETF, Ticker %in% ETF_list_sample) %>%
  select(Date, TradesForHidden) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(ETF_TradesForHidden, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### ETF HiddenVol (000s) Plot

```{r}
# Per-date mean, quartiles and standard deviation of HiddenVol..000. over the
# sampled ETF tickers, gathered into long (type, value) form.
ETF_HiddenVol <-
  filter(SEC_comb_ETF, Ticker %in% ETF_list_sample) %>%
  select(Date, HiddenVol..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(ETF_HiddenVol, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### ETF TradeVolForHidden (000s) Plot

```{r}
# Per-date mean, quartiles and standard deviation of TradeVolForHidden..000.
# over the sampled ETF tickers, gathered into long (type, value) form.
ETF_TradeVolForHidden <-
  filter(SEC_comb_ETF, Ticker %in% ETF_list_sample) %>%
  select(Date, TradeVolForHidden..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(ETF_TradeVolForHidden, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### ETF Cancels Plot

```{r}
# Per-date mean, quartiles and standard deviation of Cancels over the sampled
# ETF tickers, gathered into long (type, value) form.
ETF_Cancels <-
  filter(SEC_comb_ETF, Ticker %in% ETF_list_sample) %>%
  select(Date, Cancels) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(ETF_Cancels, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### ETF LitTrades Plot

```{r}
# Per-date mean, quartiles and standard deviation of LitTrades over the
# sampled ETF tickers, gathered into long (type, value) form.
ETF_LitTrades <-
  filter(SEC_comb_ETF, Ticker %in% ETF_list_sample) %>%
  select(Date, LitTrades) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(ETF_LitTrades, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### ETF OddLots Plot

```{r}
# Per-date mean, quartiles and standard deviation of OddLots over the sampled
# ETF tickers, gathered into long (type, value) form.
ETF_OddLots <-
  filter(SEC_comb_ETF, Ticker %in% ETF_list_sample) %>%
  select(Date, OddLots) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(ETF_OddLots, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### ETF TradesForOddLots Plot

```{r}
# Per-date mean, quartiles and standard deviation of TradesForOddLots over the
# sampled ETF tickers, gathered into long (type, value) form.
ETF_TradeForOddLots <-
  filter(SEC_comb_ETF, Ticker %in% ETF_list_sample) %>%
  select(Date, TradesForOddLots) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(ETF_TradeForOddLots, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### ETF OddLotVol (000s) Plot

```{r}
# Per-date mean, quartiles and standard deviation of OddLotVol..000. over the
# sampled ETF tickers, gathered into long (type, value) form.
ETF_OddLotVol <-
  filter(SEC_comb_ETF, Ticker %in% ETF_list_sample) %>%
  select(Date, OddLotVol..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(ETF_OddLotVol, aes(x = Date, y = value, colour = type)) +
  geom_line()
```

### ETF TradeVolForOddLots (000s) Plot

```{r}
# Per-date mean, quartiles and standard deviation of TradeVolForOddLots..000.
# over the sampled ETF tickers, gathered into long (type, value) form.
ETF_TradeVoldForOddLots <-
  filter(SEC_comb_ETF, Ticker %in% ETF_list_sample) %>%
  select(Date, TradeVolForOddLots..000.) %>%
  group_by(Date) %>%
  na.omit() %>%
  summarise_each(
    funs(
      mean = mean,
      p25 = quantile(., 0.25),
      p50 = quantile(., 0.5),
      p75 = quantile(., 0.75),
      sd = sd
    )
  ) %>%
  gather(type, value, -Date)

# One line per statistic over time.
ggplot(ETF_TradeVoldForOddLots, aes(x = Date, y = value, colour = type)) +
  geom_line()
```


The individual plots for ETFs and Stocks in Q1 2012 are in line with the overall SEC combined file summary statistics. Standard deviations are much higher than the mean across the board, and ETF order volumes are also much higher than those of stocks. Also, interestingly, many of the Stock graphs show a significant spike in standard deviation at different periods throughout the time series, suggesting that there are one-off events that dramatically influence trading activity over the period. The OddLotVol and OddLot graphs for both stocks and ETFs also tell an interesting story, which is that over time, the trade batches got smaller relative to the overall amount of trades. The random sampling we performed shows that we have selected a pool of stocks and ETFs with a broad level of trading activity, and perhaps if we converted the stocks into bins with trading thresholds, we would be able to perform more informative analysis on stocks and ETFs that fall within a certain range of trading volume.