Here is the data step. Get all firm-years from Compustat from 1978 on. I eliminate firms with market capitalization less than $200 million. I form quintiles for the chosen measure based on all firms on Compustat in the given fiscal year. However, when returning data for plotting, only firms with complete data for ten years are included.
make_plot <- function (filter, calc, label) {
library(RPostgreSQL)
drv <- dbDriver("PostgreSQL")
pg <- dbConnect(drv, dbname="crsp")
plot.data <- dbGetQuery(pg, paste("
SET work_mem='10GB';
WITH comp_stage_1 AS (
SELECT *, prcc_f*csho AS mktcap, coalesce(che,0) + coalesce(ivao,0) AS fa,
coalesce(dlc,0) + coalesce(dltt,0) + coalesce(pstk,0) -
coalesce(tstkp,0) + coalesce(dvpa,0) AS fo
FROM comp.funda
WHERE indfmt='INDL' AND datafmt='STD' AND popsrc='D' AND consol='C'
AND fyear>=1978 AND fic='USA'),
comp_stage_2 AS (
SELECT *, fo-fa AS nfo, fo-fa+ceq+mib AS noa
FROM comp_stage_1),
comp AS (
SELECT gvkey, fyear, mktcap,
CASE WHEN ", filter, " THEN", calc, "ELSE NULL END AS measure
FROM comp_stage_2
WINDOW w AS (PARTITION BY gvkey ORDER BY fyear)) ,
quintiles AS (
SELECT *, ntile(5) over (PARTITION BY fyear ORDER BY measure) as quintile
FROM comp
WHERE mktcap > 200 AND measure IS NOT NULL),
raw_data AS (SELECT a.gvkey, a.fyear, a.quintile, b.fyear-a.fyear AS year, b.measure
FROM quintiles AS a
INNER JOIN quintiles AS b
USING (gvkey)
WHERE b.fyear BETWEEN a.fyear AND a.fyear + 9),
full_series AS (
SELECT DISTINCT gvkey, fyear, count(year) AS num_years
FROM raw_data
GROUP BY gvkey, fyear
HAVING count(year)=10)
SELECT year, quintile, median(measure) AS measure
FROM raw_data
INNER JOIN full_series
USING (gvkey, fyear)
GROUP BY year, quintile"))
dbDisconnect(pg)
plot.data <- within(plot.data, {
quintile <- as.factor(quintile)
year <- as.factor(year)
})
percent_formatter <- function(x) {
return(paste(round(x,2)*100, "%"))
}
library(ggplot2)
p <- ggplot(plot.data, aes(x=year, y=measure, colour=quintile, group=quintile)) + geom_line() +
scale_colour_discrete(labels=paste(c("Bottom", "Fourth", "Third", "Second", "Top"), "Fifth")) +
xlab("Year") + ylab(label) +
labs(colour=paste(label,"in Year 0")) +
scale_y_continuous(labels=percent_formatter)
return(p)
}
p <- make_plot(filter = "lag(SALE) OVER w > 0", calc = "sale/lag(sale) OVER w - 1",
label = "Sales growth")
p
p <- make_plot(filter = "lag(ceq) OVER w > 0", calc = "ni/lag(ceq) OVER w",
label = "Return on equity")
p
p <- make_plot(filter = "sale > 0 AND ibadj >0", calc = "oiadp*(1-txt/ibadj)/sale",
label = "Operating profit/sales")
p
p <- make_plot(filter = "lag(noa) OVER w > 0", calc = "sale/lag(noa) OVER w",
label = "Sales/Net operating assets")
p