Andrew Heiss — Apr 30, 2014, 9:29 AM
library(ggplot2)
suppressPackageStartupMessages(library(stargazer))
#---------------------
# Generate fake data
#---------------------
# Data-generating function
generate.data <- function(year, slope, start.year, start.value=0) {
x <- year - start.year
piracy <- (x * slope) + start.value
return(piracy)
}
# Generate data using coefficients from paper
pre.trend <- generate.data(1997:2002, -2.032, 1997)
post.trend <- generate.data(2003:2007, -0.733, 2002, rev(pre.trend)[1])
# Create sample data
example.data <- data.frame(year=1997:2007,
status=factor(c(rep("Pre", 6), rep("Post", 5))),
piracy=c(pre.trend, post.trend))
example.data
year status piracy
1 1997 Pre 0.000
2 1998 Pre -2.032
3 1999 Pre -4.064
4 2000 Pre -6.096
5 2001 Pre -8.128
6 2002 Pre -10.160
7 2003 Post -10.893
8 2004 Post -11.626
9 2005 Post -12.359
10 2006 Post -13.092
11 2007 Post -13.825
#---------
# Models
#---------
# Simple model showing overall trend
model.basic <- lm(piracy ~ year, data=example.data)
# Add status as a dummy; shows just the change in intercept
model.dummy <- lm(piracy ~ year + status, data=example.data)
# Add an interaction term to allow for different slopes and intercepts
model.interact <- lm(piracy ~ year + status + status*year, data=example.data)
# Alternatively, run two subsetted models
model.pre <- lm(piracy ~ year, data=example.data, subset=(status=="Pre"))
model.post <- lm(piracy ~ year, data=example.data, subset=(status=="Post"))
# Model output
col.labels <- c("Basic", "Dummy", "Interaction", "Pre-only", "Post-only")
var.labels <- c("Year", "Pre", "Year * Pre", "Constant")
stargazer(model.basic, model.dummy, model.interact, model.post, model.pre,
type="text", column.labels=col.labels, covariate.labels=var.labels, omit.stat="f",
dep.var.labels.include=FALSE, dep.var.caption="",
notes="For model 3, get the post trend by doing (Year * Pre) - Year")
==============================================================================================
Basic Dummy Interaction Pre-only Post-only
(1) (2) (3) (4) (5)
----------------------------------------------------------------------------------------------
Year -1.382*** -1.560*** -0.733*** -0.733*** -2.032***
(0.110) (0.221) (0.000) (0.000) (0.000)
Pre -1.299 2,601.000***
(1.403) (0.000)
Year * Pre -1.299***
(0.000)
Constant 2,759.000*** 3,115.000*** 1,457.000*** 1,457.000*** 4,058.000***
(219.400) (443.000) (0.000) (0.000) (0.000)
----------------------------------------------------------------------------------------------
Observations 11 11 11 5 6
R2 0.946 0.952 1.000 1.000 1.000
Adjusted R2 0.941 0.940 1.000 1.000 1.000
Residual Std. Error 1.149 (df = 9) 1.159 (df = 8) 0.000 (df = 7) 0.000 (df = 3) 0.000 (df = 4)
==============================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
For model 3, get the post trend by doing (Year * Pre) - Year
#--------
# Plots
#--------
# Single model
ggplot(example.data, aes(x=year, y=piracy)) +
stat_smooth(method="lm", se=FALSE) + geom_point() +
geom_vline(xintercept=2002, colour="darkred") + theme_bw()
# Just a change in intercept
predicted.data <- data.frame(year=1997:2007, status=c(rep("Pre", 11), rep("Post", 11)))
predicted.data$piracy.dummy <- predict(model.dummy, predicted.data)
ggplot() + geom_point(data=example.data, aes(x=year, y=piracy)) +
geom_line(data=predicted.data, aes(x=year, y=piracy.dummy, colour=status)) +
geom_vline(xintercept=2002, colour="darkred") + theme_bw()
# Change in slope and intercept
predicted.data$piracy.interact <- predict(model.interact, predicted.data)
ggplot() + geom_point(data=example.data, aes(x=year, y=piracy)) +
geom_line(data=predicted.data, aes(x=year, y=piracy.interact, colour=status)) +
geom_vline(xintercept=2002, colour="darkred") + theme_bw()
# Subsetted models
ggplot(example.data, aes(x=year, y=piracy)) +
geom_point() + stat_smooth(aes(group=status), method="lm") +
geom_vline(xintercept=2002, colour="darkred") + theme_bw()