Simulating Random Walk with Drift

Collins Hackman

2022-03-22

Why is non-stationarity a problem for OLS?

I seek to answer this question via simulation

BIASED COEFFICIENTS

The OLS estimate of the AR(1) coefficient is biased in small samples

# Monte Carlo experiment: distribution of the OLS slope from regressing Y on
# its own first lag when Y is simulated as an ARIMA(0,1,0) process whose
# innovations have mean `mu` (drift) and standard deviation `sigma`.
set.seed(1234)

T      <- 25             # length of each simulated series
B      <- 1000           # number of Monte Carlo replications
mu     <- 0.1            # drift term (mean of the innovations)
sigma  <- 1              # standard deviation of the innovations
beta25 <- numeric(B)     # estimated slope on lag(Y, 1), one per replication

for (b in seq_len(B)) {
  # Simulate one integrated series and convert it to xts so that lag()
  # dispatches on the xts object inside the dyn$lm formula.
  Y <- as.xts(
    arima.sim(model = list(order = c(0, 1, 0)), n = T, mean = mu, sd = sigma)
  )
  fit <- dyn$lm(Y ~ lag(Y, 1))
  # Row 2 = coefficient on lag(Y, 1); column 1 = point estimate.
  beta25[b] <- summary(fit)$coefficients[2, 1]
}

# Histogram of the simulated slope estimates; the dashed vertical line marks
# the reference value 1.
g <- ggplot() +
  geom_histogram(
    mapping = aes(beta25),
    bins = 40,
    fill = "powderblue",
    color = "grey35"
  ) +
  geom_vline(
    xintercept = 1,
    linetype = "dashed",
    size = 1.2,
    color = "grey30"
  ) +
  labs(x = NULL, y = NULL) +
  theme_bw()
ggplotly(g)

T-STATISTIC

ANALYSIS: the \(t\) statistic is not normally distributed (even in large samples)

# Monte Carlo experiment: distribution of the t-statistic for the hypothesis
# that the slope on lag(Y, 1) equals 1, using a longer series.
# `B`, `mu` and `sigma` are not assigned here; they are read from the session.
T     <- 500          # length of each simulated series
# One slot per replication. The storage must be sized by B (replications),
# not by T (series length): sizing it by T only worked because R silently
# grows the vector, and would leave spurious zeros whenever B < T.
tstat <- numeric(B)
for (b in seq_len(B)) {
  Y <- as.xts(
    arima.sim(n = T, list(order = c(0, 1, 0)), mean = mu, sd = sigma)
  )
  fit      <- dyn$lm(Y ~ lag(Y, 1))
  fit.tab  <- summary(fit)$coefficients
  # (estimate - 1) / standard error, taken from row 2 (the lag term).
  tstat[b] <- (fit.tab[2, 1] - 1) / fit.tab[2, 2]
}

# Density-scaled histogram of the simulated t-statistics with the standard
# normal density drawn on top for comparison; the dashed line marks zero.
g1 <- ggplot() +
  geom_histogram(
    mapping = aes(x = tstat, y = ..density..),
    binwidth = 0.25,
    fill = "green",
    color = "grey35"
  ) +
  geom_vline(
    xintercept = 0,
    linetype = "dashed",
    size = 1.2,
    color = "cadetblue"
  ) +
  stat_function(
    fun = dnorm,
    args = list(mean = 0, sd = 1),
    color = "dodgerblue4"
  ) +
  xlim(-6, 6) +
  labs(x = "t-statistic", y = NULL) +
  theme_bw()
ggplotly(g1)

OLS regression of two non-stationary variables

ANSWER: regressing a non-stationary variable on another non-stationary variable leads to spurious evidence of dependence between the two series

# Monte Carlo experiment: regress one simulated ARIMA(0,1,0) series on a
# second, separately simulated one and record the slope t-statistic and R^2.
T     <- 500          # length of each simulated series
B     <- 1000         # number of Monte Carlo replications
mu    <- 0.1          # drift term (mean of the innovations)
sigma <- 1            # standard deviation of the innovations
tstat <- numeric(B)   # t-statistic of the slope on X, one per replication
R2    <- numeric(B)   # R squared of the regression, one per replication

for (b in seq_len(B)) {
  # Y is drawn first, then X, each from its own arima.sim() call.
  Y <- as.xts(
    arima.sim(model = list(order = c(0, 1, 0)), n = T, mean = mu, sd = sigma)
  )
  X <- as.xts(
    arima.sim(model = list(order = c(0, 1, 0)), n = T, mean = mu, sd = sigma)
  )

  fit      <- lm(Y ~ X)                  # OLS regression of Y on X in levels
  fit.tab  <- summary(fit)$coefficients
  tstat[b] <- fit.tab[2, 3]              # row 2 = X, column 3 = t value
  R2[b]    <- summary(fit)$r.squared
}

# Histogram of the slope t-statistics (top panel) and of the R squared
# values (bottom panel), stacked into a single interactive figure.
p1 <- ggplot() +
  geom_histogram(
    mapping = aes(tstat),
    binwidth = 5,
    fill = "lemonchiffon2",
    color = "grey35"
  ) +
  geom_vline(
    xintercept = 0,
    linetype = "dashed",
    size = 1.2,
    color = "cadetblue"
  ) +
  labs(x = "t-statistic", y = NULL) +
  theme_bw()

p2 <- ggplot() +
  geom_histogram(
    mapping = aes(R2),
    binwidth = 0.05,
    fill = "peachpuff2",
    color = "grey35"
  ) +
  xlim(c(-0.2, 1.2)) +
  labs(x = "R squared", y = NULL) +
  theme_bw()

# Convert both ggplot objects to plotly widgets before combining them.
p1 <- ggplotly(p1)
p2 <- ggplotly(p2)
subplot(p1, p2, nrows = 2)

Is the distribution of the Dickey-Fuller statistic normal?

ANSWER: Looking at the plot, the distribution of the DF statistic is not normal

# Monte Carlo experiment: distribution of the Dickey-Fuller statistic, i.e.
# the t value of the slope when diff(Y) is regressed on lag(Y, 1).
T     <- 100          # length of each simulated series
B     <- 1000         # number of Monte Carlo replications
mu    <- 0.1          # drift term (mean of the innovations)
sigma <- 1            # standard deviation of the innovations
DF    <- numeric(B)   # Dickey-Fuller statistic, one per replication

for (b in seq_len(B)) {
  Y <- as.xts(
    arima.sim(model = list(order = c(0, 1, 0)), n = T, mean = mu, sd = sigma)
  )
  fit <- dyn$lm(diff(Y) ~ lag(Y, 1))
  # Row 2 = coefficient on lag(Y, 1); column 3 = t value.
  DF[b] <- summary(fit)$coefficients[2, 3]
}

# Density-scaled histogram of the simulated DF statistics, overlaid with the
# Student t density with T - 1 degrees of freedom for comparison.
ggplot(data.frame(DF), aes(x = DF)) +
  geom_histogram(
    mapping = aes(y = ..density..),
    bins = 50,
    color = "steelblue1",
    fill = "grey90"
  ) +
  stat_function(
    fun = dt,
    args = list(df = (T - 1)),
    colour = "dodgerblue4"
  ) +
  xlim(-6, 6) +
  theme_classic()