# NEW QUESTION

# Original data ----
# x: simulated AR(1) series (coefficient 0.6, 100 observations). `mean` and
# `sd` are passed through arima.sim()'s `...` to the innovation generator
# (rnorm by default), so they describe the innovations, not the series itself.
x <- arima.sim(model = list(ar = 0.6), n = 100, mean = 10, sd = 5)

# y: -0.8 times x shifted by 10 time steps, plus standard-normal noise.
# stats::lag() is called by its full name so that a package masking `lag`
# (e.g. dplyr) cannot change what this line does; it shifts only the time
# index of x, the values themselves are untouched.
y <- -0.8 * stats::lag(x, 10) + rnorm(length(x))

# x and y are negatively correlated
plot.ts(cbind(x, y))

# [figure: plot of chunk unnamed-chunk-1]

# Same two series, with the sign of y flipped before plotting so that the
# negative relationship shows up as the two panels moving in the same direction.
plot.ts(cbind(x, -y))

# [figure: plot of chunk unnamed-chunk-1]

# Sample cross-correlation between x and y across lags (plotted by default).
ccf(x, y)

# [figure: plot of chunk unnamed-chunk-1]


# Inject between-batch outliers ----
# Two runs of 20 draws centred on -100 are spliced into each series, after
# observation 40 and after observation 80, giving vectors of length 140.
# NOTE(review): subsetting and c() appear to return plain numeric vectors, so
# the time shift that lag() gave y is presumably not carried over -- confirm
# this is intended.
x <- c(
  x[seq_len(40)],
  rnorm(n = 20, mean = -100),
  x[41:80],
  rnorm(n = 20, mean = -100),
  x[81:100]
)
y <- c(
  y[seq_len(40)],
  rnorm(n = 20, mean = -100),
  y[41:80],
  rnorm(n = 20, mean = -100),
  y[81:100]
)

# Inside the regular production windows x and y remain negatively correlated;
# the irrelevant outlier segments, however, weigh heavily on the overall
# correlation.
plot.ts(cbind(x, y))

# [figure: plot of chunk unnamed-chunk-1]

# Sample cross-correlation again, this time on the x and y that contain the
# spliced-in outlier segments.
ccf(x, y)

# [figure: plot of chunk unnamed-chunk-1]


# Our real data contain a great deal of this kind of noise, so the ccf
# estimate ends up answering 'do the different variables start at the same
# time, and what is the lag between their start times?' rather than 'at what
# lag does a change in x produce a change in y?'.