\[y^{(t + 1)} = A y^{(t)}\]
\[Y_v = F_Y A_v \implies \begin{bmatrix} y^{(1)} \\ y^{(2)} \\ \vdots \\ y^{(t)} \end{bmatrix} = \begin{bmatrix} f(y^{(0)} ) \\ f(y^{(1)} ) \\ \vdots \\ f(y^{(t-1)} ) \end{bmatrix}A_v\]
\[Y_v \in \mathbb{R}^{(t-1)n \times 1}\]
\[F_Y \in \mathbb{R}^{(t-1)n \times n^2}\]
\[A_v \in \mathbb{R}^{n^2 \times 1}\]
Least Squares Solution:
\[A_v = (F_Y^T F_Y)^{-1}F_Y^T Y_v\]
Options:
Rho1
unif(-1,1)
seq(1/N, 1, 1/N) zero
rho2
standard normal unif(-1,1) norm(0.1, 1) seq(1/N, 1, 1/N) zero
YInitiial standard normal zero
### Data Generating Process ###
dgp <- function(N, periods_type, s_epsilon, s_alpha, s_beta, p_num, rho_1, rho_2, y_initial, seed){
x = seed
p_num_value <- if(p_num == "low"){
N * .2
} else if(p_num == "high"){
N * .8
}
periods <- if(periods_type == "squared"){
N^2 + N + 1
} else if(periods_type == "plus2"){
N + 2
}
set.seed(x)
rho_1_values <- if(grepl("unif", rho_1)){
range <- str_extract_all(pattern = "-?\\.?[0-9]", rho_1)
runif(N, as.numeric(range[[1]][1]), as.numeric(range[[1]][2]))
} else if(rho_1 == "seq(1/N, 1, 1/N)"){
seq(1/N, 1, 1/N)
} else if (is.numeric(rho_1)){
rep(rho_1, N)
}
set.seed(x + 7)
rho_2_values <- if(rho_2 == "standard normal"){
rnorm(N, 0, 1)
} else if(rho_2 == "unif(-1, 1)"){
runif(N, -1, 1)
} else if(rho_2 == "norm(0.1, 1)"){
rnorm(N, 0.1, 1)
} else if(rho_2 == "seq(1/N, 1, 1/N)"){
seq(1/N, 1, 1/N)
} else if(rho_2 == "zero"){
rep(0, N)
}
set.seed(2*x + 7)
y_initial_values <- if(y_initial == "standard normal"){
rnorm(N, 0, 1)
} else if(y_initial == "zero"){
rep(0, N)
}
p = p_num_value/N
set.seed(3*x + 7)
alpha = rnorm(N, mean = 0, sd = s_alpha)
set.seed(4*x + 7)
beta = rnorm(N, mean = 0, sd = s_beta)
set.seed(5*x + 7)
epsilon = rnorm(N, mean = 0, sd = s_epsilon)
set.seed(6*x + 7)
G <- erdos.renyi.game(N, p, type=c("gnp"), directed = FALSE, loops = F) %>%
as_adjacency_matrix(sparse = F)
Y <- matrix(nrow = N, ncol = periods + 1)
colnames(Y) <- 0:periods
Y[, "0"] <- y_initial_values
for(t1 in 1:periods){
t = t1 - 1
y_t <- Y[, paste(t)]
y_t1 <- alpha + beta + rho_1_values * y_t + (rho_2_values * G %*% y_t )+ epsilon
Y[, paste(t1)] <- y_t1
}
comparison_matrix = (diag(N) + G)
return(list(Y = Y, N = N, periods = periods, periods_type = periods_type, rankY = rankMatrix(Y)[1],
s_epsilon = s_epsilon,
s_alpha = s_alpha,
s_beta = s_beta,
p_num = p_num,
rho_1 = rho_1,
rho_2 = rho_2,
y_initial = y_initial, seed = seed,
comparison_matrix = comparison_matrix))
}
### Least Squares ###
least_squares <- function(output){
Y<- output$Y
N <- output$N
periods <- output$periods
F_Y <- c()
Y_v <- c()
for(i in 1:periods){
t <- Y[, paste(i - 1)]
f_t <- matrix(NA, ncol = N*N, nrow = N)
for(j in 1:N){
f_t[j, ]<- c(rep(rep(0, N), j-1), t(t), rep(rep(0, N), N-j))
}
F_Y <- rbind(F_Y, f_t)
Y_v <- rbind(Y_v, t(t(Y[, paste(i)])))
}
A = try(solve(t(F_Y) %*% F_Y) %*% t(F_Y) %*% Y_v, silent = T)
error <- ifelse(sum(ifelse(class(A) == "try-error", 1, 0)) >0, "error", "no error")
error_message <- ifelse(error == "error", paste(attributes(A)$condition), "")
diagnostics <- rbind(N= output$N, periods = output$periods_type, rankY = output$rankY, s_alpha = output$s_alpha,s_epsilon = output$s_epsilon, s_beta = output$s_beta, p_num = output$p_num, rho_1 = output$rho_1, rho_2 = output$rho_2, y_initial = output$y_initial, error = error, error_message = error_message, seed = output$seed)
return(list(A = A,
diagnostics = diagnostics,
comparison_matrix = output$comparison_matrix))
}
Note: we removed $_1 = $ standard normal and normal(0.1, 1) because they contain zero and were causing errors.
N = c(5, 10, 15)
period_type = c("squared", "plus2")
p_num = c("low", "high")
s_epsilon = c(0.05, 0.5)
s_alpha = c(0.05, 0.5)
s_beta = 0
rho_1 = c("seq(1/N, 1, 1/N)", "unif(-1, 1)", "unif(0.9, 1)")
rho_2 = "zero"
y_initial = c("standard normal", "zero")
tests <- expand.grid(N = N,
periods_type = period_type,
p_num = p_num,
s_epsilon = s_epsilon,
s_alpha = s_alpha,
s_beta = s_beta,
rho_1=rho_1,
rho_2 = rho_2,
y_initial = y_initial, stringsAsFactors = F)
final <- data.frame()
for(i in 1:nrow(tests)){
output <- dgp(N = tests[i, "N"],
periods_type = tests[i, "periods_type"],
p_num = tests[i, "p_num"],
s_epsilon = tests[i, "s_epsilon"],
s_alpha = tests[i, "s_alpha"],
s_beta = tests[i, "s_beta"],
rho_1 = tests[i, "rho_1"],
rho_2 = tests[i, "rho_2"],
y_initial = tests[i, "y_initial"],
seed = 1
)
ls_out <- least_squares(output)
final <- rbind(final, t(ls_out$diagnostics))
}
options(DT.options = list(pageLength = 100))
final %>%
datatable() %>%
formatStyle(
'error',
target = "row",
backgroundColor = styleEqual(c("error"), c('yellow'))
)
N = c(10, 15)
period_type = c("squared", "plus2")
p_num = c("low", "high")
s_epsilon = c(0.05, 0.5)
s_alpha = c(0.05, 0.5)
s_beta = 0
rho_1 = c("seq(1/N, 1, 1/N)", "unif(-1, 1)")
rho_2 = "zero"
y_initial = c("standard normal", "zero")
tests <- expand.grid(N = N,
periods_type = period_type,
p_num = p_num,
s_epsilon = s_epsilon,
s_alpha = s_alpha,
s_beta = s_beta,
rho_1=rho_1,
rho_2 = rho_2,
y_initial = y_initial, stringsAsFactors = F)
final_seed_test <- data.frame()
for(i in 1:nrow(tests)){
for(j in 1:10){
output <- dgp(N = tests[i, "N"],
periods_type = tests[i, "periods_type"],
p_num = tests[i, "p_num"],
s_epsilon = tests[i, "s_epsilon"],
s_alpha = tests[i, "s_alpha"],
s_beta = tests[i, "s_beta"],
rho_1 = tests[i, "rho_1"],
rho_2 = tests[i, "rho_2"],
y_initial = tests[i, "y_initial"],
seed = j)
ls_out <- least_squares(output)
final_seed_test <- rbind(final_seed_test, t(ls_out$diagnostics))
}
}
error_rate <- final_seed_test %>%
group_by(N, periods, s_alpha, s_epsilon, s_beta, p_num, rho_1, rho_2, y_initial, error) %>%
count(name = "error_count") %>%
ungroup() %>%
complete(N, periods, s_alpha, s_epsilon, s_beta, p_num, rho_1, rho_2, y_initial, error, fill = list(error_count = 0)) %>%
filter(error == "error") %>%
mutate(error_rate = error_count/10) %>%
select(!error & !error_count) %>%
rename(least_square = error_rate)
# g of theta
g_theta <- function(matrix, theta){
ifelse(abs(matrix) > theta, 1, 0)
}
# difference
disagreements <- function(original,estimate, N){
return(sum(abs(original - estimate) == 1)/N^2)
}
### Ridge ###
ridge <- function(output, lambda, theta){
Y<- output$Y
N <- output$N
periods <- output$periods
F_Y <- c()
Y_v <- c()
for(i in 1:periods){
t <- Y[, paste(i - 1)]
f_t <- matrix(NA, ncol = N*N, nrow = N)
for(j in 1:N){
f_t[j, ]<- c(rep(rep(0, N), j-1), t(t), rep(rep(0, N), N-j))
}
F_Y <- rbind(F_Y, f_t)
Y_v <- rbind(Y_v, t(t(Y[, paste(i)])))
}
A = try(solve(t(F_Y) %*% F_Y + lambda * diag(N^2)) %*% t(F_Y) %*% Y_v, silent = T)
A <- t(matrix(A, N, N))
error <- ifelse(sum(ifelse(class(A) == "try-error", 1, 0)) >0, "error", "no error")
error_message <- ifelse(error == "error", paste(attributes(A)$condition), "")
dis <- disagreements(output$comparison_matrix, g_theta(A, theta = theta), output$N)
diagnostics <- rbind(N= output$N, periods = output$periods_type, rankY = output$rankY,
s_alpha = output$s_alpha,s_epsilon = output$s_epsilon,
s_beta = output$s_beta, p_num = output$p_num, rho_1 = output$rho_1,
rho_2 = output$rho_2, y_initial = output$y_initial, error = error,
error_message = error_message, seed = output$seed, lambda = lambda, theta = theta,
disagreements = dis)
return(list(A = A, diagnostics = diagnostics, comparison_matrix = output$comparison_matrix))
}
### Data Generating Process ###
dgp2 <- function(N, periods_type, s_epsilon, s_alpha, s_beta, p_num, rho_1, y_initial, seed){
x = seed
p_num_value <- if(p_num == "low"){
N * .2
} else if(p_num == "high"){
N * .8
}
p = p_num_value/N
set.seed(6*x + 7)
G <- erdos.renyi.game(N, p, type=c("gnp"), directed = FALSE, loops = F) %>%
as_adjacency_matrix(sparse = F)
d_max = max(rowSums(G))
periods <- if(periods_type == "squared"){
N^2 + N + 1
} else if(periods_type == "plus2"){
N + 2
}
# pick rho_2 < (1 - rho_1)/d_max
rho_2 = (1-rho_1)/d_max - 0.01
set.seed(2*x + 7)
y_initial_values <- if(y_initial == "standard normal"){
rnorm(N, 0, 1)
} else if(y_initial == "zero"){
rep(0, N)
}
set.seed(3*x + 7)
alpha = rnorm(N, mean = 0, sd = s_alpha)
set.seed(4*x + 7)
beta = rnorm(N, mean = 0, sd = s_beta)
set.seed(5*x + 7)
epsilon = rnorm(N, mean = 0, sd = s_epsilon)
Y <- matrix(nrow = N, ncol = periods + 1)
colnames(Y) <- 0:periods
Y[, "0"] <- y_initial_values
for(t1 in 1:periods){
t = t1 - 1
y_t <- Y[, paste(t)]
y_t1 <- alpha + beta + rho_1 * y_t + (rho_2 * G %*% y_t )+ epsilon
Y[, paste(t1)] <- y_t1
}
comparison_matrix = (diag(N) + G)
return(list(Y = Y, N = N, periods = periods, periods_type = periods_type, rankY = rankMatrix(Y)[1],
s_epsilon = s_epsilon,
s_alpha = s_alpha,
s_beta = s_beta,
p_num = p_num,
rho_1 = rho_1,
rho_2 = rho_2,
y_initial = y_initial, seed = seed,
comparison_matrix = comparison_matrix))
}
N = 15
periods_type =c("squared", "plus2")
s_alpha = c(0, 0.2)
s_beta = c(0, 0.2)
s_epsilon = c(0, 0.2)
rho_1 = c(0.2, 0.6)
y_initial = c("standard normal", "zero")
p_num = c("low", "high")
theta = c(0.01, 0.0001)
lambda = c(0.01, 0.0001)
tests <- expand.grid(N = N,
periods_type = periods_type,
s_alpha = s_alpha,
s_beta = s_beta,
s_epsilon = s_epsilon,
rho_1 = rho_1,
y_initial = y_initial,
p_num = p_num,theta= theta,
lambda = lambda,
theta= theta,
stringsAsFactors = F)
final <- data.frame()
for(i in 1:nrow(tests)){
output <- dgp2(N = tests[i, "N"],
periods_type = tests[i, "periods_type"],
p_num = tests[i, "p_num"],
s_epsilon = tests[i, "s_epsilon"],
s_alpha = tests[i, "s_alpha"],
s_beta = tests[i, "s_beta"],
rho_1 = tests[i, "rho_1"],
y_initial = tests[i, "y_initial"],
seed = 1
)
out <- ridge(output = output, lambda = tests[i, "lambda"], theta = tests[i, "theta"])
final <- rbind(final, t(out$diagnostics))
}
options(DT.options = list(pageLength = 100))
final %>%
datatable()