Least Squares

\[y^{(t + 1)} = A y^{(t)}\]

\[Y_v = F_Y A_v \implies \begin{bmatrix} y^{(1)} \\ y^{(2)} \\ \vdots \\ y^{(t)} \end{bmatrix} = \begin{bmatrix} f(y^{(0)} ) \\ f(y^{(1)} ) \\ \vdots \\ f(y^{(t-1)} ) \end{bmatrix}A_v\]

\[Y_v \in \mathbb{R}^{(t-1)n \times 1}\]

\[F_Y \in \mathbb{R}^{(t-1)n \times n^2}\]

\[A_v \in \mathbb{R}^{n^2 \times 1}\]

Least Squares Solution:

\[A_v = (F_Y^T F_Y)^{-1}F_Y^T Y_v\]

Options:

Rho1

unif(-1,1)

seq(1/N, 1, 1/N) zero

rho2

standard normal unif(-1,1) norm(0.1, 1) seq(1/N, 1, 1/N) zero

YInitiial standard normal zero

### Data Generating Process ###
dgp <- function(N, periods_type, s_epsilon, s_alpha, s_beta, p_num, rho_1, rho_2, y_initial, seed){
    x = seed
  p_num_value <- if(p_num == "low"){
    N * .2
  } else if(p_num == "high"){
    N * .8
  }
  
  
  periods <- if(periods_type == "squared"){
    N^2 + N + 1
  } else if(periods_type == "plus2"){
    N + 2
  }
  
  
  set.seed(x)
  rho_1_values <- if(grepl("unif", rho_1)){
    range <- str_extract_all(pattern = "-?\\.?[0-9]", rho_1)
    runif(N, as.numeric(range[[1]][1]), as.numeric(range[[1]][2]))
  } else if(rho_1  == "seq(1/N, 1, 1/N)"){
    seq(1/N, 1, 1/N)
  } else if (is.numeric(rho_1)){
    rep(rho_1, N)
  } 
  
  
  set.seed(x + 7)
  rho_2_values <- if(rho_2 == "standard normal"){
    rnorm(N, 0, 1)
  } else if(rho_2 == "unif(-1, 1)"){
    runif(N, -1, 1)
  } else if(rho_2 == "norm(0.1, 1)"){
    rnorm(N, 0.1, 1)
  } else if(rho_2  == "seq(1/N, 1, 1/N)"){
    seq(1/N, 1, 1/N)
  } else if(rho_2 == "zero"){
    rep(0, N)
  }
  
  set.seed(2*x + 7)
  y_initial_values <- if(y_initial == "standard normal"){
    rnorm(N, 0, 1)
  } else if(y_initial == "zero"){
    rep(0, N)
  }
  
  p = p_num_value/N
  
  set.seed(3*x + 7)
  alpha = rnorm(N, mean = 0, sd = s_alpha)
  
  set.seed(4*x + 7)
  beta = rnorm(N, mean = 0, sd = s_beta)
  
  set.seed(5*x + 7)
  epsilon = rnorm(N, mean = 0, sd = s_epsilon)
  
  set.seed(6*x + 7)
  G <- erdos.renyi.game(N, p, type=c("gnp"), directed = FALSE, loops = F) %>%
      as_adjacency_matrix(sparse = F)

  Y <- matrix(nrow = N, ncol = periods + 1)
  colnames(Y) <- 0:periods

  Y[, "0"] <- y_initial_values

  for(t1 in 1:periods){
    
    t = t1 - 1
    
    y_t <- Y[, paste(t)]
    
    y_t1 <- alpha + beta + rho_1_values * y_t + (rho_2_values * G %*% y_t )+ epsilon
    
    Y[, paste(t1)] <- y_t1
    
  }

  comparison_matrix = (diag(N) + G)

  return(list(Y = Y, N = N, periods = periods, periods_type = periods_type, rankY = rankMatrix(Y)[1],
              s_epsilon = s_epsilon, 
              s_alpha = s_alpha,
              s_beta = s_beta, 
              p_num = p_num, 
              rho_1 = rho_1, 
              rho_2 = rho_2, 
              y_initial = y_initial, seed = seed, 
              comparison_matrix = comparison_matrix))

}

### Least Squares ###
least_squares <- function(output){
  
  Y<- output$Y
  
  
  N <- output$N
  periods <- output$periods
  
  F_Y <- c()
  Y_v <- c()
  
  for(i in 1:periods){
    t <- Y[, paste(i - 1)]
    
    f_t <- matrix(NA, ncol = N*N, nrow = N)
    
    for(j in 1:N){
      f_t[j, ]<- c(rep(rep(0, N), j-1), t(t), rep(rep(0, N), N-j))
    }
    
    F_Y <- rbind(F_Y, f_t)
  
    Y_v <- rbind(Y_v, t(t(Y[, paste(i)])))
  
  }
  
  
  A = try(solve(t(F_Y) %*% F_Y) %*% t(F_Y) %*% Y_v, silent = T)
  
  error <- ifelse(sum(ifelse(class(A) == "try-error", 1, 0)) >0, "error", "no error")
  error_message <- ifelse(error  == "error", paste(attributes(A)$condition), "")
  
  diagnostics <- rbind(N= output$N, periods = output$periods_type, rankY = output$rankY, s_alpha = output$s_alpha,s_epsilon =  output$s_epsilon, s_beta = output$s_beta, p_num = output$p_num, rho_1 = output$rho_1, rho_2 = output$rho_2, y_initial = output$y_initial, error = error, error_message = error_message, seed = output$seed)
  
  
  
  return(list(A = A, 
              diagnostics = diagnostics, 
              comparison_matrix = output$comparison_matrix))
  
}

Note: we removed $_1 = $ standard normal and normal(0.1, 1) because they contain zero and were causing errors.

N = c(5, 10, 15)
period_type = c("squared", "plus2")
p_num = c("low", "high")
s_epsilon = c(0.05,  0.5)
s_alpha =  c(0.05,  0.5)
s_beta = 0

rho_1 = c("seq(1/N, 1, 1/N)", "unif(-1, 1)", "unif(0.9, 1)")

rho_2 = "zero"

y_initial = c("standard normal", "zero")

tests <- expand.grid(N = N, 
                     periods_type = period_type,
            p_num = p_num, 
            s_epsilon = s_epsilon, 
            s_alpha = s_alpha,
            s_beta = s_beta, 
            rho_1=rho_1, 
            rho_2 = rho_2, 
            y_initial = y_initial, stringsAsFactors = F)


final <- data.frame()
for(i in 1:nrow(tests)){
  
  output <- dgp(N = tests[i, "N"], 
          periods_type = tests[i, "periods_type"], 
          p_num = tests[i, "p_num"], 
          s_epsilon = tests[i, "s_epsilon"],  
          s_alpha =  tests[i, "s_alpha"], 
          s_beta = tests[i, "s_beta"], 
          rho_1 = tests[i, "rho_1"],  
          rho_2 = tests[i, "rho_2"], 
          y_initial = tests[i, "y_initial"], 
          seed = 1
  )
  
  ls_out <- least_squares(output)

  final <- rbind(final, t(ls_out$diagnostics))
  
  
}

options(DT.options = list(pageLength = 100))
final %>% 
  datatable() %>%
  formatStyle(
    'error',
    target = "row",
    backgroundColor = styleEqual(c("error"), c('yellow'))
  )

Observations/Questions

If I change the seed, do the results change? Like we saw in VAR?

N = c(10, 15)
period_type = c("squared", "plus2")
p_num = c("low", "high")
s_epsilon = c(0.05,  0.5)
s_alpha =  c(0.05,  0.5)
s_beta = 0

rho_1 = c("seq(1/N, 1, 1/N)", "unif(-1, 1)")

rho_2 = "zero"

y_initial = c("standard normal", "zero")

tests <- expand.grid(N = N, 
                     periods_type = period_type,
            p_num = p_num, 
            s_epsilon = s_epsilon, 
            s_alpha = s_alpha,
            s_beta = s_beta, 
            rho_1=rho_1, 
            rho_2 = rho_2, 
            y_initial = y_initial, stringsAsFactors = F)


final_seed_test <- data.frame()

for(i in 1:nrow(tests)){
  for(j in 1:10){  
    output <- dgp(N = tests[i, "N"], 
            periods_type = tests[i, "periods_type"], 
            p_num = tests[i, "p_num"], 
            s_epsilon = tests[i, "s_epsilon"],  
            s_alpha =  tests[i, "s_alpha"], 
            s_beta = tests[i, "s_beta"], 
            rho_1 = tests[i, "rho_1"],  
            rho_2 = tests[i, "rho_2"], 
            y_initial = tests[i, "y_initial"], 
            seed = j)
    
    ls_out <- least_squares(output)
  
    final_seed_test <- rbind(final_seed_test, t(ls_out$diagnostics))
    
  }
}

error_rate <- final_seed_test %>%
  group_by(N, periods, s_alpha, s_epsilon, s_beta, p_num, rho_1, rho_2, y_initial, error) %>%
  count(name = "error_count") %>%
  ungroup() %>%
  complete(N, periods, s_alpha, s_epsilon, s_beta, p_num, rho_1, rho_2, y_initial, error, fill = list(error_count = 0)) %>%
  filter(error == "error") %>%
  mutate(error_rate  = error_count/10) %>%
  select(!error & !error_count) %>%
  rename(least_square = error_rate)

# g of theta  
g_theta <- function(matrix, theta){
  ifelse(abs(matrix) > theta, 1, 0)
}

# difference
disagreements <- function(original,estimate, N){
  return(sum(abs(original - estimate) == 1)/N^2)
}

Ridge

### Ridge ###
ridge <- function(output, lambda, theta){
  
  Y<- output$Y
  
  
  N <- output$N
  periods <- output$periods
  
  F_Y <- c()
  Y_v <- c()
  
  for(i in 1:periods){
    t <- Y[, paste(i - 1)]
    
    f_t <- matrix(NA, ncol = N*N, nrow = N)
    
    for(j in 1:N){
      f_t[j, ]<- c(rep(rep(0, N), j-1), t(t), rep(rep(0, N), N-j))
    }
    
    F_Y <- rbind(F_Y, f_t)
  
    Y_v <- rbind(Y_v, t(t(Y[, paste(i)])))
  
  }
  
  
  A = try(solve(t(F_Y) %*% F_Y + lambda * diag(N^2)) %*% t(F_Y) %*% Y_v, silent = T)
  A <- t(matrix(A, N, N))
  
  error <- ifelse(sum(ifelse(class(A) == "try-error", 1, 0)) >0, "error", "no error")
  error_message <- ifelse(error  == "error", paste(attributes(A)$condition), "")

  dis <- disagreements(output$comparison_matrix, g_theta(A, theta = theta), output$N)

  diagnostics <- rbind(N= output$N, periods = output$periods_type, rankY = output$rankY, 
                       s_alpha = output$s_alpha,s_epsilon =  output$s_epsilon, 
                       s_beta = output$s_beta, p_num = output$p_num, rho_1 = output$rho_1, 
                       rho_2 = output$rho_2, y_initial = output$y_initial, error = error, 
                       error_message = error_message, seed = output$seed, lambda = lambda, theta = theta,
                       disagreements = dis)

  return(list(A = A, diagnostics = diagnostics, comparison_matrix = output$comparison_matrix))
  
}

### Data Generating Process ###
dgp2 <- function(N, periods_type, s_epsilon, s_alpha, s_beta, p_num, rho_1, y_initial, seed){
    x = seed
  p_num_value <- if(p_num == "low"){
    N * .2
  } else if(p_num == "high"){
    N * .8
  }
  
  p = p_num_value/N
  set.seed(6*x + 7)
  G <- erdos.renyi.game(N, p, type=c("gnp"), directed = FALSE, loops = F) %>%
      as_adjacency_matrix(sparse = F)
  
  d_max = max(rowSums(G))
  
  periods <- if(periods_type == "squared"){
    N^2 + N + 1
  } else if(periods_type == "plus2"){
    N + 2
  }

  # pick rho_2 < (1 - rho_1)/d_max
  rho_2 = (1-rho_1)/d_max - 0.01
  
  set.seed(2*x + 7)
  y_initial_values <- if(y_initial == "standard normal"){
    rnorm(N, 0, 1)
  } else if(y_initial == "zero"){
    rep(0, N)
  }
  
  set.seed(3*x + 7)
  alpha = rnorm(N, mean = 0, sd = s_alpha)
  
  set.seed(4*x + 7)
  beta = rnorm(N, mean = 0, sd = s_beta)
  
  set.seed(5*x + 7)
  epsilon = rnorm(N, mean = 0, sd = s_epsilon)
  


  Y <- matrix(nrow = N, ncol = periods + 1)
  colnames(Y) <- 0:periods

  Y[, "0"] <- y_initial_values

  for(t1 in 1:periods){
    
    t = t1 - 1
    
    y_t <- Y[, paste(t)]
    
    y_t1 <- alpha + beta + rho_1 * y_t + (rho_2 * G %*% y_t )+ epsilon
    
    Y[, paste(t1)] <- y_t1
    
  }

  comparison_matrix = (diag(N) + G)

  return(list(Y = Y, N = N, periods = periods, periods_type = periods_type, rankY = rankMatrix(Y)[1],
              s_epsilon = s_epsilon, 
              s_alpha = s_alpha,
              s_beta = s_beta, 
              p_num = p_num, 
              rho_1 = rho_1, 
              rho_2 = rho_2, 
              y_initial = y_initial, seed = seed, 
              comparison_matrix = comparison_matrix))

}

N = 15
periods_type =c("squared", "plus2")
s_alpha = c(0, 0.2)
s_beta = c(0, 0.2)
s_epsilon = c(0, 0.2)
rho_1 = c(0.2, 0.6)
y_initial = c("standard normal", "zero")
p_num = c("low", "high")
theta = c(0.01,  0.0001)
lambda = c(0.01,  0.0001)


tests <- expand.grid(N = N, 
                     periods_type = periods_type, 
                     s_alpha = s_alpha, 
                     s_beta = s_beta,
                     s_epsilon = s_epsilon, 
                     rho_1 = rho_1,
                     y_initial = y_initial, 
                     p_num = p_num,theta= theta,
                     lambda = lambda,
                     theta= theta,
            stringsAsFactors = F)

final <- data.frame()
for(i in 1:nrow(tests)){
  
  output <- dgp2(N = tests[i, "N"], 
          periods_type = tests[i, "periods_type"], 
          p_num = tests[i, "p_num"], 
          s_epsilon = tests[i, "s_epsilon"],  
          s_alpha =  tests[i, "s_alpha"], 
          s_beta = tests[i, "s_beta"], 
          rho_1 = tests[i, "rho_1"],  
          y_initial = tests[i, "y_initial"], 
          seed = 1
  )
  
  out <- ridge(output = output, lambda = tests[i, "lambda"], theta = tests[i, "theta"])

  final <- rbind(final, t(out$diagnostics))
  
  
}

options(DT.options = list(pageLength = 100))
final %>% 
  datatable()

Least Squares

Sarah McDonald

8/10/2022

Observations/Questions

Ridge