fitTPS <- function(x, y, k = 100, lsp = c(-5, 5)) {
  # Helper functions
  eta <- function(r) ifelse(r > 0, r^2 * log(r), 0)
  dist_euclidean <- function(a, b) sqrt(sum((a - b)^2))
  
  # Choose k points x*
  n <- nrow(x)
  if (n <= k) {
    x_star <- x
  } else {
    set.seed(123)  # For reproducibility
    x_star <- x[sample(1:n, k), ]
  }
  
  # Create E and E*
  E <- outer(1:n, 1:k, Vectorize(function(i, j) eta(dist_euclidean(x[i, ], x_star[j, ]))))
  E_star <- outer(1:k, 1:k, Vectorize(function(i, j) eta(dist_euclidean(x_star[i, ], x_star[j, ]))))
  
  # Create T and T*
  T <- cbind(1, x)
  T_star <- cbind(1, x_star)
  
  # QR decomposition to find Z
  qr_T_star <- qr(T_star)
  Q <- qr.Q(qr_T_star, complete = TRUE)
  Z <- Q[, -(1:3)]
  
  # Reparameterize
  X <- cbind(E %*% Z, T)
  S <- rbind(cbind(t(Z) %*% E_star %*% Z, matrix(0, ncol = 3, nrow = k - 3)), 
             matrix(0, nrow = 3, ncol = k))
  
  # Grid search for lambda using GCV
  lambda_vals <- exp(seq(lsp[1], lsp[2], length.out = 100))
  gcv_scores <- numeric(100)
  edf_vals <- numeric(100)
  best_gcv <- Inf
  best_lambda <- lambda_vals[1]
  best_beta <- NULL
  
  for (i in 1:100) {
    lambda <- lambda_vals[i]
    XtX_lambdaS <- t(X) %*% X + lambda * S
    beta_hat <- solve(XtX_lambdaS, t(X) %*% y)
    mu_hat <- X %*% beta_hat
    EDF <- sum(1 / (1 + lambda * diag(solve(XtX_lambdaS))))
    GCV <- sum((y - mu_hat)^2) / (n - EDF)^2
    gcv_scores[i] <- GCV
    edf_vals[i] <- EDF
    
    if (GCV < best_gcv) {
      best_gcv <- GCV
      best_lambda <- lambda
      best_beta <- beta_hat
    }
  }
  
  mu_hat <- X %*% best_beta
  
  return(structure(list(beta = best_beta, mu = mu_hat, medf = edf_vals[which.min(gcv_scores)], 
                        lambda = lambda_vals, gcv = gcv_scores, edf = edf_vals, x_star = x_star, Z = Z),
                   class = "tps"))
}

plot.tps <- function(tps_obj, ...) {
  # Helper functions
  eta <- function(r) ifelse(r > 0, r^2 * log(r), 0)
  dist_euclidean <- function(a, b) sqrt(sum((a - b)^2))
  
  # Extract values from the tps object
  x_star <- tps_obj$x_star
  beta <- tps_obj$beta
  Z <- tps_obj$Z
  
  # Create a grid of values for prediction
  x1 <- seq(0, 1, length.out = 50)
  x2 <- seq(0, 1, length.out = 50)
  grid <- expand.grid(x1 = x1, x2 = x2)
  E_grid <- outer(1:nrow(grid), 1:nrow(x_star), Vectorize(function(i, j) eta(dist_euclidean(as.numeric(grid[i, ]), as.numeric(x_star[j, ])))))
  X_grid <- cbind(E_grid %*% Z, cbind(1, grid))
  
  # Ensure X_grid and beta are numeric and check dimensions
  X_grid <- as.matrix(X_grid)
  beta <- as.numeric(beta)
  
  print(dim(X_grid))
  print(length(beta))
  
  # Predict values on the grid
  mu_grid <- X_grid %*% beta
  
  # Reshape predictions to a matrix for plotting
  mu_matrix <- matrix(mu_grid, nrow = 50, ncol = 50)
  
  # Plotting
  contour(x1, x2, mu_matrix, ...)
  persp(x1, x2, mu_matrix, theta = 30, phi = 30, expand = 0.5, col = "lightblue", ...)
}

# Example usage with generated data
ff <- function(x) exp(-(x[,1]-.3)^2/.2^2-(x[,2] - .3)^2/.3^2)*.5 + exp(-(x[,1]-.7)^2/.25^2 - (x[,2] - .8 )^2/.3^2)

n <- 500
x <- matrix(runif(n*2), n, 2)
y <- ff(x) + rnorm(n)*.1

# Fit the model
tps_model <- fitTPS(x, y)

# Plot the results
plot.tps(tps_model)
## [1] 2500  100
## [1] 100