## Load required package
if (!require("glmnet")) install.packages("glmnet", dependencies=TRUE)
## Loading required package: glmnet
## Loading required package: Matrix
## Loaded glmnet 4.1-8
library(glmnet)

set.seed(42)  # Set seed for reproducibility

##############################################################
##
## Example 1: Sparse model with 15 true predictors out of 5000
##
##############################################################

n <- 1000  # Number of observations
p <- 5000  # Number of predictors included in model
real_p <- 15  # Number of true predictors

## Generate the data
x <- matrix(rnorm(n * p), nrow = n, ncol = p)
y <- apply(x[, 1:real_p], 1, sum) + rnorm(n)

## Split data into training and testing datasets
train_rows <- sample(1:n, 0.66 * n)
x_train <- x[train_rows, ]
x_test <- x[-train_rows, ]

y_train <- y[train_rows]
y_test <- y[-train_rows]

## Function to calculate mean squared error
calculate_mse <- function(y_true, y_pred) {
  mean((y_true - y_pred)^2)
}

## Ridge Regression (alpha = 0)
alpha0_fit <- cv.glmnet(x_train, y_train, type.measure = "mse", alpha = 0, family = "gaussian")
alpha0_pred <- predict(alpha0_fit, s = alpha0_fit$lambda.1se, newx = x_test)
alpha0_mse <- calculate_mse(y_test, alpha0_pred)

## Lasso Regression (alpha = 1)
alpha1_fit <- cv.glmnet(x_train, y_train, type.measure = "mse", alpha = 1, family = "gaussian")
alpha1_pred <- predict(alpha1_fit, s = alpha1_fit$lambda.1se, newx = x_test)
alpha1_mse <- calculate_mse(y_test, alpha1_pred)

## Elastic Net (alpha = 0.5)
alpha0_5_fit <- cv.glmnet(x_train, y_train, type.measure = "mse", alpha = 0.5, family = "gaussian")
alpha0_5_pred <- predict(alpha0_5_fit, s = alpha0_5_fit$lambda.1se, newx = x_test)
alpha0_5_mse <- calculate_mse(y_test, alpha0_5_pred)

## Cross-validation over different alpha values
list_of_fits <- list()
for (i in 0:10) {
  fit_name <- paste0("alpha", i / 10)
  list_of_fits[[fit_name]] <- cv.glmnet(x_train, y_train, type.measure = "mse", alpha = i / 10, family = "gaussian")
}

results <- data.frame()
for (i in 0:10) {
  fit_name <- paste0("alpha", i / 10)
  predicted <- predict(list_of_fits[[fit_name]], s = list_of_fits[[fit_name]]$lambda.1se, newx = x_test)
  mse <- calculate_mse(y_test, predicted)
  temp <- data.frame(alpha = i / 10, mse = mse, fit_name = fit_name)
  results <- rbind(results, temp)
}

## View the results
print(results)
##    alpha       mse fit_name
## 1    0.0 14.918840   alpha0
## 2    0.1  2.256924 alpha0.1
## 3    0.2  1.472927 alpha0.2
## 4    0.3  1.362394 alpha0.3
## 5    0.4  1.259794 alpha0.4
## 6    0.5  1.252103 alpha0.5
## 7    0.6  1.253330 alpha0.6
## 8    0.7  1.212927 alpha0.7
## 9    0.8  1.184028 alpha0.8
## 10   0.9  1.182919 alpha0.9
## 11   1.0  1.184701   alpha1
##############################################################
##
## Example 2: Dense model with 1500 true predictors out of 5000
##
##############################################################

set.seed(42)  # Set seed for reproducibility

n <- 1000    # Number of observations
p <- 5000    # Number of predictors included in model
real_p <- 1500  # Number of true predictors

## Generate the data
x <- matrix(rnorm(n * p), nrow = n, ncol = p)
y <- apply(x[, 1:real_p], 1, sum) + rnorm(n)

## Split data into train (2/3) and test (1/3) sets
train_rows <- sample(1:n, 0.66 * n)
x_train <- x[train_rows, ]
x_test <- x[-train_rows, ]

y_train <- y[train_rows]
y_test <- y[-train_rows]

list_of_fits <- list()
for (i in 0:10) {
  fit_name <- paste0("alpha", i / 10)
  list_of_fits[[fit_name]] <- cv.glmnet(x_train, y_train, type.measure = "mse", alpha = i / 10, family = "gaussian")
}

results <- data.frame()
for (i in 0:10) {
  fit_name <- paste0("alpha", i / 10)
  predicted <- predict(list_of_fits[[fit_name]], s = list_of_fits[[fit_name]]$lambda.1se, newx = x_test)
  mse <- calculate_mse(y_test, predicted)
  temp <- data.frame(alpha = i / 10, mse = mse, fit_name = fit_name)
  results <- rbind(results, temp)
}

## View the results
print(results)
##    alpha      mse fit_name
## 1    0.0 1400.375   alpha0
## 2    0.1 1545.035 alpha0.1
## 3    0.2 1545.035 alpha0.2
## 4    0.3 1545.035 alpha0.3
## 5    0.4 1545.035 alpha0.4
## 6    0.5 1545.035 alpha0.5
## 7    0.6 1545.035 alpha0.6
## 8    0.7 1545.035 alpha0.7
## 9    0.8 1545.035 alpha0.8
## 10   0.9 1545.035 alpha0.9
## 11   1.0 1545.035   alpha1