#Task1

# Load the three example data sets used below.
# The workspace is deliberately NOT cleared with rm(list = ls()): nothing in
# this script depends on an empty environment, and wiping it would silently
# destroy whatever objects the person running the script already had.
data_dir <- "C:/Users/Sabuj Ganguly/OneDrive/Documents/PhD 1st sem/STAT Programing class"
data1 <- read.csv(file.path(data_dir, "lm_example1.csv"))
data2 <- read.csv(file.path(data_dir, "lm_example2.csv"))
data3 <- read.csv(file.path(data_dir, "lm_example3.csv"))

# View() opens a data viewer, which is only useful when someone is sitting in
# front of an interactive session; skip it for batch runs (Rscript, knitting).
if (interactive()) {
  View(data1)
  View(data2)
  View(data3)
}
#' Min-max scale every column of `x` to the unit interval [0, 1].
#'
#' @param x A numeric vector, matrix, or data frame. Input without dimensions
#'   is treated as a single column.
#' @return `x` with each column replaced by `(x - min) / (max - min)`. Input
#'   without dimensions is returned as a one-column matrix. A constant column
#'   (max equal to min) is returned as all zeros instead of NaN. A column
#'   containing `NA` comes back entirely `NA`, because `min()` and `max()`
#'   propagate missing values.
unit_scale <- function(x) {
  # is.null(dim(x)) also catches vectors that carry attributes, which
  # is.vector() would reject.
  if (is.null(dim(x))) {
    x <- matrix(x, ncol = 1)
  }
  # seq_len() yields an empty loop for zero columns; 1:ncol(x) would give
  # c(1, 0) and index a column that does not exist.
  for (i in seq_len(ncol(x))) {
    column <- x[, i]
    lowest <- min(column)
    span <- max(column) - lowest
    if (!is.na(span) && span == 0) {
      # Constant column: dividing by a zero range would produce NaN.
      x[, i] <- 0
    } else {
      x[, i] <- (column - lowest) / span
    }
  }
  x
}
#' Bootstrap the coefficients of a linear model on unit-scaled predictors.
#'
#' Every column of `data` other than `y` is min-max scaled with
#' `unit_scale()`; `y` itself is left unscaled. The rows are then resampled
#' with replacement `n_boot` times, `lm(y ~ .)` is fitted on each resample,
#' and the coefficients are collected. The draws use R's global random number
#' generator, so call `set.seed()` beforehand for reproducible output.
#'
#' @param data Data frame containing a response column named `y`; all other
#'   columns are used as predictors.
#' @param name Label inserted into the output file name
#'   `bootstrap_<name>.csv`, which is written to the working directory.
#' @param n_boot Number of bootstrap resamples. Defaults to 1000.
#' @return A matrix with `n_boot` rows and one column per coefficient
#'   (`Intercept` followed by the predictor names). The same matrix is also
#'   written to the CSV file.
process_data <- function(data, name, n_boot = 1000) {
  if (!"y" %in% names(data)) {
    stop("`data` must contain a response column named \"y\".", call. = FALSE)
  }

  # drop = FALSE keeps a data frame even when only one predictor is left;
  # without it the result collapses to a bare vector and ncol() is NULL.
  x_data <- data[, setdiff(names(data), "y"), drop = FALSE]

  # unit_scale() returns a one-column matrix for vector input; as.vector()
  # turns it back into a plain column so the data frame keeps its names.
  for (col in seq_along(x_data)) {
    x_data[[col]] <- as.vector(unit_scale(x_data[[col]]))
  }

  scaled_data <- data.frame(x_data, y = data$y)
  n <- nrow(scaled_data)

  # One row per bootstrap replicate, one column per model coefficient.
  coef_names <- c("Intercept", names(x_data))
  results <- matrix(
    NA_real_,
    nrow = n_boot,
    ncol = length(coef_names),
    dimnames = list(NULL, coef_names)
  )

  for (i in seq_len(n_boot)) {
    # Resample row indices with replacement (same draws as sample(1:n, ...)).
    sample_rows <- sample.int(n, n, replace = TRUE)
    model <- lm(y ~ ., data = scaled_data[sample_rows, , drop = FALSE])
    results[i, ] <- coef(model)
  }

  # Persist the coefficients, then hand them back to the caller as well.
  write.csv(results, paste0("bootstrap_", name, ".csv"), row.names = FALSE)
  results
}
# Run the bootstrap for each data set. The results are assigned rather than
# printed because each one is a 1000-row matrix; head() is used to inspect
# them instead.
# The seed is fixed first so the random resamples, and therefore the returned
# matrices and the bootstrap_*.csv files, are identical on every run.
set.seed(2024)
results1 <- process_data(data1, "data1")
results2 <- process_data(data2, "data2")
results3 <- process_data(data3, "data3")
# Preview the first six bootstrap replicates for each data set. The `##` lines
# are output pasted from one earlier run; because the resampling is random,
# the exact numbers differ between runs unless the RNG seed is fixed.
head(results1, n = 6L)
##       Intercept         x1        x2       x3
## [1,] -0.1518341 -0.2613551 0.2845733 1.279309
## [2,] -0.1111140 -0.2678848 0.2130035 1.272306
## [3,] -0.1027857 -0.4460696 0.2477413 1.392513
## [4,] -0.1275855 -0.2889927 0.2419014 1.273180
## [5,] -0.1344774 -0.3347219 0.2669011 1.319900
## [6,] -0.1136196 -0.3334840 0.2249288 1.324006
head(results2, n = 6L)
##      Intercept       x1         x2       x3       x4
## [1,] -24.55088 54.46570 -15.766010 14.33286 47.45065
## [2,] -26.09128 29.46266  11.963964 11.69960 47.85632
## [3,] -25.48847 31.48928   7.148308 16.17414 44.79162
## [4,] -26.44543 29.43311  12.221071 12.72128 49.05907
## [5,] -27.58458 44.23484  -1.111510 11.82387 49.86749
## [6,] -23.90668 54.62487 -16.097602 14.26440 47.44882
head(results3, n = 6L)
##      Intercept         x1       x2            x3       x4        x5       x6
## [1,]  24.35593 -1.0436070 2.251052   0.002431174 1.614545 -21.50006 9.566731
## [2,]  25.86440 -4.1221243 9.320095  -3.231039862 4.084476 -20.81332 3.719141
## [3,]  26.20357 -1.8333625 8.375090  -3.110156144 2.324356 -24.76121 5.639179
## [4,]  22.45192 -3.0561174 2.075482  -1.948429773 3.147404 -10.46804 4.831821
## [5,]  23.49085  0.5042596 5.349578 -13.755938699 6.265376 -13.12240 3.863157
## [6,]  28.07829 -2.0007382 4.063906  -9.964111960 2.676478 -13.46914 0.581377