#Task1
# Load the three example data sets used by the bootstrap regressions below.
# The workspace is deliberately NOT cleared with rm(list = ls()): a script
# should not wipe objects that belong to whoever sources it. Run the script in
# a fresh R session if a clean environment is needed.
data_dir <- file.path(
  "C:/Users/Sabuj Ganguly/OneDrive/Documents",
  "PhD 1st sem",
  "STAT Programing class"
)
data1 <- read.csv(file.path(data_dir, "lm_example1.csv"))
data2 <- read.csv(file.path(data_dir, "lm_example2.csv"))
data3 <- read.csv(file.path(data_dir, "lm_example3.csv"))

# View() opens a GUI data viewer, so only call it when someone is actually
# sitting at an interactive session (not under Rscript / batch runs).
if (interactive()) {
  View(data1)
  View(data2)
  View(data3)
}
# Min-max scale every column of `x` onto the interval [0, 1].
#
# Args:
#   x:     a plain vector (treated as a single column), a matrix, or a data
#          frame whose columns are numeric.
#   na.rm: if TRUE, missing values are ignored when computing each column's
#          minimum and maximum (they stay NA in the result). The default,
#          FALSE, keeps the original behaviour: a column containing an NA
#          comes back entirely NA.
#
# Returns:
#   An object with the same dimensions as `x` after the vector-to-matrix
#   conversion; a vector input therefore comes back as a one-column matrix.
unit_scale <- function(x, na.rm = FALSE) {
  if (is.vector(x)) x <- matrix(x, ncol = 1)
  # seq_len() keeps the loop from running on a zero-column input, where
  # 1:ncol(x) would iterate over c(1, 0) and fail.
  for (i in seq_len(ncol(x))) {
    col_range <- range(x[, i], na.rm = na.rm)
    span <- col_range[2] - col_range[1]
    if (isTRUE(span == 0)) {
      # Constant column: dividing by a zero range would give NaN, so map the
      # column to 0 instead (NAs, if any, are preserved by the subtraction).
      x[, i] <- x[, i] - col_range[1]
    } else {
      x[, i] <- (x[, i] - col_range[1]) / span
    }
  }
  x
}
# Bootstrap the coefficients of a linear model of `y` on min-max scaled
# predictors and save them to disk.
#
# Args:
#   data:   data frame containing a response column named "y"; every other
#           column is treated as a predictor and rescaled with unit_scale().
#           The response itself is left unscaled.
#   name:   label used to build the output file name "bootstrap_<name>.csv",
#           written to the current working directory.
#   n_boot: number of bootstrap resamples to draw (default 1000).
#
# Returns:
#   A numeric matrix with `n_boot` rows and one column per coefficient
#   ("Intercept" followed by the predictor names). Row i holds the
#   coefficients of lm(y ~ .) fitted to the i-th resample.
process_data <- function(data, name, n_boot = 1000) {
  if (!"y" %in% names(data)) {
    stop("`data` must contain a response column named \"y\".", call. = FALSE)
  }

  # drop = FALSE keeps a data frame even when only one predictor remains;
  # a negative which() index would also silently select nothing if "y" were
  # missing, hence the logical mask plus the explicit check above.
  x_data <- data[, names(data) != "y", drop = FALSE]
  for (col in seq_along(x_data)) {
    # unit_scale() returns a one-column matrix for vector input; flatten it
    # so the data frame keeps ordinary vector columns.
    x_data[[col]] <- as.vector(unit_scale(x_data[[col]]))
  }
  # [["y"]] requires an exact name match, unlike `$`, which partial-matches.
  scaled_data <- data.frame(x_data, y = data[["y"]])

  n <- nrow(scaled_data)
  coef_names <- c("Intercept", names(x_data))
  # One row per bootstrap replicate, one column per model coefficient.
  results <- matrix(
    NA_real_,
    nrow = n_boot,
    ncol = length(coef_names),
    dimnames = list(NULL, coef_names)
  )

  for (i in seq_len(n_boot)) {
    # Resample the rows with replacement, then refit the model.
    sample_rows <- sample.int(n, n, replace = TRUE)
    model <- lm(y ~ ., data = scaled_data[sample_rows, , drop = FALSE])
    results[i, ] <- coef(model)
  }

  # Persist the replicates, then return them so callers can inspect them.
  write.csv(results, paste0("bootstrap_", name, ".csv"), row.names = FALSE)
  results
}
# Run the bootstrap for each data set. The results are assigned rather than
# called bare so the large coefficient matrices are not auto-printed.
# Fix the RNG seed first so the resamples (and the CSV files written by
# process_data) can be reproduced on a rerun.
set.seed(2024)
results1 <- process_data(data1, "data1")
results2 <- process_data(data2, "data2")
results3 <- process_data(data3, "data3")
# Preview the first six bootstrap replicates for each data set.
# The "##" lines are output pasted from a single earlier run; because the
# bootstrap resamples rows at random, a rerun will not reproduce these exact
# numbers. Columns are the intercept followed by one coefficient per predictor.
head(results1)
## Intercept x1 x2 x3
## [1,] -0.1518341 -0.2613551 0.2845733 1.279309
## [2,] -0.1111140 -0.2678848 0.2130035 1.272306
## [3,] -0.1027857 -0.4460696 0.2477413 1.392513
## [4,] -0.1275855 -0.2889927 0.2419014 1.273180
## [5,] -0.1344774 -0.3347219 0.2669011 1.319900
## [6,] -0.1136196 -0.3334840 0.2249288 1.324006
head(results2)
## Intercept x1 x2 x3 x4
## [1,] -24.55088 54.46570 -15.766010 14.33286 47.45065
## [2,] -26.09128 29.46266 11.963964 11.69960 47.85632
## [3,] -25.48847 31.48928 7.148308 16.17414 44.79162
## [4,] -26.44543 29.43311 12.221071 12.72128 49.05907
## [5,] -27.58458 44.23484 -1.111510 11.82387 49.86749
## [6,] -23.90668 54.62487 -16.097602 14.26440 47.44882
head(results3)
## Intercept x1 x2 x3 x4 x5 x6
## [1,] 24.35593 -1.0436070 2.251052 0.002431174 1.614545 -21.50006 9.566731
## [2,] 25.86440 -4.1221243 9.320095 -3.231039862 4.084476 -20.81332 3.719141
## [3,] 26.20357 -1.8333625 8.375090 -3.110156144 2.324356 -24.76121 5.639179
## [4,] 22.45192 -3.0561174 2.075482 -1.948429773 3.147404 -10.46804 4.831821
## [5,] 23.49085 0.5042596 5.349578 -13.755938699 6.265376 -13.12240 3.863157
## [6,] 28.07829 -2.0007382 4.063906 -9.964111960 2.676478 -13.46914 0.581377