# Setup ----
# Pin a CRAN mirror so install.packages() can run without prompting for one.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Packages this script depends on. "xgboost" is never attached directly, but
# caret needs it installed to fit the method = "xgbLinear" model used in this
# script.
required_packages <- c(
  "caret", "dplyr", "ggplot2", "corrplot", "randomForest", "xgboost"
)

# Install only the packages that are not already available.
for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(dplyr)
library(ggplot2)
library(corrplot)
library(caret)
library(randomForest)
# Data loading and preparation ----
# NOTE(review): absolute, machine-specific path -- the script only runs where
# the CSV exists at exactly this location.
data <- read.csv(
  "C:/Users/hasif/Downloads/rgroupprojectmypart/diabetesprojectdata.csv"
)

# Coerce BMI to numeric. Going through as.character() means a factor column is
# converted from its labels rather than its integer level codes. Values that
# cannot be parsed become NA (with a warning).
data$BMI <- as.numeric(as.character(data$BMI))

# Drop every row that has an NA in any column, including BMI values that
# failed the conversion above.
data <- na.omit(data)

# BMI is the regression target; remove Outcome so it is not used as a
# predictor.
dataReg <- data %>%
  select(-Outcome)

# Seeded split on BMI: p = 0.8 of the rows go to training, the rest to testing.
set.seed(123)
trainIndex <- createDataPartition(dataReg$BMI, p = 0.8, list = FALSE)
trainData <- dataReg[trainIndex, ]
testData <- dataReg[-trainIndex, ]
# Linear regression ----
# Fit BMI on all remaining columns of the training set through caret, using
# caret's default resampling settings (no trControl is supplied).
lm_model <- train(BMI ~ ., data = trainData, method = "lm")

# Terminate the label line so the printed summary starts on its own line.
cat("Linear Regression Summary:\n")
print(summary(lm_model$finalModel))

# Predictions on the held-out test rows.
lm_pred <- predict(lm_model, newdata = testData)
# Random forest ----
# Tune mtry with 5-fold cross-validation. The candidates run from 2 up to
# ncol(trainData) - 1, i.e. the number of columns excluding the BMI response.
rf_model <- train(
  BMI ~ .,
  data = trainData,
  method = "rf",
  tuneGrid = expand.grid(.mtry = seq(2, ncol(trainData) - 1, by = 1)),
  trControl = trainControl(method = "cv", number = 5)
)

# Terminate the label line so the data-frame header is not printed on the
# same line as the label.
cat("Forest Model Parameters:\n")
print(rf_model$bestTune)

# Predictions on the held-out test rows.
rf_pred <- predict(rf_model, newdata = testData)
# Gradient boosting (linear booster) ----
# Grid-search nrounds, lambda, alpha and eta with 5-fold cross-validation.
# The grid has 4 * 3 * 3 * 3 = 108 parameter combinations.
xgb_model <- train(
  BMI ~ .,
  data = trainData,
  method = "xgbLinear",
  tuneGrid = expand.grid(
    .nrounds = seq(50, 200, by = 50),
    .lambda = c(0, 0.1, 1),
    .alpha = c(0, 0.1, 1),
    .eta = c(0.01, 0.1, 0.3)
  ),
  trControl = trainControl(method = "cv", number = 5)
)

# Terminate the label line so the data-frame header is not printed on the
# same line as the label.
cat("Model Best Parameters:\n")
print(xgb_model$bestTune)

# Predictions on the held-out test rows.
xgb_pred <- predict(xgb_model, newdata = testData)