library(caret) library(randomForest) library(ggplot2) library(dplyr)
train_url <- “https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv” test_url <- “https://d396qusza40orc.cloudfront.net/predmachlearn/pml-testing.csv”
train_data <- read.csv(train_url, na.strings = c(“NA”, "“,”#DIV/0!“)) test_data <- read.csv(test_url, na.strings = c(”NA“,”“,”#DIV/0!"))
str(train_data) summary(train_data)
train_data <- train_data[, colSums(is.na(train_data)) == 0]
train_data <- train_data %>% select(-c(1:7))
train_data\(classe <- as.factor(train_data\)classe)
set.seed(123) trainIndex <- createDataPartition(train_data$classe, p = 0.8, list = FALSE) train_set <- train_data[trainIndex, ] valid_set <- train_data[-trainIndex, ]
set.seed(123) rf_model <- randomForest(classe ~ ., data = train_set, ntree = 100)
pred_valid <- predict(rf_model, valid_set) conf_matrix <- confusionMatrix(pred_valid, valid_set$classe) print(conf_matrix)
test_data <- test_data[, colnames(test_data) %in% colnames(train_set)] test_predictions <- predict(rf_model, test_data) print(test_predictions)
cat(“Random Forest performed well on the validation set.”) cat(“The model predicts the exercise performance with high accuracy.”) cat(“Predictions were made on 20 test cases.”)