```{r setup, include=FALSE}
# Global knitr option: echo the source of every chunk in the rendered report.
knitr::opts_chunk$set(echo = TRUE)
```
Using data from accelerometers, we predict the manner in which individuals perform weightlifting exercises. The target variable is `classe`, which indicates the correctness of the exercise execution.
```{r}
# Packages used by the analysis: caret for partitioning and the confusion
# matrix, randomForest for the model, dplyr for column selection.
library(caret)
library(randomForest)
library(ggplot2)
library(dplyr)
```
```{r}
# Locations of the training and testing CSV files.
train_url <- "https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv"
test_url <- "https://d396qusza40orc.cloudfront.net/predmachlearn/pml-testing.csv"

# Tokens that are read as NA: the literal "NA", empty fields, and "#DIV/0!".
# Both files are read with the same set so they are parsed consistently.
na_tokens <- c("NA", "", "#DIV/0!")

train_data <- read.csv(train_url, na.strings = na_tokens)
test_data <- read.csv(test_url, na.strings = na_tokens)

# Quick look at the structure and per-column summaries of the training data.
str(train_data)
summary(train_data)
```
```{r}
# Keep only the columns that contain no missing values at all
# (any column with at least one NA is dropped).
train_data <- train_data[, colSums(is.na(train_data)) == 0]

# Drop the first seven remaining columns by position.
# NOTE(review): these are presumably identifier/timestamp/window columns that
# should not be used as predictors -- confirm against the data dictionary.
train_data <- train_data %>%
  select(-(1:7))

# The outcome must be a factor so that the model is fit as a classifier.
train_data$classe <- as.factor(train_data$classe)
```
```{r}
# Fix the RNG seed so the partition is reproducible.
set.seed(123)

# 80/20 split on the outcome; list = FALSE returns the row indices in a form
# that can be used directly for row subsetting.
trainIndex <- createDataPartition(train_data$classe, p = 0.8, list = FALSE)

train_set <- train_data[trainIndex, ]
valid_set <- train_data[-trainIndex, ]
```
```{r}
# Re-seed before fitting so the forest is reproducible even when this chunk
# is run on its own.
set.seed(123)

# Random forest of 100 trees predicting classe from all other columns.
rf_model <- randomForest(classe ~ ., data = train_set, ntree = 100)
```
```{r}
# Predict the held-out validation rows and compare against the true labels.
pred_valid <- predict(rf_model, newdata = valid_set)
conf_matrix <- confusionMatrix(data = pred_valid, reference = valid_set$classe)

# Print the confusion matrix and its accuracy statistics in the report.
conf_matrix
```
```{r}
# Predictor columns the model was trained on (everything except the outcome).
predictor_names <- setdiff(colnames(train_set), "classe")

# Select the predictors by name so a column missing from the test file fails
# here with a clear error; drop = FALSE keeps the result a data frame.
test_data <- test_data[, predictor_names, drop = FALSE]

test_predictions <- predict(rf_model, newdata = test_data)
test_predictions
```