Load library
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
library(rpart)
library(e1071)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:randomForest':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Download dan load dataset
# Remote CSV locations: the labelled training data and the test cases to score.
trainUrl <- "https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv"
testUrl <- "https://d396qusza40orc.cloudfront.net/predmachlearn/pml-testing.csv"

# Read each file straight from its URL. The strings "NA", "#DIV/0!" and the
# empty string are all parsed as missing values, so the NA-based column filter
# that follows sees them as NA.
training <- read.csv(
  file = url(trainUrl),
  na.strings = c("NA", "#DIV/0!", "")
)
testing <- read.csv(
  file = url(testUrl),
  na.strings = c("NA", "#DIV/0!", "")
)
Praproses data
# Keep only the training columns that contain no missing values at all.
training <- training[, colSums(is.na(training)) == 0, drop = FALSE]

# Drop the first seven remaining columns by position.
# NOTE(review): presumably row-id / user / timestamp bookkeeping fields rather
# than measurements -- confirm against the dataset's column list.
training <- training[, -(1:7), drop = FALSE]

# The predictor set is defined by the training data: every surviving column
# except the outcome `classe`. Excluding the outcome by name (not by position)
# keeps this correct even if `classe` is not the last column.
predictors <- setdiff(names(training), "classe")

# The test data must supply each predictor as a complete (NA-free) column.
# Filtering the test columns independently and trimming them by position could
# silently produce a different column set that only fails later at prediction
# time, so fail here with an explicit message instead.
complete_test_cols <- names(testing)[colSums(is.na(testing)) == 0]
unusable_predictors <- setdiff(predictors, complete_test_cols)
if (length(unusable_predictors) > 0) {
  stop(
    "Predictors absent from, or containing missing values in, the test data: ",
    paste(unusable_predictors, collapse = ", "),
    call. = FALSE
  )
}

# Select the predictors by name, in training order; drop = FALSE keeps a data
# frame even when only one predictor remains.
testing <- testing[, predictors, drop = FALSE]
Split data train dan validasi
# Fix the RNG state so the partition below is reproducible.
set.seed(123)

# Row indices for the fitting subset: 70% of the rows, sampled with respect to
# the outcome `classe`; list = FALSE returns the indices as a matrix.
inTrain <- createDataPartition(y = training$classe, p = 0.7, list = FALSE)

# Rows not picked by the partition are held out for validation.
validationSet <- training[-inTrain, ]
trainingSet <- training[inTrain, ]

# Store the outcome as a factor, and give the hold-out outcome exactly the
# level set of the training outcome so both factors are directly comparable.
trainingSet[["classe"]] <- as.factor(trainingSet[["classe"]])
validationSet[["classe"]] <- factor(
  validationSet[["classe"]],
  levels = levels(trainingSet[["classe"]])
)
Latih model random forest
# Seed the RNG immediately before fitting so the fitted forest is reproducible.
set.seed(12345)
# Fit a random forest with `classe` as the response and every other column of
# trainingSet as a predictor; no tuning arguments are passed, so the
# randomForest defaults apply.
model_rf <- randomForest(classe ~ ., data = trainingSet)
Prediksi data validasi
# Score the held-out validation rows with the fitted forest.
pred_rf <- predict(object = model_rf, newdata = validationSet)
# Compare the predictions with the true validation labels; as a top-level
# expression the resulting confusion matrix and statistics are auto-printed.
confusionMatrix(data = pred_rf, reference = validationSet$classe)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E
## A 1674 3 0 0 0
## B 0 1131 3 0 0
## C 0 5 1023 8 4
## D 0 0 0 956 4
## E 0 0 0 0 1074
##
## Overall Statistics
##
## Accuracy : 0.9954
## 95% CI : (0.9933, 0.997)
## No Information Rate : 0.2845
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9942
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E
## Sensitivity 1.0000 0.9930 0.9971 0.9917 0.9926
## Specificity 0.9993 0.9994 0.9965 0.9992 1.0000
## Pos Pred Value 0.9982 0.9974 0.9837 0.9958 1.0000
## Neg Pred Value 1.0000 0.9983 0.9994 0.9984 0.9983
## Prevalence 0.2845 0.1935 0.1743 0.1638 0.1839
## Detection Rate 0.2845 0.1922 0.1738 0.1624 0.1825
## Detection Prevalence 0.2850 0.1927 0.1767 0.1631 0.1825
## Balanced Accuracy 0.9996 0.9962 0.9968 0.9954 0.9963
Prediksi 20 data test
# Restrict the test data to the model's predictor columns: every column name of
# trainingSet except the last one, in training order.
testing <- testing[, head(names(trainingSet), -1L)]
# Predict a class for each test case and print the resulting factor.
final_predictions <- predict(object = model_rf, newdata = testing)
print(x = final_predictions)
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## B A B A A E D B A A B C B A E E A B B B
## Levels: A B C D E