1. Memuat Data
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(caret)
## Loading required package: lattice
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
## The following object is masked from 'package:dplyr':
##
## combine
# Download the training and testing datasets.
# The strings "NA", "" and "#DIV/0!" are all read in as missing values.
train_url <- "https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv"
test_url <- "https://d396qusza40orc.cloudfront.net/predmachlearn/pml-testing.csv"
training <- read.csv(
  file = url(train_url),
  na.strings = c("NA", "", "#DIV/0!")
)
testing <- read.csv(
  file = url(test_url),
  na.strings = c("NA", "", "#DIV/0!")
)
2. Membersihkan Data
# Decide which columns to keep from the training set only, then apply the
# same selection (by name) to the testing set. Selecting columns separately
# for each set can leave them with different predictors, after which a
# positional drop removes different columns and predict() may fail.

# Columns that exist only in `testing` are carried along unchanged.
testing_only_cols <- setdiff(names(testing), names(training))

# Keep only the columns that have no missing values in the training set.
complete_cols <- names(training)[colSums(is.na(training)) == 0]

# Drop the first seven remaining columns (identifier columns that are not
# needed for modelling).
feature_cols <- complete_cols[-seq_len(7)]

training <- training[, feature_cols, drop = FALSE]
testing <- testing[
  ,
  c(intersect(feature_cols, names(testing)), testing_only_cols),
  drop = FALSE
]

# Make sure the target variable `classe` is present and is a factor.
stopifnot("classe" %in% names(training))
training$classe <- as.factor(training$classe)
3. Eksplorasi Data
# Print per-column summary statistics for the cleaned training set
# (quantiles and mean for numeric columns, level counts for factors).
summary(training)
## roll_belt pitch_belt yaw_belt total_accel_belt
## Min. :-28.90 Min. :-55.8000 Min. :-180.00 Min. : 0.00
## 1st Qu.: 1.10 1st Qu.: 1.7600 1st Qu.: -88.30 1st Qu.: 3.00
## Median :113.00 Median : 5.2800 Median : -13.00 Median :17.00
## Mean : 64.41 Mean : 0.3053 Mean : -11.21 Mean :11.31
## 3rd Qu.:123.00 3rd Qu.: 14.9000 3rd Qu.: 12.90 3rd Qu.:18.00
## Max. :162.00 Max. : 60.3000 Max. : 179.00 Max. :29.00
## gyros_belt_x gyros_belt_y gyros_belt_z accel_belt_x
## Min. :-1.040000 Min. :-0.64000 Min. :-1.4600 Min. :-120.000
## 1st Qu.:-0.030000 1st Qu.: 0.00000 1st Qu.:-0.2000 1st Qu.: -21.000
## Median : 0.030000 Median : 0.02000 Median :-0.1000 Median : -15.000
## Mean :-0.005592 Mean : 0.03959 Mean :-0.1305 Mean : -5.595
## 3rd Qu.: 0.110000 3rd Qu.: 0.11000 3rd Qu.:-0.0200 3rd Qu.: -5.000
## Max. : 2.220000 Max. : 0.64000 Max. : 1.6200 Max. : 85.000
## accel_belt_y accel_belt_z magnet_belt_x magnet_belt_y
## Min. :-69.00 Min. :-275.00 Min. :-52.0 Min. :354.0
## 1st Qu.: 3.00 1st Qu.:-162.00 1st Qu.: 9.0 1st Qu.:581.0
## Median : 35.00 Median :-152.00 Median : 35.0 Median :601.0
## Mean : 30.15 Mean : -72.59 Mean : 55.6 Mean :593.7
## 3rd Qu.: 61.00 3rd Qu.: 27.00 3rd Qu.: 59.0 3rd Qu.:610.0
## Max. :164.00 Max. : 105.00 Max. :485.0 Max. :673.0
## magnet_belt_z roll_arm pitch_arm yaw_arm
## Min. :-623.0 Min. :-180.00 Min. :-88.800 Min. :-180.0000
## 1st Qu.:-375.0 1st Qu.: -31.77 1st Qu.:-25.900 1st Qu.: -43.1000
## Median :-320.0 Median : 0.00 Median : 0.000 Median : 0.0000
## Mean :-345.5 Mean : 17.83 Mean : -4.612 Mean : -0.6188
## 3rd Qu.:-306.0 3rd Qu.: 77.30 3rd Qu.: 11.200 3rd Qu.: 45.8750
## Max. : 293.0 Max. : 180.00 Max. : 88.500 Max. : 180.0000
## total_accel_arm gyros_arm_x gyros_arm_y gyros_arm_z
## Min. : 1.00 Min. :-6.37000 Min. :-3.4400 Min. :-2.3300
## 1st Qu.:17.00 1st Qu.:-1.33000 1st Qu.:-0.8000 1st Qu.:-0.0700
## Median :27.00 Median : 0.08000 Median :-0.2400 Median : 0.2300
## Mean :25.51 Mean : 0.04277 Mean :-0.2571 Mean : 0.2695
## 3rd Qu.:33.00 3rd Qu.: 1.57000 3rd Qu.: 0.1400 3rd Qu.: 0.7200
## Max. :66.00 Max. : 4.87000 Max. : 2.8400 Max. : 3.0200
## accel_arm_x accel_arm_y accel_arm_z magnet_arm_x
## Min. :-404.00 Min. :-318.0 Min. :-636.00 Min. :-584.0
## 1st Qu.:-242.00 1st Qu.: -54.0 1st Qu.:-143.00 1st Qu.:-300.0
## Median : -44.00 Median : 14.0 Median : -47.00 Median : 289.0
## Mean : -60.24 Mean : 32.6 Mean : -71.25 Mean : 191.7
## 3rd Qu.: 84.00 3rd Qu.: 139.0 3rd Qu.: 23.00 3rd Qu.: 637.0
## Max. : 437.00 Max. : 308.0 Max. : 292.00 Max. : 782.0
## magnet_arm_y magnet_arm_z roll_dumbbell pitch_dumbbell
## Min. :-392.0 Min. :-597.0 Min. :-153.71 Min. :-149.59
## 1st Qu.: -9.0 1st Qu.: 131.2 1st Qu.: -18.49 1st Qu.: -40.89
## Median : 202.0 Median : 444.0 Median : 48.17 Median : -20.96
## Mean : 156.6 Mean : 306.5 Mean : 23.84 Mean : -10.78
## 3rd Qu.: 323.0 3rd Qu.: 545.0 3rd Qu.: 67.61 3rd Qu.: 17.50
## Max. : 583.0 Max. : 694.0 Max. : 153.55 Max. : 149.40
## yaw_dumbbell total_accel_dumbbell gyros_dumbbell_x gyros_dumbbell_y
## Min. :-150.871 Min. : 0.00 Min. :-204.0000 Min. :-2.10000
## 1st Qu.: -77.644 1st Qu.: 4.00 1st Qu.: -0.0300 1st Qu.:-0.14000
## Median : -3.324 Median :10.00 Median : 0.1300 Median : 0.03000
## Mean : 1.674 Mean :13.72 Mean : 0.1611 Mean : 0.04606
## 3rd Qu.: 79.643 3rd Qu.:19.00 3rd Qu.: 0.3500 3rd Qu.: 0.21000
## Max. : 154.952 Max. :58.00 Max. : 2.2200 Max. :52.00000
## gyros_dumbbell_z accel_dumbbell_x accel_dumbbell_y accel_dumbbell_z
## Min. : -2.380 Min. :-419.00 Min. :-189.00 Min. :-334.00
## 1st Qu.: -0.310 1st Qu.: -50.00 1st Qu.: -8.00 1st Qu.:-142.00
## Median : -0.130 Median : -8.00 Median : 41.50 Median : -1.00
## Mean : -0.129 Mean : -28.62 Mean : 52.63 Mean : -38.32
## 3rd Qu.: 0.030 3rd Qu.: 11.00 3rd Qu.: 111.00 3rd Qu.: 38.00
## Max. :317.000 Max. : 235.00 Max. : 315.00 Max. : 318.00
## magnet_dumbbell_x magnet_dumbbell_y magnet_dumbbell_z roll_forearm
## Min. :-643.0 Min. :-3600 Min. :-262.00 Min. :-180.0000
## 1st Qu.:-535.0 1st Qu.: 231 1st Qu.: -45.00 1st Qu.: -0.7375
## Median :-479.0 Median : 311 Median : 13.00 Median : 21.7000
## Mean :-328.5 Mean : 221 Mean : 46.05 Mean : 33.8265
## 3rd Qu.:-304.0 3rd Qu.: 390 3rd Qu.: 95.00 3rd Qu.: 140.0000
## Max. : 592.0 Max. : 633 Max. : 452.00 Max. : 180.0000
## pitch_forearm yaw_forearm total_accel_forearm gyros_forearm_x
## Min. :-72.50 Min. :-180.00 Min. : 0.00 Min. :-22.000
## 1st Qu.: 0.00 1st Qu.: -68.60 1st Qu.: 29.00 1st Qu.: -0.220
## Median : 9.24 Median : 0.00 Median : 36.00 Median : 0.050
## Mean : 10.71 Mean : 19.21 Mean : 34.72 Mean : 0.158
## 3rd Qu.: 28.40 3rd Qu.: 110.00 3rd Qu.: 41.00 3rd Qu.: 0.560
## Max. : 89.80 Max. : 180.00 Max. :108.00 Max. : 3.970
## gyros_forearm_y gyros_forearm_z accel_forearm_x accel_forearm_y
## Min. : -7.02000 Min. : -8.0900 Min. :-498.00 Min. :-632.0
## 1st Qu.: -1.46000 1st Qu.: -0.1800 1st Qu.:-178.00 1st Qu.: 57.0
## Median : 0.03000 Median : 0.0800 Median : -57.00 Median : 201.0
## Mean : 0.07517 Mean : 0.1512 Mean : -61.65 Mean : 163.7
## 3rd Qu.: 1.62000 3rd Qu.: 0.4900 3rd Qu.: 76.00 3rd Qu.: 312.0
## Max. :311.00000 Max. :231.0000 Max. : 477.00 Max. : 923.0
## accel_forearm_z magnet_forearm_x magnet_forearm_y magnet_forearm_z classe
## Min. :-446.00 Min. :-1280.0 Min. :-896.0 Min. :-973.0 A:5580
## 1st Qu.:-182.00 1st Qu.: -616.0 1st Qu.: 2.0 1st Qu.: 191.0 B:3797
## Median : -39.00 Median : -378.0 Median : 591.0 Median : 511.0 C:3422
## Mean : -55.29 Mean : -312.6 Mean : 380.1 Mean : 393.6 D:3216
## 3rd Qu.: 26.00 3rd Qu.: -73.0 3rd Qu.: 737.0 3rd Qu.: 653.0 E:3607
## Max. : 291.00 Max. : 672.0 Max. :1480.0 Max. :1090.0
# Bar chart of how many training rows fall into each `classe` level.
ggplot(data = training, mapping = aes(x = classe)) +
  geom_bar(fill = "steelblue") +
  labs(title = "Distribusi Kelas pada Data Latihan")

4. Membagi Data Latihan dan Validasi
# Fix the RNG seed so the partition below is reproducible.
set.seed(123)
# Pick 70% of the rows (p = 0.7), sampled on `classe`, for model fitting.
# list = FALSE returns the row indices as a matrix rather than a list.
inTrain <- createDataPartition(
  y = training$classe,
  p = 0.7,
  list = FALSE
)
# Selected rows form the fitting set; all remaining rows form the
# validation set.
trainSet <- training[inTrain, ]
validSet <- training[-inTrain, ]
5. Melatih Model dengan Random Forest
# Re-seed right before fitting so the forest is reproducible on its own.
set.seed(123)
# Fit a 100-tree random forest that predicts `classe` from every other
# column of `trainSet`, with variable-importance computation switched on.
model_rf <- randomForest(
  classe ~ .,
  data = trainSet,
  importance = TRUE,
  ntree = 100
)
6. Evaluasi Model
# Predict the class of every validation row, then compare the predictions
# with the true `classe` labels and print the resulting confusion matrix.
predictions <- predict(model_rf, newdata = validSet)
conf_matrix <- confusionMatrix(
  data = predictions,
  reference = validSet$classe
)
conf_matrix
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E
## A 1674 5 0 0 0
## B 0 1130 2 0 0
## C 0 4 1024 8 4
## D 0 0 0 956 4
## E 0 0 0 0 1074
##
## Overall Statistics
##
## Accuracy : 0.9954
## 95% CI : (0.9933, 0.997)
## No Information Rate : 0.2845
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9942
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E
## Sensitivity 1.0000 0.9921 0.9981 0.9917 0.9926
## Specificity 0.9988 0.9996 0.9967 0.9992 1.0000
## Pos Pred Value 0.9970 0.9982 0.9846 0.9958 1.0000
## Neg Pred Value 1.0000 0.9981 0.9996 0.9984 0.9983
## Prevalence 0.2845 0.1935 0.1743 0.1638 0.1839
## Detection Rate 0.2845 0.1920 0.1740 0.1624 0.1825
## Detection Prevalence 0.2853 0.1924 0.1767 0.1631 0.1825
## Balanced Accuracy 0.9994 0.9958 0.9974 0.9954 0.9963
7. Prediksi pada Data Uji
# Predict the class of each row in the testing set and display the result.
predictions_test <- predict(model_rf, newdata = testing)
predictions_test
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## B A B A A E D B A A B C B A E E A B B B
## Levels: A B C D E