suppressWarnings(suppressMessages(library(caret)))
suppressWarnings(suppressMessages(library(randomForest)))
suppressWarnings(suppressMessages(library(e1071)))
library(caret)
library(randomForest)
library(e1071)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:randomForest':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(doParallel)
## Warning: package 'doParallel' was built under R version 4.0.5
## Loading required package: foreach
## Warning: package 'foreach' was built under R version 4.0.5
## Loading required package: iterators
## Warning: package 'iterators' was built under R version 4.0.5
## Loading required package: parallel
# Seed the RNG before any random partitioning or resampling is performed.
set.seed(1603)
# Load the training set. Spreadsheet error cells ("#DIV/0!") are declared as
# missing values up front; otherwise readr flags each one as a parsing failure
# in the columns it guessed to be numeric. Columns containing missing values
# are dropped in a later step, so the error cells never reach the model.
training.df <- read_csv(
  'C:/Users/asus/Documents/pml-training.csv',
  na = c("", "NA", "#DIV/0!")
)
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_double(),
## user_name = col_character(),
## cvtd_timestamp = col_character(),
## new_window = col_character(),
## kurtosis_roll_belt = col_character(),
## kurtosis_picth_belt = col_character(),
## kurtosis_yaw_belt = col_character(),
## skewness_roll_belt = col_character(),
## skewness_roll_belt.1 = col_character(),
## skewness_yaw_belt = col_character(),
## max_yaw_belt = col_character(),
## min_yaw_belt = col_character(),
## amplitude_yaw_belt = col_character(),
## kurtosis_picth_arm = col_character(),
## kurtosis_yaw_arm = col_character(),
## skewness_pitch_arm = col_character(),
## skewness_yaw_arm = col_character(),
## kurtosis_yaw_dumbbell = col_character(),
## skewness_yaw_dumbbell = col_character(),
## kurtosis_roll_forearm = col_character(),
## kurtosis_picth_forearm = col_character()
## # ... with 8 more columns
## )
## i Use `spec()` for the full column specifications.
## Warning: 182 parsing failures.
## row col expected actual file
## 2231 kurtosis_roll_arm a double #DIV/0! 'C:/Users/asus/Documents/pml-training.csv'
## 2231 skewness_roll_arm a double #DIV/0! 'C:/Users/asus/Documents/pml-training.csv'
## 2255 kurtosis_roll_arm a double #DIV/0! 'C:/Users/asus/Documents/pml-training.csv'
## 2255 skewness_roll_arm a double #DIV/0! 'C:/Users/asus/Documents/pml-training.csv'
## 2282 kurtosis_roll_arm a double #DIV/0! 'C:/Users/asus/Documents/pml-training.csv'
## .... ................. ........ ....... ..........................................
## See problems(...) for more details.
# Keep only the training columns that contain no missing values at all.
train_complete <- !vapply(training.df, anyNA, logical(1))
training.df <- training.df[, train_complete]
# Load the testing set from its CSV file.
testing.df <- read_csv(file = 'C:/Users/asus/Documents/pml-testing.csv')
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_logical(),
## X1 = col_double(),
## user_name = col_character(),
## raw_timestamp_part_1 = col_double(),
## raw_timestamp_part_2 = col_double(),
## cvtd_timestamp = col_character(),
## new_window = col_character(),
## num_window = col_double(),
## roll_belt = col_double(),
## pitch_belt = col_double(),
## yaw_belt = col_double(),
## total_accel_belt = col_double(),
## gyros_belt_x = col_double(),
## gyros_belt_y = col_double(),
## gyros_belt_z = col_double(),
## accel_belt_x = col_double(),
## accel_belt_y = col_double(),
## accel_belt_z = col_double(),
## magnet_belt_x = col_double(),
## magnet_belt_y = col_double(),
## magnet_belt_z = col_double()
## # ... with 40 more columns
## )
## i Use `spec()` for the full column specifications.
# Keep only the testing columns that contain no missing values at all.
test_complete <- !vapply(testing.df, anyNA, logical(1))
testing.df <- testing.df[, test_complete]

# Drop the first seven columns from both frames.
# NOTE(review): presumably row id, user name, timestamp and window bookkeeping
# columns -- confirm, since they are removed by position rather than by name.
lead_cols <- seq_len(7)
Training.df <- training.df[, -lead_cols]
Testing.df <- testing.df[, -lead_cols]

# Near-zero-variance metrics for every training column except the last one.
# NOTE(review): no later statement visible in this script reads Training.nzv.
predictor_cols <- Training.df[, -ncol(Training.df)]
Training.nzv <- nzv(predictor_cols, saveMetrics = TRUE)

# Split the training rows 60/40 using indices drawn on `classe`: the selected
# rows are used for model fitting, the remaining rows form a hold-out set.
newTR <- createDataPartition(y = Training.df$classe, p = 0.6, list = FALSE)
newTRAIN <- Training.df[newTR, ]
newTEST <- Training.df[-newTR, ]
# Start a worker cluster that leaves one core free. detectCores() may return
# NA, and a single-core machine would otherwise request zero workers, so the
# worker count is clamped to at least one.
n_workers <- max(1L, detectCores() - 1L, na.rm = TRUE)
cl <- makeCluster(n_workers)
# Register the cluster object itself as the foreach backend. It has to be
# passed as `cl`; the `cores` argument expects a number, not a cluster.
registerDoParallel(cl)
getDoParWorkers()
## [1] 3
# Fit a random forest on the fitting partition, choosing the tuning parameter
# by accuracy under 4-fold cross-validation with centred and scaled predictors.
myModel <- tryCatch(
  train(
    classe ~ .,
    data = newTRAIN,
    method = "rf",
    metric = "Accuracy",
    preProcess = c("center", "scale"),
    # trainControl's `p` is not passed: it only applies to leave-group-out
    # resampling and has no effect with method = "cv".
    trControl = trainControl(
      method = "cv",
      number = 4,
      allowParallel = TRUE
    )
  ),
  # Release the worker processes whether training succeeds or fails, and fall
  # back to the sequential backend so later foreach calls do not target a
  # stopped cluster.
  finally = {
    stopCluster(cl)
    registerDoSEQ()
  }
)
print(myModel, digits = 4)
## Random Forest
##
## 11776 samples
## 52 predictor
## 5 classes: 'A', 'B', 'C', 'D', 'E'
##
## Pre-processing: centered (52), scaled (52)
## Resampling: Cross-Validated (4 fold)
## Summary of sample sizes: 8833, 8831, 8830, 8834
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.9855 0.9816
## 27 0.9862 0.9825
## 52 0.9797 0.9743
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 27.
# Predict the class of every hold-out row with the fitted model.
predTest <- predict(myModel, newdata = newTEST)
# Build the reference factor with exactly the levels of the predictions, so
# both factors share one level set even if a class is absent from the hold-out
# rows (confusionMatrix requires compatible levels).
newTEST$classe <- factor(newTEST$classe, levels = levels(predTest))
# Cross-tabulate predictions against the true labels and report the
# accuracy statistics.
confusionMatrix(predTest, newTEST$classe)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E
## A 2229 9 0 0 0
## B 3 1505 11 1 0
## C 0 2 1351 11 4
## D 0 1 6 1274 2
## E 0 1 0 0 1436
##
## Overall Statistics
##
## Accuracy : 0.9935
## 95% CI : (0.9915, 0.9952)
## No Information Rate : 0.2845
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9918
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E
## Sensitivity 0.9987 0.9914 0.9876 0.9907 0.9958
## Specificity 0.9984 0.9976 0.9974 0.9986 0.9998
## Pos Pred Value 0.9960 0.9901 0.9876 0.9930 0.9993
## Neg Pred Value 0.9995 0.9979 0.9974 0.9982 0.9991
## Prevalence 0.2845 0.1935 0.1744 0.1639 0.1838
## Detection Rate 0.2841 0.1918 0.1722 0.1624 0.1830
## Detection Prevalence 0.2852 0.1937 0.1744 0.1635 0.1832
## Balanced Accuracy 0.9985 0.9945 0.9925 0.9946 0.9978
# Predict the testing cases from the cleaned frame (Testing.df), which has had
# the same leading columns removed as the frame the model was trained on.
print(predict(myModel, newdata = Testing.df))
## [1] B A B A A E D B A A B C B A E E A B B B
## Levels: A B C D E