project_coursera

suppressWarnings(suppressMessages(library(caret)))
suppressWarnings(suppressMessages(library(randomForest)))
suppressWarnings(suppressMessages(library(e1071)))
library(caret)
library(randomForest)
library(e1071)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:randomForest':
## 
##     combine

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readr)
library(doParallel)

## Warning: package 'doParallel' was built under R version 4.0.5

## Loading required package: foreach

## Warning: package 'foreach' was built under R version 4.0.5

## Loading required package: iterators

## Warning: package 'iterators' was built under R version 4.0.5

## Loading required package: parallel

# Fix the RNG seed so the random steps further down (data partition,
# resampling) start from a known state.
set.seed(1603)

# Load the training data. The file contains Excel error tokens ("#DIV/0!") in
# otherwise numeric columns; declaring them as missing values lets readr parse
# those columns cleanly instead of reporting parsing failures for each one.
training.df <- read_csv(
  "C:/Users/asus/Documents/pml-training.csv",
  na = c("", "NA", "#DIV/0!")
)

## Warning: Missing column names filled in: 'X1' [1]

## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_double(),
##   user_name = col_character(),
##   cvtd_timestamp = col_character(),
##   new_window = col_character(),
##   kurtosis_roll_belt = col_character(),
##   kurtosis_picth_belt = col_character(),
##   kurtosis_yaw_belt = col_character(),
##   skewness_roll_belt = col_character(),
##   skewness_roll_belt.1 = col_character(),
##   skewness_yaw_belt = col_character(),
##   max_yaw_belt = col_character(),
##   min_yaw_belt = col_character(),
##   amplitude_yaw_belt = col_character(),
##   kurtosis_picth_arm = col_character(),
##   kurtosis_yaw_arm = col_character(),
##   skewness_pitch_arm = col_character(),
##   skewness_yaw_arm = col_character(),
##   kurtosis_yaw_dumbbell = col_character(),
##   skewness_yaw_dumbbell = col_character(),
##   kurtosis_roll_forearm = col_character(),
##   kurtosis_picth_forearm = col_character()
##   # ... with 8 more columns
## )
## i Use `spec()` for the full column specifications.

## Warning: 182 parsing failures.
##  row               col expected  actual                                       file
## 2231 kurtosis_roll_arm a double #DIV/0! 'C:/Users/asus/Documents/pml-training.csv'
## 2231 skewness_roll_arm a double #DIV/0! 'C:/Users/asus/Documents/pml-training.csv'
## 2255 kurtosis_roll_arm a double #DIV/0! 'C:/Users/asus/Documents/pml-training.csv'
## 2255 skewness_roll_arm a double #DIV/0! 'C:/Users/asus/Documents/pml-training.csv'
## 2282 kurtosis_roll_arm a double #DIV/0! 'C:/Users/asus/Documents/pml-training.csv'
## .... ................. ........ ....... ..........................................
## See problems(...) for more details.

# Keep only the training columns that contain no missing values at all.
training.df <- Filter(function(column) !anyNA(column), training.df)

# Load the testing data from the same local folder as the training file.
testing.df <- read_csv(file = "C:/Users/asus/Documents/pml-testing.csv")

## Warning: Missing column names filled in: 'X1' [1]

## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_logical(),
##   X1 = col_double(),
##   user_name = col_character(),
##   raw_timestamp_part_1 = col_double(),
##   raw_timestamp_part_2 = col_double(),
##   cvtd_timestamp = col_character(),
##   new_window = col_character(),
##   num_window = col_double(),
##   roll_belt = col_double(),
##   pitch_belt = col_double(),
##   yaw_belt = col_double(),
##   total_accel_belt = col_double(),
##   gyros_belt_x = col_double(),
##   gyros_belt_y = col_double(),
##   gyros_belt_z = col_double(),
##   accel_belt_x = col_double(),
##   accel_belt_y = col_double(),
##   accel_belt_z = col_double(),
##   magnet_belt_x = col_double(),
##   magnet_belt_y = col_double(),
##   magnet_belt_z = col_double()
##   # ... with 40 more columns
## )
## i Use `spec()` for the full column specifications.

# Keep only the testing columns that contain no missing values at all.
testing.df <- Filter(function(column) !anyNA(column), testing.df)



# Drop the seven leading columns from both data sets.
# NOTE(review): presumably these are the row index, user name, timestamp and
# window bookkeeping columns rather than sensor readings - confirm.
Training.df <- training.df[, -seq_len(7)]
Testing.df <- testing.df[, -seq_len(7)]


# Near-zero-variance metrics for every column except the last one.
# NOTE(review): Training.nzv is not referenced again in the visible script.
Training.nzv <- nzv(
  Training.df[, seq_len(ncol(Training.df) - 1)],
  saveMetrics = TRUE
)



# Split the labelled data on `classe`: rows selected by the partition (60%)
# become the model-building set, the remaining rows the validation set.
newTR <- createDataPartition(
  y = Training.df[["classe"]],
  p = 0.6,
  list = FALSE
)
newTRAIN <- Training.df[newTR, ]
newTEST <- Training.df[-newTR, ]




# Size the worker pool: leave one core free, but never drop below one worker
# (detectCores() may report a single core or NA).
ncores <- max(1L, detectCores() - 1L, na.rm = TRUE)

# Create the cluster explicitly and register that very cluster object.
# `cores =` expects a worker count, not a cluster, so handing the cluster to it
# meant the cluster created here was not the backend being registered.
cl <- makeCluster(ncores)
registerDoParallel(cl)
getDoParWorkers()

## [1] 3

# Fit a random forest on the model-building set, centring and scaling the
# predictors, and select the tuning parameter by accuracy under 4-fold
# cross-validation. The workers are released (and the sequential backend
# restored) whether or not training succeeds.
# NOTE(review): `p` is documented for leave-group-out CV; it likely has no
# effect with method = "cv" - confirm before removing.
myModel <- tryCatch(
  train(
    classe ~ .,
    data = newTRAIN,
    method = "rf",
    metric = "Accuracy",
    preProcess = c("center", "scale"),
    trControl = trainControl(
      method = "cv",
      number = 4,
      p = 0.60,
      allowParallel = TRUE
    )
  ),
  finally = {
    stopCluster(cl)
    registerDoSEQ()
  }
)

print(myModel, digits = 4)

## Random Forest 
## 
## 11776 samples
##    52 predictor
##     5 classes: 'A', 'B', 'C', 'D', 'E' 
## 
## Pre-processing: centered (52), scaled (52) 
## Resampling: Cross-Validated (4 fold) 
## Summary of sample sizes: 8833, 8831, 8830, 8834 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy  Kappa 
##    2    0.9855    0.9816
##   27    0.9862    0.9825
##   52    0.9797    0.9743
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 27.

# Score the validation split with the fitted model.
predTest <- predict(myModel, newdata = newTEST)

# confusionMatrix() needs the predictions and the reference to be factors with
# matching levels. Taking the levels from the predictions keeps the two
# aligned even if some class happens to be absent from the validation rows.
newTEST$classe <- factor(newTEST$classe, levels = levels(predTest))
confusionMatrix(predTest, newTEST$classe)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    A    B    C    D    E
##          A 2229    9    0    0    0
##          B    3 1505   11    1    0
##          C    0    2 1351   11    4
##          D    0    1    6 1274    2
##          E    0    1    0    0 1436
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9935          
##                  95% CI : (0.9915, 0.9952)
##     No Information Rate : 0.2845          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9918          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E
## Sensitivity            0.9987   0.9914   0.9876   0.9907   0.9958
## Specificity            0.9984   0.9976   0.9974   0.9986   0.9998
## Pos Pred Value         0.9960   0.9901   0.9876   0.9930   0.9993
## Neg Pred Value         0.9995   0.9979   0.9974   0.9982   0.9991
## Prevalence             0.2845   0.1935   0.1744   0.1639   0.1838
## Detection Rate         0.2841   0.1918   0.1722   0.1624   0.1830
## Detection Prevalence   0.2852   0.1937   0.1744   0.1635   0.1832
## Balanced Accuracy      0.9985   0.9945   0.9925   0.9946   0.9978

# Predict the class of each final test case. Use the prepared Testing.df (the
# frame with the leading non-predictor columns removed) so the test data goes
# through the same column preparation as the training data, instead of the raw
# testing.df.
print(predict(myModel, newdata = Testing.df))

##  [1] B A B A A E D B A A B C B A E E A B B B
## Levels: A B C D E