# Human-activity "classe" prediction: load the data, clean it, fit a random
# forest and a decision tree, compare them on a hold-out split, and apply the
# better model to the final test file.

# Packages ----
# These provide every non-base function called below; without them the script
# fails with "could not find function".
library(caret)        # createDataPartition, nearZeroVar, train, confusionMatrix
library(corrplot)     # corrplot
library(rpart)        # rpart
library(rattle)       # fancyRpartPlot
library(randomForest) # backend used by caret for method = "rf"

# Locate and load the datasets ----
trainUrl <- "https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv"
testUrl <- "https://d396qusza40orc.cloudfront.net/predmachlearn/pml-testing.csv"

# Treat empty cells and spreadsheet division errors as missing so the
# "mostly NA" filter below can catch those columns.
missingTokens <- c("NA", "", "#DIV/0!")
training <- read.csv(trainUrl, na.strings = missingTokens)
testing <- read.csv(testUrl, na.strings = missingTokens)

# The outcome must be a factor for classification and for confusionMatrix().
training$classe <- factor(training$classe)

# Data processing ----
# Create a 70/30 partition of the training dataset using caret.
set.seed(3408)
inTrain <- createDataPartition(training$classe, p = 0.7, list = FALSE)
TrainSet <- training[inTrain, ]
TestSet <- training[-inTrain, ]
dim(TrainSet)
dim(TestSet)

# Remove variables with near-zero variance.
# Guard the empty case: negative indexing with integer(0) would drop every
# column instead of none.
n0var <- nearZeroVar(TrainSet)
if (length(n0var) > 0) {
  TrainSet <- TrainSet[, -n0var, drop = FALSE]
  TestSet <- TestSet[, -n0var, drop = FALSE]
}
dim(TrainSet)
dim(TestSet)

# Remove variables that are mostly NA (more than 95% missing in TrainSet).
AllNA <- vapply(TrainSet, function(x) mean(is.na(x)) > 0.95, logical(1))
TrainSet <- TrainSet[, !AllNA, drop = FALSE]
TestSet <- TestSet[, !AllNA, drop = FALSE]
dim(TrainSet)
dim(TestSet)

# Remove identification-only variables (columns 1 to 5).
# NOTE(review): positional removal assumes the first five remaining columns
# are still the identifier columns after the filters above -- confirm.
TrainSet <- TrainSet[, -(1:5)]
TestSet <- TestSet[, -(1:5)]
dim(TrainSet)
dim(TestSet)

# Check correlation among variables.
# Select predictors by name/type rather than a hard-coded column index so the
# outcome column is excluded regardless of how many columns survived cleaning.
isNumericPredictor <- vapply(TrainSet, is.numeric, logical(1)) &
  names(TrainSet) != "classe"
M <- cor(TrainSet[, isNumericPredictor, drop = FALSE])
corrplot(M, method = "circle")

# 1. Random forest method ----
# Model fit (3-fold cross-validation).
set.seed(3408)
controlRF <- trainControl(method = "cv", number = 3, verboseIter = FALSE)
modFitRandForest <- train(
  classe ~ .,
  data = TrainSet,
  method = "rf",
  trControl = controlRF
)
modFitRandForest$finalModel

# Prediction on the hold-out TestSet.
predictRandForest <- predict(modFitRandForest, newdata = TestSet)
confMatRandForest <- confusionMatrix(predictRandForest, TestSet$classe)
confMatRandForest

# Plot matrix results.
plot(
  confMatRandForest$table,
  col = confMatRandForest$byClass,
  main = paste(
    "Random Forest - Accuracy =",
    round(confMatRandForest$overall["Accuracy"], 4)
  )
)

# 2. Decision tree ----
# Model fit.
set.seed(3408)
modFitDecTree <- rpart(classe ~ ., data = TrainSet, method = "class")
fancyRpartPlot(modFitDecTree)

# Prediction on the hold-out TestSet.
predictDecTree <- predict(modFitDecTree, newdata = TestSet, type = "class")
confMatDecTree <- confusionMatrix(predictDecTree, TestSet$classe)
confMatDecTree

# Plot matrix results.
plot(
  confMatDecTree$table,
  col = confMatDecTree$byClass,
  main = paste(
    "Decision Tree - Accuracy =",
    round(confMatDecTree$overall["Accuracy"], 4)
  )
)

# 3. Apply the selected model to the test data ----
# Random forest is selected because of its higher accuracy.
predictTEST <- predict(modFitRandForest, newdata = testing)
predictTEST