library("ISLR")
library("caret")
## Loading required package: lattice
## Loading required package: ggplot2
library("randomForest")
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
# Load the labelled training data.
allData <- read.csv("https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv")

# Seed the RNG *before* the random split so the train/test partition is the
# same on every run (previously only the forest fit was seeded).
set.seed(33833)
# Put about 70% of the rows in the training set and hold out the rest.
train_idx <- createDataPartition(allData$classe, p = 70/100, list = FALSE)
trainData <- allData[train_idx, ]
testData <- allData[-train_idx, ]

# Flag near-zero-variance predictors; column 160 is excluded from the check.
nsv <- nearZeroVar(trainData[, -160])
# Guard the "nothing flagged" case: indexing with -integer(0) would select
# zero columns instead of keeping all of them.
trainData2 <- if (length(nsv) > 0) trainData[, -nsv] else trainData

# Columns used for modelling (sensor readings) plus the outcome `classe`.
mycols <- c(
  "roll_belt", "pitch_belt", "yaw_belt", "total_accel_belt",
  "gyros_belt_x", "gyros_belt_y", "gyros_belt_z",
  "accel_belt_x", "accel_belt_y", "accel_belt_z",
  "magnet_belt_x", "magnet_belt_y", "magnet_belt_z",
  "roll_arm", "pitch_arm", "yaw_arm", "total_accel_arm",
  "gyros_arm_x", "gyros_arm_y", "gyros_arm_z",
  "accel_arm_x", "accel_arm_y", "accel_arm_z",
  "magnet_arm_x", "magnet_arm_y", "magnet_arm_z",
  "roll_dumbbell", "pitch_dumbbell", "yaw_dumbbell", "total_accel_dumbbell",
  "gyros_dumbbell_x", "gyros_dumbbell_y", "gyros_dumbbell_z",
  "accel_dumbbell_x", "accel_dumbbell_y", "accel_dumbbell_z",
  "magnet_dumbbell_x", "magnet_dumbbell_y", "magnet_dumbbell_z",
  "roll_forearm", "pitch_forearm", "yaw_forearm", "total_accel_forearm",
  "gyros_forearm_x", "gyros_forearm_y", "gyros_forearm_z",
  "accel_forearm_x", "accel_forearm_y", "accel_forearm_z",
  "magnet_forearm_x", "magnet_forearm_y", "magnet_forearm_z",
  "classe"
)
trainData3 <- trainData2[, mycols]
# randomForest needs a factor response to fit a classification forest.
trainData3$classe <- factor(trainData3$classe)

# Re-seed right before fitting so the forest is reproducible for a given
# training set, independent of how much randomness the steps above consumed.
set.seed(33833)
modelFit <- randomForest(classe ~ ., data = trainData3, importance = TRUE)
print(modelFit)
##
## Call:
## randomForest(formula = classe ~ ., data = trainData3, importance = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 7
##
## OOB estimate of error rate: 0.51%
## Confusion matrix:
## A B C D E class.error
## A 3903 2 1 0 0 0.0007680492
## B 11 2642 5 0 0 0.0060195636
## C 0 10 2383 3 0 0.0054257095
## D 1 0 29 2220 2 0.0142095915
## E 0 0 0 6 2519 0.0023762376
# Optional: cache the fitted model on disk so it does not have to be rebuilt.
saveRDS(object = modelFit, file = "modelFit.rds")
# The .rds file is written to the current working directory. To restore the
# model in a later session:
# myModelFit <- readRDS("modelFit.rds")
# Score the held-out split: predict a class for every test row and compare it
# with the recorded label.
predictions <- predict(modelFit, testData)
myPredictions <- testData$classe == predictions
# Accuracy = definite matches / total rows. sum(na.rm = TRUE) counts only TRUE
# entries; the earlier myPredictions[myPredictions == TRUE] idiom also kept NA
# entries and therefore counted missing comparisons as correct.
accuracy <- sum(myPredictions, na.rm = TRUE) / length(myPredictions)
accuracy
## [1] 0.9949023
# Load the test cases that have to be submitted.
testDataPML <- read.csv("https://d396qusza40orc.cloudfront.net/predmachlearn/pml-testing.csv")
# Predict once and reuse the result under both names instead of running the
# forest twice over the same data.
predictionsPML <- predict(modelFit, testDataPML)
answers <- predictionsPML
# Write each element of `x` to its own text file in the current working
# directory.
#
# Element i is written to "problem_id_<i>.txt" as a single unquoted value with
# no row or column names.
#
# Args:
#   x: A vector of answers (e.g. predicted class labels).
#
# Returns:
#   NULL, invisibly. Called for its side effect of creating files.
pml_write_files <- function(x) {
  # seq_along() yields an empty sequence for empty input, whereas 1:length(x)
  # would iterate over c(1, 0) and write bogus files.
  for (i in seq_along(x)) {
    filename <- paste0("problem_id_", i, ".txt")
    write.table(x[i], file = filename, quote = FALSE,
                row.names = FALSE, col.names = FALSE)
  }
  invisible(NULL)
}
# Write the submission files into /tmp/ml. Create the directory first so that
# setwd() does not fail on a machine where it does not exist yet; dir.create()
# is a silent no-op here when the directory is already present.
dir.create("/tmp/ml", showWarnings = FALSE, recursive = TRUE)
setwd("/tmp/ml")
pml_write_files(answers)