#
# set work directory
root<-"D:/OneDrive/Coursera/Data_Science/Practical_machine_learning"
setwd(root)
#
# load package
library(caret); library(randomForest)
## Loading required package: lattice
## Loading required package: ggplot2
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
#
# load data from url
# url<-"https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv"
# download.file(url,destfile="./pml-training.csv")
# url<-"https://d396qusza40orc.cloudfront.net/predmachlearn/pml-testing.csv"
# download.file(url,destfile="./pml-testing.csv")
#
# load data from .csv
training<-read.csv(file="pml-training.csv")
testing<-read.csv(file="pml-testing.csv")
#
# Cleaning
print(colnames(training))
## [1] "X" "user_name"
## [3] "raw_timestamp_part_1" "raw_timestamp_part_2"
## [5] "cvtd_timestamp" "new_window"
## [7] "num_window" "roll_belt"
## [9] "pitch_belt" "yaw_belt"
## [11] "total_accel_belt" "kurtosis_roll_belt"
## [13] "kurtosis_picth_belt" "kurtosis_yaw_belt"
## [15] "skewness_roll_belt" "skewness_roll_belt.1"
## [17] "skewness_yaw_belt" "max_roll_belt"
## [19] "max_picth_belt" "max_yaw_belt"
## [21] "min_roll_belt" "min_pitch_belt"
## [23] "min_yaw_belt" "amplitude_roll_belt"
## [25] "amplitude_pitch_belt" "amplitude_yaw_belt"
## [27] "var_total_accel_belt" "avg_roll_belt"
## [29] "stddev_roll_belt" "var_roll_belt"
## [31] "avg_pitch_belt" "stddev_pitch_belt"
## [33] "var_pitch_belt" "avg_yaw_belt"
## [35] "stddev_yaw_belt" "var_yaw_belt"
## [37] "gyros_belt_x" "gyros_belt_y"
## [39] "gyros_belt_z" "accel_belt_x"
## [41] "accel_belt_y" "accel_belt_z"
## [43] "magnet_belt_x" "magnet_belt_y"
## [45] "magnet_belt_z" "roll_arm"
## [47] "pitch_arm" "yaw_arm"
## [49] "total_accel_arm" "var_accel_arm"
## [51] "avg_roll_arm" "stddev_roll_arm"
## [53] "var_roll_arm" "avg_pitch_arm"
## [55] "stddev_pitch_arm" "var_pitch_arm"
## [57] "avg_yaw_arm" "stddev_yaw_arm"
## [59] "var_yaw_arm" "gyros_arm_x"
## [61] "gyros_arm_y" "gyros_arm_z"
## [63] "accel_arm_x" "accel_arm_y"
## [65] "accel_arm_z" "magnet_arm_x"
## [67] "magnet_arm_y" "magnet_arm_z"
## [69] "kurtosis_roll_arm" "kurtosis_picth_arm"
## [71] "kurtosis_yaw_arm" "skewness_roll_arm"
## [73] "skewness_pitch_arm" "skewness_yaw_arm"
## [75] "max_roll_arm" "max_picth_arm"
## [77] "max_yaw_arm" "min_roll_arm"
## [79] "min_pitch_arm" "min_yaw_arm"
## [81] "amplitude_roll_arm" "amplitude_pitch_arm"
## [83] "amplitude_yaw_arm" "roll_dumbbell"
## [85] "pitch_dumbbell" "yaw_dumbbell"
## [87] "kurtosis_roll_dumbbell" "kurtosis_picth_dumbbell"
## [89] "kurtosis_yaw_dumbbell" "skewness_roll_dumbbell"
## [91] "skewness_pitch_dumbbell" "skewness_yaw_dumbbell"
## [93] "max_roll_dumbbell" "max_picth_dumbbell"
## [95] "max_yaw_dumbbell" "min_roll_dumbbell"
## [97] "min_pitch_dumbbell" "min_yaw_dumbbell"
## [99] "amplitude_roll_dumbbell" "amplitude_pitch_dumbbell"
## [101] "amplitude_yaw_dumbbell" "total_accel_dumbbell"
## [103] "var_accel_dumbbell" "avg_roll_dumbbell"
## [105] "stddev_roll_dumbbell" "var_roll_dumbbell"
## [107] "avg_pitch_dumbbell" "stddev_pitch_dumbbell"
## [109] "var_pitch_dumbbell" "avg_yaw_dumbbell"
## [111] "stddev_yaw_dumbbell" "var_yaw_dumbbell"
## [113] "gyros_dumbbell_x" "gyros_dumbbell_y"
## [115] "gyros_dumbbell_z" "accel_dumbbell_x"
## [117] "accel_dumbbell_y" "accel_dumbbell_z"
## [119] "magnet_dumbbell_x" "magnet_dumbbell_y"
## [121] "magnet_dumbbell_z" "roll_forearm"
## [123] "pitch_forearm" "yaw_forearm"
## [125] "kurtosis_roll_forearm" "kurtosis_picth_forearm"
## [127] "kurtosis_yaw_forearm" "skewness_roll_forearm"
## [129] "skewness_pitch_forearm" "skewness_yaw_forearm"
## [131] "max_roll_forearm" "max_picth_forearm"
## [133] "max_yaw_forearm" "min_roll_forearm"
## [135] "min_pitch_forearm" "min_yaw_forearm"
## [137] "amplitude_roll_forearm" "amplitude_pitch_forearm"
## [139] "amplitude_yaw_forearm" "total_accel_forearm"
## [141] "var_accel_forearm" "avg_roll_forearm"
## [143] "stddev_roll_forearm" "var_roll_forearm"
## [145] "avg_pitch_forearm" "stddev_pitch_forearm"
## [147] "var_pitch_forearm" "avg_yaw_forearm"
## [149] "stddev_yaw_forearm" "var_yaw_forearm"
## [151] "gyros_forearm_x" "gyros_forearm_y"
## [153] "gyros_forearm_z" "accel_forearm_x"
## [155] "accel_forearm_y" "accel_forearm_z"
## [157] "magnet_forearm_x" "magnet_forearm_y"
## [159] "magnet_forearm_z" "classe"
# delete 1,...,7,12,...,36,50,...,59,69,...,83,87,...,101,103,...,112,125,...,139,141,...,150
# because NA, DIV/0, etc.
training<-training[,-c(1:7,12:36,50:59,69:83,87:101,103:112,125:139,141:150)]
#
# Sliting, training->trainSet(70%) + validSet(30%)
set.seed(1234)
inTrain<-createDataPartition(training$classe,p=0.1,list = FALSE)
trainSet<-training[inTrain,]
validSet<-training[-inTrain,]
#
# random forest
cc<-trainControl(method="cv", number=5)
fit<-train(classe~., data=trainSet, method="rf", trControl = cc)
#
# cross validation
valid_predict<-predict(fit, validSet)
confMat<-confusionMatrix(validSet$classe, valid_predict)
print(confMat$overall)
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.449541e-01 9.303033e-01 9.414879e-01 9.482729e-01 2.935214e-01
## AccuracyPValue McnemarPValue
## 0.000000e+00 2.851080e-51
#
# predict testing
result<-predict(fit, testing)
print(result)
## [1] B A B A A E D D A A B C B A E E A B A B
## Levels: A B C D E