Data Analysis Project 2

Step 5 Trees

George Fisher george@georgefisher.com

Observations


Build and prune a classification tree on the training dataset

library(tree)
# Prepare the training frame: remove the second-to-last column (the subject
# identifier) and convert the activity label to a factor.
train <- train[-(ncol(train) - 1)]
train$activity <- factor(train$activity)
# Rename the columns to v1..vK followed by "activity"
# (assumes activity is the last column -- TODO confirm against the loader).
new.names <- paste0("v", 1:(ncol(train) - 1))
names(train) <- c(new.names, "activity")

# Fit the full tree on every predictor, then prune it back with best = 6.
full.tree <- tree(activity ~ ., data = train)
pruned.tree <- prune.tree(full.tree, best = 6)

# Draw the pruned tree, label its nodes, and caption the figure.
plot(pruned.tree)
text(pruned.tree)
mtext("Pruned Tree from Training Data, best=6")

plot of chunk s5.training.tree


# Confusion table for the training data: rows are the recorded activity,
# columns are the class predicted by the pruned tree. No newdata is passed,
# so predict() presumably returns the fitted classes for the data the tree
# was grown on -- confirm against the tree package documentation.
table(train$activity, predict(pruned.tree, type = "class"))
##           
##            laying sitting standing walk walkdown walkup
##   laying      221       0        0    0        0      0
##   sitting       0     159       39    0        0      0
##   standing      0       0      227    0        0      0
##   walk          0       0        0  256        7      3
##   walkdown      0       0        0    7      183      3
##   walkup        0       0        0    8       28    174

Predict the activity on the test dataset using the pruned tree

# drop the subject variable, turn activity into a factor
test = test[-c(ncol(test) - 1)]  # drop subject
test$activity = factor(test$activity)
# give the dataset new names
new.names = paste("v", 1:(length(test) - 1), sep = "")
names(test) = c(new.names, "activity")

(test.table = table(test$activity, predict(pruned.tree, type = "class", newdata = test)))
##           
##            laying sitting standing walk walkdown walkup
##   laying      293       0        0    0        0      0
##   sitting       0     176       88    0        0      0
##   standing      0      31      252    0        0      0
##   walk          0       0        0  222        7      0
##   walkdown      0       0        0    9      191      0
##   walkup        0       0        0   95       71     50
# For each predicted class (a column of test.table), report the percentage of
# observations assigned to that class whose recorded activity really is that
# class, and accumulate the percentages in percent.accuracy for the average
# printed afterwards.
percent.accuracy <- 0
for (i in seq_len(ncol(test.table))) {
    predicted.class <- colnames(test.table)[i]
    # Take the correct count from the row with the same label as the column.
    # Using max() of the column instead would overstate the rate whenever a
    # misclassification is the largest cell. A predicted class that never
    # occurs among the recorded activities has no matching row and scores 0.
    correct <- if (predicted.class %in% rownames(test.table)) {
        test.table[predicted.class, i]
    } else {
        0
    }
    class.percent <- correct / sum(test.table[, i]) * 100
    percent.accuracy <- percent.accuracy + class.percent
    cat(paste(predicted.class, paste0(round(class.percent, digits = 2), "%")), "\n")
}
## laying 100% 
## sitting 85.02% 
## standing 74.12% 
## walk 68.1% 
## walkdown 71% 
## walkup 100%
# Unweighted mean of the per-class percentages accumulated above. Divide by
# the number of predicted classes in the table rather than a hard-coded 6 so
# the figure stays correct if the class count changes. Note this is not the
# overall accuracy, which would be sum(diag(test.table)) / sum(test.table).
cat("average", paste0(round(percent.accuracy / ncol(test.table), digits = 2), "%"))
## average 83.04%