# Load the saved workspace file; everything below expects it to provide the
# `samsungData` data frame.
load("samsungData.rda")
# Structure of the first 20 columns plus the last two columns.
str(samsungData[c(seq_len(20), ncol(samsungData) - 1, ncol(samsungData))])
## 'data.frame': 7352 obs. of 22 variables:
## $ tBodyAcc-mean()-X : num 0.289 0.278 0.28 0.279 0.277 ...
## $ tBodyAcc-mean()-Y : num -0.0203 -0.0164 -0.0195 -0.0262 -0.0166 ...
## $ tBodyAcc-mean()-Z : num -0.133 -0.124 -0.113 -0.123 -0.115 ...
## $ tBodyAcc-std()-X : num -0.995 -0.998 -0.995 -0.996 -0.998 ...
## $ tBodyAcc-std()-Y : num -0.983 -0.975 -0.967 -0.983 -0.981 ...
## $ tBodyAcc-std()-Z : num -0.914 -0.96 -0.979 -0.991 -0.99 ...
## $ tBodyAcc-mad()-X : num -0.995 -0.999 -0.997 -0.997 -0.998 ...
## $ tBodyAcc-mad()-Y : num -0.983 -0.975 -0.964 -0.983 -0.98 ...
## $ tBodyAcc-mad()-Z : num -0.924 -0.958 -0.977 -0.989 -0.99 ...
## $ tBodyAcc-max()-X : num -0.935 -0.943 -0.939 -0.939 -0.942 ...
## $ tBodyAcc-max()-Y : num -0.567 -0.558 -0.558 -0.576 -0.569 ...
## $ tBodyAcc-max()-Z : num -0.744 -0.818 -0.818 -0.83 -0.825 ...
## $ tBodyAcc-min()-X : num 0.853 0.849 0.844 0.844 0.849 ...
## $ tBodyAcc-min()-Y : num 0.686 0.686 0.682 0.682 0.683 ...
## $ tBodyAcc-min()-Z : num 0.814 0.823 0.839 0.838 0.838 ...
## $ tBodyAcc-sma() : num -0.966 -0.982 -0.983 -0.986 -0.993 ...
## $ tBodyAcc-energy()-X: num -1 -1 -1 -1 -1 ...
## $ tBodyAcc-energy()-Y: num -1 -1 -1 -1 -1 ...
## $ tBodyAcc-energy()-Z: num -0.995 -0.998 -0.999 -1 -1 ...
## $ tBodyAcc-iqr()-X : num -0.994 -0.999 -0.997 -0.997 -0.998 ...
## $ subject : int 1 1 1 1 1 1 1 1 1 1 ...
## $ activity : chr "standing" "standing" "standing" "standing" ...
# Which activities are recorded, and how many observations does each have?
table(samsungData[["activity"]])
##
## laying sitting standing walk walkdown walkup
## 1407 1286 1374 1226 986 1073
# Number of distinct subjects.
length(unique(samsungData[["subject"]]))
## [1] 21
# The identifiers those subjects carry.
unique(samsungData[["subject"]])
## [1] 1 3 5 6 7 8 11 14 15 16 17 19 21 22 23 25 26 27 28 29 30
# Keep only rows without missing values; matching dimensions on both sides
# mean that no row was dropped, i.e. there are no incomplete cases.
complete_cases <- samsungData[complete.cases(samsungData), ]
dim(samsungData) == dim(complete_cases)
## [1] TRUE TRUE
Your task is to build a function that predicts what activity a subject is performing based on the quantitative measurements from the Samsung phone.
For this analysis, your training set must include the data from subjects 1, 3, 5, and 6, but you may also train on data from additional subjects if you wish.
Your test set is the data from subjects 27, 28, 29, and 30, but you may use more data to test. Be careful that your training/test sets do not overlap.
# Split the data by subject into a training set and a test set, encode the
# activity labels as integers, and save the results to disk.

# Subjects assigned to each set. The sets must not share a subject, so fail
# early if someone edits these vectors and introduces an overlap.
train_subjects <- c(1, 3, 5, 6)
test_subjects <- c(27, 28, 29, 30)
stopifnot(length(intersect(train_subjects, test_subjects)) == 0)

# One shared set of activity levels, taken from the full data. Encoding train
# and test against the same levels guarantees that a given integer code means
# the same activity in both sets, even if one subset lacks some activity.
activity_levels <- sort(unique(samsungData$activity))

train <- subset(samsungData, samsungData$subject %in% train_subjects)
table(train$subject)
##
## 1 3 5 6
## 347 341 302 325
dim(train)
## [1] 1315 563
numericActivity.train <- as.numeric(
  factor(train$activity, levels = activity_levels)
)
table(train$activity)
##
## laying sitting standing walk walkdown walkup
## 221 198 227 266 193 210
test <- subset(samsungData, samsungData$subject %in% test_subjects)
table(test$subject)
##
## 27 28 29 30
## 376 382 344 383
dim(test)
## [1] 1485 563
numericActivity.test <- as.numeric(
  factor(test$activity, levels = activity_levels)
)
table(test$activity)
##
## laying sitting standing walk walkdown walkup
## 293 264 283 229 200 216
# Persist both subsets and their integer activity codes for later steps.
save(
  train, test, numericActivity.train, numericActivity.test,
  file = "train_and_test.Rda"
)
# Record the platform, R version, attached packages and timestamp of this run
# so the analysis can be reproduced.

# str() prints its summary itself and returns NULL, so it is called directly;
# wrapping it in print() would add a stray "NULL" line to the report.
str(.Platform)
## List of 8
## $ OS.type : chr "windows"
## $ file.sep : chr "/"
## $ dynlib.ext: chr ".dll"
## $ GUI : chr "RTerm"
## $ endian : chr "little"
## $ pkgType : chr "win.binary"
## $ path.sep : chr ";"
## $ r_arch : chr "x64"
# Details of the running R build.
print(version)
## _
## platform x86_64-w64-mingw32
## arch x86_64
## os mingw32
## system x86_64, mingw32
## status
## major 3
## minor 0.2
## year 2013
## month 09
## day 25
## svn rev 63987
## language R
## version.string R version 3.0.2 (2013-09-25)
## nickname Frisbee Sailing
# Attached and loaded packages; locale details are left out of the printout.
print(sessionInfo(), locale = FALSE)
## R version 3.0.2 (2013-09-25)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] knitr_1.5
##
## loaded via a namespace (and not attached):
## [1] evaluate_0.5.1 formatR_0.10 stringr_0.6.2 tools_3.0.2
# Time at which the report was generated.
print(Sys.time())
## [1] "2013-11-27 18:14:12 EST"