## Created on 23 June 2013
## Revised on Sun Jun 23 14:50:29 2013
library(RCurl) ## complicated, download data from https
## Loading required package: bitops
library(XML)
## Download the Samsung activity data set over HTTPS and load it into
## `samsungData`. The CSV text is fetched with RCurl, staged in a temporary
## file, parsed with read.csv(), and the staging file is removed afterwards.
fileUrl <- "https://dl.dropboxusercontent.com/u/8272421/samsungData.csv"
## NOTE(review): ssl.verifypeer = FALSE switches off TLS certificate
## verification for this download -- confirm it is still needed here.
myCsv <- getURL(fileUrl, ssl.verifypeer = FALSE)
temporaryFile <- tempfile()
con <- file(temporaryFile, open = "w")
## Close the connection even when the write fails so the handle is not leaked.
invisible(tryCatch(cat(myCsv, file = con), finally = close(con)))
## Remove the staging file once it has been parsed (or parsing has failed).
samsungData <- tryCatch(read.csv(temporaryFile), finally = unlink(temporaryFile))
## Count the observations recorded for each activity label across the whole
## data set; the counts printed by the recorded run are kept below.
table(samsungData[["activity"]])
##
## laying sitting standing walk walkdown walkup
## 1407 1286 1374 1226 986 1073
## Plot the first two columns of samsungData for subject 1 side by side
## (the average-acceleration features, per the original note), colouring each
## observation by its activity.
par(mfrow = c(1, 2))
## Activity of every observation as an integer factor code, restricted to the
## rows of subject 1; later plots reuse it as their colour vector.
numericActivity <- as.numeric(as.factor(samsungData$activity))[samsungData$subject == 1]
plot(samsungData[samsungData$subject == 1, 1], pch = 19, col = numericActivity,
     ylab = names(samsungData)[1])
plot(samsungData[samsungData$subject == 1, 2], pch = 19, col = numericActivity,
     ylab = names(samsungData)[2])
## Take the legend labels from the same subject-1 rows that produced
## numericActivity, so that label i and colour i always describe the same
## activity (labels from the full data set could differ in order or length).
legend(150, -0.1, legend = unique(samsungData$activity[samsungData$subject == 1]),
       col = unique(numericActivity), pch = 19)
par(mfrow = c(1, 1))
## Load the myplclust() dendrogram helper. In the recorded run source() could
## not open the HTTPS URL ("unsupported URL scheme"), so when the direct
## source() fails the script text is fetched with RCurl -- the same route the
## CSV download uses -- and sourced from a temporary file.
## NOTE(review): the fallback runs remote code fetched with certificate
## verification disabled (ssl.verifypeer = FALSE) -- confirm this is acceptable.
myplclustUrl <- "https://dl.dropboxusercontent.com/u/8272421/myplclust.R"
invisible(tryCatch(
  source(myplclustUrl),
  error = function(sourceError) {
    myplclustFile <- tempfile(fileext = ".R")
    on.exit(unlink(myplclustFile), add = TRUE)
    writeLines(getURL(myplclustUrl, ssl.verifypeer = FALSE), myplclustFile)
    source(myplclustFile)
  }
))
## Hierarchical clustering of subject 1 on columns 1 to 3 only
## (clustering based just on average acceleration).
distanceMatrix <- dist(samsungData[samsungData$subject == 1, 1:3])
hclustering <- hclust(distanceMatrix)
## Draw the dendrogram; lab.col presumably colours the leaf labels by
## activity -- confirm against myplclust.R.
myplclust(hclustering, lab.col = numericActivity)
## Plot columns 10 and 11 of samsungData for subject 1 in a one-by-two layout
## (the maximum-acceleration features, per the original note), coloured by
## activity, then return to a single-panel layout.
par(mfrow = c(1, 2))
invisible(lapply(10:11, function(featureColumn) {
  plot(samsungData[samsungData$subject == 1, featureColumn],
       pch = 19, col = numericActivity,
       ylab = names(samsungData)[featureColumn])
}))
par(mfrow = c(1, 1))
## Make sure the myplclust() dendrogram helper is available. It is fetched
## only when it is not already defined, which avoids downloading it again.
## In the recorded run source() could not open the HTTPS URL ("unsupported
## URL scheme"), so when the direct source() fails the script text is fetched
## with RCurl and sourced from a temporary file.
## NOTE(review): the fallback runs remote code fetched with certificate
## verification disabled (ssl.verifypeer = FALSE) -- confirm this is acceptable.
if (!exists("myplclust", mode = "function")) {
  myplclustUrl <- "https://dl.dropboxusercontent.com/u/8272421/myplclust.R"
  invisible(tryCatch(
    source(myplclustUrl),
    error = function(sourceError) {
      myplclustFile <- tempfile(fileext = ".R")
      on.exit(unlink(myplclustFile), add = TRUE)
      writeLines(getURL(myplclustUrl, ssl.verifypeer = FALSE), myplclustFile)
      source(myplclustFile)
    }
  ))
}
## Hierarchical clustering of subject 1 on columns 10 to 12
## (clustering based on maximum acceleration).
distanceMatrix <- dist(samsungData[samsungData$subject == 1, 10:12])
hclustering <- hclust(distanceMatrix)
## Draw the dendrogram; lab.col presumably colours the leaf labels by
## activity -- confirm against myplclust.R.
myplclust(hclustering, lab.col = numericActivity)
## Singular value decomposition of subject 1's observations. Columns 562 to
## 564 are dropped and the remaining columns are centred and scaled first
## (presumably the dropped columns are non-feature columns -- confirm).
svd1 <- svd(scale(samsungData[samsungData$subject == 1, -(562:564)]))
## Show the first two left singular vectors side by side, coloured by
## activity, then return to a single-panel layout.
par(mfrow = c(1, 2))
plot(svd1$u[, 1], pch = 19, col = numericActivity)
plot(svd1$u[, 2], pch = 19, col = numericActivity)
par(mfrow = c(1, 1))
## Inspect the second right singular vector and record the index of its
## largest entry as the "maximum contributor".
plot(svd1$v[, 2], pch = 19)
maxContrib <- which.max(svd1$v[, 2])
## Repeat the hierarchical clustering on columns 10 to 12 plus the maximum
## contributor column.
distanceMatrix <- dist(
  samsungData[samsungData$subject == 1, c(10, 11, 12, maxContrib)]
)
hclustering <- hclust(distanceMatrix)
myplclust(hclustering, lab.col = numericActivity)
## Error: could not find function "myplclust"
## Print the name of the maximum contributor column.
colnames(samsungData)[maxContrib]
## [1] "tBodyGyroMag.arCoeff..2"
## K-means clustering of subject 1 into six clusters, with columns 562 to 564
## left out (nstart = 1, first try; nstart is not passed, so the kmeans()
## default applies).
kClust <- kmeans(
  samsungData[samsungData$subject == 1, -(562:564)],
  centers = 6
)
## Cross-tabulate cluster membership against the activity of each observation.
table(kClust[["cluster"]], samsungData[samsungData$subject == 1, "activity"])
##
## laying sitting standing walk walkdown walkup
## 1 12 22 26 0 0 0
## 2 0 0 0 6 25 26
## 3 10 24 27 0 0 0
## 4 0 0 0 5 24 27
## 5 11 1 0 43 0 0
## 6 17 0 0 41 0 0
## K-means clustering of subject 1 into six clusters, with columns 562 to 564
## left out (nstart = 1, second try).
kClust <- kmeans(
  samsungData[samsungData$subject == 1, -(562:564)],
  centers = 6,
  nstart = 1
)
## Cross-tabulate cluster membership against the activity of each observation.
table(kClust[["cluster"]], samsungData[samsungData$subject == 1, "activity"])
##
## laying sitting standing walk walkdown walkup
## 1 17 0 0 41 0 0
## 2 0 0 0 6 25 26
## 3 0 0 0 5 24 27
## 4 11 1 0 43 0 0
## 5 12 22 26 0 0 0
## 6 10 24 27 0 0 0
## K-means clustering of subject 1 into six clusters, with columns 562 to 564
## left out (nstart = 100, first try).
kClust <- kmeans(
  samsungData[samsungData$subject == 1, -(562:564)],
  centers = 6,
  nstart = 100
)
## Cross-tabulate cluster membership against the activity of each observation.
table(kClust[["cluster"]], samsungData[samsungData$subject == 1, "activity"])
##
## laying sitting standing walk walkdown walkup
## 1 11 1 0 43 0 0
## 2 10 24 27 0 0 0
## 3 12 22 26 0 0 0
## 4 0 0 0 6 25 26
## 5 17 0 0 41 0 0
## 6 0 0 0 5 24 27
## K-means clustering of subject 1 into six clusters, with columns 562 to 564
## left out (nstart = 100, second try).
kClust <- kmeans(
  samsungData[samsungData$subject == 1, -(562:564)],
  centers = 6,
  nstart = 100
)
## Cross-tabulate cluster membership against the activity of each observation.
table(kClust[["cluster"]], samsungData[samsungData$subject == 1, "activity"])
##
## laying sitting standing walk walkdown walkup
## 1 17 0 0 41 0 0
## 2 11 1 0 43 0 0
## 3 0 0 0 6 25 26
## 4 10 24 27 0 0 0
## 5 0 0 0 5 24 27
## 6 12 22 26 0 0 0
## Plot the first ten coordinates of two k-means cluster centres. The
## component is spelled out in full as `centers`, so the code no longer relies
## on `$` partial matching.
## NOTE(review): the original notes label these "Cluster 1 (Laying)" and
## "Cluster 2 (Walking)", but the second plot uses row 6, and k-means cluster
## numbering changes between runs -- check the rows against the latest table.
## Centre of cluster 1.
plot(kClust$centers[1, 1:10], pch = 19, ylab = "Cluster Center", xlab = "")
## Centre of cluster 6.
plot(kClust$centers[6, 1:10], pch = 19, ylab = "Cluster Center", xlab = "")