With machine learning, we can learn how to use a smartphone's accelerometer and gyroscope readings to predict the physical activities of the individuals carrying the phones. The data used in this post comes from the Smartphone-Based Recognition of Human Activities and Postural Transitions (HAPT) data set.
library(keras) # Neural Networks
library(tidyverse) # Data cleaning / Visualization
## -- Attaching packages ----------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts -------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(knitr) # Table printing
library(rmarkdown) # Misc. output utilities
library(ggridges) # Visualization
##
## Attaching package: 'ggridges'
## The following object is masked from 'package:ggplot2':
##
## scale_discrete_manual
# Load the lookup table that maps each activity number to its label.
activityLabels <- read.table(
  "C:/Users/Test/Documents/renni/renni_cv/myproject_renniekaputri/2machinelearning/1/HAPT Data Set/activity_labels.txt",
  col.names = c("number", "label")
)
# Print the lookup table: numbers centred, labels left-aligned.
kable(activityLabels, align = c("c", "l"))
| number | label |
|---|---|
| 1 | WALKING |
| 2 | WALKING_UPSTAIRS |
| 3 | WALKING_DOWNSTAIRS |
| 4 | SITTING |
| 5 | STANDING |
| 6 | LAYING |
| 7 | STAND_TO_SIT |
| 8 | SIT_TO_STAND |
| 9 | SIT_TO_LIE |
| 10 | LIE_TO_SIT |
| 11 | STAND_TO_LIE |
| 12 | LIE_TO_STAND |
# Load the label file: each row gives the experiment, the user, the activity
# number, and the first/last row position of that activity in the recording.
labels <- read.table(
  "C:/Users/Test/Documents/renni/renni_cv/myproject_renniekaputri/2machinelearning/1/HAPT Data Set/RawData/labels.txt",
  col.names = c("experiment", "userId", "activity", "startPos", "endPos")
)
# Preview the first 50 label rows as a paged table.
paged_table(head(labels, 50))
# Names of every file in the raw data directory.
dataFiles <- list.files(
  "C:/Users/Test/Documents/renni/renni_cv/myproject_renniekaputri/2machinelearning/1/HAPT Data Set/RawData"
)
# Show the first few file names.
head(dataFiles)
## [1] "acc_exp01_user01.txt" "acc_exp02_user01.txt" "acc_exp03_user02.txt"
## [4] "acc_exp04_user02.txt" "acc_exp05_user03.txt" "acc_exp06_user03.txt"
# Build one row per (experiment, userId) pair, with the accelerometer and
# gyroscope file names in separate columns. The labels file is excluded up
# front because its name does not follow the type_expNN_userNN pattern.
fileInfo <- tibble(filePath = dataFiles[dataFiles != "labels.txt"]) %>%
  separate(
    filePath,
    into = c("type", "experiment", "userId"),
    sep = "_",
    remove = FALSE
  ) %>%
  mutate(
    # strip the "exp" prefix, and the "user" prefix / ".txt" suffix
    experiment = str_remove(experiment, "exp"),
    userId = str_remove_all(userId, "user|\\.txt")
  ) %>%
  # one column per sensor type, holding that sensor's file name
  spread(type, filePath)
kable(head(fileInfo))
| experiment | userId | acc | gyro |
|---|---|---|---|
| 01 | 01 | acc_exp01_user01.txt | gyro_exp01_user01.txt |
| 02 | 01 | acc_exp02_user01.txt | gyro_exp02_user01.txt |
| 03 | 02 | acc_exp03_user02.txt | gyro_exp03_user02.txt |
| 04 | 02 | acc_exp04_user02.txt | gyro_exp04_user02.txt |
| 05 | 03 | acc_exp05_user03.txt | gyro_exp05_user03.txt |
| 06 | 03 | acc_exp06_user03.txt | gyro_exp06_user03.txt |
# Read the accelerometer and gyroscope recordings for one experiment/user
# pair and return them side by side in a single data frame.
#
# experiment, userId: the id strings as they appear in the file names
#   (e.g. "01"); they are pasted into the file name unchanged.
# Returns a data frame with columns a_x, a_y, a_z, g_x, g_y, g_z.
readInData <- function(experiment, userId) {
  rawDataDir <- "C:/Users/Test/Documents/renni/renni_cv/myproject_renniekaputri/2machinelearning/1/HAPT Data Set/RawData/"

  # Full path of the recording for the given sensor ("acc" or "gyro").
  sensorPath <- function(sensor) {
    paste0(rawDataDir, sensor, "_exp", experiment, "_user", userId, ".txt")
  }

  accReadings <- read.table(
    sensorPath("acc"),
    col.names = c("a_x", "a_y", "a_z")
  )
  gyroReadings <- read.table(
    sensorPath("gyro"),
    col.names = c("g_x", "g_y", "g_z")
  )

  bind_cols(accReadings, gyroReadings)
}
# Load the recording for one experiment/user pair and cut it into the
# labelled observations listed for that pair in the global `labels` table.
#
# curExperiment, curUserId: id strings as used in the file names; they are
#   converted with as.integer() before being compared to `labels`.
# Returns the matching rows of `labels` without startPos/endPos, plus a
# list-column `data` holding the sensor rows of each observation.
loadFileData <- function(curExperiment, curUserId) {
  # all sensor readings recorded in this file
  sensorData <- readInData(curExperiment, curUserId)

  # rows startPos..endPos (inclusive) of the recording
  sliceRows <- function(startPos, endPos) {
    sensorData[startPos:endPos, ]
  }

  targetUser <- as.integer(curUserId)
  targetExperiment <- as.integer(curExperiment)

  # label rows that belong to this file
  fileLabels <- filter(
    labels,
    userId == targetUser,
    experiment == targetExperiment
  )

  # attach each observation's readings, then drop the position columns
  fileLabels %>%
    mutate(data = map2(startPos, endPos, sliceRows)) %>%
    select(-startPos, -endPos)
}
# Load the observations of every experiment/user file into one data frame and
# attach the activity name for each activity number.
# Note: right_join keeps every row of activityLabels, so an activity without
# any observation would still appear once, with missing observation columns.
allObservations <- map2_df(
  fileInfo$experiment,
  fileInfo$userId,
  loadFileData
) %>%
  right_join(activityLabels, by = c("activity" = "number")) %>%
  rename(activityName = label)
# Cache the assembled observations on disk.
write_rds(allObservations, "allObservations.rds")
dim(allObservations)
## [1] 1214 5
# Ridge plot of recording length (rows per observation) for each activity.
ggplot(
  mutate(allObservations, recording_length = map_int(data, nrow)),
  aes(x = recording_length, y = activityName)
) +
  geom_density_ridges(alpha = 0.8)
## Picking joint bandwidth of 32.5
# Activities retained for the rest of the analysis.
desiredActivities <- c(
  "STAND_TO_SIT", "SIT_TO_STAND", "SIT_TO_LIE",
  "LIE_TO_SIT", "STAND_TO_LIE", "LIE_TO_STAND"
)
# Keep only the desired activities and number the remaining observations
# sequentially. seq_len(n()) is used instead of 1:n() because 1:n() evaluates
# to c(1, 0) when no row matches, which makes mutate() fail on a length
# mismatch; seq_len(0) is a zero-length vector.
filteredObservations <- allObservations %>%
  filter(activityName %in% desiredActivities) %>%
  mutate(observationId = seq_len(n()))
filteredObservations %>% paged_table()
# Split by user rather than by observation, so that no individual contributes
# to both the training and the testing set.
userIds <- unique(allObservations$userId)
# Draw 24 users (80% of the 30 individuals) for training; the seed is fixed
# so the same split is reproduced on every run.
set.seed(42)
trainIds <- sample(userIds, size = 24)
# Every user not drawn for training goes to the testing set.
testIds <- setdiff(userIds, trainIds)
# Partition the filtered observations by the user they came from.
trainData <- filter(filteredObservations, userId %in% trainIds)
testData <- filter(filteredObservations, userId %in% testIds)
# Flatten the nested training observations into one long data frame with one
# row per (observation, time step, sensor reading).
# seq_len() replaces the 1:nrow() / 1:n() forms, which evaluate to c(1, 0)
# for zero rows and would then index a non-existent row or produce a
# `time` column of the wrong length.
unpackedObs <- seq_len(nrow(trainData)) %>%
  map_df(function(rowNum) {
    dataRow <- trainData[rowNum, ]
    # unnest this observation's readings and tag them with its identifiers
    dataRow$data[[1]] %>%
      mutate(
        activityName = dataRow$activityName,
        observationId = dataRow$observationId,
        time = seq_len(n())
      )
  }) %>%
  # long format: one row per reading column (a_x, ..., g_z) per time step
  gather(reading, value, -time, -activityName, -observationId) %>%
  # split e.g. "a_x" into sensor type "a" and direction "x"
  separate(reading, into = c("type", "direction"), sep = "_") %>%
  mutate(type = ifelse(type == "a", "acceleration", "gyro"))
# Plot every training reading over time, coloured by axis direction, with a
# smoothed trend on top; one panel per sensor type (rows) and activity
# (columns), each sensor type with its own y scale.
ggplot(unpackedObs, aes(x = time, y = value, color = direction)) +
  geom_line(alpha = 0.2) +
  geom_smooth(se = FALSE, alpha = 0.7, size = 0.5) +
  facet_grid(type ~ activityName, scales = "free_y") +
  theme_minimal() +
  theme(axis.text.x = element_blank())
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.