Machine Learning

Activity Detection with Smartphone Data using Machine Learning

With machine learning, we can learn how to use smartphone accelerometer and gyroscope data to predict the physical activities of the individuals carrying the phones. The data used in this post comes from the Smartphone-Based Recognition of Human Activities and Postural Transitions (HAPT) data set.

library(keras)     # Neural Networks
library(tidyverse) # Data cleaning / Visualization

## -- Attaching packages ----------------------------------------------------------- tidyverse 1.2.1 --

## v ggplot2 3.2.1     v purrr   0.3.2
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0

## -- Conflicts -------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(knitr)     # Table printing
library(rmarkdown) # Misc. output utilities 
library(ggridges)  # Visualization

## 
## Attaching package: 'ggridges'

## The following object is masked from 'package:ggplot2':
## 
##     scale_discrete_manual

# Load the lookup table that maps each numeric activity code to its label.
activityLabels <- read.table(
  file = "C:/Users/Test/Documents/renni/renni_cv/myproject_renniekaputri/2machinelearning/1/HAPT Data Set/activity_labels.txt",
  col.names = c("number", "label")
)

# Render the lookup table: codes centred, labels left-aligned.
kable(activityLabels, align = c("c", "l"))

number	label
1	WALKING
2	WALKING_UPSTAIRS
3	WALKING_DOWNSTAIRS
4	SITTING
5	STANDING
6	LAYING
7	STAND_TO_SIT
8	SIT_TO_STAND
9	SIT_TO_LIE
10	LIE_TO_SIT
11	STAND_TO_LIE
12	LIE_TO_STAND

# Load the labels file. Each row identifies one labelled activity and the
# row range (startPos to endPos) it occupies in an experiment/user recording.
labels <- read.table(
  file = "C:/Users/Test/Documents/renni/renni_cv/myproject_renniekaputri/2machinelearning/1/HAPT Data Set/RawData/labels.txt",
  col.names = c("experiment", "userId", "activity", "startPos", "endPos")
)

# Preview the first 50 label rows as a paged table.
paged_table(head(labels, 50))

# Collect the name of every file in the raw-data directory, then preview
# the first few entries.
dataFiles <- list.files(
  path = "C:/Users/Test/Documents/renni/renni_cv/myproject_renniekaputri/2machinelearning/1/HAPT Data Set/RawData"
)
head(dataFiles)

## [1] "acc_exp01_user01.txt" "acc_exp02_user01.txt" "acc_exp03_user02.txt"
## [4] "acc_exp04_user02.txt" "acc_exp05_user03.txt" "acc_exp06_user03.txt"

# Build one row per experiment/user pair holding the names of its
# accelerometer ("acc") and gyro ("gyro") files.
fileInfo <- tibble(
  filePath = dataFiles
) %>%
  # labels.txt sits in the same directory but is not a sensor recording.
  filter(filePath != "labels.txt") %>%
  # File names have the form "<type>_exp<NN>_user<NN>.txt".
  separate(filePath, sep = "_",
           into = c("type", "experiment", "userId"),
           remove = FALSE) %>%
  mutate(
    experiment = str_remove(experiment, "exp"),
    userId = str_remove_all(userId, "user|\\.txt")
  ) %>%
  # pivot_wider() replaces the superseded spread(): one column per sensor
  # type, keyed by experiment and userId.
  pivot_wider(names_from = type, values_from = filePath)

fileInfo %>% head() %>% kable()

experiment	userId	acc	gyro
01	01	acc_exp01_user01.txt	gyro_exp01_user01.txt
02	01	acc_exp02_user01.txt	gyro_exp02_user01.txt
03	02	acc_exp03_user02.txt	gyro_exp03_user02.txt
04	02	acc_exp04_user02.txt	gyro_exp04_user02.txt
05	03	acc_exp05_user03.txt	gyro_exp05_user03.txt
06	03	acc_exp06_user03.txt	gyro_exp06_user03.txt

# Read the accelerometer and gyro recordings for one experiment/user pair
# into a single data frame.
#
# Arguments:
#   experiment  Experiment identifier exactly as it appears in the file
#               name (e.g. "01").
#   userId      User identifier exactly as it appears in the file name
#               (e.g. "01").
#   dataDir     Directory holding the "<type>_exp<NN>_user<NN>.txt" files.
#               Defaults to the location this script was written against.
#
# Returns the two files bound column-wise, with columns a_x, a_y, a_z
# (accelerometer) and g_x, g_y, g_z (gyro).
readInData <- function(
    experiment,
    userId,
    dataDir = "C:/Users/Test/Documents/renni/renni_cv/myproject_renniekaputri/2machinelearning/1/HAPT Data Set/RawData") {
  # Build the full path of the file for a given sensor type.
  genFilePath <- function(type) {
    file.path(
      dataDir,
      paste0(type, "_exp", experiment, "_user", userId, ".txt")
    )
  }

  bind_cols(
    read.table(genFilePath("acc"), col.names = c("a_x", "a_y", "a_z")),
    read.table(genFilePath("gyro"), col.names = c("g_x", "g_y", "g_z"))
  )
}

# Load the labelled observations recorded for one experiment/user pair.
#
# Arguments:
#   curExperiment  Experiment identifier, as used in the file name.
#   curUserId      User identifier, as used in the file name.
#
# Returns the matching rows of `labels` with startPos/endPos replaced by a
# list-column `data`, where each element is the slice of sensor readings
# covering that observation.
loadFileData <- function(curExperiment, curUserId) {

  # Sensor readings for the whole recording.
  sensorData <- readInData(curExperiment, curUserId)

  # Slice the readings belonging to a single observation.
  sliceRows <- function(from, to) {
    sensorData[from:to, ]
  }

  # `labels` stores the identifiers as integers, so convert before matching.
  experimentNum <- as.integer(curExperiment)
  userNum <- as.integer(curUserId)
  fileLabels <- filter(
    labels,
    userId == userNum,
    experiment == experimentNum
  )

  # Attach each observation's readings, then drop the position columns.
  withReadings <- mutate(
    fileLabels,
    data = map2(startPos, endPos, sliceRows)
  )
  select(withReadings, -startPos, -endPos)
}

# Load the observations for every experiment/user pair listed in fileInfo,
# attach the label for each activity code, and expose it as activityName.
allObservations <- rename(
  right_join(
    map2_df(fileInfo$experiment, fileInfo$userId, loadFileData),
    activityLabels,
    by = c("activity" = "number")
  ),
  activityName = label
)

# Cache the assembled observations on disk.
write_rds(allObservations, "allObservations.rds")
dim(allObservations)

## [1] 1214    5

# Ridge plot of how many rows each observation spans, per activity.
ggplot(
  mutate(allObservations, recording_length = map_int(data, nrow)),
  aes(x = recording_length, y = activityName)
) +
  geom_density_ridges(alpha = 0.8)

## Picking joint bandwidth of 32.5

# Activities to keep: the six transitions between postures.
desiredActivities <- c(
  "STAND_TO_SIT", "SIT_TO_STAND", "SIT_TO_LIE",
  "LIE_TO_SIT", "STAND_TO_LIE", "LIE_TO_STAND"
)

# Keep only the desired activities and give each remaining observation a
# sequential id.
filteredObservations <- allObservations %>%
  filter(activityName %in% desiredActivities) %>%
  # row_number() rather than 1:n(): 1:0 evaluates to c(1, 0), which would
  # make mutate() fail if the filter leaves no rows.
  mutate(observationId = row_number())

filteredObservations %>% paged_table()

# get all users
userIds <- allObservations$userId %>% unique()

# Share of users assigned to the training set. With the 30 individuals in
# this data set that is 24 users.
trainFraction <- 0.8

# Randomly choose the training users. The size is derived from the number
# of users instead of being hard-coded, so the split still works if the
# set of users changes.
set.seed(42) # seed for reproducibility
trainIds <- sample(userIds, size = floor(trainFraction * length(userIds)))

# set the rest of the users to the testing set
testIds <- setdiff(userIds, trainIds)

# Split by user, so no individual appears in both sets.
trainData <- filteredObservations %>%
  filter(userId %in% trainIds)

testData <- filteredObservations %>%
  filter(userId %in% testIds)






# Unpack every training observation into long format: one row per time
# step and sensor reading, with the reading name split into sensor type
# and direction.
# seq_len() and row_number() are used instead of 1:nrow() / 1:n(), which
# evaluate to c(1, 0) when there are no rows.
unpackedObs <- seq_len(nrow(trainData)) %>%
  map_df(function(rowNum){
    dataRow <- trainData[rowNum, ]
    dataRow$data[[1]] %>%
      mutate(
        activityName = dataRow$activityName,
        observationId = dataRow$observationId,
        time = row_number())
  }) %>%
  gather(reading, value, -time, -activityName, -observationId) %>%
  # Reading names are "<a|g>_<x|y|z>".
  separate(reading, into = c("type", "direction"), sep = "_") %>%
  mutate(type = ifelse(type == "a", "acceleration", "gyro"))

# Plot the raw traces (faint) with a smoothed trend per direction,
# facetted by sensor type and activity.
unpackedObs %>%
  ggplot(aes(x = time, y = value, color = direction)) +
  geom_line(alpha = 0.2) +
  geom_smooth(se = FALSE, alpha = 0.7, size = 0.5) +
  facet_grid(type ~ activityName, scales = "free_y") +
  theme_minimal() +
  theme( axis.text.x = element_blank() )

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Note: adding the `echo = FALSE` parameter to a code chunk prevents printing of the R code that generated the plot.

Machine Learning

Renni Ekaputri

November 19, 2019

Activity Detection with Smartphone Data using Machine Learning