This analysis explores smartphone sensor data for developing a health monitoring system that can:

- Track daily physical activity levels
- Identify sedentary behavior patterns
- Monitor exercise intensity
- Detect potential fall risks
Our findings will support the development of a mobile health application for elderly care and general wellness monitoring.
# ---- Data import ----
# Reads the feature and activity lookup tables plus the training and test
# splits (sensor data, activity ids, subject ids) from `data_path`, then names
# the sensor columns after the entries of features.txt.

# Define data paths
data_path <- "UCI HAR Dataset/"

# Read one file located under `data_path`; any further arguments are forwarded
# to read.table().
read_har <- function(relative_path, ...) {
  read.table(paste0(data_path, relative_path), ...)
}

# Import feature names and activity labels.
# stringsAsFactors = FALSE keeps names and labels as character vectors even on
# R < 4.0, where read.table() converted strings to factors by default.
features <- read_har(
  "features.txt",
  col.names = c("index", "feature"),
  stringsAsFactors = FALSE
)
activity_labels <- read_har(
  "activity_labels.txt",
  col.names = c("activity_id", "activity"),
  stringsAsFactors = FALSE
)

# Import training data
X_train <- read_har("train/X_train.txt")
y_train <- read_har("train/y_train.txt", col.names = "activity")
subject_train <- read_har("train/subject_train.txt", col.names = "subject")

# Import test data
X_test <- read_har("test/X_test.txt")
y_test <- read_har("test/y_test.txt", col.names = "activity")
subject_test <- read_har("test/subject_test.txt", col.names = "subject")

# Assign column names to sensor data.
# A length mismatch would silently leave NA column names, so check first.
stopifnot(
  ncol(X_train) == nrow(features),
  ncol(X_test) == nrow(features)
)
colnames(X_train) <- features$feature
colnames(X_test) <- features$feature

The dataset contains smartphone sensor measurements from 30 volunteers performing six different activities. Key components include:
# ---- Merge splits ----
# Within each split the subject id, activity id and sensor columns are placed
# side by side, and every row is tagged with its origin in `data_type`. The two
# tagged tables are then stacked, training rows first.
full_data <- bind_rows(
  mutate(bind_cols(subject_train, y_train, X_train), data_type = "train"),
  mutate(bind_cols(subject_test, y_test, X_test), data_type = "test")
)

# Attach the text label that belongs to each activity id. Both tables carry a
# column called `activity`, so the label arrives as `activity.y` (the name the
# later steps refer to).
full_data <- left_join(
  full_data,
  activity_labels,
  by = c("activity" = "activity_id")
)
# Display basic information about the combined table. The `##` lines are the
# output recorded when the report was rendered.
cat("Dataset dimensions:", dim(full_data), "\n")
## Dataset dimensions: 10299 565
cat("Number of subjects:", length(unique(full_data$subject)), "\n")
## Number of subjects: 30
cat("Number of activities:", length(unique(full_data$activity)), "\n")
## Number of activities: 6
# Excluding subject, activity, label, and data_type
cat("Number of features:", ncol(full_data) - 4, "\n")
## Number of features: 561
# ---- Feature selection ----
# Keep the identifier columns, every column whose name contains "mean()" or
# "std()" (explicitly dropping any "meanFreq()" column), and the split marker.
selected_features <- select(
  full_data,
  subject, activity, activity.y,
  contains("mean()"),
  contains("std()"),
  -contains("meanFreq()"),
  data_type
)

# Make the names easier to type: strip the literal "()" and turn every "-"
# into "_".
names(selected_features) <- gsub(
  "-", "_",
  gsub("()", "", names(selected_features), fixed = TRUE),
  fixed = TRUE
)
# ---- Missing-value audit ----
# Count the NA entries of each column, then report the grand total.
missing_values <- vapply(
  selected_features,
  function(column) sum(is.na(column)),
  numeric(1)
)
cat("Total missing values:", sum(missing_values), "\n")
## Total missing values: 0
# ---- Share of records per activity ----
# One row per activity label with its record count `n` and the percentage of
# all records it represents.
activity_distribution <- mutate(
  count(selected_features, activity.y),
  percentage = n / sum(n) * 100
)

# Bar chart of those percentages, bars ordered from largest to smallest.
ggplot(
  data = activity_distribution,
  mapping = aes(x = reorder(activity.y, -percentage), y = percentage)
) +
  geom_col(fill = "steelblue") +
  theme_minimal() +
  labs(
    title = "Distribution of Activities in Dataset",
    x = "Activity",
    y = "Percentage of Records"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Calculate mean acceleration for each activity
# Per-activity intensity: the average magnitude of the (X, Y, Z) mean body
# acceleration vector. The magnitude is the square root of the summed squares,
# which matches the "magnitude" axis label below and the descriptive-statistics
# step later in the report (the squared value was averaged here before).
# Activities are then split into three bands using the 33% and 66% quantiles
# of the per-activity averages.
activity_intensity <- selected_features %>%
  mutate(
    acc_magnitude = sqrt(
      tBodyAcc_mean_X^2 + tBodyAcc_mean_Y^2 + tBodyAcc_mean_Z^2
    )
  ) %>%
  group_by(activity.y) %>%
  summarise(
    total_acc_mean = mean(acc_magnitude),
    .groups = "drop"
  ) %>%
  mutate(
    intensity_level = case_when(
      total_acc_mean >= quantile(total_acc_mean, 0.66) ~ "High",
      total_acc_mean >= quantile(total_acc_mean, 0.33) ~ "Medium",
      TRUE ~ "Low"
    )
  )

# Visualize activity intensity, bars ordered from highest to lowest
ggplot(
  activity_intensity,
  aes(
    x = reorder(activity.y, -total_acc_mean),
    y = total_acc_mean,
    fill = intensity_level
  )
) +
  geom_col() +
  scale_fill_manual(
    values = c(
      "High" = "#ff7f7f",
      "Medium" = "#7fbf7f",
      "Low" = "#7f7fff"
    )
  ) +
  theme_minimal() +
  labs(
    title = "Activity Intensity Based on Total Acceleration",
    x = "Activity",
    y = "Total Acceleration (magnitude)",
    fill = "Intensity Level"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Analyze movement patterns using acceleration components
# Average Y and X mean body acceleration per activity.
# NOTE(review): Y is treated as "vertical" and X as "horizontal" here; that
# depends on how the phone was oriented on the wearer -- confirm against the
# dataset documentation.
movement_patterns <- summarise(
  group_by(selected_features, activity.y),
  vertical_acc = mean(tBodyAcc_mean_Y),
  horizontal_acc = mean(tBodyAcc_mean_X),
  .groups = "drop"
)

# One labelled point per activity
ggplot(
  data = movement_patterns,
  mapping = aes(x = horizontal_acc, y = vertical_acc, color = activity.y)
) +
  geom_point(size = 4) +
  geom_text(aes(label = activity.y), vjust = -1, size = 3) +
  theme_minimal() +
  labs(
    title = "Movement Patterns by Activity",
    x = "Horizontal Acceleration",
    y = "Vertical Acceleration",
    color = "Activity"
  )

# Prepare data for statistical testing
# Split the records into stationary and moving activities by label.
static_activities <- selected_features %>%
  filter(activity.y %in% c("SITTING", "STANDING", "LAYING"))
dynamic_activities <- selected_features %>%
  filter(activity.y %in% c("WALKING", "WALKING_UPSTAIRS", "WALKING_DOWNSTAIRS"))

# Perform t-test to compare static vs dynamic activities on the X-axis mean
# body acceleration (t.test() defaults to the Welch unequal-variance test).
# NOTE(review): the rows are repeated measurements taken from 30 subjects, so
# the independence assumption behind this test is questionable -- consider
# aggregating per subject before testing.
t_test_result <- t.test(
  static_activities$tBodyAcc_mean_X,
  dynamic_activities$tBodyAcc_mean_X
)

# Print results: header line followed by the test itself. The `##` lines are
# the output recorded when the report was rendered.
cat("T-test comparing static vs dynamic activities:\n")
## T-test comparing static vs dynamic activities:
print(t_test_result)
##
## Welch Two Sample t-test
##
## data: static_activities$tBodyAcc_mean_X and dynamic_activities$tBodyAcc_mean_X
## t = -1.2124, df = 9194.6, p-value = 0.2254
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.004311329 0.001016210
## sample estimates:
## mean of x mean of y
## 0.2735999 0.2752474
# Calculate descriptive statistics for each activity: mean, standard deviation
# and maximum of the per-record acceleration magnitude. The magnitude is
# computed once per record and reused by all three summaries.
intensity_stats <- selected_features %>%
  mutate(
    acc_magnitude = sqrt(
      tBodyAcc_mean_X^2 + tBodyAcc_mean_Y^2 + tBodyAcc_mean_Z^2
    )
  ) %>%
  group_by(activity.y) %>%
  summarise(
    mean_acceleration = mean(acc_magnitude),
    sd_acceleration = sd(acc_magnitude),
    max_acceleration = max(acc_magnitude),
    .groups = "drop"
  )

# Display results as a table rounded to three decimals
knitr::kable(
  intensity_stats,
  caption = "Activity Intensity Metrics",
  digits = 3
)

| activity.y | mean_acceleration | sd_acceleration | max_acceleration |
|---|---|---|---|
| LAYING | 0.309 | 0.087 | 1.364 |
| SITTING | 0.299 | 0.032 | 0.734 |
| STANDING | 0.302 | 0.022 | 0.665 |
| WALKING | 0.300 | 0.048 | 0.445 |
| WALKING_DOWNSTAIRS | 0.314 | 0.085 | 0.621 |
| WALKING_UPSTAIRS | 0.300 | 0.066 | 0.504 |
Based on our analysis, we can classify activities into three categories: