Loading and preprocessing the data
# Load the raw activity data. The file is addressed through an explicit path
# instead of calling setwd(), so running the analysis does not change the
# session's working directory as a side effect.
data_dir <- "/Users/adrianromano/Downloads"
activity <- read.csv(file.path(data_dir, "activity.csv"))
str(activity)
## 'data.frame': 17568 obs. of 3 variables:
## $ steps : int NA NA NA NA NA NA NA NA NA NA ...
## $ date : Factor w/ 61 levels "2012-10-01","2012-10-02",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ interval: int 0 5 10 15 20 25 30 35 40 45 ...
# Convert the date column to class Date so it can be used for date arithmetic
# and weekday lookups later on.
activity$date <- as.Date(activity$date)
What is the mean total number of steps taken per day?
- Calculate the total number of steps taken per day
library(plyr)
# Sum the steps recorded on each date. The formula interface of aggregate()
# omits rows with missing values by default, so days that only contain NA
# steps do not appear in the result.
totalSteps <- setNames(
  aggregate(steps ~ date, data = activity, FUN = sum),
  c("Date", "Steps")
)
head(totalSteps)
## Date Steps
## 1 2012-10-02 126
## 2 2012-10-03 11352
## 3 2012-10-04 12116
## 4 2012-10-05 13294
## 5 2012-10-06 15420
## 6 2012-10-07 11015
- Histogram of the total number of steps taken per day
# Distribution of the daily step totals.
hist(
  totalSteps$Steps,
  col = "red",
  xlab = "Number of Steps",
  main = "Total Number of Steps"
)

- Calculate the mean and median of the total number of steps taken per day
# Mean of the daily totals; na.rm = TRUE skips any missing totals.
meanSteps <- mean(totalSteps[["Steps"]], na.rm = TRUE)
meanSteps
## [1] 10766.19
# Median of the same daily totals.
medianSteps <- median(totalSteps[["Steps"]], na.rm = TRUE)
medianSteps
## [1] 10765
- mean: 10766.19
- median: 10765
What is the average daily activity pattern?
- Create a time series plot of the 5-minute interval and the average number of steps taken averaged across all days
# Average the steps within each 5-minute interval over all days; na.rm = TRUE
# is forwarded to mean().
averageSteps <- setNames(
  aggregate(steps ~ interval, data = activity, FUN = mean, na.rm = TRUE),
  c("Interval", "Steps")
)
head(averageSteps)
## Interval Steps
## 1 0 1.7169811
## 2 5 0.3396226
## 3 10 0.1320755
## 4 15 0.1509434
## 5 20 0.0754717
## 6 25 2.0943396
# Line plot of the per-interval averages: interval on the x axis, average
# steps on the y axis.
with(
  averageSteps,
  plot(
    Interval, Steps,
    type = "l",
    col = "blue",
    main = "Average Number of Steps per Interval",
    xlab = "Interval",
    ylab = "Average Number of Steps"
  )
)

- Look at the maximum number of steps for the average 5-minute interval across all days
# Keep the row of averageSteps whose average step count is the largest
# (which.max() returns the first position of the maximum).
maxSteps <- averageSteps[which.max(averageSteps[["Steps"]]), ]
maxSteps
## Interval Steps
## 104 835 206.1698
- Interval 835 has the highest average number of steps across all days (206.1698)
Imputing missing values
- Look at the number of missing values in the data
# Count the NA cells column by column, then add the per-column counts to get
# the total number of missing values in the data frame.
sum(vapply(activity, function(column) sum(is.na(column)), integer(1)))
## [1] 2304
- Total number of missing values is 2304
- Replace NA Values with the mean average steps per interval
# Single fill value: the mean of the per-interval average step counts.
meanPerInterval <- mean(averageSteps[["Steps"]], na.rm = TRUE)
# Work on a copy so the original data stay untouched, then overwrite only the
# missing step counts with the fill value.
activity1 <- activity
activity1$steps <- replace(
  activity1$steps,
  is.na(activity1$steps),
  meanPerInterval
)
- Strategy: Every missing value is replaced with the same number, the overall mean of the per-interval average steps
- Histogram of the total number of steps taken each day after missing values were imputed
# Recompute the daily step totals from the imputed data set.
totalSteps1 <- aggregate(steps ~ date, data = activity1, FUN = sum)
colnames(totalSteps1) <- c("Date", "Steps")
# Plot the imputed totals (totalSteps1). Plotting totalSteps here would only
# redraw the pre-imputation histogram.
hist(
  totalSteps1$Steps,
  main = "Total Number of Steps",
  xlab = "Number of Steps",
  col = "green"
)

- Calculate the mean and median of the total number of steps taken each day after missing values were imputed
# Mean of the daily totals computed from the imputed data.
meanSteps1 <- mean(totalSteps1[["Steps"]])
meanSteps1
## [1] 10766.19
# Median of the daily totals computed from the imputed data.
medianSteps1 <- median(totalSteps1[["Steps"]])
medianSteps1
## [1] 10766.19
- mean: 10766.19
- median: 10766.19
- Replacing the missing values did not change the mean, while the median increased slightly (from 10765 to 10766.19)
Are there differences in activity patterns between weekdays and weekends?
- Create a new factor variable indicating whether a given date is a weekday or weekend day
activity1$date <- as.Date(activity1$date)
# Classify each date as weekend or weekday. The numeric day of the week from
# POSIXlt (0 = Sunday, 6 = Saturday) is used instead of weekdays(), whose day
# names depend on the session locale and would not match "Saturday"/"Sunday"
# in a non-English locale.
is_weekend <- as.POSIXlt(activity1$date)$wday %in% c(0, 6)
# Store the category as a factor, as the task asks for a factor variable.
activity1$dayCategory <- factor(
  ifelse(is_weekend, "Weekend", "Weekday"),
  levels = c("Weekday", "Weekend")
)
# Average the (imputed) steps for every combination of day type and interval.
averageDayCategory <- aggregate(
  steps ~ dayCategory + interval,
  data = activity1,
  FUN = mean
)
colnames(averageDayCategory) <- c("DayType", "Interval", "Steps")
head(averageDayCategory)
## DayType Interval Steps
## 1 Weekday 0 7.006569
## 2 Weekend 0 4.672825
## 3 Weekday 5 5.384347
## 4 Weekend 5 4.672825
## 5 Weekday 10 5.139902
## 6 Weekend 10 4.672825
- Make a panel plot containing a time series plot of the 5-minute interval and the average number of steps taken, averaged across all weekday days or weekend days
library(ggplot2)
# Base plot: interval on x, average steps on y, coloured by day type.
p <- ggplot(
  averageDayCategory,
  aes(x = Interval, y = Steps, color = DayType)
)
# Draw one line panel per day type, stacked vertically in a single column.
p +
  geom_line() +
  facet_wrap(~DayType, ncol = 1, nrow = 2) +
  labs(
    x = "Intervals",
    y = "Average Number of Steps",
    title = "Activity Patterns"
  )
