Loading and preprocessing the data
# Fetch and unpack the activity archive, but only when "activity.csv" is not
# already present in the working directory.
if (!file.exists("activity.csv")) {
  dlurl <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2Factivity.zip"
  # The archive is written in binary mode ("wb") under its URL-encoded name.
  download.file(
    url = dlurl,
    destfile = "repdata%2Fdata%2Factivity.zip",
    mode = "wb"
  )
  unzip(zipfile = "repdata%2Fdata%2Factivity.zip")
}

# Parse the CSV into the data frame used by every later step.
data <- read.csv(file = "activity.csv")
What is the mean total number of steps taken per day?
# Sum the steps within each date. The formula interface of aggregate() omits
# rows whose steps value is NA by default, so only observed values are summed.
steps_by_day <- aggregate(steps ~ date, data = data, FUN = sum)

# Distribution of the daily totals.
hist(
  steps_by_day[["steps"]],
  main = "Total Steps Each Day",
  col = "green",
  xlab = "Number of Steps"
)

# Mean of the daily totals (recorded output kept below).
rmean <- mean(steps_by_day[["steps"]])
rmean
## [1] 10766.19

# Median of the daily totals (recorded output kept below).
rmedian <- median(steps_by_day[["steps"]])
rmedian
## [1] 10765
What is the average daily activity pattern?
# Average the steps recorded in each interval across all dates.
steps_by_interval <- aggregate(steps ~ interval, data = data, FUN = mean)

# Line plot of the averaged profile, interval on the x axis.
plot(
  x = steps_by_interval[["interval"]],
  y = steps_by_interval[["steps"]],
  type = "l",
  xlab = "Interval",
  ylab = "Number of Steps",
  main = "Average Number of Steps per Day by Interval"
)

# Interval whose average step count is the largest (recorded output below).
busiest_row <- which.max(steps_by_interval[["steps"]])
max_interval <- steps_by_interval[["interval"]][busiest_row]
max_interval
## [1] 835
Imputing missing values
# 1. Calculate and report the total number of rows with a missing value.
NATotal <- sum(!complete.cases(data))
NATotal
## [1] 2304

# 2. Fill each missing steps value with the mean of its interval, averaged
#    across all dates on which that interval was observed.
StepsAverage <- aggregate(steps ~ interval, data = data, FUN = mean)

# Vectorised lookup: match() finds, for every row with a missing value, the
# position of its interval in StepsAverage. This replaces a row-by-row loop
# that grew the result with c() and called subset() once per row.
steps_missing <- is.na(data$steps)
fillNA <- as.numeric(data$steps)
fillNA[steps_missing] <- StepsAverage$steps[
  match(data$interval[steps_missing], StepsAverage$interval)
]

# 3. Create a new dataset including the imputed missing values.
new_activity <- data
new_activity$steps <- fillNA

# 4. Make a histogram of the total number of steps taken each day, and
#    calculate and report the mean and median total number of steps per day.
StepsTotalUnion <- aggregate(
  steps ~ date,
  data = new_activity,
  FUN = sum,
  na.rm = TRUE
)
hist(
  StepsTotalUnion$steps,
  main = "Total Steps Each Day",
  col = "blue",
  xlab = "Number of Steps"
)

# Overlay the non-imputed histogram on the same axes to show the difference.
hist(
  steps_by_day$steps,
  main = "Total Steps Each Day",
  col = "green",
  xlab = "Number of Steps",
  add = TRUE
)
legend(
  "topright",
  legend = c("Imputed", "Non-imputed"),
  col = c("blue", "green"),
  lwd = 10
)

# Mean of the daily totals after imputation.
rmeantotal <- mean(StepsTotalUnion$steps)
rmeantotal
## [1] 10766.19

# Median of the daily totals after imputation.
rmediantotal <- median(StepsTotalUnion$steps)
rmediantotal
## [1] 10766.19

# Do these values differ from the estimates from the first part of the
# assignment? Per the recorded output below, the median rises by about 1.19
# steps while the mean is unchanged.
rmediandiff <- rmediantotal - rmedian
rmediandiff
## [1] 1.188679
rmeandiff <- rmeantotal - rmean
rmeandiff
## [1] 0
Are there differences in activity patterns between weekdays and weekends?
# Classify each observation as Weekday or Weekend.
# POSIXlt$wday is a locale-independent day number (0 = Sunday ... 6 = Saturday).
# Comparing the names returned by weekdays() against English strings labels
# every row "Weekend" when the session's LC_TIME locale is not English.
# Working with the number also avoids creating a variable called `weekdays`
# that masks the base function of the same name.
day_number <- as.POSIXlt(as.Date(new_activity$date))$wday
new_activity$dow <- as.factor(
  ifelse(day_number %in% 1:5, "Weekday", "Weekend")
)

# Average steps per interval, computed separately for weekdays and weekends.
# Note: this reuses (overwrites) the StepsTotalUnion name from the imputation
# step, as the original script did.
StepsTotalUnion <- aggregate(
  steps ~ interval + dow,
  data = new_activity,
  FUN = mean
)

# Two stacked line panels, one per day type.
library(lattice)
xyplot(
  steps ~ interval | dow,
  data = StepsTotalUnion,
  main = "Average Steps per Day by Interval",
  xlab = "Interval",
  ylab = "Steps",
  layout = c(1, 2),
  type = "l"
)
