This assignment makes use of data from a personal activity monitoring device. This device collects data at 5-minute intervals throughout the day. The data set consists of two months of measurements from an anonymous individual, collected during October and November 2012, and includes the number of steps taken in each 5-minute interval of each day.
# Load the activity-monitor data set.
# The file is resolved relative to the current working directory rather than
# via setwd() on a machine-specific absolute path, so the script runs on any
# machine as long as it is started from the folder containing activity.csv.
data_file <- "activity.csv"
if (!file.exists(data_file)) {
  stop(
    "Cannot find '", data_file, "' in working directory ", getwd(),
    call. = FALSE
  )
}
df <- read.csv(data_file)
# Per-column overview (ranges, quartiles and the NA count for steps).
summary(df)
## steps date interval
## Min. : 0.00 2012-10-01: 288 Min. : 0.0
## 1st Qu.: 0.00 2012-10-02: 288 1st Qu.: 588.8
## Median : 0.00 2012-10-03: 288 Median :1177.5
## Mean : 37.38 2012-10-04: 288 Mean :1177.5
## 3rd Qu.: 12.00 2012-10-05: 288 3rd Qu.:1766.2
## Max. :806.00 2012-10-06: 288 Max. :2355.0
## NA's :2304 (Other) :15840
# Total number of steps per day.
# aggregate()'s formula interface omits rows whose steps value is NA by
# default, so days with no recorded steps are absent from the result instead
# of appearing as zero-step days.
df.total_step <- aggregate(
  steps ~ date,
  data = df,
  FUN = sum,
  na.rm = TRUE
)
# Distribution of the daily totals.
hist(
  df.total_step[["steps"]],
  main = "Histogram for Total Number of Steps",
  xlab = "total number of steps taken per day"
)
Calculate and report the mean and median of the total number of steps taken per day:
# Mean of the per-day step totals (auto-printed at top level).
mean(df.total_step[["steps"]])
## [1] 10766.19
# Median of the per-day step totals.
median(df.total_step[["steps"]])
## [1] 10765
# Average number of steps in each 5-minute interval, taken across all days.
df.average_step <- aggregate(
  steps ~ interval,
  data = df,
  FUN = mean,
  na.rm = TRUE
)
# Time-series (line) plot of the average daily activity pattern.
plot(steps ~ interval, data = df.average_step, type = "l")
# Interval identifier at which the average step count peaks.
df.average_step$interval[which.max(df.average_step$steps)]
## [1] 835
# Number of rows with a missing step count.
sum(is.na(df[["steps"]]))
## [1] 2304
# Imputation strategy: replace each missing step count with the median of the
# observed step counts for the same 5-minute interval.
filtered <- is.na(df$steps)
by_interval <- tapply(df$steps, df$interval, median, na.rm = TRUE)
# by_interval is named by interval, so look the replacements up by name.
df.filled <- df
df.filled$steps <- replace(
  df$steps,
  filtered,
  by_interval[as.character(df$interval[filtered])]
)
# Daily totals recomputed on the imputed data.
total_step <- aggregate(
  steps ~ date,
  data = df.filled,
  FUN = sum,
  na.rm = TRUE
)
hist(
  total_step[["steps"]],
  main = "Histogram for Total Number of Steps after Imputing",
  xlab = "total number of steps taken per day"
)
# Mean and median of the daily totals after imputation.
mean(total_step[["steps"]])
## [1] 9503.869
median(total_step[["steps"]])
## [1] 10395
# Convert the date column to Date class; as.Date() parses the character (or
# factor) values directly, so the strptime() round-trip is unnecessary.
df$date <- as.Date(df$date, format = "%Y-%m-%d")
# Label every row as "Weekend" (Saturday/Sunday) or "Weekday".
# The POSIXlt wday component numbers days 0-6 starting on Sunday, so the test
# does not depend on the session locale the way weekdays() day names do, and
# the whole column is classified in one vectorized step instead of one
# function call per row. Rows with an NA date get an NA label.
df$datetype <- local({
  wday <- as.POSIXlt(df$date)$wday
  ifelse(wday == 0L | wday == 6L, "Weekend", "Weekday")
})
# Show the first rows to confirm the new column.
head(df)
## steps date interval datetype
## 1 NA 2012-10-01 0 Weekday
## 2 NA 2012-10-01 5 Weekday
## 3 NA 2012-10-01 10 Weekday
## 4 NA 2012-10-01 15 Weekday
## 5 NA 2012-10-01 20 Weekday
## 6 NA 2012-10-01 25 Weekday
library(ggplot2)
# Average steps for every (interval, datetype) combination.
by_date <- aggregate(
  steps ~ interval + datetype,
  data = df,
  FUN = mean,
  na.rm = TRUE
)
# Build the figure layer by layer: one line per day type, one panel per day
# type, panels stacked vertically.
# NOTE(review): the variable name `plot` is kept as-is in case later code
# refers to it, although it shares its name with base R's plot() function.
plot <- ggplot(
  data = by_date,
  mapping = aes(x = interval, y = steps, color = datetype)
)
plot <- plot + geom_line()
plot <- plot + labs(
  title = "Average Daily Steps by Datetype",
  x = "Interval",
  y = "Average number of Steps"
)
plot <- plot + facet_wrap(~datetype, ncol = 1, nrow = 2)
print(plot)