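This report summarizes two months of data from a personal activity monitoring device. The original dataset, ‘activity’, contains three variables: steps, date, and 5-minute interval. The report covers: (1) the total number of steps taken per day, with its mean and median; (2) the average daily activity pattern across 5-minute intervals; (3) the number of missing values and the effect of imputing them with interval means; and (4) differences in activity patterns between weekdays and weekends.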
# Load the raw activity log; the script uses its steps, date and interval
# columns.
activity <- read.csv("activity.csv")

# Daily step totals. na.rm = TRUE is forwarded to sum(), so a day whose
# readings are all NA ends up with a total of 0 rather than NA.
total <- aggregate(
  activity["steps"],
  by = activity["date"],
  FUN = sum,
  na.rm = TRUE
)

# Distribution of the daily totals.
hist(
  total$steps,
  main = "Histogram of Steps per day",
  xlab = "steps per day",
  col = "grey",
  breaks = 10
)

# Centre of the daily totals (recorded console output kept below each call).
mean(total$steps)
## [1] 9354.23
median(total$steps)
## [1] 10395
# Mean step count for every 5-minute interval, taken across all days with
# NA readings skipped.
avgsteps <- aggregate(
  activity["steps"],
  by = activity["interval"],
  FUN = mean,
  na.rm = TRUE
)

# Line plot of the average day.
plot(
  avgsteps$interval, avgsteps$steps,
  type = "l",
  xlab = "5 minute interval",
  ylab = "average steps",
  main = "Average Steps by 5 minute interval"
)

# Interval with the highest average step count.
avgsteps[which.max(avgsteps$steps), ]
## interval steps
## 104 835 206.1698

# Number of missing step readings in the raw data.
sum(is.na(activity[["steps"]]))
## [1] 2304
# Replace missing step counts with the mean for the same 5-minute interval.
#
# Arguments:
#   step    Step count(s); NA marks a missing reading.
#   minute  Interval identifier(s), matched element-wise with `step`.
#   lookup  Data frame with columns `interval` and `steps` holding the
#           per-interval means. Defaults to the script-level `avgsteps`, so
#           existing two-argument calls keep working unchanged.
#
# Returns `step` with every NA replaced by the matching `lookup` mean.
# An interval that is absent from `lookup` yields NA instead of a zero-length
# vector, so mapply() callers always receive exactly one value per input.
# The function is vectorised: it accepts whole columns as well as scalars.
steps.na <- function(step, minute, lookup = avgsteps) {
  stopifnot(length(step) == length(minute))

  gaps <- is.na(step)
  if (!any(gaps)) {
    # Nothing to impute; `lookup` is never touched on this path.
    return(step)
  }

  filled <- as.numeric(step)
  filled[gaps] <- lookup[["steps"]][match(minute[gaps], lookup[["interval"]])]
  filled
}
# Build the imputed data set: `newsteps` keeps the observed step count where
# one exists and takes the value returned by steps.na() where it was NA.
activity.imp <- activity
activity.imp$newsteps <- mapply(
  steps.na,
  activity.imp$steps,
  activity.imp$interval
)

# Daily totals must come from `newsteps`. Summing the raw `steps` column here
# ignores the imputation entirely, which is why the mean and median recorded
# for this section used to equal the non-imputed ones.
total.imp <- aggregate(
  activity.imp$newsteps,
  by = list(activity.imp$date),
  FUN = sum,
  na.rm = TRUE
)
names(total.imp) <- c("date", "steps")

hist(
  total.imp$steps,
  breaks = 10,
  col = "grey",
  xlab = "steps per day",
  main = "Histogram of Steps per day - with imputed data"
)

mean(total.imp$steps)
# NOTE(review): the output previously recorded here (9354.23) was computed
# from the raw column; re-run the script to capture the imputed mean.
median(total.imp$steps)
# NOTE(review): the output previously recorded here (10395) was computed
# from the raw column; re-run the script to capture the imputed median.
# Parse the date column and label every row as weekday or weekend.
#
# mdy() is namespace-qualified so this section does not depend on
# library(lubridate) having been attached earlier.
# NOTE(review): mdy() assumes month-day-year strings; if the CSV stores dates
# in another order (e.g. ISO year-month-day) a different parser is needed -
# confirm against the data.
activity.imp$mdy <- lubridate::mdy(activity.imp$date)

# Derive English day names from the numeric day of week (0 = Sunday) instead
# of weekdays(), whose output follows the session locale and would never match
# the English names in `weekend` on a non-English system.
day_names <- c(
  "Sunday", "Monday", "Tuesday", "Wednesday",
  "Thursday", "Friday", "Saturday"
)
activity.imp$weekday <- as.factor(
  day_names[as.POSIXlt(activity.imp$mdy)$wday + 1L]
)

weekend <- c("Saturday", "Sunday")
activity.imp$wkdf <- factor(
  activity.imp$weekday %in% weekend,
  levels = c("TRUE", "FALSE"),
  labels = c("weekend", "weekday")
)

# Split into the two groups used by the panel plots.
act_wkend <- activity.imp[activity.imp$wkdf == "weekend", ]
act_wkd <- activity.imp[activity.imp$wkdf == "weekday", ]
# Two stacked panels comparing the average day on weekdays and weekends.
#
# Margins leave room for the axis titles: with the default mgp they are drawn
# on margin line 3, so 2-line margins would clip the xlab/ylab given below.
# The previous graphics settings are saved and restored afterwards so later
# plots are not forced into this 2 x 1 layout.
old_par <- par(mfrow = c(2, 1), mar = c(4, 4, 2, 1))

# Draw one interval profile; `profile` has columns `interval` and `steps`.
plot_interval_profile <- function(profile, heading) {
  plot(
    profile$interval, profile$steps,
    type = "l",
    xlab = "5 minute interval",
    ylab = "average steps",
    main = heading
  )
}

# Averages use the imputed `newsteps` column so this section actually reflects
# the imputed data set it is built from, not the raw readings.
#weekday
avgsteps_wd <- aggregate(
  act_wkd$newsteps,
  by = list(act_wkd$interval),
  FUN = mean,
  na.rm = TRUE
)
names(avgsteps_wd) <- c("interval", "steps")
plot_interval_profile(avgsteps_wd, "Average Steps by 5 minute interval - Weekday")

#weekend
avgsteps_we <- aggregate(
  act_wkend$newsteps,
  by = list(act_wkend$interval),
  FUN = mean,
  na.rm = TRUE
)
names(avgsteps_we) <- c("interval", "steps")
plot_interval_profile(avgsteps_we, "Average Steps by 5 minute interval - Weekend")

par(old_par)