About the data:

The data from a personal activity monitoring device. This device collects data at 5 minute intervals through out the day. The data consists of two months of data from an anonymous individual collected during the months of October and November, 2012 and include the number of steps taken in 5 minute intervals each day.

load the data

fileUrl<-"https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2Factivity.zip"
download.file(fileUrl,destfile="./data1/Dataset.zip")
unzip(zipfile="./data1/Dataset.zip",exdir="./data1")
path_rf <- file.path("./data1" , "data1.csv")
files<-list.files(path_rf, recursive=TRUE)
act<-read.csv("./data1/activity.csv")

Histogram of the total number of steps taken each day

stepperday<-tapply(act$steps,act$date,sum,na.rm=T)
hist(stepperday, xlab = "number of steps",
     main = "the total number of steps taken each day")

Mean and median number of steps taken each day

summary(stepperday)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0    6778   10395    9354   12811   21194

Time series plot of the average number of steps taken

mean_interval <- tapply(act$steps, act$interval, mean, na.rm = T)

plot(mean_interval, type = "l", main = "time series plot", xlab = "the 5-minute interval", ylab = "the average number of steps")

The 5-minute interval that, on average, contains the maximum number of steps

which.max(mean_interval)
## 835 
## 104

Code to describe and show a strategy for imputing missing data

stepsNA <- sum(is.na(act$steps))
dateNA <- sum(is.na(act$date))
intervalNA <- sum(is.na(act$interval))

Histogram of the total number of steps taken each day after missing values are imputed

numMissingValues <- length(which(is.na(act$steps)))
numMissingValues
## [1] 2304

set a function to impute the NA

impute <- function(x, x.impute){ifelse(is.na(x),x.impute,x)}
act2 <- act
act2$steps <- impute(act$steps, mean(act$steps))

Make a histogram of the total number of steps taken each day

act2step <- tapply(act2$steps, act2$date, sum,na.rm=TRUE)
hist(act2step, xlab='Total steps per day (Imputed)',bins=50)
## Warning in plot.window(xlim, ylim, "", ...): "bins"不是图形参数
## Warning in title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...):
## "bins"不是图形参数
## Warning in axis(1, ...): "bins"不是图形参数
## Warning in axis(2, ...): "bins"不是图形参数

Panel plot comparing the average number of steps taken per 5-minute interval across weekdays and weekends

make a new vector to describe weekday and weekend

act2$dateType <-  ifelse(as.POSIXlt(act2$date)$wday %in% c(0,6), 'weekend', 'weekday')

make the plot

library(ggplot2)
avgAct2 <- aggregate(steps ~ interval + dateType, data=act2, mean)
ggplot(avgAct2, aes(interval, steps)) + 
  geom_line() + 
  facet_grid(dateType ~ .) +
  xlab("5-minute interval") + 
  ylab("avarage number of steps")