library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.1
library(scales)
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 3.5.1
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
# Load the activity data, extracting it from the bundled archive on first run.
if (!file.exists("activity.csv")) {
  unzip("activity.zip")
}
activity <- read.csv("activity.csv")

# Total number of steps recorded on each date. Missing readings are dropped
# before summing, so a date whose readings are all NA gets a total of 0.
TotalStepsPerDay <- tapply(activity$steps, activity$date, sum, na.rm = TRUE)

# Histogram of the daily totals in 500-step bins. qplot() treats a bare "red"
# as a data value to map onto the colour scale (which also adds a legend);
# wrapping it in I() sets the bar outline to literal red instead.
qplot(TotalStepsPerDay,
      xlab = "Total steps per day", ylab = "Frequency",
      binwidth = 500, colour = I("red"))
#### 3. The mean and median of the total number of steps taken per day
# Mean and median of the daily step totals.
MeanStepsPerDay <- mean(TotalStepsPerDay)
MedianStepsPerDay <- median(TotalStepsPerDay)

# Mean number of steps in each 5-minute interval, taken over all dates
# (missing readings are dropped before averaging). The result has one row per
# interval with columns `interval` and `steps`.
AverageStepsPerDay <- aggregate(
  x = list(steps = activity[["steps"]]),
  by = list(interval = activity[["interval"]]),
  FUN = mean,
  na.rm = TRUE
)

# Line plot of the average step count against the interval identifier.
ggplot(AverageStepsPerDay, aes(x = interval, y = steps)) +
  geom_line() +
  labs(x = "5-minute interval", y = "average number of steps taken")
#### 3. The 5-minute interval with the maximum average number of steps
# Row index (not the step count) of the interval with the highest average.
MaxNumberOfSteps <- which.max(AverageStepsPerDay$steps)

# Render that interval's identifier as "H:MM". The identifier is split
# arithmetically (last two digits as minutes, the rest as hours) so that
# values below 100 -- which a pattern requiring three or more digits leaves
# untouched -- still get an hour part and zero-padded minutes, e.g. 55 becomes
# "0:55" and 5 becomes "0:05". Values of 100 and above format as before
# (835 -> "8:35").
# NOTE(review): assumes the interval column holds non-negative whole numbers
# in HHMM form -- confirm against the data set.
timeOfMaxSteps <- local({
  peak <- as.integer(AverageStepsPerDay[MaxNumberOfSteps, "interval"])
  sprintf("%d:%02d", peak %/% 100L, peak %% 100L)
})
# Number of step readings that are missing.
NumberOfMissingValues <- sum(is.na(activity$steps))

# Copy of the data in which each missing step reading is filled in by
# Hmisc::impute() using the mean of the observed readings.
activityDataWithoutNA <- activity
activityDataWithoutNA$steps <- impute(activity$steps, fun = mean)

# Daily totals recomputed on the imputed data; no na.rm is needed because the
# imputed column no longer contains missing values.
TotalStepsPerDayWithoutNA <- tapply(activityDataWithoutNA$steps,
                                    activityDataWithoutNA$date, sum)

# Histogram of the imputed daily totals in 500-step bins. I("red") sets the
# bar outline to literal red; a bare "red" would be mapped through the colour
# scale by qplot() and add a legend.
qplot(TotalStepsPerDayWithoutNA,
      xlab = "Total steps per day (without NA)", ylab = "Frequency",
      binwidth = 500, colour = I("red"))
#### The mean and median of the total steps per day (after imputation)
stepsByDayMeanWithoutNA <- mean(TotalStepsPerDayWithoutNA)
stepsByDayMedianWithoutNA <- median(TotalStepsPerDayWithoutNA)

# Label every observation as falling on a weekend or a weekday. POSIXlt's
# `wday` component counts days from Sunday = 0, so 0 and 6 are Sunday and
# Saturday; using the numeric weekday keeps the test locale-independent.
activityDataWithoutNA$DayType <- ifelse(
  as.POSIXlt(activityDataWithoutNA$date)$wday %in% c(0, 6),
  "weekend",
  "weekday"
)

# Mean number of steps per 5-minute interval, computed separately for
# weekdays and weekends.
AverageStepsPerDayWithoutNA <- aggregate(steps ~ interval + DayType,
                                         data = activityDataWithoutNA,
                                         FUN = mean)

# One stacked panel per day type so the two activity profiles can be compared.
ggplot(AverageStepsPerDayWithoutNA, aes(interval, steps)) +
  geom_line() +
  facet_grid(DayType ~ .) +
  xlab("5-minute interval") +
  ylab("average number of steps")