This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.3
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 3.2.3
## Loading required package: grid
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Warning: package 'Formula' was built under R version 3.2.3
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:dplyr':
##
## combine, src, summarize
##
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
# Load the raw activity log from the working directory and preview its
# first rows (columns: steps, date, interval).
activity <- read.csv(file = "activity.csv")
head(activity, n = 6L)
## steps date interval
## 1 NA 2012-10-01 0
## 2 NA 2012-10-01 5
## 3 NA 2012-10-01 10
## 4 NA 2012-10-01 15
## 5 NA 2012-10-01 20
## 6 NA 2012-10-01 25
# Total steps per day on the raw (un-imputed) data. `stepsByDay` has to exist
# before it is plotted; previously its only definition came later in the
# document, after imputation. With na.rm = TRUE, a day whose readings are all
# NA sums to 0.
stepsByDay <- aggregate(activity$steps,
                        by = list(activity$date),
                        FUN = sum, na.rm = TRUE)
names(stepsByDay) <- c("date", "steps")

# Histogram of the daily totals; the x axis runs from 0 to the largest total.
# `limits` is spelled out in full instead of relying on partial matching.
ggplot(stepsByDay, aes(x = steps)) +
  geom_histogram(color = "red", fill = "green") +
  scale_x_continuous("Total steps per day",
                     limits = c(0, max(stepsByDay$steps)))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# Mean and median of the daily totals.
mean(stepsByDay$steps, na.rm = TRUE)
## [1] 9354.23
median(stepsByDay$steps, na.rm = TRUE)
## [1] 10395
# Mean number of steps for each 5-minute interval, averaged over all days
# (NA readings are dropped before averaging).
average_step_interval <- aggregate(activity$steps,
                                   by = list(activity$interval),
                                   FUN = mean, na.rm = TRUE)
names(average_step_interval) <- c("interval", "meanSteps")

# Line plot of the average daily pattern. geom_line() has no fill aesthetic,
# so only the line colour is set.
ggplot(average_step_interval, aes(x = interval, y = meanSteps)) +
  geom_line(color = "red") +
  xlab("5 min Interval") +
  ylab("Mean Steps/Interval")

# which.max() returns a row position, not an interval label, so the matching
# row (interval label and its mean) is printed as well.
peak_row <- which.max(average_step_interval$meanSteps)
peak_row
## [1] 104
average_step_interval[peak_row, ]
# Number of 5-minute records with no step count.
sum(is.na(activity[["steps"]]))
## [1] 2304
# Work on a copy so the raw data stay untouched; missing step counts are
# filled in by impute() (loaded with Hmisc) using the mean as fill function.
activityDataImputed <- activity
activityDataImputed[["steps"]] <- impute(activity[["steps"]], fun = mean)
# Total steps per day, recomputed from the imputed data. TRUE is written out
# because T is an ordinary variable that can be reassigned.
stepsByDay <- aggregate(activityDataImputed$steps,
                        by = list(activityDataImputed$date),
                        FUN = sum, na.rm = TRUE)
names(stepsByDay) <- c("date", "steps")

# Histogram of the imputed daily totals; the x axis runs from 0 to the
# largest total. `limits` is spelled out instead of the partial name `limit`.
ggplot(stepsByDay, aes(x = steps)) +
  geom_histogram(color = "red", fill = "blue") +
  scale_x_continuous("Total steps per day",
                     limits = c(0, max(stepsByDay$steps)))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
### Calculate and report the mean and median total number of steps taken per day.
# Mean and median of the total steps per day after imputation. These are
# taken over the daily totals in `stepsByDay` (rebuilt from the imputed data
# just above). Summarising activityDataImputed$steps directly gave
# per-interval figures (37.3826 and 0), not per-day totals.
# NOTE(review): re-knit the document to refresh the printed results.
mean(stepsByDay$steps)
median(stepsByDay$steps)
# Label every record as falling on a weekend or a weekday. POSIXlt's `wday`
# counts days from Sunday = 0, so 0 and 6 are Sunday and Saturday.
activityDataImputed$dateType <- ifelse(
  test = as.POSIXlt(activityDataImputed$date)$wday %in% c(0, 6),
  yes = "weekend",
  no = "weekday"
)

# Mean steps for each interval within each day type.
averagedActivityDataImputed <- aggregate(
  steps ~ interval + dateType,
  data = activityDataImputed,
  FUN = mean
)

# One panel per day type, stacked vertically, with the lines coloured by type.
ggplot(averagedActivityDataImputed, aes(x = interval, y = steps)) +
  geom_line(aes(colour = dateType)) +
  facet_grid(dateType ~ .)