# Show the working directory the session started in.
getwd()
## [1] "C:/Users/nirma/Documents/GitHub/RepData_PeerAssessment1"
# The project folder below is specific to the original author's machine.
# Calling setwd() on a path that does not exist stops the script, so only
# switch directories when the folder is actually present; otherwise keep the
# current working directory and let the relative file paths resolve from there.
project_dir <- "C:/Users/nirma/Documents/GitHub/RepData_PeerAssessment1"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}
# Attach the packages this script loads. library() stops with an error when a
# package is not installed, whereas require() only returns FALSE and lets the
# script fail later at the first call that needs the package.
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(scales)
library(ggthemes)
library(RColorBrewer)
# Load the activity measurements from the CSV file in the working directory.
mydata <- read.csv(file = "activity.csv")
Getting to Know Your Data:
# First look at the raw data: leading rows, per-column summary, object class
# and column names. The `##` lines are the console output recorded when the
# script was run.
head(mydata, n = 6L)
## steps date interval
## 1 NA 2012-10-01 0
## 2 NA 2012-10-01 5
## 3 NA 2012-10-01 10
## 4 NA 2012-10-01 15
## 5 NA 2012-10-01 20
## 6 NA 2012-10-01 25
summary(object = mydata)
## steps date interval
## Min. : 0.00 Length:17568 Min. : 0.0
## 1st Qu.: 0.00 Class :character 1st Qu.: 588.8
## Median : 0.00 Mode :character Median :1177.5
## Mean : 37.38 Mean :1177.5
## 3rd Qu.: 12.00 3rd Qu.:1766.2
## Max. :806.00 Max. :2355.0
## NA's :2304
class(mydata)
## [1] "data.frame"
colnames(mydata)
## [1] "steps" "date" "interval"
# Base-graphics canvas colour for the histogram drawn below.
par(bg = "brown")

# Turn the date strings into Date objects.
mydata$date <- as.Date(mydata$date)

# Total number of steps recorded on each date.
dailysteps <- aggregate(steps ~ date, data = mydata, FUN = sum, na.rm = TRUE)

# Distribution of the daily totals.
hist(
  x = dailysteps$steps,
  col = "orange",
  border = "blue",
  las = 1,
  main = "Histogram of Total Steps Per Day",
  xlab = "Total Steps Per Day",
  ylab = "Frequency of the Day"
)
Mean of the total number of steps taken per day:
# Average of the per-day step totals.
dailymean <- mean(x = dailysteps[["steps"]])
print(dailymean)
## [1] 10766.19
Median of the total number of steps taken per day:
# Middle value of the per-day step totals.
dailymedian <- median(x = dailysteps[["steps"]])
print(dailymedian)
## [1] 10765
names(mydata)
## [1] "steps" "date" "interval"
# Mean number of steps for each interval identifier, averaged over all days.
intervalsteps <- aggregate(steps ~ interval, data = mydata, FUN = mean,
                           na.rm = TRUE)
par(bg = "grey")
# Line plot of the daily activity pattern. The formula steps ~ interval puts
# the interval on the x axis and the averaged steps on the y axis, so the
# axis labels and title are written to match that orientation.
plot(steps ~ interval, data = intervalsteps, type = "l",
     main = "Average Steps Per Interval",
     xlab = "Interval",
     ylab = "Average Steps",
     cex.lab = 1,
     col = "blue")
# Interval identifier whose average step count is the largest.
maxstepsinterval <- with(intervalsteps, interval[which.max(steps)])
print(maxstepsinterval)
## [1] 835
# Count of rows whose step measurement is missing.
totalmissing <- length(which(is.na(mydata$steps)))
print(totalmissing)
## [1] 2304
# Fill each missing step count with the rounded average for the same interval;
# observed values are kept unchanged.
NAcolumn <- local({
  # Row of `intervalsteps` that corresponds to every row of `mydata`.
  lookup <- match(mydata$interval, intervalsteps$interval)
  fill_values <- round(intervalsteps$steps[lookup], 0)
  ifelse(is.na(mydata$steps), fill_values, mydata$steps)
})

# Same observations as `mydata`, with the filled-in step column.
imputeddata <- data.frame(
  steps = NAcolumn,
  interval = mydata$interval,
  date = mydata$date
)
head(imputeddata, 10)
## steps interval date
## 1 2 0 2012-10-01
## 2 0 5 2012-10-01
## 3 0 10 2012-10-01
## 4 0 15 2012-10-01
## 5 0 20 2012-10-01
## 6 2 25 2012-10-01
## 7 1 30 2012-10-01
## 8 1 35 2012-10-01
## 9 0 40 2012-10-01
## 10 1 45 2012-10-01
# Per-day step totals computed from the imputed data, with the two result
# columns named "Date" and "Steps".
dailyimputed <- setNames(
  aggregate(imputeddata$steps, by = list(imputeddata$date), FUN = sum),
  c("Date", "Steps")
)

# Histogram of the imputed daily totals in bins of 2500 steps starting at 0.
hgram <- ggplot(data = dailyimputed, mapping = aes(x = Steps))
hgram +
  geom_histogram(
    binwidth = 2500,
    boundary = 0,
    colour = "black",
    fill = "orange"
  ) +
  labs(title = "Histogram of Steps Per Day", x = "Steps", y = "Frequency") +
  scale_x_continuous(breaks = seq(0, 25000, by = 2500)) +
  scale_y_continuous(breaks = seq(0, 26, by = 2)) +
  theme(plot.title = element_text(face = "bold", size = 12))
# Mean and median of the daily totals after imputation.
mean(x = dailyimputed[["Steps"]])
## [1] 10765.64
median(x = dailyimputed[["Steps"]])
## [1] 10762
# Date column used for the weekday calculations.
imputeddata$exactdate <- as.Date(imputeddata$date, format = "%Y-%m-%d")
# Human-readable day name (its language follows the session locale).
imputeddata$weekday <- weekdays(imputeddata$exactdate)
# Classify each row as weekend or weekday from the numeric day of week
# (POSIXlt wday: 0 = Sunday, 6 = Saturday). Comparing the weekdays() text with
# English names would label every day "weekday" in a non-English locale.
imputeddata$daytype <- ifelse(
  as.POSIXlt(imputeddata$exactdate)$wday %in% c(0L, 6L),
  "weekend",
  "weekday"
)
head(imputeddata, n = 5)
## steps interval date exactdate weekday daytype
## 1 2 0 2012-10-01 2012-10-01 Monday weekday
## 2 0 5 2012-10-01 2012-10-01 Monday weekday
## 3 0 10 2012-10-01 2012-10-01 Monday weekday
## 4 0 15 2012-10-01 2012-10-01 Monday weekday
## 5 0 20 2012-10-01 2012-10-01 Monday weekday
# Mean steps for every interval, computed separately for weekdays and weekends.
stepsPerTime <- aggregate(steps ~ interval + daytype, data = imputeddata,
                          FUN = mean, na.action = na.omit)
# Time of day in decimal hours, derived from this data frame's own interval
# column so the values stay aligned row by row (no reliance on recycling a
# shorter vector from another object). The interval identifiers are clock
# labels in HHMM form (e.g. 835 is 08:35), so the hour part and the minute
# part are converted separately; dividing by 100 would leave uneven gaps on
# the axis at every hour boundary.
stepsPerTime$time <- stepsPerTime$interval %/% 100 +
  (stepsPerTime$interval %% 100) / 60
# One panel per day type, stacked vertically.
Lplot <- ggplot(stepsPerTime, aes(time, steps))
Lplot +
  geom_line(col = "red") +
  ggtitle("Average Steps Per Time Interval:Weekdays vs. Weekends") +
  xlab("Time") +
  ylab("Steps") +
  theme(plot.title = element_text(face = "bold", size = 12)) +
  facet_grid(daytype ~ .)