Checking Directory and Setting Desired Directory

# Show the current working directory; 'activity.csv' is read relative to it.
getwd()
## [1] "C:/Users/nirma/Documents/GitHub/RepData_PeerAssessment1"
# Switch to the author's project folder only when it exists, so the script
# does not abort with "cannot change working directory" on other machines.
project_dir <- "C:/Users/nirma/Documents/GitHub/RepData_PeerAssessment1"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

Invoking Required Libraries and Resources

require(lubridate)
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
require(ggplot2)
## Loading required package: ggplot2
library(scales)
require(ggthemes)
## Loading required package: ggthemes
require(RColorBrewer)
## Loading required package: RColorBrewer

Reading the data file. The data have already been downloaded, unzipped, and saved in the local directory.

# Load the activity data from the CSV file in the working directory.
mydata <- read.csv(file = "activity.csv")

Getting to Know Your Data:

# Preview the first rows of the raw data.
head(mydata)
##   steps       date interval
## 1    NA 2012-10-01        0
## 2    NA 2012-10-01        5
## 3    NA 2012-10-01       10
## 4    NA 2012-10-01       15
## 5    NA 2012-10-01       20
## 6    NA 2012-10-01       25
# Per-column summary; `steps` contains NA values and `date` is still character.
summary(mydata)
##      steps            date              interval     
##  Min.   :  0.00   Length:17568       Min.   :   0.0  
##  1st Qu.:  0.00   Class :character   1st Qu.: 588.8  
##  Median :  0.00   Mode  :character   Median :1177.5  
##  Mean   : 37.38                      Mean   :1177.5  
##  3rd Qu.: 12.00                      3rd Qu.:1766.2  
##  Max.   :806.00                      Max.   :2355.0  
##  NA's   :2304
# Confirm the object type returned by read.csv().
class(mydata)
## [1] "data.frame"
# List the column names.
names(mydata)
## [1] "steps"    "date"     "interval"

Setting date

# Convert the `date` column to class Date, replacing it in place.
mydata[["date"]] <- as.Date(mydata[["date"]])

1. Calculating total steps taken per day and making histogram

# Total number of steps recorded on each date.
dailysteps <- aggregate(steps ~ date, data = mydata, FUN = sum, na.rm = TRUE)

# Histogram of the daily totals, drawn on a brown device background.
par(bg = "brown")
hist(
  x = dailysteps$steps,
  col = "orange",
  border = "blue",
  las = 1,
  main = "Histogram of Total Steps Per Day",
  xlab = "Total Steps Per Day",
  ylab = "Frequency of the Day"
)

2.Calculating and reporting Mean and the Median of total steps per day

Mean:

# Mean of the per-day step totals.
dailymean <- mean(dailysteps[["steps"]])
dailymean
## [1] 10766.19

Median:

# Median of the per-day step totals.
dailymedian <- median(dailysteps[["steps"]])
dailymedian
## [1] 10765

3. What is the average daily activity pattern?

  1. Making a time series plot (i.e., type = "l") of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all days (y-axis): i. Quickly checking the dataset
# Print the column names of the data frame.
colnames(mydata)
## [1] "steps"    "date"     "interval"
  1. Calculating Interval Steps
# Mean step count for each 5-minute interval, averaged over all dates.
intervalsteps <- aggregate(
  steps ~ interval,
  data = mydata,
  FUN = mean,
  na.rm = TRUE
)
  1. plotting the interval steps
# Time-series line plot of the mean step count for each 5-minute interval,
# drawn on a grey device background.
par(bg = "grey")
plot(steps ~ interval, data = intervalsteps, type = "l",
     main = "Average Steps per 5-Minute Interval",
     # The formula places `interval` on the x-axis and `steps` on the y-axis,
     # so the axis labels name exactly those quantities.
     xlab = "5-Minute Interval",
     ylab = "Average Number of Steps",
     cex.lab = 1,
     col = "blue")

  1. Which 5-minute interval, on average across all the days in the dataset, contains the maximum number of steps?
# Interval label whose mean step count is the largest.
maxstepsinterval <- intervalsteps$interval[which.max(intervalsteps$steps)]
maxstepsinterval
## [1] 835

4. Dealing with the Missing Values

  1. Calculate and report the total missing values in the dataset
# Count the rows whose `steps` value is NA.
totalmissing <- sum(is.na(mydata[["steps"]]))
totalmissing
## [1] 2304
  1. Devise a strategy for filling in all missing values in the dataset: each missing value is replaced by the mean for its 5-minute interval, rounded to the nearest whole number
i. Imputing NAs
# Step counts with every NA replaced by the rounded mean of its 5-minute
# interval; match() looks up each row's interval in `intervalsteps`.
NAcolumn <- ifelse(
  test = is.na(mydata$steps),
  yes = round(
    intervalsteps$steps[match(mydata$interval, intervalsteps$interval)],
    digits = 0
  ),
  no = mydata$steps
)
ii. New dataset ‘imputeddata’
# Assemble the imputed dataset: filled-in steps plus the original interval
# and date columns.
imputeddata <- data.frame(
  steps = NAcolumn,
  interval = mydata[["interval"]],
  date = mydata[["date"]]
)
iii. Quick checking
# Show the first ten rows of the imputed dataset.
head(x = imputeddata, n = 10L)
##    steps interval       date
## 1      2        0 2012-10-01
## 2      0        5 2012-10-01
## 3      0       10 2012-10-01
## 4      0       15 2012-10-01
## 5      0       20 2012-10-01
## 6      2       25 2012-10-01
## 7      1       30 2012-10-01
## 8      1       35 2012-10-01
## 9      0       40 2012-10-01
## 10     1       45 2012-10-01
  1. Making a histogram of the total steps per day and calculating and reporting the mean and median
# Total imputed steps per date, with the result columns renamed to
# "Date" and "Steps".
dailyimputed <- aggregate(
  x = imputeddata$steps,
  by = list(imputeddata$date),
  FUN = sum
)
names(dailyimputed) <- c("Date", "Steps")
Plotting a histogram
# Histogram of the imputed daily totals in 2500-step bins aligned at zero.
hgram <- ggplot(data = dailyimputed, mapping = aes(x = Steps))
hgram +
  geom_histogram(
    binwidth = 2500,
    boundary = 0,
    fill = "orange",
    col = "black"
  ) +
  scale_x_continuous(breaks = seq(0, 25000, 2500)) +
  scale_y_continuous(breaks = seq(0, 26, 2)) +
  ggtitle("Histogram of Steps Per Day") +
  xlab("Steps") +
  ylab("Frequency") +
  theme(plot.title = element_text(face = "bold", size = 12))

Calculate and report the mean and median total number of steps per day. Do these values differ from the estimates from the first part of the assignment? What is the impact of imputing missing data on the estimates of the total daily number of steps?
i. Daily average steps after imputation
# Mean of the daily totals after imputation.
mean(dailyimputed[["Steps"]])
## [1] 10765.64
ii. Daily median steps after imputation
# Median of the daily totals after imputation.
median(dailyimputed[["Steps"]])
## [1] 10762

4. Are there differences in activity patterns between weekdays and weekends?

i. Creating a new factor variable in the dataset with two levels: “weekday” and “weekend”
# Store the observation date in a separate `exactdate` column of class Date.
imputeddata[["exactdate"]] <- as.Date(
  imputeddata[["date"]],
  format = "%Y-%m-%d"
)
ii. New variable with weekday
# Name of the day of the week for every observation.
imputeddata[["weekday"]] <- weekdays(imputeddata[["exactdate"]])
iii. Introducing weekend
# Classify each date as "weekday" or "weekend" and store the result as a
# two-level factor. The day of week is read from POSIXlt's numeric `wday`
# field (0 = Sunday, 6 = Saturday) instead of being compared against English
# day names, so the classification does not depend on the session locale.
imputeddata$daytype <- factor(
  ifelse(as.POSIXlt(imputeddata$exactdate)$wday %in% c(0L, 6L),
         "weekend", "weekday"),
  levels = c("weekday", "weekend")
)
iv. checking if it works
# Show the first five rows to verify the new columns.
head(x = imputeddata, n = 5L)
##   steps interval       date  exactdate weekday daytype
## 1     2        0 2012-10-01 2012-10-01  Monday weekday
## 2     0        5 2012-10-01 2012-10-01  Monday weekday
## 3     0       10 2012-10-01 2012-10-01  Monday weekday
## 4     0       15 2012-10-01 2012-10-01  Monday weekday
## 5     0       20 2012-10-01 2012-10-01  Monday weekday

5. Creating two time series plots of the 5-minute interval (x-axis) and the average number of steps taken, averaged across weekday days or weekend days (y-axis)

# Mean steps for every (interval, day type) pair in the imputed data.
stepsPerTime <- aggregate(steps ~ interval + daytype, data = imputeddata,
                          FUN = mean, na.action = na.omit)
# Interval labels are clock times written as HHMM (e.g. 835 is 08:35), so
# convert them to decimal hours to get an evenly spaced time axis. The values
# come from this data frame's own `interval` column rather than from
# `intervalsteps`, so each row gets its own time without relying on a shorter
# vector being recycled in matching order.
stepsPerTime$time <- stepsPerTime$interval %/% 100 +
  (stepsPerTime$interval %% 100) / 60
i. Drawing Line Plots
# Line plot of mean steps against time, one facet row per day type.
Lplot <- ggplot(data = stepsPerTime, mapping = aes(x = time, y = steps))
Lplot +
  geom_line(col = "red") +
  facet_grid(daytype ~ .) +
  ggtitle("Average Steps Per Time Interval:Weekdays vs. Weekends") +
  xlab("Time") +
  ylab("Steps") +
  theme(plot.title = element_text(face = "bold", size = 12))

Thanks Everyone!!