Loading and Preprocessing the data:

Unzip and load the data into data frame activity

# Extract the archive into the directory it lives in, then read the resulting
# CSV into the `activity` data frame.
# NOTE(review): both calls use absolute paths on one user's machine; the script
# will not run elsewhere without editing them.
unzip(
  zipfile = "c:/Users/Takshak/Desktop/R working/course4/week2/activity.zip",
  exdir = "c:/Users/Takshak/Desktop/R working/course4/week2"
)
activity <- read.csv(
  "c:/Users/Takshak/Desktop/R working/course4/week2/activity.csv",
  header = TRUE,
  sep = ","
)

What is mean total number of steps taken per day?

For this we need to:

* Calculate the total number of steps taken per day.
* Make a histogram of the total number of steps per day.
* Calculate the mean of the total number of steps taken per day.
* Calculate the median of the total number of steps taken per day.

# Total steps per date. The formula method of aggregate() drops rows with a
# missing `steps` value by default (na.action = na.omit), so dates that have
# no recorded steps at all do not appear in the result.
stepsaday <- aggregate(steps ~ date, data = activity, FUN = sum)

# Distribution of the daily totals.
hist(
  stepsaday[["steps"]],
  main = "Total Steps Each Day",
  xlab = "Number of Steps",
  col = "gray"
)

# Mean and median of the daily totals.
mean(stepsaday[["steps"]])
## [1] 10766.19
median(stepsaday[["steps"]])
## [1] 10765

What is the average daily activity pattern?

For this we need to:

* Calculate the average number of steps per interval across all days.
* Plot the average number of steps by interval.
* Find the interval that contains the maximum average number of steps.

# Mean steps for each interval, averaged over all dates.
stepsainterval <- aggregate(steps ~ interval, data = activity, FUN = mean)

# Line plot of the average steps against the interval identifier.
plot(
  x = stepsainterval$interval,
  y = stepsainterval$steps,
  type = "l",
  col = "dark red",
  main = "Average Number of Steps per Interval",
  xlab = "INTERVAL",
  ylab = "Ave. Number of Steps"
)

# Interval identifier (first column of the aggregate) with the highest mean.
stepsainterval[which.max(stepsainterval[["steps"]]), 1]
## [1] 835

Imputing missing values

*Calculate the total number of missing values in the dataset (i.e. the total number of rows with NAs)

# Number of rows that contain at least one NA: all rows minus the rows that
# are complete in every column.
nrow(activity) - sum(complete.cases(activity))
## [1] 2304

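*Devise a strategy for filling in all of the missing values in the dataset. Here each missing value is replaced by the mean number of steps for the same interval (taken from the per-interval averages computed above); the values for 2012-10-01 are then set to 0.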

# Build the imputed data set: wherever `steps` is NA, substitute the mean for
# the same interval looked up in `stepsainterval`; otherwise keep the
# recorded value.
new_data <- transform(
  activity,
  steps = ifelse(
    is.na(steps),
    stepsainterval$steps[match(interval, stepsainterval$interval)],
    steps
  )
)

# Force column 1 (presumably `steps` -- confirm against the CSV layout) to 0
# for every row dated 2012-10-01.
# NOTE(review): this overwrites the interval means that were just imputed for
# that date, which departs from the stated imputation strategy and lowers the
# imputed mean; confirm it is intentional.
new_data[as.character(new_data$date) == "2012-10-01", 1] <- 0

# Daily totals of the imputed data.
steps_by_day_i <- aggregate(steps ~ date, data = new_data, FUN = sum)

*Make a histogram of the total number of steps per day

# Distribution of the daily totals after imputation.
hist(
  steps_by_day_i[["steps"]],
  main = "Total Steps Each Day",
  xlab = "Number of Steps",
  col = "brown"
)

* Overlay the histograms of the imputed and non-imputed daily totals to show the difference

# Draw the imputed daily totals first, then overlay the non-imputed totals on
# the same axes so the two distributions can be compared directly.
hist(
  steps_by_day_i$steps,
  main = "Total Steps Each Day",
  col = "brown",
  xlab = "Number of Steps"
)

# add = TRUE draws onto the existing plot instead of opening a new one. It is
# spelled out in full because `T` is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
hist(
  stepsaday$steps,
  main = "Total Steps Each Day",
  col = "gray",
  xlab = "Number of Steps",
  add = TRUE
)

# Colour key for the two overlaid histograms.
legend(
  "topright",
  legend = c("Imputed", "Non-imputed"),
  col = c("brown", "gray"),
  lwd = 5
)

*Mean and Median of non imputed data:

# Mean and median of the daily totals computed from the non-imputed data.
mean1 <- mean(stepsaday[["steps"]])
print(mean1)
## [1] 10766.19
median1 <- median(stepsaday[["steps"]])
print(median1)
## [1] 10765

*Mean & Median of imputed data

# Mean and median of the daily totals computed from the imputed data.
mean2 <- mean(steps_by_day_i[["steps"]])
print(mean2)
## [1] 10589.69
median2 <- median(steps_by_day_i[["steps"]])
print(median2)
## [1] 10766.19

*Total difference between imputed and non-imputed data.

# Change in the mean and in the median of the daily totals caused by
# imputation (imputed minus non-imputed).
mean_diff <- mean2 - mean1
print(mean_diff)
## [1] -176.4949
median_diff <- median2 - median1
print(median_diff)
## [1] 1.188679

*Calculate total difference

# Difference in the grand total of steps: imputed data minus non-imputed data.
total_diff <- sum(steps_by_day_i[["steps"]]) - sum(stepsaday[["steps"]])
print(total_diff)
## [1] 75363.32

Are there differences in activity patterns between weekdays and weekends?

library(lattice)

# Label every observation as "Weekday" or "Weekend".
# The day of the week is taken from as.POSIXlt()$wday, a number from 0 (Sunday)
# to 6 (Saturday), so 1:5 is Monday through Friday. Matching the text returned
# by weekdays() against English day names would depend on the session locale
# and would label every row "Weekend" under a non-English locale. Using the
# numeric index also avoids creating a variable that shadows base::weekdays.
new_data$dow <- as.factor(
  ifelse(
    as.POSIXlt(as.Date(new_data$date))$wday %in% 1:5,
    "Weekday",
    "Weekend"
  )
)

# Mean steps per interval, computed separately for weekdays and weekends.
stepsinterval1 <- aggregate(steps ~ interval + dow, data = new_data, FUN = mean)

# Two stacked panels (one per level of `dow`) of mean steps against interval.
xyplot(
  steps ~ interval | dow,
  data = stepsinterval1,
  main = "AVERAGE STEPS PER DAY BY INTERVAL",
  xlab = "INTERVAL",
  ylab = "STEPS",
  layout = c(1, 2),
  type = "l"
)