library(lattice)
library(ggplot2)
# Load the activity data and parse its date column.
getwd()
## [1] "/Users/janakiramsundaraneedi/Activity"
# Read the CSV by explicit path instead of calling setwd(), so the session's
# working directory is left untouched. The directory can be overridden with
# the ACTIVITY_DATA_DIR environment variable; when that is unset or empty the
# original hard-coded location is used.
default_data_dir <- "/Users/janakiramsundaraneedi/Downloads"
data_dir <- Sys.getenv("ACTIVITY_DATA_DIR", unset = default_data_dir)
if (!nzchar(data_dir)) {
  data_dir <- default_data_dir
}
data <- read.csv(file.path(data_dir, "activity.csv"))
# Parse the "YYYY-MM-DD" strings in UTC: a date-only string becomes midnight,
# and pinning the zone avoids local time zones in which midnight does not
# exist on a daylight-saving switch day.
dates <- strptime(data$date, "%Y-%m-%d", tz = "UTC")
data$date <- dates
# Distinct dates and distinct interval identifiers, in order of appearance.
dates_unique <- unique(dates)
unique_intervals <- unique(data$interval)
# Total steps per day, keyed by the day-of-year (`yday`) of each row's date.
# With na.rm = TRUE, a day whose readings are all NA sums to 0.
Steps_perday <- tapply(data$steps, dates$yday, FUN = sum, na.rm = TRUE)
# Line chart of the daily totals with the mean (blue) and median (green) drawn
# as horizontal reference lines. Base-graphics calls are issued as separate
# statements: chaining them with `+` only worked by accident (NULL + NULL).
# NOTE(review): pairing dates_unique with Steps_perday positionally assumes the
# rows are in ascending date order within one year - confirm against the data.
plot(
  dates_unique, Steps_perday,
  main = "Total steps taken each day",
  xlab = "Date (October to November 2012)",
  ylab = "Total steps",
  type = "l", lwd = 2, col = "black"
)
abline(h = mean(Steps_perday), col = "blue", lwd = 3)
abline(h = median(Steps_perday), col = "green", lwd = 3)
# Row index of the single largest step count.
which.max(data$steps)
## [1] 16492
# Count missing vs. present cells across the whole data frame.
table(is.na(data))
##
## FALSE TRUE
## 50400 2304
# Row indices of the missing step counts. Taking them from the steps column
# (rather than linear indices over the whole data frame) keeps them valid row
# numbers regardless of column order.
NA_value <- which(is.na(data$steps))
na_value <- NA_value  # same indices under the second name the script defines
# Impute each missing count with the mean of its 5-minute interval across all
# days that have a reading. The lookup table must be keyed by interval: looking
# the interval up in the per-day totals matched almost nothing and left most
# NAs in place.
interval_means <- tapply(data$steps, data$interval, FUN = mean, na.rm = TRUE)
Values_Impu <- as.vector(interval_means[as.character(data$interval[NA_value])])
names(Values_Impu) <- NA_value
# One vectorised assignment replaces the element-by-element loop.
data$steps[NA_value] <- unname(Values_Impu)
# Remaining missing cells; this should be 0 when steps was the only column
# with NAs (the run recorded before this fix printed 2248).
sum(is.na(data))
# Classify dates as "weekday" (Monday-Friday) or "weekend" (Saturday/Sunday).
#
# Args:
#   date: A Date or date-time value (anything as.POSIXlt() accepts); may be a
#     vector.
# Returns:
#   A character vector the same length as `date`, each element "weekday" or
#   "weekend".
# Raises:
#   An error "invalid date" if any element is missing (NA).
wday_wend <- function(date) {
  # POSIXlt's `wday` is numeric 0-6 with Sunday = 0, so the classification does
  # not depend on the session locale the way weekdays() day names do.
  wday <- as.POSIXlt(date)$wday
  if (anyNA(wday)) {
    stop("invalid date")
  }
  is_weekend <- wday %in% c(0, 6)
  # Index a two-element lookup: FALSE -> 1 -> "weekday", TRUE -> 2 -> "weekend".
  c("weekday", "weekend")[is_weekend + 1L]
}
# Tag every observation as weekday/weekend, average the steps per 5-minute
# interval within each day type, and plot one panel per day type.
data$date <- as.Date(data$date)
# vapply() pins the result to one string per row, unlike sapply(), whose
# return type depends on the input.
data$day <- vapply(data$date, wday_wend, character(1))
# The formula interface drops rows with missing values before averaging.
averages <- aggregate(steps ~ interval + day, data = data, FUN = mean)
ggplot(averages, aes(interval, steps)) +
  geom_line() +
  facet_grid(day ~ .) +
  xlab("5-minute interval") +
  ylab("Number of steps")