# Download and extract the activity dataset, but only when the CSV is not
# already present in ../ProjectData.
if (!file.exists("../ProjectData/activity.csv")) {
  # mode = "wb" requests a binary transfer so the zip archive cannot be
  # corrupted by line-ending translation on Windows.
  download.file(
    url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2Factivity.zip",
    destfile = "../activity.zip",
    method = "auto",
    mode = "wb"
  )
  unzip("../activity.zip", exdir = "../ProjectData")
}
# Load the raw activity data into `amd` and inspect its structure.
amd <- read.csv("../ProjectData/activity.csv", header = TRUE, sep = ",")
str(amd)
## 'data.frame': 17568 obs. of 3 variables:
## $ steps : int NA NA NA NA NA NA NA NA NA NA ...
## $ date : Factor w/ 61 levels "2012-10-01","2012-10-02",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ interval: int 0 5 10 15 20 25 30 35 40 45 ...
# NOTE: the output above is recorded from an earlier run; R >= 4.0 reads `date`
# as character rather than factor. The conversion below works for both.

# Convert `date` to Date class so later steps can do date arithmetic on it.
amd$date <- as.Date(amd$date)
str(amd)
## 'data.frame': 17568 obs. of 3 variables:
## $ steps : int NA NA NA NA NA NA NA NA NA NA ...
## $ date : Date, format: "2012-10-01" "2012-10-01" ...
## $ interval: int 0 5 10 15 20 25 30 35 40 45 ...
# Total number of steps per day. `sum` is called without na.rm, so a day that
# contains any NA reading gets an NA total (see 2012-10-01 below).
amdsteps <- tapply(amd$steps, amd$date, sum)
library(reshape2)
# Flatten the named per-day array into a two-column data frame.
amdmelt <- melt(amdsteps)
names(amdmelt) <- c("Date", "SumofSteps")
head(amdmelt)
## Date SumofSteps
## 1 2012-10-01 NA
## 2 2012-10-02 126
## 3 2012-10-03 11352
## 4 2012-10-04 12116
## 5 2012-10-05 13294
## 6 2012-10-06 15420
# Distribution of the daily totals.
hist(
  amdmelt$SumofSteps,
  main = "Histogram of Total Number of Steps per Day",
  xlab = "Total Number of Steps per Day",
  ylab = "Frequency",
  col = "blue",
  breaks = 30
)
# Mean and median of the daily totals, skipping days whose total is NA.
mean(amdmelt$SumofSteps, na.rm = TRUE)
## [1] 10766
median(amdmelt$SumofSteps, na.rm = TRUE)
## [1] 10765
# Average number of steps for each interval across all days, ignoring NA
# readings.
amdavg <- tapply(amd$steps, amd$interval, mean, na.rm = TRUE)
amdmelt_avg <- melt(amdavg)
names(amdmelt_avg) <- c("interval", "avg")
nrow(amdmelt_avg)
## [1] 288
plot(
  avg ~ interval,
  data = amdmelt_avg,
  type = "l",
  main = "Average daily activity pattern"
)
# Interval with the highest average. which.max() skips NA/NaN averages (an
# `== max(...)` comparison would return all-NA rows if any average were NaN)
# and returns the first row when several intervals tie for the maximum.
amdmelt_avg[which.max(amdmelt_avg$avg), ]
## interval avg
## 104 835 206.2
# Number of missing values in each column of the raw data.
colSums(is.na(amd))
## steps date interval
## 2304 0 0
# Overall mean of the observed step counts; this is the imputation value.
mean(amd$steps, na.rm = TRUE)
## [1] 37.38
# Work on a copy so the raw data in `amd` stays untouched, and replace every
# missing step count with the overall mean of the observed values.
amdimpute <- amd
amdimpute$steps <- replace(
  amdimpute$steps,
  is.na(amdimpute$steps),
  mean(amdimpute$steps, na.rm = TRUE)
)
# Confirm that no missing values remain.
colSums(is.na(amdimpute))
## steps date interval
## 0 0 0
# Total number of steps per day, recomputed on the imputed data.
amdimputesteps <- tapply(amdimpute$steps, amdimpute$date, sum)
library(reshape2)
# Flatten the named per-day array into a two-column data frame.
amdmeltimpute <- melt(amdimputesteps)
names(amdmeltimpute) <- c("Date", "SumofSteps")
head(amdmeltimpute)
## Date SumofSteps
## 1 2012-10-01 10766
## 2 2012-10-02 126
## 3 2012-10-03 11352
## 4 2012-10-04 12116
## 5 2012-10-05 13294
## 6 2012-10-06 15420
# Distribution of the daily totals after imputation.
hist(
  amdmeltimpute$SumofSteps,
  main = "Histogram of Total Number of Steps per Day on Impute Value",
  xlab = "Total Number of Steps per Day",
  ylab = "Frequency",
  col = "blue",
  breaks = 30
)
# Mean and median of the daily totals after imputation.
mean(amdmeltimpute$SumofSteps, na.rm = TRUE)
## [1] 10766
median(amdmeltimpute$SumofSteps, na.rm = TRUE)
## [1] 10766
# Day name of each observation (locale-dependent text, kept for inspection).
amdimpute$weekdays <- weekdays(amdimpute$date)
# Classify each row as weekend or weekday. POSIXlt's `wday` is a numeric day
# of week (0 = Sunday ... 6 = Saturday), so unlike comparing weekdays() output
# with English day names, the result does not depend on the session locale.
day_index <- as.POSIXlt(amdimpute$date)$wday
amdimpute$weeks <- ifelse(
  day_index == 0L | day_index == 6L,
  "weekend",
  "weekdays"
)
library(plyr)
# Mean number of steps for every interval / day-type combination. The column
# is referenced by name rather than by position.
week_comp <- ddply(
  amdimpute,
  c("interval", "weeks"),
  function(x) c(steps = mean(x$steps))
)
head(week_comp)
## interval weeks steps
## 1 0 weekdays 7.007
## 2 0 weekend 4.673
## 3 5 weekdays 5.384
## 4 5 weekend 4.673
## 5 10 weekdays 5.140
## 6 10 weekend 4.673
library(lattice)
# One panel per day type, stacked vertically, to compare activity patterns.
xyplot(
  steps ~ interval | weeks,
  data = week_comp,
  type = "l",
  xlab = "Interval",
  ylab = "Number of steps",
  layout = c(1, 2)
)