The code assumes the zip file is already in the working directory
# Extract the archive (expected in the working directory) and keep the
# path(s) that unzip() reports.
file <- unzip(zipfile = "activity.zip")
# Read the extracted CSV into a data frame.
data0 <- read.csv(file = file)
# Print per-column summary statistics.
summary(object = data0)
## steps date interval
## Min. : 0.00 Length:17568 Min. : 0.0
## 1st Qu.: 0.00 Class :character 1st Qu.: 588.8
## Median : 0.00 Mode :character Median :1177.5
## Mean : 37.38 Mean :1177.5
## 3rd Qu.: 12.00 3rd Qu.:1766.2
## Max. :806.00 Max. :2355.0
## NA's :2304
Table of the sum of steps per day
# Sum the steps recorded on each date.
total_steps_day1 <- aggregate(
  steps ~ date,
  data = data0,
  FUN = sum,
  na.rm = TRUE
)
# Show the first rows of the per-day totals.
head(x = total_steps_day1)
## date steps
## 1 2012-10-02 126
## 2 2012-10-03 11352
## 3 2012-10-04 12116
## 4 2012-10-05 13294
## 5 2012-10-06 15420
## 6 2012-10-07 11015
Plot of the sum of steps per day
# Daily totals to be binned.
b <- total_steps_day1$steps
# Compute the bin counts without drawing, then express each bin as a
# percentage of all days to use as bar labels.
bin_counts <- hist(b, plot = FALSE)$counts
pct_labels <- paste0(round(bin_counts / length(b) * 100, 1), "%")
hist(
  b,
  labels = pct_labels,
  col = "yellow",
  xlab = "Steps per day",
  ylim = c(0, 30),
  main = "Total Number of Steps Per Day"
)
Mean and Median of the sum of steps per day
# Mean of the per-day step totals.
mean(x = total_steps_day1[["steps"]])
## [1] 10766.19
# Median of the per-day step totals.
median(x = total_steps_day1[["steps"]])
## [1] 10765
Table of average of steps by interval
# Average the steps for each interval value across all dates.
avg_steps_interval <- aggregate(
  steps ~ interval,
  data = data0,
  FUN = mean,
  na.rm = TRUE
)
# Show the first rows of the per-interval averages.
head(x = avg_steps_interval)
## interval steps
## 1 0 1.7169811
## 2 5 0.3396226
## 3 10 0.1320755
## 4 15 0.1509434
## 5 20 0.0754717
## 6 25 2.0943396
Time series plot of the average of steps by interval
# Line plot of the average steps (y) against the interval identifier (x).
plot(
  steps ~ interval,
  data = avg_steps_interval,
  type = "l",
  col = "red",
  main = "Time series plot of the average of steps by interval",
  xlab = "Intervals",
  ylab = "Average number of steps"
)
5-minute interval with the maximum average number of steps
# Interval identifier of the row holding the largest average step count.
with(avg_steps_interval, interval[which.max(steps)])
## [1] 835
Total number of missing values (NAs) in the dataset
# Count every missing cell across all columns of data0.
length(which(is.na(data0)))
## [1] 2304
Replace NAs with the overall mean number of steps per interval to get a new dataset
# Fill every missing step count with the mean of the observed step counts.
# is.na() already yields a logical mask, so no comparison against TRUE is
# needed (and the reassignable alias `T` is avoided).
# Note: this overwrites data0$steps in place.
data0$steps[is.na(data0$steps)] <- mean(data0$steps, na.rm = TRUE)
Rename the new dataset with replaced NAs
# Keep the data frame under a second name for the remaining analysis.
data1 <- data0
# Show the first rows.
head(x = data1)
## steps date interval
## 1 37.3826 2012-10-01 0
## 2 37.3826 2012-10-01 5
## 3 37.3826 2012-10-01 10
## 4 37.3826 2012-10-01 15
## 5 37.3826 2012-10-01 20
## 6 37.3826 2012-10-01 25
Table showing the sum of total steps by date of the new dataset
# Sum the steps recorded on each date of data1.
total_steps_day2 <- aggregate(
  steps ~ date,
  data = data1,
  FUN = sum
)
# Show the first rows of the per-day totals.
head(x = total_steps_day2)
## date steps
## 1 2012-10-01 10766.19
## 2 2012-10-02 126.00
## 3 2012-10-03 11352.00
## 4 2012-10-04 12116.00
## 5 2012-10-05 13294.00
## 6 2012-10-06 15420.00
Histogram showing the sum of total steps by date of the new dataset
# Histogram of the per-day totals of data1, with the count printed on each bar.
hist(
  x = total_steps_day2$steps,
  col = "green",
  labels = TRUE,
  main = "Total Number of Steps Per Day of The New Dataset",
  xlab = "Steps per day"
)
Mean and median of the total steps by date of the new dataset
# Mean of the per-day step totals of data1.
mean(x = total_steps_day2[["steps"]])
## [1] 10766.19
# Median of the per-day step totals of data1.
median(x = total_steps_day2[["steps"]])
## [1] 10766.19
Create a function to group days into weekday or weekend
# Classify date(s) as 'Weekday' or 'Weekend'.
#
# z: a Date, or date string(s) in '%Y-%m-%d' form (handed to as.Date()).
#    May be a single value or a vector.
# Returns: a character vector the same length as z containing 'Weekday' or
#    'Weekend'; NA where the date could not be parsed.
#
# The day of the week is taken from the numeric `wday` field of POSIXlt
# (0 = Sunday ... 6 = Saturday) instead of weekdays(), whose day names depend
# on the session locale and would never match 'Saturday'/'Sunday' outside an
# English locale. The comparison is element-wise, so vector input works too
# (scalar `||` rejects conditions longer than one element in R >= 4.3).
wkday_fun <- function(z) {
  day_num <- as.POSIXlt(as.Date(z, '%Y-%m-%d'))$wday
  is_weekend <- day_num == 0L | day_num == 6L
  # as.character() keeps the result character even for empty or all-NA input.
  as.character(ifelse(is_weekend, 'Weekend', 'Weekday'))
}
Factoring weekday versus weekend
# Day name for each record, as reported by weekdays().
data1$weekday <- weekdays(as.Date(data1$date))
# Classify every date with wkday_fun(). vapply() requires exactly one string
# per date, whereas sapply() can silently change its return type on
# unexpected input.
data1$week_type <- as.factor(vapply(data1$date, wkday_fun, character(1)))
levels(data1$week_type)
## [1] "Weekday" "Weekend"
head(data1)
## steps date interval weekday week_type
## 1 37.3826 2012-10-01 0 Monday Weekday
## 2 37.3826 2012-10-01 5 Monday Weekday
## 3 37.3826 2012-10-01 10 Monday Weekday
## 4 37.3826 2012-10-01 15 Monday Weekday
## 5 37.3826 2012-10-01 20 Monday Weekday
## 6 37.3826 2012-10-01 25 Monday Weekday
Average number of steps by interval for weekdays and weekends
# Average the steps for every combination of interval and week_type.
average_by_weektype <- aggregate(
  steps ~ interval + week_type,
  data = data1,
  FUN = mean
)
# Show the first rows of the grouped averages.
head(x = average_by_weektype)
## interval week_type steps
## 1 0 Weekday 7.006569
## 2 5 Weekday 5.384347
## 3 10 Weekday 5.139902
## 4 15 Weekday 5.162124
## 5 20 Weekday 5.073235
## 6 25 Weekday 6.295458
Load the lattice library
library(lattice)
# Pull the plotting inputs out of the grouped averages:
# y = average steps, x = interval identifier, f = Weekday/Weekend factor.
y <- average_by_weektype[["steps"]]
x <- average_by_weektype[["interval"]]
f <- average_by_weektype[["week_type"]]
Lattice plot of the average number of steps by interval for weekdays and weekends
# One line panel per level of f, stacked in a 1-column by 2-row layout.
xyplot(
  y ~ x | f,
  type = "l",
  layout = c(1, 2),
  xlab = "5-minutes Interval",
  ylab = "Number of Steps"
)