suppressMessages(library(dplyr))
suppressMessages(library(lubridate))
# Load the activity data directly from the CSV inside the zip archive.
# stringsAsFactors is passed to read.table() itself so that text columns
# (e.g. `date`) are read as character instead of factor; a bare top-level
# `stringsAsFactors=FALSE` only creates an unused variable.
activity <- read.table(
  unz("repdata_data_activity.zip", "activity.csv"),
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
# Total number of steps taken on each day.
# Rows with a missing step count are dropped first, so a day without any
# recorded steps is left out instead of being counted as a zero-step day.
# The per-day value is a sum (not a mean) to agree with the `sum` column
# name and with the "Total Number of Steps per day" histogram built from it.
# NOTE(review): figures previously rendered from the per-day mean need to be
# regenerated after this change.
stepsPerDay <- activity %>%
  filter(!is.na(steps)) %>%
  group_by(date) %>%
  summarize(total = sum(steps))
names(stepsPerDay) <- c("date", "sum")
# Histogram of the per-day step values held in stepsPerDay$sum.
hist(stepsPerDay$sum,
     freq = TRUE,
     col = "red",
     xlab = "Total Number of Steps per day",
     breaks = 10,
     main = "Number of Steps per Day")
# Make sure the output directory exists before copying the plot into it.
dir.create("figures", showWarnings = FALSE)
# Copy the current plot to a PNG device; `filename` is spelled out in full
# instead of relying on partial matching of `file`.
dev.copy(png, filename = "figures/plot1.png", height = 480, width = 480)
png 3
# Close the PNG device opened by dev.copy() so figures/plot1.png is finalised.
dev.off()
png 2
# Summary statistics of the per-day values, ignoring missing entries.
stepsPerDayMean <- mean(stepsPerDay[["sum"]], na.rm = TRUE)
stepsPerDayMedian <- median(stepsPerDay[["sum"]], na.rm = TRUE)
| Variable | mean | median |
|---|---|---|
| Steps Per Day | 37.3826 | 37.3785 |
# Mean number of steps for each interval, averaged over all days
# (missing step counts are ignored). Columns: interval, MeanSteps.
intervalPerDayMean <- summarize(
  group_by(activity, interval),
  MeanSteps = mean(steps, na.rm = TRUE)
)
# Time-series plot of the mean number of steps in each interval.
# The x-axis uses the numeric `interval` column of intervalPerDayMean itself,
# so every mean is plotted against its own interval instead of against a
# separately built vector of character factor levels.
par(mar = c(4.5, 4.5, 1, 1))
plot(MeanSteps ~ interval, data = as.data.frame(intervalPerDayMean),
     type = "l",
     ylab = "number of steps", xlab = "interval levels", col = "red")
# Make sure the output directory exists before copying the plot into it.
dir.create("figures", showWarnings = FALSE)
# `filename` is spelled out in full instead of relying on partial matching.
dev.copy(png, filename = "figures/plot2.png", height = 480, width = 960)
png 3
# Close the PNG device opened by dev.copy() so figures/plot2.png is finalised.
dev.off()
png 2
# Locate the interval with the highest mean step count.
# which.max() gives the (first) row holding the maximum directly, avoiding
# an equality comparison on floating-point means and the logical matrix
# produced by row(), which is not a valid row selector.
max5MinMeanRow <- which.max(intervalPerDayMean$MeanSteps)
max5MinMeanValue <- intervalPerDayMean$MeanSteps[max5MinMeanRow]
# Column 1 is `interval`; row/column indexing keeps the same result shape
# as the previous `[...][1, 1]` extraction.
max5MinMeanInterval <- intervalPerDayMean[max5MinMeanRow, 1]
On average across all days, the 5-minute interval 835 contains the maximum number of steps (about 206).
# Fill every missing step count with the mean for the same interval.
# iSteps: row numbers whose step count is missing.
iSteps <- which(is.na(activity$steps))
# jInterval: the interval each of those rows belongs to.
jInterval <- activity$interval[iSteps]
# Look the replacement up by interval value; this does not depend on the
# missing rows forming whole days in interval order, which silently
# recycling the full column of means would require.
activity$steps[iSteps] <-
  intervalPerDayMean$MeanSteps[match(jInterval, intervalPerDayMean$interval)]
Total number of rows with missing values in the dataset: 2304
# Build the imputed data set: a copy of the activity data in which only the
# missing step counts are replaced by the mean for the same interval.
# Observed step counts are kept as they are.
meanVector <- activity %>%
  group_by(interval) %>%
  summarize(avg_steps = mean(steps, na.rm = TRUE))
# Attach each row's interval mean by matching on `interval`; a join keeps the
# row order of `activity` and does not duplicate the `interval` column the
# way a positional cbind() does.
newActivity <- left_join(activity, meanVector, by = "interval")
newActivityVector <- data.frame(
  steps = ifelse(is.na(newActivity$steps),
                 newActivity$avg_steps,
                 newActivity$steps),
  date = newActivity$date,
  interval = newActivity$interval
)
# Histogram of the total number of steps per day in the imputed data set.
# newActivityVector holds one row per date and interval, so the step counts
# are summed within each date first; plotting the raw `steps` column would
# show per-interval values, not the per-day totals named in the axis label.
imputedStepsPerDay <- as.vector(
  tapply(newActivityVector$steps, newActivityVector$date, sum)
)
hist(imputedStepsPerDay,
     freq = TRUE,
     col = "red",
     xlab = "Total Number of Steps per day",
     breaks = 10,
     main = "Number of Steps per Day")
# Make sure the output directory exists before copying the plot into it.
dir.create("figures", showWarnings = FALSE)
# `filename` is spelled out in full instead of relying on partial matching.
dev.copy(png, filename = "figures/plot3.png", height = 480, width = 480)
png 3
# Close the PNG device opened by dev.copy() so figures/plot3.png is finalised.
dev.off()
png 2
# Per-date summaries of the `steps` column of newActivityVector:
# meanf holds the mean (avg_steps) and medianf the median (med_steps).
meanf <- summarize(
  group_by(newActivityVector, date),
  avg_steps = mean(steps)
)
medianf <- summarize(
  group_by(newActivityVector, date),
  med_steps = median(steps)
)
| Variable | mean | median |
|---|---|---|
| Steps Per Day | 37.3826 | 34.1132 |
Yes. The impact depends on how reliable the technique used to fill in the NA data is. In this case the mean values match, but the median is shifted to the left (lower).
# Classify every observation as falling on a weekday or a weekend.
# The `wday` field of POSIXlt numbers the days 0-6 starting on Sunday, so
# 1:5 is Monday to Friday. Unlike comparing weekdays() output with English
# abbreviations, this does not depend on the session locale.
# Level order ("weekend", "weekday") is the same as before.
activity <- activity %>%
  mutate(daytype = factor(
    ifelse(as.POSIXlt(date)$wday %in% 1:5, "weekday", "weekend"),
    levels = c("weekend", "weekday")
  ))
# Panel plot: mean number of steps per interval, one panel per day type.
# The means are computed per (daytype, interval) pair so that each panel
# shows an average profile; building a factor from `daytype` with the
# interval values as levels does not yield usable x values.
dayTypeMeans <- activity %>%
  group_by(daytype, interval) %>%
  summarize(MeanSteps = mean(steps, na.rm = TRUE)) %>%
  ungroup() %>%
  as.data.frame()
par(mfrow = c(2, 1))
# Top margin is 2 lines so the per-panel title fits.
par(mar = c(2.5, 2.5, 2, 1))
for (dayType in levels(activity$daytype)) {
  plot(MeanSteps ~ interval,
       data = dayTypeMeans[dayTypeMeans$daytype == dayType, ],
       type = "l", main = dayType,
       ylab = "number of steps", xlab = "interval levels", col = "red")
}
# Make sure the output directory exists before copying the plot into it.
dir.create("figures", showWarnings = FALSE)
# `filename` is spelled out in full instead of relying on partial matching.
dev.copy(png, filename = "figures/plot4.png", height = 480, width = 960)
png 3
# Close the PNG device opened by dev.copy() so figures/plot4.png is finalised.
dev.off()
png 2