Loading and preprocessing the data

# Read the raw activity records from disk.
activity <- read.csv("repdata_data_activity/activity.csv")
# Convert the date column to Date class so it can be grouped by day and
# passed to date helpers further down.
activity[["date"]] <- as.Date(activity[["date"]])

Histogram of the total number of steps taken each day

# Total steps per date. With na.rm = TRUE a date whose readings are all NA
# sums to 0 rather than NA, so such dates land in the lowest histogram bin.
stepsByDate <- setNames(
  aggregate(
    activity$steps,
    by = list(activity$date),
    FUN = sum,
    na.rm = TRUE
  ),
  c("date", "steps")
)
# Distribution of the daily totals across 30 suggested breaks.
hist(stepsByDate$steps, breaks = 30)

Mean and Median

# Centre of the daily-total distribution.
mSteps <- with(stepsByDate, mean(steps))
mdSteps <- with(stepsByDate, median(steps))
# c() coerces the numbers to character, so the values print as quoted strings.
print(c("mean = ", mSteps , "median = ", mdSteps))
## [1] "mean = "          "9354.22950819672" "median = "        "10395"

Average daily activity pattern

# Mean steps for each interval identifier, averaged over all dates and
# ignoring missing readings.
intervalSteps <- setNames(
  aggregate(
    activity$steps,
    by = list(activity$interval),
    FUN = mean,
    na.rm = TRUE
  ),
  c("Interval", "Mean")
)
# Line plot of the interval means against the interval identifier.
plot(
  intervalSteps$Mean ~ intervalSteps$Interval,
  type = "l",
  xlab = "Interval",
  ylab = "Mean",
  main = "Mean of steps by Interval"
)

Imputing missing values

library(Hmisc)
## Warning: package 'Hmisc' was built under R version 4.1.2
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.1.2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
# Logical mask marking the rows whose step reading is missing.
isna <- is.na(activity$steps)
# Summing the mask counts only the TRUE entries, so the "Total NAs" label is
# attached to the missing-value count alone and not to the non-missing count.
print(c("Total NAs = ", sum(isna)))
## [1] "Total NAs = " "2304"
# Work on a copy so the raw data stays untouched, then fill each missing
# step reading with the mean of the observed readings (Hmisc::impute).
cleanData <- activity
cleanData[["steps"]] <- impute(cleanData[["steps"]], fun = mean)

New histogram after missing values imputed

# Daily totals recomputed from the imputed data.
stepsByDate2 <- setNames(
  aggregate(
    cleanData$steps,
    by = list(cleanData$date),
    FUN = sum,
    na.rm = TRUE
  ),
  c("date", "steps")
)
# Same histogram as before, now drawn from the imputed daily totals.
hist(stepsByDate2$steps, breaks = 30)

New Mean and Median

# Mean and median of the daily totals after imputation.
mSteps2 <- with(stepsByDate2, mean(steps))
mdSteps2 <- with(stepsByDate2, median(steps))
# c() coerces the numbers to character, so the values print as quoted strings.
print(c("mean = ", mSteps2 , "median = ", mdSteps2))
## [1] "mean = "          "10766.1886792453" "median = "        "10766.1886792453"

Weekdays vs Weekends

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Hmisc':
## 
##     src, summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Tag every observation with its day name and a Weekday/Weekend factor.
# weekdays() returns names in the current locale, so comparing them against
# English strings would label every row "Weekday" on a non-English system.
# The weekend test therefore uses the numeric day of week from POSIXlt
# (0 = Sunday, 6 = Saturday), which does not depend on the locale.
cleanData2 <- mutate(cleanData, weekday = weekdays(cleanData$date))
cleanData2$dayType <- ifelse(
  as.POSIXlt(cleanData2$date)$wday %in% c(0L, 6L),
  "Weekend",
  "Weekday"
)
cleanData2$dayType <- as.factor(cleanData2$dayType)

Weekdays vs Weekends plot

library(ggplot2)
# Mean steps for every (day type, interval) pair of the imputed data.
cleanData3 <- aggregate(
  cleanData2$steps,
  by = list(cleanData2$dayType, cleanData2$interval),
  FUN = mean
)
colnames(cleanData3) <- c("dayType", "interval", "steps")
# One stacked panel per day type so the two profiles can be compared on a
# shared interval axis. The title typos in the original string are corrected.
ggplot(cleanData3, aes(x = interval, y = steps, color = dayType)) +
  geom_line() +
  facet_grid(dayType ~ .) +
  xlab("Intervals") + ylab("Mean") +
  ggtitle("Average Steps per Interval, by Day Type")