Calling ggplot2 function
library(ggplot2)
Loading and preprocessing the data
activity_data <- read.csv("./activity.csv", na.strings = "NA")
Change class of dates in activity data from character to Date
activity_data$date <- as.Date(activity_data$date, format = "%Y-%m-%d")
original_data <- activity_data
What is mean total number of steps taken per day?
Group the steps' data by date using sum
group_data <- aggregate(x = activity_data$steps,
by = list(activity_data$date),
FUN = sum, na.rm = TRUE)
Plotting a histogram of Total Number of Steps taken each Day
with(group_data,
hist(x = x,
breaks = 10,
xlim = c(0,25000),
main = "Histogram of Total Number of Steps taken each Day",
xlab = "Total Number of Steps taken per Day"))

What is the average daily activity pattern?
Group the steps' data by interval using mean
group_interval <- aggregate(x = activity_data$steps,
by = list(activity_data$interval),
FUN = mean, na.rm = TRUE)
Plotting the time series graph for Average Daily Activity Pattern
with(group_interval,
plot(x = Group.1,
y = x, type = "l", xlim = c(0,2500),
main = "Average Daily Activity Pattern",
ylab = "Average Number of Steps Taken",
xlab = "5 Min Intervals"))

Which interval has maximum number of steps?
group_interval[which.max(group_interval$x),]$Group.1
## [1] 835
Imputing missing values
Number of rows where no. of steps' data is missing
nrow(activity_data[is.na(activity_data$steps),])
## [1] 2304
Looping through na_data for assigning values - I am here using the mean for that respective interval
na_data <- activity_data[is.na(activity_data$steps),]
for(i in 1:2304) {
na_data$steps[i] <- group_interval[group_interval$Group.1 == na_data[i,]$interval,]$x
}
Replacing data with means of same interval group
activity_data$steps[is.na(activity_data$steps)] <- na_data$steps
Making new group, for steps' new data, by date using sum
new_group <- aggregate(x = activity_data$steps,
by = list(activity_data$date),
FUN = sum)
Plotting a histogram of Total Number of Steps taken each Day using Filled Missing Data
with(new_group,
hist(x = x,
breaks = 10,
xlim = c(0,25000),
main = "Histogram of Total Number of Steps taken each Day",
xlab = "Total Number of Steps taken per Day"))

Comparing with older data
oldgroup_data <- aggregate(x = original_data$steps,
by = list(original_data$date),
FUN = sum, na.rm = TRUE)
Mean <- mean(oldgroup_data$x)
Mean
## [1] 9354.23
Median <- median(oldgroup_data$x)
Median
## [1] 10395
Are there differences in activity patterns between weekdays and weekends?
Adding a variable of day using weekdays function
original_data$day <- weekdays(original_data$date)
Changing day variable values to two factor weekday or weekend
for (i in 1:17568) {
if (original_data$day[i] != "Saturday") {
original_data$day[i] <- "Weekday"
} else if (original_data$day[i] != "Sunday") {original_data$day[i] <- "Weekend"}
else {original_data$day[i] <- "Weekend"}
}
Coercing day data into factors
original_data$day <- as.factor(original_data$day)
Grouping by day and interval and taking mean
weekday_group <- aggregate(x = original_data$steps,
by = list(original_data$day, original_data$interval),
FUN = mean,
na.rm = TRUE)
Plotting a panel plot containing a time series plot of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all weekday days or weekend days (y-axis)
g <- ggplot(data = weekday_group, aes(x = Group.2, y = x))
g + geom_line() +
facet_grid(Group.1~.) +
labs(title = "Activity Patterns between Weekdays & Weekends") +
xlab("5 Min Intervals") +
ylab("Average Number of Steps Taken") +
coord_cartesian(xlim = c(0,2500))

Thank You