#Loading Libraries

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tibble)
library(lattice)

#loading the dataset

# Read the raw activity measurements from the CSV file in the working
# directory and show the structure of the resulting data frame.
activity <- read.csv(file = "activity.csv", header = TRUE)
str(activity)
## 'data.frame':    17568 obs. of  3 variables:
##  $ steps   : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ date    : chr  "2012-10-01" "2012-10-01" "2012-10-01" "2012-10-01" ...
##  $ interval: int  0 5 10 15 20 25 30 35 40 45 ...

What is the mean total number of steps taken per day? 1. Calculate the total number of steps taken per day

# Sum the steps for each date. The formula interface of aggregate() omits
# rows whose steps are NA by default, so dates with no recorded steps do not
# appear in the result.
Steps_total_per_day <- aggregate(steps ~ date, data = activity, FUN = sum)
head(Steps_total_per_day, n = 6L)
##         date steps
## 1 2012-10-02   126
## 2 2012-10-03 11352
## 3 2012-10-04 12116
## 4 2012-10-05 13294
## 5 2012-10-06 15420
## 6 2012-10-07 11015
# Mean of the recorded steps values for each date (NA rows are omitted by
# aggregate()'s default na.action).
mean_per_day <- aggregate(steps ~ date, data = activity, FUN = mean)
head(mean_per_day, n = 10L)
##          date    steps
## 1  2012-10-02  0.43750
## 2  2012-10-03 39.41667
## 3  2012-10-04 42.06944
## 4  2012-10-05 46.15972
## 5  2012-10-06 53.54167
## 6  2012-10-07 38.24653
## 7  2012-10-09 44.48264
## 8  2012-10-10 34.37500
## 9  2012-10-11 35.77778
## 10 2012-10-12 60.35417
  2. Make a histogram of the total number of steps taken each day
# Distribution of the daily step totals.
hist(
  Steps_total_per_day$steps,
  main = "Total Number of Steps Taken Each Day",
  xlab = "Steps",
  ylab = "Number of Days",
  col = "orange"
)

  3. Calculate and report the mean and median of the total number of steps taken per day

3a. mean calculation

# Mean of the daily step totals.
mean_of_steps <- mean(Steps_total_per_day[["steps"]])
print(mean_of_steps)
## [1] 10766.19

3b. median calculation

# Median of the daily step totals.
median_steps <- median(Steps_total_per_day[["steps"]])
print(median_steps)
## [1] 10765

What is the average daily activity pattern?

Make a time series plot (type = “l”) of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all days (y-axis) 1.1 The average number of steps taken & Time series plot:

# Average the steps of each 5-minute interval across all days, then draw the
# averages as a line plot over the interval identifiers.
average_steps <- aggregate(steps ~ interval, data = activity, FUN = mean)
plot(
  average_steps$interval,
  average_steps$steps,
  type = "l",
  col = "red",
  main = "Time Series Plot",
  # Axis labels are spelled out to match the column names.
  xlab = "interval",
  ylab = "steps"
)

  2. Which 5-minute interval, on average across all the days in the dataset, contains the maximum number of steps?
# Locate the row with the highest average step count and report its interval.
peak_row <- which.max(average_steps$steps)
interval_max <- average_steps$interval[peak_row]
print(interval_max)
## [1] 835

Imputing missing values. Calculate and report the total number of missing values in the dataset (i.e. the total number of rows with NAs)

# Count the rows whose steps measurement is missing.
missing_values <- sum(is.na(activity[["steps"]]))
print(missing_values)
## [1] 2304

Answer 1: The total number of rows with NAs is 2304.

Replacing the missing values with the overall mean of the 5-minute interval averages 2.1 Averaging: the mean of the per-interval average number of steps

# Single fill value: the mean of the per-interval average step counts.
mean_values <- mean(average_steps[["steps"]])
print(mean_values)
## [1] 37.3826

Create a new dataset that is equal to the original dataset but with the missing data filled in. 3.1 Create a new dataset

# Work on a copy so the original `activity` data frame keeps its NA values.
new_df <- activity

3.2 Fill the new_df with mean values (37.3826)

# Impute only the `steps` column. Indexing the whole data frame with
# is.na(new_df) would also overwrite (and coerce) any NA found in `date` or
# `interval`, which is never the intent here.
new_df$steps[is.na(new_df$steps)] <- mean_values
head(new_df)
##     steps       date interval
## 1 37.3826 2012-10-01        0
## 2 37.3826 2012-10-01        5
## 3 37.3826 2012-10-01       10
## 4 37.3826 2012-10-01       15
## 5 37.3826 2012-10-01       20
## 6 37.3826 2012-10-01       25

Make a histogram of the total number of steps taken each day and Calculate and report the mean and median total number of steps taken per day.

# Daily step totals after imputation, followed by their distribution.
totalstepsnew_df <- aggregate(steps ~ date, data = new_df, FUN = sum)
hist(
  totalstepsnew_df$steps,
  col = "gray",
  xlab = "Steps",
  ylab = "Number of Days",
  main = "Total Steps Taken Each day -  Replacing NA"
)

4.1 Calculate mean

# Mean of the daily step totals computed from the imputed data.
mean_steps_new_df <- mean(totalstepsnew_df[["steps"]])
print(mean_steps_new_df)
## [1] 10766.19

4.2 Calculate median

# Median of the daily step totals computed from the imputed data.
median_steps_new_df <- median(totalstepsnew_df[["steps"]])
print(median_steps_new_df)
## [1] 10766.19

Do these values differ from the estimates from the first part of the assignment? The difference is insignificant. Replacing the NA values increased the number of days included, but the mean and median barely changed from their previous values.

What is the impact of imputing missing data on the estimates of the total daily number of steps? The number of days increased.

Are there differences in activity patterns between weekdays and weekends? For this part the weekdays() function may be of some help here. Use the dataset with the filled-in missing values for this part.

Create a new factor variable in the dataset with two levels “weekday” and “weekend” indicating whether a given date is a weekday or weekend day.

# Convert the character dates to Date objects so day-of-week arithmetic works.
new_df$date <- as.Date(new_df$date)

# Label each row as "weekend" (Saturday/Sunday) or "weekday".
# wday() with week_start = 1 numbers the days Monday = 1 ... Sunday = 7, so
# the check does not depend on the locale-specific day names that weekdays()
# returns. The piped `date` column is used rather than reaching back into
# `new_df`.
df <- new_df %>%
  mutate(dayofweek = ifelse(wday(date, week_start = 1) >= 6,
                            "weekend",
                            "weekday"))
  2. Make a panel plot containing a time series plot (i.e. type = “l”) of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all weekday days or weekend days (y-axis)
# Total steps for every (dayofweek, interval) combination.
df2 <- summarize(
  group_by(df, dayofweek, interval),
  sumsteps = sum(steps)
)
## `summarise()` has grouped output by 'dayofweek'. You can override using the
## `.groups` argument.

display of the data table after summarization

# Show the first six rows of the summary table.
head(df2, n = 6L)
## # A tibble: 6 × 3
## # Groups:   dayofweek [1]
##   dayofweek interval sumsteps
##   <chr>        <int>    <dbl>
## 1 weekday          0     315.
## 2 weekday          5     242.
## 3 weekday         10     231.
## 4 weekday         15     232.
## 5 weekday         20     228.
## 6 weekday         25     283.
# Average the steps per interval separately for weekdays and weekends. The
# panel plot is meant to show averages; raw totals are not comparable between
# the panels because weekdays outnumber weekend days.
avg_steps_by_daytype <- aggregate(steps ~ interval + dayofweek,
                                  data = df,
                                  FUN = mean)

# One panel per dayofweek level, each drawn as a line over the intervals.
xyplot(steps ~ interval | dayofweek,
       data = avg_steps_by_daytype,
       type = "l",
       main = "Average Number of Steps within Intervals by dayofweek",
       xlab = "Daily Intervals",
       ylab = "Average Number of Steps")