```
# Attach every package the analysis relies on before touching the data
library(readr)
library(tidyverse)
library(lubridate)

# Read the activity data from the archive in the working directory;
# the `date` column is parsed as a date-time written as YYYY-MM-DD
data <- read_csv(
  "activity.zip",
  col_types = cols(
    date = col_datetime(format = "%Y-%m-%d")
  )
)
```

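The date is stored in YYYY-MM-DD format, while the time of day is encoded as a 5-minute interval identifier, where 0 means midnight and, for example, 815 means 8:15 a.m. I'm going to convert the date into a proper date type; the code that would turn the interval into a clock time is kept below but commented out.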

```
# Turn the parsed `date` column into a plain Date
data <- data %>%
  mutate(date = ymd(date))

# Disabled sketch: build a full timestamp from the date and the interval id
# (zero-pad the interval to HHMM, format it as HH:MM, paste it to the date).
# data$interval2 <- sprintf("%04d", data$interval)
# data$time<- format(strptime(data$interval2, format="%H%M"), format = "%H:%M")
# data$daytime <- paste(data$date, data$time, sep= " ")
# data$daytime <- as.POSIXct(data$daytime, format="%Y-%m-%d %H:%M")
```

The following histogram depicts the total number of steps each day:

```
# Total number of steps per day. Missing readings are dropped before summing,
# so a day whose readings are all NA ends up with a total of 0.
hist_data <- data %>%
  group_by(date) %>%
  summarise(daily_steps = sum(steps, na.rm = TRUE))

# Distribution of the daily totals, split into 20 bins
ggplot(data = hist_data, mapping = aes(x = daily_steps)) +
  geom_histogram(bins = 20) +
  labs(
    title = "Histogram of the total number of steps taken each day",
    x = "Daily steps",
    y = "Number of obs"
  )
```

On average, people were taking M = 9354.23 steps daily. The median number of daily steps was Me = 10395.

```
# Mean of the daily step totals, rounded to two decimal places
round(mean(hist_data[["daily_steps"]], na.rm = TRUE), digits = 2)
```

`## [1] 9354.23`

```
# Median of the daily step totals, rounded to a whole number
round(median(hist_data[["daily_steps"]], na.rm = TRUE), digits = 0)
```

`## [1] 10395`

The following line graph depicts average steps of each 5min interval during all days of measurement:

```
# Average number of steps for each 5-minute interval, taken across all days
# (missing readings are ignored)
ts_data <- data %>%
  group_by(interval) %>%
  summarise(steps = mean(steps, na.rm = TRUE))

# Line plot of the average daily activity profile
ggplot(data = ts_data, mapping = aes(x = interval, y = steps)) +
  geom_line() +
  labs(
    title = "Average number of steps in 5min intrevals during day",
    x = "Time in 5min intervals",
    y = "Number of steps"
  )
```

```
# Find the 5-minute interval with the highest average step count:
# average per interval, sort from largest to smallest, keep the top row
data %>%
  group_by(interval) %>%
  summarise(steps = mean(steps, na.rm = TRUE)) %>%
  arrange(desc(steps)) %>%
  slice(1)
```

```
## # A tibble: 1 x 2
## interval steps
## <int> <dbl>
## 1 835 206.1698
```

On average, the 5-minute interval starting at 8:35 a.m. contains the maximum number of steps.

There is a total of 2304 NA values in the `steps` column.

```
# Count the rows whose `steps` reading is missing
data %>%
  summarise(n = sum(is.na(steps)))
```

```
## # A tibble: 1 x 1
## n
## <int>
## 1 2304
```

In order to remove missing cases (NA values) I computed a 5min interval average across all days and replaced all NAs with it:

```
# Show each observation next to the mean of its 5-minute interval
# (the mean is computed across all days, ignoring missing readings)
data %>%
  select(steps, date, interval) %>%
  group_by(interval) %>%
  mutate(mean_steps = mean(steps, na.rm = TRUE)) %>%
  ungroup()
```

```
## # A tibble: 17,568 x 4
## steps date interval mean_steps
## <int> <date> <int> <dbl>
## 1 NA 2012-10-01 0 1.7169811
## 2 NA 2012-10-01 5 0.3396226
## 3 NA 2012-10-01 10 0.1320755
## 4 NA 2012-10-01 15 0.1509434
## 5 NA 2012-10-01 20 0.0754717
## 6 NA 2012-10-01 25 2.0943396
## 7 NA 2012-10-01 30 0.5283019
## 8 NA 2012-10-01 35 0.8679245
## 9 NA 2012-10-01 40 0.0000000
## 10 NA 2012-10-01 45 1.4716981
## # ... with 17,558 more rows
```

For future analysis I’ll keep a separate dataset where missing values have been replaced with computed averages

```
# Build a copy of the data in which every missing `steps` reading is replaced
# by the mean of its 5-minute interval (computed across all days, ignoring NAs).
# The grouping is only needed for the per-interval mean, so it is dropped
# afterwards; otherwise `dataNoNA` would stay grouped by `interval` and every
# later operation on it would silently run per group.
dataNoNA <- data %>%
  group_by(interval) %>%
  mutate(steps = ifelse(is.na(steps), mean(steps, na.rm = TRUE), steps)) %>%
  ungroup()
```

Below is a histogram of the total number of steps taken each day, computed after the missing values were replaced.

```
# Total number of steps per day, computed on the imputed data set
hist_data2 <- dataNoNA %>%
  group_by(date) %>%
  summarise(daily_steps = sum(steps, na.rm = TRUE))

# Distribution of the imputed daily totals, split into 20 bins
ggplot(data = hist_data2, mapping = aes(x = daily_steps)) +
  geom_histogram(bins = 20) +
  labs(
    title = "Histogram of the total number of steps taken each day",
    x = "Daily steps",
    y = "Number of obs"
  )
```

```
# Mean daily steps with NA's replaced:
M<- round(mean(hist_data2$daily_steps, na.rm = TRUE), 2)
M
```

`## [1] 10766.19`

```
# Median daily steps with NA's replaced:
Me<- round(median(hist_data2$daily_steps, na.rm = TRUE), 0)
Me
```

`## [1] 10766`

When NA values are replaced with the mean of the corresponding 5-minute interval, the average number of steps per day is M = 10766.19 and the median is Me = 10766.

```
# Label every observation as falling on a "weekday" or a "weekend".
# The day of the week is read from the numeric `wday` field of POSIXlt
# (0 = Sunday, 6 = Saturday) instead of from weekdays(): weekdays() returns
# day names in the session locale, so matching on the Polish names
# "sobota"/"niedziela" marks every row as "weekday" under any other locale.
dataNoNA <- dataNoNA %>%
  mutate(weekday = if_else(as.POSIXlt(date)$wday %in% c(0L, 6L),
                           "weekend", "weekday"))
```

Following is a panel plot containing a time series plot of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all weekday days or weekend days (y-axis).

```
# Panel plot of the average activity profile, one panel per day type.
# Steps are averaged (not summed) within each day type and 5-minute interval:
# there are many more weekday days than weekend days, so totals would not be
# comparable between the panels and would contradict the "Average" title.
dataNoNA %>%
  group_by(weekday, interval) %>%
  summarise(steps = mean(steps)) %>%
  ggplot(aes(x = interval, y = steps)) +
  geom_line() +
  labs(title = "Average number of steps in 5min intrevals during day",
       x = "Time in 5min intervals", y = "Number of steps") +
  facet_grid(. ~ weekday)
```