R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Locate the dataset without calling setwd(): changing the working directory
# is a session-wide side effect that alters every later relative path.
data_dir <- "C:/Users/hp/Downloads/repdata_data_activity"
data_file <- file.path(data_dir, "activity.csv")

# Fall back to a copy in the current directory so the analysis also runs on
# machines where the original download folder does not exist.
if (!file.exists(data_file)) {
  data_file <- "activity.csv"
}

# Read the activity data; the preview below shows the columns `steps`,
# `date` and `interval`.
activity <- read.csv(data_file)

# View the first few rows of the dataset
head(activity)
##   steps       date interval
## 1    NA 2012-10-01        0
## 2    NA 2012-10-01        5
## 3    NA 2012-10-01       10
## 4    NA 2012-10-01       15
## 5    NA 2012-10-01       20
## 6    NA 2012-10-01       25

What is the mean total number of steps taken per day?

# Calculate the total number of steps per day.
# The formula interface drops rows whose `steps` is NA before summing, so days
# without any recorded data are left out instead of being counted as 0-step
# days (which is what sum(..., na.rm = TRUE) over an all-NA day produces and
# which drags the mean and median down).
activity_total_steps <- aggregate(steps ~ date, data = activity, FUN = sum)

# Plot the histogram
hist(
  activity_total_steps$steps,
  main = "Total number of steps taken per day",
  xlab = "Total steps taken per day",
  col = "darkblue",
  ylim = c(0, 20),
  breaks = seq(0, 25000, by = 2500)
)

# Calculate and display the mean and median.
# No na.rm is needed here: every remaining daily total is a complete sum.
mean_steps <- mean(activity_total_steps$steps)
median_steps <- median(activity_total_steps$steps)

mean_steps
## [1] 10766.19
median_steps
## [1] 10765

What is the average daily activity pattern?

# Mean number of steps for each interval, averaged across all days;
# missing step counts are skipped via na.rm = TRUE.
average_daily_activity <- with(
  activity,
  aggregate(steps, by = list(interval), FUN = mean, na.rm = TRUE)
)
colnames(average_daily_activity) <- c("interval", "mean")

# Draw the per-interval averages as a line chart
with(
  average_daily_activity,
  plot(
    interval, mean,
    type = "l", col = "darkblue", lwd = 2,
    xlab = "Interval",
    ylab = "Average number of steps",
    main = "Average number of steps per interval"
  )
)

# Show the row of the interval whose average step count is highest
average_daily_activity[which.max(average_daily_activity[["mean"]]), ]
##     interval     mean
## 104      835 206.1698

Imputing Missing Values

# Flag the rows whose step count is missing and report how many there are
is_missing <- is.na(activity$steps)
sum(is_missing)
## [1] 2304
# For every row, look up the mean step count of that row's interval
imputed_steps <- average_daily_activity$mean[
  match(activity$interval, average_daily_activity$interval)
]
# Copy the data, replacing only the missing step counts with the interval mean
activity_imputed <- activity
activity_imputed$steps <- ifelse(is_missing, imputed_steps, activity$steps)

# Daily totals of the imputed data, with the total column named `daily_steps`
total_steps_imputed <- setNames(
  aggregate(steps ~ date, data = activity_imputed, FUN = sum),
  c("date", "daily_steps")
)

# Histogram of the imputed daily totals
hist(
  total_steps_imputed$daily_steps,
  col = "darkblue",
  xlab = "Total steps per day",
  main = "Total number of steps taken each day (Imputed)",
  ylim = c(0, 30),
  breaks = seq(0, 25000, by = 2500)
)

Are there differences in activity patterns between weekdays and weekends?

# Load ggplot2 for plotting
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
# Add `datetype` column for weekday/weekend classification.
activity_imputed$date <- as.Date(activity_imputed$date, format = "%Y-%m-%d")

# Classify by the numeric day of week (POSIXlt `wday`: 0 = Sunday,
# 6 = Saturday) instead of weekdays(), whose day names depend on the session
# locale and would label every day "Weekday" under a non-English locale.
# The test is vectorized, so no per-row function call is needed.
is_weekend <- as.POSIXlt(activity_imputed$date)$wday %in% c(0L, 6L)
activity_imputed$datetype <- ifelse(is_weekend, "Weekend", "Weekday")

# Aggregate data by interval and datetype: mean steps per interval, computed
# separately for weekdays and weekends
activity_by_date <- aggregate(
  steps ~ interval + datetype,
  data = activity_imputed,
  FUN = mean
)

# Plot the data: one line-chart panel per day type, stacked vertically
ggplot(activity_by_date, aes(x = interval, y = steps, color = datetype)) +
  geom_line() +
  labs(
    title = "Average Daily Steps by Type of Day",
    x = "Interval",
    y = "Average Number of Steps"
  ) +
  facet_wrap(~datetype, ncol = 1, nrow = 2) +
  theme_minimal()