This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Load the activity data. The file is addressed by its full path instead of
# calling setwd(), so the session's working directory is left untouched.
data_dir <- "C:/Users/hp/Downloads/repdata_data_activity"
activity <- read.csv(file.path(data_dir, "activity.csv"))
# View the first few rows of the dataset
head(activity)
## steps date interval
## 1 NA 2012-10-01 0
## 2 NA 2012-10-01 5
## 3 NA 2012-10-01 10
## 4 NA 2012-10-01 15
## 5 NA 2012-10-01 20
## 6 NA 2012-10-01 25
What is the mean total number of steps taken per day?
# Total steps recorded on each date. Because the sum uses na.rm = TRUE, a date
# whose readings are all NA ends up with a total of 0 rather than NA.
activity_total_steps <- aggregate(
  activity["steps"],
  by = activity["date"],
  FUN = sum,
  na.rm = TRUE
)
# Histogram of the daily totals in bins of 2500 steps
daily_breaks <- seq(0, 25000, by = 2500)
hist(
  activity_total_steps$steps,
  breaks = daily_breaks,
  ylim = c(0, 20),
  col = "darkblue",
  xlab = "Total steps taken per day",
  main = "Total number of steps taken per day"
)
# Mean and median of the daily totals
mean_steps <- with(activity_total_steps, mean(steps, na.rm = TRUE))
median_steps <- with(activity_total_steps, median(steps, na.rm = TRUE))
mean_steps
## [1] 9354.23
median_steps
## [1] 10395
What is the average daily activity pattern?
# Mean step count for each interval identifier, averaged over all dates
# (missing readings are dropped before averaging)
average_daily_activity <- aggregate(
  activity["steps"],
  by = activity["interval"],
  FUN = mean,
  na.rm = TRUE
)
names(average_daily_activity)[2] <- "mean"
# Line plot of the per-interval means
with(
  average_daily_activity,
  plot(
    interval, mean,
    type = "l", lwd = 2, col = "darkblue",
    main = "Average number of steps per interval",
    xlab = "Interval", ylab = "Average number of steps"
  )
)
# Row holding the largest per-interval mean
peak_row <- which.max(average_daily_activity$mean)
average_daily_activity[peak_row, ]
## interval mean
## 104 835 206.1698
Imputing Missing Values
# Number of rows whose step count is missing
sum(is.na(activity$steps))
## [1] 2304
# Replace each missing step count with the mean for its interval, looked up
# in average_daily_activity; observed counts are kept unchanged
interval_row <- match(activity$interval, average_daily_activity$interval)
imputed_steps <- average_daily_activity$mean[interval_row]
is_missing <- is.na(activity$steps)
activity_imputed <- activity
activity_imputed$steps[is_missing] <- imputed_steps[is_missing]
# Daily totals of the imputed data, shown as a histogram in bins of 2500 steps
total_steps_imputed <- setNames(
  aggregate(steps ~ date, data = activity_imputed, FUN = sum),
  c("date", "daily_steps")
)
hist(
  total_steps_imputed$daily_steps,
  breaks = seq(0, 25000, by = 2500),
  ylim = c(0, 30),
  col = "darkblue",
  main = "Total number of steps taken each day (Imputed)",
  xlab = "Total steps per day"
)
Are there differences in activity patterns between weekdays and
weekends?
# Load ggplot2 for plotting
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
# Add `datetype` column for weekday/weekend classification
activity_imputed$date <- as.Date(activity_imputed$date, format = "%Y-%m-%d")
# Classify on the numeric day of the week instead of weekdays() names: those
# names follow the session locale, so a comparison with "Saturday"/"Sunday"
# would label every day "Weekday" outside an English locale. POSIXlt `wday`
# counts 0-6 starting on Sunday, so 0 and 6 are the weekend days. The whole
# column is handled in one vectorised step.
is_weekend <- as.POSIXlt(activity_imputed$date)$wday %in% c(0L, 6L)
activity_imputed$datetype <- ifelse(is_weekend, "Weekend", "Weekday")
# Mean steps for every combination of interval and day type
activity_by_date <- aggregate(
  steps ~ interval + datetype,
  data = activity_imputed,
  FUN = mean
)
# One line panel per day type, stacked vertically
pattern_plot <- ggplot(
  data = activity_by_date,
  mapping = aes(x = interval, y = steps, colour = datetype)
) +
  geom_line() +
  facet_wrap(~datetype, ncol = 1, nrow = 2) +
  labs(
    title = "Average Daily Steps by Type of Day",
    x = "Interval",
    y = "Average Number of Steps"
  ) +
  theme_minimal()
pattern_plot