1.0 INTRODUCTION
1.1 LOADING THE REQUIRED LIBRARIES
1.2 LOADING THE DATASETS
1.3 DATASETS BASIC STATISTICS
2.0 DATA WRANGLING
2.1 CLEANING THE SLEEP TABLE
2.2 CLEANING THE DAILY TABLE
3.0 ANALYSIS
3.1 DAILY STATISTICS OF USERS
3.2 SLEEP STATISTICS OF USERS
3.3 USERS SLEEP PATTERN
3.4 USERS ACTIVE AND SEDENTARY MINUTES
3.5 USERS STEPS
3.6 USERS DAILY STATISTICS TRENDS
3.7 USERS STATISTICS ON DIFFERENT DAYS
3.8 USERS METRICS ON WEEKENDS AND WEEK DAYS
3.9 USERS PERFORMANCE
4.0 CONCLUSION
Bellalbeat is a high-tech company that manufactures smart health products.They manufacture smart devices that monitor users daily activities, steps, calories and sleep .The data used in this analysis are from FitBit users personal fitness tracker available on Kaggle. This study will conduct an analysis on how consumers uses its smart devices, in order to gain valuable insights that will inform business decisions and marketing strategies.
library(tidyverse)
library(lubridate )
library(ggplot2)
library(DT)
library(prettydoc)
library(kableExtra)
# Read the daily-activity and sleep CSV exports into tibbles.
daily <- readr::read_csv(file = "dailyActivity_merged.csv")
sleep <- readr::read_csv(file = "sleepDay_merged.csv")
# Show the structure of the daily activity table: column names, types and a
# preview of the first values.
str(daily)
## spec_tbl_df [940 × 15] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Id : num [1:940] 1.5e+09 1.5e+09 1.5e+09 1.5e+09 1.5e+09 ...
## $ ActivityDate : chr [1:940] "4/12/2016" "4/13/2016" "4/14/2016" "4/15/2016" ...
## $ TotalSteps : num [1:940] 13162 10735 10460 9762 12669 ...
## $ TotalDistance : num [1:940] 8.5 6.97 6.74 6.28 8.16 ...
## $ TrackerDistance : num [1:940] 8.5 6.97 6.74 6.28 8.16 ...
## $ LoggedActivitiesDistance: num [1:940] 0 0 0 0 0 0 0 0 0 0 ...
## $ VeryActiveDistance : num [1:940] 1.88 1.57 2.44 2.14 2.71 ...
## $ ModeratelyActiveDistance: num [1:940] 0.55 0.69 0.4 1.26 0.41 ...
## $ LightActiveDistance : num [1:940] 6.06 4.71 3.91 2.83 5.04 ...
## $ SedentaryActiveDistance : num [1:940] 0 0 0 0 0 0 0 0 0 0 ...
## $ VeryActiveMinutes : num [1:940] 25 21 30 29 36 38 42 50 28 19 ...
## $ FairlyActiveMinutes : num [1:940] 13 19 11 34 10 20 16 31 12 8 ...
## $ LightlyActiveMinutes : num [1:940] 328 217 181 209 221 164 233 264 205 211 ...
## $ SedentaryMinutes : num [1:940] 728 776 1218 726 773 ...
## $ Calories : num [1:940] 1985 1797 1776 1745 1863 ...
## - attr(*, "spec")=
## .. cols(
## .. Id = col_double(),
## .. ActivityDate = col_character(),
## .. TotalSteps = col_double(),
## .. TotalDistance = col_double(),
## .. TrackerDistance = col_double(),
## .. LoggedActivitiesDistance = col_double(),
## .. VeryActiveDistance = col_double(),
## .. ModeratelyActiveDistance = col_double(),
## .. LightActiveDistance = col_double(),
## .. SedentaryActiveDistance = col_double(),
## .. VeryActiveMinutes = col_double(),
## .. FairlyActiveMinutes = col_double(),
## .. LightlyActiveMinutes = col_double(),
## .. SedentaryMinutes = col_double(),
## .. Calories = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# List the column names of the daily activity table.
colnames(daily)
## [1] "Id" "ActivityDate"
## [3] "TotalSteps" "TotalDistance"
## [5] "TrackerDistance" "LoggedActivitiesDistance"
## [7] "VeryActiveDistance" "ModeratelyActiveDistance"
## [9] "LightActiveDistance" "SedentaryActiveDistance"
## [11] "VeryActiveMinutes" "FairlyActiveMinutes"
## [13] "LightlyActiveMinutes" "SedentaryMinutes"
## [15] "Calories"
# Report the number of rows and columns in the daily activity table.
dim(daily)
## [1] 940 15
# Show the structure of the sleep table: column names, types and a preview of
# the first values.
str(sleep)
## spec_tbl_df [413 × 5] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Id : num [1:413] 1.5e+09 1.5e+09 1.5e+09 1.5e+09 1.5e+09 ...
## $ SleepDay : chr [1:413] "4/12/2016 12:00:00 AM" "4/13/2016 12:00:00 AM" "4/15/2016 12:00:00 AM" "4/16/2016 12:00:00 AM" ...
## $ TotalSleepRecords : num [1:413] 1 2 1 2 1 1 1 1 1 1 ...
## $ TotalMinutesAsleep: num [1:413] 327 384 412 340 700 304 360 325 361 430 ...
## $ TotalTimeInBed : num [1:413] 346 407 442 367 712 320 377 364 384 449 ...
## - attr(*, "spec")=
## .. cols(
## .. Id = col_double(),
## .. SleepDay = col_character(),
## .. TotalSleepRecords = col_double(),
## .. TotalMinutesAsleep = col_double(),
## .. TotalTimeInBed = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# List the column names of the sleep table.
colnames(sleep)
## [1] "Id" "SleepDay" "TotalSleepRecords"
## [4] "TotalMinutesAsleep" "TotalTimeInBed"
# Report the number of rows and columns in the sleep table.
dim(sleep)
## [1] 413 5
# Parse SleepDay (text such as "4/12/2016 12:00:00 AM") into a Date column
# named Date, then drop the original text column.
# "%Y" matches the four-digit year. "%y" would consume only the first two
# digits ("20") and silently yield dates in 2020, which shifts every weekday
# computed later. as.Date() ignores the trailing time-of-day text.
sleep <- sleep %>%
  mutate(Date = as.Date(SleepDay, format = "%m/%d/%Y")) %>%
  select(-SleepDay)
*The SleepDay column is converted to Date datatype and renamed.
# Parse ActivityDate (text such as "4/12/2016") into a Date column named Date,
# then drop the original text column.
# "%Y" matches the four-digit year. "%y" would consume only the first two
# digits ("20") and silently yield dates in 2020, which shifts every weekday
# computed later.
daily <- daily %>%
  mutate(Date = as.Date(ActivityDate, format = "%m/%d/%Y")) %>%
  select(-ActivityDate)
*The ActivityDate column is converted to Date datatype and renamed.
# Average daily metrics for each user (one row per Id).
Id_stats_daily <- daily %>%
  group_by(Id) %>%
  summarise(
    steps = mean(TotalSteps),
    calorie = mean(Calories),
    sedentary = mean(SedentaryMinutes),
    active = mean(VeryActiveMinutes)
  )

# The same averages taken over every daily record, without grouping by user.
stats_daily <- summarise(
  daily,
  steps = mean(TotalSteps),
  calorie = mean(Calories),
  sedentary = mean(SedentaryMinutes),
  active = mean(VeryActiveMinutes)
)

# Interactive per-user table with column filters at the bottom, followed by a
# static table of the overall averages.
datatable(Id_stats_daily, filter = "bottom")
kable(stats_daily)
| steps | calorie | sedentary | active |
|---|---|---|---|
| 7637.911 | 2303.61 | 991.2106 | 21.16489 |
We get the average of users daily statistics and the average for all users. 33 Users data are available
# Average minutes asleep and minutes in bed for each user (one row per Id).
Id_sleep_stats <- sleep %>%
  group_by(Id) %>%
  summarise(
    sleep = mean(TotalMinutesAsleep),
    time_in_bed = mean(TotalTimeInBed)
  )

# The same averages taken over every sleep record, without grouping by user.
sleep_stats <- summarise(
  sleep,
  sleep = mean(TotalMinutesAsleep),
  time_in_bed = mean(TotalTimeInBed)
)

# Interactive per-user table with column filters at the bottom, followed by a
# captioned static table of the overall averages.
datatable(Id_sleep_stats, filter = "bottom")
kable(sleep_stats, caption = "Sleep Statistics (Averages)")
| sleep | time_in_bed |
|---|---|
| 419.4673 | 458.6392 |
We get the average of users sleep statistics and the average for all users. 24 Users data are available.
Analysis on whether users are getting enough sleep, using an average of 7 hours or 420 minutes benchmark for a good night sleep.
# Benchmark for a good night's sleep: 7 hours, expressed in minutes.
sleep_benchmark <- 420

# Users whose average minutes asleep fall short of the benchmark.
less_sleep <- Id_sleep_stats %>%
  filter(sleep < sleep_benchmark)

# Of those, users whose average time in bed is also below the benchmark.
less_time_in_bed <- less_sleep %>%
  filter(time_in_bed < sleep_benchmark)

# Label every user. Meeting the benchmark exactly counts as "Enough", so the
# labels partition users the same way as the `less_sleep` filter above
# (previously a user at exactly 420 was "Not_enough" yet absent from
# `less_sleep`).
sleep_categorized <- Id_sleep_stats %>%
  mutate(
    sleep_categorize = ifelse(sleep >= sleep_benchmark, "Enough", "Not_enough")
  )

# Number of users in each sleep category.
sleep_pattern <- sleep_categorized %>%
  group_by(sleep_categorize) %>%
  summarise(count = n())

# Bar chart of the category counts.
ggplot(data = sleep_pattern, aes(x = sleep_categorize, y = count)) +
  geom_bar(stat = "identity", fill = "orange", color = "black") +
  ggtitle("Users sleep pattern")
Analysis on time spent in bed by users
# Scatter plot of each user's average minutes asleep against average time in
# bed, coloured by sleep category.
sleep_categorized %>%
  ggplot(aes(x = sleep, y = time_in_bed, colour = sleep_categorize)) +
  geom_point() +
  labs(title = "Users sleep and time in bed pattern")
*increased time in bed leads to enough sleep
Analysis on users active and sedentary minutes. Users with more sedentary minutes than the average.
# Overall average sedentary minutes, using the rounded figure (991) that the
# report adopts as the benchmark for this metric.
sedentary_average <- 991

# Users whose average sedentary minutes exceed the overall average.
# The comparison is against the average, as the accompanying text describes,
# rather than against each user's own active minutes.
more_sedentary <- Id_stats_daily %>%
  filter(sedentary > sedentary_average)

# Label every user as above ("More") or not above ("Less") the average.
sedentary_categorized <- Id_stats_daily %>%
  mutate(
    sedentary_categorize = ifelse(sedentary > sedentary_average, "More", "Less")
  )

# Number of users in each sedentary category.
sedentary_pattern <- sedentary_categorized %>%
  group_by(sedentary_categorize) %>%
  summarise(count = n())

# Bar chart of the category counts.
ggplot(data = sedentary_pattern, aes(x = sedentary_categorize, y = count)) +
  geom_bar(stat = "identity", fill = "orange", color = "black") +
  ggtitle("Users sedentary pattern")
*More Users are having higher sedentary minutes than the average
Analysis to comparing the users steps and get details for users with more sedentary minutes than the average
# Label each user by whether their average daily steps exceed 7637.
steps_categorized <- mutate(
  Id_stats_daily,
  steps_categorize = if_else(steps > 7637, "Above", "Less")
)

# Number of users in each steps category.
steps_pattern <- count(steps_categorized, steps_categorize, name = "count")

# Bar chart of the category counts.
steps_pattern %>%
  ggplot(aes(x = steps_categorize, y = count)) +
  geom_col(fill = "orange", colour = "black") +
  labs(title = "Users steps pattern")
*18 (54%) users have less steps than the average
Analysis on the trends for different users metrics: calories, steps, sedentary minutes and active minutes
# Trend: average of each daily metric across all users, one row per Date.
Id_stats_date <- daily %>%
  group_by(Date) %>%
  summarise(
    steps = mean(TotalSteps),
    calorie = mean(Calories),
    sedentary = mean(SedentaryMinutes),
    active = mean(VeryActiveMinutes)
  )
# One scatter plot per metric, showing its per-date average over time.
Id_stats_date %>%
  ggplot(aes(x = Date, y = calorie)) +
  geom_point() +
  labs(title = "Calorie Trend")

Id_stats_date %>%
  ggplot(aes(x = Date, y = steps)) +
  geom_point() +
  labs(title = "Steps Trend")

Id_stats_date %>%
  ggplot(aes(x = Date, y = sedentary)) +
  geom_point() +
  labs(title = "Sedentary Minutes Trend")

Id_stats_date %>%
  ggplot(aes(x = Date, y = active)) +
  geom_point() +
  labs(title = "Active Minutes Trend")
Users sleep and time in bed trend
# Average minutes asleep and minutes in bed across all users, one row per Date.
Id_sleep_date <- sleep %>%
  group_by(Date) %>%
  summarise(
    sleep = mean(TotalMinutesAsleep),
    time_in_bed = mean(TotalTimeInBed)
  )

# Scatter plots of the two per-date averages over time.
Id_sleep_date %>%
  ggplot(aes(x = Date, y = sleep)) +
  geom_point() +
  labs(title = "sleep Trend")

Id_sleep_date %>%
  ggplot(aes(x = Date, y = time_in_bed)) +
  geom_point() +
  labs(title = "Time In Bed Trend")
Analysis on users daily metris on weekdays . We categorize date into week days and analyze users steps, calories, sedentary minutes and active minutes on different days
# Average of each daily metric by day of the week (labelled, e.g. Sun, Mon).
weekday_daily <- daily %>%
  mutate(weekday = wday(Date, label = TRUE)) %>%
  group_by(weekday) %>%
  summarise(
    steps = mean(TotalSteps),
    calorie = mean(Calories),
    sedentary = mean(SedentaryMinutes),
    active = mean(VeryActiveMinutes)
  )

# One bar chart per metric, with a bar for each day of the week.
weekday_daily %>%
  ggplot(aes(x = weekday, y = steps)) +
  geom_col(fill = "orange", colour = "black") +
  labs(title = "Steps On Different Days")

weekday_daily %>%
  ggplot(aes(x = weekday, y = calorie)) +
  geom_col(fill = "orange", colour = "black") +
  labs(title = "Calorie On Different Days")

weekday_daily %>%
  ggplot(aes(x = weekday, y = sedentary)) +
  geom_col(fill = "orange", colour = "black") +
  labs(title = "Sedentary Minutes On Different Days")

weekday_daily %>%
  ggplot(aes(x = weekday, y = active)) +
  geom_col(fill = "orange", colour = "black") +
  labs(title = "Active Minutes On Different Days")
# Author's observation: more active on weekends.
# NOTE(review): re-check against the plots above.
# Average minutes asleep and minutes in bed by day of the week.
# NOTE(review): the next chunk recomputes this exact summary under the same
# name; one of the two definitions is redundant.
weekday_sleep <- sleep %>%
  mutate(weekday = wday(Date, label = TRUE)) %>%
  group_by(weekday) %>%
  summarise(
    sleep = mean(TotalMinutesAsleep),
    time_in_bed = mean(TotalTimeInBed)
  )
*Users are more active on weekends than on week days
sleep minutes and time in bed on different days.
# Average minutes asleep and minutes in bed by day of the week.
weekday_sleep <- sleep %>%
  mutate(weekday = wday(Date, label = TRUE)) %>%
  group_by(weekday) %>%
  summarise(
    sleep = mean(TotalMinutesAsleep),
    time_in_bed = mean(TotalTimeInBed)
  )

# Bar chart of average minutes asleep for each day of the week.
weekday_sleep %>%
  ggplot(aes(x = weekday, y = sleep)) +
  geom_col(fill = "orange", colour = "black") +
  labs(title = "Sleep Minutes On Different Days")
# Author's observation: more sleep on weekends.
# NOTE(review): re-check against the plot above.

# Bar chart of average time in bed for each day of the week.
weekday_sleep %>%
  ggplot(aes(x = weekday, y = time_in_bed)) +
  geom_col(fill = "orange", colour = "black") +
  labs(title = "Time In Bed Minutes On Different Days")
Analysis on users steps, calories, sedentary minutes and active minutes metrics on weekends and week days.
# Average of each daily metric on weekdays versus weekends.
# Fixes two defects in the previous labelling: days 6 and 7 under lubridate's
# default numbering (Sunday = 1) are Friday and Saturday, not the weekend, and
# the "Weekday"/"Weekend" labels were swapped.
week_daily <- daily %>%
  mutate(
    # week_start = 1 numbers the days Monday = 1 ... Sunday = 7 regardless of
    # the lubridate.week.start option, so 6 and 7 are Saturday and Sunday.
    day = wday(Date, week_start = 1),
    day_categorize = ifelse(day >= 6, "Weekend", "Weekday")
  ) %>%
  group_by(day_categorize) %>%
  summarize(
    steps = mean(TotalSteps),
    calorie = mean(Calories),
    sedentary = mean(SedentaryMinutes),
    active = mean(VeryActiveMinutes)
  )
# One bar chart per metric comparing the weekday and weekend averages.
ggplot(data = week_daily, aes(x = day_categorize, y = steps)) +
  geom_bar(stat = "identity", fill = "orange", color = "black") +
  ggtitle("Steps On Weekdays And Weekends")

ggplot(data = week_daily, aes(x = day_categorize, y = calorie)) +
  geom_bar(stat = "identity", fill = "orange", color = "black") +
  ggtitle("Calories On Weekdays And Weekends")

# Title typo fixed: "Sinutes" -> "Minutes".
ggplot(data = week_daily, aes(x = day_categorize, y = sedentary)) +
  geom_bar(stat = "identity", fill = "orange", color = "black") +
  ggtitle("Sedentary Minutes On Weekdays And Weekends")

ggplot(data = week_daily, aes(x = day_categorize, y = active)) +
  geom_bar(stat = "identity", fill = "orange", color = "black") +
  ggtitle("Active Minutes On Weekdays And Weekends")
We will categorize users sleep on weekends and week days and time in bed minutes on weekdays and weekend
# Average minutes asleep and minutes in bed on weekdays versus weekends.
# Fixes two defects in the previous labelling: days 6 and 7 under lubridate's
# default numbering (Sunday = 1) are Friday and Saturday, not the weekend, and
# the "Weekday"/"Weekend" labels were swapped.
week_sleep <- sleep %>%
  mutate(
    # week_start = 1 numbers the days Monday = 1 ... Sunday = 7 regardless of
    # the lubridate.week.start option, so 6 and 7 are Saturday and Sunday.
    day = wday(Date, week_start = 1),
    day_categorize = ifelse(day >= 6, "Weekend", "Weekday")
  ) %>%
  group_by(day_categorize) %>%
  summarize(
    sleep = mean(TotalMinutesAsleep),
    time_in_bed = mean(TotalTimeInBed)
  )
# Bar charts comparing the two day categories: first average minutes asleep,
# then average time in bed.
week_sleep %>%
  ggplot(aes(x = day_categorize, y = sleep)) +
  geom_col(fill = "orange", colour = "black") +
  labs(title = "Sleep Minutes On Weekdays And Weekends")

week_sleep %>%
  ggplot(aes(x = day_categorize, y = time_in_bed)) +
  geom_col(fill = "orange", colour = "black") +
  labs(title = "Time In Bed Minutes On Weekdays And Weekends")
*Users spend more time in bed and sleep more on weekdays than on weekends.
We categorizing users into top, average and worse performers using the different metrics averages. * for active minutes 21 is the average metrice score, * for sedentary minutes 991 is the average metrice score, * for steps 7637 is the average metrice score,
# Users beating all three benchmarks: more than 21 active minutes, fewer than
# 991 sedentary minutes and more than 7637 steps on average.
top_performers <- filter(
  Id_stats_daily,
  active > 21,
  sedentary < 991,
  steps > 7637
)

# Users on the wrong side of all three benchmarks.
worse_performers <- filter(
  Id_stats_daily,
  active < 21,
  sedentary > 991,
  steps < 7637
)
# Label every user: top performers beat all three benchmarks, worse performers
# miss all three, and everyone else is an average performer.
users_categorized <- mutate(
  Id_stats_daily,
  users_categorize = if_else(
    active > 21 & sedentary < 991 & steps > 7637,
    "Top_performers",
    if_else(
      active < 21 & sedentary > 991 & steps < 7637,
      "Worse_performers",
      "Average_performers"
    )
  )
)
# Number of users in each performance category.
users_categories <- count(users_categorized, users_categorize, name = "count")

# Bar chart of the category counts.
users_categories %>%
  ggplot(aes(x = users_categorize, y = count)) +
  geom_col(fill = "orange", colour = "black") +
  labs(title = "Users Category")