# One-time setup: install every package that is loaded below.
# Package names must be quoted with straight quotes; curly quotes are not valid
# R string delimiters, and separate calls cannot share a line without `;`.
install.packages(c(
  "readr", "tidyverse", "janitor", "lubridate",
  "here", "skimr", "ggrepel", "ggpubr"
))
library(readr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ dplyr 1.0.8
## ✓ tibble 3.1.6 ✓ stringr 1.4.0
## ✓ tidyr 1.2.0 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(here)
## here() starts at /Users/saurabh
library(skimr)
library(ggrepel)
library(ggpubr)
#Import the datasets
# Load the daily activity export (readr reports 940 rows, 15 columns).
# The path is relative to the current working directory.
dailyActivity_merged <- readr::read_csv(
  file = "Downloads/Fitabase Data 4.12.16-5.12.16/dailyActivity_merged.csv"
)
## Rows: 940 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ActivityDate
## dbl (14): Id, TotalSteps, TotalDistance, TrackerDistance, LoggedActivitiesDi...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the daily calories export (readr reports 940 rows, 3 columns).
dailyCalories_merged <- readr::read_csv(
  file = "Downloads/Fitabase Data 4.12.16-5.12.16/dailyCalories_merged.csv"
)
## Rows: 940 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ActivityDay
## dbl (2): Id, Calories
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the daily intensities export (readr reports 940 rows, 10 columns).
dailyIntensities_merged <- readr::read_csv(
  file = "Downloads/Fitabase Data 4.12.16-5.12.16/dailyIntensities_merged.csv"
)
## Rows: 940 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ActivityDay
## dbl (9): Id, SedentaryMinutes, LightlyActiveMinutes, FairlyActiveMinutes, Ve...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the daily steps export (readr reports 940 rows, 3 columns).
dailySteps_merged <- readr::read_csv(
  file = "Downloads/Fitabase Data 4.12.16-5.12.16/dailySteps_merged.csv"
)
## Rows: 940 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ActivityDay
## dbl (2): Id, StepTotal
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the heart-rate export (readr reports 2,483,658 rows, 3 columns).
heartrate_seconds_merged <- readr::read_csv(
  file = "Downloads/Fitabase Data 4.12.16-5.12.16/heartrate_seconds_merged.csv"
)
## Rows: 2483658 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Time
## dbl (2): Id, Value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the hourly calories export (readr reports 22,099 rows, 3 columns).
# NOTE(review): not referenced again in the visible script - confirm it is needed.
hourlyCalories_merged <- readr::read_csv(
  file = "Downloads/Fitabase Data 4.12.16-5.12.16/hourlyCalories_merged.csv"
)
## Rows: 22099 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ActivityHour
## dbl (2): Id, Calories
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the hourly intensities export (readr reports 22,099 rows, 4 columns).
hourlyIntensities_merged <- readr::read_csv(
  file = "Downloads/Fitabase Data 4.12.16-5.12.16/hourlyIntensities_merged.csv"
)
## Rows: 22099 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ActivityHour
## dbl (3): Id, TotalIntensity, AverageIntensity
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the hourly steps export (readr reports 22,099 rows, 3 columns).
# NOTE(review): not referenced again in the visible script - confirm it is needed.
hourlySteps_merged <- readr::read_csv(
  file = "Downloads/Fitabase Data 4.12.16-5.12.16/hourlySteps_merged.csv"
)
## Rows: 22099 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ActivityHour
## dbl (2): Id, StepTotal
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the per-minute calories export, narrow layout
# (readr reports 1,325,580 rows, 3 columns).
# NOTE(review): not referenced again in the visible script - confirm it is needed.
minuteCaloriesNarrow_merged <- readr::read_csv(
  file = "Downloads/Fitabase Data 4.12.16-5.12.16/minuteCaloriesNarrow_merged.csv"
)
## Rows: 1325580 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ActivityMinute
## dbl (2): Id, Calories
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the per-minute calories export, wide layout
# (readr reports 21,645 rows, 62 columns).
# NOTE(review): not referenced again in the visible script - confirm it is needed.
minuteCaloriesWide_merged <- readr::read_csv(
  file = "Downloads/Fitabase Data 4.12.16-5.12.16/minuteCaloriesWide_merged.csv"
)
## Rows: 21645 Columns: 62
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ActivityHour
## dbl (61): Id, Calories00, Calories01, Calories02, Calories03, Calories04, Ca...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the per-minute intensities export, narrow layout
# (readr reports 1,325,580 rows, 3 columns).
# NOTE(review): not referenced again in the visible script - confirm it is needed.
minuteIntensitiesNarrow_merged <- readr::read_csv(
  file = "Downloads/Fitabase Data 4.12.16-5.12.16/minuteIntensitiesNarrow_merged.csv"
)
## Rows: 1325580 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ActivityMinute
## dbl (2): Id, Intensity
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the per-minute intensities export, wide layout
# (readr reports 21,645 rows, 62 columns).
# NOTE(review): not referenced again in the visible script - confirm it is needed.
minuteIntensitiesWide_merged <- readr::read_csv(
  file = "Downloads/Fitabase Data 4.12.16-5.12.16/minuteIntensitiesWide_merged.csv"
)
## Rows: 21645 Columns: 62
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ActivityHour
## dbl (61): Id, Intensity00, Intensity01, Intensity02, Intensity03, Intensity0...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the per-minute METs export, narrow layout
# (readr reports 1,325,580 rows, 3 columns).
# NOTE(review): not referenced again in the visible script - confirm it is needed.
minuteMETsNarrow_merged <- readr::read_csv(
  file = "Downloads/Fitabase Data 4.12.16-5.12.16/minuteMETsNarrow_merged.csv"
)
## Rows: 1325580 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ActivityMinute
## dbl (2): Id, METs
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the per-minute sleep export (readr reports 188,521 rows, 4 columns).
# NOTE(review): not referenced again in the visible script - confirm it is needed.
minuteSleep_merged <- readr::read_csv(
  file = "Downloads/Fitabase Data 4.12.16-5.12.16/minuteSleep_merged.csv"
)
## Rows: 188521 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): date
## dbl (3): Id, value, logId
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the per-minute steps export, narrow layout
# (readr reports 1,325,580 rows, 3 columns).
# NOTE(review): not referenced again in the visible script - confirm it is needed.
minuteStepsNarrow_merged <- readr::read_csv(
  file = "Downloads/Fitabase Data 4.12.16-5.12.16/minuteStepsNarrow_merged.csv"
)
## Rows: 1325580 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ActivityMinute
## dbl (2): Id, Steps
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the per-minute steps export, wide layout
# (readr reports 21,645 rows, 62 columns).
# NOTE(review): not referenced again in the visible script - confirm it is needed.
minuteStepsWide_merged <- readr::read_csv(
  file = "Downloads/Fitabase Data 4.12.16-5.12.16/minuteStepsWide_merged.csv"
)
## Rows: 21645 Columns: 62
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ActivityHour
## dbl (61): Id, Steps00, Steps01, Steps02, Steps03, Steps04, Steps05, Steps06,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the daily sleep export (readr reports 413 rows, 5 columns).
sleepDay_merged <- readr::read_csv(
  file = "Downloads/Fitabase Data 4.12.16-5.12.16/sleepDay_merged.csv"
)
## Rows: 413 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): SleepDay
## dbl (4): Id, TotalSleepRecords, TotalMinutesAsleep, TotalTimeInBed
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the weight log export (readr reports 67 rows, 8 columns).
weightLogInfo_merged <- readr::read_csv(
  file = "Downloads/Fitabase Data 4.12.16-5.12.16/weightLogInfo_merged.csv"
)
## Rows: 67 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Date
## dbl (6): Id, WeightKg, WeightPounds, Fat, BMI, LogId
## lgl (1): IsManualReport
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Read Imported data
# Preview the first six rows of the daily activity table.
dailyActivity_merged %>% head()
## # A tibble: 6 × 15
## Id ActivityDate TotalSteps TotalDistance TrackerDistance LoggedActivitie…
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1.50e9 4/12/2016 13162 8.5 8.5 0
## 2 1.50e9 4/13/2016 10735 6.97 6.97 0
## 3 1.50e9 4/14/2016 10460 6.74 6.74 0
## 4 1.50e9 4/15/2016 9762 6.28 6.28 0
## 5 1.50e9 4/16/2016 12669 8.16 8.16 0
## 6 1.50e9 4/17/2016 9705 6.48 6.48 0
## # … with 9 more variables: VeryActiveDistance <dbl>,
## # ModeratelyActiveDistance <dbl>, LightActiveDistance <dbl>,
## # SedentaryActiveDistance <dbl>, VeryActiveMinutes <dbl>,
## # FairlyActiveMinutes <dbl>, LightlyActiveMinutes <dbl>,
## # SedentaryMinutes <dbl>, Calories <dbl>
# Preview the first six rows of the daily calories table.
dailyCalories_merged %>% head()
## # A tibble: 6 × 3
## Id ActivityDay Calories
## <dbl> <chr> <dbl>
## 1 1503960366 4/12/2016 1985
## 2 1503960366 4/13/2016 1797
## 3 1503960366 4/14/2016 1776
## 4 1503960366 4/15/2016 1745
## 5 1503960366 4/16/2016 1863
## 6 1503960366 4/17/2016 1728
# Preview the first six rows of the daily intensities table.
dailyIntensities_merged %>% head()
## # A tibble: 6 × 10
## Id ActivityDay SedentaryMinutes LightlyActiveMinutes FairlyActiveMinu…
## <dbl> <chr> <dbl> <dbl> <dbl>
## 1 1503960366 4/12/2016 728 328 13
## 2 1503960366 4/13/2016 776 217 19
## 3 1503960366 4/14/2016 1218 181 11
## 4 1503960366 4/15/2016 726 209 34
## 5 1503960366 4/16/2016 773 221 10
## 6 1503960366 4/17/2016 539 164 20
## # … with 5 more variables: VeryActiveMinutes <dbl>,
## # SedentaryActiveDistance <dbl>, LightActiveDistance <dbl>,
## # ModeratelyActiveDistance <dbl>, VeryActiveDistance <dbl>
# Preview the first six rows of the daily steps table.
dailySteps_merged %>% head()
## # A tibble: 6 × 3
## Id ActivityDay StepTotal
## <dbl> <chr> <dbl>
## 1 1503960366 4/12/2016 13162
## 2 1503960366 4/13/2016 10735
## 3 1503960366 4/14/2016 10460
## 4 1503960366 4/15/2016 9762
## 5 1503960366 4/16/2016 12669
## 6 1503960366 4/17/2016 9705
# Preview the first six rows of the heart-rate table.
heartrate_seconds_merged %>% head()
## # A tibble: 6 × 3
## Id Time Value
## <dbl> <chr> <dbl>
## 1 2022484408 4/12/2016 7:21:00 AM 97
## 2 2022484408 4/12/2016 7:21:05 AM 102
## 3 2022484408 4/12/2016 7:21:10 AM 105
## 4 2022484408 4/12/2016 7:21:20 AM 103
## 5 2022484408 4/12/2016 7:21:25 AM 101
## 6 2022484408 4/12/2016 7:22:05 AM 95
# Preview the first six rows of the daily sleep table.
sleepDay_merged %>% head()
## # A tibble: 6 × 5
## Id SleepDay TotalSleepRecor… TotalMinutesAsl… TotalTimeInBed
## <dbl> <chr> <dbl> <dbl> <dbl>
## 1 1503960366 4/12/2016 12:00:0… 1 327 346
## 2 1503960366 4/13/2016 12:00:0… 2 384 407
## 3 1503960366 4/15/2016 12:00:0… 1 412 442
## 4 1503960366 4/16/2016 12:00:0… 2 340 367
## 5 1503960366 4/17/2016 12:00:0… 1 700 712
## 6 1503960366 4/19/2016 12:00:0… 1 304 320
# Preview the first six rows of the weight log table.
weightLogInfo_merged %>% head()
## # A tibble: 6 × 8
## Id Date WeightKg WeightPounds Fat BMI IsManualReport LogId
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <lgl> <dbl>
## 1 1503960366 5/2/2016 … 52.6 116. 22 22.6 TRUE 1.46e12
## 2 1503960366 5/3/2016 … 52.6 116. NA 22.6 TRUE 1.46e12
## 3 1927972279 4/13/2016… 134. 294. NA 47.5 FALSE 1.46e12
## 4 2873212765 4/21/2016… 56.7 125. NA 21.5 TRUE 1.46e12
## 5 2873212765 5/12/2016… 57.3 126. NA 21.7 TRUE 1.46e12
## 6 4319703577 4/17/2016… 72.4 160. 25 27.5 TRUE 1.46e12
# Preview the first six rows of the hourly intensities table.
hourlyIntensities_merged %>% head()
## # A tibble: 6 × 4
## Id ActivityHour TotalIntensity AverageIntensity
## <dbl> <chr> <dbl> <dbl>
## 1 1503960366 4/12/2016 12:00:00 AM 20 0.333
## 2 1503960366 4/12/2016 1:00:00 AM 8 0.133
## 3 1503960366 4/12/2016 2:00:00 AM 7 0.117
## 4 1503960366 4/12/2016 3:00:00 AM 0 0
## 5 1503960366 4/12/2016 4:00:00 AM 0 0
## 6 1503960366 4/12/2016 5:00:00 AM 0 0
#Look for missing values
# Total number of NA cells in each table.
dailyActivity_merged %>% is.na() %>% sum()
## [1] 0
dailyCalories_merged %>% is.na() %>% sum()
## [1] 0
dailyIntensities_merged %>% is.na() %>% sum()
## [1] 0
dailySteps_merged %>% is.na() %>% sum()
## [1] 0
heartrate_seconds_merged %>% is.na() %>% sum()
## [1] 0
sleepDay_merged %>% is.na() %>% sum()
## [1] 0
weightLogInfo_merged %>% is.na() %>% sum()
## [1] 65
hourlyIntensities_merged %>% is.na() %>% sum()
## [1] 0
# Number of distinct participant Ids present in each table.
dailyActivity_merged$Id %>% n_distinct()
## [1] 33
dailyCalories_merged$Id %>% n_distinct()
## [1] 33
dailyIntensities_merged$Id %>% n_distinct()
## [1] 33
dailySteps_merged$Id %>% n_distinct()
## [1] 33
heartrate_seconds_merged$Id %>% n_distinct()
## [1] 14
sleepDay_merged$Id %>% n_distinct()
## [1] 24
weightLogInfo_merged$Id %>% n_distinct()
## [1] 8
hourlyIntensities_merged$Id %>% n_distinct()
## [1] 33
#Formatting Dates
# Convert the text date columns to POSIXct and add a short "mm/dd/yy" label.
# ActivityDate holds four-digit years (e.g. "4/12/2016"), so it must be parsed
# with %Y. With %y only the first two digits ("20") are read as the year, which
# silently turns every 2016 date into a 2020 date.
dailyActivity_merged$ActivityDate <- as.POSIXct(
  dailyActivity_merged$ActivityDate,
  format = "%m/%d/%Y",
  tz = Sys.timezone()
)
dailyActivity_merged$date <- format(
  dailyActivity_merged$ActivityDate,
  format = "%m/%d/%y"
)
# SleepDay also carries a 12-hour clock time, e.g. "4/12/2016 12:00:00 AM".
sleepDay_merged$SleepDay <- as.POSIXct(
  sleepDay_merged$SleepDay,
  format = "%m/%d/%Y %I:%M:%S %p",
  tz = Sys.timezone()
)
sleepDay_merged$date <- format(sleepDay_merged$SleepDay, format = "%m/%d/%y")
# Split ActivityHour at the first space into a Date part and an Hour part
# (the Hour part keeps its AM/PM suffix, e.g. "1:00:00 PM").
dhourlyInt <- hourlyIntensities_merged %>%
  tidyr::extract(ActivityHour, c("Date", "Hour"), "([^ ]+) (.*)")
# Per-column summary statistics for the daily activity table.
dailyActivity_merged %>% summary()
## Id ActivityDate TotalSteps
## Min. :1.504e+09 Min. :2020-04-12 00:00:00 Min. : 0
## 1st Qu.:2.320e+09 1st Qu.:2020-04-19 00:00:00 1st Qu.: 3790
## Median :4.445e+09 Median :2020-04-26 00:00:00 Median : 7406
## Mean :4.855e+09 Mean :2020-04-26 06:53:37 Mean : 7638
## 3rd Qu.:6.962e+09 3rd Qu.:2020-05-04 00:00:00 3rd Qu.:10727
## Max. :8.878e+09 Max. :2020-05-12 00:00:00 Max. :36019
## TotalDistance TrackerDistance LoggedActivitiesDistance VeryActiveDistance
## Min. : 0.000 Min. : 0.000 Min. :0.0000 Min. : 0.000
## 1st Qu.: 2.620 1st Qu.: 2.620 1st Qu.:0.0000 1st Qu.: 0.000
## Median : 5.245 Median : 5.245 Median :0.0000 Median : 0.210
## Mean : 5.490 Mean : 5.475 Mean :0.1082 Mean : 1.503
## 3rd Qu.: 7.713 3rd Qu.: 7.710 3rd Qu.:0.0000 3rd Qu.: 2.053
## Max. :28.030 Max. :28.030 Max. :4.9421 Max. :21.920
## ModeratelyActiveDistance LightActiveDistance SedentaryActiveDistance
## Min. :0.0000 Min. : 0.000 Min. :0.000000
## 1st Qu.:0.0000 1st Qu.: 1.945 1st Qu.:0.000000
## Median :0.2400 Median : 3.365 Median :0.000000
## Mean :0.5675 Mean : 3.341 Mean :0.001606
## 3rd Qu.:0.8000 3rd Qu.: 4.782 3rd Qu.:0.000000
## Max. :6.4800 Max. :10.710 Max. :0.110000
## VeryActiveMinutes FairlyActiveMinutes LightlyActiveMinutes SedentaryMinutes
## Min. : 0.00 Min. : 0.00 Min. : 0.0 Min. : 0.0
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.:127.0 1st Qu.: 729.8
## Median : 4.00 Median : 6.00 Median :199.0 Median :1057.5
## Mean : 21.16 Mean : 13.56 Mean :192.8 Mean : 991.2
## 3rd Qu.: 32.00 3rd Qu.: 19.00 3rd Qu.:264.0 3rd Qu.:1229.5
## Max. :210.00 Max. :143.00 Max. :518.0 Max. :1440.0
## Calories date
## Min. : 0 Length:940
## 1st Qu.:1828 Class :character
## Median :2134 Mode :character
## Mean :2304
## 3rd Qu.:2793
## Max. :4900
# Per-column summary statistics for the daily calories table.
dailyCalories_merged %>% summary()
## Id ActivityDay Calories
## Min. :1.504e+09 Length:940 Min. : 0
## 1st Qu.:2.320e+09 Class :character 1st Qu.:1828
## Median :4.445e+09 Mode :character Median :2134
## Mean :4.855e+09 Mean :2304
## 3rd Qu.:6.962e+09 3rd Qu.:2793
## Max. :8.878e+09 Max. :4900
# Per-column summary statistics for the daily intensities table.
dailyIntensities_merged %>% summary()
## Id ActivityDay SedentaryMinutes LightlyActiveMinutes
## Min. :1.504e+09 Length:940 Min. : 0.0 Min. : 0.0
## 1st Qu.:2.320e+09 Class :character 1st Qu.: 729.8 1st Qu.:127.0
## Median :4.445e+09 Mode :character Median :1057.5 Median :199.0
## Mean :4.855e+09 Mean : 991.2 Mean :192.8
## 3rd Qu.:6.962e+09 3rd Qu.:1229.5 3rd Qu.:264.0
## Max. :8.878e+09 Max. :1440.0 Max. :518.0
## FairlyActiveMinutes VeryActiveMinutes SedentaryActiveDistance
## Min. : 0.00 Min. : 0.00 Min. :0.000000
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.:0.000000
## Median : 6.00 Median : 4.00 Median :0.000000
## Mean : 13.56 Mean : 21.16 Mean :0.001606
## 3rd Qu.: 19.00 3rd Qu.: 32.00 3rd Qu.:0.000000
## Max. :143.00 Max. :210.00 Max. :0.110000
## LightActiveDistance ModeratelyActiveDistance VeryActiveDistance
## Min. : 0.000 Min. :0.0000 Min. : 0.000
## 1st Qu.: 1.945 1st Qu.:0.0000 1st Qu.: 0.000
## Median : 3.365 Median :0.2400 Median : 0.210
## Mean : 3.341 Mean :0.5675 Mean : 1.503
## 3rd Qu.: 4.782 3rd Qu.:0.8000 3rd Qu.: 2.053
## Max. :10.710 Max. :6.4800 Max. :21.920
# Per-column summary statistics for the daily steps table.
dailySteps_merged %>% summary()
## Id ActivityDay StepTotal
## Min. :1.504e+09 Length:940 Min. : 0
## 1st Qu.:2.320e+09 Class :character 1st Qu.: 3790
## Median :4.445e+09 Mode :character Median : 7406
## Mean :4.855e+09 Mean : 7638
## 3rd Qu.:6.962e+09 3rd Qu.:10727
## Max. :8.878e+09 Max. :36019
# Per-column summary statistics for the daily sleep table.
sleepDay_merged %>% summary()
## Id SleepDay TotalSleepRecords
## Min. :1.504e+09 Min. :2016-04-12 00:00:00 Min. :1.000
## 1st Qu.:3.977e+09 1st Qu.:2016-04-19 00:00:00 1st Qu.:1.000
## Median :4.703e+09 Median :2016-04-27 00:00:00 Median :1.000
## Mean :5.001e+09 Mean :2016-04-26 12:40:05 Mean :1.119
## 3rd Qu.:6.962e+09 3rd Qu.:2016-05-04 00:00:00 3rd Qu.:1.000
## Max. :8.792e+09 Max. :2016-05-12 00:00:00 Max. :3.000
## TotalMinutesAsleep TotalTimeInBed date
## Min. : 58.0 Min. : 61.0 Length:413
## 1st Qu.:361.0 1st Qu.:403.0 Class :character
## Median :433.0 Median :463.0 Mode :character
## Mean :419.5 Mean :458.6
## 3rd Qu.:490.0 3rd Qu.:526.0
## Max. :796.0 Max. :961.0
# Per-column summary statistics for the hourly intensities table.
hourlyIntensities_merged %>% summary()
## Id ActivityHour TotalIntensity AverageIntensity
## Min. :1.504e+09 Length:22099 Min. : 0.00 Min. :0.0000
## 1st Qu.:2.320e+09 Class :character 1st Qu.: 0.00 1st Qu.:0.0000
## Median :4.445e+09 Mode :character Median : 3.00 Median :0.0500
## Mean :4.848e+09 Mean : 12.04 Mean :0.2006
## 3rd Qu.:6.962e+09 3rd Qu.: 16.00 3rd Qu.:0.2667
## Max. :8.878e+09 Max. :180.00 Max. :3.0000
#Calculating unused device days * Let’s first see how many days the 33 participants did not use their devices. * To do this, I will count the days where SedentaryMinutes = 1440, since 1440 is the total number of minutes in a day. * It is highly unlikely that participants did not move at all for a whole day. It makes more sense that the Fitbit was not used during those days.
# Per participant, count the days logged as fully sedentary (1440 minutes);
# these are treated as days on which the device was not used.
# Participants with no such day do not appear in the result.
deviceUnused <- dailyActivity_merged %>%
  filter(SedentaryMinutes == 1440) %>%
  count(Id, name = "countofUnusedDays")
print(deviceUnused)
## # A tibble: 17 × 2
## Id countofUnusedDays
## <dbl> <int>
## 1 1503960366 1
## 2 1844505072 9
## 3 1927972279 13
## 4 4020332650 14
## 5 4057192912 1
## 6 4319703577 1
## 7 4388161847 1
## 8 4702921684 1
## 9 5577150313 2
## 10 6117666160 5
## 11 6290855005 4
## 12 6775888955 9
## 13 7007744171 1
## 14 7086361926 1
## 15 8253242879 1
## 16 8583815059 6
## 17 8792009665 9
# Number of activity records logged on each date.
print(count(dailyActivity_merged, ActivityDate, name = "count"))
## # A tibble: 31 × 2
## ActivityDate count
## <dttm> <int>
## 1 2020-04-12 00:00:00 33
## 2 2020-04-13 00:00:00 33
## 3 2020-04-14 00:00:00 33
## 4 2020-04-15 00:00:00 33
## 5 2020-04-16 00:00:00 32
## 6 2020-04-17 00:00:00 32
## 7 2020-04-18 00:00:00 32
## 8 2020-04-19 00:00:00 32
## 9 2020-04-20 00:00:00 32
## 10 2020-04-21 00:00:00 32
## # … with 21 more rows
# Per participant: the number of fully sedentary (1440-minute) days and that
# number as a percentage of the days covered by the table.
# The denominator is the count of distinct dates in the data rather than a
# hard-coded 31, so it stays correct if the date range of the export changes.
percentDaysUnused <- dailyIntensities_merged %>%
  filter(SedentaryMinutes == 1440) %>%
  group_by(Id) %>%
  summarise(
    unusedDays = n(),
    percentUnused =
      (unusedDays / n_distinct(dailyIntensities_merged$ActivityDay)) * 100
  )
head(percentDaysUnused)
## # A tibble: 6 × 3
## Id unusedDays percentUnused
## <dbl> <int> <dbl>
## 1 1503960366 1 3.23
## 2 1844505072 9 29.0
## 3 1927972279 13 41.9
## 4 4020332650 14 45.2
## 5 4057192912 1 3.23
## 6 4319703577 1 3.23
Now let's make a pie chart to show total Fitbit usage across all users: how many missed 1 day of use, how many missed up to a week of use, how many missed more than a week of use, and how many did not miss any days.
# How many participants share each unused-day count.
piechart <- percentDaysUnused %>%
  count(unusedDays, name = "totalParticipants")
piechart %>% head()
## # A tibble: 6 × 2
## unusedDays totalParticipants
## <int> <int>
## 1 1 8
## 2 2 1
## 3 4 1
## 4 5 1
## 5 6 1
## 6 9 3
# Pie chart of device usage, with the slice sizes computed from the data.
# percentDaysUnused has one row per participant with at least one unused day,
# so everyone else in the table used the device every day.
# (The earlier hard-coded shares 54/24/12/10 did not match the data: with 17 of
# 33 participants having an unused day, "Used everyday" is 16 of 33, about 48%.)
slices <- c(
  n_distinct(dailyIntensities_merged$Id) - nrow(percentDaysUnused),
  sum(percentDaysUnused$unusedDays == 1),
  sum(percentDaysUnused$unusedDays >= 2 & percentDaysUnused$unusedDays <= 7),
  sum(percentDaysUnused$unusedDays > 7)
)
lbls <- c("Used everyday", "Unused for 1 day", "Unused 2 to 7 days", "Unused >7 days")
# Slices are participant counts; the labels show each as a rounded percentage.
pct <- round(slices / sum(slices) * 100)
lbls <- paste0(lbls, " ", pct, "%")
pie(
  slices,
  labels = lbls,
  col = rainbow(length(lbls)),
  main = "Percents of Fitbit Usage"
)
Now let's make a new table that filters out the unused days and groups users by activity level, and look at it with a bar chart.
# Per participant, total the minutes in each activity level over the days the
# device was used (fully sedentary 1440-minute days are excluded), then express
# each level as a percentage of that participant's total tracked minutes.
participantActivity <- dailyActivity_merged %>%
  filter(SedentaryMinutes != 1440) %>%
  group_by(Id) %>%
  summarise(
    total_very_active_mins = sum(VeryActiveMinutes),
    total_fairly_active_mins = sum(FairlyActiveMinutes),
    total_lightly_active_mins = sum(LightlyActiveMinutes),
    total_sedentary_mins = sum(SedentaryMinutes),
    total_mins = total_very_active_mins + total_fairly_active_mins +
      total_lightly_active_mins + total_sedentary_mins,
    percent_very_active = 100 * total_very_active_mins / total_mins,
    percent_faily_active = 100 * total_fairly_active_mins / total_mins,
    percent_lightly_active = 100 * total_lightly_active_mins / total_mins,
    percent_sedentary_active = 100 * total_sedentary_mins / total_mins
  )
participantActivity %>% head()
## # A tibble: 6 × 10
## Id total_very_acti… total_fairly_ac… total_lightly_a… total_sedentary…
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1503960366 1200 594 6818 24853
## 2 1624580081 269 180 4758 38990
## 3 1644430081 287 641 5354 34856
## 4 1844505072 4 40 3579 24445
## 5 1927972279 41 24 1196 22120
## 6 2022484408 1125 600 7981 34490
## # … with 5 more variables: total_mins <dbl>, percent_very_active <dbl>,
## # percent_faily_active <dbl>, percent_lightly_active <dbl>,
## # percent_sedentary_active <dbl>
# Label each participant with an intensity group. The conditions are checked
# top to bottom and the first match wins: a participant whose very-active share
# is above the group mean is "Very Active" regardless of the other shares.
# A participant matching none of the conditions gets NA.
participantActivity <- mutate(
  participantActivity,
  intensity = case_when(
    percent_very_active > mean(percent_very_active) ~ "Very Active",
    percent_faily_active > mean(percent_faily_active) ~ "Fairly Active",
    percent_lightly_active > mean(percent_lightly_active) ~ "Lightly Active",
    percent_sedentary_active > mean(percent_sedentary_active) ~ "Sedentary"
  )
)
# Number of participants in each intensity group.
newActivity <- count(participantActivity, intensity, name = "count")
# Bar chart of the number of participants in each intensity group.
# geom_col() draws bars whose heights are the supplied y values, which is what
# this plot needs; geom_histogram(stat = "identity") only worked by accident
# and raised "Ignoring unknown parameters: binwidth, bins, pad".
ggplot(newActivity, aes(x = intensity, y = count, fill = intensity)) +
  geom_col() +
  ylab("Number of Participants") +
  xlab("Intensity Type") +
  labs(title = "Number of Participants by Intensity") +
  theme(legend.position = "none")
# Mean TotalIntensity for each hour-of-day label, after dropping rows that
# contain any missing value.
newHourlyInt <- dhourlyInt %>%
  drop_na() %>%
  group_by(Hour) %>%
  summarise(mean_total_int = mean(TotalIntensity))
newHourlyInt %>% head()
## # A tibble: 6 × 2
## Hour mean_total_int
## <chr> <dbl>
## 1 1:00:00 AM 1.42
## 2 1:00:00 PM 18.8
## 3 10:00:00 AM 17.6
## 4 10:00:00 PM 9.06
## 5 11:00:00 AM 16.9
## 6 11:00:00 PM 5.00
# Bar chart of mean total intensity for each hour of the day.
# Hour is text such as "1:00:00 PM", which would sort alphabetically
# ("1:00:00 AM", "1:00:00 PM", "10:00:00 AM", ...). reorder() arranges the bars
# by the parsed clock time instead, so the axis runs from midnight to 11 PM.
# tz = "UTC" keeps the parse independent of local daylight-saving gaps.
# geom_col() replaces geom_histogram(stat = "identity"), which raised an
# "Ignoring unknown parameters" warning.
ggplot(
  data = newHourlyInt,
  aes(
    x = reorder(
      Hour,
      as.numeric(as.POSIXct(Hour, format = "%I:%M:%S %p", tz = "UTC"))
    ),
    y = mean_total_int
  )
) +
  geom_col(fill = "cyan4") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    title = "Average Total Intensity vs. Time",
    x = "Hour",
    y = "Average Intensity"
  )
**Looks like people are most active between 5:00 PM and 7:00 PM.** _This would be a good time to send out a notification to remind and motivate the sedentary users (or all users) to be more active._
# Pair each sleep record with the same participant's activity record for the
# same calendar day, then drop days on which the device was not used.
# Merging on Id alone would pair every sleep record with every activity day of
# that participant, so unrelated days would be plotted against each other.
# The day key is month/day only: the data covers a single month (4/12/16 to
# 5/12/16 per the data folder name), so it identifies a day uniquely, and the
# join does not depend on how the year was parsed upstream.
merged_data <- merge(
  mutate(sleepDay_merged, day_key = format(SleepDay, "%m/%d")),
  mutate(dailyActivity_merged, day_key = format(ActivityDate, "%m/%d")),
  by = c("Id", "day_key")
) %>%
  filter(SedentaryMinutes != 1440)
# Scatter plot of minutes asleep against sedentary minutes, with a smoothed
# trend line drawn over the points.
ggplot(merged_data, aes(TotalMinutesAsleep, SedentaryMinutes)) +
  geom_point(colour = "orange") +
  geom_smooth(colour = "blue") +
  labs(
    title = "Minutes Asleep vs.Sedentary Minutes",
    x = "Total Minutes Asleep",
    y = "Sedentary Minutes"
  )
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#Conclusion
The Bellabeat Time device is similar to the Fitbit watches, and the apps are also comparable. The following strategies can be used to increase usage of both the device and the app.
The average steps per day are 7638, yet CDC research shows that 8000 steps or more is associated with a lower risk for mortality. The Bellabeat app can explain this health benefit. It could also send out notifications by app and/or Time device to remind users to be more active by walking or running each day.
Since most activity happens between 5pm and 7pm, Bellabeat should consider this a good time to send out those reminders, specifically to the more sedentary users, to keep them on track.
A decrease in active minutes leads to a decrease in sleep, so Bellabeat can also explain in the app the health benefits of getting at least 8 hours of sleep, and how activity levels affect quality of sleep. If users want to improve their sleep, Bellabeat should send out notifications by app and/or Time device to remind users to be more active by walking or running each day.
Lastly, wearing the device every day is important for tracking each user's health. Bellabeat should consider sending out a notification via the app to remind users to wear their device every day to get the most benefit. A good time to do this could be when the heart rate is at 0 for a specified amount of time.