This is a detailed analysis of spreadsheet data from a fitness smartwatch that tracks calories, hours slept, intensity levels, and overall activity. The original dataset can be found online on Kaggle; the modified and cleaned version is available on my profile at Fitbit Project Portfolio.
In this analysis, I clean the data and aggregate the datetime columns from the various spreadsheets so that each of the CSV files (dailyActivity, dailySleep, dailyCalories, dailyIntensities, dailySteps) uses a uniform per-day unit. Additionally, I create a column that combines the activity day and Id to form a unique identifier (there are multiple observations per Id; see hourlyIntensities). After cleaning the data, I use R code to merge and join the CSV files and begin looking for trends and patterns (the joins could have been done in SQL, but the number of observations is relatively small).
library(ggplot2)
library(ggridges)
library(tidyr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(skimr)
library(dplyr)
library(viridis)
## Loading required package: viridisLite
library(SmartEDA)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(dlookr)
## Registered S3 methods overwritten by 'dlookr':
## method from
## plot.transform scales
## print.transform scales
##
## Attaching package: 'dlookr'
##
## The following object is masked from 'package:tidyr':
##
## extract
##
## The following object is masked from 'package:base':
##
## transform
library(DataExplorer)
library(moments)
##
## Attaching package: 'moments'
##
## The following objects are masked from 'package:dlookr':
##
## kurtosis, skewness
library(flextable)
##
## Attaching package: 'flextable'
##
## The following object is masked from 'package:purrr':
##
## compose
library(forcats)
library(corrplot)
## corrplot 0.92 loaded
# NOTE(review): warn = -1 silences every warning for the rest of the session,
# including readr parsing problems and dplyr join-relationship warnings.
# Confirm this is intended rather than a per-chunk `warning = FALSE`.
options(warn = -1)
# -- Load and Clean Data -- #
# All five daily CSV exports live in the same folder. The folder is defined
# once so that it only has to be edited in one place when the data moves.
data_dir <- "C:/Users/Anthony Morciglio/OneDrive/Coursera Google Data Analytics/Fitabase Data 3.12.16-4.11.16"
f1 <- file.path(data_dir, "dailyActivity_merged.csv")
f2 <- file.path(data_dir, "dailySleep_merged.csv")
f3 <- file.path(data_dir, "dailyCalories_merged.csv")
f4 <- file.path(data_dir, "dailyIntensities_merged.csv")
f5 <- file.path(data_dir, "dailySteps_merged.csv")
# Daily activity table; this is the left-hand table of the join on "ID-AD".
df_daily_act <- readr::read_csv(file = f1)
## Rows: 457 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ActivityDate
## dbl (15): ID-AD, Id, TotalSteps, TotalDistance, TrackerDistance, LoggedActiv...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Sleep log table (`hours` per `logId`); joined to the activity table on "ID-AD".
df_daily_sleep <- readr::read_csv(file = f2)
## Rows: 198559 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): date
## dbl (4): ID-AD, Id, logId, hours
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Daily calories table.
# NOTE(review): readr guesses `ID-AD` as character for this file but as double
# for the other four tables, so a join on "ID-AD" against them would fail on
# incompatible key types — confirm the column contents before joining.
df_daily_cal <- readr::read_csv(file = f3)
## Rows: 1021 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): ID-AD, date
## dbl (2): Id, Calories
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Daily intensity table (TotalIntensity, AverageIntensity); joined on "ID-AD".
df_daily_int <- readr::read_csv(file = f4)
## Rows: 1021 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): date
## dbl (4): ID-AD, Id, TotalIntensity, AverageIntensity
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Daily step totals (StepTotal per ID-AD).
df_daily_steps <- readr::read_csv(file = f5)
## Rows: 1021 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): date
## dbl (3): ID-AD, Id, StepTotal
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# -- Combine the daily tables with inner joins -- #
# Remark: the join key "ID-AD" is the identifier obtained by combining Id with
# the date, so each match is meant to be one user on one day.
# NOTE(review): the rendered table repeats activity rows that differ only in
# HoursSlept, so the sleep table appears to hold several rows per ID-AD —
# confirm whether sleep should be aggregated per day before this join.
df_tot <- df_daily_act %>%
  dplyr::inner_join(df_daily_sleep, by = "ID-AD") %>%
  # Both inputs carry Id, so the join suffixes them; drop both copies.
  select(-c(Id.x, Id.y)) %>%
  dplyr::inner_join(df_daily_int, by = "ID-AD") %>%
  # The two date columns collide in the second join; drop them and the
  # TrackerDistance column.
  select(-c(date.x, date.y, TrackerDistance))
# Positional renames applied to columns 16, 11 and 8 of the joined table.
names(df_tot)[c(16, 11, 8)] <- c(
  "HoursSlept",
  "ModeratelyActiveMinutes",
  "LightlyActiveDistance"
)
After joining the data using dplyr::inner_join(), we rename a few of the columns and observe that most of the columns are numeric. Before creating visualizations, we generate a few preliminary reports using functions from libraries such as DataExplorer, SmartEDA, and dlookr.
It would be beneficial to create categorical variables based on the activity levels of the people using the Fitbits. Since we are interested in the number of calories burned (to market the product in the wellness and fitness industry), we can use binning to categorize activity levels hierarchically as Sedentary, Lightly Active, Moderately Active, and Very Active.
## Rows: 228
## Columns: 16
## $ ActivityDate <chr> "3/25/2016", "3/26/2016", "3/26/2016", "3/27/…
## $ TotalSteps <dbl> 11004, 17609, 17609, 12736, 12736, 13231, 120…
## $ TotalDistance <dbl> 7.11, 11.55, 11.55, 8.53, 8.53, 8.93, 7.85, 7…
## $ LoggedActivitiesDistance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ VeryActiveDistance <dbl> 2.57, 6.92, 6.92, 4.66, 4.66, 3.19, 2.16, 2.2…
## $ ModeratelyActiveDistance <dbl> 0.46, 0.73, 0.73, 0.16, 0.16, 0.79, 1.09, 0.4…
## $ LightlyActiveDistance <dbl> 4.07, 3.91, 3.91, 3.71, 3.71, 4.95, 4.61, 5.0…
## $ SedentaryActiveDistance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ VeryActiveMinutes <dbl> 33, 89, 89, 56, 56, 39, 28, 33, 47, 40, 15, 4…
## $ ModeratelyActiveMinutes <dbl> 12, 17, 17, 5, 5, 20, 28, 12, 21, 11, 30, 18,…
## $ LightlyActiveMinutes <dbl> 205, 274, 274, 268, 268, 224, 243, 239, 200, …
## $ SedentaryMinutes <dbl> 804, 588, 588, 605, 605, 1080, 763, 820, 866,…
## $ Calories <dbl> 1819, 2154, 2154, 1944, 1944, 1932, 1886, 188…
## $ HoursSlept <dbl> 6.42, 7.60, 0.23, 2.13, 5.48, 1.27, 6.28, 5.5…
## $ TotalIntensity <dbl> 139, 130, 130, 97, 97, 76, 56, 61, 52, 129, 9…
## $ AverageIntensity <dbl> 0.070, 0.066, 0.066, 0.049, 0.049, 0.038, 0.0…
ActivityDate | TotalSteps | TotalDistance | LoggedActivitiesDistance | VeryActiveDistance | ModeratelyActiveDistance | LightlyActiveDistance | SedentaryActiveDistance | VeryActiveMinutes | ModeratelyActiveMinutes | LightlyActiveMinutes | SedentaryMinutes | Calories | HoursSlept | TotalIntensity | AverageIntensity |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3/25/2016 | 11,004 | 7.11 | 0.000000 | 2.57 | 0.46 | 4.07 | 0.00 | 33 | 12 | 205 | 804 | 1,819 | 6.42 | 139 | 0.070 |
3/26/2016 | 17,609 | 11.55 | 0.000000 | 6.92 | 0.73 | 3.91 | 0.00 | 89 | 17 | 274 | 588 | 2,154 | 7.60 | 130 | 0.066 |
3/26/2016 | 17,609 | 11.55 | 0.000000 | 6.92 | 0.73 | 3.91 | 0.00 | 89 | 17 | 274 | 588 | 2,154 | 0.23 | 130 | 0.066 |
3/27/2016 | 12,736 | 8.53 | 0.000000 | 4.66 | 0.16 | 3.71 | 0.00 | 56 | 5 | 268 | 605 | 1,944 | 2.13 | 97 | 0.049 |
3/27/2016 | 12,736 | 8.53 | 0.000000 | 4.66 | 0.16 | 3.71 | 0.00 | 56 | 5 | 268 | 605 | 1,944 | 5.48 | 97 | 0.049 |
3/28/2016 | 13,231 | 8.93 | 0.000000 | 3.19 | 0.79 | 4.95 | 0.00 | 39 | 20 | 224 | 1,080 | 1,932 | 1.27 | 76 | 0.038 |
3/29/2016 | 12,041 | 7.85 | 0.000000 | 2.16 | 1.09 | 4.61 | 0.00 | 28 | 28 | 243 | 763 | 1,886 | 6.28 | 56 | 0.028 |
3/31/2016 | 12,256 | 7.86 | 0.000000 | 2.29 | 0.49 | 5.04 | 0.00 | 33 | 12 | 239 | 820 | 1,889 | 5.58 | 61 | 0.031 |
4/1/2016 | 12,262 | 7.87 | 0.000000 | 3.32 | 0.83 | 3.64 | 0.00 | 47 | 21 | 200 | 866 | 1,868 | 5.08 | 52 | 0.026 |
4/2/2016 | 11,248 | 7.25 | 0.000000 | 3.00 | 0.45 | 3.74 | 0.00 | 40 | 11 | 244 | 636 | 1,843 | 8.47 | 129 | 0.065 |
4/3/2016 | 10,016 | 6.37 | 0.000000 | 0.91 | 1.28 | 4.18 | 0.00 | 15 | 30 | 314 | 655 | 1,850 | 7.08 | 96 | 0.050 |
4/4/2016 | 14,557 | 9.80 | 0.000000 | 3.39 | 0.70 | 5.69 | 0.00 | 43 | 18 | 285 | 757 | 2,030 | 5.60 | 69 | 0.036 |
4/5/2016 | 14,844 | 9.73 | 0.000000 | 2.94 | 0.76 | 6.04 | 0.00 | 36 | 18 | 341 | 736 | 2,083 | 5.13 | 46 | 0.023 |
4/9/2016 | 12,432 | 8.10 | 0.000000 | 2.59 | 0.59 | 4.92 | 0.00 | 32 | 15 | 248 | 738 | 1,883 | 6.77 | 45 | 0.025 |
4/10/2016 | 10,057 | 6.98 | 0.000000 | 4.00 | 0.49 | 2.48 | 0.00 | 44 | 13 | 168 | 737 | 1,755 | 7.95 | 120 | 0.074 |
4/11/2016 | 10,990 | 7.26 | 0.000000 | 2.04 | 0.57 | 4.65 | 0.00 | 26 | 14 | 216 | 855 | 1,811 | 5.47 | 67 | 0.045 |
4/3/2016 | 2,841 | 1.88 | 0.000000 | 0.00 | 0.00 | 1.88 | 0.00 | 0 | 0 | 136 | 1,114 | 1,636 | 3.15 | 96 | 0.050 |
4/9/2016 | 4,979 | 3.29 | 0.000000 | 0.00 | 0.00 | 3.29 | 0.00 | 0 | 0 | 184 | 620 | 1,807 | 10.58 | 45 | 0.025 |
4/2/2016 | 5,662 | 3.92 | 0.000000 | 0.00 | 0.00 | 3.92 | 0.00 | 0 | 0 | 267 | 858 | 2,783 | 5.23 | 129 | 0.065 |
4/3/2016 | 3,198 | 2.21 | 0.000000 | 0.00 | 0.00 | 2.21 | 0.00 | 0 | 0 | 146 | 1,183 | 2,449 | 1.83 | 96 | 0.050 |
4/4/2016 | 2,352 | 1.63 | 0.000000 | 0.00 | 0.00 | 1.63 | 0.00 | 0 | 0 | 128 | 829 | 2,380 | 3.13 | 69 | 0.036 |
4/5/2016 | 2,234 | 1.55 | 0.000000 | 0.00 | 0.00 | 1.55 | 0.00 | 0 | 0 | 108 | 912 | 2,344 | 6.98 | 46 | 0.023 |
4/6/2016 | 1,259 | 0.87 | 0.000000 | 0.00 | 0.00 | 0.87 | 0.00 | 0 | 0 | 73 | 938 | 2,202 | 7.13 | 122 | 0.064 |
4/9/2016 | 2,523 | 1.75 | 0.000000 | 0.00 | 0.37 | 1.38 | 0.00 | 0 | 9 | 134 | 1,250 | 2,443 | 0.77 | 45 | 0.025 |
4/10/2016 | 2,105 | 1.46 | 0.000000 | 0.00 | 0.00 | 1.46 | 0.00 | 0 | 0 | 139 | 805 | 2,442 | 5.13 | 120 | 0.074 |
4/10/2016 | 2,105 | 1.46 | 0.000000 | 0.00 | 0.00 | 1.46 | 0.00 | 0 | 0 | 139 | 805 | 2,442 | 1.03 | 120 | 0.074 |
4/11/2016 | 1,209 | 0.84 | 0.000000 | 0.00 | 0.00 | 0.84 | 0.00 | 0 | 0 | 73 | 842 | 2,255 | 2.05 | 67 | 0.045 |
4/12/2016 | 24 | 0.02 | 0.000000 | 0.00 | 0.00 | 0.02 | 0.00 | 0 | 0 | 3 | 161 | 942 | 2.13 | 25 | 0.021 |
4/1/2016 | 2,605 | 1.62 | 0.000000 | 0.00 | 0.00 | 1.62 | 0.00 | 0 | 0 | 166 | 729 | 1,407 | 2.45 | 52 | 0.026 |
4/2/2016 | 1,229 | 0.76 | 0.000000 | 0.00 | 0.00 | 0.76 | 0.00 | 0 | 0 | 69 | 795 | 1,237 | 2.55 | 129 | 0.065 |
4/3/2016 | 2,308 | 1.43 | 0.000000 | 0.00 | 0.00 | 1.43 | 0.00 | 0 | 0 | 115 | 720 | 1,330 | 2.97 | 96 | 0.050 |
4/4/2016 | 6,679 | 4.14 | 0.000000 | 0.00 | 0.00 | 4.14 | 0.00 | 0 | 0 | 263 | 644 | 1,583 | 2.00 | 69 | 0.036 |
4/5/2016 | 5,645 | 3.50 | 0.000000 | 0.00 | 0.00 | 3.50 | 0.00 | 0 | 0 | 248 | 621 | 1,538 | 2.57 | 46 | 0.023 |
4/6/2016 | 3,180 | 1.97 | 0.000000 | 0.00 | 0.00 | 1.97 | 0.00 | 0 | 0 | 185 | 738 | 1,421 | 2.17 | 122 | 0.064 |
4/7/2016 | 2,767 | 1.72 | 0.000000 | 0.00 | 0.00 | 1.72 | 0.00 | 0 | 0 | 170 | 673 | 1,392 | 2.95 | 80 | 0.042 |
4/8/2016 | 3,762 | 2.33 | 0.000000 | 0.00 | 0.00 | 2.33 | 0.00 | 0 | 0 | 173 | 719 | 1,425 | 2.92 | 45 | 0.023 |
4/9/2016 | 3,098 | 1.92 | 0.000000 | 0.00 | 0.00 | 1.92 | 0.00 | 0 | 0 | 185 | 700 | 1,413 | 2.82 | 45 | 0.025 |
4/10/2016 | 5,142 | 3.19 | 0.000000 | 0.00 | 0.00 | 3.19 | 0.00 | 0 | 0 | 230 | 654 | 1,515 | 2.72 | 120 | 0.074 |
4/11/2016 | 3,279 | 2.03 | 0.000000 | 0.00 | 0.00 | 2.03 | 0.00 | 0 | 0 | 164 | 694 | 1,405 | 3.18 | 67 | 0.045 |
3/29/2016 | 10,272 | 6.79 | 0.000000 | 0.16 | 3.12 | 3.50 | 0.00 | 2 | 58 | 208 | 700 | 2,041 | 1.82 | 56 | 0.028 |
3/30/2016 | 10,533 | 7.10 | 0.000000 | 1.77 | 2.06 | 3.27 | 0.00 | 21 | 35 | 255 | 615 | 2,187 | 1.57 | 99 | 0.050 |
3/31/2016 | 6,760 | 4.47 | 0.000000 | 0.00 | 0.00 | 4.47 | 0.00 | 0 | 0 | 250 | 613 | 1,929 | 2.28 | 61 | 0.031 |
4/2/2016 | 15,459 | 10.22 | 0.000000 | 3.59 | 0.81 | 5.82 | 0.00 | 51 | 16 | 327 | 583 | 2,438 | 7.63 | 129 | 0.065 |
4/2/2016 | 15,459 | 10.22 | 0.000000 | 3.59 | 0.81 | 5.82 | 0.00 | 51 | 16 | 327 | 583 | 2,438 | 0.05 | 129 | 0.065 |
4/3/2016 | 7,485 | 4.95 | 0.000000 | 0.00 | 0.00 | 4.95 | 0.00 | 0 | 0 | 324 | 491 | 2,035 | 2.50 | 96 | 0.050 |
4/4/2016 | 10,254 | 6.80 | 0.000000 | 1.42 | 1.23 | 4.16 | 0.00 | 21 | 25 | 231 | 638 | 2,099 | 2.08 | 69 | 0.036 |
4/5/2016 | 10,114 | 6.82 | 0.000000 | 1.64 | 0.48 | 4.68 | 0.00 | 18 | 9 | 269 | 696 | 2,096 | 0.62 | 46 | 0.023 |
4/7/2016 | 10,320 | 6.85 | 0.000000 | 0.68 | 1.23 | 4.94 | 0.00 | 15 | 32 | 315 | 1,002 | 2,338 | 1.25 | 80 | 0.042 |
4/9/2016 | 16,081 | 10.63 | 0.000000 | 1.25 | 1.82 | 7.56 | 0.00 | 16 | 32 | 401 | 970 | 2,488 | 0.33 | 45 | 0.025 |
4/10/2016 | 10,078 | 6.83 | 0.000000 | 1.02 | 0.12 | 5.69 | 0.00 | 12 | 3 | 303 | 463 | 2,164 | 1.93 | 120 | 0.074 |
4/1/2016 | 4,499 | 3.01 | 0.000000 | 0.00 | 0.00 | 3.01 | 0.00 | 0 | 0 | 168 | 842 | 1,288 | 7.15 | 52 | 0.026 |
4/2/2016 | 7,618 | 5.10 | 0.000000 | 0.00 | 0.18 | 4.91 | 0.00 | 0 | 6 | 302 | 842 | 1,490 | 4.82 | 129 | 0.065 |
4/3/2016 | 11,508 | 7.70 | 0.000000 | 2.17 | 0.84 | 4.69 | 0.00 | 29 | 22 | 282 | 748 | 1,630 | 5.97 | 96 | 0.050 |
4/4/2016 | 11,943 | 7.99 | 0.000000 | 1.03 | 2.01 | 4.95 | 0.00 | 16 | 43 | 297 | 710 | 1,648 | 6.22 | 69 | 0.036 |
4/5/2016 | 12,303 | 8.23 | 0.000000 | 1.71 | 1.35 | 5.17 | 0.00 | 26 | 32 | 291 | 605 | 1,649 | 8.08 | 46 | 0.023 |
4/6/2016 | 15,425 | 10.32 | 0.000000 | 1.62 | 1.82 | 6.87 | 0.00 | 24 | 45 | 331 | 614 | 1,783 | 7.08 | 122 | 0.064 |
4/7/2016 | 8,422 | 5.63 | 0.000000 | 0.00 | 3.60 | 2.04 | 0.00 | 0 | 81 | 139 | 815 | 1,431 | 6.73 | 80 | 0.042 |
4/8/2016 | 10,226 | 6.84 | 0.000000 | 0.00 | 4.44 | 2.40 | 0.00 | 0 | 101 | 174 | 685 | 1,524 | 7.98 | 45 | 0.023 |
4/9/2016 | 14,583 | 9.76 | 0.000000 | 2.48 | 1.74 | 5.53 | 0.00 | 35 | 36 | 254 | 697 | 1,697 | 6.95 | 45 | 0.025 |
4/10/2016 | 3,573 | 2.39 | 0.000000 | 0.94 | 0.41 | 0.97 | 0.00 | 15 | 8 | 99 | 866 | 1,232 | 7.52 | 120 | 0.074 |
4/11/2016 | 3,108 | 2.14 | 0.000000 | 0.07 | 0.04 | 1.96 | 0.00 | 1 | 1 | 129 | 894 | 1,223 | 6.90 | 67 | 0.045 |
3/12/2016 | 5,543 | 3.97 | 0.000000 | 0.00 | 0.00 | 3.96 | 0.01 | 0 | 0 | 254 | 757 | 2,990 | 6.00 | 98 | 0.049 |
3/12/2016 | 5,543 | 3.97 | 0.000000 | 0.00 | 0.00 | 3.96 | 0.01 | 0 | 0 | 254 | 757 | 2,990 | 1.12 | 98 | 0.049 |
3/13/2016 | 3,226 | 2.31 | 0.000000 | 0.00 | 0.00 | 2.28 | 0.00 | 0 | 0 | 136 | 771 | 2,480 | 5.37 | 53 | 0.027 |
3/14/2016 | 3,023 | 2.17 | 0.000000 | 0.00 | 0.00 | 2.14 | 0.00 | 0 | 0 | 145 | 1,005 | 2,570 | 4.82 | 27 | 0.014 |
3/15/2016 | 5,906 | 4.23 | 0.000000 | 0.00 | 0.16 | 4.04 | 0.00 | 0 | 10 | 215 | 874 | 3,016 | 5.67 | 53 | 0.027 |
3/16/2016 | 12,483 | 8.99 | 0.000000 | 1.45 | 0.57 | 6.90 | 0.00 | 25 | 14 | 309 | 599 | 3,830 | 8.20 | 73 | 0.037 |
3/17/2016 | 8,940 | 6.41 | 0.000000 | 0.00 | 0.00 | 0.61 | 0.04 | 0 | 0 | 47 | 986 | 3,706 | 6.77 | 70 | 0.035 |
3/30/2016 | 5,400 | 3.87 | 0.000000 | 0.00 | 0.00 | 3.85 | 0.00 | 0 | 0 | 258 | 795 | 3,418 | 2.43 | 99 | 0.050 |
3/30/2016 | 5,400 | 3.87 | 0.000000 | 0.00 | 0.00 | 3.85 | 0.00 | 0 | 0 | 258 | 795 | 3,418 | 3.98 | 99 | 0.050 |
3/31/2016 | 7,428 | 5.33 | 0.000000 | 0.87 | 0.92 | 3.51 | 0.00 | 18 | 33 | 261 | 860 | 3,439 | 4.45 | 61 | 0.031 |
4/1/2016 | 5,351 | 3.84 | 0.000000 | 0.62 | 0.18 | 3.01 | 0.00 | 15 | 17 | 210 | 748 | 3,338 | 4.08 | 52 | 0.026 |
4/1/2016 | 5,351 | 3.84 | 0.000000 | 0.62 | 0.18 | 3.01 | 0.00 | 15 | 17 | 210 | 748 | 3,338 | 2.92 | 52 | 0.026 |
4/1/2016 | 5,351 | 3.84 | 0.000000 | 0.62 | 0.18 | 3.01 | 0.00 | 15 | 17 | 210 | 748 | 3,338 | 0.45 | 52 | 0.026 |
4/2/2016 | 4,299 | 3.10 | 0.000000 | 0.09 | 0.21 | 2.74 | 0.04 | 3 | 15 | 162 | 752 | 2,892 | 3.12 | 129 | 0.065 |
4/3/2016 | 6,107 | 4.38 | 0.000000 | 0.00 | 0.00 | 4.36 | 0.00 | 0 | 0 | 265 | 776 | 3,313 | 5.00 | 96 | 0.050 |
4/3/2016 | 6,107 | 4.38 | 0.000000 | 0.00 | 0.00 | 4.36 | 0.00 | 0 | 0 | 265 | 776 | 3,313 | 1.62 | 96 | 0.050 |
4/4/2016 | 6,429 | 4.60 | 0.000000 | 0.00 | 0.95 | 3.65 | 0.00 | 0 | 28 | 193 | 741 | 3,118 | 7.95 | 69 | 0.036 |
4/6/2016 | 7,476 | 5.36 | 0.000000 | 0.00 | 0.00 | 5.31 | 0.00 | 0 | 0 | 263 | 920 | 3,253 | 4.27 | 122 | 0.064 |
4/10/2016 | 5,129 | 3.68 | 0.000000 | 0.18 | 0.24 | 3.21 | 0.01 | 5 | 7 | 176 | 1,178 | 2,817 | 1.22 | 120 | 0.074 |
4/11/2016 | 2,993 | 2.15 | 0.000000 | 0.00 | 0.00 | 2.09 | 0.00 | 0 | 0 | 114 | 888 | 2,507 | 3.75 | 67 | 0.045 |
3/29/2016 | 5,643 | 3.79 | 0.000000 | 0.23 | 0.33 | 3.23 | 0.00 | 3 | 8 | 199 | 683 | 1,958 | 1.60 | 56 | 0.028 |
3/29/2016 | 5,643 | 3.79 | 0.000000 | 0.23 | 0.33 | 3.23 | 0.00 | 3 | 8 | 199 | 683 | 1,958 | 1.73 | 56 | 0.028 |
3/31/2016 | 8,144 | 5.46 | 0.000000 | 0.25 | 0.61 | 4.61 | 0.00 | 4 | 17 | 247 | 1,125 | 2,129 | 0.77 | 61 | 0.031 |
4/1/2016 | 9,343 | 6.27 | 0.000000 | 0.96 | 0.85 | 4.46 | 0.00 | 14 | 20 | 268 | 654 | 2,216 | 1.55 | 52 | 0.026 |
4/1/2016 | 9,343 | 6.27 | 0.000000 | 0.96 | 0.85 | 4.46 | 0.00 | 14 | 20 | 268 | 654 | 2,216 | 0.72 | 52 | 0.026 |
4/2/2016 | 8,405 | 5.66 | 0.000000 | 1.13 | 0.19 | 4.34 | 0.00 | 15 | 4 | 257 | 621 | 2,154 | 0.97 | 129 | 0.065 |
4/3/2016 | 8,223 | 5.52 | 0.000000 | 0.80 | 1.51 | 3.21 | 0.00 | 12 | 37 | 257 | 551 | 2,178 | 1.03 | 96 | 0.050 |
4/4/2016 | 10,067 | 6.76 | 0.000000 | 0.36 | 1.42 | 4.97 | 0.00 | 5 | 39 | 313 | 578 | 2,308 | 1.52 | 69 | 0.036 |
4/5/2016 | 8,359 | 5.63 | 0.000000 | 0.23 | 0.53 | 4.87 | 0.00 | 3 | 14 | 311 | 638 | 2,201 | 1.15 | 46 | 0.023 |
4/6/2016 | 10,946 | 7.35 | 0.000000 | 0.54 | 2.09 | 4.72 | 0.00 | 8 | 51 | 308 | 569 | 2,341 | 4.30 | 122 | 0.064 |
4/6/2016 | 10,946 | 7.35 | 0.000000 | 0.54 | 2.09 | 4.72 | 0.00 | 8 | 51 | 308 | 569 | 2,341 | 0.80 | 122 | 0.064 |
3/29/2016 | 2,303 | 1.55 | 0.000000 | 0.00 | 0.00 | 1.55 | 0.00 | 0 | 0 | 155 | 807 | 2,010 | 3.75 | 56 | 0.028 |
3/29/2016 | 2,303 | 1.55 | 0.000000 | 0.00 | 0.00 | 1.55 | 0.00 | 0 | 0 | 155 | 807 | 2,010 | 4.18 | 56 | 0.028 |
3/31/2016 | 4,804 | 3.22 | 0.000000 | 0.00 | 0.00 | 3.22 | 0.00 | 0 | 0 | 238 | 786 | 2,227 | 4.90 | 61 | 0.031 |
3/31/2016 | 4,804 | 3.22 | 0.000000 | 0.00 | 0.00 | 3.22 | 0.00 | 0 | 0 | 238 | 786 | 2,227 | 2.00 | 61 | 0.031 |
4/1/2016 | 3,271 | 2.19 | 0.000000 | 0.00 | 0.00 | 2.19 | 0.00 | 0 | 0 | 205 | 898 | 2,133 | 5.60 | 52 | 0.026 |
4/2/2016 | 5,406 | 3.63 | 0.000000 | 0.00 | 0.00 | 3.61 | 0.00 | 0 | 0 | 273 | 672 | 2,317 | 8.23 | 129 | 0.065 |
4/5/2016 | 4,239 | 2.84 | 0.000000 | 0.06 | 0.24 | 2.53 | 0.00 | 1 | 6 | 235 | 702 | 2,217 | 8.25 | 46 | 0.023 |
4/6/2016 | 6,911 | 4.78 | 0.000000 | 2.32 | 0.10 | 2.36 | 0.00 | 28 | 2 | 215 | 745 | 2,374 | 7.48 | 122 | 0.064 |
4/7/2016 | 6,667 | 4.61 | 0.000000 | 2.02 | 0.05 | 2.55 | 0.00 | 24 | 1 | 208 | 939 | 2,335 | 4.45 | 80 | 0.042 |
4/8/2016 | 5,543 | 3.72 | 0.000000 | 0.00 | 0.00 | 3.72 | 0.00 | 0 | 0 | 266 | 941 | 2,303 | 1.68 | 45 | 0.023 |
4/8/2016 | 5,543 | 3.72 | 0.000000 | 0.00 | 0.00 | 3.72 | 0.00 | 0 | 0 | 266 | 941 | 2,303 | 2.17 | 45 | 0.023 |
4/9/2016 | 4,195 | 2.81 | 0.000000 | 0.00 | 0.00 | 2.81 | 0.00 | 0 | 0 | 239 | 758 | 2,210 | 3.38 | 45 | 0.025 |
4/9/2016 | 4,195 | 2.81 | 0.000000 | 0.00 | 0.00 | 2.81 | 0.00 | 0 | 0 | 239 | 758 | 2,210 | 1.15 | 45 | 0.025 |
4/9/2016 | 4,195 | 2.81 | 0.000000 | 0.00 | 0.00 | 2.81 | 0.00 | 0 | 0 | 239 | 758 | 2,210 | 2.80 | 45 | 0.025 |
4/10/2016 | 6,625 | 4.45 | 0.000000 | 0.00 | 0.00 | 4.45 | 0.00 | 0 | 0 | 316 | 755 | 2,424 | 4.87 | 120 | 0.074 |
4/10/2016 | 6,625 | 4.45 | 0.000000 | 0.00 | 0.00 | 4.45 | 0.00 | 0 | 0 | 316 | 755 | 2,424 | 1.25 | 120 | 0.074 |
4/11/2016 | 5,986 | 4.13 | 0.000000 | 1.95 | 0.23 | 1.95 | 0.00 | 23 | 5 | 195 | 789 | 2,297 | 4.55 | 67 | 0.045 |
4/11/2016 | 5,986 | 4.13 | 0.000000 | 1.95 | 0.23 | 1.95 | 0.00 | 23 | 5 | 195 | 789 | 2,297 | 2.55 | 67 | 0.045 |
4/12/2016 | 278 | 0.19 | 0.000000 | 0.00 | 0.00 | 0.19 | 0.00 | 0 | 0 | 20 | 253 | 745 | 5.52 | 25 | 0.021 |
4/2/2016 | 3,183 | 2.10 | 0.000000 | 0.00 | 0.00 | 2.10 | 0.00 | 0 | 0 | 173 | 1,180 | 1,680 | 1.43 | 129 | 0.065 |
3/29/2016 | 6,506 | 5.28 | 0.000000 | 0.33 | 0.27 | 4.67 | 0.00 | 4 | 5 | 241 | 760 | 2,886 | 1.15 | 56 | 0.028 |
3/30/2016 | 7,583 | 6.15 | 0.000000 | 0.25 | 0.55 | 5.35 | 0.00 | 3 | 13 | 227 | 742 | 2,915 | 1.53 | 99 | 0.050 |
3/31/2016 | 6,963 | 5.65 | 0.000000 | 0.00 | 0.00 | 5.65 | 0.00 | 0 | 0 | 256 | 759 | 2,895 | 1.00 | 61 | 0.031 |
4/1/2016 | 7,165 | 5.81 | 0.000000 | 0.00 | 0.37 | 5.44 | 0.00 | 0 | 9 | 248 | 708 | 2,923 | 2.00 | 52 | 0.026 |
4/2/2016 | 10,328 | 8.38 | 0.000000 | 0.00 | 1.46 | 6.92 | 0.00 | 0 | 32 | 367 | 475 | 3,323 | 2.00 | 129 | 0.065 |
4/3/2016 | 12,116 | 9.83 | 0.000000 | 0.25 | 2.85 | 6.72 | 0.00 | 3 | 57 | 323 | 471 | 3,357 | 1.95 | 96 | 0.050 |
4/4/2016 | 7,810 | 6.36 | 0.000000 | 0.48 | 1.20 | 4.68 | 0.00 | 6 | 27 | 216 | 746 | 2,931 | 1.33 | 69 | 0.036 |
4/5/2016 | 6,670 | 5.41 | 0.000000 | 1.24 | 0.80 | 3.37 | 0.00 | 15 | 16 | 199 | 770 | 2,848 | 1.32 | 46 | 0.023 |
4/6/2016 | 7,605 | 6.18 | 0.000000 | 0.33 | 0.21 | 5.63 | 0.00 | 4 | 4 | 248 | 708 | 2,943 | 1.75 | 122 | 0.064 |
4/7/2016 | 6,585 | 5.34 | 0.000000 | 0.00 | 0.00 | 5.34 | 0.00 | 0 | 0 | 210 | 770 | 2,822 | 1.63 | 80 | 0.042 |
4/9/2016 | 14,002 | 11.36 | 0.000000 | 0.61 | 2.68 | 8.06 | 0.00 | 8 | 56 | 381 | 407 | 3,597 | 7.20 | 45 | 0.025 |
4/9/2016 | 14,002 | 11.36 | 0.000000 | 0.61 | 2.68 | 8.06 | 0.00 | 8 | 56 | 381 | 407 | 3,597 | 2.57 | 45 | 0.025 |
4/10/2016 | 11,135 | 9.03 | 0.000000 | 0.08 | 1.16 | 7.79 | 0.00 | 1 | 25 | 331 | 528 | 3,224 | 2.02 | 120 | 0.074 |
4/10/2016 | 11,135 | 9.03 | 0.000000 | 0.08 | 1.16 | 7.79 | 0.00 | 1 | 25 | 331 | 528 | 3,224 | 0.87 | 120 | 0.074 |
4/11/2016 | 6,499 | 5.27 | 0.000000 | 0.00 | 0.00 | 5.27 | 0.00 | 0 | 0 | 207 | 809 | 2,677 | 3.80 | 67 | 0.045 |
4/2/2016 | 2,937 | 1.92 | 0.000000 | 0.00 | 0.00 | 1.91 | 0.00 | 0 | 0 | 181 | 356 | 1,615 | 11.58 | 129 | 0.065 |
4/2/2016 | 2,937 | 1.92 | 0.000000 | 0.00 | 0.00 | 1.91 | 0.00 | 0 | 0 | 181 | 356 | 1,615 | 3.43 | 129 | 0.065 |
4/3/2016 | 1,515 | 0.99 | 0.000000 | 0.00 | 0.00 | 0.99 | 0.00 | 0 | 0 | 93 | 507 | 1,481 | 12.50 | 96 | 0.050 |
4/3/2016 | 1,515 | 0.99 | 0.000000 | 0.00 | 0.00 | 0.99 | 0.00 | 0 | 0 | 93 | 507 | 1,481 | 1.47 | 96 | 0.050 |
4/4/2016 | 8,921 | 5.88 | 0.000000 | 2.07 | 0.44 | 3.36 | 0.00 | 27 | 8 | 198 | 754 | 1,892 | 1.53 | 69 | 0.036 |
4/5/2016 | 11,306 | 7.38 | 0.000000 | 2.31 | 0.98 | 4.09 | 0.00 | 40 | 26 | 218 | 772 | 2,086 | 0.38 | 46 | 0.023 |
4/6/2016 | 12,252 | 8.00 | 0.000000 | 2.41 | 1.70 | 3.89 | 0.00 | 33 | 28 | 229 | 745 | 2,044 | 0.58 | 122 | 0.064 |
4/7/2016 | 15,444 | 10.08 | 0.000000 | 3.27 | 1.73 | 5.08 | 0.00 | 51 | 39 | 243 | 731 | 2,249 | 0.08 | 80 | 0.042 |
4/9/2016 | 4,599 | 3.00 | 0.000000 | 0.00 | 0.00 | 3.00 | 0.00 | 0 | 0 | 176 | 578 | 1,692 | 9.12 | 45 | 0.025 |
4/9/2016 | 4,599 | 3.00 | 0.000000 | 0.00 | 0.00 | 3.00 | 0.00 | 0 | 0 | 176 | 578 | 1,692 | 2.28 | 45 | 0.025 |
4/10/2016 | 5,594 | 3.65 | 0.000000 | 0.28 | 1.48 | 1.89 | 0.00 | 4 | 25 | 150 | 552 | 1,712 | 10.35 | 120 | 0.074 |
4/10/2016 | 5,594 | 3.65 | 0.000000 | 0.28 | 1.48 | 1.89 | 0.00 | 4 | 25 | 150 | 552 | 1,712 | 1.43 | 120 | 0.074 |
4/11/2016 | 11,906 | 7.77 | 0.000000 | 2.26 | 1.52 | 3.99 | 0.00 | 32 | 27 | 234 | 688 | 2,065 | 1.65 | 67 | 0.045 |
4/1/2016 | 10,461 | 7.87 | 0.000000 | 4.76 | 0.85 | 2.25 | 0.00 | 99 | 31 | 142 | 721 | 3,625 | 2.20 | 52 | 0.026 |
4/2/2016 | 14,873 | 11.11 | 0.000000 | 8.19 | 0.60 | 2.31 | 0.00 | 202 | 36 | 153 | 663 | 4,430 | 5.17 | 129 | 0.065 |
4/2/2016 | 14,873 | 11.11 | 0.000000 | 8.19 | 0.60 | 2.31 | 0.00 | 202 | 36 | 153 | 663 | 4,430 | 1.17 | 129 | 0.065 |
4/3/2016 | 9,917 | 7.41 | 0.000000 | 1.99 | 0.97 | 4.44 | 0.00 | 58 | 27 | 240 | 700 | 3,427 | 2.20 | 96 | 0.050 |
4/4/2016 | 7,401 | 5.56 | 0.000000 | 2.32 | 0.36 | 2.88 | 0.00 | 70 | 19 | 204 | 689 | 3,492 | 2.27 | 69 | 0.036 |
4/5/2016 | 8,964 | 6.70 | 0.000000 | 3.97 | 0.19 | 2.53 | 0.00 | 107 | 18 | 145 | 756 | 3,597 | 2.17 | 46 | 0.023 |
4/5/2016 | 8,964 | 6.70 | 0.000000 | 3.97 | 0.19 | 2.53 | 0.00 | 107 | 18 | 145 | 756 | 3,597 | 0.95 | 46 | 0.023 |
4/6/2016 | 11,080 | 8.30 | 0.000000 | 3.13 | 1.37 | 3.81 | 0.00 | 83 | 52 | 190 | 695 | 3,765 | 1.60 | 122 | 0.064 |
4/7/2016 | 4,499 | 3.36 | 0.000000 | 0.89 | 0.26 | 2.21 | 0.00 | 31 | 11 | 146 | 756 | 2,775 | 2.35 | 80 | 0.042 |
4/8/2016 | 4,363 | 3.26 | 0.000000 | 0.13 | 0.06 | 3.08 | 0.00 | 8 | 2 | 156 | 813 | 2,486 | 2.22 | 45 | 0.023 |
4/9/2016 | 10,494 | 7.84 | 0.000000 | 4.91 | 0.78 | 2.15 | 0.00 | 123 | 62 | 138 | 601 | 3,817 | 4.43 | 45 | 0.025 |
4/9/2016 | 10,494 | 7.84 | 0.000000 | 4.91 | 0.78 | 2.15 | 0.00 | 123 | 62 | 138 | 601 | 3,817 | 1.88 | 45 | 0.025 |
4/9/2016 | 10,494 | 7.84 | 0.000000 | 4.91 | 0.78 | 2.15 | 0.00 | 123 | 62 | 138 | 601 | 3,817 | 0.28 | 45 | 0.025 |
4/10/2016 | 9,776 | 7.38 | 0.000000 | 2.93 | 1.43 | 3.02 | 0.00 | 80 | 35 | 162 | 721 | 3,378 | 1.53 | 120 | 0.074 |
4/1/2016 | 7,444 | 5.62 | 0.000000 | 0.00 | 0.00 | 5.62 | 0.00 | 0 | 0 | 286 | 568 | 2,210 | 4.28 | 52 | 0.026 |
4/1/2016 | 7,444 | 5.62 | 0.000000 | 0.00 | 0.00 | 5.62 | 0.00 | 0 | 0 | 286 | 568 | 2,210 | 3.93 | 52 | 0.026 |
4/1/2016 | 7,444 | 5.62 | 0.000000 | 0.00 | 0.00 | 5.62 | 0.00 | 0 | 0 | 286 | 568 | 2,210 | 1.50 | 52 | 0.026 |
4/5/2016 | 9,910 | 7.48 | 0.000000 | 0.00 | 0.00 | 7.48 | 0.00 | 0 | 0 | 384 | 990 | 2,445 | 1.05 | 46 | 0.023 |
4/5/2016 | 9,910 | 7.48 | 0.000000 | 0.00 | 0.00 | 7.48 | 0.00 | 0 | 0 | 384 | 990 | 2,445 | 0.02 | 46 | 0.023 |
4/6/2016 | 12,409 | 9.37 | 0.000000 | 0.00 | 0.00 | 9.37 | 0.00 | 0 | 0 | 491 | 388 | 2,694 | 1.72 | 122 | 0.064 |
4/9/2016 | 10,789 | 8.15 | 0.000000 | 0.00 | 0.00 | 8.15 | 0.00 | 0 | 0 | 506 | 413 | 2,617 | 3.73 | 45 | 0.025 |
4/9/2016 | 10,789 | 8.15 | 0.000000 | 0.00 | 0.00 | 8.15 | 0.00 | 0 | 0 | 506 | 413 | 2,617 | 2.95 | 45 | 0.025 |
4/9/2016 | 10,789 | 8.15 | 0.000000 | 0.00 | 0.00 | 8.15 | 0.00 | 0 | 0 | 506 | 413 | 2,617 | 1.05 | 45 | 0.025 |
4/1/2016 | 7,225 | 5.18 | 0.000000 | 1.73 | 1.27 | 2.18 | 0.00 | 25 | 50 | 163 | 1,189 | 3,065 | 0.20 | 52 | 0.026 |
4/6/2016 | 11,761 | 8.43 | 0.000000 | 1.31 | 2.44 | 4.68 | 0.00 | 24 | 99 | 300 | 550 | 3,920 | 7.58 | 122 | 0.064 |
4/6/2016 | 11,761 | 8.43 | 0.000000 | 1.31 | 2.44 | 4.68 | 0.00 | 24 | 99 | 300 | 550 | 3,920 | 0.17 | 122 | 0.064 |
4/7/2016 | 13,987 | 10.03 | 0.000000 | 2.87 | 3.34 | 3.82 | 0.00 | 46 | 114 | 250 | 739 | 3,856 | 1.82 | 80 | 0.042 |
4/7/2016 | 13,987 | 10.03 | 0.000000 | 2.87 | 3.34 | 3.82 | 0.00 | 46 | 114 | 250 | 739 | 3,856 | 2.13 | 80 | 0.042 |
3/30/2016 | 15,491 | 10.24 | 0.000000 | 1.29 | 4.49 | 4.46 | 0.00 | 18 | 77 | 272 | 641 | 2,244 | 6.75 | 99 | 0.050 |
3/30/2016 | 15,491 | 10.24 | 0.000000 | 1.29 | 4.49 | 4.46 | 0.00 | 18 | 77 | 272 | 641 | 2,244 | 0.42 | 99 | 0.050 |
3/31/2016 | 14,097 | 9.32 | 0.000000 | 3.50 | 1.92 | 3.90 | 0.00 | 50 | 32 | 234 | 595 | 2,188 | 1.90 | 61 | 0.031 |
3/31/2016 | 14,097 | 9.32 | 0.000000 | 3.50 | 1.92 | 3.90 | 0.00 | 50 | 32 | 234 | 595 | 2,188 | 0.07 | 61 | 0.031 |
4/2/2016 | 12,437 | 8.30 | 0.000000 | 3.76 | 1.29 | 3.24 | 0.00 | 50 | 24 | 219 | 732 | 2,115 | 6.90 | 129 | 0.065 |
4/3/2016 | 12,307 | 8.14 | 0.000000 | 1.01 | 3.26 | 3.87 | 0.00 | 14 | 51 | 218 | 589 | 2,055 | 8.25 | 96 | 0.050 |
4/3/2016 | 12,307 | 8.14 | 0.000000 | 1.01 | 3.26 | 3.87 | 0.00 | 14 | 51 | 218 | 589 | 2,055 | 1.18 | 96 | 0.050 |
4/5/2016 | 12,010 | 7.94 | 0.000000 | 2.38 | 0.95 | 4.61 | 0.00 | 38 | 17 | 287 | 692 | 2,158 | 5.90 | 46 | 0.023 |
4/5/2016 | 12,010 | 7.94 | 0.000000 | 2.38 | 0.95 | 4.61 | 0.00 | 38 | 17 | 287 | 692 | 2,158 | 0.83 | 46 | 0.023 |
4/6/2016 | 12,480 | 8.86 | 3.972795 | 4.18 | 0.69 | 4.00 | 0.00 | 55 | 14 | 276 | 593 | 2,170 | 0.22 | 122 | 0.064 |
4/7/2016 | 13,417 | 9.65 | 1.926302 | 3.39 | 1.25 | 4.91 | 0.10 | 45 | 16 | 284 | 665 | 2,244 | 0.33 | 80 | 0.042 |
4/9/2016 | 12,495 | 8.26 | 0.000000 | 2.21 | 1.09 | 4.96 | 0.00 | 33 | 19 | 324 | 609 | 2,231 | 6.18 | 45 | 0.025 |
4/9/2016 | 12,495 | 8.26 | 0.000000 | 2.21 | 1.09 | 4.96 | 0.00 | 33 | 19 | 324 | 609 | 2,231 | 1.37 | 45 | 0.025 |
4/10/2016 | 10,148 | 6.71 | 0.000000 | 1.36 | 0.22 | 5.13 | 0.00 | 19 | 7 | 306 | 679 | 2,100 | 7.02 | 120 | 0.074 |
4/10/2016 | 10,148 | 6.71 | 0.000000 | 1.36 | 0.22 | 5.13 | 0.00 | 19 | 7 | 306 | 679 | 2,100 | 0.10 | 120 | 0.074 |
4/12/2016 | 5,893 | 3.90 | 0.000000 | 2.88 | 0.56 | 0.46 | 0.00 | 43 | 9 | 48 | 146 | 917 | 6.43 | 25 | 0.021 |
4/2/2016 | 10,976 | 7.34 | 0.000000 | 0.55 | 0.67 | 6.13 | 0.00 | 9 | 16 | 271 | 1,075 | 2,575 | 1.13 | 129 | 0.065 |
4/3/2016 | 16,806 | 11.24 | 0.000000 | 6.41 | 1.37 | 3.47 | 0.00 | 104 | 31 | 230 | 1,002 | 3,086 | 1.20 | 96 | 0.050 |
4/5/2016 | 12,084 | 9.14 | 4.836380 | 4.39 | 0.46 | 4.29 | 0.00 | 50 | 6 | 284 | 1,030 | 2,676 | 1.15 | 46 | 0.023 |
4/7/2016 | 14,100 | 10.58 | 4.875990 | 5.10 | 1.19 | 4.28 | 0.00 | 61 | 33 | 265 | 1,015 | 2,840 | 1.08 | 80 | 0.042 |
4/1/2016 | 11,463 | 7.67 | 0.000000 | 2.83 | 1.89 | 2.93 | 0.00 | 38 | 30 | 154 | 777 | 2,496 | 0.60 | 52 | 0.026 |
4/5/2016 | 7,478 | 4.99 | 0.000000 | 1.80 | 0.53 | 2.61 | 0.03 | 25 | 33 | 132 | 1,159 | 2,439 | 1.50 | 46 | 0.023 |
4/6/2016 | 7,352 | 4.73 | 0.000000 | 1.41 | 0.78 | 2.53 | 0.00 | 19 | 14 | 124 | 814 | 2,233 | 1.08 | 122 | 0.064 |
4/7/2016 | 14,604 | 10.82 | 0.000000 | 7.23 | 1.12 | 2.43 | 0.03 | 76 | 30 | 127 | 695 | 2,862 | 1.75 | 80 | 0.042 |
4/9/2016 | 7,338 | 4.67 | 0.000000 | 1.08 | 0.13 | 3.46 | 0.00 | 27 | 5 | 204 | 717 | 2,519 | 8.10 | 45 | 0.025 |
4/10/2016 | 569 | 0.35 | 0.000000 | 0.00 | 0.00 | 0.34 | 0.00 | 0 | 0 | 27 | 1,368 | 1,799 | 0.73 | 120 | 0.074 |
4/11/2016 | 6,242 | 3.92 | 0.000000 | 0.74 | 0.23 | 2.93 | 0.00 | 46 | 22 | 126 | 741 | 2,543 | 1.80 | 67 | 0.045 |
4/1/2016 | 14,179 | 11.24 | 0.000000 | 6.23 | 1.06 | 3.96 | 0.00 | 73 | 21 | 195 | 719 | 3,669 | 2.52 | 52 | 0.026 |
4/1/2016 | 14,179 | 11.24 | 0.000000 | 6.23 | 1.06 | 3.96 | 0.00 | 73 | 21 | 195 | 719 | 3,669 | 0.30 | 52 | 0.026 |
4/2/2016 | 3,358 | 2.66 | 0.000000 | 0.00 | 0.00 | 2.66 | 0.00 | 0 | 0 | 144 | 657 | 2,702 | 3.23 | 129 | 0.065 |
4/2/2016 | 3,358 | 2.66 | 0.000000 | 0.00 | 0.00 | 2.66 | 0.00 | 0 | 0 | 144 | 657 | 2,702 | 1.30 | 129 | 0.065 |
4/2/2016 | 3,358 | 2.66 | 0.000000 | 0.00 | 0.00 | 2.66 | 0.00 | 0 | 0 | 144 | 657 | 2,702 | 2.03 | 129 | 0.065 |
4/3/2016 | 9,152 | 7.26 | 0.000000 | 1.99 | 0.82 | 4.45 | 0.00 | 25 | 13 | 243 | 600 | 3,304 | 2.05 | 96 | 0.050 |
4/4/2016 | 13,935 | 11.05 | 2.092147 | 4.09 | 0.79 | 6.17 | 0.00 | 105 | 16 | 220 | 658 | 4,234 | 3.00 | 69 | 0.036 |
4/5/2016 | 12,846 | 10.19 | 2.253081 | 5.00 | 0.75 | 4.44 | 0.00 | 113 | 15 | 189 | 686 | 4,128 | 2.10 | 46 | 0.023 |
4/5/2016 | 12,846 | 10.19 | 2.253081 | 5.00 | 0.75 | 4.44 | 0.00 | 113 | 15 | 189 | 686 | 4,128 | 0.90 | 46 | 0.023 |
4/6/2016 | 9,124 | 7.24 | 2.092147 | 0.00 | 0.25 | 6.99 | 0.00 | 55 | 6 | 230 | 728 | 3,798 | 1.25 | 122 | 0.064 |
4/6/2016 | 9,124 | 7.24 | 2.092147 | 0.00 | 0.25 | 6.99 | 0.00 | 55 | 6 | 230 | 728 | 3,798 | 1.40 | 122 | 0.064 |
4/7/2016 | 9,725 | 7.71 | 2.253081 | 2.41 | 0.53 | 4.77 | 0.00 | 83 | 11 | 181 | 670 | 3,839 | 1.52 | 80 | 0.042 |
4/7/2016 | 9,725 | 7.71 | 2.253081 | 2.41 | 0.53 | 4.77 | 0.00 | 83 | 11 | 181 | 670 | 3,839 | 1.45 | 80 | 0.042 |
4/7/2016 | 9,725 | 7.71 | 2.253081 | 2.41 | 0.53 | 4.77 | 0.00 | 83 | 11 | 181 | 670 | 3,839 | 0.87 | 80 | 0.042 |
4/8/2016 | 8,350 | 6.62 | 2.092147 | 1.49 | 0.53 | 4.61 | 0.00 | 72 | 10 | 167 | 868 | 3,713 | 1.10 | 45 | 0.023 |
4/9/2016 | 2,240 | 1.78 | 0.000000 | 0.00 | 0.00 | 1.78 | 0.00 | 0 | 0 | 110 | 843 | 2,606 | 2.95 | 45 | 0.025 |
4/9/2016 | 2,240 | 1.78 | 0.000000 | 0.00 | 0.00 | 1.78 | 0.00 | 0 | 0 | 110 | 843 | 2,606 | 1.55 | 45 | 0.025 |
4/10/2016 | 2,631 | 2.09 | 0.000000 | 0.00 | 0.00 | 2.09 | 0.00 | 0 | 0 | 117 | 727 | 2,624 | 8.17 | 120 | 0.074 |
4/10/2016 | 2,631 | 2.09 | 0.000000 | 0.00 | 0.00 | 2.09 | 0.00 | 0 | 0 | 117 | 727 | 2,624 | 1.65 | 120 | 0.074 |
4/11/2016 | 8,837 | 7.01 | 2.092147 | 1.54 | 0.88 | 4.59 | 0.00 | 74 | 17 | 182 | 811 | 3,775 | 1.67 | 67 | 0.045 |
4/1/2016 | 4,592 | 2.94 | 0.000000 | 0.10 | 0.14 | 2.70 | 0.00 | 4 | 8 | 176 | 748 | 2,260 | 5.63 | 52 | 0.026 |
4/1/2016 | 4,592 | 2.94 | 0.000000 | 0.10 | 0.14 | 2.70 | 0.00 | 4 | 8 | 176 | 748 | 2,260 | 2.73 | 52 | 0.026 |
4/3/2016 | 7,238 | 4.63 | 0.000000 | 0.11 | 0.23 | 4.29 | 0.00 | 2 | 6 | 279 | 636 | 2,667 | 8.60 | 96 | 0.050 |
4/4/2016 | 3,821 | 2.45 | 0.000000 | 0.00 | 0.22 | 2.23 | 0.00 | 0 | 9 | 161 | 675 | 2,229 | 5.58 | 69 | 0.036 |
4/4/2016 | 3,821 | 2.45 | 0.000000 | 0.00 | 0.22 | 2.23 | 0.00 | 0 | 9 | 161 | 675 | 2,229 | 4.30 | 69 | 0.036 |
4/5/2016 | 2,332 | 1.49 | 0.000000 | 0.01 | 0.18 | 1.30 | 0.00 | 1 | 10 | 111 | 904 | 2,100 | 6.88 | 46 | 0.023 |
4/6/2016 | 2,121 | 1.36 | 0.000000 | 0.00 | 0.00 | 1.36 | 0.00 | 0 | 0 | 122 | 855 | 2,114 | 7.70 | 122 | 0.064 |
4/7/2016 | 1,291 | 0.83 | 0.000000 | 0.00 | 0.00 | 0.83 | 0.00 | 0 | 0 | 77 | 888 | 1,961 | 7.90 | 80 | 0.042 |
4/8/2016 | 1,467 | 0.94 | 0.000000 | 0.10 | 0.21 | 0.63 | 0.00 | 2 | 8 | 71 | 912 | 1,953 | 2.95 | 45 | 0.023 |
4/8/2016 | 1,467 | 0.94 | 0.000000 | 0.10 | 0.21 | 0.63 | 0.00 | 2 | 8 | 71 | 912 | 1,953 | 4.47 | 45 | 0.023 |
4/9/2016 | 1,022 | 0.65 | 0.000000 | 0.00 | 0.00 | 0.65 | 0.00 | 0 | 0 | 63 | 739 | 1,890 | 4.67 | 45 | 0.025 |
4/9/2016 | 1,022 | 0.65 | 0.000000 | 0.00 | 0.00 | 0.65 | 0.00 | 0 | 0 | 63 | 739 | 1,890 | 4.60 | 45 | 0.025 |
4/9/2016 | 1,022 | 0.65 | 0.000000 | 0.00 | 0.00 | 0.65 | 0.00 | 0 | 0 | 63 | 739 | 1,890 | 1.32 | 45 | 0.025 |
## ID-AD ActivityDate TotalSteps TotalDistance
## Min. :1.504e+14 Length:228 Min. : 24 Min. : 0.020
## 1st Qu.:3.977e+14 Class :character 1st Qu.: 4499 1st Qu.: 3.000
## Median :4.703e+14 Mode :character Median : 7482 Median : 5.620
## Mean :4.985e+14 Mean : 7977 Mean : 5.613
## 3rd Qu.:6.962e+14 3rd Qu.:11262 3rd Qu.: 7.888
## Max. :8.792e+14 Max. :17609 Max. :11.550
## LoggedActivitiesDistance VeryActiveDistance ModeratelyActiveDistance
## Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.265 Median :0.2400
## Mean :0.1638 Mean :1.275 Mean :0.6450
## 3rd Qu.:0.0000 3rd Qu.:2.180 3rd Qu.:0.9275
## Max. :4.8760 Max. :8.190 Max. :4.4900
## LightlyActiveDistance SedentaryActiveDistance VeryActiveMinutes
## Min. :0.020 Min. :0.000000 Min. : 0.00
## 1st Qu.:2.245 1st Qu.:0.000000 1st Qu.: 0.00
## Median :3.715 Median :0.000000 Median : 4.50
## Mean :3.662 Mean :0.001184 Mean : 22.73
## 3rd Qu.:4.683 3rd Qu.:0.000000 3rd Qu.: 33.00
## Max. :9.370 Max. :0.100000 Max. :202.00
## ModeratelyActiveMinutes LightlyActiveMinutes SedentaryMinutes Calories
## Min. : 0.00 Min. : 3.0 Min. : 146.0 Min. : 745
## 1st Qu.: 0.00 1st Qu.:155.8 1st Qu.: 632.2 1st Qu.:1958
## Median : 9.00 Median :217.0 Median : 727.5 Median :2260
## Mean : 16.18 Mean :216.9 Mean : 727.4 Mean :2462
## 3rd Qu.: 24.25 3rd Qu.:268.0 3rd Qu.: 804.2 3rd Qu.:2893
## Max. :114.00 Max. :506.0 Max. :1368.0 Max. :4430
## logId HoursSlept Id TotalIntensity
## Min. :1.111e+10 Min. : 0.020 Min. :1.504e+09 Min. : 25.00
## 1st Qu.:1.129e+10 1st Qu.: 1.393 1st Qu.:3.977e+09 1st Qu.: 46.00
## Median :1.132e+10 Median : 2.315 Median :4.703e+09 Median : 69.00
## Mean :1.132e+10 Mean : 3.401 Mean :4.985e+09 Mean : 78.88
## 3rd Qu.:1.135e+10 3rd Qu.: 5.265 3rd Qu.:6.962e+09 3rd Qu.:120.00
## Max. :1.137e+10 Max. :12.500 Max. :8.792e+09 Max. :139.00
## AverageIntensity
## Min. :0.01400
## 1st Qu.:0.02500
## Median :0.04200
## Mean :0.04231
## 3rd Qu.:0.06400
## Max. :0.07400
described_variables | n | na | mean | sd | se_mean | IQR | skewness | kurtosis | p00 | p01 | p05 | p10 | p20 | p25 | p30 | p40 | p50 | p60 | p70 | p75 | p80 | p90 | p95 | p99 | p100 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
TotalIntensity | 228 | 0 | 78.88 | 31.99 | 2.12 | 74.00 | 0.36 | -1.36 | 25.00 | 25.54 | 45.00 | 45.00 | 46.00 | 46.00 | 52.00 | 61.00 | 69.00 | 80.00 | 97.00 | 120.00 | 120.00 | 124.10 | 129.00 | 129.73 | 139.00 |
AverageIntensity | 228 | 0 | 0.04 | 0.02 | 0.00 | 0.04 | 0.45 | -1.23 | 0.01 | 0.02 | 0.02 | 0.02 | 0.03 | 0.03 | 0.03 | 0.03 | 0.04 | 0.04 | 0.05 | 0.06 | 0.06 | 0.06 | 0.07 | 0.07 | 0.07 |
TotalDistance | 228 | 0 | 5.61 | 3.03 | 0.20 | 4.89 | 0.10 | -1.05 | 0.02 | 0.43 | 0.96 | 1.63 | 2.45 | 3.00 | 3.65 | 4.44 | 5.62 | 6.80 | 7.70 | 7.89 | 8.25 | 9.89 | 10.61 | 11.36 | 11.55 |
TotalSteps | 228 | 0 | 7,976.65 | 4,247.19 | 281.28 | 6,763.50 | 0.12 | -0.99 | 24.00 | 691.31 | 1,483.80 | 2,324.80 | 3,444.00 | 4,499.00 | 5,351.00 | 6,485.00 | 7,481.50 | 9,725.00 | 10,763.40 | 11,262.50 | 12,103.20 | 13,991.50 | 14,862.85 | 16,610.25 | 17,609.00 |
Calories | 228 | 0 | 2,461.91 | 745.15 | 49.35 | 934.75 | 0.54 | -0.24 | 745.00 | 1,017.87 | 1,415.80 | 1,615.00 | 1,889.40 | 1,958.00 | 2,083.30 | 2,201.80 | 2,260.00 | 2,445.00 | 2,701.20 | 2,892.75 | 3,181.60 | 3,669.00 | 3,835.85 | 4,205.38 | 4,430.00 |
HoursSlept | 228 | 0 | 3.40 | 2.63 | 0.17 | 3.87 | 0.91 | 0.05 | 0.02 | 0.07 | 0.33 | 0.79 | 1.19 | 1.39 | 1.53 | 1.95 | 2.32 | 3.13 | 4.59 | 5.27 | 5.94 | 7.49 | 8.15 | 10.52 | 12.50 |
LightlyActiveDistance | 228 | 0 | 3.66 | 1.79 | 0.12 | 2.44 | 0.46 | 0.17 | 0.02 | 0.37 | 0.83 | 1.52 | 2.09 | 2.25 | 2.53 | 3.07 | 3.72 | 4.05 | 4.58 | 4.68 | 4.95 | 5.69 | 6.99 | 8.15 | 9.37 |
LightlyActiveMinutes | 228 | 0 | 216.94 | 85.22 | 5.64 | 112.25 | 0.42 | 1.09 | 3.00 | 32.40 | 73.00 | 116.40 | 145.00 | 155.75 | 170.30 | 195.00 | 217.00 | 239.20 | 258.00 | 268.00 | 280.80 | 314.30 | 331.00 | 501.95 | 506.00 |
SedentaryMinutes | 228 | 0 | 727.36 | 173.73 | 11.51 | 172.00 | 0.31 | 1.97 | 146.00 | 280.81 | 465.80 | 552.00 | 602.60 | 632.25 | 657.00 | 691.40 | 727.50 | 748.00 | 771.90 | 804.25 | 836.80 | 938.30 | 1,024.75 | 1,187.38 | 1,368.00 |
VeryActiveDistance | 228 | 0 | 1.27 | 1.78 | 0.12 | 2.18 | 1.65 | 2.48 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.27 | 0.97 | 1.73 | 2.18 | 2.41 | 3.82 | 4.91 | 7.15 | 8.19 |
ModeratelyActiveDistance | 228 | 0 | 0.65 | 0.89 | 0.06 | 0.93 | 2.07 | 4.75 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.16 | 0.24 | 0.53 | 0.79 | 0.93 | 1.14 | 1.76 | 2.60 | 4.21 | 4.49 |
ModeratelyActiveMinutes | 228 | 0 | 16.18 | 21.74 | 1.44 | 24.25 | 2.14 | 5.40 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 5.00 | 9.00 | 15.00 | 18.90 | 24.25 | 28.00 | 40.20 | 57.65 | 100.46 | 114.00 |
VeryActiveMinutes | 228 | 0 | 22.73 | 34.23 | 2.27 | 33.00 | 2.26 | 6.55 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 4.50 | 15.20 | 26.00 | 33.00 | 43.00 | 72.30 | 95.50 | 123.00 | 202.00 |
LoggedActivitiesDistance | 228 | 0 | 0.16 | 0.69 | 0.05 | 0.00 | 4.72 | 24.12 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 2.09 | 3.51 | 4.88 |
SedentaryActiveDistance | 228 | 0 | 0.00 | 0.01 | 0.00 | 0.00 | 9.38 | 102.16 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.04 | 0.10 |
variables | min | Q1 | mean | median | Q3 | max | zero | minus | outlier |
|---|---|---|---|---|---|---|---|---|---|
TotalSteps | 24.00 | 4,499.00 | 7,976.65 | 7,481.50 | 11,262.50 | 17,609.00 | 0 | 0 | 0 |
TotalDistance | 0.02 | 3.00 | 5.61 | 5.62 | 7.89 | 11.55 | 0 | 0 | 0 |
LoggedActivitiesDistance | 0.00 | 0.00 | 0.16 | 0.00 | 0.00 | 4.88 | 214 | 0 | 14 |
VeryActiveDistance | 0.00 | 0.00 | 1.27 | 0.27 | 2.18 | 8.19 | 94 | 0 | 8 |
ModeratelyActiveDistance | 0.00 | 0.00 | 0.65 | 0.24 | 0.93 | 4.49 | 82 | 0 | 14 |
LightlyActiveDistance | 0.02 | 2.25 | 3.66 | 3.72 | 4.68 | 9.37 | 0 | 0 | 1 |
SedentaryActiveDistance | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.10 | 220 | 0 | 8 |
VeryActiveMinutes | 0.00 | 0.00 | 22.73 | 4.50 | 33.00 | 202.00 | 92 | 0 | 18 |
ModeratelyActiveMinutes | 0.00 | 0.00 | 16.18 | 9.00 | 24.25 | 114.00 | 82 | 0 | 11 |
LightlyActiveMinutes | 3.00 | 155.75 | 216.94 | 217.00 | 268.00 | 506.00 | 0 | 0 | 4 |
SedentaryMinutes | 146.00 | 632.25 | 727.36 | 727.50 | 804.25 | 1,368.00 | 0 | 0 | 16 |
Calories | 745.00 | 1,958.00 | 2,461.91 | 2,260.00 | 2,892.75 | 4,430.00 | 0 | 0 | 2 |
HoursSlept | 0.02 | 1.39 | 3.40 | 2.32 | 5.27 | 12.50 | 0 | 0 | 2 |
TotalIntensity | 25.00 | 46.00 | 78.88 | 69.00 | 120.00 | 139.00 | 0 | 0 | 0 |
AverageIntensity | 0.01 | 0.03 | 0.04 | 0.04 | 0.06 | 0.07 | 0 | 0 | 0 |
##
## Anscombe-Glynn kurtosis test
##
## data: df_tot$TotalSteps
## kurt = 2.0065, z = -6.2249, p-value = 4.82e-10
## alternative hypothesis: kurtosis is not equal to 3
##
## Anscombe-Glynn kurtosis test
##
## data: df_tot$Calories
## kurt = 2.73955, z = -0.72288, p-value = 0.4698
## alternative hypothesis: kurtosis is not equal to 3
##
## Anscombe-Glynn kurtosis test
##
## data: df_tot$VeryActiveDistance
## kurt = 5.3966, z = 4.1207, p-value = 3.777e-05
## alternative hypothesis: kurtosis is not equal to 3
##
## Anscombe-Glynn kurtosis test
##
## data: df_tot$LightlyActiveMinutes
## kurt = 4.0398, z = 2.5349, p-value = 0.01125
## alternative hypothesis: kurtosis is not equal to 3
Our reports suggest we do not have any negative data. The variables with the greatest change after removing outliers are VeryActiveMinutes, SedentaryMinutes, LoggedActivitiesDistance, ModeratelyActiveDistance, and ModeratelyActiveMinutes (see FitnessSmartEDA from the SmartEDA library); VeryActiveMinutes changes the most, with a kurtosis of around 9.38 (using kurtosis()). We then used anscombe.test() to test whether the kurtosis equals 3, the value for a normal distribution, for TotalSteps, Calories, VeryActiveDistance, and LightlyActiveMinutes. Only Calories is consistent with a kurtosis of 3 (p ≈ 0.47); the test rejects it for TotalSteps, VeryActiveDistance, and LightlyActiveMinutes (p < 0.05), so of these only Calories can be described as somewhat normally distributed. Most of the data appears to be skewed right, with VeryActiveMinutes having a skewness of around 2.25 (using skewness()).
We can now explore patterns within the dataset, such as how much distance a person should travel in order to burn a certain number of calories and how sleep is related to a person's activity levels. First, however, we need to explore the heterogeneity of the distributions of the columns of data.
# -- Generate density functions
# Calories burned is the quantity of interest in this analysis.
# Its kernel density estimate appears to be bimodal:
ggplot(df_tot, aes(x = Calories)) +
  # The layer inherits both the data and the x mapping from ggplot().
  geom_density() +
  theme_linedraw(base_size = 18) +
  labs(title = "PDF: Calories", x = "Calories", y = "Density")
# -- Calories by quartile of SedentaryMinutes ----
# Bin every record into a quartile of SedentaryMinutes. cut() builds
# right-closed intervals, so a value equal to a quartile boundary falls into
# the lower bin.
cvar <- df_tot$SedentaryMinutes
quartiles <- quantile(cvar, c(0.25, 0.5, 0.75))
df_tot$qrts <- cut(
  cvar,
  breaks = c(-Inf, quartiles, Inf),
  labels = c("Q1", "Q2", "Q3", "Q4")
)
# Refer to the column by its bare name inside aes(): `df_tot$qrts` bypasses
# ggplot2's data masking and raises a "Use of `df_tot$qrts` is discouraged"
# warning.
ggplot(data = df_tot, aes(x = qrts, y = Calories, fill = qrts)) +
  # Lines inside each violin mark the 25th, 50th and 75th percentiles.
  geom_violin(trim = FALSE, draw_quantiles = c(0.25, 0.5, 0.75)) +
  # geom_boxplot(width = 0.10) +
  labs(
    x = "Sedentary Minutes",
    y = "Calories",
    # The geom is a violin, so the title names it as such.
    title = "Violin Plot: Sedentary Minutes vs Calories",
    fill = "Quantiles"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df_tot$qrts)))) +
  theme_linedraw(base_size = 18)
# -- Calories by quartile of LightlyActiveMinutes ----
# Bin every record into a quartile of LightlyActiveMinutes. cut() builds
# right-closed intervals, so a value equal to a quartile boundary falls into
# the lower bin.
cvar <- df_tot$LightlyActiveMinutes
quartiles <- quantile(cvar, c(0.25, 0.5, 0.75))
df_tot$qrts <- cut(
  cvar,
  breaks = c(-Inf, quartiles, Inf),
  labels = c("Q1", "Q2", "Q3", "Q4")
)
# Refer to the column by its bare name inside aes(): `df_tot$qrts` bypasses
# ggplot2's data masking and raises a "Use of `df_tot$qrts` is discouraged"
# warning.
ggplot(data = df_tot, aes(x = qrts, y = Calories, fill = qrts)) +
  # Lines inside each violin mark the 25th, 50th and 75th percentiles.
  geom_violin(trim = FALSE, draw_quantiles = c(0.25, 0.5, 0.75)) +
  # geom_boxplot(width = 0.10) +
  labs(
    x = "Light Activity Minutes",
    y = "Calories",
    # The geom is a violin, so the title names it as such.
    title = "Violin Plot: Light Activity Minutes vs Calories",
    fill = "Quantiles"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df_tot$qrts)))) +
  theme_linedraw(base_size = 18)
# -- Calories by quartile of ModeratelyActiveMinutes ----
# Bin every record into a quartile of ModeratelyActiveMinutes. cut() builds
# right-closed intervals, so a value equal to a quartile boundary falls into
# the lower bin (here the first quartile is 0, so every zero lands in "Q1").
cvar <- df_tot$ModeratelyActiveMinutes
quartiles <- quantile(cvar, c(0.25, 0.5, 0.75))
df_tot$qrts <- cut(
  cvar,
  breaks = c(-Inf, quartiles, Inf),
  labels = c("Q1", "Q2", "Q3", "Q4")
)
# Refer to the column by its bare name inside aes(): `df_tot$qrts` bypasses
# ggplot2's data masking and raises a "Use of `df_tot$qrts` is discouraged"
# warning.
ggplot(data = df_tot, aes(x = qrts, y = Calories, fill = qrts)) +
  # Lines inside each violin mark the 25th, 50th and 75th percentiles.
  geom_violin(trim = FALSE, draw_quantiles = c(0.25, 0.5, 0.75)) +
  # geom_boxplot(width = 0.10) +
  labs(
    x = "Moderate Activity Minutes",
    y = "Calories",
    # The geom is a violin, so the title names it as such.
    title = "Violin Plot: Moderate Activity Minutes vs Calories",
    fill = "Quantiles"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df_tot$qrts)))) +
  theme_linedraw(base_size = 18)
# -- Calories by quartile of VeryActiveMinutes ----
# Bin every record into a quartile of VeryActiveMinutes. cut() builds
# right-closed intervals, so a value equal to a quartile boundary falls into
# the lower bin (here the first quartile is 0, so every zero lands in "Q1").
cvar <- df_tot$VeryActiveMinutes
quartiles <- quantile(cvar, c(0.25, 0.5, 0.75))
df_tot$qrts <- cut(
  cvar,
  breaks = c(-Inf, quartiles, Inf),
  labels = c("Q1", "Q2", "Q3", "Q4")
)
# Refer to the column by its bare name inside aes(): `df_tot$qrts` bypasses
# ggplot2's data masking and raises a "Use of `df_tot$qrts` is discouraged"
# warning.
ggplot(data = df_tot, aes(x = qrts, y = Calories, fill = qrts)) +
  # Lines inside each violin mark the 25th, 50th and 75th percentiles.
  geom_violin(trim = FALSE, draw_quantiles = c(0.25, 0.5, 0.75)) +
  # geom_boxplot(width = 0.10) +
  labs(
    # Labels corrected from "Very Activity"; the geom is a violin, not a box.
    x = "Very Active Minutes",
    y = "Calories",
    title = "Violin Plot: Very Active Minutes vs Calories",
    fill = "Quantiles"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df_tot$qrts)))) +
  theme_linedraw(base_size = 18)
# Remark: SedentaryActiveDistance is skipped. Its three quartiles are all 0,
# so c(-Inf, quartiles, Inf) would contain duplicate breaks and cut() fails.

# -- Calories by quartile of LightlyActiveDistance ----
# Bin every record into a quartile of LightlyActiveDistance. cut() builds
# right-closed intervals, so a value equal to a quartile boundary falls into
# the lower bin.
cvar <- df_tot$LightlyActiveDistance
quartiles <- quantile(cvar, c(0.25, 0.5, 0.75))
df_tot$qrts <- cut(
  cvar,
  breaks = c(-Inf, quartiles, Inf),
  labels = c("Q1", "Q2", "Q3", "Q4")
)
# Refer to the column by its bare name inside aes(): `df_tot$qrts` bypasses
# ggplot2's data masking and raises a "Use of `df_tot$qrts` is discouraged"
# warning.
ggplot(data = df_tot, aes(x = qrts, y = Calories, fill = qrts)) +
  # Lines inside each violin mark the 25th, 50th and 75th percentiles.
  geom_violin(trim = FALSE, draw_quantiles = c(0.25, 0.5, 0.75)) +
  # geom_boxplot(width = 0.10) +
  labs(
    x = "Light Activity Distance",
    y = "Calories",
    # The geom is a violin, so the title names it as such.
    title = "Violin Plot: Light Activity Distance vs Calories",
    fill = "Quantiles"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df_tot$qrts)))) +
  theme_linedraw(base_size = 18)
# -- Calories by quartile of ModeratelyActiveDistance ----
# Bin every record into a quartile of ModeratelyActiveDistance. cut() builds
# right-closed intervals, so a value equal to a quartile boundary falls into
# the lower bin (here the first quartile is 0, so every zero lands in "Q1").
cvar <- df_tot$ModeratelyActiveDistance
quartiles <- quantile(cvar, c(0.25, 0.5, 0.75))
df_tot$qrts <- cut(
  cvar,
  breaks = c(-Inf, quartiles, Inf),
  labels = c("Q1", "Q2", "Q3", "Q4")
)
# Refer to the column by its bare name inside aes(): `df_tot$qrts` bypasses
# ggplot2's data masking and raises a "Use of `df_tot$qrts` is discouraged"
# warning.
ggplot(data = df_tot, aes(x = qrts, y = Calories, fill = qrts)) +
  # Lines inside each violin mark the 25th, 50th and 75th percentiles.
  geom_violin(trim = FALSE, draw_quantiles = c(0.25, 0.5, 0.75)) +
  # geom_boxplot(width = 0.10) +
  labs(
    x = "Moderate Activity Distance",
    y = "Calories",
    # The geom is a violin, so the title names it as such.
    title = "Violin Plot: Moderate Activity Distance vs Calories",
    fill = "Quantiles"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df_tot$qrts)))) +
  theme_linedraw(base_size = 18)
# -- Calories by quartile of VeryActiveDistance ----
# Bin every record into a quartile of VeryActiveDistance. cut() builds
# right-closed intervals, so a value equal to a quartile boundary falls into
# the lower bin (here the first quartile is 0, so every zero lands in "Q1").
cvar <- df_tot$VeryActiveDistance
quartiles <- quantile(cvar, c(0.25, 0.5, 0.75))
df_tot$qrts <- cut(
  cvar,
  breaks = c(-Inf, quartiles, Inf),
  labels = c("Q1", "Q2", "Q3", "Q4")
)
# Refer to the column by its bare name inside aes(): `df_tot$qrts` bypasses
# ggplot2's data masking and raises a "Use of `df_tot$qrts` is discouraged"
# warning.
ggplot(data = df_tot, aes(x = qrts, y = Calories, fill = qrts)) +
  # Lines inside each violin mark the 25th, 50th and 75th percentiles.
  geom_violin(trim = FALSE, draw_quantiles = c(0.25, 0.5, 0.75)) +
  # geom_boxplot(width = 0.10) +
  labs(
    # Labels corrected from "Very Activity"; the geom is a violin, not a box.
    x = "Very Active Distance",
    y = "Calories",
    title = "Violin Plot: Very Active Distance vs Calories",
    fill = "Quantiles"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df_tot$qrts)))) +
  theme_linedraw(base_size = 18)
In the LightlyActiveMinutes plot, the first and third quartiles are bimodally distributed, and a significant proportion of the fourth (highest) quartile lies in a skinny tail (the violin plot in yellow), suggesting a roughly uniform spread. In the ModeratelyActiveMinutes plot, the third and fourth quartiles have similar distributions, and the same holds for the first and second quartiles in the VeryActiveMinutes plot. This suggests the upper and lower tails of the variables are not similarly distributed, so we can categorize the activity levels based on multiple columns. Similar results are obtained when substituting distance for minutes, but the qualitative shape of the distributions is less heterogeneous.
After exploring various methods, I settled on using, for each variable
(VeryActiveMinutes, ModeratelyActiveMinutes, and LightlyActiveMinutes),
the greater of its two measures of central tendency (mean and median)
as a threshold. An observation is classified as Very Active if its
VeryActiveMinutes is greater than or equal to that column's threshold;
otherwise it falls into the Moderately Active, Lightly Active, or
Sedentary category. Among the remaining observations, one is classified
as Moderately Active if its ModeratelyActiveMinutes is greater than or
equal to the threshold of its respective column. The same rule is then
applied to LightlyActiveMinutes for the Lightly Active category. If an
observation does not fall into any of the three classifications, it is
Sedentary.
A similar method can be applied when substituting Distance for
Minutes, but the results do not change much (there is just a slight
difference in where each observation is categorized, which can be seen
in the bar chart).
# -- Group by using the binning method -- #
# In the absence of categorical data, we will create factor variables based on
# the numeric variables.
#
# Each row of df_tot gets the first label whose rule it satisfies, where the
# threshold of a column is the larger of its median and its mean:
#   1. VeryActiveMinutes       >= threshold -> "Very Active"
#   2. ModeratelyActiveMinutes >= threshold -> "Moderately Active"
#   3. LightlyActiveMinutes    >= threshold -> "Lightly Active"
#   4. otherwise                            -> "Sedentary"
# case_when() returns the first matching rule, so a later rule does not need
# to repeat the negation of the earlier ones. This relies on the three columns
# having no missing values, which the descriptive tables for df_tot show.
#
# mutate() + select() is used because the result has one row per input row;
# summarise() returning more than one row per group is deprecated since
# dplyr 1.1.0.
df_tot_user_mins <- df_tot %>%
  mutate(
    user_type = factor(
      case_when(
        VeryActiveMinutes >=
          max(median(VeryActiveMinutes), mean(VeryActiveMinutes)) ~
          "Very Active",
        ModeratelyActiveMinutes >=
          max(median(ModeratelyActiveMinutes), mean(ModeratelyActiveMinutes)) ~
          "Moderately Active",
        LightlyActiveMinutes >=
          max(median(LightlyActiveMinutes), mean(LightlyActiveMinutes)) ~
          "Lightly Active",
        # Kept explicit (rather than a catch-all) so an NA comparison stays NA.
        LightlyActiveMinutes <
          max(median(LightlyActiveMinutes), mean(LightlyActiveMinutes)) ~
          "Sedentary"
      ),
      levels = c("Very Active", "Moderately Active", "Lightly Active", "Sedentary")
    )
  ) %>%
  select(user_type, Calories, HoursSlept, TotalDistance, TotalIntensity)
# Observe the aggregated data categorized by minutes
# glimpse() prints the column overview; flextable() renders the full table.
glimpse(df_tot_user_mins)
flextable(df_tot_user_mins)
## Rows: 228
## Columns: 5
## $ user_type <fct> Very Active, Very Active, Very Active, Very Active, Ver…
## $ Calories <dbl> 1819, 2154, 2154, 1944, 1944, 1932, 1886, 1889, 1868, 1…
## $ HoursSlept <dbl> 6.42, 7.60, 0.23, 2.13, 5.48, 1.27, 6.28, 5.58, 5.08, 8…
## $ TotalDistance <dbl> 7.11, 11.55, 11.55, 8.53, 8.53, 8.93, 7.85, 7.86, 7.87,…
## $ TotalIntensity <dbl> 139, 130, 130, 97, 97, 76, 56, 61, 52, 129, 96, 69, 46,…
user_type | Calories | HoursSlept | TotalDistance | TotalIntensity |
|---|---|---|---|---|
Very Active | 1,819 | 6.42 | 7.11 | 139 |
Very Active | 2,154 | 7.60 | 11.55 | 130 |
Very Active | 2,154 | 0.23 | 11.55 | 130 |
Very Active | 1,944 | 2.13 | 8.53 | 97 |
Very Active | 1,944 | 5.48 | 8.53 | 97 |
Very Active | 1,932 | 1.27 | 8.93 | 76 |
Very Active | 1,886 | 6.28 | 7.85 | 56 |
Very Active | 1,889 | 5.58 | 7.86 | 61 |
Very Active | 1,868 | 5.08 | 7.87 | 52 |
Very Active | 1,843 | 8.47 | 7.25 | 129 |
Moderately Active | 1,850 | 7.08 | 6.37 | 96 |
Very Active | 2,030 | 5.60 | 9.80 | 69 |
Very Active | 2,083 | 5.13 | 9.73 | 46 |
Very Active | 1,883 | 6.77 | 8.10 | 45 |
Very Active | 1,755 | 7.95 | 6.98 | 120 |
Very Active | 1,811 | 5.47 | 7.26 | 67 |
Sedentary | 1,636 | 3.15 | 1.88 | 96 |
Sedentary | 1,807 | 10.58 | 3.29 | 45 |
Lightly Active | 2,783 | 5.23 | 3.92 | 129 |
Sedentary | 2,449 | 1.83 | 2.21 | 96 |
Sedentary | 2,380 | 3.13 | 1.63 | 69 |
Sedentary | 2,344 | 6.98 | 1.55 | 46 |
Sedentary | 2,202 | 7.13 | 0.87 | 122 |
Sedentary | 2,443 | 0.77 | 1.75 | 45 |
Sedentary | 2,442 | 5.13 | 1.46 | 120 |
Sedentary | 2,442 | 1.03 | 1.46 | 120 |
Sedentary | 2,255 | 2.05 | 0.84 | 67 |
Sedentary | 942 | 2.13 | 0.02 | 25 |
Sedentary | 1,407 | 2.45 | 1.62 | 52 |
Sedentary | 1,237 | 2.55 | 0.76 | 129 |
Sedentary | 1,330 | 2.97 | 1.43 | 96 |
Lightly Active | 1,583 | 2.00 | 4.14 | 69 |
Lightly Active | 1,538 | 2.57 | 3.50 | 46 |
Sedentary | 1,421 | 2.17 | 1.97 | 122 |
Sedentary | 1,392 | 2.95 | 1.72 | 80 |
Sedentary | 1,425 | 2.92 | 2.33 | 45 |
Sedentary | 1,413 | 2.82 | 1.92 | 45 |
Lightly Active | 1,515 | 2.72 | 3.19 | 120 |
Sedentary | 1,405 | 3.18 | 2.03 | 67 |
Moderately Active | 2,041 | 1.82 | 6.79 | 56 |
Moderately Active | 2,187 | 1.57 | 7.10 | 99 |
Lightly Active | 1,929 | 2.28 | 4.47 | 61 |
Very Active | 2,438 | 7.63 | 10.22 | 129 |
Very Active | 2,438 | 0.05 | 10.22 | 129 |
Lightly Active | 2,035 | 2.50 | 4.95 | 96 |
Moderately Active | 2,099 | 2.08 | 6.80 | 69 |
Lightly Active | 2,096 | 0.62 | 6.82 | 46 |
Moderately Active | 2,338 | 1.25 | 6.85 | 80 |
Moderately Active | 2,488 | 0.33 | 10.63 | 45 |
Lightly Active | 2,164 | 1.93 | 6.83 | 120 |
Sedentary | 1,288 | 7.15 | 3.01 | 52 |
Lightly Active | 1,490 | 4.82 | 5.10 | 129 |
Very Active | 1,630 | 5.97 | 7.70 | 96 |
Moderately Active | 1,648 | 6.22 | 7.99 | 69 |
Very Active | 1,649 | 8.08 | 8.23 | 46 |
Very Active | 1,783 | 7.08 | 10.32 | 122 |
Moderately Active | 1,431 | 6.73 | 5.63 | 80 |
Moderately Active | 1,524 | 7.98 | 6.84 | 45 |
Very Active | 1,697 | 6.95 | 9.76 | 45 |
Sedentary | 1,232 | 7.52 | 2.39 | 120 |
Sedentary | 1,223 | 6.90 | 2.14 | 67 |
Lightly Active | 2,990 | 6.00 | 3.97 | 98 |
Lightly Active | 2,990 | 1.12 | 3.97 | 98 |
Sedentary | 2,480 | 5.37 | 2.31 | 53 |
Sedentary | 2,570 | 4.82 | 2.17 | 27 |
Sedentary | 3,016 | 5.67 | 4.23 | 53 |
Very Active | 3,830 | 8.20 | 8.99 | 73 |
Sedentary | 3,706 | 6.77 | 6.41 | 70 |
Lightly Active | 3,418 | 2.43 | 3.87 | 99 |
Lightly Active | 3,418 | 3.98 | 3.87 | 99 |
Moderately Active | 3,439 | 4.45 | 5.33 | 61 |
Moderately Active | 3,338 | 4.08 | 3.84 | 52 |
Moderately Active | 3,338 | 2.92 | 3.84 | 52 |
Moderately Active | 3,338 | 0.45 | 3.84 | 52 |
Sedentary | 2,892 | 3.12 | 3.10 | 129 |
Lightly Active | 3,313 | 5.00 | 4.38 | 96 |
Lightly Active | 3,313 | 1.62 | 4.38 | 96 |
Moderately Active | 3,118 | 7.95 | 4.60 | 69 |
Lightly Active | 3,253 | 4.27 | 5.36 | 122 |
Sedentary | 2,817 | 1.22 | 3.68 | 120 |
Sedentary | 2,507 | 3.75 | 2.15 | 67 |
Sedentary | 1,958 | 1.60 | 3.79 | 56 |
Sedentary | 1,958 | 1.73 | 3.79 | 56 |
Moderately Active | 2,129 | 0.77 | 5.46 | 61 |
Moderately Active | 2,216 | 1.55 | 6.27 | 52 |
Moderately Active | 2,216 | 0.72 | 6.27 | 52 |
Lightly Active | 2,154 | 0.97 | 5.66 | 129 |
Moderately Active | 2,178 | 1.03 | 5.52 | 96 |
Moderately Active | 2,308 | 1.52 | 6.76 | 69 |
Lightly Active | 2,201 | 1.15 | 5.63 | 46 |
Moderately Active | 2,341 | 4.30 | 7.35 | 122 |
Moderately Active | 2,341 | 0.80 | 7.35 | 122 |
Sedentary | 2,010 | 3.75 | 1.55 | 56 |
Sedentary | 2,010 | 4.18 | 1.55 | 56 |
Lightly Active | 2,227 | 4.90 | 3.22 | 61 |
Lightly Active | 2,227 | 2.00 | 3.22 | 61 |
Sedentary | 2,133 | 5.60 | 2.19 | 52 |
Lightly Active | 2,317 | 8.23 | 3.63 | 129 |
Lightly Active | 2,217 | 8.25 | 2.84 | 46 |
Very Active | 2,374 | 7.48 | 4.78 | 122 |
Very Active | 2,335 | 4.45 | 4.61 | 80 |
Lightly Active | 2,303 | 1.68 | 3.72 | 45 |
Lightly Active | 2,303 | 2.17 | 3.72 | 45 |
Lightly Active | 2,210 | 3.38 | 2.81 | 45 |
Lightly Active | 2,210 | 1.15 | 2.81 | 45 |
Lightly Active | 2,210 | 2.80 | 2.81 | 45 |
Lightly Active | 2,424 | 4.87 | 4.45 | 120 |
Lightly Active | 2,424 | 1.25 | 4.45 | 120 |
Very Active | 2,297 | 4.55 | 4.13 | 67 |
Very Active | 2,297 | 2.55 | 4.13 | 67 |
Sedentary | 745 | 5.52 | 0.19 | 25 |
Sedentary | 1,680 | 1.43 | 2.10 | 129 |
Lightly Active | 2,886 | 1.15 | 5.28 | 56 |
Lightly Active | 2,915 | 1.53 | 6.15 | 99 |
Lightly Active | 2,895 | 1.00 | 5.65 | 61 |
Lightly Active | 2,923 | 2.00 | 5.81 | 52 |
Moderately Active | 3,323 | 2.00 | 8.38 | 129 |
Moderately Active | 3,357 | 1.95 | 9.83 | 96 |
Moderately Active | 2,931 | 1.33 | 6.36 | 69 |
Sedentary | 2,848 | 1.32 | 5.41 | 46 |
Lightly Active | 2,943 | 1.75 | 6.18 | 122 |
Sedentary | 2,822 | 1.63 | 5.34 | 80 |
Moderately Active | 3,597 | 7.20 | 11.36 | 45 |
Moderately Active | 3,597 | 2.57 | 11.36 | 45 |
Moderately Active | 3,224 | 2.02 | 9.03 | 120 |
Moderately Active | 3,224 | 0.87 | 9.03 | 120 |
Sedentary | 2,677 | 3.80 | 5.27 | 67 |
Sedentary | 1,615 | 11.58 | 1.92 | 129 |
Sedentary | 1,615 | 3.43 | 1.92 | 129 |
Sedentary | 1,481 | 12.50 | 0.99 | 96 |
Sedentary | 1,481 | 1.47 | 0.99 | 96 |
Very Active | 1,892 | 1.53 | 5.88 | 69 |
Very Active | 2,086 | 0.38 | 7.38 | 46 |
Very Active | 2,044 | 0.58 | 8.00 | 122 |
Very Active | 2,249 | 0.08 | 10.08 | 80 |
Sedentary | 1,692 | 9.12 | 3.00 | 45 |
Sedentary | 1,692 | 2.28 | 3.00 | 45 |
Moderately Active | 1,712 | 10.35 | 3.65 | 120 |
Moderately Active | 1,712 | 1.43 | 3.65 | 120 |
Very Active | 2,065 | 1.65 | 7.77 | 67 |
Very Active | 3,625 | 2.20 | 7.87 | 52 |
Very Active | 4,430 | 5.17 | 11.11 | 129 |
Very Active | 4,430 | 1.17 | 11.11 | 129 |
Very Active | 3,427 | 2.20 | 7.41 | 96 |
Very Active | 3,492 | 2.27 | 5.56 | 69 |
Very Active | 3,597 | 2.17 | 6.70 | 46 |
Very Active | 3,597 | 0.95 | 6.70 | 46 |
Very Active | 3,765 | 1.60 | 8.30 | 122 |
Very Active | 2,775 | 2.35 | 3.36 | 80 |
Sedentary | 2,486 | 2.22 | 3.26 | 45 |
Very Active | 3,817 | 4.43 | 7.84 | 45 |
Very Active | 3,817 | 1.88 | 7.84 | 45 |
Very Active | 3,817 | 0.28 | 7.84 | 45 |
Very Active | 3,378 | 1.53 | 7.38 | 120 |
Lightly Active | 2,210 | 4.28 | 5.62 | 52 |
Lightly Active | 2,210 | 3.93 | 5.62 | 52 |
Lightly Active | 2,210 | 1.50 | 5.62 | 52 |
Lightly Active | 2,445 | 1.05 | 7.48 | 46 |
Lightly Active | 2,445 | 0.02 | 7.48 | 46 |
Lightly Active | 2,694 | 1.72 | 9.37 | 122 |
Lightly Active | 2,617 | 3.73 | 8.15 | 45 |
Lightly Active | 2,617 | 2.95 | 8.15 | 45 |
Lightly Active | 2,617 | 1.05 | 8.15 | 45 |
Very Active | 3,065 | 0.20 | 5.18 | 52 |
Very Active | 3,920 | 7.58 | 8.43 | 122 |
Very Active | 3,920 | 0.17 | 8.43 | 122 |
Very Active | 3,856 | 1.82 | 10.03 | 80 |
Very Active | 3,856 | 2.13 | 10.03 | 80 |
Moderately Active | 2,244 | 6.75 | 10.24 | 99 |
Moderately Active | 2,244 | 0.42 | 10.24 | 99 |
Very Active | 2,188 | 1.90 | 9.32 | 61 |
Very Active | 2,188 | 0.07 | 9.32 | 61 |
Very Active | 2,115 | 6.90 | 8.30 | 129 |
Moderately Active | 2,055 | 8.25 | 8.14 | 96 |
Moderately Active | 2,055 | 1.18 | 8.14 | 96 |
Very Active | 2,158 | 5.90 | 7.94 | 46 |
Very Active | 2,158 | 0.83 | 7.94 | 46 |
Very Active | 2,170 | 0.22 | 8.86 | 122 |
Very Active | 2,244 | 0.33 | 9.65 | 80 |
Very Active | 2,231 | 6.18 | 8.26 | 45 |
Very Active | 2,231 | 1.37 | 8.26 | 45 |
Lightly Active | 2,100 | 7.02 | 6.71 | 120 |
Lightly Active | 2,100 | 0.10 | 6.71 | 120 |
Very Active | 917 | 6.43 | 3.90 | 25 |
Lightly Active | 2,575 | 1.13 | 7.34 | 129 |
Very Active | 3,086 | 1.20 | 11.24 | 96 |
Very Active | 2,676 | 1.15 | 9.14 | 46 |
Very Active | 2,840 | 1.08 | 10.58 | 80 |
Very Active | 2,496 | 0.60 | 7.67 | 52 |
Very Active | 2,439 | 1.50 | 4.99 | 46 |
Sedentary | 2,233 | 1.08 | 4.73 | 122 |
Very Active | 2,862 | 1.75 | 10.82 | 80 |
Very Active | 2,519 | 8.10 | 4.67 | 45 |
Sedentary | 1,799 | 0.73 | 0.35 | 120 |
Very Active | 2,543 | 1.80 | 3.92 | 67 |
Very Active | 3,669 | 2.52 | 11.24 | 52 |
Very Active | 3,669 | 0.30 | 11.24 | 52 |
Sedentary | 2,702 | 3.23 | 2.66 | 129 |
Sedentary | 2,702 | 1.30 | 2.66 | 129 |
Sedentary | 2,702 | 2.03 | 2.66 | 129 |
Very Active | 3,304 | 2.05 | 7.26 | 96 |
Very Active | 4,234 | 3.00 | 11.05 | 69 |
Very Active | 4,128 | 2.10 | 10.19 | 46 |
Very Active | 4,128 | 0.90 | 10.19 | 46 |
Very Active | 3,798 | 1.25 | 7.24 | 122 |
Very Active | 3,798 | 1.40 | 7.24 | 122 |
Very Active | 3,839 | 1.52 | 7.71 | 80 |
Very Active | 3,839 | 1.45 | 7.71 | 80 |
Very Active | 3,839 | 0.87 | 7.71 | 80 |
Very Active | 3,713 | 1.10 | 6.62 | 45 |
Sedentary | 2,606 | 2.95 | 1.78 | 45 |
Sedentary | 2,606 | 1.55 | 1.78 | 45 |
Sedentary | 2,624 | 8.17 | 2.09 | 120 |
Sedentary | 2,624 | 1.65 | 2.09 | 120 |
Very Active | 3,775 | 1.67 | 7.01 | 67 |
Sedentary | 2,260 | 5.63 | 2.94 | 52 |
Sedentary | 2,260 | 2.73 | 2.94 | 52 |
Lightly Active | 2,667 | 8.60 | 4.63 | 96 |
Sedentary | 2,229 | 5.58 | 2.45 | 69 |
Sedentary | 2,229 | 4.30 | 2.45 | 69 |
Sedentary | 2,100 | 6.88 | 1.49 | 46 |
Sedentary | 2,114 | 7.70 | 1.36 | 122 |
Sedentary | 1,961 | 7.90 | 0.83 | 80 |
Sedentary | 1,953 | 2.95 | 0.94 | 45 |
Sedentary | 1,953 | 4.47 | 0.94 | 45 |
Sedentary | 1,890 | 4.67 | 0.65 | 45 |
Sedentary | 1,890 | 4.60 | 0.65 | 45 |
Sedentary | 1,890 | 1.32 | 0.65 | 45 |
# Print the number of records in each user_type level together with the
# min / quartiles / mean / max of every numeric column.
summary(df_tot_user_mins)
## user_type Calories HoursSlept TotalDistance
## Very Active :80 Min. : 745 Min. : 0.020 Min. : 0.020
## Moderately Active:34 1st Qu.:1958 1st Qu.: 1.393 1st Qu.: 3.000
## Lightly Active :47 Median :2260 Median : 2.315 Median : 5.620
## Sedentary :67 Mean :2462 Mean : 3.401 Mean : 5.613
## 3rd Qu.:2893 3rd Qu.: 5.265 3rd Qu.: 7.888
## Max. :4430 Max. :12.500 Max. :11.550
## TotalIntensity
## Min. : 25.00
## 1st Qu.: 46.00
## Median : 69.00
## Mean : 78.88
## 3rd Qu.:120.00
## Max. :139.00
# -- Same binning method, using the distance columns instead of minutes -- #
# Each row of df_tot gets the first label whose rule it satisfies, where the
# threshold of a column is the larger of its median and its mean:
#   1. VeryActiveDistance       >= threshold -> "Very Active"
#   2. ModeratelyActiveDistance >= threshold -> "Moderately Active"
#   3. LightlyActiveDistance    >= threshold -> "Lightly Active"
#   4. otherwise                             -> "Sedentary"
# case_when() returns the first matching rule, so a later rule does not need
# to repeat the negation of the earlier ones. This relies on the three columns
# having no missing values, which the descriptive tables for df_tot show.
#
# mutate() + select() is used because the result has one row per input row;
# summarise() returning more than one row per group is deprecated since
# dplyr 1.1.0.
df_tot_user_dist <- df_tot %>%
  mutate(
    user_type = factor(
      case_when(
        VeryActiveDistance >=
          max(median(VeryActiveDistance), mean(VeryActiveDistance)) ~
          "Very Active",
        ModeratelyActiveDistance >=
          max(median(ModeratelyActiveDistance), mean(ModeratelyActiveDistance)) ~
          "Moderately Active",
        LightlyActiveDistance >=
          max(median(LightlyActiveDistance), mean(LightlyActiveDistance)) ~
          "Lightly Active",
        # Kept explicit (rather than a catch-all) so an NA comparison stays NA.
        LightlyActiveDistance <
          max(median(LightlyActiveDistance), mean(LightlyActiveDistance)) ~
          "Sedentary"
      ),
      levels = c("Very Active", "Moderately Active", "Lightly Active", "Sedentary")
    )
  ) %>%
  select(user_type, Calories, HoursSlept, TotalDistance, TotalIntensity)
# Observe the aggregated data:
# glimpse() prints the column overview; flextable() renders the full table.
glimpse(df_tot_user_dist)
flextable(df_tot_user_dist)
## Rows: 228
## Columns: 5
## $ user_type <fct> Very Active, Very Active, Very Active, Very Active, Ver…
## $ Calories <dbl> 1819, 2154, 2154, 1944, 1944, 1932, 1886, 1889, 1868, 1…
## $ HoursSlept <dbl> 6.42, 7.60, 0.23, 2.13, 5.48, 1.27, 6.28, 5.58, 5.08, 8…
## $ TotalDistance <dbl> 7.11, 11.55, 11.55, 8.53, 8.53, 8.93, 7.85, 7.86, 7.87,…
## $ TotalIntensity <dbl> 139, 130, 130, 97, 97, 76, 56, 61, 52, 129, 96, 69, 46,…
user_type | Calories | HoursSlept | TotalDistance | TotalIntensity |
|---|---|---|---|---|
Very Active | 1,819 | 6.42 | 7.11 | 139 |
Very Active | 2,154 | 7.60 | 11.55 | 130 |
Very Active | 2,154 | 0.23 | 11.55 | 130 |
Very Active | 1,944 | 2.13 | 8.53 | 97 |
Very Active | 1,944 | 5.48 | 8.53 | 97 |
Very Active | 1,932 | 1.27 | 8.93 | 76 |
Very Active | 1,886 | 6.28 | 7.85 | 56 |
Very Active | 1,889 | 5.58 | 7.86 | 61 |
Very Active | 1,868 | 5.08 | 7.87 | 52 |
Very Active | 1,843 | 8.47 | 7.25 | 129 |
Moderately Active | 1,850 | 7.08 | 6.37 | 96 |
Very Active | 2,030 | 5.60 | 9.80 | 69 |
Very Active | 2,083 | 5.13 | 9.73 | 46 |
Very Active | 1,883 | 6.77 | 8.10 | 45 |
Very Active | 1,755 | 7.95 | 6.98 | 120 |
Very Active | 1,811 | 5.47 | 7.26 | 67 |
Sedentary | 1,636 | 3.15 | 1.88 | 96 |
Sedentary | 1,807 | 10.58 | 3.29 | 45 |
Lightly Active | 2,783 | 5.23 | 3.92 | 129 |
Sedentary | 2,449 | 1.83 | 2.21 | 96 |
Sedentary | 2,380 | 3.13 | 1.63 | 69 |
Sedentary | 2,344 | 6.98 | 1.55 | 46 |
Sedentary | 2,202 | 7.13 | 0.87 | 122 |
Sedentary | 2,443 | 0.77 | 1.75 | 45 |
Sedentary | 2,442 | 5.13 | 1.46 | 120 |
Sedentary | 2,442 | 1.03 | 1.46 | 120 |
Sedentary | 2,255 | 2.05 | 0.84 | 67 |
Sedentary | 942 | 2.13 | 0.02 | 25 |
Sedentary | 1,407 | 2.45 | 1.62 | 52 |
Sedentary | 1,237 | 2.55 | 0.76 | 129 |
Sedentary | 1,330 | 2.97 | 1.43 | 96 |
Lightly Active | 1,583 | 2.00 | 4.14 | 69 |
Sedentary | 1,538 | 2.57 | 3.50 | 46 |
Sedentary | 1,421 | 2.17 | 1.97 | 122 |
Sedentary | 1,392 | 2.95 | 1.72 | 80 |
Sedentary | 1,425 | 2.92 | 2.33 | 45 |
Sedentary | 1,413 | 2.82 | 1.92 | 45 |
Sedentary | 1,515 | 2.72 | 3.19 | 120 |
Sedentary | 1,405 | 3.18 | 2.03 | 67 |
Moderately Active | 2,041 | 1.82 | 6.79 | 56 |
Very Active | 2,187 | 1.57 | 7.10 | 99 |
Lightly Active | 1,929 | 2.28 | 4.47 | 61 |
Very Active | 2,438 | 7.63 | 10.22 | 129 |
Very Active | 2,438 | 0.05 | 10.22 | 129 |
Lightly Active | 2,035 | 2.50 | 4.95 | 96 |
Very Active | 2,099 | 2.08 | 6.80 | 69 |
Very Active | 2,096 | 0.62 | 6.82 | 46 |
Moderately Active | 2,338 | 1.25 | 6.85 | 80 |
Moderately Active | 2,488 | 0.33 | 10.63 | 45 |
Lightly Active | 2,164 | 1.93 | 6.83 | 120 |
Sedentary | 1,288 | 7.15 | 3.01 | 52 |
Lightly Active | 1,490 | 4.82 | 5.10 | 129 |
Very Active | 1,630 | 5.97 | 7.70 | 96 |
Moderately Active | 1,648 | 6.22 | 7.99 | 69 |
Very Active | 1,649 | 8.08 | 8.23 | 46 |
Very Active | 1,783 | 7.08 | 10.32 | 122 |
Moderately Active | 1,431 | 6.73 | 5.63 | 80 |
Moderately Active | 1,524 | 7.98 | 6.84 | 45 |
Very Active | 1,697 | 6.95 | 9.76 | 45 |
Sedentary | 1,232 | 7.52 | 2.39 | 120 |
Sedentary | 1,223 | 6.90 | 2.14 | 67 |
Lightly Active | 2,990 | 6.00 | 3.97 | 98 |
Lightly Active | 2,990 | 1.12 | 3.97 | 98 |
Sedentary | 2,480 | 5.37 | 2.31 | 53 |
Sedentary | 2,570 | 4.82 | 2.17 | 27 |
Lightly Active | 3,016 | 5.67 | 4.23 | 53 |
Very Active | 3,830 | 8.20 | 8.99 | 73 |
Sedentary | 3,706 | 6.77 | 6.41 | 70 |
Lightly Active | 3,418 | 2.43 | 3.87 | 99 |
Lightly Active | 3,418 | 3.98 | 3.87 | 99 |
Moderately Active | 3,439 | 4.45 | 5.33 | 61 |
Sedentary | 3,338 | 4.08 | 3.84 | 52 |
Sedentary | 3,338 | 2.92 | 3.84 | 52 |
Sedentary | 3,338 | 0.45 | 3.84 | 52 |
Sedentary | 2,892 | 3.12 | 3.10 | 129 |
Lightly Active | 3,313 | 5.00 | 4.38 | 96 |
Lightly Active | 3,313 | 1.62 | 4.38 | 96 |
Moderately Active | 3,118 | 7.95 | 4.60 | 69 |
Lightly Active | 3,253 | 4.27 | 5.36 | 122 |
Sedentary | 2,817 | 1.22 | 3.68 | 120 |
Sedentary | 2,507 | 3.75 | 2.15 | 67 |
Sedentary | 1,958 | 1.60 | 3.79 | 56 |
Sedentary | 1,958 | 1.73 | 3.79 | 56 |
Lightly Active | 2,129 | 0.77 | 5.46 | 61 |
Moderately Active | 2,216 | 1.55 | 6.27 | 52 |
Moderately Active | 2,216 | 0.72 | 6.27 | 52 |
Lightly Active | 2,154 | 0.97 | 5.66 | 129 |
Moderately Active | 2,178 | 1.03 | 5.52 | 96 |
Moderately Active | 2,308 | 1.52 | 6.76 | 69 |
Lightly Active | 2,201 | 1.15 | 5.63 | 46 |
Moderately Active | 2,341 | 4.30 | 7.35 | 122 |
Moderately Active | 2,341 | 0.80 | 7.35 | 122 |
Sedentary | 2,010 | 3.75 | 1.55 | 56 |
Sedentary | 2,010 | 4.18 | 1.55 | 56 |
Sedentary | 2,227 | 4.90 | 3.22 | 61 |
Sedentary | 2,227 | 2.00 | 3.22 | 61 |
Sedentary | 2,133 | 5.60 | 2.19 | 52 |
Sedentary | 2,317 | 8.23 | 3.63 | 129 |
Sedentary | 2,217 | 8.25 | 2.84 | 46 |
Very Active | 2,374 | 7.48 | 4.78 | 122 |
Very Active | 2,335 | 4.45 | 4.61 | 80 |
Lightly Active | 2,303 | 1.68 | 3.72 | 45 |
Lightly Active | 2,303 | 2.17 | 3.72 | 45 |
Sedentary | 2,210 | 3.38 | 2.81 | 45 |
Sedentary | 2,210 | 1.15 | 2.81 | 45 |
Sedentary | 2,210 | 2.80 | 2.81 | 45 |
Lightly Active | 2,424 | 4.87 | 4.45 | 120 |
Lightly Active | 2,424 | 1.25 | 4.45 | 120 |
Very Active | 2,297 | 4.55 | 4.13 | 67 |
Very Active | 2,297 | 2.55 | 4.13 | 67 |
Sedentary | 745 | 5.52 | 0.19 | 25 |
Sedentary | 1,680 | 1.43 | 2.10 | 129 |
Lightly Active | 2,886 | 1.15 | 5.28 | 56 |
Lightly Active | 2,915 | 1.53 | 6.15 | 99 |
Lightly Active | 2,895 | 1.00 | 5.65 | 61 |
Lightly Active | 2,923 | 2.00 | 5.81 | 52 |
Moderately Active | 3,323 | 2.00 | 8.38 | 129 |
Moderately Active | 3,357 | 1.95 | 9.83 | 96 |
Moderately Active | 2,931 | 1.33 | 6.36 | 69 |
Moderately Active | 2,848 | 1.32 | 5.41 | 46 |
Lightly Active | 2,943 | 1.75 | 6.18 | 122 |
Lightly Active | 2,822 | 1.63 | 5.34 | 80 |
Moderately Active | 3,597 | 7.20 | 11.36 | 45 |
Moderately Active | 3,597 | 2.57 | 11.36 | 45 |
Moderately Active | 3,224 | 2.02 | 9.03 | 120 |
Moderately Active | 3,224 | 0.87 | 9.03 | 120 |
Lightly Active | 2,677 | 3.80 | 5.27 | 67 |
Sedentary | 1,615 | 11.58 | 1.92 | 129 |
Sedentary | 1,615 | 3.43 | 1.92 | 129 |
Sedentary | 1,481 | 12.50 | 0.99 | 96 |
Sedentary | 1,481 | 1.47 | 0.99 | 96 |
Very Active | 1,892 | 1.53 | 5.88 | 69 |
Very Active | 2,086 | 0.38 | 7.38 | 46 |
Very Active | 2,044 | 0.58 | 8.00 | 122 |
Very Active | 2,249 | 0.08 | 10.08 | 80 |
Sedentary | 1,692 | 9.12 | 3.00 | 45 |
Sedentary | 1,692 | 2.28 | 3.00 | 45 |
Moderately Active | 1,712 | 10.35 | 3.65 | 120 |
Moderately Active | 1,712 | 1.43 | 3.65 | 120 |
Very Active | 2,065 | 1.65 | 7.77 | 67 |
Very Active | 3,625 | 2.20 | 7.87 | 52 |
Very Active | 4,430 | 5.17 | 11.11 | 129 |
Very Active | 4,430 | 1.17 | 11.11 | 129 |
Very Active | 3,427 | 2.20 | 7.41 | 96 |
Very Active | 3,492 | 2.27 | 5.56 | 69 |
Very Active | 3,597 | 2.17 | 6.70 | 46 |
Very Active | 3,597 | 0.95 | 6.70 | 46 |
Very Active | 3,765 | 1.60 | 8.30 | 122 |
Sedentary | 2,775 | 2.35 | 3.36 | 80 |
Sedentary | 2,486 | 2.22 | 3.26 | 45 |
Very Active | 3,817 | 4.43 | 7.84 | 45 |
Very Active | 3,817 | 1.88 | 7.84 | 45 |
Very Active | 3,817 | 0.28 | 7.84 | 45 |
Very Active | 3,378 | 1.53 | 7.38 | 120 |
Lightly Active | 2,210 | 4.28 | 5.62 | 52 |
Lightly Active | 2,210 | 3.93 | 5.62 | 52 |
Lightly Active | 2,210 | 1.50 | 5.62 | 52 |
Lightly Active | 2,445 | 1.05 | 7.48 | 46 |
Lightly Active | 2,445 | 0.02 | 7.48 | 46 |
Lightly Active | 2,694 | 1.72 | 9.37 | 122 |
Lightly Active | 2,617 | 3.73 | 8.15 | 45 |
Lightly Active | 2,617 | 2.95 | 8.15 | 45 |
Lightly Active | 2,617 | 1.05 | 8.15 | 45 |
Very Active | 3,065 | 0.20 | 5.18 | 52 |
Very Active | 3,920 | 7.58 | 8.43 | 122 |
Very Active | 3,920 | 0.17 | 8.43 | 122 |
Very Active | 3,856 | 1.82 | 10.03 | 80 |
Very Active | 3,856 | 2.13 | 10.03 | 80 |
Very Active | 2,244 | 6.75 | 10.24 | 99 |
Very Active | 2,244 | 0.42 | 10.24 | 99 |
Very Active | 2,188 | 1.90 | 9.32 | 61 |
Very Active | 2,188 | 0.07 | 9.32 | 61 |
Very Active | 2,115 | 6.90 | 8.30 | 129 |
Moderately Active | 2,055 | 8.25 | 8.14 | 96 |
Moderately Active | 2,055 | 1.18 | 8.14 | 96 |
Very Active | 2,158 | 5.90 | 7.94 | 46 |
Very Active | 2,158 | 0.83 | 7.94 | 46 |
Very Active | 2,170 | 0.22 | 8.86 | 122 |
Very Active | 2,244 | 0.33 | 9.65 | 80 |
Very Active | 2,231 | 6.18 | 8.26 | 45 |
Very Active | 2,231 | 1.37 | 8.26 | 45 |
Very Active | 2,100 | 7.02 | 6.71 | 120 |
Very Active | 2,100 | 0.10 | 6.71 | 120 |
Very Active | 917 | 6.43 | 3.90 | 25 |
Moderately Active | 2,575 | 1.13 | 7.34 | 129 |
Very Active | 3,086 | 1.20 | 11.24 | 96 |
Very Active | 2,676 | 1.15 | 9.14 | 46 |
Very Active | 2,840 | 1.08 | 10.58 | 80 |
Very Active | 2,496 | 0.60 | 7.67 | 52 |
Very Active | 2,439 | 1.50 | 4.99 | 46 |
Very Active | 2,233 | 1.08 | 4.73 | 122 |
Very Active | 2,862 | 1.75 | 10.82 | 80 |
Sedentary | 2,519 | 8.10 | 4.67 | 45 |
Sedentary | 1,799 | 0.73 | 0.35 | 120 |
Sedentary | 2,543 | 1.80 | 3.92 | 67 |
Very Active | 3,669 | 2.52 | 11.24 | 52 |
Very Active | 3,669 | 0.30 | 11.24 | 52 |
Sedentary | 2,702 | 3.23 | 2.66 | 129 |
Sedentary | 2,702 | 1.30 | 2.66 | 129 |
Sedentary | 2,702 | 2.03 | 2.66 | 129 |
Very Active | 3,304 | 2.05 | 7.26 | 96 |
Very Active | 4,234 | 3.00 | 11.05 | 69 |
Very Active | 4,128 | 2.10 | 10.19 | 46 |
Very Active | 4,128 | 0.90 | 10.19 | 46 |
Lightly Active | 3,798 | 1.25 | 7.24 | 122 |
Lightly Active | 3,798 | 1.40 | 7.24 | 122 |
Very Active | 3,839 | 1.52 | 7.71 | 80 |
Very Active | 3,839 | 1.45 | 7.71 | 80 |
Very Active | 3,839 | 0.87 | 7.71 | 80 |
Very Active | 3,713 | 1.10 | 6.62 | 45 |
Sedentary | 2,606 | 2.95 | 1.78 | 45 |
Sedentary | 2,606 | 1.55 | 1.78 | 45 |
Sedentary | 2,624 | 8.17 | 2.09 | 120 |
Sedentary | 2,624 | 1.65 | 2.09 | 120 |
Very Active | 3,775 | 1.67 | 7.01 | 67 |
Sedentary | 2,260 | 5.63 | 2.94 | 52 |
Sedentary | 2,260 | 2.73 | 2.94 | 52 |
Lightly Active | 2,667 | 8.60 | 4.63 | 96 |
Sedentary | 2,229 | 5.58 | 2.45 | 69 |
Sedentary | 2,229 | 4.30 | 2.45 | 69 |
Sedentary | 2,100 | 6.88 | 1.49 | 46 |
Sedentary | 2,114 | 7.70 | 1.36 | 122 |
Sedentary | 1,961 | 7.90 | 0.83 | 80 |
Sedentary | 1,953 | 2.95 | 0.94 | 45 |
Sedentary | 1,953 | 4.47 | 0.94 | 45 |
Sedentary | 1,890 | 4.67 | 0.65 | 45 |
Sedentary | 1,890 | 4.60 | 0.65 | 45 |
Sedentary | 1,890 | 1.32 | 0.65 | 45 |
# Per-column summary of the distance-binned data: level counts for user_type
# and min / quartiles / mean / max for the numeric columns.
df_tot_user_dist %>% summary()
## user_type Calories HoursSlept TotalDistance
## Very Active :83 Min. : 745 Min. : 0.020 Min. : 0.020
## Moderately Active:28 1st Qu.:1958 1st Qu.: 1.393 1st Qu.: 3.000
## Lightly Active :40 Median :2260 Median : 2.315 Median : 5.620
## Sedentary :77 Mean :2462 Mean : 3.401 Mean : 5.613
## 3rd Qu.:2893 3rd Qu.: 5.265 3rd Qu.: 7.888
## Max. :4430 Max. :12.500 Max. :11.550
## TotalIntensity
## Min. : 25.00
## 1st Qu.: 46.00
## Median : 69.00
## Mean : 78.88
## 3rd Qu.:120.00
## Max. :139.00
# -- Shapiro-Wilk normality tests before grouping -- #
# Identifier columns are dropped first so only measurement columns are tested.
library(flextable)
df_tot %>%
  select(-Id, -"ID-AD", -logId) %>%
  dlookr::normality() %>%
  # Predicates must be wrapped in where(); passing a bare predicate such as
  # is.numeric to across() is deprecated in tidyselect.
  mutate(across(where(is.numeric), ~ round(., 2))) %>%
  # Lowest W statistic (largest departure from normality) first.
  arrange(statistic) %>%
  flextable()
vars | statistic | p_value | sample |
|---|---|---|---|
SedentaryActiveDistance | 0.13 | 0 | 228 |
LoggedActivitiesDistance | 0.25 | 0 | 228 |
VeryActiveMinutes | 0.71 | 0 | 228 |
ModeratelyActiveDistance | 0.74 | 0 | 228 |
ModeratelyActiveMinutes | 0.74 | 0 | 228 |
VeryActiveDistance | 0.75 | 0 | 228 |
TotalIntensity | 0.88 | 0 | 228 |
AverageIntensity | 0.88 | 0 | 228 |
HoursSlept | 0.90 | 0 | 228 |
SedentaryMinutes | 0.96 | 0 | 228 |
Calories | 0.96 | 0 | 228 |
TotalSteps | 0.97 | 0 | 228 |
TotalDistance | 0.97 | 0 | 228 |
LightlyActiveDistance | 0.98 | 0 | 228 |
LightlyActiveMinutes | 0.98 | 0 | 228 |
# -- Apply the same normality test after grouping -- #
# Minutes-based categorization: one test per (variable, user_type) pair.
df_tot_user_mins %>%
  group_by(user_type) %>%
  dlookr::normality() %>%
  # where() is required here; bare predicates in across() are deprecated.
  mutate(across(where(is.numeric), ~ round(., 2))) %>%
  arrange(statistic) %>%
  flextable()
variable | user_type | statistic | p_value | sample |
|---|---|---|---|---|
TotalIntensity | Lightly Active | 0.82 | 0.00 | 47 |
HoursSlept | Moderately Active | 0.84 | 0.00 | 34 |
TotalIntensity | Sedentary | 0.86 | 0.00 | 67 |
HoursSlept | Very Active | 0.87 | 0.00 | 80 |
HoursSlept | Lightly Active | 0.88 | 0.00 | 47 |
TotalIntensity | Very Active | 0.88 | 0.00 | 80 |
Calories | Very Active | 0.90 | 0.00 | 80 |
Calories | Moderately Active | 0.90 | 0.01 | 34 |
HoursSlept | Sedentary | 0.90 | 0.00 | 67 |
TotalIntensity | Moderately Active | 0.90 | 0.00 | 34 |
TotalDistance | Sedentary | 0.93 | 0.00 | 67 |
TotalDistance | Lightly Active | 0.95 | 0.04 | 47 |
Calories | Lightly Active | 0.96 | 0.07 | 47 |
TotalDistance | Very Active | 0.96 | 0.01 | 80 |
TotalDistance | Moderately Active | 0.96 | 0.19 | 34 |
Calories | Sedentary | 0.98 | 0.50 | 67 |
# Distance-based categorization: one normality test per
# (variable, user_type) pair.
df_tot_user_dist %>%
  group_by(user_type) %>%
  dlookr::normality() %>%
  # where() is required here; bare predicates in across() are deprecated.
  mutate(across(where(is.numeric), ~ round(., 2))) %>%
  arrange(statistic) %>%
  flextable()
variable | user_type | statistic | p_value | sample |
|---|---|---|---|---|
HoursSlept | Moderately Active | 0.81 | 0.00 | 28 |
TotalIntensity | Sedentary | 0.84 | 0.00 | 77 |
HoursSlept | Very Active | 0.86 | 0.00 | 83 |
TotalIntensity | Lightly Active | 0.86 | 0.00 | 40 |
Calories | Very Active | 0.88 | 0.00 | 83 |
HoursSlept | Lightly Active | 0.88 | 0.00 | 40 |
TotalIntensity | Very Active | 0.89 | 0.00 | 83 |
TotalIntensity | Moderately Active | 0.89 | 0.01 | 28 |
HoursSlept | Sedentary | 0.90 | 0.00 | 77 |
TotalDistance | Lightly Active | 0.91 | 0.01 | 40 |
Calories | Moderately Active | 0.94 | 0.10 | 28 |
TotalDistance | Very Active | 0.97 | 0.03 | 83 |
TotalDistance | Moderately Active | 0.97 | 0.46 | 28 |
TotalDistance | Sedentary | 0.97 | 0.08 | 77 |
Calories | Lightly Active | 0.98 | 0.74 | 40 |
Calories | Sedentary | 0.98 | 0.39 | 77 |
We apply the Shapiro-Wilk test for normality before and after categorizing based on minutes and notice that TotalDistance for the Moderately Active group and Calories for the Sedentary group are consistent with a normal distribution (p-values of 0.19 and 0.50). As a reminder, the null hypothesis is that the distribution is Gaussian (normal); if the p-value is less than or equal to 0.05, we reject the null hypothesis and conclude that the distribution is not Gaussian.
We will now visualize the plots after binning by minutes, which leads to interesting findings:
# Categorize by Minutes
# `df` is the working data set read by the plots that follow in this section.
df <- df_tot_user_mins

# Bar plots to visualize the number of samples per user type in each
# categorization. Bars are ordered by frequency via fct_infreq().
ggplot(df_tot_user_mins, aes(x = fct_infreq(user_type), fill = user_type)) +
  geom_bar() +
  labs(
    x = "UserType", y = "Frequency", fill = "UserType",
    title = "Bar Chart: Usertype Categorized by Minutes"
  ) +
  scale_fill_manual(
    values = viridis(n = length(unique(df_tot_user_mins$user_type)))
  ) +
  theme_linedraw(base_size = 18)

ggplot(df_tot_user_dist, aes(x = fct_infreq(user_type), fill = user_type)) +
  geom_bar() +
  labs(
    x = "UserType", y = "Frequency", fill = "UserType",
    title = "Bar Chart: Usertype Categorized by Distance"
  ) +
  # Size the palette from the data actually plotted (the distance-binned
  # table), not from `df`, which holds the minutes-binned table here.
  scale_fill_manual(
    values = viridis(n = length(unique(df_tot_user_dist$user_type)))
  ) +
  theme_linedraw(base_size = 18)
# Q:Q - Correlogram to visualize the correlation between all numeric columns.
df_num <- df %>%
  select(-user_type) %>%
  drop_na()
# Display names for the plot axes.
# NOTE(review): assigned by position, so this assumes the remaining columns are
# Calories, HoursSlept, TotalDistance, TotalIntensity in that order - confirm.
colnames(df_num) <- c(
  "Calories", "Hours Slept", "Total Distance", "Total Intensity"
)
corr_mat <- cor(as.matrix(df_num))
# corrplot() draws with base graphics and does not return a ggplot object, so a
# ggplot2 theme cannot be added to it (the `+` evaluated to NULL). Styling is
# done through corrplot's own arguments instead.
corrplot(corr_mat, method = "square", tl.col = "black")
## NULL
# C:Q - Calories by user type (categorized by minutes).
# Columns are referenced by bare name inside aes(); `df$col` bypasses the
# layer data and is discouraged by ggplot2.

# Violin plot with quartile lines.
ggplot(df, aes(x = user_type, y = Calories, fill = user_type)) +
  geom_violin(trim = FALSE, draw_quantiles = c(0.25, 0.5, 0.75)) +
  labs(
    x = "UserType", y = "Calories", fill = "UserType",
    title = "Violin Plot: Usertype vs Calories Categorized by Minutes"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Box plot of the same distribution.
ggplot(df, aes(x = user_type, y = Calories, fill = user_type)) +
  geom_boxplot() +
  labs(
    x = "UserType", y = "Calories", fill = "UserType",
    title = "Box Plot: Usertype vs Calories Categorized by Minutes"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Density per user type, one panel each. alpha is a constant, so it is set
# outside aes() rather than mapped (mapping it adds a spurious legend).
ggplot(df, aes(x = Calories, group = user_type, fill = user_type)) +
  geom_density(alpha = 0.2) +
  facet_wrap(~user_type) +
  labs(
    x = "Calories", y = "Density", fill = "UserType",
    title = "Density Plot: Calories Categorized by Minutes"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Stacked density: position = "fill" shows each group's share at every value.
ggplot(df, aes(x = Calories, group = user_type, fill = user_type)) +
  geom_density(position = "fill", adjust = 1.5) +
  labs(
    x = "Calories", y = "Density", fill = "UserType",
    title = "Density Plot: Calories Categorized by Minutes"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Empirical CDF per user type.
ggplot(df, aes(x = Calories, color = user_type)) +
  stat_ecdf(geom = "step") +
  labs(
    x = "Calories", y = "Probability", color = "UserType",
    title = "CDF: Calories Categorized by Minutes"
  ) +
  scale_color_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)
# C:Q - Total Distance by user type (categorized by minutes).
# Columns are referenced by bare name inside aes(); `df$col` bypasses the
# layer data and is discouraged by ggplot2.

# Violin plot with quartile lines.
ggplot(df, aes(x = user_type, y = TotalDistance, fill = user_type)) +
  geom_violin(trim = FALSE, draw_quantiles = c(0.25, 0.5, 0.75)) +
  labs(
    x = "UserType", y = "Total Distance", fill = "UserType",
    title = "Violin Plot: Usertype vs Total Distance Categorized by Minutes"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Box plot of the same distribution.
ggplot(df, aes(x = user_type, y = TotalDistance, fill = user_type)) +
  geom_boxplot() +
  labs(
    x = "UserType", y = "Total Distance", fill = "UserType",
    title = "Box Plot: Usertype vs Total Distance Categorized by Minutes"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Density per user type, one panel each. alpha is a constant, so it is set
# outside aes() rather than mapped (mapping it adds a spurious legend).
ggplot(df, aes(x = TotalDistance, group = user_type, fill = user_type)) +
  geom_density(alpha = 0.2) +
  facet_wrap(~user_type) +
  labs(
    x = "Total Distance", y = "Density", fill = "UserType",
    title = "Density Plot: Total Distance Categorized by Minutes"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Stacked density: position = "fill" shows each group's share at every value.
ggplot(df, aes(x = TotalDistance, group = user_type, fill = user_type)) +
  geom_density(position = "fill", adjust = 1.5) +
  labs(
    x = "Total Distance", y = "Density", fill = "UserType",
    title = "Density Plot: Total Distance Categorized by Minutes"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Empirical CDF per user type.
ggplot(df, aes(x = TotalDistance, color = user_type)) +
  stat_ecdf(geom = "step") +
  labs(
    x = "Total Distance", y = "Probability", color = "UserType",
    title = "CDF: Total Distance Categorized by Minutes"
  ) +
  scale_color_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)
# C:Q - Hours Slept by user type (categorized by minutes).
# Columns are referenced by bare name inside aes(); `df$col` bypasses the
# layer data and is discouraged by ggplot2.

# Violin plot with quartile lines.
ggplot(df, aes(x = user_type, y = HoursSlept, fill = user_type)) +
  geom_violin(trim = FALSE, draw_quantiles = c(0.25, 0.5, 0.75)) +
  labs(
    x = "UserType", y = "Hours Slept", fill = "UserType",
    title = "Violin Plot: Usertype vs Hours Slept Categorized by Minutes"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Box plot of the same distribution.
ggplot(df, aes(x = user_type, y = HoursSlept, fill = user_type)) +
  geom_boxplot() +
  labs(
    x = "UserType", y = "Hours Slept", fill = "UserType",
    title = "Box Plot: Usertype vs Hours Slept Categorized by Minutes"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Density per user type, one panel each. alpha is a constant, so it is set
# outside aes() rather than mapped (mapping it adds a spurious legend).
ggplot(df, aes(x = HoursSlept, group = user_type, fill = user_type)) +
  geom_density(alpha = 0.2) +
  facet_wrap(~user_type) +
  labs(
    x = "Hours Slept", y = "Density", fill = "UserType",
    title = "Density Plot: Hours Slept Categorized by Minutes"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Stacked density: position = "fill" shows each group's share at every value.
ggplot(df, aes(x = HoursSlept, group = user_type, fill = user_type)) +
  geom_density(position = "fill", adjust = 1.5) +
  labs(
    x = "Hours Slept", y = "Density", fill = "UserType",
    title = "Density Plot: Hours Slept Categorized by Minutes"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Empirical CDF per user type.
ggplot(df, aes(x = HoursSlept, color = user_type)) +
  stat_ecdf(geom = "step") +
  labs(
    x = "Hours Slept", y = "Probability", color = "UserType",
    title = "CDF: Hours Slept Categorized by Minutes"
  ) +
  scale_color_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)
# C:Q - Total Intensity by user type (categorized by minutes).
# `df` still holds the minutes-binned data at this point, so the titles say
# "Minutes". Columns are referenced by bare name inside aes(); `df$col`
# bypasses the layer data and is discouraged by ggplot2.

# Violin plot with quartile lines.
ggplot(df, aes(x = user_type, y = TotalIntensity, fill = user_type)) +
  geom_violin(trim = FALSE, draw_quantiles = c(0.25, 0.5, 0.75)) +
  labs(
    x = "UserType", y = "Intensity", fill = "UserType",
    title = "Violin Plot: Usertype vs Intensity Categorized by Minutes"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Box plot of the same distribution.
ggplot(df, aes(x = user_type, y = TotalIntensity, fill = user_type)) +
  geom_boxplot() +
  labs(
    x = "UserType", y = "Intensity", fill = "UserType",
    title = "Box Plot: Usertype vs Intensity Categorized by Minutes"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Density per user type, one panel each. The layer must plot TotalIntensity;
# it previously overrode x with HoursSlept, so the "Intensity" panels showed
# sleep data. alpha is a constant and is therefore set outside aes().
ggplot(df, aes(x = TotalIntensity, group = user_type, fill = user_type)) +
  geom_density(alpha = 0.2) +
  facet_wrap(~user_type) +
  labs(
    x = "Intensity", y = "Density", fill = "UserType",
    title = "Density Plot: Intensity Categorized by Minutes"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Stacked density: position = "fill" shows each group's share at every value.
ggplot(df, aes(x = TotalIntensity, group = user_type, fill = user_type)) +
  geom_density(position = "fill", adjust = 1.5) +
  labs(
    x = "Intensity", y = "Density", fill = "UserType",
    title = "Density Plot: Intensity Categorized by Minutes"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Empirical CDF per user type, computed on TotalIntensity (the layer
# previously overrode x with HoursSlept).
ggplot(df, aes(x = TotalIntensity, color = user_type)) +
  stat_ecdf(geom = "step") +
  labs(
    x = "Intensity", y = "Probability", color = "UserType",
    title = "CDF: Intensity Categorized by Minutes"
  ) +
  scale_color_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)
We notice that Calories is least correlated with Hours Slept and most correlated with Total Distance traveled. Both the Very Active and Moderately Active densities are bimodal, suggesting that those who burn a large amount of calories are either getting enough sleep or are sleep deprived. Interestingly, those who are Sedentary are the most likely to oversleep (get more than 8 hours of daily sleep, as seen in the CDF plot of Hours Slept and the Stacked Density plot).
Additionally, we see a greater proportion of the Sedentary population when binning by Distance compared to Minutes. This implies that when we categorize by Distance, there is greater weight at the tails of the activity levels; however, there is not much of a difference between the other plots.
The Stacked Density plots of Total Distance suggest that most of the Sedentary population does not travel more than 3 miles (around 82% by the CDF plot), compared to about 6 miles for the Lightly Active population (at the 75th percentile). The gap (width between groups) is narrower for the Moderately Active and Very Active populations. We also see that the facet wrap of the Total Distance is Gaussian-like for the Moderately Active group and highly skewed left. In contrast, the Total Distance for the Sedentary population is skewed right. These plots suggest that our categorization coincides with our intuition.
The violin and box plots of Calories coincide with our preliminary visualizations. The Very Active group is bimodal and the Sedentary group has the smallest third quartile. Getting light activity during the day increases your likelihood of burning more calories, and it is not necessary to be highly active to burn more than 2000 calories.
Similar visualizations can be obtained when binning by distance instead of minutes:
# Categorize by Distance
# From here on `df` is the distance-binned data set.
df <- df_tot_user_dist

# C:Q - Calories by user type (categorized by distance).
# Columns are referenced by bare name inside aes(); `df$col` bypasses the
# layer data and is discouraged by ggplot2.

# Violin plot with quartile lines.
ggplot(df, aes(x = user_type, y = Calories, fill = user_type)) +
  geom_violin(trim = FALSE, draw_quantiles = c(0.25, 0.5, 0.75)) +
  labs(
    x = "UserType", y = "Calories", fill = "UserType",
    title = "Violin Plot: Usertype vs Calories Categorized by Distance"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Box plot of the same distribution.
ggplot(df, aes(x = user_type, y = Calories, fill = user_type)) +
  geom_boxplot() +
  labs(
    x = "UserType", y = "Calories", fill = "UserType",
    title = "Box Plot: Usertype vs Calories Categorized by Distance"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Density per user type, one panel each. alpha is a constant, so it is set
# outside aes() rather than mapped (mapping it adds a spurious legend).
ggplot(df, aes(x = Calories, group = user_type, fill = user_type)) +
  geom_density(alpha = 0.2) +
  facet_wrap(~user_type) +
  labs(
    x = "Calories", y = "Density", fill = "UserType",
    title = "Density Plot: Calories Categorized by Distance"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Stacked density: position = "fill" shows each group's share at every value.
ggplot(df, aes(x = Calories, group = user_type, fill = user_type)) +
  geom_density(position = "fill", adjust = 1.5) +
  labs(
    x = "Calories", y = "Density", fill = "UserType",
    title = "Density Plot: Calories Categorized by Distance"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Empirical CDF per user type.
ggplot(df, aes(x = Calories, color = user_type)) +
  stat_ecdf(geom = "step") +
  labs(
    x = "Calories", y = "Probability", color = "UserType",
    title = "CDF: Calories Categorized by Distance"
  ) +
  scale_color_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)
# C:Q - Total Distance by user type (categorized by distance).
# Columns are referenced by bare name inside aes(); `df$col` bypasses the
# layer data and is discouraged by ggplot2.

# Violin plot with quartile lines.
ggplot(df, aes(x = user_type, y = TotalDistance, fill = user_type)) +
  geom_violin(trim = FALSE, draw_quantiles = c(0.25, 0.5, 0.75)) +
  labs(
    x = "UserType", y = "Total Distance", fill = "UserType",
    title = "Violin Plot: Usertype vs Total Distance Categorized by Distance"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Box plot of the same distribution.
ggplot(df, aes(x = user_type, y = TotalDistance, fill = user_type)) +
  geom_boxplot() +
  labs(
    x = "UserType", y = "Total Distance", fill = "UserType",
    title = "Box Plot: Usertype vs Total Distance Categorized by Distance"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Density per user type, one panel each. alpha is a constant, so it is set
# outside aes() rather than mapped (mapping it adds a spurious legend).
ggplot(df, aes(x = TotalDistance, group = user_type, fill = user_type)) +
  geom_density(alpha = 0.2) +
  facet_wrap(~user_type) +
  labs(
    x = "Total Distance", y = "Density", fill = "UserType",
    title = "Density Plot: Total Distance Categorized by Distance"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Stacked density: position = "fill" shows each group's share at every value.
ggplot(df, aes(x = TotalDistance, group = user_type, fill = user_type)) +
  geom_density(position = "fill", adjust = 1.5) +
  labs(
    x = "Total Distance", y = "Density", fill = "UserType",
    title = "Density Plot: Total Distance Categorized by Distance"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Empirical CDF per user type.
ggplot(df, aes(x = TotalDistance, color = user_type)) +
  stat_ecdf(geom = "step") +
  labs(
    x = "Total Distance", y = "Probability", color = "UserType",
    title = "CDF: Total Distance Categorized by Distance"
  ) +
  scale_color_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)
# C:Q - Hours Slept by user type (categorized by distance).
# Columns are referenced by bare name inside aes(); `df$col` bypasses the
# layer data and is discouraged by ggplot2.

# Violin plot with quartile lines.
ggplot(df, aes(x = user_type, y = HoursSlept, fill = user_type)) +
  geom_violin(trim = FALSE, draw_quantiles = c(0.25, 0.5, 0.75)) +
  labs(
    x = "UserType", y = "Hours Slept", fill = "UserType",
    title = "Violin Plot: Usertype vs Hours Slept Categorized by Distance"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Box plot of the same distribution.
ggplot(df, aes(x = user_type, y = HoursSlept, fill = user_type)) +
  geom_boxplot() +
  labs(
    x = "UserType", y = "Hours Slept", fill = "UserType",
    title = "Box Plot: Usertype vs Hours Slept Categorized by Distance"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Density per user type, one panel each. alpha is a constant, so it is set
# outside aes() rather than mapped (mapping it adds a spurious legend).
ggplot(df, aes(x = HoursSlept, group = user_type, fill = user_type)) +
  geom_density(alpha = 0.2) +
  facet_wrap(~user_type) +
  labs(
    x = "Hours Slept", y = "Density", fill = "UserType",
    title = "Density Plot: Hours Slept Categorized by Distance"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Stacked density: position = "fill" shows each group's share at every value.
ggplot(df, aes(x = HoursSlept, group = user_type, fill = user_type)) +
  geom_density(position = "fill", adjust = 1.5) +
  labs(
    x = "Hours Slept", y = "Density", fill = "UserType",
    title = "Density Plot: Hours Slept Categorized by Distance"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Empirical CDF per user type.
ggplot(df, aes(x = HoursSlept, color = user_type)) +
  stat_ecdf(geom = "step") +
  labs(
    x = "Hours Slept", y = "Probability", color = "UserType",
    title = "CDF: Hours Slept Categorized by Distance"
  ) +
  scale_color_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)
# C:Q - Total Intensity by user type (categorized by distance).
# Columns are referenced by bare name inside aes(); `df$col` bypasses the
# layer data and is discouraged by ggplot2.

# Violin plot with quartile lines.
ggplot(df, aes(x = user_type, y = TotalIntensity, fill = user_type)) +
  geom_violin(trim = FALSE, draw_quantiles = c(0.25, 0.5, 0.75)) +
  labs(
    x = "UserType", y = "Intensity", fill = "UserType",
    title = "Violin Plot: Usertype vs Intensity Categorized by Distance"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Box plot of the same distribution.
ggplot(df, aes(x = user_type, y = TotalIntensity, fill = user_type)) +
  geom_boxplot() +
  labs(
    x = "UserType", y = "Intensity", fill = "UserType",
    title = "Box Plot: Usertype vs Intensity Categorized by Distance"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Density per user type, one panel each. The layer must plot TotalIntensity;
# it previously overrode x with HoursSlept, so the "Intensity" panels showed
# sleep data. alpha is a constant and is therefore set outside aes().
ggplot(df, aes(x = TotalIntensity, group = user_type, fill = user_type)) +
  geom_density(alpha = 0.2) +
  facet_wrap(~user_type) +
  labs(
    x = "Intensity", y = "Density", fill = "UserType",
    title = "Density Plot: Intensity Categorized by Distance"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Stacked density: position = "fill" shows each group's share at every value.
ggplot(df, aes(x = TotalIntensity, group = user_type, fill = user_type)) +
  geom_density(position = "fill", adjust = 1.5) +
  labs(
    x = "Intensity", y = "Density", fill = "UserType",
    title = "Density Plot: Intensity Categorized by Distance"
  ) +
  scale_fill_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)

# Empirical CDF per user type, computed on TotalIntensity (the layer
# previously overrode x with HoursSlept).
ggplot(df, aes(x = TotalIntensity, color = user_type)) +
  stat_ecdf(geom = "step") +
  labs(
    x = "Intensity", y = "Probability", color = "UserType",
    title = "CDF: Intensity Categorized by Distance"
  ) +
  scale_color_manual(values = viridis(n = length(unique(df$user_type)))) +
  theme_linedraw(base_size = 18)