3 Process

3.1 Ver Datasets

3.2 Eliminando datasets

3.3 Limpiamos variables que no necesitamos

3.4 Merging Datasets

3.5 Unified dataset check

3.5.1 Variable cleaning in the final table 'fb_final_daily'.

4 Analyze

Antes de comenzar el análisis, establecemos un tema para los plots.

#' Shared ggplot2 theme for the report's plots
#'
#' Builds a partial `theme()` object meant to be added to a plot with `+`.
#' It sets a black panel border, a white panel background with a grey50
#' outline, removes the minor horizontal grid lines, and styles the axis
#' text, axis titles, ticks, title, subtitle and caption.
#'
#' @param base_family Font family applied to the axis text, axis titles,
#'   plot title and caption. Defaults to `"Arial"`, the value that used to be
#'   hard-coded, so existing calls with no arguments are unaffected. The
#'   subtitle is given no explicit family, as before.
#' @return The object returned by `theme()`.
custom_theme_original <- function(base_family = "Arial") {
  theme(
    panel.border = element_rect(
      colour = "black",
      fill = NA,
      linetype = 1
    ),
    panel.background = element_rect(
      fill = "white",
      color = "grey50"
    ),
    panel.grid.minor.y = element_blank(),
    axis.text = element_text(
      colour = "blue",
      face = "italic",
      family = base_family
    ),
    axis.title = element_text(
      colour = "gray",
      family = base_family
    ),
    axis.ticks = element_line(colour = "blue"),
    plot.title = element_text(
      size = 20,
      hjust = 0.5,
      family = base_family
    ),
    plot.subtitle = element_text(
      size = 13,
      hjust = 0.5
    ),
    plot.caption = element_text(
      colour = "brown",
      face = "italic",
      family = base_family
    )
  )
}

4.1 Physiological activity: Heart rate as a predictor of health problems

4.2 Physical activity 1: Calories by activity (total distance)

# Scatter plot of Calories against TotalDistance with a smoothed trend line.
#
# Changes from the previous version:
# * The x aesthetic was TotalSteps, while the section heading, the plot title
#   and the Pearson test that follows all refer to TotalDistance. The plot now
#   shows the same variable pair that is tested.
# * group_by(TotalDistance, Calories) was dropped: ggplot2 does not use dplyr
#   groups, so it had no effect on the plot.
fb_final_daily %>%
  ggplot(aes(x = TotalDistance, y = Calories, color = Calories)) +
  # Jitter and transparency reduce overplotting of the points.
  geom_point(alpha = 0.3, position = position_jitter()) +
  geom_smooth() +
  custom_theme_original() +
  theme(
    # Legend placed inside the panel, in relative coordinates.
    legend.position = c(.8, .3),
    legend.spacing.y = unit(1, "mm"),
    panel.border = element_rect(colour = "black", fill = NA),
    legend.background = element_blank(),
    legend.box.background = element_rect(colour = "black")
  ) +
  labs(
    title = "Calories burned by distance",
    y = "Calories",
    x = "Total Distance",
    caption = "Data Source: FitBit Fitness Tracker Data"
  )

Pearson correlation coefficient

# Pearson correlation test between TotalDistance and Calories, reported with
# a 95% confidence interval.
cor.test(
  fb_final_daily$TotalDistance,
  fb_final_daily$Calories,
  method = "pearson",
  conf.level = 0.95
)

4.3 Physical Activity: Calories by activity (total distance) Daily Activity Plot 3

4.4 Intensity of exercise activity Daily Activity Plot 4

4.5 Sleep distribution

# Histogram of TotalMinutesAsleep, with bars filled by a three-level sleep
# category derived from the same column.
fb_final_daily %>%
  select(TotalMinutesAsleep) %>%
  drop_na() %>%
  mutate(
    # Right-closed intervals: (-Inf, 420], (420, 540], (540, Inf), i.e. the
    # cut points sit at 7 h and 9 h expressed in minutes. cut() returns a
    # factor whose levels follow the order of `labels`.
    sleep_quality = cut(
      TotalMinutesAsleep,
      breaks = c(-Inf, 420, 540, Inf),
      labels = c("Less than 7h", "7h to 9h", "More than 9h")
    )
  ) %>%
  ggplot(aes(x = TotalMinutesAsleep, fill = sleep_quality)) +
  geom_histogram(bins = 30, position = "dodge") +
  # One fill colour per category, in factor-level order.
  scale_fill_manual(values = c("tan1", "#66CC99", "lightcoral")) +
  custom_theme_original() +
  theme(
    # Untitled legend placed inside the panel, in relative coordinates.
    legend.position = c(.80, .80),
    legend.title = element_blank(),
    legend.spacing.y = unit(0, "mm"),
    legend.background = element_blank(),
    legend.box.background = element_rect(colour = "black"),
    panel.border = element_rect(colour = "black", fill = NA)
  ) +
  labs(
    title = "Sleep distribution",
    x = "Time slept (minutes)",
    y = "Count",
    caption = "Plot 5"
  )

4.6 Sleep vs distance covered