This document, its code, and its results are part of the final capstone report for the "Google Data Analytics" course on Coursera.
Case 2 concerns "Bellabeat", a company interested in evaluating data recorded by a physical-activity datalogger device (activity tracker).
The original data come from a study run on the Amazon Mechanical Turk (MTurk) platform by a team from RTI International and were published in two packages; the case study only provides the second part.
The HELPER functions below exist to reduce the amount of code in later sections. They are not polished, let alone performance-tuned; they simply served their purpose while the code in this document was being written.
sessionInfo()
## R version 4.2.0 (2022-04-22 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17763)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=Spanish_Argentina.1252 LC_CTYPE=Spanish_Argentina.1252
## [3] LC_MONETARY=Spanish_Argentina.1252 LC_NUMERIC=C
## [5] LC_TIME=Spanish_Argentina.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] data.table_1.14.2 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.9
## [5] purrr_0.3.4 readr_2.1.2 tidyr_1.2.0 tibble_3.1.7
## [9] ggplot2_3.3.6 tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.1.2 xfun_0.31 bslib_0.3.1 haven_2.5.0
## [5] colorspace_2.0-3 vctrs_0.4.1 generics_0.1.2 htmltools_0.5.3
## [9] yaml_2.3.5 utf8_1.2.2 rlang_1.0.2 jquerylib_0.1.4
## [13] pillar_1.7.0 withr_2.5.0 glue_1.6.2 DBI_1.1.2
## [17] dbplyr_2.2.0 modelr_0.1.8 readxl_1.4.0 lifecycle_1.0.1
## [21] munsell_0.5.0 gtable_0.3.0 cellranger_1.1.0 rvest_1.0.2
## [25] evaluate_0.15 knitr_1.39 tzdb_0.3.0 fastmap_1.1.0
## [29] fansi_1.0.3 broom_0.8.0 backports_1.4.1 scales_1.2.0
## [33] jsonlite_1.8.0 fs_1.5.2 hms_1.1.1 digest_0.6.29
## [37] stringi_1.7.6 grid_4.2.0 cli_3.3.0 tools_4.2.0
## [41] magrittr_2.0.3 sass_0.4.1 crayon_1.5.1 pkgconfig_2.0.3
## [45] ellipsis_0.3.2 xml2_1.3.3 reprex_2.0.1 lubridate_1.8.0
## [49] assertthat_0.2.1 rmarkdown_2.14 httr_1.4.3 rstudioapi_0.13
## [53] R6_2.5.1 compiler_4.2.0
After joining the two data packages, the following set of files is obtained.
File list
The set of files and data columns is distributed as follows; columns with duplicated names can be seen across different files.
Data model
After confirming that the duplicated columns contain the same set of data, or performing the corresponding cleaning actions, a JOIN is carried out so as to end up with one table per time granularity.
Data model
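As a sketch of that verification step, one pair of duplicated columns could be compared before trusting the JOIN. The snippet below is illustrative only: it reuses the db tables and the renamed columns Calories and CaloriesDaily that are only defined later in this document, so it is not part of the pipeline itself.
library(dplyr)
# Minimal check: the daily Calories column appears in two files; confirm both
# copies agree on the shared keys before relying on a JOIN between them.
check <- inner_join(
  select( db$dailyActivity, .session_id, .datetime, Calories ),
  select( db$dailyCalories, .session_id, .datetime, CaloriesDaily ),
  by = c(".session_id", ".datetime") )
all( check$Calories == check$CaloriesDaily, na.rm=TRUE )  # expected TRUE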
helper_load_datasets_from =
function( filespath ){
FILES = data.table( ext="", table="", fullpath=
list.files(path = filespath, recursive=TRUE, full.names=TRUE) )
FILES %>%
mutate(
ext = tools::file_ext(fullpath) ,
table = basename(fullpath) %>%
stringr::str_replace_all("_merged.csv|.csv","")
) %>%
arrange( table )
}
helper_str_datetime = function(x) {
  lubridate::parse_date_time(x, c("%m-%d-%Y %I:%M:%S %p", "mdy_HMS", "mdy", "ymd") ) }
helper_make_lookup <- function( lookuptxt ){
lookup <- setDT(
read.table( text = lookuptxt,
header = TRUE, colClasses=c(.session_id='character') )
)
lookup$.session_id = lookup$.session_id %>% factor( levels=levels(factor_session_id))
lookup$.datetime = lookup$.datetime %>% helper_str_datetime
data.table::setkeyv( lookup, c('.session_id','.datetime') )
return(lookup)
}# dilate/erode filter in image processing, widens light/dark areas of active selection
# https://docs.gimp.org/2.10/en/plug-in-dilate.html
dilate_filter <-
function( X, df_original, by, depth=5, limit=NA ){
X <- if ( is.data.frame(X) ) list(X)
else if (is.list(X)) X
else NULL
if ( nrow(X[[1]]) == 0 ) return(0)
result = list()
limit = min( limit, length(X), na.rm=TRUE ) #coalesce(limit,length(X))
for( k in 1:limit ){
DF <- df_original %>% rowid_to_column( "idx" )
indexes <- left_join( X[[k]], DF, by=by ) %>% .$idx
indexes <- sapply( indexes, function(rnum) {
from <- max( 1, (rnum-depth) );
to <- min( (rnum+depth-1), nrow(df_original ) );
X=c( rep('', rnum-from ), 'X', rep('', to-rnum ) )
return( list( X = X, idx = from:to ) )
} )
highlightXs = indexes['X',] %>% unlist
indexes = indexes['idx',] %>% unlist
DF <- cbind( X=c(highlightXs), DF[ c(indexes), ])
result[[ length(result)+1 ]] <- DF
}
if (length(result)==1) return(result[[1]])
else return(result)
}
helper_plot_timegaps <-
function( DF, classname, datetime, value, scale_limits ) {
DFrename <- DF %>%
select( classname = {{classname}},
value = {{value}},
datetime = {{datetime}} )
DF <- DFrename %>%
group_by( classname,
datetime = lubridate::floor_date( datetime, "6 hour" ) ) %>%
summarise( value = sum( value ),
na_cnt = sum( is.na( value ) ) ) %>%
arrange( sum(na_cnt), .by_group = TRUE )
DF
value_max = scale_limits[2]
value_min = scale_limits[1]
midpoint <- (value_max+value_min)/2
classname_cnt <- length(levels(DF$classname))
#DF$classname_reorder = with(DF, reorder(classname, is.na(value), sum ) )
p <-
ggplot( DF, aes( x=datetime,
y=classname , #fct_rev(fct_inorder(fct_drop(classname))),
fill=value) ) +
geom_raster(na.rm=TRUE) +
#geom_tile( linetype=0, colour = 'white' ) +
scale_fill_gradient2(low="steelblue4",mid="white",high="red2",
midpoint=midpoint ,na.value=NA, limits=scale_limits ) +
coord_cartesian( expand=FALSE, clip = 'off' ) +
scale_x_datetime( position = "top",
breaks = c(min(DF$datetime), max(DF$datetime)),
date_minor_breaks = "1 week",
date_labels="%d-%b-%y"
)+
theme(
#legend.position = c(1.015,0.115),
legend.key.height = unit(2, 'cm'),
legend.key.width = unit(0.2, 'cm'),
legend.direction = "vertical",
legend.text = element_blank(),
legend.title = element_blank(),
legend.margin = margin(0,-0.5,0,-0.3, "cm"),
panel.border = element_blank(),
panel.spacing = element_blank(),
panel.grid = element_line(color="red1", size=0.25),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
axis.text = element_text(vjust=0.3, size=8),
axis.text.x = element_text(vjust=1, angle=0 ),
axis.ticks.y = element_blank(),
panel.background = element_blank(),
plot.caption = element_blank(),#element_text(hjust=0 , size=8 , face = "italic"),
plot.subtitle = element_blank(),#element_text(hjust=0 , size=8),
plot.margin = margin(0,0.3,0,-0.3, "cm"),
plot.title = element_text(hjust=0.5, size=12, face="bold",
margin=margin(0,0,-1,0,"cm") ) ) +
labs( x = "", y = "",
#caption= "FUENTE: propia", fill = "Calories",
#subtitle = "Spaces without any data (missing, filtered, etc)",
title = "TimeGAPs / Value Distribution"
) +
geom_hline(yintercept = 0.5 + 0:classname_cnt, colour="white", size = 0.5)
return(p)
}
helper_heatmap_analisis <- function( DF ){
# Show the full range of dates
scale = c(0, max(c(DF$X, DF$Y), na.rm=TRUE) )
DF %>% helper_plot_timegaps( .session_id, .datetime, X,scale) %>% print
DF %>% helper_plot_timegaps( .session_id, .datetime, Y,scale) %>% print
# Compare over the same date range
mindate = min( as.Date( DF[ !is.na(Y) ]$.datetime ) )
mindate2 = min( as.Date( DF[ !is.na(X) ]$.datetime ) )
mindate = min( mindate, mindate2 )
DF <- DF[ .datetime >= mindate ]
maxdate = max( as.Date( DF[ !is.na(Y) ]$.datetime ) )
maxdate2 = max( as.Date( DF[ !is.na(X) ]$.datetime ) )
maxdate = min( maxdate, maxdate2 )
DF <- DF[ .datetime <= maxdate ]
sum( DF$X, na.rm=TRUE ) %>% print
sum( DF$Y, na.rm=TRUE ) %>% print
maxy = max( DF$X, DF$Y )
# Plot of the problematic time segment
p <- DF[ .datetime >= mindate ] %>% # helper_str_datetime("2016-04-08")
.[ .datetime <= maxdate ] %>% # helper_str_datetime("2016-04-17")
ggplot( ) +
scale_y_continuous( limits=c(0,maxy) )+
#expand_limits( y = c(0,maxy) ) +
theme(legend.position="bottom")
(p + geom_line( aes(x=.datetime, y=X, color=.session_id), na.rm=TRUE )) %>% print
(p + geom_line( aes(x=.datetime, y=Y, color=.session_id), na.rm=TRUE )) %>% print
# COMPARE variations of one column against the other
DFcomp <- DF %>%
mutate( diff = coalesce(X,0)-Y ) %>%
select( .session_id, .datetime, X, Y, diff ) %>%
arrange( diff %>% desc)
# meant to feedback into lookup table
DFcomp[is.na(X) & !is.na(Y)] %>% head(100) %>% print.data.frame()
DFcomp[diff<0] %>% arrange(diff) %>% tail(100)%>% print.data.frame()
DFcomp[diff>0] %>% head(100) %>% print.data.frame()
DFcomp %>% mutate( diff = abs(diff) ) %>%
helper_plot_timegaps( .session_id, .datetime, diff, scale ) %>% print
}
pivot_like_excel <- function ( DF, grouping, pivot_col, attributes ){
DF %>%
select( all_of( c( grouping, pivot_col, attributes ) ) ) %>%
pivot_longer( attributes, names_to="attributes" ) %>%
pivot_wider( names_from=pivot_col, values_from=value ) %>%
#, names_glue=paste0(pivot_col,".{",pivot_col,"}")
group_by( attributes, .add=TRUE)
}
ggplot2::diamonds %>%
group_by( cut, color ) %>%
summarise( mean_x=mean(x), mean_y=mean(y), mean_z=mean(z) ) %>%
pivot_like_excel(
grouping = c('cut'), pivot_col='color',
attributes = c('mean_x', 'mean_y', 'mean_z') )
plot_histo_normal = function(variable){
u = variable %>% mean %>% round(4)
s = variable %>% sd %>% round(4)
x = variable %>% {seq( min(.), max(.), length=40 )}
h = hist( variable )
lines( x, dnorm( x , mean=u, sd=s)*max(h$counts)/max(h$density) )
abline( v = u+s , lty=2 )
abline( v = u-s , lty=2 )
return( list(mean=u,sd=s,l2=u+s,l1=u-s) )
}
iris$Sepal.Width %>% plot_histo_normal
This section is added to give a better understanding of the nature and origin of the data.
Fitbit devices use a 3-axis accelerometer to count your steps. more
This sensor also allows your device to determine the frequency, duration, intensity, and patterns of your movement. When your body is completely at rest and you haven’t moved for about an hour, your Fitbit device records that you’re asleep. more
An accelerometer is a piece of hardware that measures acceleration along X, Y and Z, and it is included in almost every current smartphone, GPS unit and health tracker. Acceleration is measured in units of G-force (1 G equal to 9.8 m/s²) at a sampling rate that is usually several hundred to several thousand samples per second.
Pattern-recognition algorithms make it possible to identify events such as walking steps, climbing stairs, sleeping, running, and so on. The algorithms are usually built into the device and tuned to the different sensitivities of the accelerometer chips available.
In practice, the required calculations involve vector arithmetic and signal smoothing.
When a person walks, the body sways and a bounce is observed in the direction of gravity, up and down, whose magnitude varies with the length of the steps. The variables count steps by counting these bounces.
More information can be found by searching for "Step Detection Algorithm".
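As a rough illustration of that vector arithmetic and smoothing, the sketch below counts "bounces" in a synthetic 3-axis signal. The sampling rate, smoothing window and threshold are assumptions for illustration only, not Fitbit's proprietary algorithm.
# Minimal step-counting sketch on synthetic accelerometer data (base R).
fs <- 50                                   # assumed sampling rate [Hz]
t  <- seq(0, 10, by = 1/fs)                # 10 seconds of data
# Synthetic walk at ~2 steps/second plus gravity on Z and some noise (units of G)
ax <- rnorm(length(t), 0, 0.05)
ay <- rnorm(length(t), 0, 0.05)
az <- 1 + 0.3 * sin(2 * pi * 2 * t) + rnorm(length(t), 0, 0.05)
mag <- sqrt(ax^2 + ay^2 + az^2) - 1        # acceleration magnitude minus gravity (~1 G)
mag <- stats::filter(mag, rep(1/5, 5), sides = 2)   # simple moving-average smoothing
# Count local maxima that exceed an illustrative 0.1 G threshold
is_peak <- c(FALSE, diff(sign(diff(mag))) == -2, FALSE) & mag > 0.1
sum(is_peak, na.rm = TRUE)                 # ~20 "steps" in 10 s of this synthetic walk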
When the heart beats, the capillary vessels expand and contract with the changes in blood volume.
The optical heart-rate sensor emits a light many times per second, which is then captured by a photosensitive sensor to detect the volume/expansion changes in the capillaries through the variation in the amount of light returned to the receiving diode. Finally, the number of beats per minute is calculated.
A harmless, low-power green light and an infrared light are used.
Some factors can degrade measurement quality, and because of this, spikes may be observed in the readings.
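To make the idea concrete, here is a minimal sketch that estimates beats per minute from a synthetic PPG-like signal by counting peaks; the signal, sampling rate and threshold are assumptions for illustration only.
# Minimal BPM estimation sketch on a synthetic optical (PPG-like) signal (base R).
fs  <- 25                                  # assumed sampling rate [Hz]
t   <- seq(0, 30, by = 1/fs)               # 30 seconds of signal
bpm_true <- 72
ppg <- sin(2 * pi * (bpm_true / 60) * t) + rnorm(length(t), 0, 0.02)
ppg <- stats::filter(ppg, rep(1/5, 5), sides = 2)   # smooth before peak detection
# Detect local maxima above a threshold and convert the count to beats/min
is_beat <- c(FALSE, diff(sign(diff(ppg))) == -2, FALSE) & ppg > 0.5
60 * sum(is_beat, na.rm = TRUE) / max(t)   # approximately 72 bpm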
Fitbit estimates your sleep stages using a combination of your movement and heart-rate patterns. When you haven’t moved for about an hour, your tracker or watch assumes that you’re asleep. Additional data—such as the length of time your movements are indicative of sleep behavior (such as rolling over, etc.)—help confirm that you’re asleep.
While you’re sleeping, your device tracks the beat-to-beat changes in your heart rate, known as heart rate variability (HRV), which fluctuate as you transition between light sleep, deep sleep, and REM sleep stages. When you sync your device in the morning, we use your movement and heart rate patterns to estimate your sleep cycles from the previous night.
If your device doesn't track heart rate, you see your sleep pattern in the Fitbit app. Your sleep pattern includes your time spent in:
- 1 = asleep : truly quiet sleep.
- 2 = restless : you moved in your sleep, like tossing and turning.
- 3 = awake : the device detects excessive movement, enough that restful sleep would not be possible.
When the user's acceleration is ZERO, the device is at rest. Prolonged rest is used for sleep monitoring. The method is called Sleep Period Time Window (SPT-window), also known as actigraphy or time-window detection, and the window ends when movement is detected.
Another similar method is Guided Sleep Detection, in which the user records the moments of falling asleep and waking up using their smartphone.
Devices that include a heart-rate sensor can identify sleep stages more precisely, since heart rate is linked to the nervous system.
In scientific studies the standard method is called Polysomnography (PSG), which takes many more variables into account.
Additionally, Fitbit uses the following FORMULA to calculate an efficiency parameter called Sleep_Efficiency (the same formula reappears in the sleep analysis further below):
Sleep_Efficiency = 100 * minutesAsleep / (TimeInBed - minutesAfterWakeup)
Other metrics are:
- AwakeCount: number of distinct awake periods during a sleep record, determined by a clustering algorithm.
- RestlessCount: number of distinct restless periods during a sleep cycle, determined by a clustering algorithm.
- MinutesAsleep, MinutesRestless, MinutesAwake, MinutesLight, MinutesDeep, MinutesREM.
- Note that measuring REM requires the optical heart-rate sensor; the information provided by the accelerometer alone is not sufficient.
In the FitBit tables analysed here, the variable is called sleep_state and it only records values during sleep events. It records no value while awake.
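A tiny sketch of that formula as an R helper; the numbers are made-up inputs, and note that the cleaning section later approximates minutesAfterWakeup with the minutes classified as awake.
# Sleep_Efficiency = 100 * minutesAsleep / (TimeInBed - minutesAfterWakeup)
sleep_efficiency <- function( minutes_asleep, time_in_bed, minutes_after_wakeup = 0 ){
  round( 100 * minutes_asleep / (time_in_bed - minutes_after_wakeup), 2 )
}
sleep_efficiency( 420, 460, 10 )   # e.g. 420 min asleep out of 460 in bed -> 93.33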
Note: Weight data can be entered by participants using the mobile app or Fitbit dashboard, or by using a scale connected to the Fitbit account (Aria or Withings).
The user's weight can be entered manually or via a smart scale.
The Body Mass Index (BMI) is a classification adopted by the WHO to assess nutritional status. It is very simple to calculate, although its drawbacks are that it does not account for age, sex, body-fat percentage or muscle mass.
A BMI value between 18.5 and 25 is defined as Normal.
NOTE: According to a study with data up to 2016, measurements in US women show 38.2% of the population overweight, 20.8% obese, 10.1% morbidly obese and barely 1.7% underweight, leaving 29.2% of the population in the normal range according to CDC levels. A rapidly growing trend is also observed.
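As a quick numeric illustration (the weight and height are assumed example values), BMI and the height back-calculation used later in this document work as follows:
# BMI = weight (kg) / height (m)^2; the WHO "Normal" band is 18.5-25.
weight_kg <- 68          # assumed example values
height_m  <- 1.65
bmi <- weight_kg / height_m^2
round(bmi, 1)                            # ~25.0, right at the upper Normal limit
# The inverse relation is used later to recover height from weight and BMI:
sqrt(weight_kg / bmi)                    # returns 1.65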
Bioimpedance is a body-composition analysis technique that uses electrical signals at different frequencies, from 500 to 1000 kHz arranged in steps, to measure the frequency response of living tissue. It is obtained with a smart scale.
The composition of the tissues and the amount of water present in the body alter the frequency response and make it possible to derive statistical parameters such as our fat mass.
Water is a good conductor, but fat is not.
Muscles have a practically constant water content of approximately 73%.
Together with the bones, this set is referred to as lean mass.
The difference between body weight (body mass) and lean mass makes up the fat mass.
Subcutaneous fat can account for up to 50% of the total body fat of a typical individual.
NOTE: If the scale measures only from the feet, with no hand electrodes, the current travels through the body from the waist down, which means abdominal fat cannot be estimated.
Manually entered data are usually activities, occasionally sleep, and body measurements such as weight when no smart scale is available.
Other basic measurements, and other instruments used in anthropometry, can complement these data.
Unique keys that identify each record in a table.
.session_id represents the individual and .datetime is the timestamp of the row.
The "." prefix is meant to mark their role as keys.
The Session ID cannot be represented by an Int32 type, but it could be represented by an Int64. However, given the limited number of individuals (35), it was decided to represent them in grouped form with a "factor" type, which requires far fewer bits and simplifies the R code.
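A quick, illustrative comparison of the memory cost of the alternatives (with made-up ids and repetition counts) shows why the factor representation was chosen:
# Compare storage of 35 session-like ids repeated many times, as in the minute-level tables.
ids_chr <- rep(as.character(1503960366 + 1:35), each = 10000)  # character ids (illustrative)
ids_num <- as.numeric(ids_chr)                                 # 8-byte numeric slot per value
ids_fct <- factor(ids_chr)                                     # 4-byte integer codes + 35 labels
object.size(ids_chr); object.size(ids_num); object.size(ids_fct)
# The factor stores 4-byte codes plus 35 labels, roughly half the size of either
# alternative, while keeping the labels readable.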
Fitbit devices use the FORMULA below to calculate distance.
- Steps x Stride_Length = Distance Traveled
When you track an activity with GPS, your device calculates your distance using GPS data rather than steps. If you begin moving before you get a GPS signal, your device calculates distance using your steps until GPS connects.
Distances do not appear in the minute-LEVEL tables, only in the hourly and daily ones.
Your stride length is determined by your height and sex.
To update your stride length, track a run with GPS or measure your stride length manually.
StrideLength is not part of the dataset.
However, it can be estimated using the distance reported in the daily-LEVEL tables, the number of steps for that distance, and the intensity level.
Common knowledge puts the average step length at 67 cm for adult women and 76 cm for adult men. This information is used to validate that the data obtained are consistent.
Additionally, since people keep certain proportions of symmetry, the coefficient 0.42 can be used to relate a person's height to their step length. This calculation and the quoted constant are cross-checked using the Body Mass Index value, reducing the margin of error.
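The sketch below is a minimal illustration of that estimation; it uses the dailyActivity column names defined later in this document and is not the exact procedure used further down.
library(dplyr)
# Estimate a stride length (m) per individual from daily distance and step counts,
# then approximate height via stride ~ 0.42 * height.
stride_est <- db$dailyActivity %>%
  filter(Steps > 0) %>%
  group_by(.session_id) %>%
  summarise(stride_m = median(ActiveKm_Total * 1000 / Steps, na.rm = TRUE),
            height_m = round(stride_m / 0.42, 2))
head(stride_est)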
Calories: Estimated energy expenditure. (using BMR)
Fitbit devices combine your basal metabolic rate and your activity data to estimate your calories burned. If your device tracks heart rate, your heart-rate data is also included, especially to estimate calories burned during exercise.
Calories, Cal or kcal, are the energy the body spends to carry out its bodily functions. In all cases it is a value estimated through approximation formulas.
When the body is at rest, the number of calories burned equals the BMR.
Calories burned are directly associated with oxygen consumption and therefore with muscle activity.
Heart rate provides a good linear approximation of activity intensity and energy burn (see 1 and 2). Consequently, it is used to define the different activity intensity levels.
Depending on the activity level the body burns more calories, so the BMR is multiplied by an intensity factor, generally between 1.2 and 1.95.
Another way to estimate calories is to know the METs of a given activity and use the FORMULA (the standard oxygen-consumption approximation): kcal/min = METs x 3.5 x weight (kg) / 200.
Basal metabolism accounts for between 60 and 70% of the total calories burned during the day.
Marginal calories are the additional calories burned through exercise, i.e. after subtracting basal metabolism.
Basal Metabolic Rate (BMR)
Fitbit uses the gender, age, height, and weight data entered into the user profile to calculate basal metabolic rate (BMR).
BMR: the rate at which you burn calories at rest to maintain vital body functions (including breathing, blood circulation, and heartbeat)
Basal metabolism (also called the Resting Metabolic Rate, RMR) is the amount of energy (calories) the body spends while at rest.
It is the energy needed for all of our vital functions to run involuntarily and for the organism to maintain an adequate, constant temperature.
The FORMULA that Fitbit uses (the Mifflin-St Jeor Equation) is a standardized approximation: BMR (kcal/day) = 10 x weight (kg) + 6.25 x height (cm) - 5 x age (years) + 5 for men, or - 161 for women.
For example, considering the average height for men and women and a normal Body Mass Index of 22 (a worked sketch follows below):
The difference between sexes for the same measurements is around +30%.
It can be seen that weight is the main factor in determining basal metabolism.
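A minimal worked sketch of that example; the average heights and the age of 30 are assumed values used only for illustration.
# Mifflin-St Jeor BMR for a man and a woman of "average" height and BMI 22.
mifflin <- function(weight_kg, height_cm, age, male = TRUE) {
  10 * weight_kg + 6.25 * height_cm - 5 * age + if (male) 5 else -161
}
h_m <- 1.75; h_f <- 1.62; age <- 30        # assumed average heights and age
w_m <- 22 * h_m^2; w_f <- 22 * h_f^2       # weight implied by BMI = 22
bmr_m <- mifflin(w_m, h_m * 100, age, TRUE)
bmr_f <- mifflin(w_f, h_f * 100, age, FALSE)
round(c(male = bmr_m, female = bmr_f))     # ~1623 and ~1279 kcal/day
round(100 * (bmr_m / bmr_f - 1))           # ~27%: close to the ~+30% noted above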
Metabolic equivalents
1 MET is defined as the energy you use when you're resting or sitting still (equal to the BMR). It is a ratio of the active vs. resting metabolic rate.
It measures how much energy you're burning throughout your activities.
The higher the MET value, the more energy your muscles will need to expend. more
The American Heart Association recommends at least 150 minutes of moderate-intensity aerobic exercise each week for optimal cardiovascular health. That's equal to about 500 MET minutes per week, according to the Department of Health and Human Services. Aiming for at least 500 MET minutes a week is a good goal for optimal cardiovascular health.
It is the ratio of the metabolic rate during activity vs. at rest.
It measures how much additional energy is spent through activities.
The higher the MET value, the more energy (calories) the muscles will need to burn.
The additional burn depends on factors such as age and fitness level.
In FitBit, the values appear multiplied by 10. Example: 38 = 3.8 METs.
Muscles consume 3.5 millilitres of oxygen per kg of body mass per minute to create the required energy; combined with roughly 5 kcal per litre of oxygen, this yields the per-minute formula given above.
METs are commonly used to tabulate the effort level of the different types of exercise, such as walking or running, allowing for small variations between people. Devices let the user select the type of activity and, with it, the number of METs associated with it.
For example, playing 1 hour of tennis with a MET of 8 and a weight of 73 kg.
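A quick worked computation of that example using the per-minute approximation above:
# 1 hour of tennis at 8 METs for a 73 kg person.
mets <- 8; weight_kg <- 73; minutes <- 60
kcal_per_min <- mets * 3.5 * weight_kg / 200   # oxygen-based approximation
round(kcal_per_min * minutes)                  # ~613 kcal for the hour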
Minutes spent in one of four intensity categories.
Intensity minutes ~ Exercise minutes ~ Active minutes.
The cut points for intensity classifications and METs are determined by proprietary algorithms from Fitbit.
- 0 : Sedentary : sleeping, awake in bed
- 1 : Lightly Active : walking
- 2 : Moderately Active : brisk walking (jogging), swimming, cycling, normal training
- 3 : Very Active : running, fast swimming, climbing, etc.
Intensity is a classification of the additional effort that the activity involves.
It can be calculated and defined in different ways.
By heart-rate measurement: FitBit uses both of the formulas below depending on the device. ref
By METs: ref
By STEPS: not enough official information was found; however, there is research that finds a cut-off point at 100 steps/min that defines a moderate activity level.
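As an illustration of the step-based approach, the per-minute steps table defined later in this document could be bucketed as below. The 100 steps/min threshold is the research cut-off mentioned above; the 130 value is an arbitrary upper bound added for the example, and neither is Fitbit's proprietary cut point.
library(dplyr)
# Classify each minute by step cadence (illustrative thresholds).
db$minuteStepsNarrow %>%
  mutate(intensity_by_steps = cut(steps_narrow,
                                  breaks = c(-Inf, 0, 99, 129, Inf),
                                  labels = c("0 Sedentary", "1 Light",
                                             "2 Moderate", "3 Very active"))) %>%
  count(intensity_by_steps)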
Anthropometry and biometrics can also make use of another set of useful variables.
FILES = helper_load_datasets_from( "../input/fitbit" )
duplis = duplicated(FILES$table)
duplis = FILES$table[ duplis ]
nonduplis = ! (FILES$table %in% duplis)
nonduplis = FILES[ nonduplis ]
file.copy( nonduplis$fullpath, "../input/stage1/", overwrite=TRUE)
#1624580081,4/12/2016 12:00
for ( fname in duplis ){
lst = FILES[ ext=='csv' & table==fname ]$fullpath
a = readLines( lst[1] )
b = readLines( lst[2] )
if( a[1] == b[1]){
a = setdiff( a, b )
x = append( b, a) #, deparse.level=0 )
writeLines(x, paste0("../input/stage1/",fname,".csv") )
} else { cat("ERROR: Requiere atencion manual:\n", fname, "\n", a, "\n", b) }
}
FILES <- helper_load_datasets_from( "../input/stage1" )
FILES %>% print.data.frame(right=FALSE, row.names=FALSE)
## ext table fullpath
## csv dailyActivity ../input/stage1/dailyActivity.csv
## csv dailyCalories ../input/stage1/dailyCalories_merged.csv
## csv dailyIntensities ../input/stage1/dailyIntensities_merged.csv
## csv dailySteps ../input/stage1/dailySteps_merged.csv
## csv heartrate_seconds ../input/stage1/heartrate_seconds.csv
## csv hourlyCalories ../input/stage1/hourlyCalories.csv
## csv hourlyIntensities ../input/stage1/hourlyIntensities.csv
## csv hourlySteps ../input/stage1/hourlySteps.csv
## csv minuteCaloriesNarrow ../input/stage1/minuteCaloriesNarrow.csv
## csv minuteCaloriesWide ../input/stage1/minuteCaloriesWide_merged.csv
## csv minuteIntensitiesNarrow ../input/stage1/minuteIntensitiesNarrow.csv
## csv minuteIntensitiesWide ../input/stage1/minuteIntensitiesWide_merged.csv
## csv minuteMETsNarrow ../input/stage1/minuteMETsNarrow.csv
## csv minuteSleep ../input/stage1/minuteSleep.csv
## csv minuteStepsNarrow ../input/stage1/minuteStepsNarrow.csv
## csv minuteStepsWide ../input/stage1/minuteStepsWide_merged.csv
## csv sleepDay ../input/stage1/sleepDay_merged.csv
## csv weightLogInfo ../input/stage1/weightLogInfo.csv
#read_func <- read.csv
read_func <- data.table::fread # 3 times faster
# CSV LOADING FUNCTION
helper_csv_loading = function(x){
fname = basename(x)
read_func(x, header=TRUE, dec=".", sep=",", colClasses=c( Id="character" ) )# %>%
}
# Load a database of all initial datasets of the project
system.time({
csv_files <- FILES$fullpath[ FILES$ext=="csv" ]
db <- lapply(csv_files, helper_csv_loading)
names(db) <- FILES$table
})## user system elapsed
## 9.56 0.55 8.07
by <- c( '.session_id', # session ids
'.datetime' ) # grouping keys############# dailys #############
names(db$dailyActivity) <- names( c(
.session_id = "Id", #
.datetime = "ActivityDate", #
Steps = "TotalSteps", # Num of steps.
ActiveKm_Total = "TotalDistance", # Km tracked.
ActiveKm_Tracker = "TrackerDistance", # Km tracked by Fitbit device.
ActiveKm_LoggedKm = "LoggedActivitiesDistance",# Km logged by user.
ActiveKm_3_Heavy = "VeryActiveDistance", # Km travelled on very active act
ActiveKm_2_Moderate = "ModeratelyActiveDistance",# Km travelled on moderate act
ActiveKm_1_Light = "LightActiveDistance", # Km travelled on light act
ActiveKm_0_Sedentary = "SedentaryActiveDistance", # Km travelled on sedentary act
ActiveMinutes_3_Heavy = "VeryActiveMinutes", # Minutes spent in ... activity
ActiveMinutes_2_Moderate = "FairlyActiveMinutes", # Minutes spent in ... activity
ActiveMinutes_1_Light = "LightlyActiveMinutes", # Minutes spent in ... activity
ActiveMinutes_0_Sedentary = "SedentaryMinutes", # Minutes spent in ... activity
Calories = "Calories") # Estimated kcals expenditure
)############# dailys #############
names(db$dailyCalories) <- names( c(
.session_id = "Id", #
.datetime = "ActivityDay", #
CaloriesDaily= "Calories") #
)
############# hourlys #############
names(db$hourlyCalories) <- names( c(
.session_id = "Id", #
.datetime = "ActivityHour",#
CaloriesHourly= "Calories") #
)
############# minutes #############
names(db$minuteCaloriesNarrow) <- names( c(
.session_id = "Id", #
.datetime = "ActivityMinute",#
calories_narrow = "Calories") #
)
############# minutesWIDE #############
names(db$minuteCaloriesWide)[1:2] <- c(".session_id", ".datetime")
names(db$minuteCaloriesWide) <-
stringr::str_replace_all( names(db$minuteCaloriesWide), "Cal", "zzz.Cal" ) ############# dailys #############
names( db$dailyIntensities ) <- names( c(
.session_id = "Id", #
.datetime = "ActivityDay", #
ActiveMinutes_0_Sedentary= "SedentaryMinutes", #
ActiveMinutes_1_Light = "LightlyActiveMinutes", #
ActiveMinutes_2_Moderate = "FairlyActiveMinutes", #
ActiveMinutes_3_Heavy = "VeryActiveMinutes", #
ActiveKm_0_Sedentary = "SedentaryActiveDistance",#
ActiveKm_1_Light = "LightActiveDistance", #
ActiveKm_2_Moderate = "ModeratelyActiveDistance",#
ActiveKm_3_Heavy = "VeryActiveDistance") #
)
############# hourlys #############
names( db$hourlyIntensities ) <- names( c(
.session_id = "Id", #
.datetime = "ActivityHour", #
IntensityHourly = "TotalIntensity", #
IntensityAverage = "AverageIntensity") #
)
############# minutes #############
names( db$minuteIntensitiesNarrow ) <- names( c(
.session_id = "Id", #
.datetime = "ActivityMinute", #
intensity_narrow = "Intensity") #
)
############# minutesWIDE #############
names(db$minuteIntensitiesWide)[1:2] <- c(".session_id", ".datetime")
names(db$minuteIntensitiesWide) <-
stringr::str_replace_all( names(db$minuteIntensitiesWide), "Int", "zzz.Int" ) ############# dailys #############
names(db$dailySteps) <- c(".session_id", ".datetime", "StepsDaily")
############# hourlys #############
names(db$hourlySteps) <- c(".session_id", ".datetime", "StepsHourly")
############# minutes #############
names(db$minuteStepsNarrow) <- c(".session_id", ".datetime", "steps_narrow")
names(db$minuteStepsWide)[1:2] <- c(".session_id", ".datetime")
names(db$minuteStepsWide) <-
stringr::str_replace_all( names(db$minuteStepsWide), "Ste", "zzz.Ste" )
No values are recorded while awake.
#### Sleep
# sleepDay : Classic Day Minute Totals
# minuteSleep : Classic Sleep Log (1 minute)
# Classic Data is supported by "Classic sleep algorithm"
sleep_not_probable_devices_by_stages = c('Alta HR', 'Charge 2', 'Blaze', 'Ionic')
sleep_not_probable_devices_by_automatic = c('Charge', 'Alta', 'Alta HR', 'Charge HR', 'Flex', 'Blaze', 'Charge 2', 'Flex 2', 'Ionic', 'Surge')
names( db$sleepDay ) <- names( c(
.session_id = "Id", #
.datetime = "SleepDay", #
Sleep_Records = "TotalSleepRecords", # Num of recorded periods. Includes naps > 60min
Sleep_Minutes = "TotalMinutesAsleep",# Minutes classified as being “asleep”
Sleep_TimeInBed = "TotalTimeInBed") # Minutes in bed occured during a sleep-record,
# includes(asleep,restless,awake)
)
names( db$minuteSleep ) <- names( c(
.session_id = "Id", #
.datetime = "date", # floor() can be used to convert to whole minutes
# EXAMPLE: 04/20/2018 10:15:30 to 04/20/2018 10:15:00
# EXAMPLE: 04/20/2018 10:16:30 to 04/20/2018 10:16:00
sleep_state = "value", # Value indicating the sleep state: 1=asleep,2=restless,3=awake
# 1=asleep : manual register: awake in bed
# 2=restless : manual register: awake in bed
# 3=awake : manual register: awake in bed
sleep_logid = "logId") # Unique log id in Fitbit’s system for the sleep record
)db$weightLogInfo <- db$weightLogInfo %>% rename( any_of( c(
.session_id = "Id", #
.datetime = "Date", # mm/dd/yy hh:mm:ss
BMI_Weight = "WeightKg", # Weight in kilograms
BMI_WeightPounds = "WeightPounds",# Weight in pounds
BMI_Weight_Fat = "Fat", # Body fat percentage recorded
BMI = "BMI", # Body Mass Index = weight / height^2
BMI_IsManual = "IsManualReport",# if FALSE Automated Scale devices was used
BMI_LogId = "LogId", # Unique log id in Fitbit’s systems
BMI_Height = 'BMI_Height' ,# calculated next
BMI_Class = 'BMI_Class' ,# calculated next
BMI_ClassOMS = 'BMI_ClassOMS' # calculated next
) ))
db$weightLogInfo$BMI_Height <- # quickly obtain individuals Heights
sqrt( db$weightLogInfo$BMI_Weight / db$weightLogInfo$BMI )
db$weightLogInfo$BMI_Class <-
sapply( db$weightLogInfo$BMI, function(BMI){
if (BMI < 18.5) return('0 Bajo')
else if ( BMI < 25 ) return('1 Normal')
else if ( BMI < 30 ) return('2 Alto')
else return('3 Obeso')
} )
db$weightLogInfo$BMI_ClassOMS <-
sapply( db$weightLogInfo$BMI, function(BMI){
if (BMI < 16) return('00 Bajo Grave')
else if ( BMI < 17 ) return('01 Bajo Medio')
else if ( BMI < 18.5 ) return('02 Bajo Leve')
else if ( BMI < 25 ) return('10 Normal')
else if ( BMI < 27.5 ) return('20 Alto')
else if ( BMI < 30 ) return('21 PreObeso')
else if ( BMI < 35 ) return('30 Obeso')
else if ( BMI < 40 ) return('31 Obeso Grave')
else return('32 Obeso Morbido')
} )
names(db$heartrate_seconds) <- c( ".session_id", ".datetime", "heartrate")
names(db$minuteMETsNarrow) <- c(".session_id", ".datetime", "mets")
# Create unique factor type
factor_session_id <- db$dailyActivity$.session_id %>% unique %>% factor
factor_session_id %>% levels## [1] "1503960366" "1624580081" "1644430081" "1844505072" "1927972279"
## [6] "2022484408" "2026352035" "2320127002" "2347167796" "2873212765"
## [11] "2891001357" "3372868164" "3977333714" "4020332650" "4057192912"
## [16] "4319703577" "4388161847" "4445114986" "4558609924" "4702921684"
## [21] "5553957443" "5577150313" "6117666160" "6290855005" "6391747486"
## [26] "6775888955" "6962181067" "7007744171" "7086361926" "8053475328"
## [31] "8253242879" "8378563200" "8583815059" "8792009665" "8877689391"
for( dfname in names(db) ){
# Assign the same factor to all tables
db[[dfname]]$.session_id <-
factor( db[[dfname]]$.session_id, levels=levels(factor_session_id) )
}# system.time({
# # Create unique factor type
# factor_session_id <-
# lapply( db , function(df) df$.session_id ) %>%
# unlist %>% unique %>% factor
#
# for( dfname in names(db) ){
# # Assign the same factor to all tables
# db[[dfname]]$.session_id <-
# factor( db[[dfname]]$.session_id, levels=factor_session_id )
# }
# })system.time({
for( dfname in names(db) ){
db[[dfname]]$.datetime <- db[[dfname]]$.datetime %>%
lubridate::parse_date_time( c("%m-%d-%Y %I:%M:%S %p", "mdy_HMS", "mdy", "ymd") )
}
})## user system elapsed
## 5.17 1.03 6.22
system.time({
for( dfname in names(db) ){
# Assign table-keys to data.tables as it improves performance
if( is.data.table( db[[dfname]] ) )
data.table::setkeyv( db[[dfname]], by )
}
})## user system elapsed
## 0.95 0.17 0.56
helper_wide_to_narrow = function( DF, prefix ) {
DF %>% pivot_longer(
cols = starts_with( prefix ),
names_to = c("MINUTES"),
names_prefix = prefix,
values_to = prefix) %>%
mutate( .datetime = .datetime + lubridate::minutes(MINUTES) ) %>%
select( -c(MINUTES) ) %>%
as.data.frame
}db$minuteCaloriesWide.Narrow <-
helper_wide_to_narrow( db$minuteCaloriesWide, prefix="zzz.Calories" ) %>%
rename( CaloriesWide = zzz.Calories ) %>%
as.data.table()
db$minuteIntensitiesWide.Narrow <-
helper_wide_to_narrow( db$minuteIntensitiesWide, prefix="zzz.Intensity" ) %>%
rename( IntensityWide = zzz.Intensity ) %>%
as.data.table()
db$minuteStepsWide.Narrow <-
helper_wide_to_narrow( db$minuteStepsWide, prefix="zzz.Steps" ) %>%
rename( StepsWide = zzz.Steps ) %>%
as.data.table()# Now free some memory
db$minuteCaloriesWide <- NULL
db$minuteIntensitiesWide <- NULL
db$minuteStepsWide <- NULL
minuteSleep and sleepDay contain duplicates.
NOTE: The following code segment searches the whole set and lets us inspect the values neighbouring each detected duplicate. When duplicates are found, an action is taken in the segment that follows and the search code is run again.
for( dfname in names(db) ){
DF <- db[[dfname]]
duplis = DF %>% filter( duplicated( . ) )
DF <- DF %>% rowid_to_column( "idx" )
if( nrow(duplis) > 0 ) {
left_join( duplis, DF, by=by ) %>% head(20) %>% print
cat( dfname,": \t", nrow(duplis), " duplicates found !!!!! \n" )
break
} else { cat( dfname,": \t0 duplicates found.\n" ) }
}
db$minuteSleep <- distinct( db$minuteSleep )
db$sleepDay <- distinct( db$sleepDay )
Calories are burned 100% of the time, so a value of ZERO is impossible in practice.
Calories are also estimated from the Basal Metabolic Rate (BMR), so their minimum value is a constant different from zero.
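As a rough sanity check (the daily BMR value here is an assumption for illustration), the per-minute floor implied by the BMR is easy to compute and is consistent with the small non-zero repair values used below.
# A daily BMR spread evenly over the 1440 minutes of a day gives the
# per-minute calorie floor that a tracker should never report as zero.
bmr_daily <- 1500                 # assumed kcal/day, for illustration
round(bmr_daily / (24 * 60), 2)   # ~1.04 kcal per minute at complete rest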
db$minuteCaloriesNarrow[calories_narrow==0] %>% # returns 12 CERO values
dilate_filter( db$minuteCaloriesNarrow, by )# Repair with the median value of the neighbors cells.
median_list_calories <-
db$minuteCaloriesNarrow[calories_narrow==0] %>%
split( row.names(.) ) %>%
dilate_filter( db$minuteCaloriesNarrow, by ) %>%
map( ~.[['calories_narrow']] %>% median ) %>% unlist
cat( "db$minuteCaloriesNarrow[calories_narrow==0]$Calories <- \n" )
dput(median_list_calories)db$minuteCaloriesNarrow[calories_narrow==0]$calories_narrow <-
c(0.830999970436096, 1.13170003890991, 1.2175999879837, 0.991400003433228,
1.40530002117157, 1.12284004688263, 0.775099992752075, 0.925499975681305,
6.11118006706238, 7.58239006996155, 1.58438003063202, 1.69755005836487)db$minuteCaloriesWide.Narrow[CaloriesWide==0] %>% # returns 7 CERO values
dilate_filter( db$minuteCaloriesWide.Narrow, by )# Produce a fixed vector of values to fix.
median_list_calories <-
db$minuteCaloriesWide.Narrow[CaloriesWide==0] %>%
split( row.names(.) ) %>%
dilate_filter( db$minuteCaloriesWide.Narrow, by ) %>%
map( ~.[['CaloriesWide']] %>% median ) %>% unlist
cat( "db$minuteCaloriesWide.Narrow[CaloriesWide==0]$CaloriesWide <- \n" )
dput(median_list_calories)db$minuteCaloriesWide.Narrow[CaloriesWide==0]$CaloriesWide <-
c(0.830999970436096, 0.775099992752075, 0.925499975681305, 1.58438003063202,
1.69755005836487, 1.13170003890991, 1.2175999879837)
db$dailyActivity[Calories==0] %>% # returns 9 CERO values
dilate_filter( db$dailyActivity, by )# Produce a fixed vector of values to fix.
median_list_calories <-
db$dailyActivity[Calories==0] %>%
split( row.names(.) ) %>%
dilate_filter( db$dailyActivity, by ) %>%
map( ~.[['Calories']] %>% median ) %>% unlist
cat( "db$dailyActivity[Calories==0]$Calories <- \n" )
dput(median_list_calories)# this line requires to floor()
db$dailyActivity[Calories==0]$Calories <- c(1556.5, 1963, 2168, 2947, 2174, 2410, 1740.5, 1958.5, 2807) %>% floor
Heart rate is a value in BPM that cannot possibly be ZERO.
db$heartrate_seconds[heartrate==0] %>%
dilate_filter( db$heartrate_seconds, by )
## [1] 0
The minimum value of METs is 1.
When checking the variable we find 13 values at ZERO. To repair them we assign the median of the neighbouring values.
db$minuteMETsNarrow[mets==0] %>% # returns 13 CERO values
dilate_filter( db$minuteMETsNarrow, by )# Produce a fixed vector of values to fix.
median_list_mets <-
db$minuteMETsNarrow[mets==0] %>%
split( row.names(.) ) %>%
dilate_filter( db$minuteMETsNarrow, by ) %>%
map( ~.[['mets']] %>% median ) %>% unlist
cat( "db$minuteMETsNarrow[METs==0]$METs <- \n" )
dput(median_list_mets)db$minuteMETsNarrow[mets==0]$mets <-
c(10, 15, 10, 10, 10, 10, 12, 10, 10, 10, 54, 67, 14)
We can only clean zeros from variables that genuinely can never be zero.
For example, we know that the variable "intensity" can take the categorical value zero, which means No-Activity or Sedentary.
db$personal <- data.table( .session_id = levels(factor_session_id) )
Body Mass Index
db$weightLogInfo %>% head(5)db$weightLogInfo[ db$weightLogInfo$BMI_Weight_Fat >0 ]$.session_id %>% unique %>% as.character()## [1] "1503960366" "2347167796" "4319703577"
db$weightLogInfo[ db$weightLogInfo$BMI_IsManual==FALSE ]$.session_id %>% unique %>% as.character()## [1] "1927972279" "5577150313" "8877689391"
db$weightLogInfo %>%
group_by(
.session_id,
.datetime = lubridate::floor_date( .datetime, "1 day" ), ) %>%
summarize(
BMI_Weight = BMI_Weight %>% mean(na.rm=TRUE) %>% round(2),
) %>%
ggplot( aes( .datetime, BMI_Weight, col=.session_id ) ) +
geom_line()DF <-
db$weightLogInfo %>%
group_by( .session_id ) %>%
summarise(
N = n(),
BMI_ClassOMS_first = BMI_ClassOMS %>% first,
BMI_ClassOMS_last = BMI_ClassOMS %>% last,
BMI = BMI %>% mean(na.rm=TRUE) %>% round(2),
BMI_Height = BMI_Height %>% mean(na.rm=TRUE) %>% round(2),
BMI_Weight = BMI_Weight %>% mean(na.rm=TRUE) %>% round(2),
BMI_Weight_Fat = BMI_Weight_Fat %>% mean(na.rm=TRUE) %>% round(2),
)
h = DF$BMI_Weight %>% plot_histo_normal()db$personal$Fat <- DF$BMI_Weight_Fat %>% setNames( DF$.session_id ) %>% .[ db$personal$.session_id ]
db$personal$Height <- DF$BMI_Height %>% setNames( DF$.session_id ) %>% .[ db$personal$.session_id ]
db$personal$Kg <- DF$BMI_Weight %>% setNames( DF$.session_id ) %>% .[ db$personal$.session_id ]
db$personal$bmi_class <- DF$BMI_ClassOMS_first %>% setNames( DF$.session_id ) %>% .[ db$personal$.session_id ]
db$personal$bmi <- DF$BMI %>% setNames( DF$.session_id ) %>% .[ db$personal$.session_id ]# Sleep pattern by Hour of Day
#-------------------------------
db$minuteStepsNarrow %>%
group_by(
.session_id,
dayhour = lubridate::hour( .datetime ), ) %>%
summarize(
steps = sum( steps_narrow, na.rm=TRUE) %>% round(2),
) %>%
ggplot( aes( dayhour, steps, col=.session_id ) ) + geom_line() +
geom_vline( xintercept = 21.5, linetype=2 ) +
geom_vline( xintercept = 7.5, linetype=2 )# Sleep pattern by Day of the week
#-------------------------------
db$minuteStepsNarrow %>% #complete(.session_id,.datetime) %>%
group_by(
.session_id,
weekday = lubridate::wday( .datetime, week_start=1 ),
) %>%
summarize(
steps = sum( steps_narrow )%>% round(2),
) %>%
ggplot( aes( weekday, steps, col=.session_id ) ) +
geom_line()# Sleep pattern by Day of the week by different weeks for 4 IDs
#-------------------------------
db$minuteStepsNarrow %>%
filter( as.numeric(.session_id) %in% 1:6 ) %>%
group_by(
.session_id,
weeknum = lubridate::isoweek( .datetime ),
weekday = lubridate::wday( .datetime, week_start=1 ),
) %>%
summarize(
steps = ( steps_narrow ) %>% sum %>% round(2),
) %>%
ggplot() + geom_line( aes( weekday, steps, col=.session_id, alpha=weeknum,
group=interaction( .session_id, weeknum ) ) )DF <-
db$minuteStepsNarrow %>%
group_by(
.session_id,
.datetime = lubridate::floor_date( .datetime, "1 day" ),
) %>%
summarize(
steps = sum( steps_narrow , na.rm=TRUE ),
)
DF2 <-
DF %>%
group_by( .session_id ) %>%
summarize( steps_avg = mean( steps , na.rm=TRUE ) %>% floor )
db$personal$steps_avg <- DF2$steps_avg %>% setNames( DF2$.session_id ) %>% .[ db$personal$.session_id ]
No values are recorded while awake, so sleep_state will never take the value ZERO.
Five user ids are observed that do NOT contribute significant amounts of Sleep data.
FitBit’s Sleep quality = 100 * minutesAsleep / (TimeInBed - minutesAfterWakeup)
# Sleep pattern by Hour of Day
#-------------------------------
db$minuteSleep %>% #complete(.session_id,.datetime) %>%
group_by(
.session_id,
dayhour = lubridate::hour( .datetime ), ) %>%
summarize(
sleep = (!is.na(sleep_state)) %>% sum %>% round(2),
awake = (is.na(sleep_state)) %>% sum %>% round(2),
) %>%
ggplot( aes( dayhour, sleep, col=.session_id ) ) + geom_line()+
geom_vline( xintercept = 20.5, linetype=2 ) +
geom_vline( xintercept = 9, linetype=2 )# Sleep pattern by Day of the week
#-------------------------------
db$minuteSleep %>% #complete(.session_id,.datetime) %>%
group_by(
.session_id,
weekday = lubridate::wday( .datetime, week_start=1 ),
) %>%
summarize(
sleep = (!is.na(sleep_state)) %>% sum %>% round(2),
) %>%
ggplot( aes( weekday, sleep, col=.session_id ) ) +
geom_line()# Sleep pattern by Day of the week by different weeks for 4 IDs
#-------------------------------
db$minuteSleep %>%
filter( as.numeric(.session_id) %in% 1:6 ) %>%
group_by(
.session_id,
weeknum = lubridate::isoweek( .datetime ),
weekday = lubridate::wday( .datetime, week_start=1 ),
) %>%
summarize(
sleep = (!is.na(sleep_state)) %>% sum %>% round(2),
) %>%
ggplot() + geom_line( aes( weekday, sleep, col=.session_id, alpha=weeknum,
group=interaction( .session_id, weeknum ) ) )### Daily summary
DF <-
db$minuteSleep %>%
group_by(
.session_id,
.datetime = lubridate::floor_date( .datetime, "1 day" ), ) %>%
summarize(
s1asleep = sum( sleep_state==1 , na.rm=TRUE ),
s2restless = sum( sleep_state==2 , na.rm=TRUE ),
s3awake = sum( sleep_state==3 , na.rm=TRUE ),
minutes_inbed = n(),
minutes_total = n_distinct( lubridate::yday(.datetime) ) *24*60,
average = round( minutes_inbed/minutes_total *24 ,2),
quality = round( 100*s1asleep/(minutes_inbed - s3awake) ,2),
)
### Remove outliers
h = DF$minutes_inbed %>% plot_histo_normal
DF2 <- DF %>% split( f=findInterval( .$minutes_inbed, h$l1 ) ) #h$mean-h$sd*1.5
h = DF2[[1]]$minutes_inbed %>% plot_histo_normal
h = DF2[[2]]$minutes_inbed %>% plot_histo_normal
### Calculate overall average values
DF3 <-
DF2[[2]] %>%
#filter( !.session_id %in% c('2022484408','2320127002','4558609924','7007744171','8053475328') ) %>%
mutate(
weekday = lubridate::wday( .datetime, week_start=1 )
) %>%
group_by(
.session_id ) %>%
summarize(
s1asleep = sum( s1asleep , na.rm=TRUE ),
s2restless = sum( s2restless , na.rm=TRUE ),
s3awake = sum( s3awake , na.rm=TRUE ),
w_min_inbed = sum( minutes_inbed[weekday<6], na.rm=TRUE ),
w_min_total = sum( minutes_total[weekday<6], na.rm=TRUE ),
workday_avg = round( w_min_inbed/w_min_total *24 ,2),
minutes_inbed = sum(minutes_inbed),
minutes_total = sum(minutes_total),
average = round( minutes_inbed/minutes_total *24 ,2),
quality = round( 100*s1asleep/(minutes_inbed - s3awake) ,2),
cociente = round( workday_avg / average * 100 ,2),
) %>%
complete(.session_id)
### Overall Results
DF3 %>%
select( .session_id, workday_avg, average, quality ) %>%
drop_na( average ) %>%
print.data.frame( )## .session_id workday_avg average quality
## 1 1503960366 5.92 6.55 94.09
## 2 1644430081 10.60 9.21 85.12
## 3 1844505072 12.57 11.89 81.89
## 4 1927972279 7.97 8.11 96.02
## 5 2026352035 8.78 8.97 94.38
## 6 2347167796 7.81 8.15 92.01
## 7 3977333714 7.34 7.40 65.15
## 8 4020332650 6.88 7.06 95.56
## 9 4319703577 8.28 8.52 95.59
## 10 4388161847 6.60 7.56 95.60
## 11 4445114986 7.18 7.14 94.19
## 12 4558609924 NaN 4.62 98.92
## 13 4702921684 7.22 7.51 95.22
## 14 5553957443 7.28 8.49 91.69
## 15 5577150313 7.50 7.66 95.09
## 16 6117666160 9.30 9.02 95.41
## 17 6775888955 6.96 6.92 94.33
## 18 6962181067 7.70 7.78 96.57
## 19 7086361926 7.49 7.76 97.20
## 20 8053475328 8.28 7.39 99.55
## 21 8378563200 7.28 7.99 93.48
## 22 8792009665 7.77 7.79 95.95
### Set data into 'personal' table
db$personal$sleep_avg <- DF3$average %>% setNames( DF3$.session_id ) %>% .[ db$personal$.session_id ]
db$personal$sleep_qlty <- DF3$quality %>% setNames( DF3$.session_id ) %>% .[ db$personal$.session_id ]### Inspection Sleep Pattern by Weekday
DF3 <-
DF2[[2]] %>%
mutate(
weekday = lubridate::wday( .datetime, week_start=1 )
) %>%
group_by(
.session_id, weekday ) %>%
summarise(
s1asleep = sum( s1asleep , na.rm=TRUE ),
s2restless = sum( s2restless , na.rm=TRUE ),
s3awake = sum( s3awake , na.rm=TRUE ),
minutes_inbed = sum(minutes_inbed),
minutes_total = sum(minutes_total),
average = round( minutes_inbed/minutes_total *24 ,2),
quality = round( 100*s1asleep/(minutes_inbed - s3awake) ,2),
)
DF3 %>%
pivot_like_excel(
grouping = c('.session_id'), pivot_col = 'weekday',
attributes = c('average', 'quality', 'minutes_inbed') ) %>%
head(10)## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(attributes)` instead of `attributes` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(pivot_col)` instead of `pivot_col` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
Basal Metabolism.
The following plot shows that calories are distributed in levels, with the lowest level representing the BMR.
# Show low-valued outliers
db$minuteCaloriesNarrow %>%
arrange(calories_narrow) %>%
head(5) %>%
dilate_filter( db$minuteCaloriesNarrow, by )# Show high-valued outliers
db$minuteCaloriesNarrow %>%
arrange(calories_narrow %>% desc) %>%
head(5) %>%
dilate_filter( db$minuteCaloriesNarrow, by )# Estimation of METs levels is sensitive to low-value outliers
db$minuteCaloriesNarrow[ 1134839 ]$calories_narrow = 1.23360002
db$minuteCaloriesNarrow[ 186540 ]$calories_narrow = 1.40530002
db$minuteCaloriesNarrow[ 209580 ]$calories_narrow = 1.40530002
db$minuteCaloriesNarrow[ 179280 ]$calories_narrow = 1.40530002
db$minuteCaloriesNarrow[ 180780 ]$calories_narrow = 1.40530002
# 1 MET = BMR = Reposo
# 2 METs = 2 times 1MET = Ligero
# 3 METs = 3 times 1MET = Moderado
# 6 METs = 6 times 1MET = Intenso
DF <-
db$minuteCaloriesNarrow %>%
group_by( .session_id ) %>%
summarise(
N = n(),
m = calories_narrow %>% min %>% round(2),
# Pivot column by intervals
# ----------------
calories_narrow %>% # pivot a column
split( cut(., m*c(0, 1.9, 2.9, 5.9, 30) , right=TRUE) ) %>%
setNames( c("Reposo","Ligero","Moderado", "Intenso") ) %>%
sapply( function(x) round( median(x) ,2) ) %>%
rbind %>% as_tibble,
BMR = round( Reposo*60*24, 1),
intervalos_concat = calories_narrow %>%
cut( m*c(0, 1.5, 2.5, 5.5, 20) , right=TRUE) %>%
levels %>%
paste(collapse=" "),
# Pivot column by Quantiles
# ----------------
# quantile( calories_narrow, c(0, 0.1, 0.6, 0.7, 0.99, 1), type=5, na.rm=TRUE) %>%
# rbind %>% round(2) %>% as_tibble,
# m2 = coalesce( median( ifelse( calories_narrow <= `70%`, calories_narrow , NA ), na.rm=TRUE),
# min(calories_narrow) ) %>% round(2),
# BMR2 = m2*60*24,
# Pivot column by diff between values
# ----------------
# m3 = calories_narrow %>% sort %>%
# split( cumsum( c(0, diff(.) >= 0.4 ) ) ) %>%
# .[[1]] %>% median %>% round(2),
# BMR3 = m3*60*24,
)
h = DF$BMR %>% plot_histo_normal()DF2 <- DF %>% split( DF$BMR>750 )
h = DF2[[2]]$BMR %>% plot_histo_normal()DF2 <- DF2[[2]]%>% complete(.session_id)
db$personal$bmr <- DF2$BMR %>% setNames( DF2$.session_id ) %>% .[ db$personal$.session_id ]# Calories burn pattern by Hour of Day
#-------------------------------
db$minuteCaloriesNarrow %>%
group_by(
.session_id,
dayhour = lubridate::hour( .datetime ), ) %>%
summarize(
calories_narrow = calories_narrow %>% sum %>% round(2),
) %>%
ggplot( aes( dayhour, calories_narrow, col=.session_id ) ) +
geom_line()# Calories burn pattern by Day of the week
#-------------------------------
db$minuteCaloriesNarrow %>%
group_by(
.session_id,
weekday = lubridate::wday( .datetime, week_start=1 ),
) %>%
summarize(
calories_narrow = calories_narrow %>% sum %>% round(2),
) %>%
ggplot() + geom_line( aes( weekday, calories_narrow, col=.session_id ) )# Calories burn pattern by Day of the week by different weeks for 4 IDs
#-------------------------------
db$minuteCaloriesNarrow %>%
filter( as.numeric(.session_id) %in% 1:4 ) %>%
group_by(
.session_id,
weeknum = lubridate::isoweek( .datetime ),
weekday = lubridate::wday( .datetime, week_start=1 ),
) %>%
summarize(
calories_narrow = calories_narrow %>% sum %>% round(2),
) %>%
ggplot() + geom_line( aes( weekday, calories_narrow, col=.session_id, alpha=weeknum,
group=interaction( .session_id, weeknum ) ) )
### Compute 'calorie factor' and 'marginal calories'
#-------------------------------
DF3 <-
db$minuteCaloriesNarrow %>%
merge( DF2 , by=c('.session_id'), all = TRUE ) %>%
group_by(
.session_id,
.datetime = lubridate::floor_date( .datetime, "1 day" ) ) %>%
summarise(
N = n(),
Calories = sum(calories_narrow),
Reposo = sum( Reposo ),
ifactor = round( Calories/Reposo ,3),
marginal = round( Calories-Reposo ,1),
)
DF4 <-
DF3 %>%
group_by( .session_id ) %>%
summarize(
ifactor = mean(ifactor) %>% round(2),
marginal = mean(marginal) %>% round(1),
)
h = DF4$ifactor %>% na.omit %>% plot_histo_normal()h = DF4$marginal %>% na.omit %>% plot_histo_normal()db$personal$margi_cal <- DF4$marginal %>% setNames( DF2$.session_id ) %>% .[ db$personal$.session_id ]
db$personal$factor_cal <- DF4$ifactor %>% setNames( DF2$.session_id ) %>% .[ db$personal$.session_id ]# Intensity pattern by Hour of Day
#-------------------------------
db$minuteIntensitiesNarrow %>%
group_by(
.session_id,
dayhour = lubridate::hour( .datetime ), ) %>%
summarize(
intensity_narrow = intensity_narrow %>% sum %>% round(2),
) %>%
ggplot( aes( dayhour, intensity_narrow, col=.session_id ) ) +
geom_line()# Intensity pattern by Day of the week
#-------------------------------
db$minuteIntensitiesNarrow %>%
group_by(
.session_id,
weekday = lubridate::wday( .datetime, week_start=1 ),
) %>%
summarize(
intensity_narrow = intensity_narrow %>% sum %>% round(2),
) %>%
ggplot( aes( weekday, intensity_narrow, col=.session_id ) ) +
geom_line()# Intensity pattern by Day of the week by different weeks for 4 IDs
#-------------------------------
db$minuteIntensitiesNarrow %>%
filter( as.numeric(.session_id) %in% 1:4 ) %>%
group_by(
.session_id,
weeknum = lubridate::isoweek( .datetime ),
weekday = lubridate::wday( .datetime, week_start=1 ),
) %>%
summarize(
intensity_narrow = intensity_narrow %>% sum %>% round(2),
) %>%
ggplot() + geom_line( aes( weekday, intensity_narrow, col=.session_id, alpha=weeknum,
group=interaction( .session_id, weeknum ) ) )
### Compute the daily distribution of intensity levels
#-------------------------------
DF <-
db$minuteIntensitiesNarrow %>%
merge( DF2 , by=c('.session_id'), all = TRUE ) %>%
group_by(
.session_id,
.datetime = lubridate::floor_date( .datetime, "1 day" )
) %>%
summarise(
N = n(),
Inten0 = round( sum(intensity_narrow==0)/N*100 ,2),
Inten1 = round( sum(intensity_narrow==1)/N*100 ,2),
Inten2 = round( sum(intensity_narrow==2)/N*100 ,2),
Inten3 = round( sum(intensity_narrow==3)/N*100 ,2),
Intensity = round( sum(intensity_narrow)/N*100 ,2),
)
h = DF$Inten0 %>% plot_histo_normal()plot( DF$Inten0, DF$Inten1 )plot( DF$Inten0, DF$Inten2 )plot( DF$Inten0, DF$Inten3 )db$personal$intensity0 <- DF$Inten0 %>% setNames( DF2$.session_id ) %>% .[ db$personal$.session_id ]# intensity pattern by Hour of Day
#-------------------------------
db$heartrate_seconds %>%
group_by(
.session_id,
dayhour = lubridate::hour( .datetime ), ) %>%
summarize(
heartrate = heartrate %>% sum %>% round(2),
) %>%
ggplot( aes( dayhour, heartrate, col=.session_id ) ) +
geom_line()# intensity pattern by Day of the week
#-------------------------------
db$heartrate_seconds %>%
group_by(
.session_id,
weekday = lubridate::wday( .datetime, week_start=1 ),
) %>%
summarize(
heartrate = heartrate %>% sum %>% round(2),
) %>%
ggplot( aes( weekday, heartrate, col=.session_id ) ) +
geom_line()# Calories burn pattern by Day of the week by different weeks for 4 IDs
#-------------------------------
db$heartrate_seconds %>%
filter( as.numeric(.session_id) %in% 1:20 ) %>%
group_by(
.session_id,
weeknum = lubridate::isoweek( .datetime ),
weekday = lubridate::wday( .datetime, week_start=1 ),
) %>%
summarize(
heartrate = heartrate %>% sum %>% round(2),
) %>%
ggplot() + geom_line( aes( weekday, heartrate, col=.session_id, alpha=weeknum,
group=interaction( .session_id, weeknum ) ) )DF <-
db$heartrate_seconds %>%
group_by( .session_id ) %>%
summarise(
quantile( heartrate, c(0, 0.01, 1), type=5, na.rm=TRUE) %>% rbind %>% as_tibble,
RestHR = `1%`,
age = 30, #220-`100%`,
RHR = (220-age)-RestHR,
Zone1 = RestHR + RHR*0.4,
Zone2 = RestHR + RHR*0.6,
Zone3 = RestHR + RHR*0.85,
Mean = mean(heartrate) %>% floor
)
h = DF$RestHR %>% plot_histo_normal()
db$personal$restHR <- DF$RestHR %>% setNames( DF$.session_id ) %>% .[ db$personal$.session_id ]
Stride Length = Distance Traveled / Steps
According to various studies, the average step length for adult women is 67 cm and for men 76.2 cm. This information is verified with the following calculations.
DF <- helper_make_lookup("
.session_id stride1 stride2 stride3
1 1503960366 0.63 0.64 0.66
2 1624580081 0.64 0.70 0.78
3 1644430081 0.73 0.73 0.73
4 1844505072 0.66 0.66 0.67
5 1927972279 0.69 0.69 0.70
6 2022484408 0.70 0.71 0.74
7 2026352035 0.61 0.62 0.63
8 2320127002 0.66 0.68 0.71
9 2347167796 0.66 0.67 0.69
10 2873212765 0.67 0.68 0.70
11 3372868164 0.68 0.69 0.71
12 3977333714 0.64 0.67 0.72
13 4020332650 0.69 0.71 0.72
14 4057192912 0.73 0.73 0.74
15 4319703577 0.67 0.68 0.69
16 4388161847 0.77 0.78 0.81
17 4445114986 0.61 0.64 0.66
18 4558609924 0.66 0.66 0.66
19 4702921684 0.76 0.79 0.80
20 5553957443 0.65 0.65 0.66
21 5577150313 0.75 0.75 0.75
22 6117666160 0.76 0.77 0.81
23 6290855005 0.76 0.76 0.76
24 6775888955 0.72 0.72 0.72
25 6962181067 0.66 0.68 0.70
26 7007744171 0.68 0.73 0.78
27 7086361926 0.62 0.66 0.72
28 8053475328 0.71 0.74 0.78
29 8253242879 0.66 0.71 0.80
30 8378563200 0.79 0.79 0.79
31 8583815059 0.78 0.78 0.78
32 8792009665 0.63 0.64 0.64
33 8877689391 0.71 0.78 0.88
34 6391747486 0.71 0.82 0.97
") %>% complete( .session_id )
db$personal$stride1 <- DF$stride1 %>% setNames( DF$.session_id ) %>% .[ db$personal$.session_id ]
db$personal$stride2 <- DF$stride2 %>% setNames( DF$.session_id ) %>% .[ db$personal$.session_id ]
db$personal$stride3 <- DF$stride3 %>% setNames( DF$.session_id ) %>% .[ db$personal$.session_id ]
rel_stride_height <- db$personal %>%
drop_na(Height) %>%
mutate( K = Height/stride1 ) %>% .$K %>%
split( f=.<2.5 ) %>% .[[2]] %>% # remove 3 outliers
mean # 0.42
db$personal$height <- (db$personal$stride1*rel_stride_height) %>% round(2)
db$personal$height <- with( db$personal, coalesce( Height, height ) )# We proceed to floor seconds to minutes as requested by Data Structure
#
# https://www.fitabase.com/media/1930/fitabasedatadictionary102320.pdf
#> Note 1: A variable sampling technique controls the frequency at which heart rate is recorded.
#> - Devices will sample heart rate every 5 to 15 seconds on average.
#> Note 2: Fitabase uses the seconds data to generate a mean value and reports the floor()
#> - For example, if the calculated mean for the interval is 156.79, we report 156.
db$minuteSleep <-
db$minuteSleep %>%
group_by( .session_id,
.datetime = lubridate::floor_date( .datetime, "1 minute" ) ) %>%
summarise( sleep_state = floor( sleep_state ) , .groups=NULL) %>%
as.data.table()
if( !is.null(db$heartrate_seconds) ){
db$minuteHeartrate <-
db$heartrate_seconds %>%
group_by( .session_id,
.datetime = lubridate::floor_date( .datetime, "1 minute" ) ) %>%
summarise( heartrate = mean( heartrate ) , .groups=NULL) %>%
as.data.table
#db$heartrate_seconds <- NULL
}
## • FREE RAM
db$heartrate_seconds <- NULL # Huge database
DF <- NULL
DF2 <- NULL
DF3 <- NULL
DF4 <- NULL
DF <- NULL
FILES <- NULL
h <- NULL
db$minuteActivity.2 <-
db$minuteCaloriesNarrow %>%
merge( db$minuteIntensitiesNarrow , by=by, all=TRUE ) %>%
merge( db$minuteStepsNarrow , by=by, all=TRUE ) %>%
merge( db$minuteMETsNarrow , by=by, all=TRUE ) %>%
merge( db$minuteCaloriesWide.Narrow , by=by, all=TRUE ) %>%
merge( db$minuteIntensitiesWide.Narrow , by=by, all=TRUE ) %>%
merge( db$minuteStepsWide.Narrow , by=by, all=TRUE ) %>%
merge( db$minuteSleep , by=by, all=TRUE ) %>%
merge( db$minuteHeartrate , by=by, all=TRUE ) %>%
mutate( calories = coalesce( calories_narrow , CaloriesWide ) ,
intensity = coalesce( intensity_narrow , IntensityWide ),
steps = coalesce( steps_narrow , StepsWide ) ) %>%
select( # THIS select() REMOVEs REDUNDANCIES CONFIRMED ON FOLLOWING STEPS
-c('CaloriesWide' ,
'calories_narrow' ,
'IntensityWide' ,
'intensity_narrow',
'StepsWide' ,
'steps_narrow'
) ) %>%
select( .session_id, .datetime, sort(colnames(.)) ) %>% # sort columns
complete( .session_id, .datetime = # add NA to empty dates
seq( from = min(.datetime, na.rm=TRUE),
to = max(.datetime, na.rm=TRUE),
'min') ) %>%
as.data.table()
db$minuteActivity.2 %>% names
## [1] ".session_id" ".datetime" "calories" "heartrate" "intensity"
## [6] "mets" "sleep_state" "steps"
# Sleep: this coverage will never reach 100%, as people only sleep about a third of the day
DF <-
db$minuteActivity.2 %>%
group_by( .session_id ) %>%
summarise(
count = sum(!is.na(sleep_state)),
total = n(),
q_sleep = round( count/ total , 4 )
)
db$personal$q_sleep <- (DF$q_sleep) %>% setNames( DF$.session_id ) %>% .[ db$personal$.session_id ] %>% coalesce( 0 )
# Steps
DF <-
db$minuteActivity.2 %>%
group_by( .session_id ) %>%
summarise(
count = sum(!is.na(steps)),
total = n(),
q_steps = round( count/ total , 4 )
)
db$personal$q_steps <- DF$q_steps %>% setNames( DF$.session_id ) %>% .[ db$personal$.session_id ] %>% coalesce( 0 )
# Intensity
DF <-
db$minuteActivity.2 %>%
group_by( .session_id ) %>%
summarise(
count = sum( intensity==0, na.rm=TRUE ),
total = sum( !is.na(intensity) ),
q_sedentarism = round( count/ total , 4 )
)
db$personal$q_sedentarism <- DF$q_sedentarism %>% setNames( DF$.session_id ) %>% .[ db$personal$.session_id ] %>% coalesce( 0 )
# Heartrate
DF <-
db$minuteActivity.2 %>%
group_by( .session_id ) %>%
summarise(
count = sum(!is.na(heartrate)),
total = n(),
q_heartrate = round( count/ total , 4 )
)
db$personal$q_heartrate <- DF$q_heartrate %>% setNames( DF$.session_id ) %>% .[ db$personal$.session_id ] %>% coalesce( 0 )
mindate = lubridate::yday( db$minuteActivity.2$.datetime %>% min )
maxdate = lubridate::yday( db$minuteActivity.2$.datetime %>% max )
daycount = maxdate - mindate
# Weight
DF <-
db$weightLogInfo %>%
group_by( .session_id ) %>%
summarise(
count = sum(!is.na(BMI_Weight)),
total = daycount,
q_weights = round( count/ total , 4 )
)
db$personal$q_weights <- ( DF$q_weights ) %>% setNames( DF$.session_id ) %>% .[ db$personal$.session_id ] %>% coalesce( 0 )
# SAVE 2 DISK ################################################################
data.table::fwrite( db$minuteActivity.2, "../input/stage2/minuteActivity.2.csv" )
#openxlsx::write.xlsx( db$minuteActivity.2, "../input/stage2/minuteActivity.2.xlsx" )
data.table::fwrite( db$personal, "../input/stage2/personal.csv" )
openxlsx::write.xlsx( db$personal, "../input/stage2/personal.xlsx" )
db$minuteCaloriesNarrow <- NULL
db$minuteIntensitiesNarrow <- NULL
db$minuteStepsNarrow <- NULL
db$minuteMETsNarrow <- NULL
db$minuteCaloriesWide.Narrow <- NULL
db$minuteIntensitiesWide.Narrow <- NULL
db$minuteStepsWide.Narrow <- NULL
db$minuteSleep <- NULL
db$minuteHeartrate <- NULL
db$weightLogInfo <- NULL
db$hourlyActivity.2 <-
db$minuteActivity.2 %>%
group_by( .session_id, .datetime =
lubridate::floor_date( .datetime, "1 hour" ) ) %>%
summarise(
calories = round( sum(calories) ,2),
int_m_mean = round( mean(intensity) ,2) ,
intensity = sum(intensity) ,
steps = sum(steps) ,
mets = sum(mets),
sleep_state = sum(sleep_state),
heartrate = mean(heartrate)
) %>%
merge( db$hourlyCalories , by=by, all=TRUE ) %>%
merge( db$hourlyIntensities , by=by, all=TRUE ) %>%
merge( db$hourlySteps , by=by, all=TRUE ) %>%
mutate( calories = coalesce( calories , CaloriesHourly ),
intensity = coalesce( intensity, IntensityHourly ),
steps = coalesce( steps , StepsHourly ) ) %>%
select( .session_id, .datetime, sort(colnames(.)) ) %>%
as.data.table
db$hourlyActivity.2 %>% names
Comparison functions are run and the columns with redundant data are removed.
None of the 'hourly' tables differs from the data already contained at the 'minute' level, so they can be discarded.
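helper_df_col_compare is defined in the HELPER section of this document; purely as a hypothetical illustration (not the actual helper), the comparison can be reduced to a per-row percent difference between two columns, which is then filtered for values above a threshold:
# Hypothetical sketch only -- NOT the document's helper_df_col_compare definition
compare_cols_sketch <- function( df, col, WITH ) {
  x <- df[[col]]; y <- df[[WITH]]
  data.frame( value_a = x, value_b = y,
              DIFF_x100 = round( 100 * abs(x - y) / pmax( abs(y), 1 ), 2 ) )   # percent difference
}
# e.g. db$hourlyActivity.2 %>% compare_cols_sketch( 'calories', WITH='CaloriesHourly' ) %>% filter( DIFF_x100 > 1 )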
db$hourlyActivity.2 %>% helper_df_col_compare( 'calories' , WITH='CaloriesHourly' ) %>% filter(DIFF_x100>1)
db$hourlyActivity.2 %>% helper_df_col_compare( 'intensity' , WITH='IntensityHourly' )
db$hourlyActivity.2 %>% helper_df_col_compare( 'int_m_mean', WITH='IntensityAverage' )
db$hourlyActivity.2 %>% helper_df_col_compare( 'steps' , WITH='StepsHourly' )
### Conclusions:
### - all 'hourly*' are fully contained by 'minuteActivity.2'
### - 'IntensityAverage' has differences due to different time spans.
### - 'CaloriesHourly' has differences due to different rounding of values.
db$hourlyActivity.2$CaloriesHourly <- NULL
db$hourlyActivity.2$IntensityHourly <- NULL
db$hourlyActivity.2$IntensityAverage <- NULL
db$hourlyActivity.2$StepsHourly <- NULL
db$hourlyCalories <- NULL
db$hourlyIntensities <- NULL
db$hourlySteps <- NULL
db$hourlyActivity.2 <- NULL # Nothing to get from here
db$dailyActivity.2 <-
db$personal %>%
select( .session_id, stride1, stride2, stride3 ) %>%
merge( db$minuteActivity.2, by=by[1], all=TRUE ) %>%
group_by(
.session_id,
.datetime = lubridate::floor_date( .datetime, "1 day" ) ) %>%
summarise(
weeknum = lubridate::isoweek( min(.datetime) ),
weekday = lubridate::wday( min(.datetime), week_start=1 ),
#dayhour = lubridate::hour( min(.datetime) ) ,
#weekday = format( min(.datetime), "%a" ),
#daytime = format( min(.datetime), format="%H:%M"),
# Activity
acti0 = sum( intensity==0 ),
acti1 = sum( intensity==1 ),
acti2 = sum( intensity==2 ),
acti3 = sum( intensity==3 ),
acti_minutes = sum( !is.na(intensity) ) ,
# STEPS
steps0 = sum( ifelse(intensity==0, steps, 0) ),
steps1 = sum( ifelse(intensity==1, steps, 0) ),
steps2 = sum( ifelse(intensity==2, steps, 0) ),
steps3 = sum( ifelse(intensity==3, steps, 0) ),
steps = sum( steps ),
# Stride Length
stride1 = round( mean( stride1 ) , 2 ),
stride2 = round( mean( stride2 ) , 2 ),
stride3 = round( mean( stride3 ) , 2 ),
# DISTANCE
dist0 = round( steps0 * stride1 /1000, 2 ),
dist1 = round( steps1 * stride1 /1000, 2 ),
dist2 = round( steps2 * stride2 /1000, 2 ),
dist3 = round( steps3 * stride3 /1000, 2 ),
distsum = round( (dist0 + dist1 + dist2 + dist3) , 2),
# CALORIES
cal0 = sum( ifelse(intensity==0, calories, 0) ),
cal1 = sum( ifelse(intensity==1, calories, 0) ),
cal2 = sum( ifelse(intensity==2, calories, 0) ),
cal3 = sum( ifelse(intensity==3, calories, 0) ),
calories = round( sum( calories ) ,2),
# HEART RATE
heart0 = sum( ifelse(intensity==0, floor(mean(heartrate)), 0) ),
heart1 = sum( ifelse(intensity==1, floor(mean(heartrate)), 0) ),
heart2 = sum( ifelse(intensity==2, floor(mean(heartrate)), 0) ),
heart3 = sum( ifelse(intensity==3, floor(mean(heartrate)), 0) ),
heartrate = floor(mean(heartrate)),
# Otros Indicadores
intensity = round( mean( intensity ) ,2) ,
mets = sum( mets ),
# Sleep Stages
sleep1asleep = sum( sleep_state==1 ),
sleep2restless = sum( sleep_state==2 ),
sleep3awake = sum( sleep_state==3 ),
sleep = sum( sleep_state > 0 )
)
db$dailyActivity.2 %>% names
## [1] ".session_id" ".datetime" "weeknum" "weekday"
## [5] "acti0" "acti1" "acti2" "acti3"
## [9] "acti_minutes" "steps0" "steps1" "steps2"
## [13] "steps3" "steps" "stride1" "stride2"
## [17] "stride3" "dist0" "dist1" "dist2"
## [21] "dist3" "distsum" "cal0" "cal1"
## [25] "cal2" "cal3" "calories" "heart0"
## [29] "heart1" "heart2" "heart3" "heartrate"
## [33] "intensity" "mets" "sleep1asleep" "sleep2restless"
## [37] "sleep3awake" "sleep"
# SAVE 2 DISK ################################################################
data.table::fwrite( db$dailyActivity.2, "../input/stage2/dailyActivity.2.csv" )
openxlsx::write.xlsx( db$dailyActivity.2, "../input/stage2/dailyActivity.2.xlsx" )
db$dailyMERGE <-
db$dailyActivity.2 %>%
merge( db$sleepDay , by=by, all=TRUE ) %>%
merge( db$dailyCalories , by=by, all=TRUE ) %>%
merge( db$dailyIntensities , by=by, all=TRUE )%>%
rename(
Act0Daily = ActiveMinutes_0_Sedentary,
Act1Daily = ActiveMinutes_1_Light,
Act2Daily = ActiveMinutes_2_Moderate,
Act3Daily = ActiveMinutes_3_Heavy,
Dis0Daily = ActiveKm_0_Sedentary,
Dis1Daily = ActiveKm_1_Light,
Dis2Daily = ActiveKm_2_Moderate,
Dis3Daily = ActiveKm_3_Heavy
) %>%
mutate( ActDaily = Act0Daily + Act1Daily + Act2Daily + Act3Daily,
IntensityDaily = mean( ActDaily/4 ),
DisDaily = Dis0Daily + Dis1Daily + Dis2Daily + Dis3Daily
) %>%
# rename_with( ~paste0(.,"_D"), starts_with("Acti") ) %>%
merge( db$dailySteps , by=by, all=TRUE ) %>%
merge( db$dailyActivity , by=by, all=TRUE ) %>%
rename(
CaloriesActi = Calories,
Act0 = ActiveMinutes_0_Sedentary,
Act1 = ActiveMinutes_1_Light,
Act2 = ActiveMinutes_2_Moderate,
Act3 = ActiveMinutes_3_Heavy,
Dis0 = ActiveKm_0_Sedentary,
Dis1 = ActiveKm_1_Light,
Dis2 = ActiveKm_2_Moderate,
Dis3 = ActiveKm_3_Heavy,
DisLogged = ActiveKm_LoggedKm,
DisTracker = ActiveKm_Tracker,
DisTotal = ActiveKm_Total,
StepsActi = Steps,
) %>%
mutate(
#distActi = round( StepsActi* stride_avg /1000, 2),
#milesActi = round( StepsActi* stride_avg /1000 /1.60934, 2),
DisActi = Dis0 + Dis1 + Dis2 + Dis3 ,
ActActi = Act0 + Act1 + Act2 + Act3,
IntensityActi = mean( ActActi/4 ),
) %>%
# select( # THIS select() DISABLES REDUNDANCIES CONFIRMED ON NEXT STEPS
# -c( 'CaloriesDaily' , # data is fully contained in CaloriesActi
# 'CaloriesActi',
# 'Act0Daily',
# 'Act1Daily',
# 'Act2Daily',
# 'Act3Daily',
# 'Dis0Daily',
# 'Dis1Daily',
# 'Dis2Daily',
# 'Dis3Daily',
# 'ActDaily',
# 'DisDaily',
# 'StepsDaily',
# ) ) %>%
select( .session_id, .datetime, sort(colnames(.)) ) %>%
as.data.table
db$dailyMERGE %>% names
## [1] ".session_id" ".datetime" "Act0" "Act0Daily"
## [5] "Act1" "Act1Daily" "Act2" "Act2Daily"
## [9] "Act3" "Act3Daily" "ActActi" "ActDaily"
## [13] "acti_minutes" "acti0" "acti1" "acti2"
## [17] "acti3" "cal0" "cal1" "cal2"
## [21] "cal3" "calories" "CaloriesActi" "CaloriesDaily"
## [25] "Dis0" "Dis0Daily" "Dis1" "Dis1Daily"
## [29] "Dis2" "Dis2Daily" "Dis3" "Dis3Daily"
## [33] "DisActi" "DisDaily" "DisLogged" "dist0"
## [37] "dist1" "dist2" "dist3" "DisTotal"
## [41] "DisTracker" "distsum" "heart0" "heart1"
## [45] "heart2" "heart3" "heartrate" "intensity"
## [49] "IntensityActi" "IntensityDaily" "mets" "sleep"
## [53] "Sleep_Minutes" "Sleep_Records" "Sleep_TimeInBed" "sleep1asleep"
## [57] "sleep2restless" "sleep3awake" "steps" "steps0"
## [61] "steps1" "steps2" "steps3" "StepsActi"
## [65] "StepsDaily" "stride1" "stride2" "stride3"
## [69] "weekday" "weeknum"
The following table is built to verify how closely the calculated distances approximate the original values, so that the daily tables can be dropped.
db$dailyMERGE %>%
select( c('.session_id', 'StepsActi', 'DisTotal', 'distsum', 'stride1', 'stride2', 'stride3', 'dist0', 'dist1', 'dist2', 'dist3') ) %>%
drop_na() %>%
group_by( .session_id) %>%
summarise(
n = n(),
StepsActi = sum(StepsActi) %>% floor, # original value to compare
DisTotal = sum(DisTotal) %>% floor, # original value to compare
distsum = sum(distsum) %>% floor,
stride1 = mean(stride1),
stride2 = mean(stride2),
stride3 = mean(stride3),
dist0 = sum(dist0) %>% floor,
dist1 = sum(dist1) %>% floor,
dist2 = sum(dist2) %>% floor,
dist3 = sum(dist3) %>% floor,
) %>% arrange(n %>% desc) %>% head %>% print.data.frame()
## .session_id n StepsActi DisTotal distsum stride1 stride2 stride3 dist0 dist1
## 1 4020332650 63 255135 183 137 0.69 0.71 0.72 0 117
## 2 1624580081 50 258360 173 175 0.64 0.70 0.78 0 132
## 3 1503960366 49 596789 386 391 0.63 0.64 0.66 0 210
## 4 4445114986 46 213089 144 136 0.61 0.64 0.66 0 110
## 5 4702921684 45 378386 307 300 0.76 0.79 0.80 0 231
## 6 6962181067 45 480595 325 328 0.66 0.68 0.70 0 186
## dist2 dist3
## 1 10 9
## 2 11 30
## 3 37 142
## 4 3 23
## 5 51 17
## 6 53 88
### Conclusions: Calculated distsum matches DisTotal
Step length calculation for each intensity level.
It is assumed that higher activity levels imply a longer walking or running stride.
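For example (hypothetical figures), an ID covering 5 km at light intensity in 7,500 light-intensity steps gives SL1 ≈ 5000/7500 ≈ 0.67 m, while the same 5 km at heavy intensity in 6,000 steps gives SL3 ≈ 0.83 m.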
DF <-
db$dailyMERGE[ steps>1 & DisTotal>0 ] %>%
group_by( .session_id ) %>%
summarise(
S = sum( steps ) ,
s0 = sum( steps0 ) ,
s1 = sum( steps1 ) ,
s2 = sum( steps2 ) ,
s3 = sum( steps3 ) ,
D = sum( DisTotal ) ,
d0 = sum( Dis0 ) ,
d1 = sum( Dis1 ) ,
d2 = sum( Dis2 ) ,
d3 = sum( Dis3 ) ,
n = n(),
) %>%
mutate(
SL1 = round( d1*1000/s1 ,4), # Km to M
SL2 = round( d2*1000/s2 ,4),
SL3 = round( d3*1000/s3 ,4),
stride1 = round( pmin( SL1, SL2, SL3 ) ,2), # sort out approximation errors
stride3 = round( pmax( SL1, SL2, SL3 ) ,2),
stride2 = round( ( SL1+SL2+SL3 + ( SL1+SL2+SL3 )/3 ) /4 ,2)
)
DF <- DF %>% select( .session_id, stride1, stride2, stride3 )
DF %>% print.data.frame()
## .session_id stride1 stride2 stride3
## 1 1503960366 0.62 0.63 0.65
## 2 1624580081 0.63 0.70 0.78
## 3 1644430081 0.73 0.73 0.73
## 4 1844505072 0.66 0.66 0.67
## 5 1927972279 0.68 0.69 0.70
## 6 2022484408 0.69 0.71 0.74
## 7 2026352035 0.31 0.41 0.60
## 8 2320127002 0.57 0.61 0.64
## 9 2347167796 0.66 0.67 0.69
## 10 2873212765 0.67 0.68 0.70
## 11 3372868164 0.68 0.69 0.71
## 12 3977333714 0.64 0.66 0.68
## 13 4020332650 0.66 0.69 0.71
## 14 4057192912 0.67 0.71 0.74
## 15 4319703577 0.67 0.68 0.69
## 16 4388161847 0.77 0.78 0.81
## 17 4445114986 0.61 0.64 0.65
## 18 4558609924 0.65 0.66 0.66
## 19 4702921684 0.76 0.79 0.80
## 20 5553957443 0.63 0.64 0.65
## 21 5577150313 0.75 0.75 0.75
## 22 6117666160 0.76 0.77 0.81
## 23 6290855005 0.76 0.76 0.76
## 24 6391747486 0.71 0.82 0.97
## 25 6775888955 0.72 0.72 0.72
## 26 6962181067 0.65 0.68 0.69
## 27 7007744171 0.66 0.73 0.78
## 28 7086361926 0.60 0.64 0.69
## 29 8053475328 0.70 0.72 0.76
## 30 8253242879 0.66 0.71 0.80
## 31 8378563200 0.77 0.79 0.79
## 32 8583815059 0.78 0.78 0.78
## 33 8792009665 0.62 0.63 0.64
## 34 8877689391 0.70 0.76 0.85
# Copy this result back to the StrideLength section
DF %>%
pivot_longer( starts_with('stride') ) %>%
ggplot() +
geom_histogram( aes(x=value) ) +
facet_grid( rows = vars(name) )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Clean up redundancies
### Inspect
DF = db$dailyMERGE %>% select( all_of( by ), X=calories, Y=CaloriesActi, Z=CaloriesDaily )
scale = c(0, max(c(DF$X, DF$Y, DF$Z), na.rm=TRUE) )
DF %>% helper_plot_timegaps( .session_id, .datetime, X ,scale) %>% print
DF %>% helper_plot_timegaps( .session_id, .datetime, Y ,scale) %>% print
DF %>% helper_plot_timegaps( .session_id, .datetime, Z ,scale) %>% print
### Conclusion:
### - calories, CaloriesActi and CaloriesDaily have different time spans.
### Inspect & Compare
db$dailyMERGE %>%
select( all_of( by ), X=CaloriesActi, Y=CaloriesDaily ) %>%
helper_heatmap_analisis()
## [1] 3185696
## [1] 2222277
## [1] .session_id .datetime X Y diff
## <0 rows> (or 0-length row.names)
## .session_id .datetime X Y diff
## 1 4020332650 2016-04-12 446 3654 -3208
## 2 8877689391 2016-04-12 938 3921 -2983
## 3 8053475328 2016-04-12 791 3186 -2395
## 4 7086361926 2016-04-12 625 2772 -2147
## 5 2347167796 2016-04-12 399 2344 -1945
## 6 1503960366 2016-04-12 50 1985 -1935
## 7 7007744171 2016-04-12 1023 2937 -1914
## 8 8378563200 2016-04-12 1882 3635 -1753
## 9 4057192912 2016-04-12 538 2286 -1748
## 10 1844505072 2016-04-12 399 2030 -1631
## 11 5553957443 2016-04-12 633 2026 -1393
## 12 4445114986 2016-04-12 745 2113 -1368
## 13 2320127002 2016-04-12 790 2124 -1334
## 14 8792009665 2016-04-12 728 2044 -1316
## 15 1927972279 2016-04-12 942 2220 -1278
## 16 3977333714 2016-04-12 182 1450 -1268
## 17 2022484408 2016-04-12 1140 2390 -1250
## 18 4558609924 2016-04-12 722 1909 -1187
## 19 6962181067 2016-04-12 917 1994 -1077
## 20 2026352035 2016-04-12 600 1459 -859
## 21 1624580081 2016-04-12 706 1432 -726
## 22 8253242879 2016-04-12 1740 2044 -304
## 23 2873212765 2016-04-12 1963 1982 -19
## .session_id .datetime X Y diff
## 1 8583815059 2016-05-12 2807 0 2807
## 2 6290855005 2016-05-10 2410 0 2410
## 3 8253242879 2016-04-30 1958 0 1958
## 4 1503960366 2016-05-12 1556 0 1556
### Confirm non-unique elements of b
a <- db$dailyMERGE %>% select( all_of( by ), Calories = CaloriesActi ) %>% drop_na()
b <- db$dailyMERGE %>% select( all_of( by ), Calories = CaloriesDaily )%>% drop_na()
#setdiff( b, a ) # returns elements in b not found in a
### Conclusion:
### - 'CaloriesDaily' is fully contained by 'CaloriesActi'
### - Errors seen in last date due to different data truncation / bad original merging
### Inspect
DF <- db$dailyMERGE %>%
select( all_of( by ), X=calories, Y=CaloriesActi )
DF %>% helper_heatmap_analisis()
## [1] 4448095
## [1] 3185696
## .session_id .datetime X Y diff
## 1 8792009665 2016-05-10 NA 57 -57
## 2 7007744171 2016-05-07 NA 120 -120
## 3 6391747486 2016-04-09 NA 251 -251
## 4 4319703577 2016-05-12 NA 257 -257
## 5 2347167796 2016-04-29 NA 403 -403
## 6 1644430081 2016-04-10 NA 489 -489
## 7 6290855005 2016-04-10 NA 735 -735
## 8 5553957443 2016-05-12 NA 741 -741
## 9 6775888955 2016-04-09 NA 787 -787
## 10 6775888955 2016-05-07 NA 1032 -1032
## 11 2891001357 2016-04-05 NA 1144 -1144
## 12 3372868164 2016-05-01 NA 1237 -1237
## 13 6117666160 2016-05-09 NA 1248 -1248
## 14 1644430081 2016-05-11 NA 1276 -1276
## 15 3372868164 2016-04-10 NA 1276 -1276
## 16 4388161847 2016-04-05 NA 1296 -1296
## 17 8253242879 2016-04-11 NA 1377 -1377
## 18 4558609924 2016-05-12 NA 1452 -1452
## 19 5577150313 2016-04-11 NA 1511 -1511
## 20 4057192912 2016-04-15 NA 1527 -1527
## 21 1503960366 2016-05-12 NA 1556 -1556
## 22 4388161847 2016-05-12 NA 1623 -1623
## 23 5577150313 2016-05-11 NA 1665 -1665
## 24 2873212765 2016-04-11 NA 1741 -1741
## 25 8583815059 2016-04-08 NA 1814 -1814
## 26 8253242879 2016-04-29 NA 1854 -1854
## 27 4388161847 2016-03-29 NA 1878 -1878
## 28 4388161847 2016-03-30 NA 1878 -1878
## 29 4388161847 2016-03-31 NA 1878 -1878
## 30 4388161847 2016-04-01 NA 1878 -1878
## 31 4388161847 2016-04-02 NA 1878 -1878
## 32 4388161847 2016-04-03 NA 1878 -1878
## 33 4388161847 2016-04-04 NA 1878 -1878
## 34 2891001357 2016-03-29 NA 1920 -1920
## 35 2891001357 2016-03-30 NA 1920 -1920
## 36 2891001357 2016-03-31 NA 1920 -1920
## 37 2891001357 2016-04-03 NA 1920 -1920
## 38 2891001357 2016-04-04 NA 1920 -1920
## 39 8253242879 2016-04-30 NA 1958 -1958
## 40 6290855005 2016-04-04 NA 2060 -2060
## 41 6290855005 2016-04-05 NA 2060 -2060
## 42 6290855005 2016-04-06 NA 2060 -2060
## 43 6290855005 2016-04-07 NA 2060 -2060
## 44 6290855005 2016-04-08 NA 2060 -2060
## 45 6290855005 2016-04-09 NA 2060 -2060
## 46 4319703577 2016-04-08 NA 2069 -2069
## 47 4319703577 2016-04-09 NA 2168 -2168
## 48 6117666160 2016-04-10 NA 2174 -2174
## 49 6290855005 2016-05-09 NA 2175 -2175
## 50 6290855005 2016-05-10 NA 2410 -2410
## 51 6117666160 2016-04-09 NA 2617 -2617
## 52 6290855005 2016-04-03 NA 2664 -2664
## 53 4702921684 2016-04-11 NA 2677 -2677
## 54 8583815059 2016-05-11 NA 2804 -2804
## 55 8583815059 2016-05-12 NA 2807 -2807
## 56 2891001357 2016-04-02 NA 2881 -2881
## 57 6290855005 2016-04-02 NA 3119 -3119
## 58 2891001357 2016-04-01 NA 4562 -4562
## .session_id .datetime X Y diff
## 1 4445114986 2016-04-09 2207.77 2210 -2.23
## 2 4445114986 2016-04-02 2314.78 2317 -2.22
## 3 4020332650 2016-03-30 3415.81 3418 -2.19
## 4 4020332650 2016-05-05 3426.81 3429 -2.19
## 5 7086361926 2016-04-22 2843.85 2846 -2.15
## 6 4020332650 2016-04-15 2948.86 2951 -2.14
## 7 8877689391 2016-04-24 3452.87 3455 -2.13
## 8 4020332650 2016-05-06 2701.97 2704 -2.03
## 9 8877689391 2016-04-23 3888.97 3891 -2.03
## 10 8877689391 2016-05-10 2830.00 2832 -2.00
## 11 8053475328 2016-04-21 3101.11 3103 -1.89
## 12 6117666160 2016-05-02 2042.12 2044 -1.88
## 13 6962181067 2016-04-08 2186.12 2188 -1.88
## 14 3977333714 2016-04-25 1568.15 1570 -1.85
## 15 8877689391 2016-04-18 2759.23 2761 -1.77
## 16 4319703577 2016-03-30 2139.24 2141 -1.76
## 17 4445114986 2016-04-29 2194.25 2196 -1.75
## 18 7086361926 2016-04-28 2860.26 2862 -1.74
## 19 8877689391 2016-04-09 3839.28 3841 -1.72
## 20 4319703577 2016-05-06 2264.31 2266 -1.69
## 21 4020332650 2016-04-17 1988.42 1990 -1.58
## 22 5577150313 2016-04-16 4272.45 4274 -1.55
## 23 6775888955 2016-04-25 2482.46 2484 -1.54
## 24 7086361926 2016-04-23 1963.61 1965 -1.39
## 25 4702921684 2016-05-03 3143.68 3145 -1.32
## 26 5553957443 2016-05-03 2114.68 2116 -1.32
## 27 4445114986 2016-04-05 2215.70 2217 -1.30
## 28 4702921684 2016-05-05 3004.72 3006 -1.28
## 29 8053475328 2016-05-04 2916.75 2918 -1.25
## 30 5553957443 2016-04-02 1613.81 1615 -1.19
## 31 6391747486 2016-04-07 1820.83 1822 -1.17
## 32 4702921684 2016-04-13 2896.85 2898 -1.15
## 33 5577150313 2016-05-04 2753.86 2755 -1.14
## 34 8792009665 2016-05-02 2013.88 2015 -1.12
## 35 1927972279 2016-04-25 2098.91 2100 -1.09
## 36 4445114986 2016-04-17 1931.91 1933 -1.09
## 37 8877689391 2016-04-28 3058.93 3060 -1.07
## 38 7086361926 2016-05-09 2958.94 2960 -1.06
## 39 3977333714 2016-04-11 1222.00 1223 -1.00
## 40 7007744171 2016-04-19 2811.01 2812 -0.99
## 41 4558609924 2016-04-26 2222.02 2223 -0.98
## 42 7086361926 2016-04-26 2780.02 2781 -0.98
## 43 5577150313 2016-04-15 4004.04 4005 -0.96
## 44 4319703577 2016-04-04 2307.06 2308 -0.94
## 45 6117666160 2016-05-06 1837.07 1838 -0.93
## 46 7007744171 2016-04-04 2885.07 2886 -0.93
## 47 1927972279 2016-04-16 2063.09 2064 -0.91
## 48 1927972279 2016-04-30 2063.09 2064 -0.91
## 49 4558609924 2016-04-10 1905.20 1906 -0.80
## 50 2022484408 2016-05-08 1847.21 1848 -0.79
## 51 5577150313 2016-04-23 3151.29 3152 -0.71
## 52 4558609924 2016-04-30 2104.30 2105 -0.70
## 53 8792009665 2016-04-27 2066.30 2067 -0.70
## 54 5553957443 2016-04-08 2018.31 2019 -0.69
## 55 3977333714 2016-04-21 1396.34 1397 -0.66
## 56 4558609924 2016-04-01 1901.35 1902 -0.65
## 57 3977333714 2016-05-06 1589.37 1590 -0.63
## 58 7086361926 2016-04-29 2615.38 2616 -0.62
## 59 2022484408 2016-04-19 2488.40 2489 -0.60
## 60 5577150313 2016-04-18 3624.42 3625 -0.58
## 61 6117666160 2016-04-23 2650.42 2651 -0.58
## 62 7086361926 2016-05-01 2729.43 2730 -0.57
## 63 4558609924 2016-05-01 1691.43 1692 -0.57
## 64 8583815059 2016-04-14 2442.45 2443 -0.55
## 65 8583815059 2016-04-23 3011.45 3012 -0.55
## 66 8378563200 2016-04-01 3668.46 3669 -0.54
## 67 4319703577 2016-05-08 1791.51 1792 -0.49
## 68 5553957443 2016-04-22 2104.51 2105 -0.49
## 69 6962181067 2016-04-16 2172.55 2173 -0.45
## 70 6962181067 2016-04-04 2436.56 2437 -0.44
## 71 8583815059 2016-05-06 2682.56 2683 -0.44
## 72 4020332650 2016-04-13 1980.58 1981 -0.42
## 73 4558609924 2016-05-10 1888.58 1889 -0.42
## 74 8877689391 2016-04-12 3920.58 3921 -0.42
## 75 6117666160 2016-04-25 1496.59 1497 -0.41
## 76 3977333714 2016-05-03 1709.60 1710 -0.40
## 77 4558609924 2016-05-05 2261.60 2262 -0.40
## 78 2347167796 2016-04-24 2186.61 2187 -0.39
## 79 3977333714 2016-04-04 1647.62 1648 -0.38
## 80 6962181067 2016-04-29 2253.64 2254 -0.36
## 81 6962181067 2016-04-01 2013.68 2014 -0.32
## 82 5553957443 2016-04-01 2141.70 2142 -0.30
## 83 7007744171 2016-04-05 2675.75 2676 -0.25
## 84 1844505072 2016-04-18 1813.76 1814 -0.24
## 85 1844505072 2016-04-07 1964.78 1965 -0.22
## 86 4558609924 2016-04-19 1875.81 1876 -0.19
## 87 3977333714 2016-04-06 1782.82 1783 -0.18
## 88 3977333714 2016-04-10 1231.83 1232 -0.17
## 89 4445114986 2016-04-01 2132.83 2133 -0.17
## 90 7086361926 2016-04-20 2996.85 2997 -0.15
## 91 1503960366 2016-04-06 1860.86 1861 -0.14
## 92 4020332650 2016-05-09 2784.91 2785 -0.09
## 93 4388161847 2016-04-22 3543.91 3544 -0.09
## 94 8378563200 2016-05-04 3109.91 3110 -0.09
## 95 2026352035 2016-04-14 1430.91 1431 -0.09
## 96 8053475328 2016-05-11 2808.94 2809 -0.06
## 97 4702921684 2016-04-26 2783.95 2784 -0.05
## 98 6962181067 2016-05-03 2071.97 2072 -0.03
## 99 7086361926 2016-04-27 2692.99 2693 -0.01
## 100 7086361926 2016-05-03 2753.99 2754 -0.01
## .session_id .datetime X Y diff
## 1 4020332650 2016-04-12 3647.70 446 3201.70
## 2 8877689391 2016-04-12 3920.58 938 2982.58
## 3 8053475328 2016-04-12 3186.22 791 2395.22
## 4 7086361926 2016-04-12 2772.21 625 2147.21
## 5 2347167796 2016-04-12 2344.35 399 1945.35
## 6 1503960366 2016-04-12 1985.20 50 1935.20
## 7 7007744171 2016-04-12 2937.73 1023 1914.73
## 8 8877689391 2016-05-12 3729.28 1849 1880.28
## 9 4702921684 2016-05-12 3087.08 1240 1847.08
## 10 4020332650 2016-05-12 2900.03 1120 1780.03
## 11 8378563200 2016-04-12 3635.47 1882 1753.47
## 12 4057192912 2016-04-12 2274.02 538 1736.02
## 13 7086361926 2016-05-12 2924.09 1199 1725.09
## 14 1844505072 2016-04-12 2030.84 399 1631.84
## 15 3977333714 2016-05-11 1597.59 52 1545.59
## 16 8053475328 2016-05-12 2992.03 1505 1487.03
## 17 5553957443 2016-04-12 2026.01 633 1393.01
## 18 4445114986 2016-04-12 2113.98 745 1368.98
## 19 2320127002 2016-04-12 2124.28 790 1334.28
## 20 8792009665 2016-04-12 2044.37 728 1316.37
## 21 3977333714 2016-04-12 1450.65 182 1268.65
## 22 1927972279 2016-04-12 2193.22 942 1251.22
## 23 2022484408 2016-04-12 2390.34 1140 1250.34
## 24 4558609924 2016-04-12 1909.66 722 1187.66
## 25 6962181067 2016-04-12 1994.68 917 1077.68
## 26 4445114986 2016-05-12 2261.52 1212 1049.52
## 27 8378563200 2016-05-12 2946.84 1976 970.84
## 28 6962181067 2016-05-12 1848.29 928 920.29
## 29 2026352035 2016-04-12 1459.51 600 859.51
## 30 1624580081 2016-04-12 1432.15 706 726.15
## 31 2320127002 2016-05-12 1838.53 1125 713.53
## 32 1844505072 2016-05-12 1347.41 665 682.41
## 33 1927972279 2016-05-12 2063.23 1383 680.23
## 34 2026352035 2016-05-12 2036.03 1364 672.03
## 35 2022484408 2016-05-12 2493.28 1853 640.28
## 36 2873212765 2016-05-12 1979.72 1431 548.72
## 37 1624580081 2016-05-12 1527.38 1002 525.38
## 38 8253242879 2016-04-12 2044.27 1740 304.27
## 39 1503960366 2016-05-11 1908.18 1783 125.18
## 40 5577150313 2016-04-14 4097.63 4022 75.63
## 41 1644430081 2016-05-06 3384.24 3328 56.24
## 42 2873212765 2016-04-12 1982.32 1963 19.32
## 43 1503960366 2016-05-02 2017.77 2004 13.77
## 44 7086361926 2016-05-06 2937.33 2924 13.33
## 45 1503960366 2016-05-01 1831.13 1820 11.13
## 46 2873212765 2016-04-20 2102.77 2094 8.77
## 47 1503960366 2016-04-04 2036.72 2030 6.72
## 48 6962181067 2016-04-26 2016.77 2012 4.77
## 49 4558609924 2016-04-29 1922.58 1918 4.58
## 50 8253242879 2016-04-10 1658.33 1657 1.33
## 51 1644430081 2016-04-03 3437.22 3436 1.22
## 52 4558609924 2016-05-11 2132.18 2131 1.18
## 53 1844505072 2016-04-09 1808.05 1807 1.05
## 54 8378563200 2016-04-11 3776.01 3775 1.01
## 55 1927972279 2016-05-07 2230.00 2229 1.00
## 56 2026352035 2016-04-06 1422.00 1421 1.00
## 57 2873212765 2016-04-17 1530.00 1529 1.00
## 58 4445114986 2016-03-31 2228.00 2227 1.00
## 59 4445114986 2016-04-11 2298.00 2297 1.00
## 60 1844505072 2016-04-08 1824.99 1824 0.99
## 61 1844505072 2016-04-15 1725.99 1725 0.99
## 62 2026352035 2016-04-13 1521.99 1521 0.99
## 63 2320127002 2016-04-05 1708.99 1708 0.99
## 64 7086361926 2016-04-03 1794.99 1794 0.99
## 65 8253242879 2016-04-07 1776.99 1776 0.99
## 66 4702921684 2016-05-09 2990.99 2990 0.99
## 67 4702921684 2016-05-11 3069.99 3069 0.99
## 68 8378563200 2016-04-18 4157.99 4157 0.99
## 69 1503960366 2016-04-22 1827.98 1827 0.98
## 70 1844505072 2016-04-22 1827.98 1827 0.98
## 71 2022484408 2016-05-02 2473.98 2473 0.98
## 72 3372868164 2016-04-20 1867.98 1867 0.98
## 73 4319703577 2016-04-24 1880.98 1880 0.98
## 74 4445114986 2016-04-12 2113.98 2113 0.98
## 75 5553957443 2016-05-10 2335.98 2335 0.98
## 76 7086361926 2016-04-01 2496.98 2496 0.98
## 77 8253242879 2016-04-16 1880.98 1880 0.98
## 78 8378563200 2016-04-28 2884.98 2884 0.98
## 79 8792009665 2016-04-24 1799.98 1799 0.98
## 80 1503960366 2016-04-24 1788.97 1788 0.97
## 81 2026352035 2016-04-01 1407.97 1407 0.97
## 82 2026352035 2016-04-17 1214.97 1214 0.97
## 83 2320127002 2016-05-08 1821.97 1821 0.97
## 84 2873212765 2016-04-26 1882.97 1882 0.97
## 85 2022484408 2016-04-15 2525.97 2525 0.97
## 86 2022484408 2016-04-22 2638.97 2638 0.97
## 87 2347167796 2016-04-08 2104.97 2104 0.97
## 88 6962181067 2016-04-28 2182.97 2182 0.97
## 89 8378563200 2016-05-01 3369.97 3369 0.97
## 90 8583815059 2016-04-07 2437.97 2437 0.97
## 91 2022484408 2016-04-08 2577.96 2577 0.96
## 92 2320127002 2016-04-22 1851.96 1851 0.96
## 93 4057192912 2016-03-15 1776.96 1776 0.96
## 94 4319703577 2016-03-29 1958.96 1958 0.96
## 95 4319703577 2016-04-29 2175.96 2175 0.96
## 96 4702921684 2016-04-18 3013.96 3013 0.96
## 97 8053475328 2016-04-04 3209.96 3209 0.96
## 98 4319703577 2016-04-22 1886.95 1886 0.95
## 99 5553957443 2016-04-09 1692.95 1692 0.95
## 100 1644430081 2016-05-08 2987.95 2987 0.95
### Find unique elements
DF[ is.na(X) &
!is.na(Y) ] # 58 rows # many of 4388161847 & 6290855005 & 2891001357
### Find significant rounding differences
DF[ X-Y > 100 ] # 17 rows # mainly errors of CaloriesActi over edge-dates '2016-05-12'
DF[ X-Y < -100 ] # 28 rows # mainly for ids '4020332650' and '8583815059'
### Conclusion:
### - 'CaloriesDaily' is fully contained by 'CaloriesActi'
### - Errors seen in last date due to different data truncation / bad original merging
### Inspect
db$dailyMERGE %>% select( all_of( by ), X=StepsActi, Y=StepsDaily ) %>% helper_heatmap_analisis()
## [1] 10171415
## [1] 7400425
## [1] .session_id .datetime X Y diff
## <0 rows> (or 0-length row.names)
## .session_id .datetime X Y diff
## 1 8877689391 2016-04-12 2350 23186 -20836
## 2 8053475328 2016-04-12 290 18060 -17770
## 3 1503960366 2016-04-12 224 13162 -12938
## 4 7086361926 2016-04-12 430 11317 -10887
## 5 2347167796 2016-04-12 0 10113 -10113
## 6 8253242879 2016-04-12 0 9033 -9033
## 7 2873212765 2016-04-12 0 8796 -8796
## 8 2320127002 2016-04-12 2098 10725 -8627
## 9 4020332650 2016-04-12 8 8539 -8531
## 10 5553957443 2016-04-12 3436 11596 -8160
## 11 3977333714 2016-04-12 759 8856 -8097
## 12 4702921684 2016-04-12 0 7213 -7213
## 13 7007744171 2016-04-12 7413 14172 -6759
## 14 1844505072 2016-04-12 0 6697 -6697
## 15 4057192912 2016-04-12 187 5394 -5207
## 16 2022484408 2016-04-12 6717 11875 -5158
## 17 8378563200 2016-04-12 3246 7626 -4380
## 18 6962181067 2016-04-12 5893 10199 -4306
## 19 4558609924 2016-04-12 1260 5135 -3875
## 20 2026352035 2016-04-12 1019 4414 -3395
## 21 4445114986 2016-04-12 278 3276 -2998
## 22 8792009665 2016-04-12 20 2564 -2544
## 23 1624580081 2016-04-12 6627 8163 -1536
## 24 1927972279 2016-04-12 24 678 -654
## [1] .session_id .datetime X Y diff
## <0 rows> (or 0-length row.names)
### Confirm non-unique elements of b
a <- db$dailyMERGE %>% select( all_of( by ), Steps = StepsActi ) %>% drop_na()
b <- db$dailyMERGE %>% select( all_of( by ), Steps = StepsDaily )%>% drop_na()
#setdiff( b, a ) # setdiff() returns elements in b not found in a
### Conclusion: 'StepsDaily' is fully contained by 'StepsActi'
Same as before.
### Inspect
DF <- db$dailyMERGE %>% select( all_of( by ), X=steps, Y=StepsActi )
DF %>% helper_heatmap_analisis()
DF[ X==Y ] # 1093 equal rows
### Find unique elements
DF[ is.na(X) &
!is.na(Y) ] # 58 rows # many of 4388161847 & 6290855005 & 2891001357
### Find significant rounding differences
DF[ X-Y > 0 ] # 15 rows # mainly errors of StepsActi over edge-dates '2016-05-12'
DF[ X-Y < -200 ] # 31 rows # mainly for ids '4020332650' and '8583815059'
Run one at a time.
### Inspect
#db$dailyMERGE %>% select( all_of( by ), X=Act0, Y=Act0Daily ) %>% helper_heatmap_analisis()
db$dailyMERGE %>% select( all_of( by ), X=Act1, Y=Act1Daily ) %>% helper_heatmap_analisis()
## [1] 258966
## [1] 186740
## [1] .session_id .datetime X Y diff
## <0 rows> (or 0-length row.names)
## .session_id .datetime X Y diff
## 1 2873212765 2016-04-12 0 356 -356
## 2 1844505072 2016-04-12 0 339 -339
## 3 4020332650 2016-04-12 1 331 -330
## 4 2347167796 2016-04-12 0 320 -320
## 5 1503960366 2016-04-12 9 328 -319
## 6 7007744171 2016-04-12 43 355 -312
## 7 4702921684 2016-04-12 0 263 -263
## 8 8877689391 2016-04-12 58 312 -254
## 9 4558609924 2016-04-12 76 318 -242
## 10 2320127002 2016-04-12 69 306 -237
## 11 5553957443 2016-04-12 50 277 -227
## 12 4445114986 2016-04-12 20 196 -176
## 13 2022484408 2016-04-12 67 227 -160
## 14 8253242879 2016-04-12 0 154 -154
## 15 4057192912 2016-04-12 11 164 -153
## 16 6962181067 2016-04-12 48 189 -141
## 17 7086361926 2016-04-12 14 153 -139
## 18 2026352035 2016-04-12 64 181 -117
## 19 3977333714 2016-04-12 17 131 -114
## 20 8792009665 2016-04-12 6 116 -110
## 21 8053475328 2016-04-12 16 123 -107
## 22 8378563200 2016-04-12 49 156 -107
## 23 1624580081 2016-04-12 89 146 -57
## 24 1927972279 2016-04-12 3 55 -52
## [1] .session_id .datetime X Y diff
## <0 rows> (or 0-length row.names)
#db$dailyMERGE %>% select( all_of( by ), X=Act2, Y=Act2Daily ) %>% helper_heatmap_analisis()
#db$dailyMERGE %>% select( all_of( by ), X=Act3, Y=Act3Daily ) %>% helper_heatmap_analisis()
#db$dailyMERGE %>% select( all_of( by ), X=Dis0, Y=Dis0Daily ) %>% helper_heatmap_analisis()
#db$dailyMERGE %>% select( all_of( by ), X=Dis1, Y=Dis1Daily ) %>% helper_heatmap_analisis()
#db$dailyMERGE %>% select( all_of( by ), X=Dis2, Y=Dis2Daily ) %>% helper_heatmap_analisis()
#db$dailyMERGE %>% select( all_of( by ), X=Dis3, Y=Dis3Daily ) %>% helper_heatmap_analisis()
### Conclusion: 'Act.Daily' is fully contained by 'Act.'
################'Dis.Daily' is fully contained by 'Dis.'
################'Steps' is not found in dailyIntensities
Run one at a time.
### Inspect
# Previously we checked slight differences for Steps in the same period of time
# Therefore we can expect slight differences in distances
#DF <- db$dailyMERGE %>% select( all_of( by ), X=acti0, Y=Act0 ) # 58 NA's & 0 Ysignificatives
DF <- db$dailyMERGE %>% select( all_of( by ), X=acti1, Y=Act1 ) # 58 NA's & 0 Ysignificatives
#DF <- db$dailyMERGE %>% select( all_of( by ), X=acti2, Y=Act2 ) # 58 NA's & 0 Ysignificatives
#DF <- db$dailyMERGE %>% select( all_of( by ), X=acti3, Y=Act3 ) # 58 NA's & 0 Ysignificatives
#DF <- db$dailyMERGE %>% select( all_of( by ), X=dist0, Y=Dis0 ) # 100 NA's & 0 Ysignificatives
#DF <- db$dailyMERGE %>% select( all_of( by ), X=dist1, Y=Dis1 ) # 100 NA's & 0 Ysignificatives
#DF <- db$dailyMERGE %>% select( all_of( by ), X=dist2, Y=Dis2 ) # 100 NA's & 0 Ysignificatives
#DF <- db$dailyMERGE %>% select( all_of( by ), X=dist3, Y=Dis3 ) # 100 NA's & 0 Ysignificatives
DF %>% helper_heatmap_analisis()
### Find unique elements
DF[ is.na(X) &
!is.na(Y) ] # many of 4388161847 & 6290855005 & 2891001357 & mix
### Find significant rounding differences
DF[ X-Y > 0 ] # X bigger than Y is expected as it has more data
DF[ X-Y < 0 ]
db$dailyMERGE %>%
select( c('.session_id', 'dist0', 'Dis0', 'dist1', 'Dis1',
'steps','DisTotal', 'dist2', 'Dis2', 'dist3', 'Dis3') ) %>%
drop_na() %>%
group_by( .session_id) %>%
summarise(
n_minu = n_distinct(steps), n_daily = n_distinct(DisTotal),
dist0 = sum(dist0) %>% floor, Dis0 = sum(Dis0) %>% floor,
dist1 = sum(dist1) %>% floor, Dis1 = sum(Dis1) %>% floor,
dist2 = sum(dist2) %>% floor, Dis2 = sum(Dis2) %>% floor,
dist3 = sum(dist3) %>% floor, Dis3 = sum(Dis3) %>% floor,
) %>% arrange(n_minu %>% desc) %>% print.data.frame()
## .session_id n_minu n_daily dist0 Dis0 dist1 Dis1 dist2 Dis2 dist3 Dis3
## 1 1624580081 49 47 0 0 132 131 11 11 30 30
## 2 1503960366 48 45 0 0 210 208 37 36 142 141
## 3 4445114986 45 45 0 0 110 118 3 2 23 22
## 4 6962181067 44 43 0 0 186 183 53 54 88 86
## 5 4702921684 43 42 0 0 231 237 51 52 17 16
## 6 2026352035 42 43 0 0 133 131 0 0 0 0
## 7 7086361926 42 42 0 0 114 111 31 30 109 104
## 8 8378563200 42 43 0 0 172 168 22 22 100 101
## 9 8877689391 42 43 0 0 258 267 18 16 304 293
## 10 2022484408 41 40 0 0 214 210 33 32 112 112
## 11 2873212765 41 39 0 0 175 169 12 11 24 24
## 12 3977333714 41 41 0 0 135 137 104 98 61 58
## 13 5553957443 41 41 0 0 148 144 30 30 64 62
## 14 8053475328 41 42 0 0 107 106 19 18 367 357
## 15 4319703577 40 39 0 0 161 161 24 23 13 13
## 16 4558609924 40 41 0 0 159 157 22 22 19 19
## 17 2320127002 39 38 0 0 120 116 3 3 5 4
## 18 4020332650 39 48 0 0 117 112 10 10 9 9
## 19 1644430081 38 36 0 0 142 142 49 48 33 33
## 20 5577150313 38 37 0 0 100 100 26 26 125 124
## 21 7007744171 36 36 0 0 188 182 26 27 105 105
## 22 8583815059 32 36 0 0 94 94 29 29 24 24
## 23 1844505072 31 28 0 0 83 78 1 1 0 0
## 24 2347167796 31 30 0 0 145 141 39 38 33 31
## 25 8792009665 31 32 0 0 54 53 3 3 1 1
## 26 4388161847 30 30 0 0 164 164 28 27 53 53
## 27 6117666160 30 30 0 0 184 183 5 5 4 4
## 28 1927972279 29 29 0 0 33 33 1 1 3 2
## 29 3372868164 28 28 0 0 107 107 4 4 18 18
## 30 6290855005 24 24 0 0 118 117 3 3 2 2
## 31 6775888955 24 23 0 0 34 33 20 20 26 26
## 32 8253242879 23 23 0 0 40 39 17 16 53 48
## 33 4057192912 21 19 0 0 45 41 7 7 3 3
## 34 6391747486 4 4 0 0 7 7 0 0 2 2
### Conclusions: Calculated 'dist.' corresponding to minu-LEVEL matches closely to daily-LEVEL
###############: Variations in the amount of dates slightly changes results
###############: Slight differences shown where 'n' is equal in both tables,
############### as a result of StrideLength approximation or changes in data.
###############: 'dailyActivities' can now be replaced by 'minute-LEVEL'
db$dailyMERGE <- NULL
db$sleepDay <- NULL
db$dailyCalories <- NULL
db$dailyIntensities <- NULL
db$dailyActivity <- NULL
db$dailyActivity.2 <- NULL
db$dailySteps <- NULL
db$personal #%>% format_csv()
p <-
db$personal %>% arrange( desc(q_steps) ) %>%
ggplot( aes( y=fct_inorder(.session_id)) ) +
labs(x=NULL, y=NULL, title=NULL) +
scale_x_discrete(position = "top", expand = c(0, 0) ) +
theme_bw()
plot1 <- p + labs( x="DataCount" ) +
geom_bar( aes( weight = q_steps, fill=bmi_class) ) +
geom_bar( aes( weight = q_sleep), fill='lightgoldenrod1' )
#### -----
p <- p + theme( axis.text.y=element_blank() )
plot2 <- p + labs( x="Burnout Profile" ) +
geom_bar( aes( weight = bmr, fill=bmi_class) ) +
geom_bar( aes( weight = margi_cal ), fill='indianred' )
plot3 <- p + labs( x="Sedentarism" ) +
geom_bar( aes( weight = q_steps, fill=bmi_class) ) +
geom_bar( aes( weight = q_sedentarism*q_steps), fill='black' ) +
geom_bar( aes( weight = sleep_avg/24*q_steps ), fill='lightgoldenrod1' )
ggpubr::ggarrange( plot1, plot2, plot3, ncol=3, common.legend = TRUE, legend = "bottom")
We cannot distinguish men from women by BMR alone, since there is about a 30% variation between them.
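The reference curves in the next plot invert the Mifflin-St Jeor equation (BMR = 10*weight + 6.25*height_cm - 5*age + 5 for men, and -161 instead of +5 for women) to express height as a function of BMR at a fixed age and weight. A minimal sketch of that inversion, with hypothetical inputs:
# Minimal sketch: height (m) implied by a given BMR under Mifflin-St Jeor (hypothetical inputs)
height_from_bmr <- function( bmr, E, W, female=FALSE ) {
  s <- ifelse( female, 161, -5 )          # men: BMR = ... + 5 ; women: BMR = ... - 161
  ( bmr + s + 5*E - 10*W ) / 6.25 / 100 }
height_from_bmr( 1600, E=35, W=75 )                 # ~1.63 m
height_from_bmr( 1450, E=35, W=65, female=TRUE )    # ~1.82 m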
db$personal %>% arrange( bmr ) %>%
ggplot() +
geom_function(fun = function(x,E=35,W=50) (x-5+5*E-10*W)/6.25/100 , colour="lightblue", linetype=2, size=1) +
geom_function(fun = function(x,E=35,W=75) (x-5+5*E-10*W)/6.25/100 , colour="lightblue", linetype=2) +
geom_function(fun = function(x,E=35,W=100) (x-5+5*E-10*W)/6.25/100 , colour="lightblue", linetype=2, size=1) +
geom_function(fun = function(x,E=35,W=50) (x+161+5*E-10*W)/6.25/100 , colour="pink", linetype=2, size=1) +
geom_function(fun = function(x,E=35,W=75) (x+161+5*E-10*W)/6.25/100 , colour="pink", linetype=2) +
geom_function(fun = function(x,E=35,W=100) (x+161+5*E-10*W)/6.25/100 , colour="pink", linetype=2, size=1) +
geom_function(fun = function(x,E=62,W=131) (x-5+5*E-10*W)/6.25/100 , colour = "violet", size=1.0) +
geom_function(fun = function(x,E=29,W=131) (x+161+5*E-10*W)/6.25/100 , colour = "violet", size=1.0) +
geom_function(fun = function(x,E=73,W=40) (x-5+5*E-10*W)/6.25/100 , colour = "violet", size=1.0) +
geom_function(fun = function(x,E=40,W=40) (x+161+5*E-10*W)/6.25/100 , colour = "violet", size=1.0) +
geom_point( aes( bmr, height, color=bmi_class ), size=4 ) +
scale_y_continuous( limits=c(1.5,2.0) )
db$personal %>% arrange( bmr ) %>%
ggplot() +
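# White reference curves below are BMI isolines: height = sqrt( weight / BMI ) for BMI 18, 25, 30 and 35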
geom_function(fun = function(x) sqrt( x/18 ) , colour = "white", size=1.1) +
geom_function(fun = function(x) sqrt( x/25 ) , colour = "white", size=1.1) +
geom_function(fun = function(x) sqrt( x/30 ) , colour = "white", size=1.1) +
geom_function(fun = function(x) sqrt( x/35 ) , colour = "white", size=1.1) +
geom_point( aes( Kg, height, color=bmr, size=bmi ) ) +
coord_cartesian( expand=FALSE ) +
scale_y_continuous( limits=c(1.5,2.0) ) +
scale_x_continuous( limits=c(50,135 ) ) +
scale_size(range = c(3,9)) +
scale_color_gradient(low="blue", high="red")knitr::knit_exit()