Samsung Health Project

Load my libraries

library("tidyverse")
library("janitor")
library("lubridate")
library("dplyr")

Read my CSV file into R

# Import the Samsung Health pedometer day-summary export as a tibble.
dirty_pedometer <- read_csv(
  file = "com.samsung.shealth.tracker.pedometer_day_summary.202205141347.csv"
)

Take a look at my data

# Print a compact overview: one line per column with its type and first values.
dirty_pedometer %>% glimpse()

## Rows: 3,752
## Columns: 19
## $ step_count          <dbl> 2854, 1215, 13817, 3845, 3118, 3610, 11874, 18, 13…
## $ binning_data        <chr> "5af9518a-f435-49e8-aa21-498288115d0b.binning_data…
## $ active_time         <dbl> 1384027, 659314, 6907844, 2070113, 1631525, 191735…
## $ recommendation      <dbl> 10000, 10000, 6000, 10000, 10000, 10000, 10000, 10…
## $ run_step_count      <dbl> 3, 0, 13, 3, 21, 0, 3046, 0, 6187, 0, 3952, 16, 30…
## $ update_time         <time> 01:36:00, 26:49:00, 18:31:00, 00:02:00, 26:35:00,…
## $ source_package_name <chr> "com.sec.android.app.shealth", "com.sec.android.ap…
## $ create_time         <time> 37:17:00, 52:04:00, 56:51:00, 00:02:00, 27:37:00,…
## $ source_info         <chr> NA, NA, "70941998-9f62-4bd0-929a-3aa796780061.sour…
## $ speed               <dbl> 1.750000, 1.972222, 1.401954, 2.318954, 1.367646, …
## $ distance            <dbl> 1959.611, 771.151, 9684.481, 2672.490, 2231.350, 2…
## $ calorie             <dbl> 93.9490000, 37.8900000, 438.6090000, 125.2899860, …
## $ walk_step_count     <dbl> 2851, 1215, 13804, 3842, 3097, 3610, 8828, 18, 763…
## $ deviceuuid          <chr> "cMpZmcg7wE", "cMpZmcg7wE", "VfS0qUERdZ", "aGTdXSg…
## $ pkg_name            <chr> "com.sec.android.app.shealth", "com.sec.android.ap…
## $ healthy_step        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ achievement         <chr> "5af9518a-f435-49e8-aa21-498288115d0b.achievement.…
## $ datauuid            <chr> "5af9518a-f435-49e8-aa21-498288115d0b", "029e8438-…
## $ day_time            <dbl> 1.53066e+12, 1.53593e+12, 1.53429e+12, 1.53179e+12…

# List the column names of the raw import.
names(dirty_pedometer)

##  [1] "step_count"          "binning_data"        "active_time"        
##  [4] "recommendation"      "run_step_count"      "update_time"        
##  [7] "source_package_name" "create_time"         "source_info"        
## [10] "speed"               "distance"            "calorie"            
## [13] "walk_step_count"     "deviceuuid"          "pkg_name"           
## [16] "healthy_step"        "achievement"         "datauuid"           
## [19] "day_time"

Clean up the column names

# clean_names() returns a new tibble rather than modifying its argument, so
# assign the result back; otherwise the cleaned names are discarded.
dirty_pedometer <- clean_names(dirty_pedometer)
# Print the tibble to inspect the result.
dirty_pedometer

## # A tibble: 3,752 × 19
##    step_count binning_data active_time recommendation run_step_count update_time
##         <dbl> <chr>              <dbl>          <dbl>          <dbl> <time>     
##  1       2854 5af9518a-f4…     1384027          10000              3 01:36      
##  2       1215 029e8438-d4…      659314          10000              0 26:49      
##  3      13817 70941998-9f…     6907844           6000             13 18:31      
##  4       3845 35253cb3-13…     2070113          10000              3 00:02      
##  5       3118 674015c9-1f…     1631525          10000             21 26:35      
##  6       3610 a4c446d0-3d…     1917359          10000              0 00:01      
##  7      11874 37c413cb-67…     5377074          10000           3046 18:32      
##  8         18 7794ec31-68…       10909          10000              0 13:52      
##  9      13821 5bc9d2a8-9d…     5823435          10000           6187 57:55      
## 10       3465 cae133e9-0e…     1797552          10000              0 00:02      
## # … with 3,742 more rows, and 13 more variables: source_package_name <chr>,
## #   create_time <time>, source_info <chr>, speed <dbl>, distance <dbl>,
## #   calorie <dbl>, walk_step_count <dbl>, deviceuuid <chr>, pkg_name <chr>,
## #   healthy_step <dbl>, achievement <chr>, datauuid <chr>, day_time <dbl>

Look back at the names

# Re-check the column names after the clean-up step.
names(dirty_pedometer)

##  [1] "step_count"          "binning_data"        "active_time"        
##  [4] "recommendation"      "run_step_count"      "update_time"        
##  [7] "source_package_name" "create_time"         "source_info"        
## [10] "speed"               "distance"            "calorie"            
## [13] "walk_step_count"     "deviceuuid"          "pkg_name"           
## [16] "healthy_step"        "achievement"         "datauuid"           
## [19] "day_time"

Remove the columns I do not need

# Keep only the step, activity, distance and date columns for the analysis.
select_pedometer <- dirty_pedometer %>%
  select(
    step_count, active_time, run_step_count,
    walk_step_count, distance, day_time
  )

View the new data set with only the columns I am interested in

# Show the first six rows of the reduced data set.
head(select_pedometer, n = 6)

## # A tibble: 6 × 6
##   step_count active_time run_step_count walk_step_count distance      day_time
##        <dbl>       <dbl>          <dbl>           <dbl>    <dbl>         <dbl>
## 1       2854     1384027              3            2851    1960. 1530660000000
## 2       1215      659314              0            1215     771. 1535930000000
## 3      13817     6907844             13           13804    9684. 1534290000000
## 4       3845     2070113              3            3842    2672. 1531790000000
## 5       3118     1631525             21            3097    2231. 1535670000000
## 6       3610     1917359              0            3610    2500. 1531270000000

Try to figure out how to convert the date

Create a variable from the day_time column, which is displayed in scientific notation

# Pull day_time out of the tibble as a plain numeric vector and inspect it.
myDate <- as.numeric(select_pedometer[["day_time"]])
str(myDate)

##  num [1:3752] 1.53e+12 1.54e+12 1.53e+12 1.53e+12 1.54e+12 ...

Create another variable that treats the number as a count starting from January 1, 1970

# day_time (~1.53e12) appears to be a Unix timestamp in milliseconds, but
# as.POSIXct() counts seconds from `origin`; divide by 1000 first, otherwise
# the years come out around 50474 instead of 2018. tz = "UTC" keeps the
# result independent of the machine's local time zone.
# NOTE: the output printed below was produced before this fix; re-run to
# refresh it.
my_posix <- as.POSIXct(myDate / 1000, origin = "1970-01-01", tz = "UTC")
str(my_posix)

##  POSIXct[1:3752], format: "50474-09-07 05:20:00" "50641-09-07 14:13:20" "50589-09-18 02:40:00" ...

Then convert to just a date without the time.

# Drop the time-of-day component, keeping only the calendar date.
myHealth_date <- as.Date(x = my_posix)
str(myHealth_date)

##  Date[1:3752], format: "50474-09-07" "50641-09-07" "50589-09-18" "50510-06-29" "50633-06-12" ...

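This didn't work: the year is wrong. I am on the right track, though. The likely cause is that day_time is in milliseconds since January 1, 1970, whereas as.POSIXct() counts seconds, so the values would need to be divided by 1000 first.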

# Peek at the first two converted dates.
head(myHealth_date, n = 2)

## [1] "50474-09-07" "50641-09-07"

Samsung Health Project

2022-05-16