Load the libraries I need
library("tidyverse")
library("janitor")
library("lubridate")
library("dplyr")
Read my CSV file into R
# Import the pedometer day-summary export as a tibble.
dirty_pedometer <- read_csv(
  file = "com.samsung.shealth.tracker.pedometer_day_summary.202205141347.csv"
)
Take a look at my data
# Print a compact, one-line-per-column overview of the imported data.
dirty_pedometer %>% glimpse()
## Rows: 3,752
## Columns: 19
## $ step_count <dbl> 2854, 1215, 13817, 3845, 3118, 3610, 11874, 18, 13…
## $ binning_data <chr> "5af9518a-f435-49e8-aa21-498288115d0b.binning_data…
## $ active_time <dbl> 1384027, 659314, 6907844, 2070113, 1631525, 191735…
## $ recommendation <dbl> 10000, 10000, 6000, 10000, 10000, 10000, 10000, 10…
## $ run_step_count <dbl> 3, 0, 13, 3, 21, 0, 3046, 0, 6187, 0, 3952, 16, 30…
## $ update_time <time> 01:36:00, 26:49:00, 18:31:00, 00:02:00, 26:35:00,…
## $ source_package_name <chr> "com.sec.android.app.shealth", "com.sec.android.ap…
## $ create_time <time> 37:17:00, 52:04:00, 56:51:00, 00:02:00, 27:37:00,…
## $ source_info <chr> NA, NA, "70941998-9f62-4bd0-929a-3aa796780061.sour…
## $ speed <dbl> 1.750000, 1.972222, 1.401954, 2.318954, 1.367646, …
## $ distance <dbl> 1959.611, 771.151, 9684.481, 2672.490, 2231.350, 2…
## $ calorie <dbl> 93.9490000, 37.8900000, 438.6090000, 125.2899860, …
## $ walk_step_count <dbl> 2851, 1215, 13804, 3842, 3097, 3610, 8828, 18, 763…
## $ deviceuuid <chr> "cMpZmcg7wE", "cMpZmcg7wE", "VfS0qUERdZ", "aGTdXSg…
## $ pkg_name <chr> "com.sec.android.app.shealth", "com.sec.android.ap…
## $ healthy_step <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ achievement <chr> "5af9518a-f435-49e8-aa21-498288115d0b.achievement.…
## $ datauuid <chr> "5af9518a-f435-49e8-aa21-498288115d0b", "029e8438-…
## $ day_time <dbl> 1.53066e+12, 1.53593e+12, 1.53429e+12, 1.53179e+12…
# List the column names of the imported data.
names(dirty_pedometer)
## [1] "step_count" "binning_data" "active_time"
## [4] "recommendation" "run_step_count" "update_time"
## [7] "source_package_name" "create_time" "source_info"
## [10] "speed" "distance" "calorie"
## [13] "walk_step_count" "deviceuuid" "pkg_name"
## [16] "healthy_step" "achievement" "datauuid"
## [19] "day_time"
Clean up the column names
# clean_names() returns a new tibble instead of modifying its argument, so the
# result has to be assigned back; otherwise the cleaned names are discarded.
dirty_pedometer <- clean_names(dirty_pedometer)
# Print the tibble to inspect the result.
dirty_pedometer
## # A tibble: 3,752 × 19
## step_count binning_data active_time recommendation run_step_count update_time
## <dbl> <chr> <dbl> <dbl> <dbl> <time>
## 1 2854 5af9518a-f4… 1384027 10000 3 01:36
## 2 1215 029e8438-d4… 659314 10000 0 26:49
## 3 13817 70941998-9f… 6907844 6000 13 18:31
## 4 3845 35253cb3-13… 2070113 10000 3 00:02
## 5 3118 674015c9-1f… 1631525 10000 21 26:35
## 6 3610 a4c446d0-3d… 1917359 10000 0 00:01
## 7 11874 37c413cb-67… 5377074 10000 3046 18:32
## 8 18 7794ec31-68… 10909 10000 0 13:52
## 9 13821 5bc9d2a8-9d… 5823435 10000 6187 57:55
## 10 3465 cae133e9-0e… 1797552 10000 0 00:02
## # … with 3,742 more rows, and 13 more variables: source_package_name <chr>,
## # create_time <time>, source_info <chr>, speed <dbl>, distance <dbl>,
## # calorie <dbl>, walk_step_count <dbl>, deviceuuid <chr>, pkg_name <chr>,
## # healthy_step <dbl>, achievement <chr>, datauuid <chr>, day_time <dbl>
Look back at the names
# Show the column names again to check them after the clean-up step.
names(dirty_pedometer)
## [1] "step_count" "binning_data" "active_time"
## [4] "recommendation" "run_step_count" "update_time"
## [7] "source_package_name" "create_time" "source_info"
## [10] "speed" "distance" "calorie"
## [13] "walk_step_count" "deviceuuid" "pkg_name"
## [16] "healthy_step" "achievement" "datauuid"
## [19] "day_time"
Keep only the columns I need and drop the rest
# Keep only the step counts, active time, distance and day columns.
select_pedometer <- dirty_pedometer %>%
  select(
    step_count, active_time, run_step_count,
    walk_step_count, distance, day_time
  )
View the new data set, which has only the columns I am interested in
# Preview the first six rows of the reduced data set.
head(select_pedometer, n = 6)
## # A tibble: 6 × 6
## step_count active_time run_step_count walk_step_count distance day_time
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2854 1384027 3 2851 1960. 1530660000000
## 2 1215 659314 0 1215 771. 1535930000000
## 3 13817 6907844 13 13804 9684. 1534290000000
## 4 3845 2070113 3 3842 2672. 1531790000000
## 5 3118 1631525 21 3097 2231. 1535670000000
## 6 3610 1917359 0 3610 2500. 1531270000000
Try to figure out how to convert the date
Create a variable from the day_time column, which is a large number that R prints in scientific notation
# Extract day_time as a plain numeric vector and inspect its structure.
myDate <- as.numeric(select_pedometer[["day_time"]])
str(myDate)
## num [1:3752] 1.53e+12 1.54e+12 1.53e+12 1.53e+12 1.54e+12 ...
Create another variable that converts the number to a date-time, counting from Jan 01, 1970 (the Unix epoch)
# day_time looks like Unix epoch time in milliseconds (values around 1.5e12),
# but as.POSIXct() counts seconds since `origin`. Passing the raw values is
# what produces far-future years (e.g. "50474-09-07"), so scale to seconds.
# tz = "UTC" keeps the printed timestamps independent of the local time zone.
my_posix <- as.POSIXct(myDate / 1000, origin = "1970-01-01", tz = "UTC")
str(my_posix)
## POSIXct[1:3752], format: "50474-09-07 05:20:00" "50641-09-07 14:13:20" "50589-09-18 02:40:00" ...
Then convert to just a date without the time.
# Drop the time-of-day component, keeping only the calendar date.
myHealth_date <- as.Date(x = my_posix)
str(myHealth_date)
## Date[1:3752], format: "50474-09-07" "50641-09-07" "50589-09-18" "50510-06-29" "50633-06-12" ...
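This didn't work: the year is wrong (tens of thousands of years in the future). as.POSIXct counts in seconds, so day_time is probably stored in milliseconds. I am on the right track, though.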
# Show the first two converted dates.
head(myHealth_date, n = 2)
## [1] "50474-09-07" "50641-09-07"