Not all packages are loaded in a single code chunk, because lubridate conflicts with one of the VIM, mice, or missForest packages.
library(tidyverse)
library(reshape2)
library(lubridate)
library(gt)
library(glue)
library(paletteer)
library(scales)
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Catalina 10.15.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] scales_1.1.0 paletteer_1.1.0 glue_1.3.2 gt_0.2.0.5
## [5] lubridate_1.7.8 reshape2_1.4.3 forcats_0.5.0 stringr_1.4.0
## [9] dplyr_0.8.5 purrr_0.3.3 readr_1.3.1 tidyr_1.0.2
## [13] tibble_3.0.0 ggplot2_3.3.0 tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.4 lattice_0.20-40 assertthat_0.2.1 digest_0.6.25
## [5] R6_2.4.1 cellranger_1.1.0 plyr_1.8.6 backports_1.1.5
## [9] reprex_0.3.0 oompaBase_3.2.9 evaluate_0.14 httr_1.4.1
## [13] pillar_1.4.3 rlang_0.4.5 readxl_1.3.1 rstudioapi_0.11
## [17] rmarkdown_2.1 munsell_0.5.0 broom_0.5.5 compiler_3.6.3
## [21] modelr_0.1.6 xfun_0.12 pkgconfig_2.0.3 scico_1.1.0
## [25] htmltools_0.4.0 tidyselect_1.0.0 viridisLite_0.3.0 fansi_0.4.1
## [29] crayon_1.3.4 dbplyr_1.4.2 withr_2.1.2 jcolors_0.0.4
## [33] grid_3.6.3 nlme_3.1-145 jsonlite_1.6.1 gtable_0.3.0
## [37] lifecycle_0.2.0 DBI_1.1.0 magrittr_1.5 palr_0.2.0
## [41] pals_1.6 cli_2.0.2 stringi_1.4.6 mapproj_1.2.7
## [45] fs_1.3.2 xml2_1.2.5 ellipsis_0.3.0 generics_0.0.2
## [49] vctrs_0.2.4 rematch2_2.1.1 tools_3.6.3 dichromat_2.0-0
## [53] maps_3.3.0 hms_0.5.3 yaml_2.2.1 colorspace_1.4-1
## [57] cluster_2.1.0 rvest_0.3.5 knitr_1.28 haven_2.2.0
# Local path of the downloaded archive and the remote URL it comes from.
# Both are plain globals; the LoadUnzip.R listing quoted in the comment
# that follows shows them being used for the download and the read.
fname <- "./data/pamd.zip"
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2Factivity.zip"
# Run the download/read helper, then show the timestamp it recorded.
source("LoadUnzip.R")
dateDownloaded
# LoadUnzip.R = if(!file.exists("data")){
# dir.create("data")
# }
#
# download.file(fileUrl,destfile="./data/pamd.zip",method="curl")
#
# dateDownloaded <- date()
#
# maindf <- read_delim(fname, delim = ",", col_names = TRUE)
Dataset download date Thu Apr 16 20:38:47 2020.
Range of Date
# First and last date present in the data; start_date and end_date are
# reused in the subtitles of the tables further down.
date_span <- range(maindf$date)
start_date <- date_span[1]
end_date <- date_span[2]
date_span
## [1] "2012-10-01" "2012-11-30"
For this part of the assignment, you can ignore the missing values in the dataset.
Calculate the total number of steps taken per day? Answer: MeanTotal gives us the breakdown of steps each day.
If you do not understand the difference between a histogram and a barplot, research the difference between them. Make a histogram of the total number of steps taken each day
Calculate and report the mean and median of the total number of steps taken per day
GTMeanTotal uses the gt (grammar for tables), scales, glue, and paletteer packages. The yellow highlighted portion gives us the total steps, mean, and median for 53 observations (I used drop_na from the tidyr package to remove the NA values).
# Daily step totals: rows containing NA are dropped, steps are summed per
# date, and the month number is attached (used as the histogram fill).
MeanTotal <- maindf %>%
  drop_na() %>%
  group_by(date) %>%
  summarize(Steps = sum(steps)) %>%
  mutate(m = month(date))
head(MeanTotal)
## # A tibble: 6 x 3
## date Steps m
## <date> <dbl> <dbl>
## 1 2012-10-02 126 10
## 2 2012-10-03 11352 10
## 3 2012-10-04 12116 10
## 4 2012-10-05 13294 10
## 5 2012-10-06 15420 10
## 6 2012-10-07 11015 10
Mean and Median
# One-row summary of the daily totals: number of days, grand total,
# mean and median of steps per day.
StepsMeanMed <- MeanTotal %>%
  summarize(
    Obs = length(date),
    SumofSteps = sum(Steps),
    StepsMean = mean(Steps),
    StepsMedian = median(Steps)
  )
# Render the summary as a gt table with all four columns shaded yellow and
# the covered date range as subtitle.
GTMeanTotal <- gt::gt(StepsMeanMed) %>%
  data_color(
    columns = vars(Obs, SumofSteps, StepsMean, StepsMedian),
    colors = "yellow"
  ) %>%
  tab_header(
    title = "Step Mean and Median Summary",
    subtitle = glue("{start_date} to {end_date}")
  )
GTMeanTotal
| Step Mean and Median Summary | |||
|---|---|---|---|
| 2012-10-01 to 2012-11-30 | |||
| Obs | SumofSteps | StepsMean | StepsMedian |
| 53 | 570608 | 10766.19 | 10765 |
# Histogram of total steps per day (NA rows removed), filled by month, with
# a vertical line at the mean of the daily totals. Kept in gg1 so it can be
# placed next to the imputed-data histogram later.
g1 <- ggplot(MeanTotal, aes(x = Steps, fill = factor(m)))
gg1 <- g1 +
  geom_histogram(bins = 9, alpha = 0.5) +
  geom_vline(xintercept = mean(MeanTotal$Steps)) +
  labs(
    title = "Personal Movement Activity Monitoring Device",
    subtitle = "Total Steps Each Day, Observation = 53 days",
    x = "STEPS",
    y = "FREQUENCY",
    fill = "MONTH"
  )
gg1
I’m using the gt (grammar for tables) package to analyze the average number of steps in each 5-minute interval. This table has 288 observations; see below.
# Per-interval statistics over all days without NA: number of observations,
# total steps and mean steps for every 5-minute interval id.
SummaryIntervalSteps <- maindf %>%
  drop_na() %>%
  group_by(TimeInterval = interval) %>%
  summarize(
    Total_Obs = length(interval),
    SumOfSteps = sum(steps),
    MeanOfSteps = mean(steps)
  )
# Colour ramp for the table cells, taken from the ggsci red_material palette.
red_palette <- as.character(
  paletteer::paletteer_d(palette = "ggsci::red_material")
)
# gt table whose SumOfSteps and MeanOfSteps cells are shaded by value.
GTInterval <- gt::gt(SummaryIntervalSteps) %>%
  data_color(
    columns = vars(SumOfSteps, MeanOfSteps),
    colors = scales::col_numeric(palette = red_palette, domain = NULL)
  ) %>%
  tab_header(
    title = "Personal Movement Activity Monitoring Device",
    subtitle = glue("{start_date} to {end_date}")
  )
GTInterval
| Personal Movement Activity Monitoring Device | |||
|---|---|---|---|
| 2012-10-01 to 2012-11-30 | |||
| TimeInterval | Total_Obs | SumOfSteps | MeanOfSteps |
| 0 | 53 | 91 | 1.7169811 |
| 5 | 53 | 18 | 0.3396226 |
| 10 | 53 | 7 | 0.1320755 |
| 15 | 53 | 8 | 0.1509434 |
| 20 | 53 | 4 | 0.0754717 |
| 25 | 53 | 111 | 2.0943396 |
| 30 | 53 | 28 | 0.5283019 |
| 35 | 53 | 46 | 0.8679245 |
| 40 | 53 | 0 | 0.0000000 |
| 45 | 53 | 78 | 1.4716981 |
| 50 | 53 | 16 | 0.3018868 |
| 55 | 53 | 7 | 0.1320755 |
| 100 | 53 | 17 | 0.3207547 |
| 105 | 53 | 36 | 0.6792453 |
| 110 | 53 | 8 | 0.1509434 |
| 115 | 53 | 18 | 0.3396226 |
| 120 | 53 | 0 | 0.0000000 |
| 125 | 53 | 59 | 1.1132075 |
| 130 | 53 | 97 | 1.8301887 |
| 135 | 53 | 9 | 0.1698113 |
| 140 | 53 | 9 | 0.1698113 |
| 145 | 53 | 20 | 0.3773585 |
| 150 | 53 | 14 | 0.2641509 |
| 155 | 53 | 0 | 0.0000000 |
| 200 | 53 | 0 | 0.0000000 |
| 205 | 53 | 0 | 0.0000000 |
| 210 | 53 | 60 | 1.1320755 |
| 215 | 53 | 0 | 0.0000000 |
| 220 | 53 | 0 | 0.0000000 |
| 225 | 53 | 7 | 0.1320755 |
| 230 | 53 | 0 | 0.0000000 |
| 235 | 53 | 12 | 0.2264151 |
| 240 | 53 | 0 | 0.0000000 |
| 245 | 53 | 0 | 0.0000000 |
| 250 | 53 | 82 | 1.5471698 |
| 255 | 53 | 50 | 0.9433962 |
| 300 | 53 | 0 | 0.0000000 |
| 305 | 53 | 0 | 0.0000000 |
| 310 | 53 | 0 | 0.0000000 |
| 315 | 53 | 0 | 0.0000000 |
| 320 | 53 | 11 | 0.2075472 |
| 325 | 53 | 33 | 0.6226415 |
| 330 | 53 | 86 | 1.6226415 |
| 335 | 53 | 31 | 0.5849057 |
| 340 | 53 | 26 | 0.4905660 |
| 345 | 53 | 4 | 0.0754717 |
| 350 | 53 | 0 | 0.0000000 |
| 355 | 53 | 0 | 0.0000000 |
| 400 | 53 | 63 | 1.1886792 |
| 405 | 53 | 50 | 0.9433962 |
| 410 | 53 | 136 | 2.5660377 |
| 415 | 53 | 0 | 0.0000000 |
| 420 | 53 | 18 | 0.3396226 |
| 425 | 53 | 19 | 0.3584906 |
| 430 | 53 | 218 | 4.1132075 |
| 435 | 53 | 35 | 0.6603774 |
| 440 | 53 | 185 | 3.4905660 |
| 445 | 53 | 44 | 0.8301887 |
| 450 | 53 | 165 | 3.1132075 |
| 455 | 53 | 59 | 1.1132075 |
| 500 | 53 | 0 | 0.0000000 |
| 505 | 53 | 83 | 1.5660377 |
| 510 | 53 | 159 | 3.0000000 |
| 515 | 53 | 119 | 2.2452830 |
| 520 | 53 | 176 | 3.3207547 |
| 525 | 53 | 157 | 2.9622642 |
| 530 | 53 | 111 | 2.0943396 |
| 535 | 53 | 321 | 6.0566038 |
| 540 | 53 | 849 | 16.0188679 |
| 545 | 53 | 972 | 18.3396226 |
| 550 | 53 | 2091 | 39.4528302 |
| 555 | 53 | 2358 | 44.4905660 |
| 600 | 53 | 1669 | 31.4905660 |
| 605 | 53 | 2611 | 49.2641509 |
| 610 | 53 | 2850 | 53.7735849 |
| 615 | 53 | 3363 | 63.4528302 |
| 620 | 53 | 2648 | 49.9622642 |
| 625 | 53 | 2495 | 47.0754717 |
| 630 | 53 | 2764 | 52.1509434 |
| 635 | 53 | 2085 | 39.3396226 |
| 640 | 53 | 2333 | 44.0188679 |
| 645 | 53 | 2341 | 44.1698113 |
| 650 | 53 | 1980 | 37.3584906 |
| 655 | 53 | 2599 | 49.0377358 |
| 700 | 53 | 2322 | 43.8113208 |
| 705 | 53 | 2352 | 44.3773585 |
| 710 | 53 | 2677 | 50.5094340 |
| 715 | 53 | 2889 | 54.5094340 |
| 720 | 53 | 2646 | 49.9245283 |
| 725 | 53 | 2702 | 50.9811321 |
| 730 | 53 | 2951 | 55.6792453 |
| 735 | 53 | 2349 | 44.3207547 |
| 740 | 53 | 2770 | 52.2641509 |
| 745 | 53 | 3686 | 69.5471698 |
| 750 | 53 | 3066 | 57.8490566 |
| 755 | 53 | 2976 | 56.1509434 |
| 800 | 53 | 3889 | 73.3773585 |
| 805 | 53 | 3615 | 68.2075472 |
| 810 | 53 | 6860 | 129.4339623 |
| 815 | 53 | 8349 | 157.5283019 |
| 820 | 53 | 9071 | 171.1509434 |
| 825 | 53 | 8236 | 155.3962264 |
| 830 | 53 | 9397 | 177.3018868 |
| 835 | 53 | 10927 | 206.1698113 |
| 840 | 53 | 10384 | 195.9245283 |
| 845 | 53 | 9517 | 179.5660377 |
| 850 | 53 | 9720 | 183.3962264 |
| 855 | 53 | 8852 | 167.0188679 |
| 900 | 53 | 7603 | 143.4528302 |
| 905 | 53 | 6574 | 124.0377358 |
| 910 | 53 | 5783 | 109.1132075 |
| 915 | 53 | 5730 | 108.1132075 |
| 920 | 53 | 5497 | 103.7169811 |
| 925 | 53 | 5086 | 95.9622642 |
| 930 | 53 | 3509 | 66.2075472 |
| 935 | 53 | 2397 | 45.2264151 |
| 940 | 53 | 1314 | 24.7924528 |
| 945 | 53 | 2054 | 38.7547170 |
| 950 | 53 | 1854 | 34.9811321 |
| 955 | 53 | 1116 | 21.0566038 |
| 1000 | 53 | 2150 | 40.5660377 |
| 1005 | 53 | 1430 | 26.9811321 |
| 1010 | 53 | 2248 | 42.4150943 |
| 1015 | 53 | 2791 | 52.6603774 |
| 1020 | 53 | 2063 | 38.9245283 |
| 1025 | 53 | 2692 | 50.7924528 |
| 1030 | 53 | 2347 | 44.2830189 |
| 1035 | 53 | 1983 | 37.4150943 |
| 1040 | 53 | 1839 | 34.6981132 |
| 1045 | 53 | 1502 | 28.3396226 |
| 1050 | 53 | 1330 | 25.0943396 |
| 1055 | 53 | 1693 | 31.9433962 |
| 1100 | 53 | 1662 | 31.3584906 |
| 1105 | 53 | 1573 | 29.6792453 |
| 1110 | 53 | 1130 | 21.3207547 |
| 1115 | 53 | 1354 | 25.5471698 |
| 1120 | 53 | 1504 | 28.3773585 |
| 1125 | 53 | 1403 | 26.4716981 |
| 1130 | 53 | 1772 | 33.4339623 |
| 1135 | 53 | 2649 | 49.9811321 |
| 1140 | 53 | 2228 | 42.0377358 |
| 1145 | 53 | 2364 | 44.6037736 |
| 1150 | 53 | 2440 | 46.0377358 |
| 1155 | 53 | 3137 | 59.1886792 |
| 1200 | 53 | 3385 | 63.8679245 |
| 1205 | 53 | 4648 | 87.6981132 |
| 1210 | 53 | 5027 | 94.8490566 |
| 1215 | 53 | 4917 | 92.7735849 |
| 1220 | 53 | 3360 | 63.3962264 |
| 1225 | 53 | 2659 | 50.1698113 |
| 1230 | 53 | 2887 | 54.4716981 |
| 1235 | 53 | 1718 | 32.4150943 |
| 1240 | 53 | 1406 | 26.5283019 |
| 1245 | 53 | 2000 | 37.7358491 |
| 1250 | 53 | 2388 | 45.0566038 |
| 1255 | 53 | 3566 | 67.2830189 |
| 1300 | 53 | 2244 | 42.3396226 |
| 1305 | 53 | 2114 | 39.8867925 |
| 1310 | 53 | 2293 | 43.2641509 |
| 1315 | 53 | 2172 | 40.9811321 |
| 1320 | 53 | 2451 | 46.2452830 |
| 1325 | 53 | 2991 | 56.4339623 |
| 1330 | 53 | 2266 | 42.7547170 |
| 1335 | 53 | 1332 | 25.1320755 |
| 1340 | 53 | 2118 | 39.9622642 |
| 1345 | 53 | 2838 | 53.5471698 |
| 1350 | 53 | 2508 | 47.3207547 |
| 1355 | 53 | 3223 | 60.8113208 |
| 1400 | 53 | 2955 | 55.7547170 |
| 1405 | 53 | 2754 | 51.9622642 |
| 1410 | 53 | 2310 | 43.5849057 |
| 1415 | 53 | 2581 | 48.6981132 |
| 1420 | 53 | 1880 | 35.4716981 |
| 1425 | 53 | 1990 | 37.5471698 |
| 1430 | 53 | 2218 | 41.8490566 |
| 1435 | 53 | 1458 | 27.5094340 |
| 1440 | 53 | 907 | 17.1132075 |
| 1445 | 53 | 1382 | 26.0754717 |
| 1450 | 53 | 2312 | 43.6226415 |
| 1455 | 53 | 2320 | 43.7735849 |
| 1500 | 53 | 1591 | 30.0188679 |
| 1505 | 53 | 1912 | 36.0754717 |
| 1510 | 53 | 1881 | 35.4905660 |
| 1515 | 53 | 2059 | 38.8490566 |
| 1520 | 53 | 2436 | 45.9622642 |
| 1525 | 53 | 2531 | 47.7547170 |
| 1530 | 53 | 2551 | 48.1320755 |
| 1535 | 53 | 3462 | 65.3207547 |
| 1540 | 53 | 4394 | 82.9056604 |
| 1545 | 53 | 5229 | 98.6603774 |
| 1550 | 53 | 5412 | 102.1132075 |
| 1555 | 53 | 4450 | 83.9622642 |
| 1600 | 53 | 3293 | 62.1320755 |
| 1605 | 53 | 3399 | 64.1320755 |
| 1610 | 53 | 3951 | 74.5471698 |
| 1615 | 53 | 3348 | 63.1698113 |
| 1620 | 53 | 3016 | 56.9056604 |
| 1625 | 53 | 3168 | 59.7735849 |
| 1630 | 53 | 2325 | 43.8679245 |
| 1635 | 53 | 2044 | 38.5660377 |
| 1640 | 53 | 2367 | 44.6603774 |
| 1645 | 53 | 2409 | 45.4528302 |
| 1650 | 53 | 2449 | 46.2075472 |
| 1655 | 53 | 2315 | 43.6792453 |
| 1700 | 53 | 2471 | 46.6226415 |
| 1705 | 53 | 2984 | 56.3018868 |
| 1710 | 53 | 2688 | 50.7169811 |
| 1715 | 53 | 3245 | 61.2264151 |
| 1720 | 53 | 3854 | 72.7169811 |
| 1725 | 53 | 4184 | 78.9433962 |
| 1730 | 53 | 3654 | 68.9433962 |
| 1735 | 53 | 3162 | 59.6603774 |
| 1740 | 53 | 3980 | 75.0943396 |
| 1745 | 53 | 2995 | 56.5094340 |
| 1750 | 53 | 1843 | 34.7735849 |
| 1755 | 53 | 1985 | 37.4528302 |
| 1800 | 53 | 2156 | 40.6792453 |
| 1805 | 53 | 3075 | 58.0188679 |
| 1810 | 53 | 3959 | 74.6981132 |
| 1815 | 53 | 4522 | 85.3207547 |
| 1820 | 53 | 3141 | 59.2641509 |
| 1825 | 53 | 3592 | 67.7735849 |
| 1830 | 53 | 4118 | 77.6981132 |
| 1835 | 53 | 3935 | 74.2452830 |
| 1840 | 53 | 4523 | 85.3396226 |
| 1845 | 53 | 5271 | 99.4528302 |
| 1850 | 53 | 4589 | 86.5849057 |
| 1855 | 53 | 4537 | 85.6037736 |
| 1900 | 53 | 4498 | 84.8679245 |
| 1905 | 53 | 4125 | 77.8301887 |
| 1910 | 53 | 3076 | 58.0377358 |
| 1915 | 53 | 2828 | 53.3584906 |
| 1920 | 53 | 1925 | 36.3207547 |
| 1925 | 53 | 1098 | 20.7169811 |
| 1930 | 53 | 1452 | 27.3962264 |
| 1935 | 53 | 2121 | 40.0188679 |
| 1940 | 53 | 1601 | 30.2075472 |
| 1945 | 53 | 1354 | 25.5471698 |
| 1950 | 53 | 2420 | 45.6603774 |
| 1955 | 53 | 1777 | 33.5283019 |
| 2000 | 53 | 1040 | 19.6226415 |
| 2005 | 53 | 1008 | 19.0188679 |
| 2010 | 53 | 1025 | 19.3396226 |
| 2015 | 53 | 1767 | 33.3396226 |
| 2020 | 53 | 1421 | 26.8113208 |
| 2025 | 53 | 1122 | 21.1698113 |
| 2030 | 53 | 1447 | 27.3018868 |
| 2035 | 53 | 1131 | 21.3396226 |
| 2040 | 53 | 1036 | 19.5471698 |
| 2045 | 53 | 1130 | 21.3207547 |
| 2050 | 53 | 1712 | 32.3018868 |
| 2055 | 53 | 1068 | 20.1509434 |
| 2100 | 53 | 845 | 15.9433962 |
| 2105 | 53 | 913 | 17.2264151 |
| 2110 | 53 | 1243 | 23.4528302 |
| 2115 | 53 | 1020 | 19.2452830 |
| 2120 | 53 | 660 | 12.4528302 |
| 2125 | 53 | 425 | 8.0188679 |
| 2130 | 53 | 777 | 14.6603774 |
| 2135 | 53 | 864 | 16.3018868 |
| 2140 | 53 | 460 | 8.6792453 |
| 2145 | 53 | 413 | 7.7924528 |
| 2150 | 53 | 431 | 8.1320755 |
| 2155 | 53 | 139 | 2.6226415 |
| 2200 | 53 | 77 | 1.4528302 |
| 2205 | 53 | 195 | 3.6792453 |
| 2210 | 53 | 255 | 4.8113208 |
| 2215 | 53 | 451 | 8.5094340 |
| 2220 | 53 | 375 | 7.0754717 |
| 2225 | 53 | 461 | 8.6981132 |
| 2230 | 53 | 517 | 9.7547170 |
| 2235 | 53 | 117 | 2.2075472 |
| 2240 | 53 | 17 | 0.3207547 |
| 2245 | 53 | 6 | 0.1132075 |
| 2250 | 53 | 85 | 1.6037736 |
| 2255 | 53 | 244 | 4.6037736 |
| 2300 | 53 | 175 | 3.3018868 |
| 2305 | 53 | 151 | 2.8490566 |
| 2310 | 53 | 0 | 0.0000000 |
| 2315 | 53 | 44 | 0.8301887 |
| 2320 | 53 | 51 | 0.9622642 |
| 2325 | 53 | 84 | 1.5849057 |
| 2330 | 53 | 138 | 2.6037736 |
| 2335 | 53 | 249 | 4.6981132 |
| 2340 | 53 | 175 | 3.3018868 |
| 2345 | 53 | 34 | 0.6415094 |
| 2350 | 53 | 12 | 0.2264151 |
| 2355 | 53 | 57 | 1.0754717 |
# Average steps per 5-minute interval, drawn as a time series.
# The interval id encodes clock time as HHMM (..., 50, 55, 100, 105, ...), so
# plotting it as a plain number leaves a jump of 45 units at every hour
# boundary. It is converted to decimal hours so the x axis is evenly spaced.
g2 <- ggplot(
  data = SummaryIntervalSteps,
  aes(x = TimeInterval %/% 100 + (TimeInterval %% 100) / 60, y = MeanOfSteps)
)
# No fill label is set: the plot maps no fill aesthetic.
g2 +
  geom_line(color = "red") +
  labs(
    title = "Personal Movement Activity Monitoring Device",
    subtitle = "Average Steps per 5-Minute Interval",
    x = "Time of Day (hours)",
    y = "Average Steps"
  )
# Interval with the highest average step count, reported by its HHMM id.
SummaryIntervalSteps %>%
  filter(MeanOfSteps == max(MeanOfSteps)) %>%
  select(TimeInterval, MeanOfSteps)
## # A tibble: 1 x 2
## TimeInterval MeanOfSteps
## <dbl> <dbl>
## 1 835 206.
Note that there are a number of days/intervals where there are missing values (coded as NA). The presence of missing days may introduce bias into some calculations or summaries of the data.
# Per-year overview of missing step counts, rendered as a gt table.
# Total_Obs uses n() so it counts the rows of each year's group; the former
# length(maindf$date) reached outside the grouped data and returned the size
# of the whole data set for every group.
Yeardf <- maindf %>%
  group_by(Year = year(date)) %>%
  summarize(
    ISNA_Count = sum(is.na(steps)),
    Mean_ISNA = mean(is.na(steps)),
    Total_Obs = n()
  ) %>%
  ungroup() %>%
  gt() %>%
  tab_header(
    title = "Year 'IS.NA' Summary",
    subtitle = glue("{start_date} to {end_date}")
  )
Yeardf
| Year 'IS.NA' Summary | |||
|---|---|---|---|
| 2012-10-01 to 2012-11-30 | |||
| Year | ISNA_Count | Mean_ISNA | Total_Obs |
| 2012 | 2304 | 0.1311475 | 17568 |
# Per-month row count, number of missing step values and their share.
Monthdf <- maindf %>%
  group_by(M = month(date, label = TRUE, abbr = TRUE)) %>%
  summarize(
    Total_Obs = length(date),
    ISNA_Count = sum(is.na(steps)),
    Mean_ISNA = mean(is.na(steps))
  )
# Same figures as a gt table with the covered date range as subtitle.
MonthdfGT <- gt(Monthdf) %>%
  tab_header(
    title = "Monthly 'IS.NA' Summary",
    subtitle = glue("{start_date} to {end_date}")
  )
MonthdfGT
| Monthly 'IS.NA' Summary | |||
|---|---|---|---|
| 2012-10-01 to 2012-11-30 | |||
| M | Total_Obs | ISNA_Count | Mean_ISNA |
| Oct | 8928 | 576 | 0.06451613 |
| Nov | 8640 | 1728 | 0.20000000 |
# Per-weekday row count, number of missing step values and their share.
dailydf <- maindf %>%
  group_by(Daily = wday(date, label = TRUE, abbr = TRUE)) %>%
  summarize(
    Total_Obs = length(date),
    ISNA_Count = sum(is.na(steps)),
    Mean_ISNA = mean(is.na(steps))
  )
# Print the weekday breakdown as a gt table.
gt(dailydf) %>%
  tab_header(
    title = "By day 'IS.NA' Summary",
    subtitle = glue("{start_date} to {end_date}")
  )
| By day 'IS.NA' Summary | |||
|---|---|---|---|
| 2012-10-01 to 2012-11-30 | |||
| Daily | Total_Obs | ISNA_Count | Mean_ISNA |
| Sun | 2304 | 288 | 0.1250000 |
| Mon | 2592 | 576 | 0.2222222 |
| Tue | 2592 | 0 | 0.0000000 |
| Wed | 2592 | 288 | 0.1111111 |
| Thu | 2592 | 288 | 0.1111111 |
| Fri | 2592 | 576 | 0.2222222 |
| Sat | 2304 | 288 | 0.1250000 |
# Bar chart of missing step values per weekday; each bar's height is the
# ISNA_Count of that weekday (supplied through the weight aesthetic).
g3 <- ggplot(dailydf, aes(Daily))
g3 +
  geom_bar(aes(weight = ISNA_Count)) +
  labs(
    title = "Personal Movement Activity Monitoring Device",
    subtitle = "Missing data in Week Days",
    x = "Daily",
    y = "Frequency"
  )
# Load these libraries in this section because attaching them earlier
# affects lubridate's label and abbr arguments.
library(mice)
library(missForest)
library(VIM)
Using aggr function to plot NA.
# Missing-value overview of maindf drawn with aggr(); variables are sorted
# by their share of missing values and labelled with the column names.
aggr(
  maindf,
  col = c("navyblue", "red"),
  numbers = TRUE,
  sortVars = TRUE,
  labels = names(maindf),
  cex.axis = 0.8,
  gap = 5,
  ylab = c("Missing Data", "Pattern")
)
##
## Variables sorted by number of missings:
## Variable Count
## steps 0.1311475
## date 0.0000000
## interval 0.0000000
I will use the Predictive Mean Matching (PMM) model to impute the missing data. Please review the help file ?pmm for more details.
# Impute the missing values with predictive mean matching: three imputed
# data sets, 50 iterations, fixed seed so the run is reproducible.
imputed_maindf1 <- mice(
  maindf,
  m = 3,
  maxit = 50,
  method = "pmm",
  seed = 420
)
##
## iter imp variable
## 1 1 steps
## 1 2 steps
## 1 3 steps
## 2 1 steps
## 2 2 steps
## 2 3 steps
## 3 1 steps
## 3 2 steps
## 3 3 steps
## 4 1 steps
## 4 2 steps
## 4 3 steps
## 5 1 steps
## 5 2 steps
## 5 3 steps
## 6 1 steps
## 6 2 steps
## 6 3 steps
## 7 1 steps
## 7 2 steps
## 7 3 steps
## 8 1 steps
## 8 2 steps
## 8 3 steps
## 9 1 steps
## 9 2 steps
## 9 3 steps
## 10 1 steps
## 10 2 steps
## 10 3 steps
## 11 1 steps
## 11 2 steps
## 11 3 steps
## 12 1 steps
## 12 2 steps
## 12 3 steps
## 13 1 steps
## 13 2 steps
## 13 3 steps
## 14 1 steps
## 14 2 steps
## 14 3 steps
## 15 1 steps
## 15 2 steps
## 15 3 steps
## 16 1 steps
## 16 2 steps
## 16 3 steps
## 17 1 steps
## 17 2 steps
## 17 3 steps
## 18 1 steps
## 18 2 steps
## 18 3 steps
## 19 1 steps
## 19 2 steps
## 19 3 steps
## 20 1 steps
## 20 2 steps
## 20 3 steps
## 21 1 steps
## 21 2 steps
## 21 3 steps
## 22 1 steps
## 22 2 steps
## 22 3 steps
## 23 1 steps
## 23 2 steps
## 23 3 steps
## 24 1 steps
## 24 2 steps
## 24 3 steps
## 25 1 steps
## 25 2 steps
## 25 3 steps
## 26 1 steps
## 26 2 steps
## 26 3 steps
## 27 1 steps
## 27 2 steps
## 27 3 steps
## 28 1 steps
## 28 2 steps
## 28 3 steps
## 29 1 steps
## 29 2 steps
## 29 3 steps
## 30 1 steps
## 30 2 steps
## 30 3 steps
## 31 1 steps
## 31 2 steps
## 31 3 steps
## 32 1 steps
## 32 2 steps
## 32 3 steps
## 33 1 steps
## 33 2 steps
## 33 3 steps
## 34 1 steps
## 34 2 steps
## 34 3 steps
## 35 1 steps
## 35 2 steps
## 35 3 steps
## 36 1 steps
## 36 2 steps
## 36 3 steps
## 37 1 steps
## 37 2 steps
## 37 3 steps
## 38 1 steps
## 38 2 steps
## 38 3 steps
## 39 1 steps
## 39 2 steps
## 39 3 steps
## 40 1 steps
## 40 2 steps
## 40 3 steps
## 41 1 steps
## 41 2 steps
## 41 3 steps
## 42 1 steps
## 42 2 steps
## 42 3 steps
## 43 1 steps
## 43 2 steps
## 43 3 steps
## 44 1 steps
## 44 2 steps
## 44 3 steps
## 45 1 steps
## 45 2 steps
## 45 3 steps
## 46 1 steps
## 46 2 steps
## 46 3 steps
## 47 1 steps
## 47 2 steps
## 47 3 steps
## 48 1 steps
## 48 2 steps
## 48 3 steps
## 49 1 steps
## 49 2 steps
## 49 3 steps
## 50 1 steps
## 50 2 steps
## 50 3 steps
Summary of the Imputed Data.
# Print the summary of the mids object (imputation methods, predictor matrix).
summary(imputed_maindf1)
## Class: mids
## Number of multiple imputations: 3
## Imputation methods:
## steps date interval
## "pmm" "" ""
## PredictorMatrix:
## steps date interval
## steps 0 1 1
## date 1 0 1
## interval 1 1 0
Multiple imputations (m=3)
# First rows of the values imputed for steps, one column per imputation.
head(imputed_maindf1$imp$steps)
## 1 2 3
## 1 0 47 47
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
Picking a model from m = 3 to impute the data.
# Materialise the first of the three imputed data sets as a full data frame.
# The call is namespace-qualified because tidyr (attached with tidyverse)
# also exports complete(); an unqualified call only reaches the mice version
# for as long as mice happens to be attached after tidyr.
complete_Maindf1 <- mice::complete(imputed_maindf1, 1)
summary(complete_Maindf1)
## steps date interval
## Min. : 0.00 Min. :2012-10-01 Min. : 0.0
## 1st Qu.: 0.00 1st Qu.:2012-10-16 1st Qu.: 588.8
## Median : 0.00 Median :2012-10-31 Median :1177.5
## Mean : 37.07 Mean :2012-10-31 Mean :1177.5
## 3rd Qu.: 12.00 3rd Qu.:2012-11-15 3rd Qu.:1766.2
## Max. :806.00 Max. :2012-11-30 Max. :2355.0
# Daily step totals recomputed on the imputed data, plus the month number
# used as the histogram fill.
CompleteMeanTotal <- complete_Maindf1 %>%
  group_by(date) %>%
  summarize(Steps = sum(steps)) %>%
  mutate(m = month(date))
# Histogram of the imputed daily totals with a vertical line at their mean.
g4 <- ggplot(CompleteMeanTotal, aes(x = Steps, fill = factor(m)))
gg4 <- g4 +
  geom_histogram(bins = 9, alpha = 0.5) +
  geom_vline(xintercept = mean(CompleteMeanTotal$Steps)) +
  labs(
    title = "Personal Movement Activity Monitoring Device",
    subtitle = "Total Steps Each Day (Imputed dataset), Observation = 61 days",
    x = "STEPS",
    y = "FREQUENCY",
    fill = "MONTH"
  )
# Stack the original (panel A) and imputed (panel B) histograms.
library(cowplot)
plot_grid(gg1, gg4, nrow = 2, labels = "AUTO")
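The histograms above compare the original and imputed datasets: after imputation the mean of the daily totals decreases by 90.53294, the median decreases by 194, the total number of steps increases by 80607, and the number of observed days increases by 8 (from 53 to 61) because the missing values were imputed.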
Calculating the steps mean and median.
# Daily totals of the imputed data (drop_na() is retained from the original
# pipeline), with the month number attached.
CompleteTotal <- complete_Maindf1 %>%
  drop_na() %>%
  group_by(date) %>%
  summarize(Steps = sum(steps)) %>%
  mutate(m = month(date))
# One-row summary: number of days, grand total, mean and median per day.
CompleteMeanMed <- CompleteTotal %>%
  summarize(
    Obs = length(date),
    SumofSteps = sum(Steps),
    StepsMean = mean(Steps),
    StepsMedian = median(Steps)
  )
# gt table of the imputed summary with all four columns shaded yellow.
GTCompleteMeanMed <- gt::gt(CompleteMeanMed) %>%
  data_color(
    columns = vars(Obs, SumofSteps, StepsMean, StepsMedian),
    colors = "yellow"
  ) %>%
  tab_header(
    title = "Step Mean and Median Summary (imputed)",
    subtitle = glue("{start_date} to {end_date}")
  )
GTCompleteMeanMed
| Step Mean and Median Summary (imputed) | |||
|---|---|---|---|
| 2012-10-01 to 2012-11-30 | |||
| Obs | SumofSteps | StepsMean | StepsMedian |
| 61 | 651215 | 10675.66 | 10571 |
# Re-print the summary table of the original (NA rows dropped) data so it
# can be compared with the imputed summary.
GTMeanTotal
| Step Mean and Median Summary | |||
|---|---|---|---|
| 2012-10-01 to 2012-11-30 | |||
| Obs | SumofSteps | StepsMean | StepsMedian |
| 53 | 570608 | 10766.19 | 10765 |
Comparison formula:
# Imputed minus original: total steps, mean and median of the daily totals.
NewSteps <- CompleteMeanMed[["SumofSteps"]] - StepsMeanMed[["SumofSteps"]]
NewMean <- CompleteMeanMed[["StepsMean"]] - StepsMeanMed[["StepsMean"]]
NewMed <- CompleteMeanMed[["StepsMedian"]] - StepsMeanMed[["StepsMedian"]]
NewSteps
## [1] 80607
NewMean
## [1] -90.53294
NewMed
## [1] -194
For this part the weekdays() function may be of some help here. Use the dataset with the filled-in missing values for this part.
# Numeric codes used to split the data into Monday-Friday and
# Saturday-Sunday subsets. The values look like day-of-week codes with
# Sunday = 1 ... Saturday = 7 (lubridate::wday() default numbering).
# NOTE(review): confirm the column they are matched against really holds a
# day of the week and not a day of the month.
MonFri <- c(2,3,4,5,6)
SatSun <- c(1,7)
New datasets with imputed missing data.
# Label every row of the imputed data with its day of the week.
# lubridate::wday(week_start = 7) numbers Sunday = 1 ... Saturday = 7, which
# is the coding MonFri (2-6) and SatSun (1, 7) rely on. The former day(date)
# returned the day of the month, so the "weekday" subset was really the
# 2nd-6th of each month and the "weekend" subset the 1st and 7th. The
# namespace is spelled out so that a later-attached package exporting its
# own wday() cannot change the numbering.
maindf2 <- complete_Maindf1 %>%
  mutate(DayNumber = lubridate::wday(date, week_start = 7))
MonFriDf <- maindf2 %>% filter(DayNumber %in% MonFri)
SatSunDf <- maindf2 %>% filter(DayNumber %in% SatSun)
# Per-interval statistics for Monday through Friday.
TSMonFri <- MonFriDf %>%
  drop_na() %>%
  group_by(TimeInterval = interval) %>%
  summarize(
    Total_Obs = length(interval),
    SumOfSteps = sum(steps),
    MeanOfSteps = mean(steps)
  )
# Weekday time series. The interval id is HHMM-coded (..., 55, 100, ...), so
# it is converted to decimal hours to get an evenly spaced x axis. The axis
# labels now describe what is plotted (time of day vs. average steps); the
# former "STEPS"/"FREQUENCY" labels were copied from the histograms.
g5 <- ggplot(
  data = TSMonFri,
  aes(x = TimeInterval %/% 100 + (TimeInterval %% 100) / 60, y = MeanOfSteps)
) +
  geom_line(color = "red") +
  labs(
    title = "Personal Movement Activity Monitoring Device",
    subtitle = "Monday through Friday (imputed data)",
    x = "Time of Day (hours)",
    y = "Average Steps"
  )
# Per-interval statistics for Saturday and Sunday.
TSSatSun <- SatSunDf %>%
  drop_na() %>%
  group_by(TimeInterval = interval) %>%
  summarize(
    Total_Obs = length(interval),
    SumOfSteps = sum(steps),
    MeanOfSteps = mean(steps)
  )
# Weekend time series, same axis treatment as the weekday panel.
g6 <- ggplot(
  data = TSSatSun,
  aes(x = TimeInterval %/% 100 + (TimeInterval %% 100) / 60, y = MeanOfSteps)
) +
  geom_line(color = "green") +
  labs(
    title = "Personal Movement Activity Monitoring Device",
    subtitle = "Saturday-Sunday (imputed data)",
    x = "Time of Day (hours)",
    y = "Average Steps"
  )
# Weekday panel on top, weekend panel below.
plot_grid(g5, g6, nrow = 2)