Show the code
# Import the PAWPER study workbook, standardise the column names, and derive
# age (in months) plus the weight-estimation error metrics for the two tools
# (PAWPER XL and PAWPER on a page).
df_poap <-
  readxl::read_xlsx("PAWPER Study Data Actual 29102024.xlsx") %>%
  janitor::clean_names() %>%
  rename(
    weight = scale_measured_weight_in_kg,
    height = height_cm,
    pxl_wgt = estimated_weight_using_pawper_xl_on_a_page_in_kg,
    habitus_pxl = habitus_score_for_pawper_xl,
    habitus_paw = habitus_score_for_pawper,
    dob = date_of_birth_dd_mm_yyyy,
    age_mths = age_remaining_months,
    date_recruit = date_of_recruitment_dd_mm_yyyy,
    poap_wgt = estimated_weight_using_pawper_on_a_page_in_kg
  ) %>%
  select(-c(timestamp, recruiter, date_recruit)) %>%
  mutate(
    # One record has a letter "O" where a digit was expected, which made
    # parse_integer() fail and left age, and therefore agecat, missing for
    # that child. Treat the letter as a zero before parsing.
    # NOTE(review): confirm against the workbook that "O" was meant as 0.
    age_mths = parse_integer(gsub("[Oo]", "0", age_mths)),
    # Total age in months = completed years * 12 + remaining months.
    age = age_years * 12 + age_mths,
    # parse_integer() returns NA for any value with a fractional part, and
    # almost half of the heights came out missing. parse_number() keeps them.
    # NOTE(review): presumably the lost values were decimals; confirm.
    height = parse_number(height),
    weight = parse_number(weight),
    poap_wgt = parse_number(poap_wgt),
    sex = factor(sex),
    # Generating Pawper-XL errors
    # pe = percentage error, me = raw error (kg), ape = absolute pe.
    pe_pxl = ((pxl_wgt - weight) / weight) * 100,
    me_pxl = pxl_wgt - weight,
    ape_pxl = abs(pe_pxl),
    ape_pxl_10 = ape_pxl < 10,
    ape_pxl_20 = ape_pxl < 20,
    # Left-closed bins [0, 10), [10, 20), [20, Inf). An open upper bound means
    # very large errors are still classified instead of becoming NA.
    pxl_cat = cut(
      ape_pxl,
      breaks = c(0, 10, 20, Inf),
      right = FALSE,
      labels = c("P10", "P20", ">=P20")
    ),
    # Generating Pawper errors (same definitions as above)
    pe_poap = ((poap_wgt - weight) / weight) * 100,
    me_poap = poap_wgt - weight,
    ape_poap = abs(pe_poap),
    ape_poap_10 = ape_poap < 10,
    ape_poap_20 = ape_poap < 20,
    poap_cat = cut(
      ape_poap,
      breaks = c(0, 10, 20, Inf),
      right = FALSE,
      labels = c("P10", "P20", ">=P20")
    ),
    # Age bands in months: under 12, 12 to 60 inclusive, over 60.
    agecat = case_when(
      age < 12 ~ "< 1 year",
      age >= 12 & age <= 60 ~ "1 to 5 years",
      age > 60 ~ "> 5 years"
    ) %>%
      factor(
        levels = c("< 1 year", "1 to 5 years", "> 5 years")
      )
  )
Warning: There were 2 warnings in `mutate()`.
The first warning was:
ℹ In argument: `age_mths = parse_integer(age_mths)`.
Caused by warning:
! 1 parsing failure.
row col expected actual
252 -- no trailing characters O
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
Show the code
# Attach human-readable variable labels to df_poap. Columns not listed here
# are left without a label.
labelled::var_label(df_poap) <-
  list(
    age = "Age in months",
    weight = "Weight (actual)",
    poap_wgt = "Weight (Pawper on a page)",
    height = "Height",
    pxl_wgt = "Pawper-XL weight",
    habitus_pxl = "Pawper-XL habitus score",
    sex = "Sex",
    age_mths = "Age in rest of month",
    # Label typo fixed: "conpleted" -> "completed".
    age_years = "Age in completed years",
    habitus_paw = "Pawper habitus score",
    dob = "Date of birth",
    group = "Group",
    study_id = "Study ID"
  )
# Print a per-variable overview (type, distribution, valid/missing counts).
summarytools::dfSummary(df_poap)
Data Frame Summary
df_poap
Dimensions: 462 x 26
Duplicates: 1
-----------------------------------------------------------------------------------------------------------------------------------------
No Variable Label Stats / Values Freqs (% of Valid) Graph Valid Missing
---- ------------- --------------------------- ------------------------- --------------------- --------------------- ---------- ---------
1 study_id Study ID 1. 009 4 ( 0.9%) 462 0
[character] 2. 013 3 ( 0.6%) (100.0%) (0.0%)
3. 014 3 ( 0.6%)
4. 002 2 ( 0.4%)
5. 005 2 ( 0.4%)
6. 006 2 ( 0.4%)
7. 008 2 ( 0.4%)
8. 010 2 ( 0.4%)
9. 015 2 ( 0.4%)
10. 016 2 ( 0.4%)
[ 434 others ] 438 (94.8%) IIIIIIIIIIIIIIIIII
2 group Group 1. Doctor 57 (12.3%) II 462 0
[character] 2. Medical student 382 (82.7%) IIIIIIIIIIIIIIII (100.0%) (0.0%)
3. Nurse 23 ( 5.0%)
3 dob Date of birth 1. 44647 3 ( 0.6%) 462 0
[character] 2. 44988 3 ( 0.6%) (100.0%) (0.0%)
3. 41044 2 ( 0.4%)
4. 41131 2 ( 0.4%)
5. 41330 2 ( 0.4%)
6. 41371 2 ( 0.4%)
7. 41372 2 ( 0.4%)
8. 41405 2 ( 0.4%)
9. 42027 2 ( 0.4%)
10. 42055 2 ( 0.4%)
[ 413 others ] 440 (95.2%) IIIIIIIIIIIIIIIIIII
4 age_years Age in conpleted years Mean (sd) : 4.3 (3.7) 14 distinct values : 462 0
[numeric] min < med < max: : (100.0%) (0.0%)
0 < 3 < 13 :
IQR (CV) : 6 (0.9) : : . : .
: : : : : . : : . .
5 age_mths Age in rest of month Mean (sd) : 5.4 (3.6) 12 distinct values : . 461 1
[integer] min < med < max: : : (99.8%) (0.2%)
0 < 5 < 11 : . . :
IQR (CV) : 6 (0.7) : : : . : : . : . :
: : : : : : : : : :
6 sex Sex 1. Female 186 (40.3%) IIIIIIII 462 0
[factor] 2. Male 276 (59.7%) IIIIIIIIIII (100.0%) (0.0%)
7 habitus_paw Pawper habitus score Mean (sd) : 2.7 (0.8) 1 : 37 ( 8.0%) I 462 0
[numeric] min < med < max: 2 : 116 (25.1%) IIIII (100.0%) (0.0%)
1 < 3 < 5 3 : 269 (58.2%) IIIIIIIIIII
IQR (CV) : 1 (0.3) 4 : 29 ( 6.3%) I
5 : 11 ( 2.4%)
8 poap_wgt Weight (Pawper on a page) Mean (sd) : 17 (9.5) 51 distinct values : 460 2
[numeric] min < med < max: : . (99.6%) (0.4%)
2.8 < 14 < 50 : :
IQR (CV) : 13 (0.6) : : : : .
: : : : : : : .
9 habitus_pxl Pawper-XL habitus score Mean (sd) : 2.8 (0.9) 1 : 39 ( 8.4%) I 462 0
[numeric] min < med < max: 2 : 100 (21.6%) IIII (100.0%) (0.0%)
1 < 3 < 7 3 : 259 (56.1%) IIIIIIIIIII
IQR (CV) : 1 (0.3) 4 : 48 (10.4%) II
5 : 9 ( 1.9%)
6 : 5 ( 1.1%)
7 : 2 ( 0.4%)
10 pxl_wgt Pawper-XL weight Mean (sd) : 17.2 (9.9) 52 distinct values : 462 0
[numeric] min < med < max: : . (100.0%) (0.0%)
2.5 < 14 < 58 . : :
IQR (CV) : 14 (0.6) : : : . .
: : : : : : .
11 weight Weight (actual) Mean (sd) : 16.4 (9.7) 296 distinct values : 462 0
[numeric] min < med < max: : . (100.0%) (0.0%)
2 < 14 < 60.1 : : :
IQR (CV) : 11.8 (0.6) : : : . .
: : : : : :
12 height Height Mean (sd) : 92.3 (27.5) 90 distinct values : 251 211
[integer] min < med < max: : : . (54.3%) (45.7%)
45 < 87 < 150 : : : : .
IQR (CV) : 43 (0.3) . : : : : : . : : .
: : : : : : : : : :
13 age Age in months Mean (sd) : 56.6 (44.8) 140 distinct values : 461 1
[numeric] min < med < max: : : (99.8%) (0.2%)
0 < 45 < 165 : : .
IQR (CV) : 70 (0.8) : : : : . . .
: : : : : : : :
14 pe_pxl Mean (sd) : 6.2 (13.1) 332 distinct values : 462 0
[numeric] min < med < max: : (100.0%) (0.0%)
-84 < 5.5 < 64.7 . :
IQR (CV) : 14.2 (2.1) : :
: : :
15 me_pxl Mean (sd) : 0.8 (2.1) 174 distinct values : . 462 0
[numeric] min < med < max: : : (100.0%) (0.0%)
-15.8 < 0.7 < 11.8 : :
IQR (CV) : 2.2 (2.6) : :
. : : .
16 ape_pxl Mean (sd) : 10.5 (10) 317 distinct values : 462 0
[numeric] min < med < max: : (100.0%) (0.0%)
0 < 8.2 < 84 : .
IQR (CV) : 10.4 (1) : :
: : . .
17 ape_pxl_10 1. FALSE 187 (40.5%) IIIIIIII 462 0
[logical] 2. TRUE 275 (59.5%) IIIIIIIIIII (100.0%) (0.0%)
18 ape_pxl_20 1. FALSE 57 (12.3%) II 462 0
[logical] 2. TRUE 405 (87.7%) IIIIIIIIIIIIIIIII (100.0%) (0.0%)
19 pxl_cat 1. P10 275 (59.5%) IIIIIIIIIII 462 0
[factor] 2. P20 130 (28.1%) IIIII (100.0%) (0.0%)
3. >=P20 57 (12.3%) II
20 pe_poap Mean (sd) : 6 (13.3) 329 distinct values : 460 2
[numeric] min < med < max: : . (99.6%) (0.4%)
-32.9 < 5.3 < 103.2 : :
IQR (CV) : 15 (2.2) . : :
: : : .
21 me_poap Mean (sd) : 0.7 (2.3) 174 distinct values : 460 2
[numeric] min < med < max: : (99.6%) (0.4%)
-17.1 < 0.6 < 11.8 . :
IQR (CV) : 2.1 (3) : :
: :
22 ape_poap Mean (sd) : 10.5 (10.2) 314 distinct values : 460 2
[numeric] min < med < max: : (99.6%) (0.4%)
0 < 7.9 < 103.2 : .
IQR (CV) : 10.7 (1) : :
: : . .
23 ape_poap_10 1. FALSE 189 (41.1%) IIIIIIII 460 2
[logical] 2. TRUE 271 (58.9%) IIIIIIIIIII (99.6%) (0.4%)
24 ape_poap_20 1. FALSE 57 (12.4%) II 460 2
[logical] 2. TRUE 403 (87.6%) IIIIIIIIIIIIIIIII (99.6%) (0.4%)
25 poap_cat 1. P10 271 (58.9%) IIIIIIIIIII 460 2
[factor] 2. P20 132 (28.7%) IIIII (99.6%) (0.4%)
3. >=P20 57 (12.4%) II
26 agecat 1. < 1 year 71 (15.4%) III 461 1
[factor] 2. 1 to 5 years 209 (45.3%) IIIIIIIII (99.8%) (0.2%)
3. > 5 years 181 (39.3%) IIIIIII
-----------------------------------------------------------------------------------------------------------------------------------------
Show the code
# Bar charts of the discrete columns in df_poap.
DataExplorer::plot_bar(df_poap)
2 columns ignored with more than 50 categories.
study_id: 444 categories
dob: 423 categories
Show the code
# Histograms of the continuous columns in df_poap, 12 bins each.
hist_args <- list(bins = 12)
DataExplorer::plot_histogram(df_poap, geom_histogram_args = hist_args)