pacman::p_load(tidyverse, # for data visualization and wrangling
ggpval, # for p values in plots
car, # for regression
broom, # for model evaluation
pubh, # for glm_coefs
ordinal,
ggpubr, # for data visualization enhancements
janitor, # for data cleaning
visdat, # to vizualise missing data
table1, # to create summary tables
tableone) # to create summary tables with p values
df <- read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vT5jtQKSkP1h0pNGzFvJ-B3HiQuvIxAKXSfzxZVQSiM7wr6Ub61xAs4t13O0ya0BZ6ziZ-anWt5Fcsf/pub?gid=1936031181&single=true&output=csv")
Clean the names
df <- janitor::clean_names(df) # standarize all names from columns, more easier to work with
Remove the names column
df <- select(df, -vards_uzvards)
head(df)
Just to check
rm(p)
rm(p)
Since there’s no difference between hip and l2l4, I will retain just l2l4 and create also a new var for the ordinal analysis
df <- df %>%
mutate(dx = case_when(
dxa_l2_l4 < -2.5 ~ "osteoporosis",
dxa_l2_l4 > -1 ~ "normal",
TRUE ~ "osteopenia"
))
check
table(df$dx)
normal osteopenia osteoporosis
56 46 25
df %>%
ggplot(aes(x = dx)) +
geom_bar()
Just to be sure, check again if there is any difference between hip and l2l4 values
df %>%
pivot_longer(dxa_l2_l4:dxa_hip,
names_to = "dxa",
values_to = "values") %>%
ggplot(aes(x = values, fill = dxa)) +
geom_histogram(bins = 8) +
facet_grid(dxa ~ .) +
theme_minimal() +
labs(title = "DXA values",
subtitle = "Hip and L2~L4",
y = "Count",
x = "DXA") +
scale_fill_discrete(name = "DXA", labels = c("Hip", "L2 L4"))
NA
NA
Check with t test
t.test(df$dxa_hip, df$dxa_l2_l4)
Welch Two Sample t-test
data: df$dxa_hip and df$dxa_l2_l4
t = 0.25641, df = 228.23, p-value = 0.7979
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.3100312 0.4027921
sample estimates:
mean of x mean of y
-1.003226 -1.049606
No significant difference, I will use the one with less missing values
df %>%
select(dxa_l2_l4, dxa_hip) %>%
summary()
dxa_l2_l4 dxa_hip
Min. :-4.20 Min. :-3.600
1st Qu.:-2.30 1st Qu.:-1.800
Median :-1.20 Median :-1.100
Mean :-1.05 Mean :-1.003
3rd Qu.: 0.05 3rd Qu.:-0.200
Max. : 4.40 Max. : 2.000
NA's :3
ok, dxa_l2_l4 has not missing values and dxa_hip has three, so I will use l2_l4
now, remove the unused variables
df <- select(df, c(-c(dxa_hip, dxa_worst)))
visdat::vis_dat(df)
Get variable names
demographics <- tableone::CreateTableOne(vars = c("age", "height", "weight", "bmi"),
strata = "dx",
data = df)
demographics <- tableone::CreateTableOne(vars = c("age", "height", "weight", "bmi"),
strata = "dx",
data = df)
demographics
Stratified by dx
normal osteopenia osteoporosis p test
n 56 46 25
age (mean (SD)) 69.95 (9.10) 69.96 (8.85) 72.44 (8.45) 0.457
height (mean (SD)) 160.11 (5.49) 160.15 (6.27) 159.28 (6.14) 0.813
weight (mean (SD)) 80.20 (16.67) 69.38 (11.08) 64.00 (13.89) <0.001
bmi (mean (SD)) 31.37 (6.28) 27.11 (4.30) 24.96 (4.65) <0.001
write.csv(print(demographics, quote = TRUE, noSpaces = TRUE), file = "TableDemographics.csv")
"Stratified by dx"
"" "normal" "osteopenia" "osteoporosis" "p" "test"
"n" "56" "46" "25" "" ""
"age (mean (SD))" "69.95 (9.10)" "69.96 (8.85)" "72.44 (8.45)" "0.457" ""
"height (mean (SD))" "160.11 (5.49)" "160.15 (6.27)" "159.28 (6.14)" "0.813" ""
"weight (mean (SD))" "80.20 (16.67)" "69.38 (11.08)" "64.00 (13.89)" "<0.001" ""
"bmi (mean (SD))" "31.37 (6.28)" "27.11 (4.30)" "24.96 (4.65)" "<0.001" ""
Export to a table to paste in any doc
write.csv(print(demographics, quote = TRUE, noSpaces = TRUE), file = "TableDemographics.csv")
write.csv(print(measurements, quote = TRUE, noSpaces = TRUE), file = "TableMeasurements.csv")
rm(demographics, measurements)
See
Koo, Terry, and Mae Li. 2016. “A Guideline of Selecting and Reporting Intraclass Correlation Coefficients for Reliability Research.” Journal of Chiropractic Medicine 15 (March). doi:10.1016/j.jcm.2016.02.012.
Shrout, P.E., and J.L. Fleiss. 1979. “Intraclass Correlation: Uses in Assessing Rater Reliability.” Psychological Bulletin 86: 420–28.
pacman::p_load(irr) # package to calculate the ICC
agreement <- df %>%
select(
"x1_viss",
"x1_viss2",
"x1_trab",
"x1_trab2",
"x1_baz_viss",
"x1_baz_viss2",
"x1_baz_trab",
"x1_baz_trab_2",
"c1_axial",
"c1_axial_2",
"c1sagital",
"c1_sagital_2"
)
agreement %>%
select("x1_viss",
"x1_viss2") %>%
filter(x1_viss2 > 0) %>%
irr::icc(.,
model = "twoway",
type = "agreement",
unit = "single")
Single Score Intraclass Correlation
Model: twoway
Type : agreement
Subjects = 29
Raters = 2
ICC(A,1) = 0.898
F-Test, H0: r0 = 0 ; H1: r0 > 0
F(28,29) = 18.5 , p = 4.52e-12
95%-Confidence Interval for ICC Population Values:
0.797 < ICC < 0.951
agreement %>%
select( "x1_trab",
"x1_trab2") %>%
filter(x1_trab2 > 0) %>%
irr::icc(.,
model = "twoway",
type = "agreement",
unit = "single")
Single Score Intraclass Correlation
Model: twoway
Type : agreement
Subjects = 29
Raters = 2
ICC(A,1) = 0.999
F-Test, H0: r0 = 0 ; H1: r0 > 0
F(28,28.9) = 2744 , p = 9.63e-43
95%-Confidence Interval for ICC Population Values:
0.998 < ICC < 1
agreement %>%
select("x1_baz_viss",
"x1_baz_viss2") %>%
filter(x1_baz_viss2 > 0) %>%
irr::icc(.,
model = "twoway",
type = "agreement",
unit = "single")
Single Score Intraclass Correlation
Model: twoway
Type : agreement
Subjects = 25
Raters = 2
ICC(A,1) = 0.997
F-Test, H0: r0 = 0 ; H1: r0 > 0
F(24,18.6) = 669 , p = 1.05e-22
95%-Confidence Interval for ICC Population Values:
0.992 < ICC < 0.999
agreement %>%
select("x1_baz_trab",
"x1_baz_trab_2") %>%
filter(x1_baz_trab_2 > 0) %>%
irr::icc(.,
model = "twoway",
type = "agreement",
unit = "single")
Single Score Intraclass Correlation
Model: twoway
Type : agreement
Subjects = 25
Raters = 2
ICC(A,1) = 0.986
F-Test, H0: r0 = 0 ; H1: r0 > 0
F(24,24.9) = 140 , p = 5.04e-21
95%-Confidence Interval for ICC Population Values:
0.969 < ICC < 0.994
agreement %>%
select("c1_axial",
"c1_axial_2") %>%
filter(c1_axial_2 > 0) %>%
irr::icc(.,
model = "twoway",
type = "agreement",
unit = "single")
Single Score Intraclass Correlation
Model: twoway
Type : agreement
Subjects = 10
Raters = 2
ICC(A,1) = 0.968
F-Test, H0: r0 = 0 ; H1: r0 > 0
F(9,9.02) = 56.4 , p = 7.47e-07
95%-Confidence Interval for ICC Population Values:
0.878 < ICC < 0.992
agreement %>%
select("c1sagital",
"c1_sagital_2") %>%
filter(c1_sagital_2 > 0) %>%
irr::icc(.,
model = "twoway",
type = "agreement",
unit = "single")
Single Score Intraclass Correlation
Model: twoway
Type : agreement
Subjects = 9
Raters = 2
ICC(A,1) = 0.972
F-Test, H0: r0 = 0 ; H1: r0 > 0
F(8,8.49) = 77.5 , p = 4.43e-07
95%-Confidence Interval for ICC Population Values:
0.887 < ICC < 0.994
rm(agreement)
Every measurement has almost perfect reliability
df %>%
ggplot(aes(x = age, fill = dx)) +
geom_histogram(bins = 8) +
facet_grid(dx~.) +
theme_minimal() +
labs(title = "Age Histogram by dx",
x = "Age",
y = "Count") +
theme(legend.position = "none")
df %>%
ggplot(aes(x = height, fill = dx)) +
geom_histogram(bins = 5) +
facet_grid(dx ~ .) +
theme_minimal() +
labs(title = "Height Histogram by dx",
x = "Height",
y = "Count") +
theme(legend.position = "none")
df %>%
ggplot(aes(x = weight, fill = dx)) +
geom_histogram(bins = 5) +
facet_grid(dx~.) +
theme_minimal() +
labs(title = "Weight Histogram by dx",
x = "Weight",
y = "Count") +
theme(legend.position = "none")
df %>%
ggplot(aes(x = bmi, fill = dx)) +
geom_histogram(bins = 7) +
facet_grid(dx~.) +
theme_minimal() +
labs(title = "BMI Histogram by dx",
x = "BMI",
y = "Count") +
theme(legend.position = "none")
I will omit the viss and only consider corr, trab and bas measurements by area and dx
long_df_measurements <- df %>%
pivot_longer(x1_trab:x4_cor_viss,
# columns to merge
names_to = "clinical_variable",
#name of the new colum with the names
values_to = "value") %>%
filter(!str_detect(clinical_variable, "2$")) %>% # leave out second measurement
filter(!str_detect(clinical_variable, "_viss")) %>% # omit all viss values
na.omit(values) %>% # omit NA values
select(dx:value, age, height, weight, bmi,
md_vol_all, mx_vol, dxa_l2_l4) %>% # leave demographic variables
mutate("vol_all" = md_vol_all + mx_vol) %>% # create a new var with the sum of volumes %>%
separate(clinical_variable,
into = c("area", "clin_variable"))
Expected 2 pieces. Additional pieces discarded in 92 rows [3, 4, 7, 8, 15, 16, 19, 20, 35, 36, 39, 40, 47, 48, 51, 52, 55, 56, 59, 60, ...].
table(long_df_measurements$clin_variable)
bas baz cor trab
15 77 72 72
Fit a linear model
response variable = value predictors = dx_l2l4 covariates = dx
model_norm <- lm(value ~ dxa_l2_l4 +
dx +
height +
weight |
clin_variable +
area +
bmi +
age,
data = long_df_measurements)
Error in dxa_l2_l4 + dx : argumento no-numérico para operador binario
model_norm %>%
Anova()
Anova Table (Type II tests)
Response: value
Sum Sq Df F value Pr(>F)
dxa_l2_l4 13336 1 27.2547 4.093e-07 ***
dx 8932 2 9.1273 0.0001551 ***
height 1664 1 3.4005 0.0665078 .
weight 4340 1 8.8694 0.0032219 **
clin_variable 14860 3 10.1233 2.814e-06 ***
area 7924 3 5.3979 0.0013279 **
bmi 3400 1 6.9489 0.0089787 **
age 19959 1 40.7899 9.805e-10 ***
Residuals 108626 222
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
model_norm %>%
summary()
Call:
lm(formula = value ~ dxa_l2_l4 + dx + height + weight + clin_variable +
area + bmi + age, data = long_df_measurements)
Residuals:
Min 1Q Median 3Q Max
-55.975 -14.386 -0.437 13.342 69.072
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -258.6715 250.3890 -1.033 0.302691
dxa_l2_l4 8.0528 1.5425 5.221 4.09e-07 ***
dxosteopenia 9.5624 5.1031 1.874 0.062267 .
dxosteoporosis 33.5509 8.5157 3.940 0.000109 ***
height 2.8098 1.5237 1.844 0.066508 .
weight -4.9408 1.6590 -2.978 0.003222 **
clin_variablebaz -0.4067 6.8782 -0.059 0.952905
clin_variablecor 17.7540 6.8318 2.599 0.009984 **
clin_variabletrab 14.7460 6.8318 2.158 0.031968 *
areax2 -1.3062 4.1390 -0.316 0.752609
areax3 -12.1019 4.3669 -2.771 0.006058 **
areax4 -13.5341 4.4128 -3.067 0.002431 **
bmi 11.3075 4.2895 2.636 0.008979 **
age -1.4119 0.2211 -6.387 9.80e-10 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 22.12 on 222 degrees of freedom
Multiple R-squared: 0.3503, Adjusted R-squared: 0.3122
F-statistic: 9.206 on 13 and 222 DF, p-value: 4.046e-15
Ok, seems than there is a significant correlation for the measurement value for
dx (p = 4.09e-07) when dx = dxosteoporosis (p = 0.000109)
areax3 (p = 0.006058) areax4 (p = 0.002431) weight (p = 0.003222) age (p = 9.80e-10)
The p values for the correlation coefficients are:
My reccomendation is to use the p values from the regression, since are adjusted for all the variables
Coefficient | Pr(>|t|) | |
---|---|---|
Constant | -258.67 (-661.85, 144.51) | 0.207 |
DXA L2-L4 | 8.05 (5.44, 10.66) | < 0.001 |
Osteopenia | 9.56 (1.15, 17.98) | 0.026 |
Osteoporosis | 33.55 (19.25, 47.85) | < 0.001 |
Height | 2.81 (0.39, 5.23) | 0.023 |
Weight | -4.94 (-7.5, -2.38) | < 0.001 |
baz | -0.41 (-11.23, 10.42) | 0.941 |
cor | 17.75 (8.15, 27.36) | < 0.001 |
trab | 14.75 (2.36, 27.13) | 0.02 |
area 2 | -1.31 (-8.19, 5.58) | 0.709 |
area 3 | -12.1 (-20.02, -4.19) | 0.003 |
area 4 | -13.53 (-22.75, -4.32) | 0.004 |
BMI | 11.31 (4.5, 18.11) | 0.001 |
Age | -1.41 (-1.81, -1.01) | < 0.001 |
This table is interpreted as follows: the constant value of the measurements is -258 and, e.g. when dx1l2l4 change in one unit, then value change 8.05 (p<0.001)
And now, all the analysis for the grey values
long_df_grey <- df %>%
pivot_longer(c1_axial:c2sagital,
names_to = "area_grey_values",
values_to = "grey_values") %>%
select(
area_grey_values,
grey_values,
dx,
dxa_l2_l4,
age,
height,
weight,
bmi,
md_vol_all,
mx_vol,
dxa_l2_l4
) %>% # leave demographic variables
mutate("vol_all" = md_vol_all + mx_vol)
Fit a linear model
response variable = grey_values predictors = dx_l2l4 all the rest
model_norm_grey <- lm(grey_values ~ dxa_l2_l4 +
dx +
height +
weight +
clin_variable +
area_grey_values +
bmi +
age,
data = long_df_grey)
Error in eval(predvars, data, env) : objeto 'clin_variable' no encontrado
model_norm_grey %>%
Anova()
Anova Table (Type II tests)
Response: grey_values
Sum Sq Df F value Pr(>F)
dxa_l2_l4 95009 1 9.1931 0.002556 **
dx 66081 2 3.1970 0.041724 *
height 67136 1 6.4961 0.011110 *
weight 17875 1 1.7296 0.189071
bmi 39602 1 3.8319 0.050845 .
age 317439 1 30.7155 4.85e-08 ***
Residuals 5146738 498
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
model_norm_grey %>%
summary()
Call:
lm(formula = grey_values ~ dxa_l2_l4 + dx + height + weight +
bmi + age, data = long_df_grey)
Residuals:
Min 1Q Median 3Q Max
-287.452 -67.093 3.791 70.520 269.755
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1563.9120 440.6428 3.549 0.000423 ***
dxa_l2_l4 16.8780 5.5666 3.032 0.002556 **
dxosteopenia -27.7096 15.4421 -1.794 0.073352 .
dxosteoporosis -57.5956 22.8476 -2.521 0.012018 *
height -6.8491 2.6873 -2.549 0.011110 *
weight 3.6102 2.7451 1.315 0.189071
bmi -14.0209 7.1626 -1.958 0.050845 .
age -2.9161 0.5262 -5.542 4.85e-08 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 101.7 on 498 degrees of freedom
(2 observations deleted due to missingness)
Multiple R-squared: 0.193, Adjusted R-squared: 0.1817
F-statistic: 17.02 on 7 and 498 DF, p-value: < 2.2e-16
Ok, seems than there is a significant correlation for the grey value for
dx (p = 0.002556) when dx = dxosteoporosis (p = 0.012018)
and weight (p = 0.011110) age (p = 4.85e-08)
The p values for the correlation coefficients are:
My reccomendation is to use the p values from the regression, since are adjusted for all the variables
Coefficient | Pr(>|t|) | |
---|---|---|
Constant | 1563.91 (451.86, 2675.97) | 0.006 |
DXA L2-L4 | 16.88 (1.48, 32.27) | 0.032 |
Osteopenia | -27.71 (-73.02, 17.6) | 0.230 |
Osteoporosis | -57.6 (-117.9, 2.7) | 0.061 |
Height | -6.85 (-13.44, -0.26) | 0.042 |
Weight | 3.61 (-3.31, 10.53) | 0.306 |
BMI | -14.02 (-32.57, 4.53) | 0.138 |
Age | -2.92 (-4.69, -1.14) | 0.001 |
This table is interpreted as follows: the constant value of the grey values is 1563.91 and, e.g. when dx1l2l4 change in one unit, then value change 16.88 (p=0.032)
df %>% pivot_longer(x1_viss:x4_cor_viss, # columns to merge names_to = “clinical_variable”, #name of the new colum with the names values_to = “value”) %>% #name of the new column with values select(clinical_variable, value, dx) %>% filter(!str_detect(clinical_variable, “cor_viss”)) %>% # we filter OUT (!) filter(!str_detect(clinical_variable, “2$”)) %>% # again filter(clinical_variable != “x1_cort_viss”) %>% # again filter(!str_detect(clinical_variable, “bas”)) %>% filter(!str_detect(clinical_variable, “baz”)) %>% filter(!str_detect(clinical_variable, "_viss“)) %>% # omit all viss values # count(clinical_variable) # just to check separate(clinical_variable, into = c(”zone“,”clinical_variable_2“)) %>% # filter(str_detect(clinical_variable,”trab“)) %>% ggplot(aes(x = clinical_variable_2, y = value, fill = dx)) + geom_boxplot() + geom_jitter(alpha = .1) + # facet_grid(.~zone)+ # facetting (rows ~ columns) labs( title =”All bone by areas“, # subtitle =”this is the subtitle“, y =”mm“, x =”Variable" ) + theme_minimal()