three dates changed, #55, #136 & #183
General reading: Bewick, V., Cheek, L., Ball, J., 2004. Statistics review 12: survival analysis. Crit. Care 8, 389โ394. https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1065034/
See https://docs.google.com/document/d/1XiqDmTghzrRI-z6ytJdszlOd_gW6S3AKT7_ZKSSP5rM/edit
consider a trial with 2 year maximum follow-up 6 month uniform enrollment Control/HT hazards = 0.1/0.2 per 1 person-year drop out hazard 0.1 per 1 person-year alpha = 0.025 (1-sided) power = 0.80 (default beta=.1)
ss <- nSurvival(
lambda1 = .1 , ## hazard rate control
lambda2 = .25, ## hazard rate HT
eta = .1, ## equal drop out rate for both groups
Ts = 9, ## maximum study duration
Tr = 3, ## accrual (recruitment) duration
sided = 1,
alpha = .025,
ratio = 1,
beta = .2
)
ss # default values
Fixed design, two-arm trial with time-to-event
outcome (Lachin and Foulkes, 1986).
Study duration (fixed): Ts=9
Accrual duration (fixed): Tr=3
Uniform accrual: entry="unif"
Control median: log(2)/lambda1=6.9
Experimental median: log(2)/lambda2=2.8
Censoring median: log(2)/eta=6.9
Control failure rate: lambda1=0.1
Experimental failure rate: lambda2=0.25
Censoring rate: eta=0.1
Power: 100*(1-beta)=80%
Type I error (1-sided): 100*alpha=2.5%
Equal randomization: ratio=1
Sample size based on hazard ratio=2.5 (type="rr")
Sample size (computed): n=72
Events required (computed): nEvents=37
See 1. Post hoc power analysis: an idea whose time has passed? M. Levine, M. H. Ensom. Pharmacotherapy 2001: 21(4); 405-9. 2. Confidence limit analyses should replace power calculations in the interpretation of epidemiologic studies. A. H. Smith, M. N. Bates. Epidemiology 1992: 3(5); 449-52. โ
# Example 14.42 in Rosner B. Fundamentals of Biostatistics.
# (6-th edition). (2006) page 809
powerCT.default(nE = 39, # number of participants in the experimental group.
nC = 39, # number of participants in the control group.
pE = 0.6, # probability of failure in group E (experimental group) over the maximum time period of the study (t years).
pC = 0.5, # probability of failure in group C (control group) over the maximum time period of the study (t years).
RR = 0.1, # postulated hazard ratio.
alpha = 0.05) #ype I error rate
[1] 0.9996618
Yes, more than enough.
df <- df %>%
select(-c(Comments, `Marca temporal`)) # remove unwanted columns
Check variables
Verify the completeness of the data
visdat::vis_dat(df)
Convert dates
df$`Treatment date` <- lubridate::dmy(df$`Treatment date`)
df$`Date of the last radiograph BEFORE the treatment` <- lubridate::dmy(df$`Date of the last radiograph BEFORE the treatment`)
df$`Date First Time noted as having exfoliated: (Hall)` <- lubridate::dmy(df$`Date First Time noted as having exfoliated: (Hall)`)
df$`Date First Time noted as having exfoliated: (Contralateral)` <- lubridate::dmy(df$`Date First Time noted as having exfoliated: (Contralateral)`)
df$`Date of the radiograph 2` <- lubridate::dmy(df$`Date of the radiograph 2`)
df$`Birth Date` <- lubridate::dmy(df$`Birth Date`)
Add id sequential number
df <- df %>% mutate(id = row_number())
Change tooth variable
df$`Hall Crown Tooth` <- as.character(df$`Hall Crown Tooth`)
df$`Contralateral Tooth` <- as.character(df$`Contralateral Tooth`)
Three dates were corrected (treatment and birth changed: 55, 183 and 136.
df <- df %>%
mutate(age = interval(ymd(`Birth Date`), ymd(`Treatment date`)) / years(1),
age = if_else(age < 0, age + 100, age))
df <- df %>%
janitor::clean_names() %>%
janitor::remove_empty(c("rows", "cols"))
Check age
df %>%
ggplot(aes(x = age)) +
geom_histogram(bins = 10) +
scale_y_continuous(labels = scales::number_format(accuracy = 1)) +
scale_x_continuous(labels = scales::number_format(accuracy = 1)) +
facet_wrap(~ gender) +
theme_pubclean() +
labs(
title = "Age by gender",
y = "Count",
x = "Age (years"
)
df <- df %>%
mutate(hall_molar = case_when(
hall_crown_tooth %in% c(55, 65, 75, 85) ~ "1st",
TRUE ~ "2nd"
)) %>%
mutate(contralateral_molar = case_when(
contralateral_tooth %in% c(55, 65, 75, 85) ~ "1st",
TRUE ~ "2nd"
))
df <- df %>%
mutate(hall_jaw = case_when(
hall_crown_tooth %in% c(55, 65) ~ "Maxillary H Molar",
TRUE ~ "Mandibular H molar"
)) %>%
mutate((contralateral_jaw = case_when(
contralateral_tooth %in% c(55, 65) ~ "Maxillary C Molar",
TRUE ~ "Mandibular C molar"
)))
Dataset ready for analysis
df %>%
ggplot(aes( x = age)) +
geom_histogram(bins = 10) +
scale_y_continuous(labels = scales::number_format(accuracy = 1)) +
facet_wrap( ~ gender) +
labs(title = "Age at the time of the treatment by gender",
y = "Count",
x = "Age") +
theme_pubclean()
There is NO SIGNIFICANT difference in the age mean by gender The age means of girls and boys are 7.450262, 8.0041041 respectively, and this difference is NON significant (p-value = 0.0264872 )
More info
t.test(age ~ gender, data = df)
Welch Two Sample t-test
data: age by gender
t = -1.0985, df = 35.738, p-value = 0.2793
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.5766407 0.4689565
sample estimates:
mean in group Female mean in group Male
7.450262 8.004104
table(df$gender)
Female Male
20 19
The proportion of women is almost half, so obviously the proportion test will deliver a non-significant value for the difference.
prop.test(x = 20,
n = 39,
p = 0.5,
correct = FALSE)
1-sample proportions test without continuity correction
data: 20 out of 39, null probability 0.5
X-squared = 0.025641, df = 1, p-value = 0.8728
alternative hypothesis: true p is not equal to 0.5
95 percent confidence interval:
0.3619936 0.6613482
sample estimates:
p
0.5128205
df %>%
ggplot(aes(x = gender)) +
geom_bar() +
labs(title = "Gender distribution",
y = "Count",
x = "Gender") +
theme_pubclean()
df %>%
ggplot(aes(x = hall_crown_tooth, fill = gender)) +
geom_bar() +
facet_grid(gender~. ) +
labs(title = "Tooth distribution by gender",
y = "Count",
x = "Tooth") +
theme_pubclean() +
guides(fill = FALSE) # remove the legend
df %>%
mutate(molar = case_when(
hall_crown_tooth %in% c(55, 65) ~ "UFPM",
hall_crown_tooth %in% c(54, 64) ~ "USPM",
hall_crown_tooth %in% c(85, 75) ~ "LFPM",
TRUE ~ "LSPM",
)) %>%
select(gender, molar, hall_crown_tooth) %>%
select(gender, molar, hall_crown_tooth) %>%
ggplot(aes(x = molar, fill = gender)) +
geom_bar() +
labs(title = "Molar distribution by gender",
y = "Count", x = "Molar type", fill = "Gender") +
theme_pubclean() +
facet_grid(gender~.) +
guides(fill = FALSE) # remove the legend
table by gender
df %>%
janitor::tabyl(hall_molar, gender ) %>%
janitor::adorn_totals("col")
hall_molar Female Male Total
1st 11 14 25
2nd 9 5 14
Molar distribution by gender
df %>%
ggplot(aes(x = hall_molar)) +
geom_bar() +
facet_wrap( ~ gender) +
labs(title = "Molar distribution by gender",
y = "Count",
x = "Molar") +
theme_pubclean()
plot teeth pairs
df %>%
ggplot(aes(x = hall_molar,
y = contralateral_molar,
color = gender)) +
geom_jitter(alpha = 0.5, width = .1, height = .1) +
theme_pubclean() +
labs(title = "Distribution of molar-pairs by gender",
y = "Contralateral molar",
x = "HT molar",
color = "Gender")
chisq.test(table(df$gender, df$hall_crown_tooth))
Chi-squared approximation may be incorrect
Pearson's Chi-squared test
data: table(df$gender, df$hall_crown_tooth)
X-squared = 8.1797, df = 7, p-value = 0.317
DONE
COMBINE 55/65, etc
################## DONE #####################
table1::table1( ~ age +
hall_molar +
contralateral_molar +
type_of_radiograph +
demirjian_index_perm_tooth_hall_tooth_crown +
demirjian_index_perm_tooth_contralateral
| gender ,
data = df)
Female (n=20) |
Male (n=19) |
Overall (n=39) |
|
---|---|---|---|
age | |||
Mean (SD) | 7.45 (1.46) | 8.00 (1.67) | 7.72 (1.57) |
Median [Min, Max] | 7.35 [4.15, 10.0] | 7.74 [5.15, 11.5] | 7.70 [4.15, 11.5] |
hall_molar | |||
1st | 11 (55.0%) | 14 (73.7%) | 25 (64.1%) |
2nd | 9 (45.0%) | 5 (26.3%) | 14 (35.9%) |
contralateral_molar | |||
1st | 11 (55.0%) | 14 (73.7%) | 25 (64.1%) |
2nd | 9 (45.0%) | 5 (26.3%) | 14 (35.9%) |
type_of_radiograph | |||
Bitewing | 8 (40.0%) | 7 (36.8%) | 15 (38.5%) |
OPT | 12 (60.0%) | 12 (63.2%) | 24 (61.5%) |
demirjian_index_perm_tooth_hall_tooth_crown | |||
E | 5 (25.0%) | 4 (21.1%) | 9 (23.1%) |
F | 3 (15.0%) | 6 (31.6%) | 9 (23.1%) |
G | 3 (15.0%) | 2 (10.5%) | 5 (12.8%) |
H | 1 (5.0%) | 0 (0%) | 1 (2.6%) |
Not possible to determine (bitewing) | 8 (40.0%) | 7 (36.8%) | 15 (38.5%) |
demirjian_index_perm_tooth_contralateral | |||
E | 4 (20.0%) | 5 (26.3%) | 9 (23.1%) |
F | 4 (20.0%) | 6 (31.6%) | 10 (25.6%) |
G | 3 (15.0%) | 0 (0%) | 3 (7.7%) |
H | 1 (5.0%) | 1 (5.3%) | 2 (5.1%) |
Not possible to determine (bitewing) | 8 (40.0%) | 7 (36.8%) | 15 (38.5%) |
TO DO
COMBINE 55/65, etc AND graph by teeth ################## TO DO #####################
df %>%
filter(
demirjian_index_perm_tooth_hall_tooth_crown != "Not possible to determine (bitewing)" |
demirjian_index_perm_tooth_contralateral != "Not possible to determine (bitewing)"
) %>%
ggplot(
aes(x = demirjian_index_perm_tooth_hall_tooth_crown,
y = demirjian_index_perm_tooth_contralateral,
color = hall_molar)
) +
# geom_point() +
geom_jitter(alpha = 0.5, width = .15, height = .15) +
labs(title = "Demirjian status of HT and contralateral baseline",
y = "HT",
x = "CT") +
theme_pubclean()
NA
TO DO
Compare 1st prim molar with 2d prim molar with Demirjian and exof_time ################## TO DO #####################
Check
df %>%
filter(
demirjian_index_perm_tooth_hall_tooth_crown != "Not possible to determine (bitewing)" |
demirjian_index_perm_tooth_contralateral != "Not possible to determine (bitewing)"
) %$% # this also do the trick!
fisher.test(
table(
demirjian_index_perm_tooth_hall_tooth_crown,
.$demirjian_index_perm_tooth_contralateral
)
)
Fisher's Exact Test for Count Data
data: table(demirjian_index_perm_tooth_hall_tooth_crown, .$demirjian_index_perm_tooth_contralateral)
p-value = 0.0001543
alternative hypothesis: two.sided
Ok, so maybe any difference in the survival by gender could be explained by the demirjian status at the beggining of the follow-up
df %>%
ggplot(aes(x = status_of_the_contralateral_tooth)) +
geom_bar() +
labs(title = "Status of the contralateral tooth",
y = "Count",
x = "Status") +
theme_pubclean()
DONE
change scale from 4, 3, 2, 1, 0, exf ################# DONE #####################
df$degree_of_primary_tooth_resorption_contralateral <-
fct_relevel(
df$degree_of_primary_tooth_resorption_contralateral,
"4",
"3",
"2",
"1",
"0",
"exfoliated"
)
df$degree_of_primary_tooth_resorption_hall_tooth_crown <-
fct_relevel(
df$degree_of_primary_tooth_resorption_hall_tooth_crown,
"4",
"3",
"2",
"1",
"0",
"exfoliated"
)
df %>%
ggplot(
aes(x = degree_of_primary_tooth_resorption_hall_tooth_crown,
y = degree_of_primary_tooth_resorption_contralateral,
color = gender)
) +
geom_jitter(alpha = 0.8) +
labs(titl = "Baseline degree of root resorption",
y = "Contralateral",
x = "HT") +
theme_pubclean() +
labs(title = "Degree of root resorption at the beginning", color = "Gender")
The same, now for 1st or 2nd molar
df %>%
ggplot(
aes(x = degree_of_primary_tooth_resorption_hall_tooth_crown,
y = degree_of_primary_tooth_resorption_contralateral,
color = hall_molar)
) +
geom_jitter(alpha = 0.8,
width = 0.15,
height = 0.15) +
labs(titl = "Baseline degree of root resorption",
y = "Contralateral",
x = "HT") +
theme_pubclean() +
labs(title = "Degree of root resorption at the beginning", color = "Molar")
Create new var = days until exfoliation
df$Hall_group <-
difftime(
df$date_first_time_noted_as_having_exfoliated_hall,
df$treatment_date,
units = c("days")
) # for Hall
df$Contralateral_group <-
difftime(
df$date_first_time_noted_as_having_exfoliated_contralateral,
df$treatment_date,
units = c("days")
) # for contralateral
Just to see the correlation between the exfoliation time between groups
CHECK HERE 20190604
DONE
separate by 1st and 2d molars ################## DONE #####################
check the correlation.
I see thereโs a directly proportional correlation. This strongly suggests that as a dte with HT exfoliates, so does its contralateral.
lm(Hall_group ~ Contralateral_group, data = df)
Call:
lm(formula = Hall_group ~ Contralateral_group, data = df)
Coefficients:
(Intercept) Contralateral_group
191.7337 0.7965
So, for every extra unit of exfoliation time for a Contralateral tooth, a HT also increase in 0.79 unit of time.
create a new column event full with 1
df$event <- 1
Now gather Hall and contralateral
df <- df %>%
gather(key = "group",
value = "exf_time",
Hall_group:Contralateral_group)
DONE
change 1st and 2d molars ################## DONE #####################
Same for only for molar type
df %>%
ggplot(aes(x = exf_time, fill = gender)) +
geom_histogram(bins = 10) +
facet_grid(group~hall_molar) +
theme_pubclean() +
labs(title = "Exfoliation time histogram by molar, group and gender",
y = "Count",
x = "Exfoliation time (days",
gender = "Gender")
QUESTION: What could this bimodal distribution be?
surv_object <- survival::Surv(time = df$exf_time, event = df$event)
Check the survival object
surv_object
[1] 1636 2380 1077 569 2025 651 1930 823 1813 1877 1027 599 2646 1841 835 1889 1062
[18] 364 1442 1225 1098 899 595 1157 1552 1188 475 917 1888 865 2618 409 1195 2112
[35] 1075 2025 532 1312 863 1636 2380 1077 569 2025 651 1930 823 1813 1877 1027 599
[52] 2646 1841 835 1889 1062 364 1442 1225 1098 899 595 1157 1552 1188 475 917 1888
[69] 865 2618 409 1195 2112 1075 2025 532 1312 863 1819 2772 961 937 2116 712 2345
[86] 736 1813 1938 1153 1047 2260 1841 1317 1344 1427 1306 1442 876 1251 1159 291 420
[103] 1769 1335 21 1291 1525 1163 2749 161 1344 1870 2207 2025 660 1251 1345 1819 2772
[120] 961 937 2116 712 2345 736 1813 1938 1153 1047 2260 1841 1317 1344 1427 1306 1442
[137] 876 1251 1159 291 420 1769 1335 21 1291 1525 1163 2749 161 1344 1870 2207 2025
[154] 660 1251 1345
Convert the difftime in number to extract any wrong value, i.e.
df$exf_time_number <- as.numeric(df$exf_time) # convert diff time in a new var
which(df$exf_time_number < 0) # check if is any wrong time
integer(0)
##############################################
########## DELETE THIS STEP ##################
# df <- df %>%
# filter(allocation_num != 78)
########## DELETE THIS STEP ##################
##############################################
Master table for survival analysis
fit1 <- survival::survfit(surv_object ~ group, data = df)
summary(fit1)
Call: survfit(formula = surv_object ~ group, data = df)
group=Contralateral_group
time n.risk n.event survival std.err lower 95% CI upper 95% CI
21 78 2 0.9744 0.0179 0.93991 1.000
161 76 2 0.9487 0.0250 0.90101 0.999
291 74 2 0.9231 0.0302 0.86580 0.984
420 72 2 0.8974 0.0344 0.83257 0.967
660 70 2 0.8718 0.0379 0.80067 0.949
712 68 2 0.8462 0.0409 0.76976 0.930
736 66 2 0.8205 0.0435 0.73962 0.910
876 64 2 0.7949 0.0457 0.71013 0.890
937 62 2 0.7692 0.0477 0.68119 0.869
961 60 2 0.7436 0.0494 0.65274 0.847
1047 58 2 0.7179 0.0510 0.62472 0.825
1153 56 2 0.6923 0.0523 0.59710 0.803
1159 54 2 0.6667 0.0534 0.56985 0.780
1163 52 2 0.6410 0.0543 0.54294 0.757
1251 50 4 0.5897 0.0557 0.49009 0.710
1291 46 2 0.5641 0.0561 0.46413 0.686
1306 44 2 0.5385 0.0564 0.43845 0.661
1317 42 2 0.5128 0.0566 0.41307 0.637
1335 40 2 0.4872 0.0566 0.38798 0.612
1344 38 4 0.4359 0.0561 0.33864 0.561
1345 34 2 0.4103 0.0557 0.31441 0.535
1427 32 2 0.3846 0.0551 0.29048 0.509
1442 30 2 0.3590 0.0543 0.26685 0.483
1525 28 2 0.3333 0.0534 0.24354 0.456
1769 26 2 0.3077 0.0523 0.22057 0.429
1813 24 2 0.2821 0.0510 0.19795 0.402
1819 22 2 0.2564 0.0494 0.17571 0.374
1841 20 2 0.2308 0.0477 0.15389 0.346
1870 18 2 0.2051 0.0457 0.13253 0.318
1938 16 2 0.1795 0.0435 0.11168 0.288
2025 14 2 0.1538 0.0409 0.09142 0.259
2116 12 2 0.1282 0.0379 0.07188 0.229
2207 10 2 0.1026 0.0344 0.05320 0.198
2260 8 2 0.0769 0.0302 0.03566 0.166
2345 6 2 0.0513 0.0250 0.01974 0.133
2749 4 2 0.0256 0.0179 0.00653 0.101
2772 2 2 0.0000 NaN NA NA
group=Hall_group
time n.risk n.event survival std.err lower 95% CI upper 95% CI
364 78 2 0.9744 0.0179 0.93991 1.000
409 76 2 0.9487 0.0250 0.90101 0.999
475 74 2 0.9231 0.0302 0.86580 0.984
532 72 2 0.8974 0.0344 0.83257 0.967
569 70 2 0.8718 0.0379 0.80067 0.949
595 68 2 0.8462 0.0409 0.76976 0.930
599 66 2 0.8205 0.0435 0.73962 0.910
651 64 2 0.7949 0.0457 0.71013 0.890
823 62 2 0.7692 0.0477 0.68119 0.869
835 60 2 0.7436 0.0494 0.65274 0.847
863 58 2 0.7179 0.0510 0.62472 0.825
865 56 2 0.6923 0.0523 0.59710 0.803
899 54 2 0.6667 0.0534 0.56985 0.780
917 52 2 0.6410 0.0543 0.54294 0.757
1027 50 2 0.6154 0.0551 0.51636 0.733
1062 48 2 0.5897 0.0557 0.49009 0.710
1075 46 2 0.5641 0.0561 0.46413 0.686
1077 44 2 0.5385 0.0564 0.43845 0.661
1098 42 2 0.5128 0.0566 0.41307 0.637
1157 40 2 0.4872 0.0566 0.38798 0.612
1188 38 2 0.4615 0.0564 0.36317 0.587
1195 36 2 0.4359 0.0561 0.33864 0.561
1225 34 2 0.4103 0.0557 0.31441 0.535
1312 32 2 0.3846 0.0551 0.29048 0.509
1442 30 2 0.3590 0.0543 0.26685 0.483
1552 28 2 0.3333 0.0534 0.24354 0.456
1636 26 2 0.3077 0.0523 0.22057 0.429
1813 24 2 0.2821 0.0510 0.19795 0.402
1841 22 2 0.2564 0.0494 0.17571 0.374
1877 20 2 0.2308 0.0477 0.15389 0.346
1888 18 2 0.2051 0.0457 0.13253 0.318
1889 16 2 0.1795 0.0435 0.11168 0.288
1930 14 2 0.1538 0.0409 0.09142 0.259
2025 12 4 0.1026 0.0344 0.05320 0.198
2112 8 2 0.0769 0.0302 0.03566 0.166
2380 6 2 0.0513 0.0250 0.01974 0.133
2618 4 2 0.0256 0.0179 0.00653 0.101
2646 2 2 0.0000 NaN NA NA
917### Technical explanation and essential concepts
Follow-up data are censored: one does not know the exact survival time, only that it is greater or less than a certain time. Being X the real time of exfoliation and T the time censored, what we observe is the minimum of X and T together with an indication of which is which. With this, we create an S(t) survival function that measures the probability of exfoliation at a given moment.
The analysis process begins with the creation of a survival object.
Once we have the survival object, we can calculate the survival function for census data on the right.
For all the teeth is
survival::survfit(surv_object ~ group, data = df)
Call: survfit(formula = surv_object ~ group, data = df)
n events median 0.95LCL 0.95UCL
group=Contralateral_group 78 78 1335 1251 1442
group=Hall_group 78 78 1157 1062 1442
So, the median of exfoliation time for contralateral is 1335 days (divided by 30 = 44.5 months approx), and for Hall is 1098 days (36.6 months). The lower 95%IC are 1163 days for CL and 917 for HT.
NEW: for Hall is 1157 days (38.5 months). The lower 95%IC are 1251 days for CL and 1062 for HT.
In order to see if there are differences in the groups, it is necessary to observe the complete survival curve and to verify if somewhere the confidence intervals do not overlap. Now letโs graph
# jpeg("survival-grey-months.jpg")
survminer::ggsurvplot(
fit1,
data = df,
conf.int = TRUE,
# Add confidence interval
pval = TRUE,
pval.coord = c(1400, 0.10),
# add p value
xscale = "d_m",
# convert days in years
xlim = c(0, 3350),
# present narrower X axis, but not affect
# survival estimates.
# conf.int.style = "step",
# customize style of confidence intervals
# surv.median.line = "hv",
# add the median survival pointer.
legend.labs = c("CL group", "HT group"),
fun = "event",
########## risk table #########
risk.table = TRUE,
risk.table.y.text.col = TRUE,
risk.table.y.text = TRUE,
risk.table.height = 0.34,
risk.table.title = "Exfoliation events",
palette = c("#333333", "#999999"),# custom color palettes
# palette = "grey" # for bw publication if required
) +
labs(title = "Exfoliation Time Probability Plot (Months)",
x = "Time in months",
y = "Exfoliation Probability")
# Close the pdf file
# dev.off()
# jpeg("survival-grey-years.jpg")
survminer::ggsurvplot(
fit1,
data = df,
conf.int = TRUE,
# Add confidence interval
pval = TRUE,
pval.coord = c(1400, 0.10),
# add p value
xscale = "d_y",
# convert days in years
xlim = c(0, 3350),
# present narrower X axis, but not affect
# survival estimates.
# conf.int.style = "step",
# customize style of confidence intervals
# surv.median.line = "hv",
# add the median survival pointer.
legend.labs = c("CL group", "HT group"),
fun = "event",
########## risk table #########
risk.table = TRUE,
risk.table.y.text.col = TRUE,
risk.table.y.text = TRUE,
risk.table.height = 0.34,
risk.table.title = "Exfoliation events",
palette = c("#333333", "#999999"),# custom color palettes
# palette = "grey" # for bw publication if required
) +
labs(title = "Exfoliation Time Probability Plot (Years)",
x = "Time in years",
y = "Exfoliation Probability")
# Close the pdf file
# dev.off()
# jpeg("survival-color-months.jpg")
survminer::ggsurvplot(
fit1,
data = df,
conf.int = TRUE,
# Add confidence interval
pval = TRUE,
pval.coord = c(1400, 0.10),
# add p value
xscale = "d_m",
# convert days in years
xlim = c(0, 3350),
# present narrower X axis, but not affect
# survival estimates.
# conf.int.style = "step",
# customize style of confidence intervals
# surv.median.line = "hv",
# add the median survival pointer.
legend.labs = c("CL group", "HT group"),
fun = "event",
########## risk table #########
risk.table = TRUE,
risk.table.y.text.col = TRUE,
risk.table.y.text = TRUE,
risk.table.height = 0.34,
risk.table.title = "Exfoliation events",
palette = c("#CA3C42", "#28637B"),# custom color palettes
# palette = "grey" # for bw publication if required
) +
labs(title = "Exfoliation Time Probability Plot (Months)",
x = "Time in months",
y = "Exfoliation Probability")
# Close the pdf file
# dev.off()
# jpeg("survival-color-years.jpg")
survminer::ggsurvplot(
fit1,
data = df,
conf.int = TRUE,
# Add confidence interval
pval = TRUE,
pval.coord = c(1400, 0.10),
# add p value
xscale = "d_y",
# convert days in years
xlim = c(0, 3350),
# present narrower X axis, but not affect
# survival estimates.
# conf.int.style = "step",
# customize style of confidence intervals
# surv.median.line = "hv",
# add the median survival pointer.
legend.labs = c("CL group", "HT group"),
fun = "event",
########## risk table #########
risk.table = TRUE,
risk.table.y.text.col = TRUE,
risk.table.y.text = TRUE,
risk.table.height = 0.34,
risk.table.title = "Exfoliation events",
palette = c("#CA3C42", "#28637B"),# custom color palettes
# palette = "grey" # for bw publication if required
) +
labs(title = "Exfoliation Time Probability Plot (Years)",
x = "Time in years",
y = "Exfoliation Probability")
# Close the pdf file
# dev.off()
fit1 <- survival::survfit(surv_object ~ group, data = df)
ggsurvplot(
survival::survfit(surv_object ~ df$group), # survfit object with calculated statistics.
data = df, # data used to fit survival curves.
risk.table = FALSE, # show risk table.
pval = TRUE, # show p-value of log-rank test.
pval.coord = c(1500, 0.30), # position of the legend
conf.int = TRUE, # show confidence intervals for
# point estimates of survival curves.
palette = c("#E7B800", "#2E9FDF"),
xlim = c(0, 3350), # present narrower X axis, but not affect
# survival estimates.
xlab = "Time in days", # customize X axis label.
break.time.by = 365, # break X axis in time intervals by 500.
ggtheme = theme_classic(), # customize plot and risk table with a theme.
# risk.table.y.text.col = T,# colour risk table text annotations.
# risk.table.height = 0.35, # the height of the risk table
# risk.table.y.text = FALSE,# show bars instead of names in text annotations
# in legend of risk table.
conf.int.style = "step",
# customize style of confidence intervals
surv.median.line = "hv",
# add the median survival pointer.
fun = "event",
legend.labs =
c("CL group", "HT group") # change legend labels.
)
autoplot(aareg(Surv(exf_time , event) ~
gender,
data = df))
DONE
just four groups, with molars ################## DONE #####################
autoplot(aareg(Surv(exf_time , event) ~
hall_molar,
data = df))
autoplot(aareg(Surv(exf_time , event) ~
contralateral_molar,
data = df))
The mean age, (sd and range) for
o all of the children when they had the crown fitted
o The same for boys only
o The same for girls only
df %>%
mutate(
birth_date = ymd(birth_date),
treatment_date = ymd(treatment_date),
age_when_HT = floor(decimal_date(treatment_date) - decimal_date(birth_date))
) %>%
summarise("Age at treatment" = mean(age_when_HT), sd = sd(age_when_HT), min = min(age_when_HT), max = max(age_when_HT), range = max - min)
df %>%
mutate(
birth_date = ymd(birth_date),
treatment_date = ymd(treatment_date),
age_when_HT = floor(decimal_date(treatment_date) - decimal_date(birth_date))
) %>%
ggplot(aes(x = age_when_HT)) +
geom_histogram(bins = 10) +
theme_pubr() +
labs(title = "Age at treatment",
x = "Age at treatment", y = "Count")
df %>%
mutate(
birth_date = ymd(birth_date),
treatment_date = ymd(treatment_date),
age_when_HT = floor(decimal_date(treatment_date) - decimal_date(birth_date))
) %>%
group_by(gender) %>%
summarise("Age at treatment" = mean(age_when_HT), sd = sd(age_when_HT), min = min(age_when_HT), max = max(age_when_HT), range = max - min)
df %>%
mutate(
birth_date = ymd(birth_date),
treatment_date = ymd(treatment_date),
age_when_HT = floor(decimal_date(treatment_date) - decimal_date(birth_date))
) %$%
t.test(age_when_HT ~gender, data = .)
Welch Two Sample t-test
data: age_when_HT by gender
t = -2.6298, df = 149.04, p-value = 0.009441
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.0969290 -0.1557026
sample estimates:
mean in group Female mean in group Male
6.900000 7.526316
df %>%
mutate(
birth_date = ymd(birth_date),
treatment_date = ymd(treatment_date),
age_when_HT = floor(decimal_date(treatment_date) - decimal_date(birth_date))
) %>%
ggplot(aes(x = gender, y = age_when_HT)) +
ylim(0, 11) +
geom_boxplot() +
geom_jitter(alpha = 0.5) +
theme_pubclean() +
labs(title = "Age at treatment by gender")
df %>%
mutate(
birth_date = ymd(birth_date),
treatment_date = ymd(treatment_date),
age_when_HT = floor(decimal_date(treatment_date) - decimal_date(birth_date))
) %>%
ggplot(aes(x = age_when_HT)) +
geom_histogram(bins= 10) +
theme_pubr() +
labs(title = "Age at treatment",
x = "Age at treatment", y = "Count") +
facet_grid(gender~.)
ยท The mean age (sd and range) of the children when
o the hall crown tooth exfoliated
o the contralateral tooth exfoliated
df %>%
mutate(
age_when_HT_exfoliated = floor(
decimal_date(date_first_time_noted_as_having_exfoliated_hall) - decimal_date(birth_date)
) ,
age_when_CT_exfoliated = floor(
decimal_date(date_first_time_noted_as_having_exfoliated_contralateral) - decimal_date(birth_date)
)
) %>%
gather(key = "key", value = value, age_when_HT_exfoliated:age_when_CT_exfoliated) %>%
group_by(key) %>%
summarise(
"Mean exfol age" = mean(value),
sd = sd(value),
min = min(value),
max = max(value),
range = max(value) - min(value)
)
df %>%
mutate(
age_when_HT_exfoliated = floor(
decimal_date(date_first_time_noted_as_having_exfoliated_hall) - decimal_date(birth_date)
) ,
age_when_CT_exfoliated = floor(
decimal_date(date_first_time_noted_as_having_exfoliated_contralateral) - decimal_date(birth_date)
)
) %>%
gather(key = "key", value = value, age_when_HT_exfoliated:age_when_CT_exfoliated) %>%
ggplot(aes(x = value)) +
geom_histogram(bins = 8) +
facet_grid(key~.) +
theme_pubr() +
labs(title = "Age at exfoliation time by group", y = "Count", x = "Age (years)")
# install.packages("dataMaid")
#devtools::install_github("ekstroem/dataMaid")
# install.packages("ExPanDaR")
#library(ExPanDaR)
#df$cs_id <- row.names(df)
#df$ts_id <- 1
#ExPanDaR::ExPanD(
# df,
# cs_id = "cs_id",
# ts_id = "ts_id",
# components = c(trend_graph = FALSE, quantile_trend_graph = FALSE)
#)