# Load the raw export, then give the source columns shorter analysis names.
# Each entry of `column_map` reads old_name = "new_name".
dat <- read.csv("~/Dropbox/Temp Files/Thao Quyen/AB476TH.csv")

column_map <- c(
  age_c            = "age",
  assmfnl_c        = "assmfinal",
  cancfu1yr_c      = "canceryr1",
  cancfu2yr_c      = "canceryr2",
  cancscrfu1yr_c   = "cancscryr1",
  density_c        = "density",
  prvmam_c         = "prvmam",
  prvmos_c         = "prvmos",
  prvtim_c         = "prvtim",
  resfnl_c         = "res.final",
  resinit_c        = "res.init",
  scrmam_c         = "scrmam",
  timsnc_c         = "timsnc",
  menopht_c        = "menopht",
  examyear         = "year",
  samp1cens_srmam  = "sampl1cens",
  FP_Recall        = "fp.recall",
  FP_BxRecom       = "fp.bxrecom",
  FP_Sifu          = "fp.sifu",
  Mamm_1st_vs_Subs = "mamm1.subs",
  womanid          = "id",
  racenci_c        = "race"
)

ca <- rename(dat, column_map)
# ---- Recode raw labels into analysis variables ----
# Every recode is driven by a lookup table (raw label -> analysis value) so
# the mapping for each variable can be read in one place.

# Replace each value of `x` that appears in names(map) by the mapped value.
# Values not named in `map` (including NA) are returned unchanged. The result
# is always a character vector, so codes such as "0"/"1" stay categorical.
recode_values <- function(x, map) {
  x <- as.character(x)
  hit <- match(x, names(map))
  found <- !is.na(hit)
  x[found] <- unname(map[hit[found]])
  x
}

# Turn empty / whitespace-only strings into NA so they are counted as missing
# instead of forming an unlabeled category of their own.
blank_to_na <- function(x) {
  x[!nzchar(trimws(x))] <- NA
  x
}

# Ten-year age bands. Left-closed intervals ([40,50), [50,60), ...) give the
# same groups as 40-49, 50-59, ... for whole-year ages and also classify
# fractional ages such as 49.5. Ages outside 40-79 get NA.
ca$ageg <- as.character(cut(
  ca$age,
  breaks = c(40, 50, 60, 70, 80),
  labels = c("40-49", "50-59", "60-69", "70-79"),
  right = FALSE
))

# Final assessment. Category 5 is mapped explicitly; without an entry those
# records would silently turn into NA. "8: Structural missing" and
# "9: Unknown" are deliberately absent from the table and therefore become NA.
assessment_labels <- c(
  "0: Need addtl imagng eval"      = "Need Imaging",
  "1: Negative"                    = "Negative",
  "2: Benign finding"              = "Benign",
  "3: Probably benign fndng"       = "Probably benign",
  "4: Suspiciously abnormal"       = "Susp abnormal",
  "5: Highly sggstv of malignancy" = "Highly suggestive"
)
ca$ass.final <- factor(
  unname(assessment_labels[as.character(ca$assmfinal)]),
  levels = unname(assessment_labels)
)

# Cancer follow-up indicators: "0"/"1" codes, blank entries treated as missing.
yes_no <- c("0: No" = "0", "1: Yes" = "1")
cancer_cols <- c("canceryr1", "canceryr2", "cancscryr1")
ca[cancer_cols] <- lapply(
  ca[cancer_cols],
  function(v) blank_to_na(recode_values(v, yes_no))
)

# Breast density as an ordered set of short labels; any value that is not one
# of the four listed categories becomes NA when the factor is built.
density_labels <- c(
  "1: Almost entirely fat(<25% fibrglandular)" = "Fat",
  "2: Scttrd fibroglandular tiss(25%-50%)"     = "Scattered",
  "3: Heterogeneously dense(50%-75%)"          = "Hetero",
  "4: Extremely dense(>75%)"                   = "Dense"
)
ca$density <- factor(
  recode_values(ca$density, density_labels),
  levels = unname(density_labels)
)

# Previous mammogram (yes/no, unknown -> NA).
ca$prvmam <- recode_values(
  ca$prvmam,
  c("0: No" = "0", "1: Yes" = "1", "9: Unknown" = NA)
)

# Initial and final screening result (negative/positive, 8/9 -> NA).
ca$res.init <- recode_values(
  ca$res.init,
  c("0: Negative" = "0", "1: Positive" = "1", "9" = NA)
)
ca$res.final <- recode_values(
  ca$res.final,
  c(
    "0: Negative" = "0",
    "1: Positive" = "1",
    "8: Structural missing" = NA,
    "9" = NA
  )
)

# Menopausal status code. The new column is built from `menopht` explicitly:
# writing `ca$meno[...] <- 1` while no `meno` column exists makes `$` fall back
# on partial matching to `menopht`, which only works by accident on a plain
# data.frame. Codes are kept as character so `meno` stays categorical in
# tables and plots; labels not listed here are carried over unchanged.
meno_codes <- c(
  "01: Natural"      = "1",
  "02: ooph"         = "2",
  "03: Age 60+"      = "3",
  "04: HRT"          = "4",
  "05: PERI"         = "5",
  "06: PRE"          = "6",
  "95: LMP 365+"     = "95",
  "96: Surgical"     = "96",
  "97: Prd Stop Oth" = "97",
  "98: All Missing"  = NA,
  "99: Some Missing" = NA
)
ca$meno <- recode_values(ca$menopht, meno_codes)

# Ever had a mammogram (yes/no; 8, 9 and blank -> NA).
ca$evermamm <- blank_to_na(recode_values(
  ca$evermamm,
  c(
    "0: No" = "0",
    "1: Yes" = "1",
    "8: Structural missing" = NA,
    "9: Unknown" = NA
  )
))

# Interval since the previous mammogram; values outside the four levels
# (e.g. "9") become NA when the factor is built.
interval_labels <- c(
  "0:1st Mamm"          = "First",
  "1:Annual (9-18m)"    = "Annual",
  "2:Biennial (19-30m)" = "Biennial",
  "3:>30months"         = "30+months"
)
ca$mammintvl <- factor(
  recode_values(ca$mammintvl, interval_labels),
  levels = unname(interval_labels)
)

# Mammography modality; any other `mammtype` value gives NA.
type_labels <- c(
  "2: Digital(2-D only)" = "Digital",
  "5: Tomosynthesis"     = "Tomosynthesis"
)
ca$type <- unname(type_labels[as.character(ca$mammtype)])
# Key data ----
# dur = age at this record minus the youngest age recorded for the same id,
# so each id's earliest record has dur = 0. min(age) is used instead of
# age[1] so the baseline does not depend on the rows of an id being sorted
# by age (an unsorted id would otherwise get negative durations).
ca <- ddply(ca, .(id), transform, dur = age - min(age, na.rm = TRUE))

# Reduced data frame used by the time-to-event fits.
ca1 <- ca[, c(
  "id", "age", "ageg", "dur", "type", "density",
  "fp.recall", "fp.bxrecom", "fp.sifu"
)]
# Age group, density, screening round and initial/final result, with one
# column per mammography type plus an overall column.
table1(
  ~ ageg + density + factor(screenround) + res.init + res.final | type,
  data = ca
)
| | Digital (N=1223840) | Tomosynthesis (N=106329) | Overall (N=1330169) |
|---|---|---|---|
| ageg | |||
| 40-49 | 153718 (12.6%) | 18761 (17.6%) | 172479 (13.0%) |
| 50-59 | 54169 (4.4%) | 6577 (6.2%) | 60746 (4.6%) |
| 60-69 | 669451 (54.7%) | 55922 (52.6%) | 725373 (54.5%) |
| 70-79 | 346502 (28.3%) | 25069 (23.6%) | 371571 (27.9%) |
| density | |||
| Fat | 157017 (12.8%) | 12739 (12.0%) | 169756 (12.8%) |
| Scattered | 599967 (49.0%) | 51941 (48.8%) | 651908 (49.0%) |
| Hetero | 409255 (33.4%) | 35480 (33.4%) | 444735 (33.4%) |
| Dense | 57601 (4.7%) | 6169 (5.8%) | 63770 (4.8%) |
| factor(screenround) | |||
| 1 | 110211 (9.0%) | 8699 (8.2%) | 118910 (8.9%) |
| 2 | 48410 (4.0%) | 5073 (4.8%) | 53483 (4.0%) |
| 3 | 24631 (2.0%) | 3614 (3.4%) | 28245 (2.1%) |
| 4 | 12910 (1.1%) | 2644 (2.5%) | 15554 (1.2%) |
| 5 | 6566 (0.5%) | 2000 (1.9%) | 8566 (0.6%) |
| 6 | 3138 (0.3%) | 1397 (1.3%) | 4535 (0.3%) |
| 7 | 1346 (0.1%) | 924 (0.9%) | 2270 (0.2%) |
| 8 | 472 (0.0%) | 594 (0.6%) | 1066 (0.1%) |
| 9 | 166 (0.0%) | 283 (0.3%) | 449 (0.0%) |
| 10 | 37 (0.0%) | 110 (0.1%) | 147 (0.0%) |
| Missing | 1015953 (83.0%) | 80991 (76.2%) | 1096944 (82.5%) |
| res.init | |||
| 0 | 1096821 (89.6%) | 97773 (92.0%) | 1194594 (89.8%) |
| 1 | 125196 (10.2%) | 8544 (8.0%) | 133740 (10.1%) |
| Missing | 1823 (0.1%) | 12 (0.0%) | 1835 (0.1%) |
| res.final | |||
| 0 | 1191140 (97.3%) | 103983 (97.8%) | 1295123 (97.4%) |
| 1 | 21893 (1.8%) | 1849 (1.7%) | 23742 (1.8%) |
| Missing | 10807 (0.9%) | 497 (0.5%) | 11304 (0.8%) |
# Age group and density by mammography type. The variables are joined with
# `+`: table1 summarises each listed variable separately and renders no
# interaction, so `*` only suggested a cross-tabulation that is never shown.
table1(~ ageg + density | type, data = ca)
| | Digital (N=1223840) | Tomosynthesis (N=106329) | Overall (N=1330169) |
|---|---|---|---|
| ageg | |||
| 40-49 | 153718 (12.6%) | 18761 (17.6%) | 172479 (13.0%) |
| 50-59 | 54169 (4.4%) | 6577 (6.2%) | 60746 (4.6%) |
| 60-69 | 669451 (54.7%) | 55922 (52.6%) | 725373 (54.5%) |
| 70-79 | 346502 (28.3%) | 25069 (23.6%) | 371571 (27.9%) |
| density | |||
| Fat | 157017 (12.8%) | 12739 (12.0%) | 169756 (12.8%) |
| Scattered | 599967 (49.0%) | 51941 (48.8%) | 651908 (49.0%) |
| Hetero | 409255 (33.4%) | 35480 (33.4%) | 444735 (33.4%) |
| Dense | 57601 (4.7%) | 6169 (5.8%) | 63770 (4.8%) |
# Unstratified overview of every analysis variable. `sample` is listed once;
# repeating a term does not add a second section to the table.
table1(
  ~ assmfinal + canceryr1 + canceryr2 + cancscryr1 + density +
    prvmam + prvmos + prvtim + res.init + res.final + scrmam + timsnc +
    meno + mammtype + year + evermamm + screenround + mammintvl +
    sample + fp.recall + fp.bxrecom + fp.sifu + mamm1.subs,
  data = ca
)
| | Overall (N=1330169) |
|---|---|
| assmfinal | |
| 0: Need addtl imagng eval | 8628 (0.6%) |
| 1: Negative | 813695 (61.2%) |
| 2: Benign finding | 449899 (33.8%) |
| 3: Probably benign fndng | 31529 (2.4%) |
| 4: Suspiciously abnormal | 21994 (1.7%) |
| 5: Highly sggstv of malignancy | 1748 (0.1%) |
| 8: Structural missing | 841 (0.1%) |
| 9: Unknown | 1835 (0.1%) |
| canceryr1 | |
| | 802242 (60.3%) |
| 0 | 524697 (39.4%) |
| 1 | 3230 (0.2%) |
| canceryr2 | |
| | 802242 (60.3%) |
| 0 | 522148 (39.3%) |
| 1 | 5779 (0.4%) |
| cancscryr1 | |
| | 802242 (60.3%) |
| 0 | 524720 (39.4%) |
| 1 | 3207 (0.2%) |
| density | |
| Fat | 169756 (12.8%) |
| Scattered | 651908 (49.0%) |
| Hetero | 444735 (33.4%) |
| Dense | 63770 (4.8%) |
| prvmam | |
| 0 | 143006 (10.8%) |
| 1 | 1175974 (88.4%) |
| Missing | 11189 (0.8%) |
| prvmos | |
| Mean (SD) | 19.0 (15.6) |
| Median [Min, Max] | 14.0 [9.00, 481] |
| Missing | 194033 (14.6%) |
| prvtim | |
| Mean (SD) | 2.07 (1.43) |
| Median [Min, Max] | 2.00 [0, 9.00] |
| Missing | 18 (0.0%) |
| res.init | |
| 0 | 1194594 (89.8%) |
| 1 | 133740 (10.1%) |
| Missing | 1835 (0.1%) |
| res.final | |
| 0 | 1295123 (97.4%) |
| 1 | 23742 (1.8%) |
| Missing | 11304 (0.8%) |
| scrmam | |
| 1: Yes | 1330169 (100%) |
| timsnc | |
| Mean (SD) | 3.26 (3.00) |
| Median [Min, Max] | 2.00 [0, 9.00] |
| Missing | 12436 (0.9%) |
| meno | |
| 1 | 542341 (40.8%) |
| 2 | 51567 (3.9%) |
| 3 | 530615 (39.9%) |
| 4 | 1901 (0.1%) |
| 5 | 4584 (0.3%) |
| 6 | 171418 (12.9%) |
| 95 | 82 (0.0%) |
| 96 | 15376 (1.2%) |
| 97 | 4993 (0.4%) |
| Missing | 7292 (0.5%) |
| mammtype | |
| 2: Digital(2-D only) | 1223840 (92.0%) |
| 5: Tomosynthesis | 106329 (8.0%) |
| year | |
| Mean (SD) | 2010 (2.77) |
| Median [Min, Max] | 2010 [2010, 2020] |
| evermamm | |
| | 12436 (0.9%) |
| 0 | 158852 (11.9%) |
| 1 | 1034174 (77.7%) |
| Missing | 124707 (9.4%) |
| screenround | |
| Mean (SD) | 2.03 (1.44) |
| Median [Min, Max] | 1.00 [1.00, 10.0] |
| Missing | 1096944 (82.5%) |
| mammintvl | |
| First | 143006 (10.8%) |
| Annual | 854896 (64.3%) |
| Biennial | 178889 (13.4%) |
| 30+months | 102351 (7.7%) |
| Missing | 51027 (3.8%) |
| sample | |
| 1:1 Only | 233225 (17.5%) |
| 2:2 only | 1096944 (82.5%) |
| fp.recall | |
| Mean (SD) | 0.0945 (0.293) |
| Median [Min, Max] | 0 [0, 1.00] |
| fp.bxrecom | |
| Mean (SD) | 0.0124 (0.110) |
| Median [Min, Max] | 0 [0, 1.00] |
| fp.sifu | |
| Mean (SD) | 0.0234 (0.151) |
| Median [Min, Max] | 0 [0, 1.00] |
| mamm1.subs | |
| Mean (SD) | 2.00 (0) |
| Median [Min, Max] | 2.00 [2.00, 2.00] |
# Initial screening result broken down by final result.
table1(
  ~ res.init | res.final,
  data = ca
)
| | 0 (N=1295123) | 1 (N=23742) | Overall (N=1330169) |
|---|---|---|---|
| res.init | |||
| 0 | 1193850 (92.2%) | 0 (0%) | 1194594 (89.8%) |
| 1 | 101273 (7.8%) | 23742 (100%) | 133740 (10.1%) |
| Missing | 0 (0%) | 0 (0%) | 1835 (0.1%) |
# ---- Descriptive plots ----
# Each bar chart colours the bars by the plotted variable and hides the
# (redundant) legend.

# Mammography interval.
ggplot(data = ca, aes(x = mammintvl, fill = mammintvl)) +
  geom_bar() +
  theme(legend.position = "none") +
  labs(x = "Mammo interval", y = "Number of women")

# Screening round; rows without a round are dropped before plotting.
ca %>%
  drop_na(screenround) %>%
  ggplot(aes(x = factor(screenround), fill = factor(screenround))) +
  geom_bar() +
  theme(legend.position = "none") +
  labs(x = "Screen round", y = "Number of women")

# Age distribution. The bin width is set explicitly to one year of age:
# the default of 30 bins makes stat_bin() emit a "Pick better value with
# `binwidth`" message and spreads the ages over unequal numbers of years
# per bar.
ggplot(data = ca, aes(x = age)) +
  geom_histogram(binwidth = 1, col = "white", fill = "blue") +
  labs(x = "Age", y = "Number of women")

# Menopausal status code.
ggplot(data = ca, aes(meno, fill = meno)) +
  geom_bar() +
  theme(legend.position = "none") +
  labs(x = "Meno", y = "Number of women")

# Final assessment category.
ggplot(data = ca, aes(ass.final, fill = ass.final)) +
  geom_bar() +
  theme(legend.position = "none") +
  labs(x = "ASSM Final", y = "Number of women")
# The three false-positive indicators, shown as categories (via factor())
# and split by mammography type.
table1(
  ~ factor(fp.recall) + factor(fp.bxrecom) + factor(fp.sifu) | type,
  data = ca
)
| | Digital (N=1223840) | Tomosynthesis (N=106329) | Overall (N=1330169) |
|---|---|---|---|
| factor(fp.recall) | |||
| 0 | 1105904 (90.4%) | 98498 (92.6%) | 1204402 (90.5%) |
| 1 | 117936 (9.6%) | 7831 (7.4%) | 125767 (9.5%) |
| factor(fp.bxrecom) | |||
| 0 | 1208589 (98.8%) | 105145 (98.9%) | 1313734 (98.8%) |
| 1 | 15251 (1.2%) | 1184 (1.1%) | 16435 (1.2%) |
| factor(fp.sifu) | |||
| 0 | 1194370 (97.6%) | 104660 (98.4%) | 1299030 (97.7%) |
| 1 | 29470 (2.4%) | 1669 (1.6%) | 31139 (2.3%) |
# Bar chart of one false-positive indicator from `ca`: bars coloured by the
# plotted variable, legend hidden, shared y-axis title.
fp_bar_chart <- function(mapping, x_label) {
  ggplot(data = ca, mapping = mapping) +
    geom_bar() +
    theme(legend.position = "none") +
    labs(x = x_label, y = "Number of women")
}

p1 <- fp_bar_chart(
  aes(x = factor(fp.recall), fill = factor(fp.recall)),
  "FP Recall"
)
p2 <- fp_bar_chart(
  aes(x = factor(fp.bxrecom), fill = factor(fp.bxrecom)),
  "FP Recom"
)
p3 <- fp_bar_chart(
  aes(x = factor(fp.sifu), fill = factor(fp.sifu)),
  "FP Sifu"
)

# Show the three charts side by side.
grid.arrange(p1, p2, p3, ncol = 3)
# Using survminer package
# Kaplan-Meier fits of each false-positive outcome against `dur`, one curve
# per mammography type, drawn as cumulative event curves (fun = "event").
# `km` is reused for each fit, so it ends up holding the fp.sifu model.
# NOTE(review): ca1 appears to hold several rows per id, and every row enters
# the fit as its own observation - confirm this is the intended risk set.

# False-positive recall
km <- survfit(Surv(dur, fp.recall == 1) ~ type, data = ca1)
ggsurvplot(km, xlab = "Time (Years)", fun = "event") +
  labs(x = "Time (years)", y = "Cumulative risk of FP - Recall")

# False-positive biopsy recommendation
km <- survfit(Surv(dur, fp.bxrecom == 1) ~ type, data = ca1)
ggsurvplot(km, xlab = "Time (Years)", fun = "event") +
  labs(x = "Time (years)", y = "Cumulative risk of FP BxRecom")

# False-positive SIFU outcome
km <- survfit(Surv(dur, fp.sifu == 1) ~ type, data = ca1)
ggsurvplot(km, xlab = "Time (Years)", fun = "event") +
  labs(x = "Time (years)", y = "Cumulative risk of FP - Sifu")