1. Tiền xử lý số liệu
# Let the user pick the CSV file interactively, load it into a data frame,
# and show the structure of what was read.
# `<-` is used for assignment, consistent with the rest of the script.
file_path <- file.choose()
BRCA <- read.csv(file_path)
str(BRCA)
## 'data.frame': 341 obs. of 16 variables:
## $ Patient_ID : chr "TCGA-D8-A1XD" "TCGA-EW-A1OX" "TCGA-A8-A079" "TCGA-D8-A1XR" ...
## $ Age : int 36 43 69 56 56 84 53 50 77 40 ...
## $ Gender : chr "FEMALE" "FEMALE" "FEMALE" "FEMALE" ...
## $ Protein1 : num 0.0804 -0.4203 0.214 0.3451 0.2215 ...
## $ Protein2 : num 0.426 0.578 1.311 -0.211 1.907 ...
## $ Protein3 : num 0.547 0.614 -0.327 -0.193 0.52 ...
## $ Protein4 : num 0.2737 -0.0315 -0.2343 0.1243 -0.312 ...
## $ Tumour_Stage : chr "III" "II" "III" "II" ...
## $ Histology : chr "Infiltrating Ductal Carcinoma" "Mucinous Carcinoma" "Infiltrating Ductal Carcinoma" "Infiltrating Ductal Carcinoma" ...
## $ ER.status : chr "Positive" "Positive" "Positive" "Positive" ...
## $ PR.status : chr "Positive" "Positive" "Positive" "Positive" ...
## $ HER2.status : chr "Negative" "Negative" "Negative" "Negative" ...
## $ Surgery_type : chr "Modified Radical Mastectomy" "Lumpectomy" "Other" "Modified Radical Mastectomy" ...
## $ Date_of_Surgery : chr "15-Jan-17" "26-Apr-17" "08-Sep-17" "25-Jan-17" ...
## $ Date_of_Last_Visit: chr "19-Jun-17" "09-Nov-18" "09-Jun-18" "12-Jul-17" ...
## $ Patient_Status : chr "Alive" "Dead" "Alive" "Alive" ...
# Turn empty-string cells into NA so they are counted as missing values
blank_cells <- BRCA == ""
BRCA[blank_cells] <- NA
# Report the number and percentage of NA values in each column
library(questionr)
freq.na(BRCA)
## missing %
## Date_of_Last_Visit 24 7
## Patient_Status 20 6
## Patient_ID 7 2
## Age 7 2
## Gender 7 2
## Protein1 7 2
## Protein2 7 2
## Protein3 7 2
## Protein4 7 2
## Tumour_Stage 7 2
## Histology 7 2
## ER.status 7 2
## PR.status 7 2
## HER2.status 7 2
## Surgery_type 7 2
## Date_of_Surgery 7 2
# Drop every row that contains at least one NA
BRCA <- na.omit(BRCA)

# Parse "dd-Mon-yy" strings such as "15-Jan-17" into Date objects.
# `%b` matches month abbreviations of the current LC_TIME locale, so the
# English abbreviations in the data would parse to NA under a non-English
# locale. Parse under the "C" locale and restore the previous one afterwards.
parse_dmy_date <- function(x) {
  previous_lc_time <- Sys.getlocale("LC_TIME")
  on.exit(Sys.setlocale("LC_TIME", previous_lc_time), add = TRUE)
  Sys.setlocale("LC_TIME", "C")
  as.Date(x, format = "%d-%b-%y")
}
BRCA$Date_of_Surgery <- parse_dmy_date(BRCA$Date_of_Surgery)
BRCA$Date_of_Last_Visit <- parse_dmy_date(BRCA$Date_of_Last_Visit)

# Missing values were removed above, so any NA here is a date that failed to
# parse; surface it instead of letting it flow silently into Survival_days.
if (anyNA(BRCA$Date_of_Surgery) || anyNA(BRCA$Date_of_Last_Visit)) {
  warning("Some surgery / last-visit dates could not be parsed", call. = FALSE)
}

# Number of days between surgery and the last recorded visit
BRCA$Survival_days <- as.numeric(BRCA$Date_of_Last_Visit - BRCA$Date_of_Surgery)
head(
  BRCA[, c("Patient_ID", "Survival_days", "Date_of_Last_Visit", "Date_of_Surgery")]
)
## Patient_ID Survival_days Date_of_Last_Visit Date_of_Surgery
## 1 TCGA-D8-A1XD 155 2017-06-19 2017-01-15
## 2 TCGA-EW-A1OX 562 2018-11-09 2017-04-26
## 3 TCGA-A8-A079 274 2018-06-09 2017-09-08
## 4 TCGA-D8-A1XR 168 2017-07-12 2017-01-25
## 5 TCGA-BH-A0BF 782 2019-06-27 2017-05-06
## 6 TCGA-AO-A1KQ 1519 2021-11-15 2017-09-18
# Keep everything except the identifier and the two raw date columns
# (the dates are already summarised by Survival_days)
dropped_cols <- c("Patient_ID", "Date_of_Surgery", "Date_of_Last_Visit")
kept_mask <- !(names(BRCA) %in% dropped_cols)
new_data <- BRCA[, kept_mask]
2. Thống kê mô tả
# Descriptive statistics for the continuous variables
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:questionr':
##
## describe
# `psych::` is spelled out because questionr also exports a describe(), so the
# bare name depends on the order in which the packages were attached.
# `TRUE` replaces `T`, which is an ordinary variable that can be reassigned.
continuous_cols <- c(
  "Age", "Protein1", "Protein2", "Protein3", "Protein4", "Survival_days"
)
psych::describe(BRCA[, continuous_cols], fast = TRUE)
## vars n mean sd median min max range skew
## Age 1 317 58.73 12.83 58.00 29.00 90.00 61.00 0.28
## Protein1 2 317 -0.03 0.54 0.01 -2.14 1.59 3.74 -0.46
## Protein2 3 317 0.95 0.91 1.00 -0.98 3.40 4.38 -0.19
## Protein3 4 317 -0.10 0.59 -0.19 -1.63 2.19 3.82 0.72
## Protein4 5 317 0.01 0.63 0.04 -2.03 1.63 3.66 -0.47
## Survival_days 6 317 447.78 386.28 372.00 0.00 3019.00 3019.00 2.53
## kurtosis se
## Age -0.55 0.72
## Protein1 0.96 0.03
## Protein2 -0.62 0.05
## Protein3 0.69 0.03
## Protein4 0.17 0.04
## Survival_days 11.24 21.70
# Frequency tables for the categorical variables.
# Each print() labels the table() call that follows it; the "##" lines are the
# recorded output of the statement directly above them.
# In the recorded output ER.status and PR.status each have a single level
# ("Positive", 317 rows) and Gender has only 4 "MALE" rows out of 317.
print("Gender")
## [1] "Gender"
table(BRCA$Gender)
##
## FEMALE MALE
## 313 4
print("Tumour_Stage")
## [1] "Tumour_Stage"
table(BRCA$Tumour_Stage)
##
## I II III
## 60 180 77
print("Histology")
## [1] "Histology"
table(BRCA$Histology)
##
## Infiltrating Ductal Carcinoma Infiltrating Lobular Carcinoma
## 224 81
## Mucinous Carcinoma
## 12
print("ER.status")
## [1] "ER.status"
table(BRCA$ER.status)
##
## Positive
## 317
print("PR.status")
## [1] "PR.status"
table(BRCA$PR.status)
##
## Positive
## 317
print("HER2.status")
## [1] "HER2.status"
table(BRCA$HER2.status)
##
## Negative Positive
## 288 29
print("Surgery_type")
## [1] "Surgery_type"
table(BRCA$Surgery_type)
##
## Lumpectomy Modified Radical Mastectomy
## 66 89
## Other Simple Mastectomy
## 97 65
print("Patient_Status")
## [1] "Patient_Status"
table(BRCA$Patient_Status)
##
## Alive Dead
## 255 62
##
## Alive Dead
## 255 62
# Exclude Gender, ER.status and PR.status from the working data set, then
# show the structure of the result
excluded_cols <- c("ER.status", "PR.status", "Gender")
retained_mask <- !(names(new_data) %in% excluded_cols)
new_data_2 <- new_data[, retained_mask]
str(new_data_2)
## 'data.frame': 317 obs. of 11 variables:
## $ Age : int 36 43 69 56 56 84 53 77 40 71 ...
## $ Protein1 : num 0.0804 -0.4203 0.214 0.3451 0.2215 ...
## $ Protein2 : num 0.426 0.578 1.311 -0.211 1.907 ...
## $ Protein3 : num 0.547 0.614 -0.327 -0.193 0.52 ...
## $ Protein4 : num 0.2737 -0.0315 -0.2343 0.1243 -0.312 ...
## $ Tumour_Stage : chr "III" "II" "III" "II" ...
## $ Histology : chr "Infiltrating Ductal Carcinoma" "Mucinous Carcinoma" "Infiltrating Ductal Carcinoma" "Infiltrating Ductal Carcinoma" ...
## $ HER2.status : chr "Negative" "Negative" "Negative" "Negative" ...
## $ Surgery_type : chr "Modified Radical Mastectomy" "Lumpectomy" "Other" "Modified Radical Mastectomy" ...
## $ Patient_Status: chr "Alive" "Dead" "Alive" "Alive" ...
## $ Survival_days : num 155 562 274 168 782 ...
# Biểu đồ histogram theo trạng thái bệnh nhân
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# One overlaid histogram per continuous variable, coloured by Patient_Status.
# `.data[[v]]` looks the column up by the name held in `v`.
# Fixes relative to the earlier version:
#   * the title no longer has a double space (paste() already inserts the
#     separator, so the literal must not end with one);
#   * the plot object is no longer called `plot`, which shadowed base::plot.
# `bins` is left at ggplot2's default of 30, which is what triggers the
# `stat_bin()` message for each plot.
vars <- c("Age", "Protein1", "Protein2", "Protein3", "Protein4", "Survival_days")
for (v in vars) {
  hist_plot <- ggplot(
    new_data_2,
    mapping = aes(x = .data[[v]], fill = Patient_Status)
  ) +
    geom_histogram(alpha = 0.7, position = "identity") +
    labs(title = paste("Histogram of", v, "by Patient Status"))
  # Inside a loop nothing is auto-printed, so the plot must be printed explicitly
  print(hist_plot)
}
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Bar charts of each categorical variable, with side-by-side ("dodge") bars
# for the Patient_Status groups. The bar layer is identical for all four
# charts, so it is built once and reused.
dodged_bars <- geom_bar(position = "dodge")

ggplot(
  data = new_data_2,
  mapping = aes(x = Tumour_Stage, fill = Patient_Status)
) +
  dodged_bars +
  labs(title = "Tumour Stage by Patient Status")

ggplot(
  data = new_data_2,
  mapping = aes(x = Histology, fill = Patient_Status)
) +
  dodged_bars +
  labs(title = "Histology by Patient Status")

ggplot(
  data = new_data_2,
  mapping = aes(x = HER2.status, fill = Patient_Status)
) +
  dodged_bars +
  labs(title = "HER2 Status by Patient Status")

# Surgery type labels are long, so the x-axis text is rotated by 45 degrees
ggplot(
  data = new_data_2,
  mapping = aes(x = Surgery_type, fill = Patient_Status)
) +
  dodged_bars +
  labs(title = "Surgery Type by Patient Status") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Encode the outcome as a factor with levels 0/1: 1 for "Alive", 0 for any
# other value. The is.character() guard means that running this chunk again
# leaves an already converted column untouched.
if (is.character(new_data_2$Patient_Status)) {
  alive_flag <- as.integer(new_data_2$Patient_Status == "Alive")
  new_data_2$Patient_Status <- factor(alive_flag, levels = c(0, 1))
}

# Convert the remaining categorical predictors to factors in one pass
categorical_cols <- c("Tumour_Stage", "Histology", "HER2.status", "Surgery_type")
new_data_2[categorical_cols] <- lapply(new_data_2[categorical_cols], as.factor)

print(new_data_2)
## Age Protein1 Protein2 Protein3 Protein4 Tumour_Stage
## 1 36 0.08035300 0.4263800 0.5471500 0.27368000 III
## 2 43 -0.42032000 0.5780700 0.6144700 -0.03150500 II
## 3 69 0.21398000 1.3114000 -0.3274700 -0.23426000 III
## 4 56 0.34509000 -0.2114700 -0.1930400 0.12427000 II
## 5 56 0.22155000 1.9068000 0.5204500 -0.31199000 II
## 6 84 -0.08187200 1.7241000 -0.0573350 0.04302500 III
## 7 53 -0.06953500 1.4183000 -0.3610500 0.39158000 II
## 9 77 -0.15175000 -0.6633200 1.1894000 0.21718000 II
## 10 40 -0.56570000 1.2668000 -0.2934600 0.19395000 II
## 11 71 -0.22305000 0.5059400 -0.3494300 -0.83530000 II
## 12 72 -0.37270000 0.5554900 -0.6679000 -0.35070000 II
## 13 75 0.17164000 0.0296560 -0.1589000 0.67471000 I
## 14 52 -1.68280000 0.7272200 0.0567730 -1.24410000 II
## 15 53 0.56042000 0.8225100 -0.5347600 0.26527000 II
## 16 41 0.14252000 1.0827000 0.2109200 0.97428000 I
## 17 37 0.29490000 1.3625000 0.6923800 0.41002000 II
## 18 59 0.19762000 1.0719000 -0.2051000 0.63315000 II
## 19 62 0.10464000 0.7028000 -0.2989800 0.60103000 II
## 20 74 -0.24613000 1.4953000 -0.2985400 0.65925000 III
## 21 87 -0.08748500 1.1111000 -0.5669200 1.22370000 II
## 22 45 -0.59518000 -0.3405400 0.4445400 0.75328000 I
## 24 77 -0.29870000 -0.1612900 0.4607200 -0.39666000 II
## 25 62 0.54240000 1.7926000 -0.5659900 -0.01720600 II
## 26 79 -0.03622900 0.7955100 -0.0135250 1.62990000 II
## 27 47 0.33186000 -0.3276500 0.0033786 1.16580000 II
## 28 50 -0.06215300 1.6120000 -0.4740400 -0.04444800 II
## 29 59 -0.33045000 1.4353000 -0.6080000 -0.35070000 III
## 30 56 1.03520000 1.7646000 -0.7592100 0.97527000 III
## 31 61 -0.22788000 1.7908000 -0.7562100 0.73977000 I
## 32 68 0.64903000 1.4240000 -0.3953600 1.18480000 I
## 33 49 -0.28082000 0.7192500 -0.1710000 0.54672000 II
## 34 74 0.59453000 0.9486300 -0.4983400 0.97173000 II
## 35 48 0.23235000 1.2129000 0.0525180 -0.02412600 I
## 36 50 -0.07976500 2.6229000 0.0496870 -0.03936900 I
## 37 49 0.06164300 1.3149000 -0.0993570 0.75441000 II
## 38 46 -0.55214000 2.0836000 0.4160500 -0.80791000 I
## 39 68 -0.34962000 1.3735000 -0.2435600 0.18804000 II
## 40 87 0.70876000 2.1731000 -0.4396700 -0.39255000 I
## 41 59 0.28025000 0.8989600 -0.6100300 0.53418000 II
## 42 71 -0.94840000 0.7405100 -0.2582900 -0.21336000 III
## 43 50 0.21728000 -0.2162800 1.0881000 0.15429000 II
## 44 45 0.42538000 1.2628000 -0.7199600 -1.16410000 II
## 45 84 0.14991000 1.4923000 -0.9318200 -0.02315800 II
## 46 56 -0.22853000 1.3102000 -0.5912500 0.08651400 II
## 47 72 -0.42298000 0.3833000 0.4169900 -0.08821800 II
## 48 81 0.82374000 1.8531000 -0.8018700 -0.79413000 II
## 49 89 -0.24978000 -0.3513300 -0.0996240 0.75834000 I
## 50 46 0.23113000 0.8770000 0.0193870 0.86336000 II
## 51 61 -1.48720000 0.3490300 -0.6624200 0.18528000 II
## 52 44 0.23917000 0.4529000 0.0401580 -0.38479000 II
## 53 55 -0.66342000 1.9382000 -0.7753700 -0.26366000 II
## 54 78 0.20973000 0.6868300 0.1191900 0.61420000 II
## 55 85 0.74520000 1.7573000 -1.0062000 -0.06174000 II
## 56 79 0.72984000 1.5762000 -0.1610900 -0.25533000 II
## 57 60 0.21197000 1.5274000 -0.4094900 0.00097756 II
## 58 40 0.51178000 0.9358700 0.1647700 -0.53322000 II
## 59 57 0.00000000 0.2068400 -0.1611000 0.37403000 I
## 60 54 -0.28272000 0.6084200 -0.4740300 0.61717000 II
## 61 59 0.35914000 1.1852000 -0.4234900 0.41594000 II
## 62 62 0.06762700 1.2431000 0.0656870 -0.36482000 II
## 63 76 0.04354600 -0.4017100 0.4668500 1.04780000 II
## 64 58 -0.33490000 0.8844500 -0.7972000 0.32147000 III
## 65 77 -0.20608000 0.5775100 -0.2485600 0.00000000 I
## 66 67 -0.75475000 0.9477300 -0.8077600 0.20922000 II
## 67 47 0.12107000 0.7851300 -0.1976200 0.35245000 II
## 68 74 -0.03164000 2.2806000 0.0085752 -0.53801000 I
## 69 76 -0.16053000 -0.5439800 0.7855000 0.62199000 II
## 70 57 0.12704000 -0.4976100 0.9768100 0.62447000 II
## 71 50 0.13597000 1.8285000 -0.7690400 -0.27651000 I
## 72 54 0.00000000 1.3802000 -0.4980300 -0.50732000 II
## 73 63 0.37857000 1.4942000 0.1135500 0.76128000 II
## 74 54 0.09661400 -0.6027900 0.4777000 0.94660000 I
## 75 82 -0.97674000 -0.4996000 0.6579400 -0.71785000 I
## 76 65 0.43553000 0.7703400 0.4936800 0.37432000 III
## 77 68 -0.82632000 1.6912000 -0.1334900 -0.18214000 II
## 78 77 -0.05815100 0.6743200 -0.4230300 0.16753000 I
## 79 56 -0.25750000 0.8069100 -0.5091700 -0.03458500 I
## 80 73 0.44857000 2.3013000 0.0116590 0.60686000 II
## 81 85 0.40384000 0.4882500 0.8411100 0.61645000 III
## 82 68 -0.11733000 1.6399000 -0.2833000 -0.29739000 I
## 83 51 0.14784000 0.3182300 -0.6047500 -0.96489000 I
## 84 55 0.00661010 1.3076000 -0.5879000 0.14610000 II
## 85 51 0.00843830 1.4019000 0.1216000 -2.02550000 II
## 86 83 -2.14460000 -0.1794600 -0.2003300 -1.48880000 III
## 87 62 0.67951000 0.4981400 0.0185250 0.28502000 II
## 88 85 0.53809000 0.2810900 -0.1064200 0.23963000 I
## 89 58 -1.26850000 0.1061300 1.1352000 -0.39492000 II
## 90 80 0.46647000 2.5797000 -1.2537000 0.15154000 III
## 91 48 -0.21580000 -0.2933800 0.8677100 -0.24204000 III
## 92 68 -0.26000000 0.8604400 0.0767320 0.18670000 II
## 93 59 0.48045000 0.6313500 -0.7010000 -0.16203000 III
## 94 50 1.59360000 1.6752000 -0.0380620 0.94426000 II
## 95 58 0.35440000 1.5325000 -0.5669900 0.47109000 II
## 96 39 0.55309000 1.9955000 -0.1423800 -1.06840000 II
## 97 57 -0.21378000 -0.6281400 1.0226000 0.20244000 II
## 98 66 -0.45077000 0.4234600 -0.0291970 0.18192000 II
## 99 41 0.10012000 -0.4654700 0.4723700 -0.52387000 I
## 101 46 -0.64268000 0.0453740 -0.2042600 0.65811000 II
## 102 42 -0.17561000 0.2896300 1.1290000 -0.38224000 I
## 103 45 -0.52707000 0.7565200 -0.5608200 -0.50269000 III
## 104 53 0.01410600 -0.6839700 0.5381500 0.71897000 III
## 105 71 0.12758000 0.8103300 0.5134500 -0.10298000 II
## 106 66 0.14129000 1.2999000 -0.3432500 -1.76840000 II
## 107 62 0.37355000 0.7831200 -0.5744500 -0.96343000 I
## 108 63 -0.52303000 1.7640000 -0.3701900 0.01081500 II
## 109 59 -0.46713000 2.2267000 -0.2114600 -0.24328000 II
## 110 50 0.05939800 1.6725000 -0.4161500 0.30143000 II
## 111 38 -0.62985000 2.2999000 0.1134600 -1.03680000 II
## 113 63 -0.83210000 -0.2349400 0.6383100 0.03081500 II
## 114 50 0.12946000 -0.4927000 0.3160300 -0.56382000 III
## 115 59 0.32248000 2.0502000 -0.3468400 -1.14850000 II
## 116 47 0.58118000 0.9432100 -0.2269300 0.36686000 III
## 117 45 0.17626000 0.5934200 -0.5075200 0.40061000 III
## 118 58 -0.54747000 0.3066000 -0.5919500 -0.37415000 II
## 119 49 -0.20161000 0.2715900 -0.4487100 0.32081000 III
## 120 64 -0.96995000 -0.7692600 0.5568000 -0.72015000 I
## 121 61 0.66639000 2.1892000 -1.1018000 -0.83145000 II
## 122 60 -0.10055000 1.3716000 -0.8686600 0.04051100 I
## 123 65 -0.54013000 2.6043000 -0.6880300 0.00315240 II
## 124 57 0.05272900 1.5114000 0.7672200 0.18569000 II
## 125 80 -0.01791400 0.9241000 -0.0279960 0.26999000 I
## 126 75 -1.97110000 -0.6319700 0.0371340 -0.46685000 III
## 127 51 -0.85187000 1.6760000 1.7019000 -0.07829800 II
## 128 47 -0.19006000 1.9779000 -0.0076154 0.03532500 II
## 129 47 -0.52529000 1.1224000 0.4371200 0.62410000 III
## 130 29 -0.10568000 0.9374700 -0.6216900 0.18637000 I
## 131 32 0.12545000 1.4565000 -0.6803000 -0.01037800 I
## 132 56 -0.93256000 0.7023200 -0.5992100 -1.89930000 II
## 133 58 0.35774000 1.5189000 -0.3070600 0.43625000 III
## 134 56 -0.06500200 -0.5085200 0.5196200 0.77911000 II
## 135 70 1.06740000 0.9971300 -0.5862200 0.48622000 I
## 136 73 -0.09592700 1.8642000 -0.2892500 -0.35304000 I
## 137 54 0.32671000 -0.2949900 1.7691000 0.77087000 II
## 138 65 -0.44470000 1.4946000 -1.1074000 -0.25249000 II
## 139 56 -0.35060000 1.8783000 -0.3668800 0.46539000 I
## 140 53 -0.85310000 -0.0058079 -0.3808800 -0.80618000 II
## 141 40 0.08754900 2.1957000 -0.9935600 -0.37807000 III
## 142 38 -0.26845000 0.1951500 -1.0247000 0.10172000 III
## 143 42 0.22611000 1.7491000 -0.5439700 -0.39021000 II
## 144 69 0.29939000 -0.3076500 1.0716000 -0.17950000 II
## 145 45 -0.52601000 0.7183600 -0.6759800 -0.83526000 II
## 146 61 -0.75880000 0.0617870 -0.2063800 -1.24090000 III
## 147 71 0.41862000 -0.2525800 -0.2410000 -0.20763000 III
## 148 42 0.95256000 2.1500000 0.0079716 -0.04834000 II
## 149 42 -0.44904000 -0.7201600 -0.1116300 -0.03497300 II
## 150 63 -0.48913000 0.8938600 -0.4707700 -0.20582000 III
## 151 73 -0.21861000 0.9539900 0.3244900 -0.79983000 II
## 152 53 -0.34733000 1.4094000 -0.3759000 -0.04468000 III
## 153 51 0.83789000 0.5064200 -0.5451300 0.31490000 II
## 154 29 0.00181450 0.8928400 -0.6786000 -0.17833000 II
## 155 63 0.90314000 1.9678000 -0.7907900 0.96265000 II
## 156 77 -0.16159000 1.4507000 -0.7584200 0.73220000 III
## 157 46 0.22669000 1.9298000 -0.3312400 -0.59541000 I
## 158 76 -0.86379000 -0.1186400 0.0707650 0.75067000 III
## 159 48 -0.38123000 -0.4745600 0.5672800 -0.30694000 II
## 160 39 -0.20347000 1.0977000 -0.0464760 -0.04514400 II
## 161 67 0.01476700 0.6039600 0.0665380 -0.85405000 II
## 162 41 0.67269000 -0.6332900 0.0087360 0.55564000 II
## 163 50 -0.15842000 2.1515000 -0.8536000 0.54333000 III
## 164 62 0.45690000 0.7394400 -0.6317700 -0.06009600 II
## 165 59 0.00564860 1.4818000 0.3171100 0.32139000 II
## 166 36 0.46931000 1.5526000 -0.1652400 -0.27599000 III
## 167 50 -0.25023000 0.3230000 -0.5137600 -0.81081000 I
## 168 90 -1.38820000 0.7750400 0.2908800 -0.92427000 III
## 169 70 0.70029000 0.9734700 -0.2964500 0.10551000 I
## 170 60 -0.11790000 1.1539000 0.5417400 0.74579000 I
## 172 48 0.22826000 1.2112000 -0.4632600 -0.54502000 II
## 173 84 -0.23032000 1.1304000 0.1664200 1.18410000 II
## 174 54 -0.41706000 1.2694000 -0.7268100 -0.82771000 II
## 175 50 0.33653000 0.6502000 -0.7719600 0.41034000 III
## 176 55 0.39568000 0.8609300 -0.9718600 -0.06062000 III
## 177 49 -1.25170000 2.6739000 -0.5358000 -1.10710000 III
## 178 46 0.14483000 -0.7610700 0.7334500 0.82384000 III
## 179 48 -0.64398000 -0.5936300 -0.1097400 -1.60280000 III
## 180 49 -0.28710000 0.6553600 -0.0991030 -0.93850000 II
## 181 52 1.09020000 2.7660000 -0.5568100 -0.47586000 II
## 182 74 -0.21082000 -0.9787300 -1.0600000 -0.84252000 II
## 184 41 -0.07034000 -0.2715300 0.4670800 -1.21960000 II
## 185 49 -0.11986000 -0.9451300 0.7477600 -0.67553000 I
## 186 83 -0.39298000 1.4757000 0.1437800 0.64097000 I
## 187 79 -0.48269000 -0.3167700 0.4715800 0.34744000 II
## 188 52 0.44520000 0.8193800 -0.4386700 -0.00789630 II
## 189 46 0.52751000 -0.6212300 -0.5469900 0.39106000 II
## 191 53 -0.62780000 0.2610200 -0.2042000 -1.30530000 III
## 192 63 -0.55957000 0.4844500 0.0810550 0.60983000 II
## 193 45 -0.46408000 0.8533700 0.2477400 -0.54734000 III
## 194 66 0.49589000 1.8661000 0.0440830 0.24241000 II
## 195 52 0.78418000 2.2938000 -0.4386200 -0.04782900 III
## 196 66 -0.72486000 -0.2828900 0.9594300 0.35090000 II
## 198 76 -0.24514000 0.6140700 0.1265100 0.39114000 I
## 199 49 -0.06449700 -0.8713200 0.8688000 0.31662000 III
## 200 51 -0.47620000 1.7946000 -0.1477300 -0.30778000 II
## 201 42 0.17295000 1.2966000 -0.8567700 -0.23695000 II
## 202 59 0.27641000 -0.8761200 1.2980000 -0.68664000 II
## 203 40 0.71682000 1.0004000 0.1348700 -0.61786000 II
## 204 59 0.53857000 -0.8717800 -0.5883600 -0.95198000 III
## 205 74 0.97251000 1.4268000 -0.3665700 -0.10782000 II
## 206 47 -0.19819000 0.6259700 -0.2813700 -0.47774000 II
## 208 44 0.96405000 0.8230400 -0.3071500 -0.14975000 III
## 209 49 -0.14441000 2.3240000 -0.7074400 0.71895000 III
## 210 54 0.51432000 1.6936000 -0.1440100 1.02490000 III
## 211 77 0.47771000 0.8188100 -0.4063000 -0.65479000 I
## 212 66 -0.33451000 1.6081000 0.3804700 0.12195000 II
## 213 35 -0.81753000 1.7904000 -1.0416000 0.03852200 II
## 214 45 -0.71163000 1.6924000 -0.8007600 0.79440000 II
## 215 38 0.28736000 0.4624900 -0.6787300 0.49462000 I
## 216 63 0.17670000 0.4439400 -0.2507500 -0.37407000 II
## 217 59 0.71527000 2.1912000 0.3915800 0.95198000 II
## 218 43 0.44281000 1.4144000 -0.9325200 0.12166000 II
## 219 85 0.40553000 0.3314400 -0.2574300 -0.65476000 II
## 221 62 0.78601000 0.8084500 0.4222900 -0.14382000 II
## 223 56 -0.58641000 0.2244600 -0.3065200 -1.59650000 II
## 224 78 -0.87618000 0.1294300 -0.3703800 0.13219000 I
## 225 78 -0.16950000 1.3879000 -1.3071000 -1.25970000 I
## 226 47 0.29488000 1.4037000 0.2443000 -0.39588000 III
## 227 51 0.18762000 2.6639000 1.1956000 0.18640000 III
## 228 68 -0.57675000 1.3276000 0.0812470 -0.39916000 II
## 229 51 -0.38466000 0.8060900 -1.6274000 -0.51291000 II
## 230 64 -0.34457000 0.7901200 -0.4688700 -0.45557000 I
## 231 41 0.06472300 0.7594100 0.2814900 -1.71270000 II
## 232 54 -0.81605000 -0.6354500 1.3379000 -0.42646000 III
## 233 54 0.43464000 2.4522000 0.3338900 0.62901000 II
## 234 51 0.57241000 2.0089000 -1.1825000 0.29032000 II
## 235 64 0.41603000 1.6094000 -0.5194400 0.94267000 III
## 236 56 0.32600000 1.8602000 -1.0771000 0.33664000 III
## 237 64 -0.33866000 -0.6414700 1.6575000 -0.42082000 III
## 238 46 0.76582000 0.5240500 -0.1571700 0.14390000 III
## 239 40 0.33626000 1.7196000 0.4395100 -0.34219000 II
## 240 48 -0.34001000 0.1625700 -0.1772000 0.41255000 II
## 241 54 -0.58637000 1.5136000 1.0619000 0.10124000 I
## 242 49 -0.58397000 -0.3327200 -0.1308700 -1.23910000 III
## 243 47 -0.36177000 1.1137000 0.3133500 -0.57385000 II
## 244 59 0.02459800 1.4005000 0.0247510 0.28032000 II
## 245 60 0.20862000 -0.5610300 1.2385000 0.96681000 III
## 246 72 0.41729000 1.2071000 -0.8120600 0.93551000 II
## 247 56 0.45517000 1.1774000 -0.5745600 0.36635000 I
## 248 58 -0.47567000 0.4296800 -0.5886600 0.64553000 III
## 249 37 -1.14670000 0.9610400 0.2756300 0.24595000 III
## 250 66 0.28638000 1.3998000 0.3188300 0.83605000 II
## 251 71 0.35524000 1.9752000 -0.0134430 0.55076000 III
## 252 88 0.20412000 0.5253400 -0.8454400 -0.70565000 II
## 253 68 -0.41613000 0.7584300 0.4399200 0.20187000 II
## 254 57 0.19465000 1.6678000 0.3933900 0.70333000 II
## 255 45 -0.27807000 1.4291000 -0.5789500 0.22264000 II
## 256 51 0.09048700 -0.6387300 2.1934000 -0.83843000 II
## 257 63 -0.60324000 1.1190000 -0.6058300 -0.58394000 III
## 258 61 0.38306000 1.6546000 0.5730100 0.34822000 III
## 259 41 -0.05928900 2.0552000 0.0470340 -0.21004000 II
## 260 50 -0.70204000 0.2030700 0.5440400 -0.02387000 III
## 261 60 0.29220000 1.7753000 -0.0936310 0.56704000 II
## 262 65 -0.44324000 0.6095500 -0.3988900 0.33211000 II
## 263 54 0.75382000 1.6425000 -0.3328500 0.85786000 II
## 264 52 0.45553000 1.3648000 -0.5388100 0.14426000 II
## 265 56 -0.31474000 -0.4006100 0.7553200 0.09224900 I
## 267 84 0.83804000 2.0332000 -0.6316600 0.56565000 II
## 268 77 -0.52832000 0.1665200 0.1192600 -0.43346000 II
## 269 57 0.35680000 1.5832000 -0.4670800 1.03860000 II
## 270 48 0.33092000 1.7624000 0.1762400 0.78927000 II
## 271 80 -0.31038000 2.2200000 -0.5563400 0.52372000 II
## 272 68 1.47040000 0.4690000 0.1355400 -0.28984000 II
## 273 69 -0.48903000 2.3784000 -0.4670900 0.09731000 I
## 274 82 0.65960000 1.5541000 -0.8286400 0.27855000 II
## 275 65 -0.00202070 0.8140200 -0.3315500 -0.06266300 II
## 276 63 0.04965600 1.4490000 -0.0219280 0.42886000 II
## 277 63 -0.03725300 1.0907000 -0.4238100 -0.30536000 I
## 278 63 0.05272800 0.7221000 -0.3086500 -0.53129000 III
## 279 43 0.60863000 1.7145000 0.1497000 -0.27394000 III
## 280 60 -0.73062000 0.4664800 -0.4949400 -0.21706000 III
## 281 62 -0.38444000 1.0040000 -0.6385700 -0.54466000 II
## 282 39 -0.14791000 1.2713000 0.2093100 -0.47290000 III
## 283 53 0.26748000 1.6773000 -0.1753600 0.46565000 III
## 285 54 -1.37910000 1.6607000 -0.1566200 -0.41103000 II
## 288 68 0.42607000 0.3599500 -0.3456100 -0.08013700 II
## 289 74 -0.26558000 1.2695000 0.3808300 -0.00721740 II
## 290 62 0.27503000 0.8812000 -0.5137100 -0.02358500 III
## 291 46 0.00397670 1.2038000 -0.4135000 0.52908000 III
## 292 46 0.09510300 -0.4151700 -0.1948900 0.04652700 III
## 293 62 0.06631200 1.2954000 0.2512100 -0.31091000 I
## 294 54 0.78321000 -0.8414000 1.5070000 0.81629000 II
## 295 52 -0.17932000 1.5287000 -0.1631300 0.83222000 I
## 296 60 0.25287000 2.2967000 0.5324400 0.04629100 III
## 297 61 0.18832000 -0.1571100 -0.5598400 -0.59975000 I
## 298 45 -0.00071536 1.6332000 0.1326500 -1.45160000 III
## 299 63 0.27546000 2.2613000 -0.0068398 0.56781000 I
## 300 63 0.29059000 0.8124900 1.0691000 -0.14857000 III
## 301 63 0.20279000 -0.0433840 0.7750400 0.62011000 I
## 302 53 0.28708000 3.4022000 -0.3285000 0.21619000 III
## 303 68 -0.05515700 -0.4555300 0.1632400 -0.73511000 II
## 304 53 -0.36165000 2.6566000 0.3076900 0.20382000 II
## 305 52 0.20051000 2.1842000 -0.6560200 0.81131000 II
## 307 64 -0.24408000 1.6907000 -0.1285400 -1.34020000 II
## 308 71 0.68717000 0.3688400 0.7834300 0.94318000 II
## 309 59 -0.40951000 0.0375630 0.2178000 -0.04466500 III
## 310 75 0.50861000 2.7056000 -0.3339300 0.38805000 II
## 311 53 -0.08946300 1.0331000 -0.4779300 -0.25303000 II
## 312 50 -0.32560000 1.9899000 -0.0381470 0.25251000 II
## 313 66 -1.34410000 1.1280000 -0.2293500 -0.22993000 II
## 314 65 -0.09133400 0.0547570 0.0346480 -0.58883000 II
## 315 44 0.89185000 0.4444000 0.1102000 0.15685000 II
## 316 61 -0.40592000 2.7513000 -0.5954200 0.81639000 I
## 317 54 -1.23130000 -0.7676700 0.8616000 0.23905000 II
## 318 64 0.97580000 2.4847000 -0.5133200 0.66698000 I
## 319 66 -0.37043000 1.9185000 -0.7960800 -0.06290300 II
## 320 79 -1.06220000 1.1664000 -0.6556500 -0.09238100 II
## 321 59 0.45230000 1.3959000 -0.2308400 0.79631000 I
## 323 52 0.24080000 1.7348000 0.3310900 1.04410000 I
## 324 77 0.33912000 1.3193000 0.5874000 0.35192000 II
## 325 59 0.15050000 1.2674000 0.6426500 0.26142000 II
## 326 78 -0.91723000 -0.6469300 0.5525900 0.45874000 II
## 327 46 -0.74014000 -0.7375500 0.7203200 0.72182000 II
## 328 46 -0.05503600 -0.5064300 0.8877800 -0.08942400 II
## 329 60 0.09458500 1.0003000 -0.2257500 0.04860200 III
## 330 36 0.23180000 0.6180400 -0.5577900 -0.51735000 III
## 331 44 0.73272000 1.1117000 -0.2695200 -0.35492000 II
## 332 61 -0.71947000 2.5485000 -0.1502400 0.33968000 II
## 333 79 0.47940000 2.0559000 -0.5313600 -0.18848000 I
## 334 76 -0.24427000 0.9255600 -0.4182300 -0.06784800 I
## Histology HER2.status Surgery_type
## 1 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 2 Mucinous Carcinoma Negative Lumpectomy
## 3 Infiltrating Ductal Carcinoma Negative Other
## 4 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 5 Infiltrating Ductal Carcinoma Negative Other
## 6 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 7 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 9 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 10 Infiltrating Lobular Carcinoma Positive Other
## 11 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 12 Infiltrating Ductal Carcinoma Negative Other
## 13 Infiltrating Lobular Carcinoma Negative Simple Mastectomy
## 14 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 15 Mucinous Carcinoma Negative Modified Radical Mastectomy
## 16 Infiltrating Lobular Carcinoma Negative Simple Mastectomy
## 17 Mucinous Carcinoma Negative Lumpectomy
## 18 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 19 Infiltrating Ductal Carcinoma Negative Other
## 20 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 21 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 22 Infiltrating Ductal Carcinoma Negative Other
## 24 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 25 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 26 Mucinous Carcinoma Positive Modified Radical Mastectomy
## 27 Mucinous Carcinoma Negative Lumpectomy
## 28 Infiltrating Lobular Carcinoma Negative Simple Mastectomy
## 29 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 30 Infiltrating Ductal Carcinoma Positive Lumpectomy
## 31 Infiltrating Ductal Carcinoma Negative Other
## 32 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 33 Infiltrating Lobular Carcinoma Negative Simple Mastectomy
## 34 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 35 Infiltrating Lobular Carcinoma Negative Simple Mastectomy
## 36 Infiltrating Ductal Carcinoma Negative Other
## 37 Infiltrating Lobular Carcinoma Negative Other
## 38 Infiltrating Ductal Carcinoma Negative Other
## 39 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 40 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 41 Infiltrating Ductal Carcinoma Negative Other
## 42 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 43 Infiltrating Lobular Carcinoma Negative Other
## 44 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 45 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 46 Infiltrating Ductal Carcinoma Negative Other
## 47 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 48 Infiltrating Lobular Carcinoma Negative Other
## 49 Infiltrating Ductal Carcinoma Negative Other
## 50 Infiltrating Ductal Carcinoma Positive Lumpectomy
## 51 Infiltrating Ductal Carcinoma Negative Other
## 52 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 53 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 54 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 55 Infiltrating Ductal Carcinoma Positive Modified Radical Mastectomy
## 56 Infiltrating Lobular Carcinoma Negative Other
## 57 Infiltrating Lobular Carcinoma Negative Lumpectomy
## 58 Infiltrating Lobular Carcinoma Negative Simple Mastectomy
## 59 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 60 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 61 Infiltrating Lobular Carcinoma Negative Other
## 62 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 63 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 64 Infiltrating Ductal Carcinoma Negative Other
## 65 Infiltrating Lobular Carcinoma Negative Other
## 66 Infiltrating Ductal Carcinoma Negative Other
## 67 Infiltrating Lobular Carcinoma Negative Other
## 68 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 69 Infiltrating Ductal Carcinoma Negative Other
## 70 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 71 Infiltrating Ductal Carcinoma Negative Other
## 72 Infiltrating Ductal Carcinoma Negative Other
## 73 Infiltrating Ductal Carcinoma Negative Other
## 74 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 75 Infiltrating Ductal Carcinoma Negative Other
## 76 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 77 Infiltrating Ductal Carcinoma Negative Other
## 78 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 79 Infiltrating Ductal Carcinoma Negative Other
## 80 Infiltrating Lobular Carcinoma Negative Other
## 81 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 82 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 83 Infiltrating Lobular Carcinoma Negative Simple Mastectomy
## 84 Infiltrating Ductal Carcinoma Negative Other
## 85 Mucinous Carcinoma Negative Simple Mastectomy
## 86 Infiltrating Ductal Carcinoma Positive Other
## 87 Infiltrating Ductal Carcinoma Positive Simple Mastectomy
## 88 Mucinous Carcinoma Negative Lumpectomy
## 89 Infiltrating Ductal Carcinoma Negative Other
## 90 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 91 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 92 Infiltrating Lobular Carcinoma Negative Other
## 93 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 94 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 95 Mucinous Carcinoma Negative Modified Radical Mastectomy
## 96 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 97 Infiltrating Ductal Carcinoma Positive Lumpectomy
## 98 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 99 Infiltrating Ductal Carcinoma Positive Modified Radical Mastectomy
## 101 Infiltrating Ductal Carcinoma Positive Simple Mastectomy
## 102 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 103 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 104 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 105 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 106 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 107 Infiltrating Ductal Carcinoma Negative Other
## 108 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 109 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 110 Infiltrating Lobular Carcinoma Negative Other
## 111 Infiltrating Lobular Carcinoma Negative Lumpectomy
## 113 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 114 Infiltrating Ductal Carcinoma Negative Other
## 115 Infiltrating Ductal Carcinoma Negative Other
## 116 Infiltrating Lobular Carcinoma Negative Other
## 117 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 118 Infiltrating Ductal Carcinoma Negative Other
## 119 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 120 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 121 Infiltrating Lobular Carcinoma Negative Lumpectomy
## 122 Infiltrating Lobular Carcinoma Negative Lumpectomy
## 123 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 124 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 125 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 126 Infiltrating Ductal Carcinoma Negative Other
## 127 Infiltrating Ductal Carcinoma Negative Other
## 128 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 129 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 130 Infiltrating Lobular Carcinoma Negative Simple Mastectomy
## 131 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 132 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 133 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 134 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 135 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 136 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 137 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 138 Infiltrating Lobular Carcinoma Negative Other
## 139 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 140 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 141 Infiltrating Ductal Carcinoma Positive Modified Radical Mastectomy
## 142 Infiltrating Ductal Carcinoma Negative Other
## 143 Infiltrating Ductal Carcinoma Positive Lumpectomy
## 144 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 145 Infiltrating Lobular Carcinoma Negative Other
## 146 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 147 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 148 Infiltrating Ductal Carcinoma Negative Other
## 149 Infiltrating Ductal Carcinoma Positive Modified Radical Mastectomy
## 150 Infiltrating Ductal Carcinoma Negative Other
## 151 Infiltrating Ductal Carcinoma Negative Other
## 152 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 153 Infiltrating Lobular Carcinoma Negative Simple Mastectomy
## 154 Infiltrating Ductal Carcinoma Negative Other
## 155 Infiltrating Lobular Carcinoma Negative Simple Mastectomy
## 156 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 157 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 158 Infiltrating Lobular Carcinoma Positive Other
## 159 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 160 Infiltrating Lobular Carcinoma Negative Simple Mastectomy
## 161 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 162 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 163 Infiltrating Ductal Carcinoma Negative Other
## 164 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 165 Infiltrating Lobular Carcinoma Negative Other
## 166 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 167 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 168 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 169 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 170 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 172 Infiltrating Lobular Carcinoma Negative Other
## 173 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 174 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 175 Infiltrating Ductal Carcinoma Negative Other
## 176 Infiltrating Lobular Carcinoma Negative Other
## 177 Infiltrating Ductal Carcinoma Negative Other
## 178 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 179 Infiltrating Ductal Carcinoma Negative Other
## 180 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 181 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 182 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 184 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 185 Infiltrating Ductal Carcinoma Negative Other
## 186 Infiltrating Ductal Carcinoma Negative Other
## 187 Infiltrating Ductal Carcinoma Positive Lumpectomy
## 188 Infiltrating Ductal Carcinoma Negative Other
## 189 Infiltrating Ductal Carcinoma Negative Other
## 191 Infiltrating Lobular Carcinoma Positive Simple Mastectomy
## 192 Infiltrating Ductal Carcinoma Positive Other
## 193 Infiltrating Lobular Carcinoma Positive Other
## 194 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 195 Infiltrating Ductal Carcinoma Positive Lumpectomy
## 196 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 198 Infiltrating Ductal Carcinoma Negative Other
## 199 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 200 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 201 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 202 Mucinous Carcinoma Negative Modified Radical Mastectomy
## 203 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 204 Infiltrating Ductal Carcinoma Negative Other
## 205 Infiltrating Lobular Carcinoma Negative Lumpectomy
## 206 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 208 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 209 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 210 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 211 Infiltrating Lobular Carcinoma Negative Lumpectomy
## 212 Infiltrating Ductal Carcinoma Negative Other
## 213 Infiltrating Lobular Carcinoma Negative Other
## 214 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 215 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 216 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 217 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 218 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 219 Infiltrating Ductal Carcinoma Negative Other
## 221 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 223 Infiltrating Ductal Carcinoma Negative Other
## 224 Infiltrating Ductal Carcinoma Negative Other
## 225 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 226 Infiltrating Lobular Carcinoma Positive Lumpectomy
## 227 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 228 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 229 Infiltrating Ductal Carcinoma Positive Modified Radical Mastectomy
## 230 Infiltrating Lobular Carcinoma Negative Lumpectomy
## 231 Infiltrating Lobular Carcinoma Negative Other
## 232 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 233 Infiltrating Ductal Carcinoma Negative Other
## 234 Infiltrating Lobular Carcinoma Negative Other
## 235 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 236 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 237 Infiltrating Ductal Carcinoma Negative Other
## 238 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 239 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 240 Mucinous Carcinoma Negative Simple Mastectomy
## 241 Infiltrating Ductal Carcinoma Negative Other
## 242 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 243 Infiltrating Ductal Carcinoma Negative Other
## 244 Infiltrating Ductal Carcinoma Positive Lumpectomy
## 245 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 246 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 247 Mucinous Carcinoma Negative Modified Radical Mastectomy
## 248 Infiltrating Lobular Carcinoma Negative Other
## 249 Infiltrating Ductal Carcinoma Positive Modified Radical Mastectomy
## 250 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 251 Infiltrating Ductal Carcinoma Negative Other
## 252 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 253 Infiltrating Lobular Carcinoma Negative Lumpectomy
## 254 Infiltrating Ductal Carcinoma Negative Other
## 255 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 256 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 257 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 258 Infiltrating Lobular Carcinoma Positive Other
## 259 Infiltrating Lobular Carcinoma Negative Other
## 260 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 261 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 262 Infiltrating Ductal Carcinoma Negative Other
## 263 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 264 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 265 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 267 Infiltrating Lobular Carcinoma Negative Other
## 268 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 269 Infiltrating Lobular Carcinoma Negative Lumpectomy
## 270 Infiltrating Lobular Carcinoma Negative Other
## 271 Infiltrating Ductal Carcinoma Negative Other
## 272 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 273 Infiltrating Ductal Carcinoma Negative Other
## 274 Infiltrating Ductal Carcinoma Negative Other
## 275 Infiltrating Lobular Carcinoma Negative Other
## 276 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 277 Infiltrating Ductal Carcinoma Positive Simple Mastectomy
## 278 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 279 Infiltrating Lobular Carcinoma Positive Modified Radical Mastectomy
## 280 Infiltrating Lobular Carcinoma Negative Other
## 281 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 282 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 283 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 285 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 288 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 289 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 290 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 291 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 292 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 293 Mucinous Carcinoma Negative Lumpectomy
## 294 Infiltrating Lobular Carcinoma Negative Lumpectomy
## 295 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 296 Infiltrating Ductal Carcinoma Positive Lumpectomy
## 297 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 298 Infiltrating Ductal Carcinoma Negative Other
## 299 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 300 Infiltrating Ductal Carcinoma Negative Other
## 301 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 302 Infiltrating Lobular Carcinoma Negative Lumpectomy
## 303 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 304 Infiltrating Ductal Carcinoma Negative Other
## 305 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 307 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 308 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 309 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 310 Infiltrating Lobular Carcinoma Negative Lumpectomy
## 311 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 312 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 313 Infiltrating Ductal Carcinoma Negative Other
## 314 Infiltrating Ductal Carcinoma Negative Other
## 315 Infiltrating Ductal Carcinoma Negative Modified Radical Mastectomy
## 316 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 317 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 318 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 319 Infiltrating Lobular Carcinoma Negative Simple Mastectomy
## 320 Infiltrating Lobular Carcinoma Negative Simple Mastectomy
## 321 Infiltrating Lobular Carcinoma Negative Other
## 323 Infiltrating Ductal Carcinoma Negative Other
## 324 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 325 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 326 Infiltrating Lobular Carcinoma Negative Modified Radical Mastectomy
## 327 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 328 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 329 Infiltrating Ductal Carcinoma Negative Simple Mastectomy
## 330 Infiltrating Ductal Carcinoma Positive Simple Mastectomy
## 331 Infiltrating Lobular Carcinoma Negative Other
## 332 Infiltrating Ductal Carcinoma Negative Lumpectomy
## 333 Infiltrating Ductal Carcinoma Positive Lumpectomy
## 334 Infiltrating Ductal Carcinoma Negative Lumpectomy
## Patient_Status Survival_days
## 1 1 155
## 2 0 562
## 3 1 274
## 4 1 168
## 5 0 782
## 6 1 1519
## 7 1 368
## 9 1 365
## 10 1 304
## 11 1 207
## 12 0 340
## 13 1 40
## 14 1 1001
## 15 1 923
## 16 1 886
## 17 1 496
## 18 1 267
## 19 0 948
## 20 1 911
## 21 1 21
## 22 1 1277
## 24 1 271
## 25 1 76
## 26 0 55
## 27 0 134
## 28 1 285
## 29 1 178
## 30 1 200
## 31 1 1888
## 32 0 223
## 33 1 68
## 34 1 47
## 35 1 848
## 36 1 140
## 37 1 170
## 38 0 340
## 39 1 35
## 40 0 235
## 41 1 4
## 42 1 372
## 43 1 268
## 44 1 755
## 45 0 45
## 46 1 1186
## 47 0 3019
## 48 1 608
## 49 1 30
## 50 1 550
## 51 1 546
## 52 1 1312
## 53 1 767
## 54 1 14
## 55 1 409
## 56 1 963
## 57 1 952
## 58 1 85
## 59 1 218
## 60 1 463
## 61 1 227
## 62 1 777
## 63 1 316
## 64 1 365
## 65 1 727
## 66 0 917
## 67 0 554
## 68 1 956
## 69 1 917
## 70 1 344
## 71 1 736
## 72 0 274
## 73 1 234
## 74 1 623
## 75 0 747
## 76 1 702
## 77 0 579
## 78 1 105
## 79 1 1028
## 80 0 340
## 81 1 81
## 82 1 325
## 83 1 106
## 84 1 550
## 85 1 673
## 86 1 761
## 87 1 19
## 88 1 400
## 89 1 549
## 90 1 236
## 91 0 268
## 92 0 912
## 93 1 813
## 94 1 974
## 95 1 352
## 96 1 321
## 97 1 189
## 98 1 10
## 99 1 272
## 101 1 139
## 102 1 736
## 103 1 92
## 104 1 136
## 105 1 519
## 106 0 1148
## 107 1 845
## 108 1 593
## 109 1 64
## 110 1 50
## 111 1 282
## 113 1 1288
## 114 1 324
## 115 0 2317
## 116 0 867
## 117 0 502
## 118 0 0
## 119 0 376
## 120 0 23
## 121 1 277
## 122 1 392
## 123 1 551
## 124 1 707
## 125 1 545
## 126 1 457
## 127 0 457
## 128 0 119
## 129 0 9
## 130 0 1455
## 131 0 172
## 132 0 107
## 133 0 186
## 134 0 242
## 135 1 595
## 136 1 167
## 137 1 140
## 138 1 577
## 139 1 284
## 140 1 405
## 141 1 471
## 142 1 191
## 143 1 24
## 144 1 863
## 145 1 168
## 146 1 339
## 147 1 421
## 148 1 98
## 149 1 477
## 150 1 396
## 151 1 396
## 152 1 185
## 153 1 665
## 154 1 6
## 155 0 410
## 156 1 80
## 157 1 800
## 158 0 304
## 159 1 371
## 160 1 850
## 161 1 274
## 162 1 870
## 163 0 650
## 164 1 57
## 165 1 396
## 166 1 346
## 167 1 2763
## 168 1 51
## 169 1 1559
## 170 1 127
## 172 1 76
## 173 1 177
## 174 1 21
## 175 0 140
## 176 1 304
## 177 1 7
## 178 1 30
## 179 1 487
## 180 1 423
## 181 1 890
## 182 1 461
## 184 1 273
## 185 1 558
## 186 1 157
## 187 1 54
## 188 1 1648
## 189 1 1330
## 191 1 370
## 192 1 756
## 193 1 743
## 194 1 161
## 195 1 124
## 196 1 235
## 198 1 481
## 199 1 126
## 200 1 586
## 201 1 117
## 202 1 645
## 203 1 113
## 204 1 549
## 205 1 9
## 206 1 189
## 208 1 641
## 209 1 222
## 210 1 374
## 211 1 791
## 212 1 579
## 213 1 365
## 214 1 449
## 215 1 258
## 216 1 259
## 217 1 104
## 218 1 51
## 219 1 803
## 221 1 201
## 223 1 721
## 224 1 620
## 225 1 525
## 226 1 804
## 227 0 675
## 228 0 10
## 229 1 397
## 230 1 469
## 231 1 770
## 232 1 441
## 233 0 650
## 234 1 743
## 235 1 124
## 236 0 185
## 237 1 640
## 238 1 782
## 239 0 388
## 240 1 687
## 241 1 765
## 242 1 162
## 243 1 396
## 244 1 437
## 245 0 12
## 246 1 743
## 247 1 750
## 248 1 518
## 249 1 450
## 250 0 187
## 251 0 78
## 252 1 492
## 253 0 354
## 254 0 450
## 255 0 554
## 256 1 650
## 257 1 70
## 258 1 245
## 259 1 660
## 260 0 780
## 261 0 476
## 262 0 577
## 263 0 199
## 264 1 635
## 265 1 150
## 267 1 365
## 268 1 256
## 269 1 331
## 270 1 650
## 271 1 396
## 272 1 359
## 273 1 461
## 274 1 273
## 275 0 333
## 276 1 630
## 277 1 297
## 278 0 225
## 279 1 359
## 280 1 674
## 281 1 73
## 282 1 239
## 283 1 260
## 285 1 629
## 288 1 230
## 289 1 6
## 290 1 205
## 291 1 292
## 292 1 268
## 293 1 358
## 294 1 589
## 295 1 264
## 296 1 456
## 297 1 315
## 298 1 476
## 299 1 225
## 300 1 31
## 301 1 75
## 302 1 379
## 303 1 169
## 304 1 365
## 305 1 191
## 307 1 532
## 308 1 261
## 309 1 392
## 310 1 309
## 311 1 465
## 312 1 344
## 313 1 496
## 314 1 520
## 315 1 252
## 316 1 481
## 317 1 512
## 318 1 490
## 319 1 352
## 320 1 208
## 321 1 465
## 323 1 392
## 324 1 476
## 325 1 478
## 326 1 519
## 327 1 448
## 328 0 476
## 329 0 485
## 330 0 90
## 331 0 124
## 332 0 434
## 333 0 456
## 334 0 421
#names(new_data_2)
3. Thống kê suy diễn
3.1 Chia tập dữ liệu train và test
library(caTools)
# Fix the RNG seed so the partition is reproducible, then draw a logical
# mask over the rows of new_data_2 (SplitRatio = 0.8 -> about 80% TRUE).
set.seed(115)
split <- sample.split(new_data_2$Patient_Status, SplitRatio = 0.8)
# Rows flagged TRUE become the training set; all other rows the test set.
train_data <- new_data_2[split, , drop = FALSE]
test_data <- new_data_2[!split, , drop = FALSE]
3.2 Huấn luyện mô hình hồi quy logistic
# Full logistic regression: Patient_Status (coded 0/1) against every
# candidate predictor, fitted on the training partition only.
full_model <- glm(
  formula = Patient_Status ~ Age + Protein1 + Protein2 + Protein3 + Protein4 +
    Tumour_Stage + Histology + HER2.status + Surgery_type + Survival_days,
  family = "binomial",
  data = train_data
)
3.3 Chọn mô hình tối ưu bằng step AIC
# Backward elimination by AIC starting from full_model. No scope is supplied,
# so terms can only be dropped; the per-step trace is printed.
step_model <- step(full_model, direction = "backward")
## Start: AIC=264.63
## Patient_Status ~ Age + Protein1 + Protein2 + Protein3 + Protein4 +
## Tumour_Stage + Histology + HER2.status + Surgery_type + Survival_days
##
## Df Deviance AIC
## - Surgery_type 3 237.43 261.43
## - Histology 2 235.61 261.61
## - Tumour_Stage 2 236.79 262.79
## - Survival_days 1 235.18 263.18
## - Age 1 235.26 263.26
## - Protein3 1 235.30 263.30
## - Protein1 1 235.40 263.40
## - HER2.status 1 235.63 263.63
## - Protein2 1 236.36 264.36
## <none> 234.63 264.63
## - Protein4 1 242.20 270.20
##
## Step: AIC=261.43
## Patient_Status ~ Age + Protein1 + Protein2 + Protein3 + Protein4 +
## Tumour_Stage + Histology + HER2.status + Survival_days
##
## Df Deviance AIC
## - Histology 2 238.38 258.38
## - Tumour_Stage 2 239.66 259.66
## - Protein3 1 237.91 259.91
## - Age 1 238.13 260.13
## - Survival_days 1 238.54 260.54
## - Protein1 1 238.72 260.72
## - HER2.status 1 238.78 260.78
## - Protein2 1 239.22 261.22
## <none> 237.43 261.43
## - Protein4 1 244.45 266.45
##
## Step: AIC=258.37
## Patient_Status ~ Age + Protein1 + Protein2 + Protein3 + Protein4 +
## Tumour_Stage + HER2.status + Survival_days
##
## Df Deviance AIC
## - Tumour_Stage 2 240.76 256.76
## - Age 1 238.88 256.88
## - Protein3 1 238.97 256.97
## - Survival_days 1 239.52 257.52
## - HER2.status 1 239.69 257.69
## - Protein1 1 239.92 257.92
## - Protein2 1 240.27 258.27
## <none> 238.38 258.38
## - Protein4 1 245.68 263.68
##
## Step: AIC=256.76
## Patient_Status ~ Age + Protein1 + Protein2 + Protein3 + Protein4 +
## HER2.status + Survival_days
##
## Df Deviance AIC
## - Survival_days 1 241.41 255.41
## - Protein3 1 241.43 255.43
## - Age 1 241.50 255.50
## - HER2.status 1 241.62 255.62
## - Protein1 1 242.21 256.21
## - Protein2 1 242.65 256.65
## <none> 240.76 256.76
## - Protein4 1 247.66 261.67
##
## Step: AIC=255.41
## Patient_Status ~ Age + Protein1 + Protein2 + Protein3 + Protein4 +
## HER2.status
##
## Df Deviance AIC
## - Protein3 1 242.05 254.05
## - Age 1 242.18 254.18
## - HER2.status 1 242.38 254.38
## - Protein1 1 242.89 254.89
## - Protein2 1 243.29 255.29
## <none> 241.41 255.41
## - Protein4 1 247.98 259.98
##
## Step: AIC=254.05
## Patient_Status ~ Age + Protein1 + Protein2 + Protein4 + HER2.status
##
## Df Deviance AIC
## - Age 1 242.82 252.82
## - HER2.status 1 242.98 252.98
## - Protein2 1 243.36 253.36
## - Protein1 1 243.63 253.63
## <none> 242.05 254.05
## - Protein4 1 249.05 259.05
##
## Step: AIC=252.82
## Patient_Status ~ Protein1 + Protein2 + Protein4 + HER2.status
##
## Df Deviance AIC
## - HER2.status 1 243.62 251.62
## - Protein2 1 244.17 252.17
## - Protein1 1 244.37 252.37
## <none> 242.82 252.82
## - Protein4 1 249.50 257.50
##
## Step: AIC=251.62
## Patient_Status ~ Protein1 + Protein2 + Protein4
##
## Df Deviance AIC
## - Protein2 1 245.00 251.00
## - Protein1 1 245.19 251.19
## <none> 243.62 251.62
## - Protein4 1 250.27 256.27
##
## Step: AIC=251
## Patient_Status ~ Protein1 + Protein4
##
## Df Deviance AIC
## - Protein1 1 246.32 250.32
## <none> 245.00 251.00
## - Protein4 1 251.78 255.78
##
## Step: AIC=250.32
## Patient_Status ~ Protein4
##
## Df Deviance AIC
## <none> 246.32 250.32
## - Protein4 1 251.97 253.97
# Print the coefficient table and deviance/AIC of the model kept by step().
summary(step_model)
##
## Call:
## glm(formula = Patient_Status ~ Protein4, family = "binomial",
## data = train_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.4608 0.1653 8.837 <2e-16 ***
## Protein4 -0.6665 0.2912 -2.289 0.0221 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 251.97 on 253 degrees of freedom
## Residual deviance: 246.32 on 252 degrees of freedom
## AIC: 250.32
##
## Number of Fisher Scoring iterations: 4
3.4 Đánh giá bằng confusion matrix
# Predict probabilities on the held-out rows, then turn them into hard 0/1
# labels with a 0.5 cut-off (probabilities above 0.5 become class 1).
pred_prob <- predict(
  object = step_model,
  newdata = test_data,
  type = "response"
)
pred_class <- ifelse(pred_prob <= 0.5, 0, 1)
# Store predictions and observed labels as factors sharing the same two
# levels, so both classes appear in the table even when one is never predicted.
test_data[["pred_class"]] <- factor(pred_class, levels = c(0, 1))
test_data[["Patient_Status"]] <- factor(
  test_data[["Patient_Status"]],
  levels = c(0, 1)
)
library(caret)
## Loading required package: lattice
# Cross-tabulate predicted against observed classes; "1" is the positive class.
confusionMatrix(
  data = test_data$pred_class,
  reference = test_data$Patient_Status,
  positive = "1"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 0 0
## 1 12 51
##
## Accuracy : 0.8095
## 95% CI : (0.6909, 0.8975)
## No Information Rate : 0.8095
## P-Value [Acc > NIR] : 0.576391
##
## Kappa : 0
##
## Mcnemar's Test P-Value : 0.001496
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.8095
## Neg Pred Value : NaN
## Prevalence : 0.8095
## Detection Rate : 0.8095
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : 1
##
Vẽ đường cong ROC cho mô hình hồi quy logistic
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
# Observed labels back as numeric 0/1 (Patient_Status was converted to a
# factor for the confusion matrix above).
actual <- as.numeric(as.character(test_data$Patient_Status))
# pred_prob is the fitted probability of class 1, so higher scores must
# indicate cases: controls (0) < cases (1). Fix levels and direction
# explicitly instead of relying on pROC's automatic choice. The automatic
# choice picks whichever orientation has the higher median and therefore can
# never report an AUC below 0.5; on this test set it selected
# "controls > cases", i.e. it scored the *inverted* model (AUC 0.5964).
roc_obj <- roc(
  response = actual,
  predictor = pred_prob,
  levels = c(0, 1),
  direction = "<"
)
plot(roc_obj, col = "blue", main = "ROC Curve")

# AUC of the model as actually fitted (1 - 0.5964 from the inverted curve).
auc(roc_obj)
## Area under the curve: 0.4036