# Load the UCI breast-cancer data set straight from the repository URL.
# The file is read with read.table's default of no header row, so the ten
# column names are attached explicitly before inspecting the result.
a <- setNames(
  read.table(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer/breast-cancer.data",
    sep = ","
  ),
  c(
    "Clase", "age", "menopause", "tumor_size",
    "inv_nodes", "node_caps", "deg_malig",
    "breast", "breast_quad", "irradiat"
  )
)
# Compact overview of every column: name, type and first values.
glimpse(a)
## Rows: 286
## Columns: 10
## $ Clase <chr> "no-recurrence-events", "no-recurrence-events", "no-recurr~
## $ age <chr> "30-39", "40-49", "40-49", "60-69", "40-49", "60-69", "50-~
## $ menopause <chr> "premeno", "premeno", "premeno", "ge40", "premeno", "ge40"~
## $ tumor_size <chr> "30-34", "20-24", "20-24", "15-19", "0-4", "15-19", "25-29~
## $ inv_nodes <chr> "0-2", "0-2", "0-2", "0-2", "0-2", "0-2", "0-2", "0-2", "0~
## $ node_caps <chr> "no", "no", "no", "no", "no", "no", "no", "no", "no", "no"~
## $ deg_malig <int> 3, 2, 2, 2, 2, 2, 2, 1, 2, 2, 3, 2, 1, 3, 3, 1, 2, 3, 3, 1~
## $ breast <chr> "left", "right", "left", "right", "right", "left", "left",~
## $ breast_quad <chr> "left_low", "right_up", "left_low", "left_up", "right_low"~
## $ irradiat <chr> "no", "no", "no", "no", "no", "no", "no", "no", "no", "no"~
Las dimensiones de los datos son:
# Number of rows followed by number of columns of the data frame.
c(nrow(a), ncol(a))
## [1] 286 10
# Frequency table of the age groups; order = "freq" lists the rows by
# count rather than by label.
summarytools::freq(a$age, order = "freq")
## Frequencies
## a$age
## Type: Character
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
## 50-59 96 33.57 33.57 33.57 33.57
## 40-49 90 31.47 65.03 31.47 65.03
## 60-69 57 19.93 84.97 19.93 84.97
## 30-39 36 12.59 97.55 12.59 97.55
## 70-79 6 2.10 99.65 2.10 99.65
## 20-29 1 0.35 100.00 0.35 100.00
## <NA> 0 0.00 100.00
## Total 286 100.00 100.00 100.00 100.00
# Bar chart of the number of rows per age group, bars filled by Clase.
ggplot(data = a, mapping = aes(x = age, fill = Clase)) +
  geom_bar()
# Frequency table of menopause status; order = "freq" lists the rows by
# count rather than by label.
summarytools::freq(a$menopause, order = "freq")
## Frequencies
## a$menopause
## Type: Character
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ------------- ------ --------- -------------- --------- --------------
## premeno 150 52.45 52.45 52.45 52.45
## ge40 129 45.10 97.55 45.10 97.55
## lt40 7 2.45 100.00 2.45 100.00
## <NA> 0 0.00 100.00
## Total 286 100.00 100.00 100.00 100.00
# Bar chart of the number of rows per menopause status, bars filled by Clase.
ggplot(data = a, mapping = aes(x = menopause, fill = Clase)) +
  geom_bar()
# Frequency table of the tumor-size ranges; order = "freq" lists the rows
# by count rather than by label.
summarytools::freq(a$tumor_size, order = "freq")
## Frequencies
## a$tumor_size
## Type: Character
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
## 30-34 60 20.98 20.98 20.98 20.98
## 25-29 54 18.88 39.86 18.88 39.86
## 20-24 50 17.48 57.34 17.48 57.34
## 15-19 30 10.49 67.83 10.49 67.83
## 10-14 28 9.79 77.62 9.79 77.62
## 40-44 22 7.69 85.31 7.69 85.31
## 35-39 19 6.64 91.96 6.64 91.96
## 0-4 8 2.80 94.76 2.80 94.76
## 50-54 8 2.80 97.55 2.80 97.55
## 5-9 4 1.40 98.95 1.40 98.95
## 45-49 3 1.05 100.00 1.05 100.00
## <NA> 0 0.00 100.00
## Total 286 100.00 100.00 100.00 100.00
# Bar chart of the number of rows per tumor-size range, bars filled by Clase.
ggplot(data = a, mapping = aes(x = tumor_size, fill = Clase)) +
  geom_bar()
# Frequency table of the malignancy degree (an integer column);
# order = "freq" lists the rows by count rather than by value.
summarytools::freq(a$deg_malig, order = "freq")
## Frequencies
## a$deg_malig
## Type: Integer
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
## 2 130 45.45 45.45 45.45 45.45
## 3 85 29.72 75.17 29.72 75.17
## 1 71 24.83 100.00 24.83 100.00
## <NA> 0 0.00 100.00
## Total 286 100.00 100.00 100.00 100.00
# Bar chart of the number of rows per malignancy degree, bars filled by Clase.
# deg_malig is read as an integer holding the grades 1, 2 and 3, so it is
# wrapped in factor() to give each grade its own discrete axis position
# instead of placing the bars on a continuous scale. labs() restores the
# plain column name as the axis title.
ggplot(a, aes(x = factor(deg_malig), fill = Clase)) +
  geom_bar() +
  labs(x = "deg_malig")
# Frequency table of the affected breast (left / right); order = "freq"
# lists the rows by count rather than by label.
summarytools::freq(a$breast, order = "freq")
## Frequencies
## a$breast
## Type: Character
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
## left 152 53.15 53.15 53.15 53.15
## right 134 46.85 100.00 46.85 100.00
## <NA> 0 0.00 100.00
## Total 286 100.00 100.00 100.00 100.00
# Bar chart of the number of rows per breast side, bars filled by Clase.
ggplot(data = a, mapping = aes(x = breast, fill = Clase)) +
  geom_bar()
# Frequency table of the breast quadrant; order = "freq" lists the rows by
# count rather than by label.
# NOTE(review): the printed table contains a "?" level with one row while
# <NA> is 0 -- presumably "?" is the file's missing-value marker; confirm
# and decide whether it should be recoded to NA.
summarytools::freq(a$breast_quad, order = "freq")
## Frequencies
## a$breast_quad
## Type: Character
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## --------------- ------ --------- -------------- --------- --------------
## left_low 110 38.46 38.46 38.46 38.46
## left_up 97 33.92 72.38 33.92 72.38
## right_up 33 11.54 83.92 11.54 83.92
## right_low 24 8.39 92.31 8.39 92.31
## central 21 7.34 99.65 7.34 99.65
## ? 1 0.35 100.00 0.35 100.00
## <NA> 0 0.00 100.00
## Total 286 100.00 100.00 100.00 100.00
# Bar chart of the number of rows per breast quadrant, bars filled by Clase.
ggplot(data = a, mapping = aes(x = breast_quad, fill = Clase)) +
  geom_bar()