Datos de Cáncer

# Read the UCI breast-cancer file (comma-separated, read without a header)
# and attach the ten column names in a single step.
# NOTE(review): the breast_quad frequency table further down shows a literal
# "?" level, so missing values appear to be encoded as "?" in this file;
# consider na.strings = "?" if they should be treated as NA.
a <- setNames(
  read.table(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer/breast-cancer.data",
    sep = ","
  ),
  c(
    "Clase", "age", "menopause", "tumor_size", "inv_nodes",
    "node_caps", "deg_malig", "breast", "breast_quad", "irradiat"
  )
)
# Compact overview: one line per column with its type and first values.
glimpse(a)
## Rows: 286
## Columns: 10
## $ Clase       <chr> "no-recurrence-events", "no-recurrence-events", "no-recurr~
## $ age         <chr> "30-39", "40-49", "40-49", "60-69", "40-49", "60-69", "50-~
## $ menopause   <chr> "premeno", "premeno", "premeno", "ge40", "premeno", "ge40"~
## $ tumor_size  <chr> "30-34", "20-24", "20-24", "15-19", "0-4", "15-19", "25-29~
## $ inv_nodes   <chr> "0-2", "0-2", "0-2", "0-2", "0-2", "0-2", "0-2", "0-2", "0~
## $ node_caps   <chr> "no", "no", "no", "no", "no", "no", "no", "no", "no", "no"~
## $ deg_malig   <int> 3, 2, 2, 2, 2, 2, 2, 1, 2, 2, 3, 2, 1, 3, 3, 1, 2, 3, 3, 1~
## $ breast      <chr> "left", "right", "left", "right", "right", "left", "left",~
## $ breast_quad <chr> "left_low", "right_up", "left_low", "left_up", "right_low"~
## $ irradiat    <chr> "no", "no", "no", "no", "no", "no", "no", "no", "no", "no"~

Las dimensiones de los datos son:

# Number of rows and columns of the loaded data frame.
dim(a)
## [1] 286  10

Columnas que tiene el conjunto de datos y su descripción:

age

# Frequency table of the age groups, most frequent category first.
summarytools::freq(a$age, order = "freq")
## Frequencies  
## a$age  
## Type: Character  
## 
##               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
##       50-59     96     33.57          33.57     33.57          33.57
##       40-49     90     31.47          65.03     31.47          65.03
##       60-69     57     19.93          84.97     19.93          84.97
##       30-39     36     12.59          97.55     12.59          97.55
##       70-79      6      2.10          99.65      2.10          99.65
##       20-29      1      0.35         100.00      0.35         100.00
##        <NA>      0                               0.00         100.00
##       Total    286    100.00         100.00    100.00         100.00
# Bar chart of the number of rows per age group, bars filled by Clase.
ggplot(data = a, mapping = aes(x = age, fill = Clase)) +
  geom_bar()

menopause

# Frequency table of menopause status, most frequent category first.
summarytools::freq(a$menopause, order = "freq")
## Frequencies  
## a$menopause  
## Type: Character  
## 
##                 Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ------------- ------ --------- -------------- --------- --------------
##       premeno    150     52.45          52.45     52.45          52.45
##          ge40    129     45.10          97.55     45.10          97.55
##          lt40      7      2.45         100.00      2.45         100.00
##          <NA>      0                               0.00         100.00
##         Total    286    100.00         100.00    100.00         100.00
# Bar chart of the number of rows per menopause status, bars filled by Clase.
ggplot(data = a, mapping = aes(x = menopause, fill = Clase)) +
  geom_bar()

tumor_size

# Frequency table of the tumor-size ranges, most frequent category first.
summarytools::freq(a$tumor_size, order = "freq")
## Frequencies  
## a$tumor_size  
## Type: Character  
## 
##               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
##       30-34     60     20.98          20.98     20.98          20.98
##       25-29     54     18.88          39.86     18.88          39.86
##       20-24     50     17.48          57.34     17.48          57.34
##       15-19     30     10.49          67.83     10.49          67.83
##       10-14     28      9.79          77.62      9.79          77.62
##       40-44     22      7.69          85.31      7.69          85.31
##       35-39     19      6.64          91.96      6.64          91.96
##         0-4      8      2.80          94.76      2.80          94.76
##       50-54      8      2.80          97.55      2.80          97.55
##         5-9      4      1.40          98.95      1.40          98.95
##       45-49      3      1.05         100.00      1.05         100.00
##        <NA>      0                               0.00         100.00
##       Total    286    100.00         100.00    100.00         100.00
# Bar chart of the number of rows per tumor-size range, bars filled by Clase.
ggplot(data = a, mapping = aes(x = tumor_size, fill = Clase)) +
  geom_bar()

deg_malig

# Frequency table of the malignancy degree (the only integer column),
# most frequent value first.
summarytools::freq(a$deg_malig, order = "freq")
## Frequencies  
## a$deg_malig  
## Type: Integer  
## 
##               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
##           2    130     45.45          45.45     45.45          45.45
##           3     85     29.72          75.17     29.72          75.17
##           1     71     24.83         100.00     24.83         100.00
##        <NA>      0                               0.00         100.00
##       Total    286    100.00         100.00    100.00         100.00
# Bar chart of the number of rows per malignancy degree, bars filled by Clase.
# NOTE(review): deg_malig is an integer column, so the x axis is presumably
# drawn as continuous; wrap it in factor() if a discrete axis is wanted.
ggplot(data = a, mapping = aes(x = deg_malig, fill = Clase)) +
  geom_bar()

breast

# Frequency table of the affected breast (left/right), most frequent first.
summarytools::freq(a$breast, order = "freq")
## Frequencies  
## a$breast  
## Type: Character  
## 
##               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
##        left    152     53.15          53.15     53.15          53.15
##       right    134     46.85         100.00     46.85         100.00
##        <NA>      0                               0.00         100.00
##       Total    286    100.00         100.00    100.00         100.00
# Bar chart of the number of rows per breast side, bars filled by Clase.
ggplot(data = a, mapping = aes(x = breast, fill = Clase)) +
  geom_bar()

breast_quad

# Frequency table of the breast quadrant, most frequent category first.
# NOTE(review): the result includes one "?" entry counted as a valid level,
# which looks like an unrecoded missing value — confirm and recode if so.
summarytools::freq(a$breast_quad, order = "freq")
## Frequencies  
## a$breast_quad  
## Type: Character  
## 
##                   Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## --------------- ------ --------- -------------- --------- --------------
##        left_low    110     38.46          38.46     38.46          38.46
##         left_up     97     33.92          72.38     33.92          72.38
##        right_up     33     11.54          83.92     11.54          83.92
##       right_low     24      8.39          92.31      8.39          92.31
##         central     21      7.34          99.65      7.34          99.65
##               ?      1      0.35         100.00      0.35         100.00
##            <NA>      0                               0.00         100.00
##           Total    286    100.00         100.00    100.00         100.00
# Bar chart of the number of rows per breast quadrant, bars filled by Clase.
ggplot(data = a, mapping = aes(x = breast_quad, fill = Clase)) +
  geom_bar()