1. Lectura de datos

# Read the comma-separated data file, then assign descriptive names to its
# ten columns and print a per-column summary.
datos <- read.table(file = "breast-cancer.data", sep = ",")
colnames(datos) <- c(
  "Clase", "age", "menopause", "tumor_size", "inv_nodes",
  "node_caps", "deg_malig", "breast", "breast_quad", "irradiat"
)
summary(datos)
##     Clase               age             menopause          tumor_size       
##  Length:286         Length:286         Length:286         Length:286        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##   inv_nodes          node_caps           deg_malig        breast         
##  Length:286         Length:286         Min.   :1.000   Length:286        
##  Class :character   Class :character   1st Qu.:2.000   Class :character  
##  Mode  :character   Mode  :character   Median :2.000   Mode  :character  
##                                        Mean   :2.049                     
##                                        3rd Qu.:3.000                     
##                                        Max.   :3.000                     
##  breast_quad          irradiat        
##  Length:286         Length:286        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 

2. Exploración

# Number of rows and columns in the data set.
dim(x = datos)
## [1] 286  10
# First six observations.
head(x = datos, n = 6L)
##                  Clase   age menopause tumor_size inv_nodes node_caps deg_malig
## 1 no-recurrence-events 30-39   premeno      30-34       0-2        no         3
## 2 no-recurrence-events 40-49   premeno      20-24       0-2        no         2
## 3 no-recurrence-events 40-49   premeno      20-24       0-2        no         2
## 4 no-recurrence-events 60-69      ge40      15-19       0-2        no         2
## 5 no-recurrence-events 40-49   premeno        0-4       0-2        no         2
## 6 no-recurrence-events 60-69      ge40      15-19       0-2        no         2
##   breast breast_quad irradiat
## 1   left    left_low       no
## 2  right    right_up       no
## 3   left    left_low       no
## 4  right     left_up       no
## 5  right   right_low       no
## 6   left    left_low       no
# Compact structure: type and first values of every column.
str(object = datos)
## 'data.frame':    286 obs. of  10 variables:
##  $ Clase      : chr  "no-recurrence-events" "no-recurrence-events" "no-recurrence-events" "no-recurrence-events" ...
##  $ age        : chr  "30-39" "40-49" "40-49" "60-69" ...
##  $ menopause  : chr  "premeno" "premeno" "premeno" "ge40" ...
##  $ tumor_size : chr  "30-34" "20-24" "20-24" "15-19" ...
##  $ inv_nodes  : chr  "0-2" "0-2" "0-2" "0-2" ...
##  $ node_caps  : chr  "no" "no" "no" "no" ...
##  $ deg_malig  : int  3 2 2 2 2 2 2 1 2 2 ...
##  $ breast     : chr  "left" "right" "left" "right" ...
##  $ breast_quad: chr  "left_low" "right_up" "left_low" "left_up" ...
##  $ irradiat   : chr  "no" "no" "no" "no" ...
# Per-column summary (same call as in the data-loading section).
summary(object = datos)
##     Clase               age             menopause          tumor_size       
##  Length:286         Length:286         Length:286         Length:286        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##   inv_nodes          node_caps           deg_malig        breast         
##  Length:286         Length:286         Min.   :1.000   Length:286        
##  Class :character   Class :character   1st Qu.:2.000   Class :character  
##  Mode  :character   Mode  :character   Median :2.000   Mode  :character  
##                                        Mean   :2.049                     
##                                        3rd Qu.:3.000                     
##                                        Max.   :3.000                     
##  breast_quad          irradiat        
##  Length:286         Length:286        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 

3. Organización y visualización: Variables Cualitativas

3.1 Tablas de Frecuencia

# Absolute frequency of each class.
table(datos[["Clase"]])
## 
## no-recurrence-events    recurrence-events 
##                  201                   85
# Relative frequency of each class, shown with three decimals.
proptable <- prop.table(table(datos$Clase))
round(proptable, digits = 3)
## 
## no-recurrence-events    recurrence-events 
##                0.703                0.297
# The same class counts, converted to a data frame (columns Var1 and Freq).
clase_counts <- table(datos$Clase)
as.data.frame(clase_counts)
##                   Var1 Freq
## 1 no-recurrence-events  201
## 2    recurrence-events   85
# Counts and proportions of each age group (tabyl is presumably
# janitor::tabyl; the package is not loaded in the visible part of the file).
# NOTE(review): the recorded output below is listed in category order, not by
# count, so `sort = TRUE` appears to have no effect - confirm against the
# installed package version.
tabyl(datos$age, sort = TRUE)
##  datos$age  n     percent
##      20-29  1 0.003496503
##      30-39 36 0.125874126
##      40-49 90 0.314685315
##      50-59 96 0.335664336
##      60-69 57 0.199300699
##      70-79  6 0.020979021
# Frequency table of the age groups ordered by descending count, including
# cumulative percentages and a row for missing values.
summarytools::freq(datos$age, order = "freq")
## Frequencies  
## datos$age  
## Type: Character  
## 
##               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
##       50-59     96     33.57          33.57     33.57          33.57
##       40-49     90     31.47          65.03     31.47          65.03
##       60-69     57     19.93          84.97     19.93          84.97
##       30-39     36     12.59          97.55     12.59          97.55
##       70-79      6      2.10          99.65      2.10          99.65
##       20-29      1      0.35         100.00      0.35         100.00
##        <NA>      0                               0.00         100.00
##       Total    286    100.00         100.00    100.00         100.00

3.2 Gráfico circular

# Pie chart of the class counts; slice labels come from the table names.
clase_freq <- table(datos$Clase)
pie(x = clase_freq, main = "Gráfico para la clasificación de casos")

3.3 Gráfico de barras

  # Bar chart with one bar per age group, bar height = number of cases.
  age_freq <- table(datos$age)
  barplot(
    height = age_freq,
    main = "Distribución de edades",
    xlab = "Grupos Etários",
    ylab = "Frecuencia"
  )

4. Variable menopause

# Absolute and relative frequencies of the menopause categories.
m <- table(datos$menopause)
# Fix: the original assigned to a misspelled name (`porptable`) and called
# table(m), which tabulates the counts themselves; it then printed the stale
# `proptable` from the Clase section. The proportions must be computed
# directly from the count table `m`.
proptable <- prop.table(m)
round(proptable, 3)
## 
##    ge40    lt40 premeno 
##   0.451   0.024   0.524
# Counts and proportions of each menopause category (tabyl is presumably
# janitor::tabyl; the package is not loaded in the visible part of the file).
# NOTE(review): the recorded output below is listed in category order, not by
# count, so `sort = TRUE` appears to have no effect - confirm against the
# installed package version.
tabyl(datos$menopause, sort = TRUE)
##  datos$menopause   n    percent
##             ge40 129 0.45104895
##             lt40   7 0.02447552
##          premeno 150 0.52447552
# Frequency table of the menopause categories ordered by descending count,
# including cumulative percentages and a row for missing values.
summarytools::freq(datos$menopause, order = "freq")
## Frequencies  
## datos$menopause  
## Type: Character  
## 
##                 Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ------------- ------ --------- -------------- --------- --------------
##       premeno    150     52.45          52.45     52.45          52.45
##          ge40    129     45.10          97.55     45.10          97.55
##          lt40      7      2.45         100.00      2.45         100.00
##          <NA>      0                               0.00         100.00
##         Total    286    100.00         100.00    100.00         100.00

4.1 Gráfico circular

# Pie chart of the menopause counts; slice labels come from the table names.
menopause_freq <- table(datos$menopause)
pie(x = menopause_freq, main = "Gráfico para la variable de menopause")

4.2 Gráfico de barras

# Bar chart with one bar per menopause category, bar height = number of cases.
menopause_counts <- table(datos$menopause)
barplot(
  height = menopause_counts,
  main = "Distribución de menopausia",
  xlab = "Grupos",
  ylab = "Frecuencia"
)