1. Lectura de datos
# Load the comma-separated data file. No header argument is given, so
# read.table() treats the first line as data and the column names are
# assigned explicitly below.
# NOTE(review): if the file marks missing values with a sentinel such as "?",
# pass `na.strings` so they are read as NA — confirm against the data file.
datos <- read.table("breast-cancer.data", sep = ",")
# Name the ten columns: the outcome class first, then the nine attributes.
names(datos) <- c(
  "Clase", "age", "menopause", "tumor_size", "inv_nodes",
  "node_caps", "deg_malig", "breast", "breast_quad", "irradiat"
)
# Per-column overview of the freshly loaded data.
summary(datos)
## Clase age menopause tumor_size
## Length:286 Length:286 Length:286 Length:286
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## inv_nodes node_caps deg_malig breast
## Length:286 Length:286 Min. :1.000 Length:286
## Class :character Class :character 1st Qu.:2.000 Class :character
## Mode :character Mode :character Median :2.000 Mode :character
## Mean :2.049
## 3rd Qu.:3.000
## Max. :3.000
## breast_quad irradiat
## Length:286 Length:286
## Class :character Class :character
## Mode :character Mode :character
##
##
##
2. Exploración
# Print the dimensions of the data frame (number of rows, number of columns).
dim(datos)
## [1] 286 10
# Preview the first six records of the data frame.
head(datos, n = 6L)
## Clase age menopause tumor_size inv_nodes node_caps deg_malig
## 1 no-recurrence-events 30-39 premeno 30-34 0-2 no 3
## 2 no-recurrence-events 40-49 premeno 20-24 0-2 no 2
## 3 no-recurrence-events 40-49 premeno 20-24 0-2 no 2
## 4 no-recurrence-events 60-69 ge40 15-19 0-2 no 2
## 5 no-recurrence-events 40-49 premeno 0-4 0-2 no 2
## 6 no-recurrence-events 60-69 ge40 15-19 0-2 no 2
## breast breast_quad irradiat
## 1 left left_low no
## 2 right right_up no
## 3 left left_low no
## 4 right left_up no
## 5 right right_low no
## 6 left left_low no
# Show the structure of the data frame: each column's type and first values.
str(datos)
## 'data.frame': 286 obs. of 10 variables:
## $ Clase : chr "no-recurrence-events" "no-recurrence-events" "no-recurrence-events" "no-recurrence-events" ...
## $ age : chr "30-39" "40-49" "40-49" "60-69" ...
## $ menopause : chr "premeno" "premeno" "premeno" "ge40" ...
## $ tumor_size : chr "30-34" "20-24" "20-24" "15-19" ...
## $ inv_nodes : chr "0-2" "0-2" "0-2" "0-2" ...
## $ node_caps : chr "no" "no" "no" "no" ...
## $ deg_malig : int 3 2 2 2 2 2 2 1 2 2 ...
## $ breast : chr "left" "right" "left" "right" ...
## $ breast_quad: chr "left_low" "right_up" "left_low" "left_up" ...
## $ irradiat : chr "no" "no" "no" "no" ...
# Per-column summary of the data frame (same call as the one made right
# after loading the data).
summary(datos)
## Clase age menopause tumor_size
## Length:286 Length:286 Length:286 Length:286
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## inv_nodes node_caps deg_malig breast
## Length:286 Length:286 Min. :1.000 Length:286
## Class :character Class :character 1st Qu.:2.000 Class :character
## Mode :character Mode :character Median :2.000 Mode :character
## Mean :2.049
## 3rd Qu.:3.000
## Max. :3.000
## breast_quad irradiat
## Length:286 Length:286
## Class :character Class :character
## Mode :character Mode :character
##
##
##
3. Organización y visualización: Variables Cualitativas
3.1 Tablas de Frecuencia
# Absolute frequency of each outcome class.
table(datos[["Clase"]])
##
## no-recurrence-events recurrence-events
## 201 85
# Relative frequency of each outcome class, printed to three decimals.
proptable <- prop.table(table(datos$Clase))
round(proptable, 3)
##
## no-recurrence-events recurrence-events
## 0.703 0.297
# Same class counts, reshaped into a two-column data frame (Var1, Freq).
as.data.frame(
  table(datos[["Clase"]])
)
## Var1 Freq
## 1 no-recurrence-events 201
## 2 recurrence-events 85
# Frequency table (count and proportion) of the age groups. The call is
# namespace-qualified, like the summarytools::freq() calls in this file, so
# it does not depend on janitor being attached.
# NOTE(review): the printed rows come out in label order, not frequency order,
# so `sort = TRUE` does not appear to take effect — confirm the intent.
janitor::tabyl(datos$age, sort = TRUE)
## datos$age n percent
## 20-29 1 0.003496503
## 30-39 36 0.125874126
## 40-49 90 0.314685315
## 50-59 96 0.335664336
## 60-69 57 0.199300699
## 70-79 6 0.020979021
# Frequency table of the age groups with percentages and cumulative
# percentages, ordered by frequency.
summarytools::freq(datos$age, order = "freq")
## Frequencies
## datos$age
## Type: Character
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
## 50-59 96 33.57 33.57 33.57 33.57
## 40-49 90 31.47 65.03 31.47 65.03
## 60-69 57 19.93 84.97 19.93 84.97
## 30-39 36 12.59 97.55 12.59 97.55
## 70-79 6 2.10 99.65 2.10 99.65
## 20-29 1 0.35 100.00 0.35 100.00
## <NA> 0 0.00 100.00
## Total 286 100.00 100.00 100.00 100.00
3.2 Gráfico circular
# Pie chart of how the cases split between the outcome classes.
pie(
  x = table(datos[["Clase"]]),
  main = "Gráfico para la clasificación de casos"
)

3.3 Gráfico de barras
# Bar chart of the number of cases in each age group.
barplot(
  height = table(datos[["age"]]),
  main = "Distribución de edades",
  xlab = "Grupos Etários",
  ylab = "Frecuencia"
)

4. Variable menopause
# Relative frequencies of the menopause categories, to three decimals.
# prop.table() is applied to the category counts themselves; wrapping them in
# a second table() would tabulate the count values instead of the categories.
m <- table(datos$menopause)
proptable <- prop.table(m)
round(proptable, 3)
##
## ge40 lt40 premeno
## 0.451 0.024 0.524
# Frequency table (count and proportion) of the menopause categories. The
# call is namespace-qualified, like the summarytools::freq() calls in this
# file, so it does not depend on janitor being attached.
# NOTE(review): the printed rows come out in label order, not frequency order,
# so `sort = TRUE` does not appear to take effect — confirm the intent.
janitor::tabyl(datos$menopause, sort = TRUE)
## datos$menopause n percent
## ge40 129 0.45104895
## lt40 7 0.02447552
## premeno 150 0.52447552
# Frequency table of the menopause categories with percentages and
# cumulative percentages, ordered by frequency.
summarytools::freq(datos$menopause, order = "freq")
## Frequencies
## datos$menopause
## Type: Character
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ------------- ------ --------- -------------- --------- --------------
## premeno 150 52.45 52.45 52.45 52.45
## ge40 129 45.10 97.55 45.10 97.55
## lt40 7 2.45 100.00 2.45 100.00
## <NA> 0 0.00 100.00
## Total 286 100.00 100.00 100.00 100.00
4.1 Gráfico circular
# Pie chart of the share of cases in each menopause category.
pie(
  x = table(datos[["menopause"]]),
  main = "Gráfico para la variable de menopause"
)

4.2 Gráfico de barras
# Bar chart of the number of cases in each menopause category.
barplot(
  height = table(datos[["menopause"]]),
  main = "Distribución de menopausia",
  xlab = "Grupos",
  ylab = "Frecuencia"
)
