library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
library(modeest)
# getwd()  # uncomment to check which directory the relative path resolves against
# Fix the RNG seed before anything else runs.
set.seed(seed = 2020)
# Read the COVID-19 CSV from a path relative to the working directory.
datos <- read.csv(file = "../Datos/covid-19_general_MX.csv")
# Render the first 10 rows as a table.
kable(x = head(x = datos, n = 10))
| X | SECTOR | ENTIDAD_UM | SEXO | ENTIDAD_RES | TIPO_PACIENTE | FECHA_INGRESO | FECHA_SINTOMAS | FECHA_DEF | INTUBADO | NEUMONIA | EDAD | NACIONALIDAD | DIABETES | EPOC | ASMA | INMUSUPR | HIPERTENSION | OTRA_CON | CARDIOVASCULAR | OBESIDAD | RENAL_CRONICA | TABAQUISMO | OTRO_CASO | RESULTADO | UCI |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 9 | 15 | 1 | 15 | 2 | 2020-04-09 | 2020-03-28 | 9999-99-99 | 2 | 1 | 75 | 1 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 1 | 1 |
| 1 | 12 | 9 | 1 | 9 | 1 | 2020-04-16 | 2020-04-02 | 9999-99-99 | 97 | 2 | 31 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 2 | 2 | 2 | 97 |
| 2 | 12 | 14 | 2 | 14 | 1 | 2020-04-28 | 2020-04-23 | 9999-99-99 | 97 | 1 | 64 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 1 | 3 | 97 |
| 3 | 9 | 28 | 2 | 28 | 1 | 2020-04-06 | 2020-04-04 | 9999-99-99 | 97 | 2 | 22 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 1 | 1 | 97 |
| 4 | 3 | 15 | 2 | 15 | 1 | 2020-04-16 | 2020-04-14 | 9999-99-99 | 97 | 2 | 26 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 2 | 1 | 3 | 97 |
| 5 | 12 | 15 | 2 | 15 | 2 | 2020-04-06 | 2020-04-04 | 9999-99-99 | 2 | 1 | 50 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |
| 6 | 12 | 9 | 1 | 9 | 1 | 2020-04-20 | 2020-04-20 | 9999-99-99 | 97 | 2 | 55 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 97 |
| 7 | 4 | 15 | 1 | 15 | 1 | 2020-04-23 | 2020-04-23 | 9999-99-99 | 97 | 2 | 58 | 1 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 99 | 2 | 97 |
| 8 | 12 | 14 | 1 | 14 | 1 | 2020-04-23 | 2020-04-22 | 9999-99-99 | 97 | 2 | 31 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 2 | 2 | 2 | 97 |
| 9 | 4 | 9 | 1 | 9 | 2 | 2020-04-14 | 2020-04-14 | 9999-99-99 | 2 | 1 | 54 | 1 | 2 | 2 | 2 | 2 | 1 | 2 | 2 | 2 | 2 | 2 | 99 | 2 | 2 |
kable(tail(datos, 10))
|  | X | SECTOR | ENTIDAD_UM | SEXO | ENTIDAD_RES | TIPO_PACIENTE | FECHA_INGRESO | FECHA_SINTOMAS | FECHA_DEF | INTUBADO | NEUMONIA | EDAD | NACIONALIDAD | DIABETES | EPOC | ASMA | INMUSUPR | HIPERTENSION | OTRA_CON | CARDIOVASCULAR | OBESIDAD | RENAL_CRONICA | TABAQUISMO | OTRO_CASO | RESULTADO | UCI |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 87363 | 87362 | 12 | 9 | 1 | 9 | 1 | 2020-04-08 | 2020-04-01 | 9999-99-99 | 97 | 2 | 38 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 2 | 2 | 2 | 97 |
| 87364 | 87363 | 12 | 28 | 2 | 28 | 1 | 2020-04-14 | 2020-04-12 | 9999-99-99 | 97 | 2 | 28 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 99 | 2 | 97 |
| 87365 | 87364 | 8 | 30 | 2 | 30 | 2 | 2020-04-29 | 2020-04-28 | 9999-99-99 | 2 | 1 | 48 | 1 | 2 | 2 | 2 | 2 | 1 | 2 | 2 | 1 | 2 | 1 | 1 | 3 | 2 |
| 87366 | 87365 | 4 | 9 | 1 | 15 | 2 | 2020-04-07 | 2020-04-07 | 9999-99-99 | 2 | 1 | 62 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 99 | 1 | 2 |
| 87367 | 87366 | 4 | 21 | 1 | 21 | 2 | 2020-03-25 | 2020-03-25 | 9999-99-99 | 2 | 2 | 36 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 99 | 2 | 2 |
| 87368 | 87367 | 12 | 5 | 2 | 5 | 1 | 2020-04-21 | 2020-04-18 | 9999-99-99 | 97 | 2 | 40 | 1 | 2 | 2 | 2 | 2 | 1 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 97 |
| 87369 | 87368 | 12 | 9 | 1 | 9 | 1 | 2020-04-13 | 2020-04-13 | 9999-99-99 | 97 | 2 | 42 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 2 | 1 | 2 | 97 |
| 87370 | 87369 | 12 | 9 | 2 | 15 | 1 | 2020-04-21 | 2020-04-19 | 9999-99-99 | 97 | 2 | 67 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 1 | 2 | 97 |
| 87371 | 87370 | 12 | 9 | 2 | 9 | 1 | 2020-04-25 | 2020-04-25 | 9999-99-99 | 97 | 2 | 31 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 1 | 3 | 97 |
| 87372 | 87371 | 12 | 1 | 2 | 1 | 1 | 2020-04-28 | 2020-04-25 | 9999-99-99 | 97 | 2 | 27 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 1 | 3 | 97 |
str(datos)
## 'data.frame': 87372 obs. of 26 variables:
## $ X : int 0 1 2 3 4 5 6 7 8 9 ...
## $ SECTOR : int 9 12 12 9 3 12 12 4 12 4 ...
## $ ENTIDAD_UM : int 15 9 14 28 15 15 9 15 14 9 ...
## $ SEXO : int 1 1 2 2 2 2 1 1 1 1 ...
## $ ENTIDAD_RES : int 15 9 14 28 15 15 9 15 14 9 ...
## $ TIPO_PACIENTE : int 2 1 1 1 1 2 1 1 1 2 ...
## $ FECHA_INGRESO : chr "2020-04-09" "2020-04-16" "2020-04-28" "2020-04-06" ...
## $ FECHA_SINTOMAS: chr "2020-03-28" "2020-04-02" "2020-04-23" "2020-04-04" ...
## $ FECHA_DEF : chr "9999-99-99" "9999-99-99" "9999-99-99" "9999-99-99" ...
## $ INTUBADO : int 2 97 97 97 97 2 97 97 97 2 ...
## $ NEUMONIA : int 1 2 1 2 2 1 2 2 2 1 ...
## $ EDAD : int 75 31 64 22 26 50 55 58 31 54 ...
## $ NACIONALIDAD : int 1 1 1 1 1 1 1 1 1 1 ...
## $ DIABETES : int 1 2 2 2 2 2 2 1 2 2 ...
## $ EPOC : int 2 2 2 2 2 2 2 2 2 2 ...
## $ ASMA : int 2 2 2 2 2 2 2 2 2 2 ...
## $ INMUSUPR : int 2 2 2 2 2 2 2 2 2 2 ...
## $ HIPERTENSION : int 2 2 2 2 2 2 2 2 2 1 ...
## $ OTRA_CON : int 2 2 2 2 2 2 2 2 2 2 ...
## $ CARDIOVASCULAR: int 2 2 2 2 2 2 2 2 2 2 ...
## $ OBESIDAD : int 2 1 2 2 1 2 2 2 1 2 ...
## $ RENAL_CRONICA : int 2 2 2 2 2 2 2 2 2 2 ...
## $ TABAQUISMO : int 2 2 2 2 2 2 2 2 2 2 ...
## $ OTRO_CASO : int 2 2 1 1 1 2 1 99 2 99 ...
## $ RESULTADO : int 1 2 3 1 3 2 2 2 2 2 ...
## $ UCI : int 1 97 97 97 97 2 97 97 97 2 ...
summary(datos)
## X SECTOR ENTIDAD_UM SEXO
## Min. : 0 Min. : 1.000 Min. : 1.00 Min. :1.000
## 1st Qu.:21843 1st Qu.: 4.000 1st Qu.: 9.00 1st Qu.:1.000
## Median :43686 Median :12.000 Median :14.00 Median :2.000
## Mean :43686 Mean : 9.338 Mean :14.65 Mean :1.505
## 3rd Qu.:65528 3rd Qu.:12.000 3rd Qu.:20.00 3rd Qu.:2.000
## Max. :87371 Max. :99.000 Max. :32.00 Max. :2.000
## ENTIDAD_RES TIPO_PACIENTE FECHA_INGRESO FECHA_SINTOMAS
## Min. : 1.00 Min. :1.000 Length:87372 Length:87372
## 1st Qu.: 9.00 1st Qu.:1.000 Class :character Class :character
## Median :14.00 Median :1.000 Mode :character Mode :character
## Mean :14.89 Mean :1.265
## 3rd Qu.:20.00 3rd Qu.:2.000
## Max. :32.00 Max. :2.000
## FECHA_DEF INTUBADO NEUMONIA EDAD
## Length:87372 Min. : 1.00 Min. : 1.000 Min. : 0.00
## Class :character 1st Qu.: 2.00 1st Qu.: 2.000 1st Qu.: 30.00
## Mode :character Median :97.00 Median : 2.000 Median : 40.00
## Mean :71.81 Mean : 1.826 Mean : 41.88
## 3rd Qu.:97.00 3rd Qu.: 2.000 3rd Qu.: 53.00
## Max. :99.00 Max. :99.000 Max. :113.00
## NACIONALIDAD DIABETES EPOC ASMA
## Min. :1.000 Min. : 1.000 Min. : 1.000 Min. : 1.000
## 1st Qu.:1.000 1st Qu.: 2.000 1st Qu.: 2.000 1st Qu.: 2.000
## Median :1.000 Median : 2.000 Median : 2.000 Median : 2.000
## Mean :1.013 Mean : 2.302 Mean : 2.392 Mean : 2.379
## 3rd Qu.:1.000 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.: 2.000
## Max. :2.000 Max. :98.000 Max. :98.000 Max. :98.000
## INMUSUPR HIPERTENSION OTRA_CON CARDIOVASCULAR
## Min. : 1.000 Min. : 1.000 Min. : 1.000 Min. : 1.000
## 1st Qu.: 2.000 1st Qu.: 2.000 1st Qu.: 2.000 1st Qu.: 2.000
## Median : 2.000 Median : 2.000 Median : 2.000 Median : 2.000
## Mean : 2.416 Mean : 2.245 Mean : 2.505 Mean : 2.399
## 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.: 2.000
## Max. :98.000 Max. :98.000 Max. :98.000 Max. :98.000
## OBESIDAD RENAL_CRONICA TABAQUISMO OTRO_CASO
## Min. : 1.000 Min. : 1.0 Min. : 1.000 Min. : 1.00
## 1st Qu.: 2.000 1st Qu.: 2.0 1st Qu.: 2.000 1st Qu.: 1.00
## Median : 2.000 Median : 2.0 Median : 2.000 Median : 2.00
## Mean : 2.237 Mean : 2.4 Mean : 2.322 Mean :38.12
## 3rd Qu.: 2.000 3rd Qu.: 2.0 3rd Qu.: 2.000 3rd Qu.:99.00
## Max. :98.000 Max. :98.0 Max. :98.000 Max. :99.00
## RESULTADO UCI
## Min. :1.000 Min. : 1.00
## 1st Qu.:2.000 1st Qu.: 2.00
## Median :2.000 Median :97.00
## Mean :1.958 Mean :71.81
## 3rd Qu.:2.000 3rd Qu.:97.00
## Max. :3.000 Max. :99.00
# Simulated example: draw 100 ages from 20:100, with replacement.
# The seed is set right before sampling so the draw is reproducible.
set.seed(2020)
edades <- sample(x = 20:100, size = 100, replace = TRUE)
edades
## [1] 47 41 84 36 55 61 89 68 75 91 61 99 48 22 85 23 97 97
## [19] 95 67 66 97 26 37 71 67 63 29 89 62 96 51 32 85 99 96
## [37] 20 57 22 48 44 64 62 88 77 85 44 54 43 69 52 26 27 75
## [55] 32 88 87 38 42 87 61 20 66 50 79 35 89 74 45 29 47 84
## [73] 36 59 34 63 69 96 24 67 39 71 33 32 47 100 60 79 59 46
## [91] 78 54 21 96 27 80 51 87 58 65
# Arithmetic mean computed by hand. Divide by the actual number of values
# rather than a hard-coded 100, so the result stays correct if `size` changes.
sum(edades) / length(edades)
## [1] 60.48
mean(edades)
## [1] 60.48
mean(datos$EDAD)
## [1] 41.88436
edades
## [1] 47 41 84 36 55 61 89 68 75 91 61 99 48 22 85 23 97 97
## [19] 95 67 66 97 26 37 71 67 63 29 89 62 96 51 32 85 99 96
## [37] 20 57 22 48 44 64 62 88 77 85 44 54 43 69 52 26 27 75
## [55] 32 88 87 38 42 87 61 20 66 50 79 35 89 74 45 29 47 84
## [73] 36 59 34 63 69 96 24 67 39 71 33 32 47 100 60 79 59 46
## [91] 78 54 21 96 27 80 51 87 58 65
median(datos$EDAD)
## [1] 40
* La moda es el valor que se presenta con mayor frecuencia.

#### Ejemplo de edades: la moda
sort(edades)
## [1] 20 20 21 22 22 23 24 26 26 27 27 29 29 32 32 32 33 34
## [19] 35 36 36 37 38 39 41 42 43 44 44 45 46 47 47 47 48 48
## [37] 50 51 51 52 54 54 55 57 58 59 59 60 61 61 61 62 62 63
## [55] 63 64 65 66 66 67 67 67 68 69 69 71 71 74 75 75 77 78
## [73] 79 79 80 84 84 85 85 85 87 87 87 88 88 89 89 89 91 95
## [91] 96 96 96 96 97 97 97 99 99 100
table(edades)
## edades
## 20 21 22 23 24 26 27 29 32 33 34 35 36 37 38 39 41 42 43 44
## 2 1 2 1 1 2 2 2 3 1 1 1 2 1 1 1 1 1 1 2
## 45 46 47 48 50 51 52 54 55 57 58 59 60 61 62 63 64 65 66 67
## 1 1 3 2 1 2 1 2 1 1 1 2 1 3 2 2 1 1 2 3
## 68 69 71 74 75 77 78 79 80 84 85 87 88 89 91 95 96 97 99 100
## 1 2 2 1 2 1 1 2 1 2 3 3 2 3 1 1 4 3 2 1
frecuencias <- sort(table(edades), decreasing = TRUE)
frecuencias
## edades
## 96 32 47 61 67 85 87 89 97 20 22 26 27 29 36 44 48 51 54 59
## 4 3 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2
## 62 63 66 69 71 75 79 84 88 99 21 23 24 33 34 35 37 38 39 41
## 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
## 42 43 45 46 50 52 55 57 58 60 64 65 68 74 77 78 80 91 95 100
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# Take the first entry of the frequency table sorted in decreasing order:
# its name is the age and its value is the count. Only one entry is kept,
# so any other age with the same count is not reported.
moda <- frecuencias[1]
moda
## 96
## 4
# Same statistic obtained through mlv().
# NOTE(review): presumably modeest::mlv, since modeest is attached above —
# confirm which estimation method it applies by default for this input.
moda2 <- mlv(edades)
moda2
## [1] 96
Generando la moda de EDAD de COVID19
# Mode of the EDAD column of the loaded dataset, computed with mlv().
# NOTE(review): this reassigns `moda`, replacing the frequency-table entry
# stored under the same name earlier in the document — confirm that the
# earlier value is no longer needed.
moda <- mlv(datos$EDAD)
moda
## [1] 30
edades
## [1] 47 41 84 36 55 61 89 68 75 91 61 99 48 22 85 23 97 97
## [19] 95 67 66 97 26 37 71 67 63 29 89 62 96 51 32 85 99 96
## [37] 20 57 22 48 44 64 62 88 77 85 44 54 43 69 52 26 27 75
## [55] 32 88 87 38 42 87 61 20 66 50 79 35 89 74 45 29 47 84
## [73] 36 59 34 63 69 96 24 67 39 71 33 32 47 100 60 79 59 46
## [91] 78 54 21 96 27 80 51 87 58 65
min(edades)
## [1] 20
max(edades)
## [1] 100
boxplot(edades)
#### Graficar boxplot
boxplot(datos$EDAD)
min(datos$EDAD)
## [1] 0
max(datos$EDAD)
## [1] 113
sort(edades)
## [1] 20 20 21 22 22 23 24 26 26 27 27 29 29 32 32 32 33 34
## [19] 35 36 36 37 38 39 41 42 43 44 44 45 46 47 47 47 48 48
## [37] 50 51 51 52 54 54 55 57 58 59 59 60 61 61 61 62 62 63
## [55] 63 64 65 66 66 67 67 67 68 69 69 71 71 74 75 75 77 78
## [73] 79 79 80 84 84 85 85 85 87 87 87 88 88 89 89 89 91 95
## [91] 96 96 96 96 97 97 97 99 99 100
cuartile50 <- quantile(edades, 0.50)
cuartile50
## 50%
## 61
cuartile75<- quantile(edades, 0.75)
cuartile75
## 75%
## 81
# First quartile (25th percentile) of the simulated ages. It gets its own
# name so it does not overwrite `cuartile50`, which holds the 50% quantile.
cuartile25 <- quantile(edades, 0.25)
cuartile25
## 25%
## 41.75
boxplot(edades)
# Dispersion of the simulated ages: standard deviation and variance.
desvstd <- sd(x = edades)
varianza <- var(x = edades)
mean(edades)
## [1] 60.48
varianza
## [1] 559.1208
desvstd
## [1] 23.64574
range(edades)
## [1] 20 100
var(datos$EDAD)
## [1] 301.4186
sd(datos$EDAD)
## [1] 17.36141
range(datos$EDAD)
## [1] 0 113