Librerías
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
library(modeest)
Datos
# Seed the random number generator with a fixed value (2020).
set.seed(2020)
# Load the confirmed-cases CSV; the path is relative to the working directory.
datos <- read.csv(file = "../Datos/casos_confirmados.csv")
# Render the first ten records as a table.
kable(head(datos, n = 10))
| 0 |
méxico |
FEMENINO |
75 |
2020-04-09 |
1 |
| 3 |
tamaulipas |
MASCULINO |
22 |
2020-04-06 |
1 |
| 15 |
distrito federal |
MASCULINO |
40 |
2020-03-28 |
1 |
| 16 |
distrito federal |
FEMENINO |
29 |
2020-04-01 |
1 |
| 17 |
yucatán |
MASCULINO |
71 |
2020-04-15 |
1 |
| 22 |
michoacán |
FEMENINO |
29 |
2020-04-23 |
1 |
| 27 |
guerrero |
FEMENINO |
61 |
2020-04-07 |
1 |
| 28 |
distrito federal |
MASCULINO |
33 |
2020-04-08 |
1 |
| 31 |
méxico |
FEMENINO |
77 |
2020-04-08 |
1 |
| 32 |
méxico |
FEMENINO |
84 |
2020-04-02 |
1 |
# Render the last ten records as a table.
kable(tail(datos, n = 10))
| 19215 |
87334 |
michoacán |
MASCULINO |
22 |
2020-04-14 |
1 |
| 19216 |
87344 |
distrito federal |
FEMENINO |
52 |
2020-04-26 |
1 |
| 19217 |
87349 |
tabasco |
MASCULINO |
36 |
2020-04-28 |
1 |
| 19218 |
87353 |
distrito federal |
FEMENINO |
30 |
2020-04-21 |
1 |
| 19219 |
87354 |
tabasco |
FEMENINO |
47 |
2020-04-21 |
1 |
| 19220 |
87356 |
méxico |
FEMENINO |
28 |
2020-04-13 |
1 |
| 19221 |
87358 |
distrito federal |
FEMENINO |
39 |
2020-04-28 |
1 |
| 19222 |
87360 |
méxico |
MASCULINO |
48 |
2020-04-22 |
1 |
| 19223 |
87361 |
tabasco |
MASCULINO |
48 |
2020-04-25 |
1 |
| 19224 |
87365 |
méxico |
FEMENINO |
62 |
2020-04-07 |
1 |
Estructura de los datos
# Compact overview of the data frame: dimensions, column names and types.
str(object = datos)
## 'data.frame': 19224 obs. of 6 variables:
## $ X : int 0 3 15 16 17 22 27 28 31 32 ...
## $ State : chr "méxico" "tamaulipas" "distrito federal" "distrito federal" ...
## $ Sex : chr "FEMENINO" "MASCULINO" "MASCULINO" "FEMENINO" ...
## $ Age : int 75 22 40 29 71 29 61 33 77 84 ...
## $ Date : chr "2020-04-09" "2020-04-06" "2020-03-28" "2020-04-01" ...
## $ Confirmed: int 1 1 1 1 1 1 1 1 1 1 ...
Resumen de datos
# Per-column summary of the data frame.
summary(object = datos)
## X State Sex Age
## Min. : 0 Length:19224 Length:19224 Min. : 0.00
## 1st Qu.:22523 Class :character Class :character 1st Qu.: 35.00
## Median :44009 Mode :character Mode :character Median : 46.00
## Mean :44045 Mean : 46.59
## 3rd Qu.:65793 3rd Qu.: 57.00
## Max. :87365 Max. :113.00
## Date Confirmed
## Length:19224 Min. :1
## Class :character 1st Qu.:1
## Mode :character Median :1
## Mean :1
## 3rd Qu.:1
## Max. :1
Cuartiles 25%, 50%, 75%
- Cada cuartil es el valor por debajo del cual queda el porcentaje indicado de los datos; el cuartil del 50 % es la mediana.
# Second quartile of Age (the 0.50 quantile).
cuartile50 <- quantile(datos[["Age"]], probs = 0.5)
print(cuartile50)
## 50%
## 46
# Third quartile of Age (the 0.75 quantile).
cuartile75 <- quantile(datos[["Age"]], probs = 0.75)
print(cuartile75)
## 75%
## 57
# First quartile of Age (the 0.25 quantile).
cuartile25 <- quantile(datos[["Age"]], probs = 0.25)
print(cuartile25)
## 25%
## 35
# Box-and-whisker plot of the Age column.
boxplot(
  datos[["Age"]],
  main = "Estadísticos de la edad (age)"
)

La moda, frecuencias
# Frequency table of Age, ordered from the most to the least frequent value.
# The former `frecuencias=sort(datos$Age)` line was removed: its result was
# overwritten by this assignment before ever being used.
frecuencias <- sort(table(datos$Age), decreasing = TRUE)
frecuencias
##
## 46 49 47 52 44 45 41 43 38 48 35 39 42 37 40 50 51 36 34 33
## 483 475 466 465 462 459 456 455 453 450 442 437 436 434 429 429 428 425 417 416
## 53 56 31 54 29 55 32 30 57 28 58 27 59 60 26 62 61 65 63 66
## 412 402 398 393 384 376 372 369 362 336 333 324 322 322 299 287 283 261 249 220
## 25 67 68 24 64 69 23 70 71 73 72 22 75 74 21 78 76 77 79 19
## 215 215 197 196 192 178 161 155 136 130 124 115 114 113 99 94 93 91 74 57
## 20 80 84 82 81 18 83 17 87 86 0 13 85 1 15 16 11 2 88 89
## 56 56 56 52 44 42 41 33 33 27 26 26 25 24 22 21 20 18 18 18
## 12 14 90 4 10 7 9 6 91 3 5 8 92 95 93 94 97 99 100 96
## 17 17 17 15 15 14 14 13 12 11 11 9 7 6 5 3 2 2 2 1
## 98 102 113
## 1 1 1
# Mode of Age. `frecuencias` is sorted by decreasing count, so its first
# entry holds the most frequent age (as the entry name) and its count (as the
# value).
# The former `sort(table(frecuencias), drecreasing = TRUE)` line was removed:
# it tabulated the counts themselves, misspelled the `decreasing` argument,
# and its result was overwritten by this assignment.
moda <- frecuencias[1]
# Cross-check with the mode estimator from the modeest package.
moda2 <- mlv(datos$Age)
moda2
## [1] 46
Valores máximos y mínimos: max(), min()
# Largest value in the Age column.
max(datos[["Age"]])
## [1] 113
# Smallest value in the Age column.
min(datos[["Age"]])
## [1] 0
Medidas de dispersión de la edad (Age) en los casos de COVID-19: var(), sd() y rango
# Sample variance of the Age column.
var(datos[["Age"]])
## [1] 243.3138
# Sample standard deviation of the Age column.
sd(datos[["Age"]])
## [1] 15.59852
# Range of Age, reported as the pair (minimum, maximum).
range(datos[["Age"]])
## [1] 0 113
```