library(readr)
library(fdth)
library(dplyr)
library(lubridate)
library(ggplot2)
# Read the COVID-19 case file into a data frame; `encoding` marks the
# character columns as UTF-8.
datos.covid <- read.csv(
  file = "~/Carpeta Analisis inteligente de datos/Datos/200916COVID19MEXICO.csv",
  encoding = "UTF-8"
)
# Column-by-column descriptive summary of the raw data.
summary(object = datos.covid)
## FECHA_ACTUALIZACION ID_REGISTRO ORIGEN SECTOR
## Length:1545572 Length:1545572 Min. :1.000 Min. : 1.000
## Class :character Class :character 1st Qu.:1.000 1st Qu.: 4.000
## Mode :character Mode :character Median :2.000 Median :12.000
## Mean :1.669 Mean : 9.361
## 3rd Qu.:2.000 3rd Qu.:12.000
## Max. :2.000 Max. :99.000
## ENTIDAD_UM SEXO ENTIDAD_NAC ENTIDAD_RES
## Min. : 1.00 Min. :1.000 Min. : 1.00 Min. : 1.00
## 1st Qu.: 9.00 1st Qu.:1.000 1st Qu.: 9.00 1st Qu.: 9.00
## Median :14.00 Median :1.000 Median :15.00 Median :15.00
## Mean :15.51 Mean :1.491 Mean :16.32 Mean :15.75
## 3rd Qu.:22.00 3rd Qu.:2.000 3rd Qu.:24.00 3rd Qu.:22.00
## Max. :32.00 Max. :2.000 Max. :99.00 Max. :32.00
## MUNICIPIO_RES TIPO_PACIENTE FECHA_INGRESO FECHA_SINTOMAS
## Min. : 1.00 Min. :1.000 Length:1545572 Length:1545572
## 1st Qu.: 8.00 1st Qu.:1.000 Class :character Class :character
## Median : 20.00 Median :1.000 Mode :character Mode :character
## Mean : 37.01 Mean :1.172
## 3rd Qu.: 46.00 3rd Qu.:1.000
## Max. :999.00 Max. :2.000
## FECHA_DEF INTUBADO NEUMONIA EDAD
## Length:1545572 Min. : 1.00 Min. : 1.000 Min. : 0.00
## Class :character 1st Qu.:97.00 1st Qu.: 2.000 1st Qu.: 30.00
## Mode :character Median :97.00 Median : 2.000 Median : 41.00
## Mean :80.62 Mean : 1.876 Mean : 42.12
## 3rd Qu.:97.00 3rd Qu.: 2.000 3rd Qu.: 53.00
## Max. :99.00 Max. :99.000 Max. :120.00
## NACIONALIDAD EMBARAZO HABLA_LENGUA_INDIG DIABETES
## Min. :1.000 Min. : 1.00 Min. : 1.000 Min. : 1.000
## 1st Qu.:1.000 1st Qu.: 2.00 1st Qu.: 2.000 1st Qu.: 2.000
## Median :1.000 Median : 2.00 Median : 2.000 Median : 2.000
## Mean :1.005 Mean :48.97 Mean : 5.466 Mean : 2.166
## 3rd Qu.:1.000 3rd Qu.:97.00 3rd Qu.: 2.000 3rd Qu.: 2.000
## Max. :2.000 Max. :98.00 Max. :99.000 Max. :98.000
## EPOC ASMA INMUSUPR HIPERTENSION
## Min. : 1.000 Min. : 1.000 Min. : 1.000 Min. : 1.000
## 1st Qu.: 2.000 1st Qu.: 2.000 1st Qu.: 2.000 1st Qu.: 2.000
## Median : 2.000 Median : 2.000 Median : 2.000 Median : 2.000
## Mean : 2.242 Mean : 2.227 Mean : 2.261 Mean : 2.108
## 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.: 2.000
## Max. :98.000 Max. :98.000 Max. :98.000 Max. :98.000
## OTRA_COM CARDIOVASCULAR OBESIDAD RENAL_CRONICA
## Min. : 1.000 Min. : 1.000 Min. : 1.000 Min. : 1.000
## 1st Qu.: 2.000 1st Qu.: 2.000 1st Qu.: 2.000 1st Qu.: 2.000
## Median : 2.000 Median : 2.000 Median : 2.000 Median : 2.000
## Mean : 2.385 Mean : 2.241 Mean : 2.101 Mean : 2.238
## 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.: 2.000
## Max. :98.000 Max. :98.000 Max. :98.000 Max. :98.000
## TABAQUISMO OTRO_CASO RESULTADO MIGRANTE
## Min. : 1.00 Min. : 1.00 Min. :1.000 Min. : 1.00
## 1st Qu.: 2.00 1st Qu.: 1.00 1st Qu.:1.000 1st Qu.:99.00
## Median : 2.00 Median : 2.00 Median :2.000 Median :99.00
## Mean : 2.19 Mean :14.97 Mean :1.611 Mean :98.64
## 3rd Qu.: 2.00 3rd Qu.: 2.00 3rd Qu.:2.000 3rd Qu.:99.00
## Max. :98.00 Max. :99.00 Max. :3.000 Max. :99.00
## PAIS_NACIONALIDAD PAIS_ORIGEN UCI
## Length:1545572 Length:1545572 Min. : 1.00
## Class :character Class :character 1st Qu.:97.00
## Mode :character Mode :character Median :97.00
## Mean :80.63
## 3rd Qu.:97.00
## Max. :99.00
# Compact view of the raw data structure (column types and first values).
str(object = datos.covid)
## 'data.frame': 1545572 obs. of 35 variables:
## $ FECHA_ACTUALIZACION: chr "2020-09-16" "2020-09-16" "2020-09-16" "2020-09-16" ...
## $ ID_REGISTRO : chr "010db3" "15b7b4" "08b549" "03a8ac" ...
## $ ORIGEN : int 2 2 2 2 2 2 2 2 2 2 ...
## $ SECTOR : int 4 4 4 3 4 4 4 4 3 4 ...
## $ ENTIDAD_UM : int 31 30 27 27 9 9 25 9 15 23 ...
## $ SEXO : int 1 2 2 1 1 2 1 2 1 1 ...
## $ ENTIDAD_NAC : int 31 30 27 27 9 9 25 12 9 30 ...
## $ ENTIDAD_RES : int 31 30 27 27 15 9 25 9 15 23 ...
## $ MUNICIPIO_RES : int 50 193 4 4 58 2 1 17 106 5 ...
## $ TIPO_PACIENTE : int 1 1 1 1 1 1 2 1 1 1 ...
## $ FECHA_INGRESO : chr "2020-04-20" "2020-04-29" "2020-04-22" "2020-06-25" ...
## $ FECHA_SINTOMAS : chr "2020-04-20" "2020-04-22" "2020-04-20" "2020-06-10" ...
## $ FECHA_DEF : chr "9999-99-99" "2020-05-12" "9999-99-99" "9999-99-99" ...
## $ INTUBADO : int 97 97 97 97 97 97 2 97 97 97 ...
## $ NEUMONIA : int 2 2 2 2 2 2 1 2 2 2 ...
## $ EDAD : int 37 47 47 47 40 44 51 37 50 35 ...
## $ NACIONALIDAD : int 1 1 1 1 1 1 1 1 1 1 ...
## $ EMBARAZO : int 2 97 97 2 2 97 2 97 2 2 ...
## $ HABLA_LENGUA_INDIG : int 2 2 2 2 2 2 2 2 2 2 ...
## $ DIABETES : int 2 1 1 2 2 2 1 2 2 1 ...
## $ EPOC : int 2 2 2 2 2 2 2 2 2 2 ...
## $ ASMA : int 2 2 2 2 2 2 2 2 2 2 ...
## $ INMUSUPR : int 2 2 2 2 2 2 2 2 2 2 ...
## $ HIPERTENSION : int 2 2 1 2 2 2 2 2 2 2 ...
## $ OTRA_COM : int 2 2 2 2 2 2 2 2 2 2 ...
## $ CARDIOVASCULAR : int 2 2 2 2 2 2 2 2 2 2 ...
## $ OBESIDAD : int 2 2 2 2 2 2 2 2 2 2 ...
## $ RENAL_CRONICA : int 2 2 2 2 2 2 2 2 2 2 ...
## $ TABAQUISMO : int 2 2 2 2 2 2 2 2 2 2 ...
## $ OTRO_CASO : int 99 99 99 1 1 99 99 1 1 99 ...
## $ RESULTADO : int 1 1 1 1 1 1 1 1 1 1 ...
## $ MIGRANTE : int 99 99 99 99 99 99 99 99 99 99 ...
## $ PAIS_NACIONALIDAD : chr "México" "México" "México" "México" ...
## $ PAIS_ORIGEN : chr "99" "99" "99" "99" ...
## $ UCI : int 97 97 97 97 97 97 2 97 97 97 ...
Modificar el tipo de dato de aquellos atributos que deban ser categóricos (factor()) pero que vienen codificados con valores numéricos.
Modificar los atributos tipo fecha a ymd().
# Columns that store category codes (integers, or text for PAIS_ORIGEN)
# and must be treated as factors rather than as quantities.
columnas.categoricas <- c(
  "ORIGEN", "SECTOR", "ENTIDAD_UM", "SEXO", "ENTIDAD_NAC", "ENTIDAD_RES",
  "MUNICIPIO_RES", "TIPO_PACIENTE", "NACIONALIDAD", "HABLA_LENGUA_INDIG",
  "MIGRANTE", "PAIS_ORIGEN", "INTUBADO", "NEUMONIA", "EMBARAZO", "DIABETES",
  "EPOC", "ASMA", "INMUSUPR", "HIPERTENSION", "OTRA_COM", "CARDIOVASCULAR",
  "OBESIDAD", "RENAL_CRONICA", "TABAQUISMO", "OTRO_CASO", "RESULTADO", "UCI"
)
# Convert every categorical column in one pass; each column gets exactly the
# same factor() call it would get from an individual assignment.
datos.covid[columnas.categoricas] <- lapply(
  datos.covid[columnas.categoricas],
  factor
)

# Columns that hold ISO "yyyy-mm-dd" text and must become Date values.
columnas.fecha <- c(
  "FECHA_ACTUALIZACION", "FECHA_INGRESO", "FECHA_SINTOMAS", "FECHA_DEF"
)
# FECHA_DEF carries the placeholder "9999-99-99" where no date is recorded.
# Turn it into NA explicitly so the result does not depend on ymd() failing
# to parse it (which also raises a warning for every such row). %in% is used
# instead of == so that pre-existing NA values cannot break the assignment.
datos.covid$FECHA_DEF[datos.covid$FECHA_DEF %in% "9999-99-99"] <- NA
datos.covid[columnas.fecha] <- lapply(datos.covid[columnas.fecha], ymd)
# Number of records (rows) in the data set.
dim(datos.covid)[1L]
## [1] 1545572
# Number of variables (columns) in the data set.
dim(datos.covid)[2L]
## [1] 35
# Descriptive summary after the type conversion: factors are now reported as
# level counts and dates as date ranges.
summary(object = datos.covid)
## FECHA_ACTUALIZACION ID_REGISTRO ORIGEN SECTOR
## Min. :2020-09-16 Length:1545572 1: 510822 12 :967085
## 1st Qu.:2020-09-16 Class :character 2:1034750 4 :393131
## Median :2020-09-16 Mode :character 6 : 58841
## Mean :2020-09-16 9 : 56610
## 3rd Qu.:2020-09-16 3 : 27549
## Max. :2020-09-16 8 : 13145
## (Other): 29211
## ENTIDAD_UM SEXO ENTIDAD_NAC ENTIDAD_RES
## 9 :378926 1:786804 9 :338722 9 :321855
## 15 :134638 2:758768 15 :171354 15 :188041
## 11 : 88936 11 : 84986 11 : 88869
## 19 : 80729 30 : 67363 19 : 80084
## 21 : 66550 19 : 66124 21 : 64375
## 28 : 62838 21 : 64032 28 : 62756
## (Other):732955 (Other):752991 (Other):739592
## MUNICIPIO_RES TIPO_PACIENTE FECHA_INGRESO FECHA_SINTOMAS
## 7 : 67974 1:1279316 Min. :2020-01-01 Min. :2020-01-01
## 5 : 67129 2: 266256 1st Qu.:2020-06-11 1st Qu.:2020-06-07
## 4 : 57966 Median :2020-07-15 Median :2020-07-10
## 39 : 55352 Mean :2020-07-10 Mean :2020-07-06
## 2 : 54459 3rd Qu.:2020-08-14 3rd Qu.:2020-08-10
## 17 : 46804 Max. :2020-09-16 Max. :2020-09-16
## (Other):1195888
## FECHA_DEF INTUBADO NEUMONIA EDAD NACIONALIDAD
## Min. :2020-01-13 1 : 40079 1 : 194070 Min. : 0.00 1:1538524
## 1st Qu.:2020-06-03 2 : 225989 2 :1351481 1st Qu.: 30.00 2: 7048
## Median :2020-07-04 97:1279316 99: 21 Median : 41.00
## Mean :2020-07-02 99: 188 Mean : 42.12
## 3rd Qu.:2020-08-03 3rd Qu.: 53.00
## Max. :2020-09-16 Max. :120.00
## NA's :1453623
## EMBARAZO HABLA_LENGUA_INDIG DIABETES EPOC ASMA
## 1 : 12719 1 : 12968 1 : 187626 1 : 20250 1 : 44214
## 2 :768607 2 :1477246 2 :1353317 2 :1521212 2 :1497245
## 97:758768 99: 55358 98: 4629 98: 4110 98: 4113
## 98: 5478
##
##
##
## INMUSUPR HIPERTENSION OTRA_COM CARDIOVASCULAR OBESIDAD
## 1 : 19163 1 : 247350 1 : 37503 1 : 29701 1 : 238549
## 2 :1522007 2 :1293913 2 :1501472 2 :1511682 2 :1302908
## 98: 4402 98: 4309 98: 6597 98: 4189 98: 4115
##
##
##
##
## RENAL_CRONICA TABAQUISMO OTRO_CASO RESULTADO MIGRANTE
## 1 : 27013 1 : 126433 1 :750236 1:680931 1 : 1580
## 2 :1514446 2 :1414756 2 :580980 2:785019 2 : 4193
## 98: 4113 98: 4383 99:214356 3: 79622 99:1539799
##
##
##
##
## PAIS_NACIONALIDAD PAIS_ORIGEN UCI
## Length:1545572 99 :1543992 1 : 22738
## Class :character Estados Unidos de Am<e9>rica: 252 2 : 243320
## Mode :character Rep<fa>blica de Honduras : 169 97:1279316
## Venezuela : 164 99: 198
## Colombia : 150
## Cuba : 142
## (Other) : 703
# Structure of the data after the type conversion.
str(object = datos.covid)
## 'data.frame': 1545572 obs. of 35 variables:
## $ FECHA_ACTUALIZACION: Date, format: "2020-09-16" "2020-09-16" ...
## $ ID_REGISTRO : chr "010db3" "15b7b4" "08b549" "03a8ac" ...
## $ ORIGEN : Factor w/ 2 levels "1","2": 2 2 2 2 2 2 2 2 2 2 ...
## $ SECTOR : Factor w/ 14 levels "1","2","3","4",..: 4 4 4 3 4 4 4 4 3 4 ...
## $ ENTIDAD_UM : Factor w/ 32 levels "1","2","3","4",..: 31 30 27 27 9 9 25 9 15 23 ...
## $ SEXO : Factor w/ 2 levels "1","2": 1 2 2 1 1 2 1 2 1 1 ...
## $ ENTIDAD_NAC : Factor w/ 33 levels "1","2","3","4",..: 31 30 27 27 9 9 25 12 9 30 ...
## $ ENTIDAD_RES : Factor w/ 32 levels "1","2","3","4",..: 31 30 27 27 15 9 25 9 15 23 ...
## $ MUNICIPIO_RES : Factor w/ 476 levels "1","2","3","4",..: 50 193 4 4 58 2 1 17 106 5 ...
## $ TIPO_PACIENTE : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 2 1 1 1 ...
## $ FECHA_INGRESO : Date, format: "2020-04-20" "2020-04-29" ...
## $ FECHA_SINTOMAS : Date, format: "2020-04-20" "2020-04-22" ...
## $ FECHA_DEF : Date, format: NA "2020-05-12" ...
## $ INTUBADO : Factor w/ 4 levels "1","2","97","99": 3 3 3 3 3 3 2 3 3 3 ...
## $ NEUMONIA : Factor w/ 3 levels "1","2","99": 2 2 2 2 2 2 1 2 2 2 ...
## $ EDAD : int 37 47 47 47 40 44 51 37 50 35 ...
## $ NACIONALIDAD : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
## $ EMBARAZO : Factor w/ 4 levels "1","2","97","98": 2 3 3 2 2 3 2 3 2 2 ...
## $ HABLA_LENGUA_INDIG : Factor w/ 3 levels "1","2","99": 2 2 2 2 2 2 2 2 2 2 ...
## $ DIABETES : Factor w/ 3 levels "1","2","98": 2 1 1 2 2 2 1 2 2 1 ...
## $ EPOC : Factor w/ 3 levels "1","2","98": 2 2 2 2 2 2 2 2 2 2 ...
## $ ASMA : Factor w/ 3 levels "1","2","98": 2 2 2 2 2 2 2 2 2 2 ...
## $ INMUSUPR : Factor w/ 3 levels "1","2","98": 2 2 2 2 2 2 2 2 2 2 ...
## $ HIPERTENSION : Factor w/ 3 levels "1","2","98": 2 2 1 2 2 2 2 2 2 2 ...
## $ OTRA_COM : Factor w/ 3 levels "1","2","98": 2 2 2 2 2 2 2 2 2 2 ...
## $ CARDIOVASCULAR : Factor w/ 3 levels "1","2","98": 2 2 2 2 2 2 2 2 2 2 ...
## $ OBESIDAD : Factor w/ 3 levels "1","2","98": 2 2 2 2 2 2 2 2 2 2 ...
## $ RENAL_CRONICA : Factor w/ 3 levels "1","2","98": 2 2 2 2 2 2 2 2 2 2 ...
## $ TABAQUISMO : Factor w/ 3 levels "1","2","98": 2 2 2 2 2 2 2 2 2 2 ...
## $ OTRO_CASO : Factor w/ 3 levels "1","2","99": 3 3 3 1 1 3 3 1 1 3 ...
## $ RESULTADO : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
## $ MIGRANTE : Factor w/ 3 levels "1","2","99": 3 3 3 3 3 3 3 3 3 3 ...
## $ PAIS_NACIONALIDAD : chr "México" "México" "México" "México" ...
## $ PAIS_ORIGEN : Factor w/ 78 levels "99","Alemania",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ UCI : Factor w/ 4 levels "1","2","97","99": 3 3 3 3 3 3 2 3 3 3 ...
# Keep only the records whose RESULTADO code is "1" (the cases this script
# treats as positive). which() drops NA comparisons, as subset() does.
filas.positivas <- which(datos.covid$RESULTADO == "1")
positivo.COVID <- datos.covid[filas.positivas, ]

# Histogram of age (EDAD) for the selected cases, using 30 bins.
ggplot(data = positivo.COVID, mapping = aes(x = EDAD)) +
  geom_histogram(bins = 30, position = "stack")

# Print very small relative frequencies in fixed notation instead of
# scientific notation.
options(scipen = 999)

# Grouped frequency distribution of age, flattened to a plain data frame so
# it can be passed to ggplot().
freq.edades <- data.frame(fdt(positivo.COVID$EDAD)$table)
print(freq.edades)
## Class.limits f rf rf... cf cf...
## 1 [0,5.67524) 4611 0.006771611221 0.6771611221 4611 0.6771611
## 2 [5.67524,11.3505) 4772 0.007008052211 0.7008052211 9383 1.3779663
## 3 [11.3505,17.0257) 9922 0.014571226747 1.4571226747 19305 2.8350890
## 4 [17.0257,22.701) 24530 0.036024208033 3.6024208033 43835 6.4375098
## 5 [22.701,28.3762) 71288 0.104691958510 10.4691958510 115123 16.9067057
## 6 [28.3762,34.0514) 90379 0.132728573086 13.2728573086 205502 30.1795630
## 7 [34.0514,39.7267) 75769 0.111272654645 11.1272654645 281271 41.3068284
## 8 [39.7267,45.4019) 87640 0.128706139095 12.8706139095 368911 54.1774424
## 9 [45.4019,51.0771) 86165 0.126539987165 12.6539987165 455076 66.8314411
## 10 [51.0771,56.7524) 61183 0.089851982066 8.9851982066 516259 75.8166393
## 11 [56.7524,62.4276) 58812 0.086369984624 8.6369984624 575071 84.4536377
## 12 [62.4276,68.1029) 42961 0.063091561406 6.3091561406 618032 90.7627939
## 13 [68.1029,73.7781) 25159 0.036947943331 3.6947943331 643191 94.4575882
## 14 [73.7781,79.4533) 20127 0.029558060949 2.9558060949 663318 97.4133943
## 15 [79.4533,85.1286) 11497 0.016884236435 1.6884236435 674815 99.1018180
## 16 [85.1286,90.8038) 4300 0.006314883593 0.6314883593 679115 99.7333063
## 17 [90.8038,96.479) 1484 0.002179369128 0.2179369128 680599 99.9512432
## 18 [96.479,102.154) 294 0.000431761808 0.0431761808 680893 99.9944194
## 19 [102.154,107.83) 29 0.000042588750 0.0042588750 680922 99.9986783
## 20 [107.83,113.505) 4 0.000005874310 0.0005874310 680926 99.9992657
## 21 [113.505,119.18) 5 0.000007342888 0.0007342888 680931 100.0000000
# Bar chart of the relative frequency (rf) of each age class.
# The x position is taken from the table itself (one index per row) instead
# of a hard-coded 1:21, so the plot still works when fdt() produces a
# different number of classes for other data.
ggplot(freq.edades, aes(seq_along(rf), rf, fill = Class.limits)) +
  geom_bar(stat = "identity")

# Frequency table of the SEXO codes among the selected cases, as a plain
# data frame.
freq.sexo <- fdt_cat(positivo.COVID$SEXO)
freq.sexo <- data.frame(freq.sexo)
freq.sexo
## Category f rf rf... cf cf...
## 1 2 354915 0.5212202 52.12202 354915 52.12202
## 2 1 326016 0.4787798 47.87798 680931 100.00000
# Relative frequency of each SEXO code, one bar per category.
ggplot(
  data = freq.sexo,
  mapping = aes(x = Category, y = rf, fill = Category)
) +
  geom_col()

# Frequency table of the state-of-residence codes (ENTIDAD_RES) among the
# selected cases.
freq.estados <- data.frame(fdt_cat(positivo.COVID$ENTIDAD_RES))
print(freq.estados)
## Category f rf rf... cf cf...
## 1 9 114268 0.167811423 16.7811423 114268 16.78114
## 2 15 75412 0.110748372 11.0748372 189680 27.85598
## 3 11 37541 0.055131871 5.5131871 227221 33.36917
## 4 19 34760 0.051047757 5.1047757 261981 38.47394
## 5 30 31370 0.046069279 4.6069279 293351 43.08087
## 6 27 30397 0.044640353 4.4640353 323748 47.54491
## 7 21 29524 0.043358284 4.3358284 353272 51.88073
## 8 28 27299 0.040090699 4.0090699 380571 55.88980
## 9 5 24435 0.035884693 3.5884693 405006 59.47827
## 10 14 23743 0.034868437 3.4868437 428749 62.96512
## 11 26 23445 0.034430801 3.4430801 452194 66.40820
## 12 24 21114 0.031007547 3.1007547 473308 69.50895
## 13 2 18386 0.027001267 2.7001267 491694 72.20908
## 14 16 18194 0.026719300 2.6719300 509888 74.88101
## 15 25 17498 0.025697170 2.5697170 527386 77.45073
## 16 31 16894 0.024810150 2.4810150 544280 79.93174
## 17 12 16854 0.024751407 2.4751407 561134 82.40688
## 18 20 15030 0.022072721 2.2072721 576164 84.61415
## 19 13 11662 0.017126552 1.7126552 587826 86.32681
## 20 23 11189 0.016431915 1.6431915 599015 87.97000
## 21 8 9223 0.013544691 1.3544691 608238 89.32447
## 22 3 9147 0.013433079 1.3433079 617385 90.66778
## 23 22 7929 0.011644352 1.1644352 625314 91.83221
## 24 10 7887 0.011582671 1.1582671 633201 92.99048
## 25 29 7035 0.010331443 1.0331443 640236 94.02362
## 26 32 6527 0.009585406 0.9585406 646763 94.98216
## 27 1 6495 0.009538411 0.9538411 653258 95.93601
## 28 7 6411 0.009415051 0.9415051 659669 96.87751
## 29 4 5834 0.008567682 0.8567682 665503 97.73428
## 30 17 5602 0.008226972 0.8226972 671105 98.55698
## 31 18 5547 0.008146200 0.8146200 676652 99.37160
## 32 6 4279 0.006284043 0.6284043 680931 100.00000
# Relative frequency of each state-of-residence code.
ggplot(
  data = freq.estados,
  mapping = aes(x = Category, y = rf, fill = Category)
) +
  geom_col()

# Frequency table of the RESULTADO codes over ALL records (not only the
# selected subset).
freq.pos <- data.frame(fdt_cat(datos.covid$RESULTADO))
print(freq.pos)
## Category f rf rf... cf cf...
## 1 2 785019 0.5079149 50.79149 785019 50.79149
## 2 1 680931 0.4405689 44.05689 1465950 94.84838
## 3 3 79622 0.0515162 5.15162 1545572 100.00000
# Relative frequency of each RESULTADO code.
ggplot(
  data = freq.pos,
  mapping = aes(x = Category, y = rf, fill = Category)
) +
  geom_col()

# Frequency table of the TIPO_PACIENTE codes among the selected cases.
freq.hosp <- data.frame(fdt_cat(positivo.COVID$TIPO_PACIENTE))
print(freq.hosp)
## Category f rf rf... cf cf...
## 1 1 514098 0.7549928 75.49928 514098 75.49928
## 2 2 166833 0.2450072 24.50072 680931 100.00000
# Relative frequency of each TIPO_PACIENTE code.
ggplot(
  data = freq.hosp,
  mapping = aes(x = Category, y = rf, fill = Category)
) +
  geom_col()

# Keep fixed (non-scientific) notation for the printed frequencies.
options(scipen = 999)

# Grouped frequency distribution of the month of death (FECHA_DEF).
# NOTE(review): month() yields whole numbers, so the interval classes built
# by fdt() leave empty bins between months (see the printed table); a
# categorical table may be a better fit here. Confirm before changing,
# because the plot that follows relies on this table's columns.
mes.defuncion <- month(positivo.COVID$FECHA_DEF)
decesos <- data.frame(fdt(mes.defuncion)$table)
print(decesos)
## Class.limits f rf rf... cf cf...
## 1 [2.97,3.31) 80 0.001111451 0.1111451 80 0.1111451
## 2 [3.31,3.65) 0 0.000000000 0.0000000 80 0.1111451
## 3 [3.65,3.99) 0 0.000000000 0.0000000 80 0.1111451
## 4 [3.99,4.33) 3543 0.049223374 4.9223374 3623 5.0334825
## 5 [4.33,4.67) 0 0.000000000 0.0000000 3623 5.0334825
## 6 [4.67,5.01) 12412 0.172441579 17.2441579 16035 22.2776404
## 7 [5.01,5.35) 0 0.000000000 0.0000000 16035 22.2776404
## 8 [5.35,5.69) 0 0.000000000 0.0000000 16035 22.2776404
## 9 [5.69,6.03) 17395 0.241671066 24.1671066 33430 46.4447470
## 10 [6.03,6.37) 0 0.000000000 0.0000000 33430 46.4447470
## 11 [6.37,6.71) 0 0.000000000 0.0000000 33430 46.4447470
## 12 [6.71,7.05) 19370 0.269110006 26.9110006 52800 73.3557476
## 13 [7.05,7.39) 0 0.000000000 0.0000000 52800 73.3557476
## 14 [7.39,7.73) 0 0.000000000 0.0000000 52800 73.3557476
## 15 [7.73,8.07) 14887 0.206827086 20.6827086 67687 94.0384562
## 16 [8.07,8.41) 0 0.000000000 0.0000000 67687 94.0384562
## 17 [8.41,8.75) 0 0.000000000 0.0000000 67687 94.0384562
## 18 [8.75,9.09) 4291 0.059615438 5.9615438 71978 100.0000000
# Bar chart of the relative frequency (rf) of each month-of-death class.
# The x position is one index per table row instead of a hard-coded 1:18,
# so the plot does not fail when fdt() yields a different number of classes.
ggplot(decesos, aes(seq_along(rf), rf, fill = Class.limits)) +
  geom_bar(stat = "identity")

# Frequency table of the UCI codes among the selected cases, as a plain
# data frame.
freq.UCI <- fdt_cat(positivo.COVID$UCI)
freq.UCI <- data.frame(freq.UCI)
freq.UCI
## Category f rf rf... cf cf...
## 1 97 514098 0.7549927966 75.49927966 514098 75.49928
## 2 2 152331 0.2237098913 22.37098913 666429 97.87027
## 3 1 14346 0.0210682140 2.10682140 680775 99.97709
## 4 99 156 0.0002290981 0.02290981 680931 100.00000
# Relative frequency of each UCI code.
ggplot(
  data = freq.UCI,
  mapping = aes(x = Category, y = rf, fill = Category)
) +
  geom_col()

# Keep fixed (non-scientific) notation for the printed frequencies.
options(scipen = 999)

# Grouped frequency distribution of the month of admission (FECHA_INGRESO)
# for the selected cases.
mes.ingreso <- month(positivo.COVID$FECHA_INGRESO)
freq.Ingreso <- data.frame(fdt(mes.ingreso)$table)
print(freq.Ingreso)
## Class.limits f rf rf... cf cf...
## 1 [0.99,1.376) 2 0.000002937155 0.0002937155 2 0.0002937155
## 2 [1.376,1.761) 0 0.000000000000 0.0000000000 2 0.0002937155
## 3 [1.761,2.147) 8 0.000011748621 0.0011748621 10 0.0014685776
## 4 [2.147,2.533) 0 0.000000000000 0.0000000000 10 0.0014685776
## 5 [2.533,2.919) 0 0.000000000000 0.0000000000 10 0.0014685776
## 6 [2.919,3.304) 2530 0.003715501277 0.3715501277 2540 0.3730187053
## 7 [3.304,3.69) 0 0.000000000000 0.0000000000 2540 0.3730187053
## 8 [3.69,4.076) 26576 0.039028917761 3.9028917761 29116 4.2759104814
## 9 [4.076,4.461) 0 0.000000000000 0.0000000000 29116 4.2759104814
## 10 [4.461,4.847) 0 0.000000000000 0.0000000000 29116 4.2759104814
## 11 [4.847,5.233) 86609 0.127192035610 12.7192035610 115725 16.9951140424
## 12 [5.233,5.619) 0 0.000000000000 0.0000000000 115725 16.9951140424
## 13 [5.619,6.004) 153159 0.224925873547 22.4925873547 268884 39.4877013971
## 14 [6.004,6.39) 0 0.000000000000 0.0000000000 268884 39.4877013971
## 15 [6.39,6.776) 0 0.000000000000 0.0000000000 268884 39.4877013971
## 16 [6.776,7.161) 199879 0.293537818076 29.3537818076 468763 68.8414832046
## 17 [7.161,7.547) 0 0.000000000000 0.0000000000 468763 68.8414832046
## 18 [7.547,7.933) 0 0.000000000000 0.0000000000 468763 68.8414832046
## 19 [7.933,8.319) 156613 0.229998340507 22.9998340507 625376 91.8413172553
## 20 [8.319,8.704) 0 0.000000000000 0.0000000000 625376 91.8413172553
## 21 [8.704,9.09) 55555 0.081586827447 8.1586827447 680931 100.0000000000
# Keep fixed (non-scientific) notation for the printed frequencies.
options(scipen = 999)

# Grouped frequency distribution of the month of symptom onset
# (FECHA_SINTOMAS) for the selected cases.
mes.sintomas <- month(positivo.COVID$FECHA_SINTOMAS)
freq.Sintomas <- data.frame(fdt(mes.sintomas)$table)
print(freq.Sintomas)
## Class.limits f rf rf... cf cf...
## 1 [0.99,1.376) 2 0.000002937155 0.0002937155 2 0.0002937155
## 2 [1.376,1.761) 0 0.000000000000 0.0000000000 2 0.0002937155
## 3 [1.761,2.147) 15 0.000022028664 0.0022028664 17 0.0024965819
## 4 [2.147,2.533) 0 0.000000000000 0.0000000000 17 0.0024965819
## 5 [2.533,2.919) 0 0.000000000000 0.0000000000 17 0.0024965819
## 6 [2.919,3.304) 3923 0.005761229846 0.5761229846 3940 0.5786195664
## 7 [3.304,3.69) 0 0.000000000000 0.0000000000 3940 0.5786195664
## 8 [3.69,4.076) 31848 0.046771258762 4.6771258762 35788 5.2557454426
## 9 [4.076,4.461) 0 0.000000000000 0.0000000000 35788 5.2557454426
## 10 [4.461,4.847) 0 0.000000000000 0.0000000000 35788 5.2557454426
## 11 [4.847,5.233) 98504 0.144660765922 14.4660765922 134292 19.7218220348
## 12 [5.233,5.619) 0 0.000000000000 0.0000000000 134292 19.7218220348
## 13 [5.619,6.004) 160673 0.235960765481 23.5960765481 294965 43.3178985830
## 14 [6.004,6.39) 0 0.000000000000 0.0000000000 294965 43.3178985830
## 15 [6.39,6.776) 0 0.000000000000 0.0000000000 294965 43.3178985830
## 16 [6.776,7.161) 194958 0.286310947805 28.6310947805 489923 71.9489933635
## 17 [7.161,7.547) 0 0.000000000000 0.0000000000 489923 71.9489933635
## 18 [7.547,7.933) 0 0.000000000000 0.0000000000 489923 71.9489933635
## 19 [7.933,8.319) 156708 0.230137855377 23.0137855377 646631 94.9627789012
## 20 [8.319,8.704) 0 0.000000000000 0.0000000000 646631 94.9627789012
## 21 [8.704,9.09) 34300 0.050372210988 5.0372210988 680931 100.0000000000
# Keep fixed (non-scientific) notation for the printed frequencies.
options(scipen = 999)

# Grouped frequency distribution of the month of the update date
# (FECHA_ACTUALIZACION). Every record shares the same update date in this
# file, so a single class holds all observations.
mes.actualizacion <- month(positivo.COVID$FECHA_ACTUALIZACION)
freq.Actualizacion <- data.frame(fdt(mes.actualizacion)$table)
print(freq.Actualizacion)
## Class.limits f rf rf... cf cf...
## 1 [8.91,8.919) 0 0 0 0 0
## 2 [8.919,8.927) 0 0 0 0 0
## 3 [8.927,8.936) 0 0 0 0 0
## 4 [8.936,8.944) 0 0 0 0 0
## 5 [8.944,8.953) 0 0 0 0 0
## 6 [8.953,8.961) 0 0 0 0 0
## 7 [8.961,8.97) 0 0 0 0 0
## 8 [8.97,8.979) 0 0 0 0 0
## 9 [8.979,8.987) 0 0 0 0 0
## 10 [8.987,8.996) 0 0 0 0 0
## 11 [8.996,9.004) 680931 1 100 680931 100
## 12 [9.004,9.013) 0 0 0 680931 100
## 13 [9.013,9.021) 0 0 0 680931 100
## 14 [9.021,9.03) 0 0 0 680931 100
## 15 [9.03,9.039) 0 0 0 680931 100
## 16 [9.039,9.047) 0 0 0 680931 100
## 17 [9.047,9.056) 0 0 0 680931 100
## 18 [9.056,9.064) 0 0 0 680931 100
## 19 [9.064,9.073) 0 0 0 680931 100
## 20 [9.073,9.081) 0 0 0 680931 100
## 21 [9.081,9.09) 0 0 0 680931 100
# Bar charts of the relative frequency (rf) per class for the three month
# tables. Each x position is one index per table row instead of a hard-coded
# 1:21, so a table with a different number of classes no longer breaks the
# plot.
ggplot(freq.Ingreso, aes(seq_along(rf), rf, fill = Class.limits)) +
  geom_bar(stat = "identity")
ggplot(freq.Sintomas, aes(seq_along(rf), rf, fill = Class.limits)) +
  geom_bar(stat = "identity")
ggplot(freq.Actualizacion, aes(seq_along(rf), rf, fill = Class.limits)) +
  geom_bar(stat = "identity")

# Frequency table of the NEUMONIA codes among the selected cases, as a plain
# data frame.
freq.Neumonia <- fdt_cat(positivo.COVID$NEUMONIA)
freq.Neumonia <- data.frame(freq.Neumonia)
freq.Neumonia
## Category f rf rf... cf cf...
## 1 2 552829 0.81187227487 81.187227487 552829 81.18723
## 2 1 128094 0.18811597651 18.811597651 680923 99.99883
## 3 99 8 0.00001174862 0.001174862 680931 100.00000
# Frequency table of the DIABETES codes among the selected cases.
freq.Diabete <- data.frame(fdt_cat(positivo.COVID$DIABETES))
print(freq.Diabete)
## Category f rf rf... cf cf...
## 1 2 572929 0.841390684 84.1390684 572929 84.13907
## 2 1 105944 0.155586983 15.5586983 678873 99.69777
## 3 98 2058 0.003022333 0.3022333 680931 100.00000
# Frequency table of the EPOC codes among the selected cases.
freq.Epoc <- data.frame(fdt_cat(positivo.COVID$EPOC))
print(freq.Epoc)
## Category f rf rf... cf cf...
## 1 2 669069 0.982579733 98.2579733 669069 98.25797
## 2 1 10044 0.014750393 1.4750393 679113 99.73301
## 3 98 1818 0.002669874 0.2669874 680931 100.00000
# Frequency table of the ASMA codes among the selected cases.
freq.Asma <- data.frame(fdt_cat(positivo.COVID$ASMA))
print(freq.Asma)
## Category f rf rf... cf cf...
## 1 2 661339 0.971227628 97.1227628 661339 97.12276
## 2 1 17784 0.026117184 2.6117184 679123 99.73448
## 3 98 1808 0.002655188 0.2655188 680931 100.00000
# Frequency table of the INMUSUPR codes among the selected cases.
freq.INMU <- data.frame(fdt_cat(positivo.COVID$INMUSUPR))
print(freq.INMU)
## Category f rf rf... cf cf...
## 1 2 671514 0.986170405 98.6170405 671514 98.61704
## 2 1 7461 0.010957057 1.0957057 678975 99.71275
## 3 98 1956 0.002872538 0.2872538 680931 100.00000
# Frequency table of the HIPERTENSION codes among the selected cases.
freq.Hipertension <- data.frame(fdt_cat(positivo.COVID$HIPERTENSION))
print(freq.Hipertension)
## Category f rf rf... cf cf...
## 1 2 546507 0.802587927 80.2587927 546507 80.25879
## 2 1 132502 0.194589466 19.4589466 679009 99.71774
## 3 98 1922 0.002822606 0.2822606 680931 100.00000
# Frequency table of the OTRA_COM codes among the selected cases.
freq.Otras <- data.frame(fdt_cat(positivo.COVID$OTRA_COM))
print(freq.Otras)
## Category f rf rf... cf cf...
## 1 2 661279 0.971139513 97.1139513 661279 97.11395
## 2 1 16452 0.024161038 2.4161038 677731 99.53006
## 3 98 3200 0.004699448 0.4699448 680931 100.00000
# Frequency table of the CARDIOVASCULAR codes among the selected cases.
freq.Cardio <- data.frame(fdt_cat(positivo.COVID$CARDIOVASCULAR))
print(freq.Cardio)
## Category f rf rf... cf cf...
## 1 2 665485 0.977316351 97.7316351 665485 97.73164
## 2 1 13551 0.019900695 1.9900695 679036 99.72170
## 3 98 1895 0.002782955 0.2782955 680931 100.00000
# Frequency table of the OBESIDAD codes among the selected cases.
freq.Obesidad <- data.frame(fdt_cat(positivo.COVID$OBESIDAD))
print(freq.Obesidad)
## Category f rf rf... cf cf...
## 1 2 555123 0.815241192 81.5241192 555123 81.52412
## 2 1 123934 0.182006694 18.2006694 679057 99.72479
## 3 98 1874 0.002752114 0.2752114 680931 100.00000
# Frequency table of the RENAL_CRONICA codes among the selected cases.
freq.Cro <- data.frame(fdt_cat(positivo.COVID$RENAL_CRONICA))
print(freq.Cro)
## Category f rf rf... cf cf...
## 1 2 666137 0.978273863 97.8273863 666137 97.82739
## 2 1 12939 0.019001925 1.9001925 679076 99.72758
## 3 98 1855 0.002724211 0.2724211 680931 100.00000
# Frequency table of the TABAQUISMO codes among the selected cases.
freq.Tab <- data.frame(fdt_cat(positivo.COVID$TABAQUISMO))
print(freq.Tab)
## Category f rf rf... cf cf...
## 1 2 629692 0.924751553 92.4751553 629692 92.47516
## 2 1 49271 0.072358286 7.2358286 678963 99.71098
## 3 98 1968 0.002890161 0.2890161 680931 100.00000