JuveYell

Fuente: la foto es del proyecto Aldea Global/paghonduras.org.

Los datos analizados provienen del Observatorio Demográfico Universitario de la Universidad Nacional Autónoma de Honduras, SINAGER y DOMO CORONAVIRUS TRACKER.

##Haga Click Aqui para ver Certificado Machine Learning MIT https://www.credential.net/4dd365ea-ea5a-46a2-a72e-539e70545c6e

##Haga Click Aqui para ver Certificado Columbia Python for Managers https://certificates.emeritus.org/0a2e1de7-add2-4710-ad49-417d1dadfb61#gs.4a92hv ##Contacto: rchang@unah.edu.hn

Algunos Dashboards elaborados son: Para Bolsa de Valores https://rchang.shinyapps.io/rchang-stock-exchange/

Para el Estado del Clima https://rchang.shinyapps.io/rchang-app_clima_ho/

Para Machine Learning https://rchang.shinyapps.io/rchang-app/

Para Empresariales e Industriales https://rchang.shinyapps.io/rchang-app_final_emp/

Para Dashboards con log in https://rchang.shinyapps.io/clase_3-shiny-2/_w_ae4e775f/_w_f249a9a1/?page=sign_in

y para Sistemas de Información Geográfica

##La pregunta de investigación se deriva de contar, por una parte, con los datos geoespaciales de los municipios de Honduras en formato shp y, por otra, con los datos del Observatorio Demográfico Universitario de la UNAH en CSV: ¿cuáles son los municipios con más casos de COVID-19, muertes y personas recuperadas en Honduras?

Para ir a la parte II de este análisis con mapas interactivos haga click en https://rpubs.com/rchang/797384

# Echo the source code of every chunk in the knitted document by default.
knitr::opts_chunk$set(echo = TRUE)

library(data.table)
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.2     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::between()   masks data.table::between()
## x dplyr::filter()    masks stats::filter()
## x dplyr::first()     masks data.table::first()
## x dplyr::lag()       masks stats::lag()
## x dplyr::last()      masks data.table::last()
## x purrr::transpose() masks data.table::transpose()

library(dplyr)
library(ggplot2)
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:data.table':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(sf)

## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1

library(epiDisplay)

## Loading required package: foreign

## Loading required package: survival

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:plotly':
## 
##     select

## The following object is masked from 'package:dplyr':
## 
##     select

## Loading required package: nnet

## 
## Attaching package: 'epiDisplay'

## The following object is masked from 'package:ggplot2':
## 
##     alpha

library(viridis)

## Loading required package: viridisLite

##ANÁLISIS EXPLORATORIO: GRÁFICOS DE DENSIDAD Y CURVAS DEL COVID-19 EN HONDURAS BASADO EN EL ÍNDICE DE INCIDENCIA A 7 Y 14 DÍAS

FUENTE: DOMO CORONAVIRUS TRACKER https://www.domo.com/covid19/data-explorer/all/

## Warning: Ignoring unknown parameters: frame

## Warning: Ignoring unknown parameters: frame

## Warning: Ignoring unknown parameters: frame

## Warning: Ignoring unknown parameters: frame

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning: Ignoring unknown parameters: frame

## Warning: Use of `sf$Date` is discouraged. Use `Date` instead.

## Warning: Use of `sf$N_cases_7` is discouraged. Use `N_cases_7` instead.

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning: Removed 50 rows containing non-finite values (stat_smooth).

## Warning: Ignoring unknown parameters: frame

## Warning: Use of `sf$Date` is discouraged. Use `Date` instead.

## Warning: Use of `sf$N_cases_14` is discouraged. Use `N_cases_14` instead.

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Smoothed curve of the 7-day new-deaths series (`New_D_7`) stored in `sf`.
# Columns are referenced by bare name inside aes() so they are looked up in
# the plot data; ggplot2 warns against `sf$col` there. The `frame` argument
# was removed because geom_smooth() reports it as an unknown parameter and
# ignores it.
ggplot(data.frame(sf), aes(x = as.IDate(Date), y = New_D_7)) +
  geom_smooth(fill = "red", color = "#e9ecef", alpha = 0.8) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "CURVA COVID-19 NUEVAS MUERTES A 7 DÍAS /Honduras",
    subtitle = "",
    x = "Mes",
    y = "Indicidencia de muertes 7 días",
    color = "Año",
    caption = "Fuente: DOMO CORONAVIRUS TRACKER"
  )

## Warning: Ignoring unknown parameters: frame

## Warning: Use of `sf$Date` is discouraged. Use `Date` instead.

## Warning: Use of `sf$New_D_7` is discouraged. Use `New_D_7` instead.

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

##ANÁLISIS MUNICIPAL, ESPACIAL Y TEMPORAL DEL COVID-19 HONDURAS FUENTE: OBSERVATORIO DEMOGRÁFICO UNIVERSITARIO UNAH

Explorando datos

names(datacovid_observatorio) # Column names

## [1] "Fecha"        "Sexo"         "Edad"         "Departamento" "Municipio"   
## [6] "Tipo de Dato"

str(datacovid_observatorio) # Type of each variable in the dataset

## Classes 'data.table' and 'data.frame':   362438 obs. of  6 variables:
##  $ Fecha       : Factor w/ 472 levels "","1/1/2021",..: 38 38 69 100 100 100 115 115 131 146 ...
##  $ Sexo        : Factor w/ 4 levels "","Femenino",..: 2 2 2 3 2 2 2 2 3 3 ...
##  $ Edad        : int  37 42 64 44 1 30 28 15 61 43 ...
##  $ Departamento: Factor w/ 20 levels "","ATLÁNTIDA",..: 2 9 9 9 9 3 9 9 7 7 ...
##  $ Municipio   : Factor w/ 274 levels "","AGUANQUETERIQUE",..: 94 48 48 48 48 145 48 48 216 145 ...
##  $ Tipo de Dato: Factor w/ 4 levels "","CASOS","MUERTE",..: 2 2 2 2 2 2 2 2 2 2 ...
##  - attr(*, ".internal.selfref")=<externalptr>

class(datacovid_observatorio)    # Class of the data object

## [1] "data.table" "data.frame"

dim(datacovid_observatorio)      # Number of rows (cases) and columns (variables)

## [1] 362438      6

summary(datacovid_observatorio) # Statistical summary of each variable in the dataset

##        Fecha                    Sexo             Edad       
##           :  2912                 :  2912   Min.   :  0.00  
##  16/9/2020:  2552   Femenino      :186612   1st Qu.: 26.00  
##  15/6/2021:  2241   Masculino     :168582   Median : 37.00  
##  12/9/2020:  2181   No Determinado:  4332   Mean   : 55.68  
##  29/4/2021:  2009                           3rd Qu.: 53.00  
##  11/6/2021:  1982                           Max.   :999.00  
##  (Other)  :348561                           NA's   :2912    
##      Departamento               Municipio          Tipo de Dato   
##  FM        : 98723   DISTRITO CENTRAL: 87795             :  2912  
##  CORTÉS    : 86974   SAN PEDRO SULA  : 47894   CASOS     :262760  
##  EL PARAÍSO: 24313   DANLI           : 14156   MUERTE    :  6997  
##  ATLÁNTIDA : 18489   LA CEIBA        : 11407   RECUPERADO: 89769  
##  YORO      : 17377   CHOLOMA         :  8378                      
##  LA PAZ    : 14197   EL PROGRESO     :  8188                      
##  (Other)   :102365   (Other)         :184620

## `summarise()` has grouped output by 'Departamento'. You can override using the `.groups` argument.

## `summarise()` has grouped output by 'Departamento'. You can override using the `.groups` argument.

# Share of cases per age range, one facet per Departamento; each bar is
# annotated with its count `n`.
ggplot(casos_covid_edades_casos, aes(x = rango, y = prop_prov)) +
  geom_col(fill = "blue") +
  geom_text(aes(label = n), hjust = 0.5, vjust = -0.1, size = 3) +
  facet_grid(facets = "Departamento") +
  theme_light() +
  labs(
    title = "Cantidad de casos por Departamento hasta el 1 de julio 2021",
    subtitle = "Distribución por rangos etáreos",
    caption = "Fuente: Observatorio Demgráfico Universitario UNAH",
    x = "Rango etáreo",
    y = "% de casos"
  )

## `summarise()` has grouped output by 'Departamento'. You can override using the `.groups` argument.

# Share of recovered patients per age range, one facet per Departamento;
# each bar is annotated with its count `n`.
ggplot(casos_covid_edades_recu, aes(x = rango, y = prop_prov)) +
  geom_col(fill = "green") +
  geom_text(aes(label = n), hjust = 0.5, vjust = -0.1, size = 3) +
  facet_grid(facets = "Departamento") +
  theme_light() +
  labs(
    title = "Cantidad de Recuperados por Departamento hasta el 1 de julio 2021",
    subtitle = "Distribución por rangos etáreos",
    caption = "Fuente: Observatorio Demgráfico Universitario UNAH",
    x = "Rango etáreo",
    y = "% de Recuperados"
  )

## `summarise()` has grouped output by 'Departamento'. You can override using the `.groups` argument.

## `summarise()` has grouped output by 'Departamento'. You can override using the `.groups` argument.

# Bars of `prop` per Departamento, filled by record type and labelled with
# the count `n`; the y axis is log10-scaled.
ggplot(f, aes(x = Departamento, y = prop, fill = Tipo.de.Dato)) +
  geom_col() +
  geom_text(
    aes(label = n),
    hjust = 0.5,
    vjust = -0.1,
    size = 2.5
  ) +
  scale_fill_manual(values = c("grey69", "red", "green")) +
  scale_y_log10()

# Note: the plot above uses a logarithmic y axis for visualization purposes.

# Age and sex of the recorded deaths, used as input for the population
# pyramid. Columns are selected by name rather than by position so the
# result does not depend on the column order of `datacovid_observatorio`.
A <- datacovid_observatorio %>%
  dplyr::select(Sexo, Edad, `Tipo de Dato`) %>%
  # Keep only records with a determined sex, then drop the unused levels.
  dplyr::filter(Sexo %in% c("Masculino", "Femenino")) %>%
  droplevels() %>%
  dplyr::filter(`Tipo de Dato` == "MUERTE") %>%
  # Discard ages outside 0-110 (the raw data contains values up to 999).
  dplyr::filter(Edad <= 110 & Edad >= 0) %>%
  dplyr::select(Sexo, Edad)

# Population pyramid of deaths by age (5-year bins) and sex.
pyramid(
  age = A$Edad,
  sex = A$Sexo,
  binwidth = 5,
  decimal = 2,
  col.gender = c("gold", "red"),
  main = "COVID-19: Fallecimiento por edad y sexo |  Honduras"
)

br = ((min(A1$Edad, na.rm = TRUE)%/%binwidth):(max(A1$Edad, na.rm = TRUE)%/%binwidth)),

#ANÁLISIS TEMPORAL

##         Fecha      Sexo Edad Departamento        Municipio Tipo de Dato
## 1: 2020-03-11  Femenino   37    ATLÁNTIDA         LA CEIBA        CASOS
## 2: 2020-03-11  Femenino   42           FM DISTRITO CENTRAL        CASOS
## 3: 2020-03-13  Femenino   64           FM DISTRITO CENTRAL        CASOS
## 4: 2020-03-15 Masculino   44           FM DISTRITO CENTRAL        CASOS
## 5: 2020-03-15  Femenino    1           FM DISTRITO CENTRAL        CASOS
## 6: 2020-03-15  Femenino   30    CHOLUTECA   No Determinado        CASOS
##    dia_semana dia semana  Ano
## 1:  miércoles  11     11 2020
## 2:  miércoles  11     11 2020
## 3:    viernes  13     11 2020
## 4:    domingo  15     11 2020
## 5:    domingo  15     11 2020
## 6:    domingo  15     11 2020

## # A tibble: 444 x 2
##    Fecha      cantidad
##    <date>        <int>
##  1 2020-03-26        1
##  2 2020-03-28        2
##  3 2020-03-30        4
##  4 2020-03-31        3
##  5 2020-04-01        4
##  6 2020-04-02        1
##  7 2020-04-04        7
##  8 2020-04-08        1
##  9 2020-04-10        1
## 10 2020-04-11        1
## # ... with 434 more rows

# Number of "CASOS" records per date, drawn as a line over time.
datacovid_observatorio %>%
  filter(`Tipo de Dato` == "CASOS") %>%
  group_by(Fecha) %>%
  summarise(cantidad = n()) %>%
  ggplot(aes(x = Fecha, y = cantidad)) +
  geom_line() +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  labs(
    title = "Cantidad de casos en Honduras hasta el 1 de julio 2021",
    x = "Fecha",
    y = "Número de casos",
    caption = "Fuente: Observatorio Demgráfico Universitario UNAH"
  )

# Number of "MUERTE" records per date, drawn as a red line over time.
# The colour is set as a constant outside aes(): inside aes() the string
# "red" is treated as a data value, which adds a one-entry legend and leaves
# the actual colour to the default palette. With no colour mapping left, the
# orphan legend title was removed from labs().
datacovid_observatorio %>%
  filter(`Tipo de Dato` == "MUERTE") %>%
  group_by(Fecha) %>%
  summarise(cantidad = n()) %>%
  ggplot(aes(x = Fecha, y = cantidad)) +
  geom_line(color = "red") +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  labs(
    title = "Cantidad de Muertes en Honduras hasta el 1 de julio 2021",
    x = "Fecha",
    y = "Número de Muertes",
    caption = "Fuente: Observatorio Demgráfico Universitario UNAH"
  )

# Number of "RECUPERADO" records per date; the static plot is stored in `x`
# and then rendered as an interactive plotly widget.
x <- datacovid_observatorio %>%
  filter(`Tipo de Dato` == "RECUPERADO") %>%
  group_by(Fecha) %>%
  summarise(cantidad = n()) %>%
  ggplot(aes(x = Fecha, y = cantidad)) +
  geom_line() +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  labs(
    title = "Cantidad de Recuperados en Honduras hasta el 1 de julio 2021",
    x = "Fecha",
    y = "Número de Recuperados",
    caption = "Fuente: Observatorio COVID/UNAH"
  )
ggplotly(x)

# Add the month of each record (as a month label) in a new column `mes`,
# then discard every row that has a missing value in any column.
datacovid_observatorio <- drop_na(
  mutate(datacovid_observatorio, mes = lubridate::month(Fecha, label = TRUE))
)
# Show the last rows to check the new column.
tail(datacovid_observatorio)

##         Fecha      Sexo Edad Departamento        Municipio Tipo de Dato
## 1: 2021-07-01  Femenino   40           FM VALLE DE ÁNGELES   RECUPERADO
## 2: 2021-07-01 Masculino   25           FM VALLE DE ÁNGELES   RECUPERADO
## 3: 2021-07-01 Masculino   26           FM VALLE DE ÁNGELES   RECUPERADO
## 4: 2021-07-01  Femenino   26           FM VALLE DE ÁNGELES   RECUPERADO
## 5: 2021-07-01  Femenino    9           FM VALLE DE ÁNGELES   RECUPERADO
## 6: 2021-07-01  Femenino   21           FM VALLE DE ÁNGELES   RECUPERADO
##    dia_semana dia semana  Ano mes
## 1:     jueves   1     26 2021 jul
## 2:     jueves   1     26 2021 jul
## 3:     jueves   1     26 2021 jul
## 4:     jueves   1     26 2021 jul
## 5:     jueves   1     26 2021 jul
## 6:     jueves   1     26 2021 jul

# Monthly number of "CASOS" records before 2021-07-01, one line per year.
datacovid_observatorio %>%
  filter(Fecha < "2021-07-01", `Tipo de Dato` == "CASOS") %>%
  group_by(Ano, mes) %>%
  summarise(cantidad = n()) %>%
  ggplot() +
  geom_line(
    aes(x = mes, y = cantidad, group = Ano, color = as.factor(Ano)),
    size = 1
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Casos por mes y año",
    subtitle = "Año 2020",
    x = "Mes",
    y = "Cantidad",
    color = "Año",
    caption = "Fuente: Observatorio Demográdico Universitario UNAH"
  )

## `summarise()` has grouped output by 'Ano'. You can override using the `.groups` argument.

# Stacked monthly record counts by type for dates before 2021-01-01.
datacovid_observatorio %>%
  filter(Fecha < "2021-01-01") %>%
  ggplot(aes(x = mes, fill = `Tipo de Dato`)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  scale_fill_manual(values = c("grey69", "red", "green")) +
  labs(
    title = "Tipo de casos por mes",
    subtitle = "Año 2020",
    x = "Mes",
    y = "Cantidad",
    fill = "Tipo de Dato",
    caption = "Fuente: Observatorio Demográdico Universitario UNAH"
  )

# Stacked monthly record counts by type for 2021-01-01 up to (not including)
# 2021-07-01.
datacovid_observatorio %>%
  filter(Fecha >= "2021-01-01", Fecha < "2021-07-01") %>%
  ggplot(aes(x = mes, fill = `Tipo de Dato`)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  scale_fill_manual(values = c("grey69", "red", "green")) +
  labs(
    title = "Tipo de caso por mes",
    subtitle = "Año 2021",
    x = "Mes",
    y = "Cantidad",
    fill = "Tipo de Dato",
    caption = "Fuente: Observatorio Demográfico Universitario UNAH"
  )

# Number of "CASOS" records per year, largest first.
# The filter refers to the column of the piped data instead of
# `datacovid_observatorio$...`, so it keeps working if an earlier step of
# the pipe changes the rows.
# NOTE(review): `total` is the sum of the year values themselves (count x
# year); it is kept only so the shape of `q` does not change. Confirm
# whether it is needed.
q <- datacovid_observatorio %>%
  dplyr::filter(`Tipo de Dato` == "CASOS") %>%
  group_by(Ano) %>%
  summarise(cantidad = n(), total = sum(Ano)) %>%
  arrange(desc(cantidad))
q

## # A tibble: 2 x 3
##     Ano cantidad     total
##   <dbl>    <int>     <dbl>
## 1  2021   139997 282933937
## 2  2020   122763 247981260

# Stacked record counts by weekday and type for the first half of 2021.
datacovid_observatorio %>%
  filter(Fecha >= "2021-01-01", Fecha < "2021-07-01") %>%
  ggplot(aes(x = dia_semana, fill = `Tipo de Dato`)) +
  geom_bar() +
  scale_fill_manual(values = c("grey69", "red", "green")) +
  labs(
    title = "Tipo de caso por día de Semana año 2021",
    subtitle = "",
    x = "día de la semana",
    y = "Cantidad",
    caption = "Fuente: Observatorio Demográfico Universitario UNAH"
  )

# Side-by-side record counts by weekday and type for the first half of 2021.
datacovid_observatorio %>%
  filter(Fecha >= "2021-01-01", Fecha < "2021-07-01") %>%
  ggplot(aes(x = dia_semana, fill = `Tipo de Dato`)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("grey69", "red", "green")) +
  labs(
    title = "Tipo de caso por día de Semana año 2021",
    subtitle = "",
    x = "día de la semana",
    y = "Cantidad",
    caption = "Fuente: Observatorio Demográfico Universitario UNAH"
  )

# Record counts per weekday for the first half of 2021, one line per record
# type, with the count printed above each point.
# The manual palette is applied to the colour scale: the layers map
# `Tipo de Dato` to `color`, so the original scale_fill_manual() call (which
# also passed colour names as `labels`) had no effect. The palette matches
# the one used by the bar charts of the same data.
datacovid_observatorio %>%
  filter(Fecha >= "2021-01-01" & Fecha < "2021-07-01") %>%
  group_by(`Tipo de Dato`, dia_semana) %>%
  summarise(cantidad = n()) %>%
  ggplot(aes(x = dia_semana, y = cantidad)) +
  geom_line(aes(group = `Tipo de Dato`, color = `Tipo de Dato`), size = 1) +
  scale_color_manual(values = c("grey69", "red", "green")) +
  geom_point(aes(color = `Tipo de Dato`), size = 2) +
  # Shift the labels 200 units up so they do not overlap the points.
  geom_text(aes(y = cantidad + 200, label = cantidad), size = 2) +
  labs(
    title = "Cantidad de casos, muertes y recuperación según día de la semana",
    subtitle = "",
    x = "Día de la Semana",
    y = "Cantidad",
    color = "Tipo de casos",
    caption = "Fuente: Observatorio Demográfico Universitario UNAH"
  )

## `summarise()` has grouped output by 'Tipo de Dato'. You can override using the `.groups` argument.

Hagamos zoom a las muertes

# "MUERTE" record counts per weekday for the first half of 2021.
datacovid_observatorio %>%
  filter(
    Fecha >= "2021-01-01" & Fecha < "2021-07-01",
    `Tipo de Dato` == "MUERTE"
  ) %>%
  group_by(dia_semana, `Tipo de Dato`) %>%
  summarise(cantidad = n()) %>%
  ggplot(aes(x = dia_semana, y = cantidad, color = `Tipo de Dato`)) +
  geom_line(aes(group = `Tipo de Dato`), size = 1) +
  geom_point(size = 2) +
  labs(
    title = "Muerte por día de Semana",
    subtitle = "",
    x = "Día de la semana",
    y = "Cantidad",
    color = "",
    caption = "Fuente: Observatorio Demográfico Universitario UNAH"
  )

## `summarise()` has grouped output by 'dia_semana'. You can override using the `.groups` argument.

# Weekly "CASOS" record counts for the first half of 2021, with one axis
# break per week number.
datacovid_observatorio %>%
  filter(
    Fecha >= "2021-01-01" & Fecha < "2021-07-01",
    `Tipo de Dato` == "CASOS"
  ) %>%
  group_by(semana) %>%
  summarise(cantidad = n()) %>%
  ggplot(aes(x = semana, y = cantidad)) +
  geom_line(size = 1) +
  scale_x_continuous(breaks = seq_len(54)) +
  labs(
    title = "Casos por Semana año 2021",
    subtitle = "",
    x = "semana",
    y = "Cantidad",
    caption = "Fuente: Observatorio Demográfico Universitario UNAH"
  )

# Weekly "MUERTE" record counts, one line per year.
datacovid_observatorio %>%
  filter(`Tipo de Dato` == "MUERTE") %>%
  group_by(Ano, semana) %>%
  summarise(cantidad = n()) %>%
  ggplot() +
  geom_line(
    aes(x = semana, y = cantidad, group = Ano, color = as.factor(Ano)),
    size = 1
  ) +
  labs(
    title = "MUERTE por Semana",
    subtitle = "",
    x = "semana",
    y = "Cantidad",
    color = "Año",
    caption = "Fuente: Observatorio Demográfico Universitario UNAH"
  )

## `summarise()` has grouped output by 'Ano'. You can override using the `.groups` argument.

# Show the first rows of the dataset.
head(datacovid_observatorio)

##         Fecha      Sexo Edad Departamento        Municipio Tipo de Dato
## 1: 2020-03-11  Femenino   37    ATLÁNTIDA         LA CEIBA        CASOS
## 2: 2020-03-11  Femenino   42           FM DISTRITO CENTRAL        CASOS
## 3: 2020-03-13  Femenino   64           FM DISTRITO CENTRAL        CASOS
## 4: 2020-03-15 Masculino   44           FM DISTRITO CENTRAL        CASOS
## 5: 2020-03-15  Femenino    1           FM DISTRITO CENTRAL        CASOS
## 6: 2020-03-15  Femenino   30    CHOLUTECA   No Determinado        CASOS
##    dia_semana dia semana  Ano mes
## 1:  miércoles  11     11 2020 mar
## 2:  miércoles  11     11 2020 mar
## 3:    viernes  13     11 2020 mar
## 4:    domingo  15     11 2020 mar
## 5:    domingo  15     11 2020 mar
## 6:    domingo  15     11 2020 mar

Para quitar las tildes se utiliza el paquete stringi

#names(datacovid_observatorio)
#str(datacovid_observatorio)

###Comparando datos de columnas nombre

#Municipios_nombre<-MUNICIPIOS_2$NOMBRE
#Covid_nombre <- BASE_COVID_1$NOMBRE


#diferencia<- ifelse(Municipios_nombre %in% Covid_nombre, 1, Covid_nombre)

#diferencia


#diferencia[!(Municipios_nombre %in% Covid_nombre)]

#Para múltiples columnas y dos data frames:
#MissingfromData2 <- anti_join(Data1, Data2, by = c("Property.1", "Property.2", "Property.3"))
#MissingfromData1 <- anti_join(Data2, Data1, by = c("Property.1", "Property.2", "Property.3"))

setdiff es otro método

#MissingfromData2 <- anti_join(Municipios_nombre,Covid_nombre)
#MissingfromData1 <- anti_join(Covid_nombre,Municipios_nombre)

Limpieza de datos para que cotejen con la otra base de datos espacial y poder realizar la unión

# Check the class of the join key column before merging.
class(BASE_COVID_1$NOMBRE)

## [1] "factor"

# Number of records per NOMBRE, all record types together.
BASE_COVID_2 <- summarise(
  group_by(BASE_COVID_1, NOMBRE),
  cantidad = n()
)

UNIÓN DE BASE DE DATOS ESPACIAL CON UNA CSV.

# Attach the per-NOMBRE counts to the spatial table, keeping every row of
# MUNICIPIOS_2, then preview the result.
depa <- MUNICIPIOS_2 %>%
  left_join(BASE_COVID_2, by = "NOMBRE")

head(depa)

## Simple feature collection with 6 features and 3 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 387918.2 ymin: 1635421 xmax: 568362.9 ymax: 1750195
## Projected CRS: NAD27 / UTM zone 16N
##   DEPARTAMENTO         NOMBRE cantidad                       geometry
## 1         <NA> NO DETERMINADO     4656 MULTIPOLYGON (((390022.3 16...
## 2           01       LA CEIBA    11407 MULTIPOLYGON (((533992.2 17...
## 3           01        ESPARTA      291 MULTIPOLYGON (((478662.7 17...
## 4           01        JUTIAPA      452 MULTIPOLYGON (((567455.2 17...
## 5           01      LA MASICA      811 MULTIPOLYGON (((485716.2 17...
## 6           01  SAN FRANCISCO      639 MULTIPOLYGON (((493123.8 17...

AHORA VAMOS A CREAR UNA CATEGORÍA DE MUNICIPIOS MENOR Y MAYOR QUE LA MEDIA DE COVID-19 AL 1/07/2021 y la uniremos con la base de datos espacial

POR LO QUE PRIMERO VAMOS A CALCULAR LA MEDIA DE CASOS, MUERTES, RECUPERACIONES DE COVID-19 PARA ESO PREVIAMENTE FILTRAREMOS LOS TIPOS DE CASOS

# Per-NOMBRE record counts, computed separately for each record type.

# Confirmed cases
BASE_COVID_CASOS <- BASE_COVID_1 %>%
  filter(`Tipo de Dato` == "CASOS") %>%
  group_by(NOMBRE) %>%
  summarise(cantidad = n())

# Deaths
BASE_COVID_MUERTES <- BASE_COVID_1 %>%
  filter(`Tipo de Dato` == "MUERTE") %>%
  group_by(NOMBRE) %>%
  summarise(cantidad = n())

# Recovered
BASE_COVID_RECUPERADOS <- BASE_COVID_1 %>%
  filter(`Tipo de Dato` == "RECUPERADO") %>%
  group_by(NOMBRE) %>%
  summarise(cantidad = n())

# Mean number of "CASOS" records per NOMBRE group.
mean(BASE_COVID_CASOS$cantidad)

## [1] 973.1852

# Mean number of "MUERTE" records per NOMBRE group.
mean(BASE_COVID_MUERTES$cantidad)

## [1] 28.55918

# Mean number of "RECUPERADO" records per NOMBRE group.
mean(BASE_COVID_RECUPERADOS$cantidad)

## [1] 332.4778

# Attach each per-type count table to the spatial table by NOMBRE, keeping
# every row of MUNICIPIOS_2.
depa_casos <- MUNICIPIOS_2 %>%
  left_join(BASE_COVID_CASOS, by = "NOMBRE")

depa_muertes <- MUNICIPIOS_2 %>%
  left_join(BASE_COVID_MUERTES, by = "NOMBRE")

depa_recuperados <- MUNICIPIOS_2 %>%
  left_join(BASE_COVID_RECUPERADOS, by = "NOMBRE")

# Check the dimensions of the joined tables
dim(depa_casos)

## [1] 299   4

# Dimensions of the joined deaths table.
dim(depa_muertes)

## [1] 299   4

# Dimensions of the joined recovered table.
dim(depa_recuperados)

## [1] 299   4

Después de calcular los promedios de casos, muertes y recuperados, procedemos a calcular una nueva variable para cada uno

# Label every row as above or below the mean count for its record type.
# The threshold is the mean of the per-NOMBRE count table, computed from the
# data instead of being typed in by hand (974 / 29 / 333), so the categories
# stay correct when the data is refreshed. Because `cantidad` is a whole
# number, `cantidad >= mean` selects the same rows as the previous
# rounded-up constants.
# Rows with a missing value in any column (e.g. no matching count after the
# join) are removed by drop_na().

muni_casos <- depa_casos %>%
  mutate(
    CATEGORIA = as.factor(if_else(
      cantidad >= mean(BASE_COVID_CASOS$cantidad),
      "MAYOR que la media al 1/07/2021",
      "MENOR que la media al 1/07/2021"
    ))
  ) %>%
  drop_na()


muni_muertes <- depa_muertes %>%
  mutate(
    CATEGORIA = as.factor(if_else(
      cantidad >= mean(BASE_COVID_MUERTES$cantidad),
      "MAYOR que la media al 1/07/2021",
      "MENOR que la media al 1/07/2021"
    ))
  ) %>%
  drop_na()


muni_recuperados <- depa_recuperados %>%
  mutate(
    CATEGORIA = as.factor(if_else(
      cantidad >= mean(BASE_COVID_RECUPERADOS$cantidad),
      "MAYOR que la media al 1/07/2021",
      "MENOR que la media al 1/07/2021"
    ))
  ) %>%
  drop_na()

library(sf)


# Map of `muni_casos`, filled by the above/below-mean category.
# The geometry is drawn once: the original added an unfilled geom_sf() layer
# of the same data underneath, which the filled layer covered completely.
ggplot(muni_casos) +
  geom_sf(aes(fill = CATEGORIA)) +
  labs(
    title = "Municipios con mayor y menor casos que la media al 1/07/2021",
    fill = "CASOS",
    caption = "Fuente: Observatorio Demográfico Universitario de la UNAH"
  )