1. Introducción

Este cuaderno tiene como objetivo clasificar e identificar los dos cultivos con mayor producción en el departamento de Boyacá y sus respectivos municipios productores, utilizando una base de datos de la Unidad de Planificación Agropecuaria (UPRA).

2. Configuración

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readxl)

3. Leer un archivo de excel con estadisticas agropecuarias municipales

(archivos = list.files(pattern='xls'))
## [1] "20210624_BaseSIPRA2020.xlsx"
(hojas = readxl::excel_sheets("20210624_BaseSIPRA2020.xlsx"))
## [1] "Agrícola_SIPRA_AGRONET"       "InventarioBovino"            
## [3] "InventarioPorcino"            "InvBufalosCaprinoOvinoEquino"
## [5] "InvAves"
eva2020 = readxl::read_excel("20210624_BaseSIPRA2020.xlsx", sheet = "Agrícola_SIPRA_AGRONET")
eva2020

4. Subconjunto de datos relevantes para nuestro departamento

J.ber2020 = dplyr::filter(eva2020, Departamento == "Boyacá")
J.ber2020

5. Limpiar los datos filtrados

J.ber2020.tmp <-  J.ber2020 %>% select('Código del Municipio':'Ciclo del cultivo')
J.ber2020.tmp
J.ber2020.tmp %>% rename(Cod_Mun = 'Código del Municipio', 
                         Grupo = 'Grupo cultivo según especie', 
                         Subgrupo = 'Subgrupo cultivo según especie',
                         AreaSiembra = 'Area Sembrada (ha)',
                         AreaCosecha = 'Area Cosechada (ha)',
                         Produccion = 'Producción (t)',                              
                         Rendimiento = 'Rendimiento (t/ha)', Ciclo='Ciclo del cultivo') ->                                  nJ.ber2020
nJ.ber2020
nJ.ber2020 %>% mutate(AreaSiembra = as.numeric(AreaSiembra),
                       AreaCosecha = as.numeric(AreaCosecha),
                       Produccion = as.numeric(Produccion),
                       Rendimiento = as.numeric(Rendimiento)) -> nJ.ber2020 
nJ.ber2020

6. Procesamiento y análisis de datos

nJ.ber2020 %>%
  filter(Produccion > 0) %>%
  group_by(Cultivo) %>%
  summarize(total_produccion = sum(Produccion)) %>% 
  arrange(desc(total_produccion))
nJ.ber2020 %>%
  group_by(Cultivo, Municipio) %>%
  summarize(max_prod = max(Produccion, na.rm = TRUE)) %>%
  slice(which.max(max_prod))  %>%
  arrange(desc(max_prod))
## `summarise()` has grouped output by 'Cultivo'. You can override using the
## `.groups` argument.
nJ.ber2020 %>%
  group_by(Grupo,Municipio) %>%
  summarize(max_prod = max(Produccion, na.rm = TRUE)) %>%
  slice(which.max(max_prod))  %>%
  arrange(desc(max_prod))
## `summarise()` has grouped output by 'Grupo'. You can override using the
## `.groups` argument.
nJ.ber2020 %>%
  group_by(Cod_Mun, Municipio, Grupo) %>%
  filter(Grupo=='Tubérculos Y Plátanos') %>% 
  summarize(max_prod = max(Produccion, na.rm = TRUE)) %>%
  arrange(desc(max_prod)) -> Tuberculos2020
## `summarise()` has grouped output by 'Cod_Mun', 'Municipio'. You can override
## using the `.groups` argument.
Tuberculos2020
nJ.ber2020 %>%
  group_by(Cod_Mun, Municipio, Grupo) %>%
  filter(Grupo=='Hortalizas') %>% 
  summarize(max_prod = max(Produccion, na.rm = TRUE)) %>%
  arrange(desc(max_prod)) -> Hortalizas2020
## `summarise()` has grouped output by 'Cod_Mun', 'Municipio'. You can override
## using the `.groups` argument.
Hortalizas2020

7. Escribir datos seleccionados en el archivo

write_csv(Hortalizas2020, "./J.ber_Hort_2020.csv")
write_csv(Tuberculos2020, "./J.ber_Tuber_2020.csv")

8. Referencias

“Lizarazo, I. Reading and processing municipal agricultural statistics for 2020. Available at: https://rpubs.com/ials2un/readingEVAv1.”

“Unidad de planificación agropecuaria UPRA: https://www.upra.gov.co/

sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19043)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=Spanish_Colombia.1252  LC_CTYPE=Spanish_Colombia.1252   
## [3] LC_MONETARY=Spanish_Colombia.1252 LC_NUMERIC=C                     
## [5] LC_TIME=Spanish_Colombia.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] readxl_1.4.0    forcats_0.5.1   stringr_1.4.0   dplyr_1.0.8    
##  [5] purrr_0.3.4     readr_2.1.2     tidyr_1.2.0     tibble_3.1.6   
##  [9] ggplot2_3.3.5   tidyverse_1.3.1
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.1.2 xfun_0.30        bslib_0.3.1      haven_2.4.3     
##  [5] colorspace_2.0-3 vctrs_0.3.8      generics_0.1.2   htmltools_0.5.2 
##  [9] yaml_2.3.5       utf8_1.2.2       rlang_1.0.2      jquerylib_0.1.4 
## [13] pillar_1.7.0     withr_2.5.0      glue_1.6.2       DBI_1.1.2       
## [17] bit64_4.0.5      dbplyr_2.1.1     modelr_0.1.8     lifecycle_1.0.1 
## [21] munsell_0.5.0    gtable_0.3.0     cellranger_1.1.0 rvest_1.0.2     
## [25] evaluate_0.15    knitr_1.38       tzdb_0.2.0       fastmap_1.1.0   
## [29] parallel_4.1.3   fansi_1.0.3      broom_0.7.12     backports_1.4.1 
## [33] scales_1.1.1     vroom_1.5.7      jsonlite_1.8.0   bit_4.0.4       
## [37] fs_1.5.2         hms_1.1.1        digest_0.6.29    stringi_1.7.6   
## [41] grid_4.1.3       cli_3.2.0        tools_4.1.3      magrittr_2.0.2  
## [45] sass_0.4.1       crayon_1.5.1     pkgconfig_2.0.3  ellipsis_0.3.2  
## [49] xml2_1.3.3       reprex_2.0.1     lubridate_1.8.0  assertthat_0.2.1
## [53] rmarkdown_2.13   httr_1.4.2       rstudioapi_0.13  R6_2.5.1        
## [57] compiler_4.1.3