Evaluaciones agropecuarias departamento de Boyacá
library(tidyverse)
datos_inicial <- read_csv("Evaluaciones_Agropecuarias_por_consenso_DEPARTAMENTO_DE_BOYAC_.csv")library(janitor)
library(stringi)
datos_final <- datos_inicial %>%
clean_names() %>%
mutate(municipio = stri_trans_general(municipio, "Latin-ASCII"),
municipio = str_replace_all(municipio,
"VILLA DE LEYVA",
"VILLA DE LEIVA")) %>%
separate(col = periodo,
into = c("year", "semestre"),
sep = 4) %>%
mutate(year = as.numeric(year))
datos_finaldatos_final %>%
count(cultivo) %>%
arrange(desc(n)) %>%
slice(1:10) %>%
ggplot(mapping = aes(x = cultivo, y = n)) +
geom_col()datos_final %>%
count(cultivo) %>%
arrange(desc(n)) %>%
slice(1:10) %>%
ggplot(mapping = aes(x = cultivo, y = n)) +
geom_col() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(x = "Cultivo", y = "Total",
title = "Frecuencia absoluta por cultivo",
subtitle = "10 Mayores")datos_final %>%
count(cultivo) %>%
arrange(desc(n)) %>%
slice(1:10) %>%
ggplot(mapping = aes(x = fct_reorder(cultivo, n), y = n)) +
geom_col(color = "black", fill = "red") +
coord_flip() +
labs(x = "Cultivo", y = "Total",
title = "Frecuencia absoluta por cultivo",
subtitle = "10 Mayores")datos_final %>%
filter(cultivo == "PAPA") %>%
group_by(municipio) %>%
summarise(promedio_rto = mean(rendimiento_t_ha, na.rm = TRUE)) %>%
ungroup() %>%
arrange(desc(promedio_rto)) %>%
slice(1:5) %>%
ggplot(mapping = aes(x = fct_reorder(municipio, promedio_rto), y = promedio_rto)) +
geom_point(color = "green3", size = 3) +
coord_flip() +
labs(x = "Municipio", y = "Rendimiento",
title = "Redimiento promedio (t/ha)",
subtitle = "5 municipios de mayor rendimiento") +
theme_minimal()datos_final %>%
filter(semestre %in% c("A", "B")) %>%
filter(cultivo == "PAPA") %>%
group_by(municipio, semestre) %>%
summarise(promedio_rto = mean(rendimiento_t_ha, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(mapping = aes(x = municipio, y = promedio_rto, fill = semestre)) +
geom_col(position = "dodge") +
coord_flip()datos_final %>%
filter(cultivo == "ARVEJA") %>%
filter(year == 2019) %>%
filter(semestre == "A") %>%
group_by(municipio) %>%
summarise(total_area = sum(area_sembrada_ha, na.rm = TRUE)) %>%
ungroup() %>%
mutate(total_depto = sum(total_area, na.rm = TRUE),
proporcion = total_area / total_depto,
proporcion = proporcion * 100,
proporcion = round(proporcion, digits = 2)) %>%
arrange(desc(proporcion)) %>%
slice(1:25) %>%
ggplot(mapping = aes(x = fct_reorder(municipio, proporcion),
y = proporcion,
label = str_c(proporcion, "%"))) +
geom_col() +
geom_label() +
coord_flip() +
xlab("Municipio") +
ylab("Proporción") +
ggtitle("25 municipios de mayor participación en área sembrada") +
theme_minimal()datos_final %>%
filter(cultivo == "ARVEJA") %>%
filter(year == 2019) %>%
group_by(municipio, semestre) %>%
summarise(total_area = sum(area_sembrada_ha, na.rm = TRUE)) %>%
ungroup() %>%
group_by(semestre) %>%
mutate(total_depto = sum(total_area, na.rm = TRUE)) %>%
ungroup() %>%
mutate(proporcion = total_area / total_depto) %>%
ggplot(mapping = aes(x = municipio, y = total_area, fill = semestre)) +
geom_col(position = "fill") +
coord_flip()datos_final %>%
filter(cultivo == "PAPA") %>%
filter(year == 2019) %>%
ggplot(mapping = aes(x = rendimiento_t_ha)) +
geom_histogram(color = "black")datos_final %>%
filter(cultivo == "PAPA") %>%
ggplot(mapping = aes(x = rendimiento_t_ha)) +
geom_histogram(color = "black") +
facet_wrap(facets = ~year, scales = "free")datos_final %>%
filter(cultivo == "PAPA") %>%
filter(year == 2019) %>%
ggplot(mapping = aes(x = rendimiento_t_ha)) +
geom_density()datos_final %>%
filter(year %in% c(2017, 2018, 2019)) %>%
filter(cultivo == "PAPA") %>%
ggplot(mapping = aes(x = rendimiento_t_ha, fill = as.factor(year))) +
geom_density(alpha = 0.5) +
labs(x = "Rendimiento", y = "Densidad", fill = "Año") +
scale_fill_manual(values = c("red", "yellow2", "blue")) +
theme_minimal()datos_final %>%
filter(year %in% c(2017, 2018, 2019)) %>%
filter(cultivo == "PAPA") %>%
ggplot(mapping = aes(x = as.factor(year), y = rendimiento_t_ha)) +
geom_boxplot(color = "forestgreen", fill = "forestgreen", alpha = 0.4)library(plotly)
ggplotly(
datos_final %>%
filter(cultivo == "PAPA") %>%
ggplot(mapping = aes(x = as.factor(year), y = rendimiento_t_ha)) +
geom_boxplot(color = "forestgreen", fill = "forestgreen", alpha = 0.4)
)datos_final %>%
filter(cultivo == "PAPA") %>%
ggplot(mapping = aes(x = as.factor(year), y = rendimiento_t_ha,
fill = semestre)) +
geom_boxplot()ggplotly(
datos_final %>%
filter(cultivo == "PAPA") %>%
ggplot(mapping = aes(x = as.factor(year), y = rendimiento_t_ha,
fill = semestre)) +
geom_boxplot()
) %>%
layout(boxmode='group')ggplotly(
datos_final %>%
filter(year %in% c(2017, 2018, 2019)) %>%
filter(cultivo == "PAPA") %>%
ggplot(mapping = aes(x = rendimiento_t_ha, fill = as.factor(year))) +
geom_density(alpha = 0.5) +
labs(x = "Rendimiento", y = "Densidad", fill = "Año") +
scale_fill_manual(values = c("red", "yellow2", "blue")) +
theme_minimal()
)datos_final %>%
filter(cultivo == "PAPA") %>%
ggplot(mapping = aes(x = as.factor(year), y = rendimiento_t_ha,
fill = semestre)) +
geom_violin()datos_final %>%
filter(cultivo == "PAPA") %>%
ggplot(mapping = aes(x = as.factor(year), y = rendimiento_t_ha)) +
geom_violin() +
geom_boxplot(width = 0.3)datos_final %>%
filter(cultivo == "PAPA") %>%
filter(year == 2019) %>%
ggplot(mapping = aes(x = area_cosechada_ha, y = rendimiento_t_ha)) +
geom_point()datos_final %>%
filter(cultivo == "PAPA") %>%
filter(year == 2019) %>%
ggplot(mapping = aes(x = area_cosechada_ha, y = rendimiento_t_ha)) +
geom_point() +
geom_smooth(method = "lm")datos_final %>%
filter(cultivo == "PAPA") %>%
filter(year == 2019) %>%
ggplot(mapping = aes(x = area_sembrada_ha, y = area_cosechada_ha)) +
geom_point() +
geom_smooth(method = "lm")cultivos_ejemplo <- levels(as.factor(datos_final$cultivo))[1:20]
datos_final %>%
filter(cultivo %in% cultivos_ejemplo) %>%
ggplot(mapping = aes(x = area_cosechada_ha, y = rendimiento_t_ha)) +
geom_point() +
facet_wrap(facets = ~cultivo, scales = "free", ncol = 4) +
geom_smooth(method = "lm")datos_final %>%
filter(cultivo == "ARRACACHA") %>%
filter(year == 2019) %>%
ggplot(mapping = aes(x = area_cosechada_ha, y = rendimiento_t_ha,
size = produccion_t)) +
geom_point() +
geom_smooth(method = "lm", show.legend = FALSE)datos_final %>%
filter(cultivo == "ARRACACHA") %>%
filter(year == 2019) %>%
ggplot(mapping = aes(x = area_cosechada_ha, y = rendimiento_t_ha,
color = produccion_t)) +
geom_point() +
geom_smooth(method = "lm", show.legend = FALSE) +
scale_color_viridis_c()datos_final %>%
filter(cultivo == "ARRACACHA") %>%
filter(year == 2019) %>%
ggplot(mapping = aes(x = area_cosechada_ha, y = rendimiento_t_ha,
color = semestre)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE)datos_final %>%
filter(area_cosechada_ha < 5000) %>%
ggplot(mapping = aes(x = area_cosechada_ha, y = rendimiento_t_ha)) +
geom_bin2d() +
geom_smooth(method = "lm", color = "red")datos_final %>%
filter(cultivo == "PAPA") %>%
group_by(year) %>%
summarise(promedio_rto = mean(rendimiento_t_ha, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(mapping = aes(x = year, y = promedio_rto)) +
geom_point() +
scale_x_continuous(breaks = c(2011:2019)) +
geom_line()datos_final %>%
filter(cultivo == "PAPA") %>%
group_by(year, semestre) %>%
summarise(promedio_rto = mean(rendimiento_t_ha, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(mapping = aes(x = year, y = promedio_rto, color = semestre)) +
geom_point() +
scale_x_continuous(breaks = c(2011:2019)) +
geom_line()cor(x = datos_final$area_sembrada_ha, y = datos_final$rendimiento_t_ha,
use = "pairwise.complete.obs")## [1] 0.04962694
# Base de datos para matriz de correlaciones
aguacate <- datos_final %>%
filter(cultivo == "AGUACATE") %>%
filter(year == 2019) %>%
select(area_sembrada_ha:rendimiento_t_ha)
# Matriz de correlaciones
matriz_cor <- aguacate %>% cor()
matriz_cor## area_sembrada_ha area_cosechada_ha produccion_t
## area_sembrada_ha 1.00000000 0.97522004 0.8740109
## area_cosechada_ha 0.97522004 1.00000000 0.8631451
## produccion_t 0.87401094 0.86314508 1.0000000
## rendimiento_t_ha 0.07359338 0.05652358 0.2925651
## rendimiento_t_ha
## area_sembrada_ha 0.07359338
## area_cosechada_ha 0.05652358
## produccion_t 0.29256514
## rendimiento_t_ha 1.00000000
library(corrplot)
matriz_cor %>%
corrplot()matriz_cor %>%
corrplot(
diag = FALSE,
type = "lower",
method = "pie",
tl.col = "black",
tl.srt = 25
)datos_final %>%
filter(cultivo == "PAPA") %>%
group_by(year) %>%
summarise(mediana_rto = median(rendimiento_t_ha, na.rm = TRUE),
percentil5 = quantile(rendimiento_t_ha, probs = 0.05, na.rm = TRUE),
percentil95 = quantile(rendimiento_t_ha, probs = 0.95, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(mapping = aes(x = year, y = mediana_rto,
ymin = percentil5, ymax = percentil95)) +
geom_ribbon(fill = "blue", alpha = 0.5) +
geom_line(color = "blue") +
scale_x_continuous(breaks = c(2011:2019))datos_final %>%
filter(cultivo == "PAPA") %>%
group_by(semestre) %>%
summarise(promedio_rto = mean(rendimiento_t_ha, na.rm = TRUE),
desv_rto = sd(rendimiento_t_ha, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(mapping = aes(x = semestre, y = promedio_rto,
ymin = promedio_rto - desv_rto,
ymax = promedio_rto + desv_rto)) +
geom_col() +
geom_errorbar(width = 0.1)datos_final %>%
filter(cultivo == "PAPA") %>%
group_by(semestre) %>%
summarise(promedio_rto = mean(rendimiento_t_ha, na.rm = TRUE),
desv_rto = sd(rendimiento_t_ha, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(mapping = aes(x = semestre, y = promedio_rto,
ymin = promedio_rto - desv_rto,
ymax = promedio_rto + desv_rto)) +
geom_point() +
geom_errorbar(width = 0.1)levels(as.factor(datos_inicial$MUNICIPIO))## [1] "ALMEIDA" "AQUITANIA" "ARCABUCO"
## [4] "BELEN" "BELÉN" "BERBEO"
## [7] "BETEITIVA" "BETÉITIVA" "BOAVITA"
## [10] "BOYACA" "BOYACÁ" "BRICEÑO"
## [13] "BUENAVISTA" "BUSBANZA" "BUSBANZÁ"
## [16] "CALDAS" "CAMPOHERMOSO" "CERINZA"
## [19] "CHINAVITA" "CHIQUINQUIRA" "CHIQUINQUIRÁ"
## [22] "CHIQUIZA" "CHÍQUIZA" "CHISCAS"
## [25] "CHITA" "CHITARAQUE" "CHIVATA"
## [28] "CHIVATÁ" "CHIVOR" "CIENEGA"
## [31] "CIÉNEGA" "COMBITA" "CÓMBITA"
## [34] "COPER" "CORRALES" "COVARACHIA"
## [37] "COVARACHÍA" "CUBARA" "CUBARÁ"
## [40] "CUCAITA" "CUITIVA" "CUÍTIVA"
## [43] "DUITAMA" "EL COCUY" "EL ESPINO"
## [46] "FIRAVITOBA" "FLORESTA" "GACHANTIVA"
## [49] "GACHANTIVÁ" "GAMEZA" "GÁMEZA"
## [52] "GARAGOA" "GUACAMAYAS" "GUATEQUE"
## [55] "GUAYATA" "GUAYATÁ" "GUICAN"
## [58] "GUICÁN" "IZA" "JENESANO"
## [61] "JERICO" "JERICÓ" "LA CAPILLA"
## [64] "LA UVITA" "LA VICTORIA" "LABRANZAGRANDE"
## [67] "MACANAL" "MARIPI" "MARIPÍ"
## [70] "MIRAFLORES" "MONGUA" "MONGUI"
## [73] "MONGUÍ" "MONIQUIRA" "MONIQUIRÁ"
## [76] "MOTAVITA" "MUZO" "NOBSA"
## [79] "NUEVO COLON" "NUEVO COLÓN" "OICATA"
## [82] "OICATÁ" "OTANCHE" "PACHAVITA"
## [85] "PAEZ" "PÁEZ" "PAIPA"
## [88] "PAJARITO" "PANQUEBA" "PAUNA"
## [91] "PAYA" "PAZ DE RIO" "PESCA"
## [94] "PISBA" "PUERTO BOYACA" "PUERTO BOYACÁ"
## [97] "QUIPAMA" "QUÍPAMA" "RAMIRIQUI"
## [100] "RAMIRIQUÍ" "RAQUIRA" "RÁQUIRA"
## [103] "RONDON" "RONDÓN" "SABOYA"
## [106] "SABOYÁ" "SACHICA" "SÁCHICA"
## [109] "SAMACA" "SAMACÁ" "SAN EDUARDO"
## [112] "SAN JOSE DE PARE" "SAN JOSÉ DE PARE" "SAN LUIS DE GACENO"
## [115] "SAN MATEO" "SAN MIGUEL DE SEMA" "SAN PABLO DE BORBUR"
## [118] "SANTA MARIA" "SANTA MARÍA" "SANTA ROSA DE VITERBO"
## [121] "SANTA SOFIA" "SANTA SOFÍA" "SANTANA"
## [124] "SATIVANORTE" "SATIVASUR" "SIACHOQUE"
## [127] "SOATA" "SOATÁ" "SOCHA"
## [130] "SOCOTA" "SOCOTÁ" "SOGAMOSO"
## [133] "SOMONDOCO" "SORA" "SORACA"
## [136] "SORACÁ" "SOTAQUIRA" "SOTAQUIRÁ"
## [139] "SUSACON" "SUSACÓN" "SUTAMARCHAN"
## [142] "SUTAMARCHÁN" "SUTATENZA" "TASCO"
## [145] "TENZA" "TIBANA" "TIBANÁ"
## [148] "TIBASOSA" "TINJACA" "TINJACÁ"
## [151] "TIPACOQUE" "TOCA" "TOGUI"
## [154] "TOGÜÍ" "TOPAGA" "TÓPAGA"
## [157] "TOTA" "TUNJA" "TUNUNGUA"
## [160] "TUNUNGUÁ" "TURMEQUE" "TURMEQUÉ"
## [163] "TUTA" "TUTAZA" "TUTAZÁ"
## [166] "UMBITA" "ÚMBITA" "VENTAQUEMADA"
## [169] "VILLA DE LEIVA" "VILLA DE LEYVA" "VIRACACHA"
## [172] "VIRACACHÁ" "ZETAQUIRA" "ZETAQUIRÁ"
length(levels(as.factor(datos_inicial$MUNICIPIO)))## [1] 174