library(tidyverse)
# Read the raw feed-composition CSV into `datos`, then preview the first rows
# and the column types.
datos <- read_csv(
  file = "Alimentos_del_tr_pico_para_alimentaci_n_animal_-_AlimenTro.csv"
)
head(datos)
datos %>% glimpse()
## Rows: 41,309
## Columns: 25
## $ ID <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,~
## $ GrupoMuestra <chr> "09 Flores, frutas y subproductos", "0~
## $ Ingrediente <chr> "Cacao mucilago -", "Cacao cascara -",~
## $ Departamento <chr> "Huila", "Huila", "Huila", "Huila", "H~
## $ Municipio <chr> "Garzón", "Garzón", "Garzón", "Garzón"~
## $ EdadCorte <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ EpocaRecoleccion <chr> "Lluvia", "Lluvia", "Lluvia", "Lluvia"~
## $ FechaRecoleccion <chr> "07/05/2012 12:00:00 AM", "08/05/2012 ~
## $ ProteinaCruda <dbl> 5.62, 6.34, 13.43, 13.81, 15.03, 15.27~
## $ PorcentajeCeniza <dbl> 1.53, 7.28, 3.54, 4.49, 4.11, 4.71, 9.~
## $ ExtractoEtereo <chr> "0.8500", "0.7200", "5.1100", "5.1500"~
## $ FDN <dbl> 16.53, 60.43, 62.03, 56.92, 55.78, 62.~
## $ FDA <dbl> 8.53, 47.17, 12.08, 11.65, 12.22, 12.4~
## $ Hemicelulosa <dbl> 8.00, 13.26, 49.95, 45.27, 43.56, 49.5~
## $ Lignina <chr> "NULL", "NULL", "NULL", "NULL", "NULL"~
## $ PorcentajeAlmidonTotal <chr> "NULL", "NULL", "NULL", "NULL", "NULL"~
## $ PorcentajeCarbohidratosSolubles <chr> "NULL", "NULL", "NULL", "NULL", "NULL"~
## $ PorcentajeCarbohNoEstructurales <chr> "NULL", "NULL", "NULL", "NULL", "NULL"~
## $ NDT <chr> "NULL", "NULL", "NULL", "NULL", "NULL"~
## $ DigestibilidadMS <chr> "NULL", "NULL", "NULL", "NULL", "NULL"~
## $ EDRumiantes <chr> "NULL", "NULL", "NULL", "NULL", "NULL"~
## $ EMRumiantes <chr> "NULL", "NULL", "NULL", "NULL", "NULL"~
## $ ENmRumiantes <chr> "NULL", "NULL", "NULL", "NULL", "NULL"~
## $ ENgRumiantes <chr> "NULL", "NULL", "NULL", "NULL", "NULL"~
## $ ENLRumiantes <chr> "NULL", "NULL", "NULL", "NULL", "NULL"~
library(janitor)
library(lubridate)
# Build the cleaned table `df_alimentos` from `datos`:
#   1. convert the column names to snake_case;
#   2. coerce the measurement columns that were read as text to numeric
#      (text that is not a number, e.g. "NULL", becomes NA with a warning);
#   3. derive `fecha` (Date), `mes` (full month label) and `year` from the
#      collection timestamp, parsed as month/day/year;
#   4. drop the row id and the raw timestamp, and put the date fields first.
df_alimentos <- datos %>%
  clean_names() %>%
  mutate(
    across(
      .cols = c(extracto_etereo, lignina:enl_rumiantes),
      .fns = as.numeric
    )
  ) %>%
  mutate(
    fecha = as_date(fecha_recoleccion, format = "%m/%d/%Y"),
    mes = month(fecha, label = TRUE, abbr = FALSE),
    year = year(fecha)
  ) %>%
  select(-id, -fecha_recoleccion) %>%
  relocate(fecha, year, mes)
# Preview the cleaned table, then save it as an xz-compressed RDS file.
head(df_alimentos)
write_rds(
  df_alimentos,
  file = "alimentro_depurada.rds",
  compress = "xz"
)
library(skimr)
# Per-column summary of the cleaned table, grouped by variable type.
# Called directly (not through a pipe) so the report shows the data frame name.
skim(df_alimentos)
Name | df_alimentos |
Number of rows | 41309 |
Number of columns | 26 |
_______________________ | |
Column type frequency: | |
character | 5 |
Date | 1 |
factor | 1 |
numeric | 19 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
grupo_muestra | 0 | 1 | 17 | 47 | 0 | 9 | 0 |
ingrediente | 0 | 1 | 11 | 87 | 0 | 733 | 0 |
departamento | 0 | 1 | 4 | 18 | 0 | 28 | 0 |
municipio | 0 | 1 | 3 | 27 | 0 | 385 | 0 |
epoca_recoleccion | 0 | 1 | 6 | 26 | 0 | 4 | 0 |
Variable type: Date
skim_variable | n_missing | complete_rate | min | max | median | n_unique |
---|---|---|---|---|---|---|
fecha | 0 | 1 | 2012-07-05 | 2020-10-28 | 2019-02-06 | 1503 |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
mes | 0 | 1 | TRUE | 12 | oct: 5068, jul: 4243, sep: 3784, ene: 3776 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
year | 0 | 1.00 | 2018.15 | 1.76 | 2012.00 | 2017.00 | 2019.00 | 2020.00 | 2020.00 | ▁▁▁▅▇ |
edad_corte | 0 | 1.00 | 44.72 | 33.51 | 0.00 | 28.00 | 35.00 | 49.00 | 392.00 | ▇▁▁▁▁ |
proteina_cruda | 0 | 1.00 | 13.11 | 5.82 | 0.66 | 8.70 | 11.73 | 16.67 | 62.59 | ▇▅▁▁▁ |
porcentaje_ceniza | 0 | 1.00 | 10.46 | 2.73 | 0.19 | 8.86 | 10.53 | 12.08 | 31.04 | ▁▇▂▁▁ |
extracto_etereo | 333 | 0.99 | 2.08 | 0.73 | 0.03 | 1.70 | 1.99 | 2.33 | 43.46 | ▇▁▁▁▁ |
fdn | 0 | 1.00 | 56.83 | 11.38 | 0.21 | 50.83 | 60.13 | 65.07 | 82.57 | ▁▁▂▇▂ |
fda | 0 | 1.00 | 31.36 | 7.34 | 0.03 | 27.24 | 32.84 | 36.26 | 51.82 | ▁▁▅▇▁ |
hemicelulosa | 3 | 1.00 | 25.47 | 5.59 | 0.14 | 22.75 | 26.84 | 29.08 | 54.81 | ▁▂▇▁▁ |
lignina | 598 | 0.99 | 6.73 | 1.88 | 0.05 | 5.49 | 6.97 | 8.09 | 13.81 | ▁▃▇▃▁ |
porcentaje_almidon_total | 327 | 0.99 | 7.85 | 7.39 | 0.02 | 5.19 | 6.68 | 8.25 | 72.75 | ▇▁▁▁▁ |
porcentaje_carbohidratos_solubles | 313 | 0.99 | 2.91 | 2.25 | -0.22 | 1.62 | 2.35 | 3.51 | 33.64 | ▇▁▁▁▁ |
porcentaje_carboh_no_estructurales | 312 | 0.99 | 10.75 | 7.65 | 0.36 | 7.33 | 9.35 | 11.97 | 74.33 | ▇▁▁▁▁ |
ndt | 265 | 0.99 | 56.13 | 7.31 | 0.00 | 50.65 | 54.53 | 60.30 | 93.50 | ▁▁▇▆▁ |
digestibilidad_ms | 265 | 0.99 | 61.46 | 7.88 | 0.00 | 55.55 | 59.74 | 65.97 | 101.65 | ▁▁▇▆▁ |
ed_rumiantes | 265 | 0.99 | 2.53 | 0.36 | 0.00 | 2.26 | 2.45 | 2.74 | 6.81 | ▁▇▃▁▁ |
em_rumiantes | 265 | 0.99 | 2.03 | 0.32 | 0.00 | 1.79 | 1.96 | 2.21 | 3.68 | ▁▁▇▂▁ |
e_nm_rumiantes | 266 | 0.99 | 1.17 | 0.30 | 0.00 | 0.95 | 1.11 | 1.35 | 2.58 | ▁▆▇▂▁ |
e_ng_rumiantes | 265 | 0.99 | 0.61 | 0.27 | 0.00 | 0.40 | 0.56 | 0.77 | 1.83 | ▃▇▃▁▁ |
enl_rumiantes | 265 | 0.99 | 1.26 | 0.18 | 0.91 | 1.12 | 1.22 | 1.36 | 2.17 | ▆▇▂▁▁ |
# Medidas de resumen: mean(), median() y sd(), usando na.rm = TRUE en cada función.
library(DT)
# Interactive table of yearly summary statistics for every numeric measurement.
#
# The numeric columns are stacked into long format (one row per year/variable/
# value), then the mean, median and standard deviation are computed per year
# and variable with missing values ignored, and rounded to two decimals.
df_alimentos %>%
  select(year, edad_corte, proteina_cruda:enl_rumiantes) %>%
  pivot_longer(cols = -year, names_to = "variable", values_to = "valor") %>%
  group_by(year, variable) %>%
  summarise(
    media = mean(valor, na.rm = TRUE),
    mediana = median(valor, na.rm = TRUE),
    desviacion = sd(valor, na.rm = TRUE),
    # Drop the grouping explicitly: avoids the `.groups` message and keeps a
    # grouped tibble from reaching the steps below.
    .groups = "drop"
  ) %>%
  # Name the statistic columns and use a lambda: a bare predicate without
  # where() and extra arguments passed through across(...) are both deprecated.
  mutate(across(c(media, mediana, desviacion), ~ round(.x, digits = 2))) %>%
  datatable(rownames = FALSE)
# Read the wide homicide-rate table (one row per municipality, one column per
# year) and reshape it to long format: municipio / year / homicidio.
df_homicidios <- read_csv(
  file = "Tasas_de_homicidios_seg_n_municipios_por_cien_mil_habitantes._A_os_1990_-_2017.csv"
)
df_homicidios2 <- df_homicidios %>%
  rename(municipio = Municipio) %>%
  pivot_longer(
    cols = -municipio,
    names_to = "year",
    values_to = "homicidio",
    # Year labels come from the column names, so convert them to numbers.
    names_transform = list(year = as.numeric)
  )
head(df_homicidios2)
# Set the default ggplot2 theme used by every plot drawn after this point.
theme_set(theme_minimal())
library(ggthemes)
# Bar chart of the mean of `homicidio` over all rows of each year.
df_homicidios2 %>%
  group_by(year) %>%
  summarise(promedio = mean(homicidio)) %>%
  ggplot(aes(x = year, y = promedio)) +
  geom_col() +
  # Label every second year on the x axis.
  scale_x_continuous(breaks = seq(from = 1990, to = 2017, by = 2)) +
  labs(
    title = "Tasa de homicidios departamento del Quindío",
    subtitle = "Por cada 100 mil habitantes",
    x = "Año",
    y = "Homicidios"
  )
library(plotly)
# Interactive (plotly) version of the yearly-mean bar chart, without subtitle.
# The ggplot expression is parenthesised so the whole plot, not just its last
# layer, is piped into ggplotly().
(
  df_homicidios2 %>%
    group_by(year) %>%
    summarise(promedio = mean(homicidio)) %>%
    ggplot(aes(x = year, y = promedio)) +
    geom_col() +
    scale_x_continuous(breaks = seq(from = 1990, to = 2017, by = 2)) +
    labs(
      title = "Tasa de homicidios departamento del Quindío",
      x = "Año",
      y = "Homicidios"
    )
) %>%
  ggplotly(width = 900)
# Density of `homicidio`, one panel per municipality with free axis scales.
# To limit the panels to a few municipalities, insert before ggplot():
#   filter(municipio %in% c("Calarca", "Circasia", "Salento")) %>%
ggplot(df_homicidios2, aes(x = homicidio)) +
  geom_density(fill = "dodgerblue", colour = "red", alpha = 0.5) +
  facet_wrap(~municipio, scales = "free")
# Interactive (plotly) version of the per-municipality density panels.
(
  ggplot(df_homicidios2, aes(x = homicidio)) +
    geom_density(fill = "dodgerblue", colour = "red", alpha = 0.5) +
    facet_wrap(~municipio, scales = "free")
) %>%
  ggplotly(width = 900, height = 500)
# Boxplot of `homicidio` per municipality, with municipalities ordered by their
# median; the red point marks each municipality's mean.
ggplot(
  data = df_homicidios2,
  mapping = aes(
    x = fct_reorder(municipio, homicidio, median),
    y = homicidio
  )
) +
  geom_boxplot() +
  stat_summary(geom = "point", fun = mean, colour = "red")
# Interactive (plotly) version of the ordered boxplot; the tooltip is limited
# to the y aesthetic.
(
  ggplot(
    data = df_homicidios2,
    mapping = aes(
      x = fct_reorder(municipio, homicidio, median),
      y = homicidio
    )
  ) +
    geom_boxplot() +
    stat_summary(geom = "point", fun = mean, colour = "red")
) %>%
  ggplotly(tooltip = "y", width = 900)
# Quantile-quantile plot of `homicidio` with its reference line, one
# free-scaled panel per municipality.
ggplot(df_homicidios2, aes(sample = homicidio)) +
  geom_qq() +
  geom_qq_line() +
  facet_wrap(~municipio, scales = "free")
# Yearly `homicidio` series, one coloured line per municipality, plus a single
# red smoothed trend (no confidence band) fitted over all points.
ggplot(df_homicidios2, aes(x = year, y = homicidio)) +
  geom_line(aes(colour = municipio)) +
  geom_smooth(colour = "red", se = FALSE)
# Interactive (plotly) version of the per-municipality series with the overall
# red smoothed trend.
(
  ggplot(df_homicidios2, aes(x = year, y = homicidio)) +
    geom_line(aes(colour = municipio)) +
    geom_smooth(colour = "red", se = FALSE)
) %>%
  ggplotly(width = 900)
# Yearly `homicidio` series with a smoothed trend (no confidence band), drawn
# in a separate free-scaled panel for each municipality.
ggplot(df_homicidios2, aes(x = year, y = homicidio)) +
  geom_line() +
  geom_smooth(se = FALSE) +
  facet_wrap(~municipio, scales = "free")
# Yearly mean of `homicidio` drawn as a line over two shaded bands:
# the wider band spans the 5th-95th percentiles of the year's values and the
# narrower, slightly more opaque band spans the 20th-80th percentiles.
df_homicidios2 %>%
  group_by(year) %>%
  summarise(
    promedio = mean(homicidio),
    p5 = quantile(homicidio, probs = 0.05),
    p20 = quantile(homicidio, probs = 0.20),
    p80 = quantile(homicidio, probs = 0.80),
    p95 = quantile(homicidio, probs = 0.95)
  ) %>%
  ggplot(aes(x = year, y = promedio)) +
  # Outer band first so the inner band and the line are painted on top of it.
  geom_ribbon(aes(ymin = p5, ymax = p95), fill = "firebrick2", alpha = 0.5) +
  geom_ribbon(aes(ymin = p20, ymax = p80), fill = "firebrick2", alpha = 0.6) +
  geom_line()