# Code: package setup ----

# Packages attached for this analysis, one call per line and each package
# attached once.
library(tidyverse) # pipes, dplyr/tidyr verbs and ggplot2 used below
library(skimr)
library(readxl)    # read_excel(), excel_sheets()
library(readr)
library(moments)   # skewness()
# Load and reshape the data ----

# Workbook read below; stored once because read_excel() and excel_sheets()
# both need the same path.
ruta_datos <- "API_AG.YLD.CREL.KG_DS2_es_excel_v2_8440.xls"

# Label of the decade that contains each year, e.g. 1985 -> "1980 - 1990".
#
# `anio` is a numeric vector of years. Years outside 1960-2029 (and NA years)
# get NA so the caller can drop them, which is the range the analysis covers.
etiquetar_decada <- function(anio) {
  inicio <- (anio %/% 10L) * 10L
  etiqueta <- paste(inicio, "-", inicio + 10L)
  # paste() would turn NA into the text "NA - NA", so mask those explicitly.
  etiqueta[is.na(inicio) | inicio < 1960L | inicio > 2020L] <- NA_character_
  etiqueta
}

# Long-format table built from the "Data" sheet (first 3 rows skipped).
# Every column other than the four identifier columns is stacked into the
# pair year_es (former column name, kept as text) / cereales (cell value).
# Rows without a country name or without a value are dropped, and the two
# indicator columns are removed.
df_datos <- read_excel(ruta_datos, sheet = "Data", skip = 3) |>
  pivot_longer(
    cols = -c(
      "Country Name", "Country Code", "Indicator Name", "Indicator Code"
    ),
    names_to = "year_es",
    values_to = "cereales"
  ) |>
  filter(!is.na(`Country Name`), !is.na(cereales)) |>
  select(-c("Indicator Name", "Indicator Code"))

# df_datos plus a `decada` label; rows whose year has no label are dropped.
# year_es is text, so it is converted to integer before the decade arithmetic
# instead of relying on string comparison against numbers. The NA filters on
# `Country Name` and `cereales` are not repeated here because df_datos has
# already applied them.
df_datos2 <- df_datos |>
  mutate(decada = etiquetar_decada(as.integer(year_es))) |>
  filter(!is.na(decada))

# Sheets available in the workbook.
# Output recorded in the original notebook:
# [1] "Data" "Metadata - Countries" "Metadata - Indicators"
excel_sheets(ruta_datos)
# Mean of `cereales` over every row of df_datos.
# Output recorded in the original notebook: [1] 2481.837
df_datos$cereales |>
  mean()
# Exploratory analysis ----

# Countries compared in the two country-level plots below.
paises_seleccionados <- c(
  "Colombia", "Estados Unidos", "Canadá",
  "China", "México", "Japón"
)

# Mean of `cereales` for each country.
df_datos |>
  group_by(`Country Name`) |>
  reframe(promedio = mean(cereales))

# Row(s) holding the smallest value of `cereales`.
df_datos2 |>
  filter(cereales == min(cereales))

# Row(s) holding the largest value of `cereales`.
df_datos2 |>
  filter(cereales == max(cereales))

# Distribution of `cereales` per decade.
df_datos2 |>
  ggplot(aes(x = decada, y = cereales)) +
  geom_boxplot()

# Same boxplots on a log10 y axis.
df_datos2 |>
  ggplot(aes(x = decada, y = cereales)) +
  geom_boxplot() +
  scale_y_log10()

# Sum of `cereales` per decade for the selected countries, one line per
# country code. Years from 2020 onwards are excluded; year_es is stored as
# text, so it is converted before the numeric comparison.
df_datos2 |>
  filter(`Country Name` %in% paises_seleccionados) |>
  filter(as.integer(year_es) < 2020) |>
  group_by(decada, `Country Code`) |>
  reframe(total = sum(cereales)) |>
  ggplot(aes(x = decada, y = total, color = `Country Code`)) +
  geom_line(aes(group = `Country Code`))

# Per-decade boxplots for the same countries and years, coloured by country.
df_datos2 |>
  filter(`Country Name` %in% paises_seleccionados) |>
  filter(as.integer(year_es) < 2020) |>
  ggplot(aes(x = decada, y = cereales, color = `Country Code`)) +
  geom_boxplot()

# Mean, minimum, maximum and standard deviation of `cereales` per decade.
df_datos2 |>
  group_by(decada) |>
  reframe(
    promedio = mean(cereales),
    minimo = min(cereales),
    maximo = max(cereales),
    desviacion = sd(cereales)
  )

# Skewness of `cereales` over every row of df_datos.
# Output recorded in the original notebook: [1] 4.679934
skewness(x = df_datos$cereales)
# Histogram of `cereales` over every row of df_datos, bars outlined in black.
ggplot(data = df_datos, mapping = aes(x = cereales)) +
  geom_histogram(color = "black")