About the Dataset

This dataset is the basis for the International Food Security Assessment, 2016-2026, released in June 2016. This annual ERS report projects food availability and access for 76 low- and middle-income countries over a 10-year period. The dataset includes annual country-level data on area, yield, production, nonfood use, trade, and consumption for grains and for root and tuber crops (combined as R&T in the documentation tables), as well as food aid, total value of imports and exports, gross domestic product, and population, compiled from a variety of sources.
Load dataset
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(readxl)
# Read the grain demand/production workbook into a tibble.
gdp_dataset <- read_xlsx(path = "GrainDemandProduction.xlsx")

# Count the missing cells across the whole table (all columns together).
gdp_dataset %>%
  is.na() %>%
  sum()
## [1] 17
# Keep an NA-free copy of the raw table: na.omit() drops every row that has
# at least one missing value.
# NOTE(review): `data` is not referenced in the visible part of this report;
# the steps below keep working on `gdp_dataset`, which still contains NAs.
data <- na.omit(gdp_dataset)

# Convert Year to a factor.
gdp_dataset <- mutate(gdp_dataset, Year = as.factor(Year))
# Inspect the data: column structure, per-column summary, and the first rows.
#
# Each function is called on the tibble directly. str() prints its report and
# returns NULL invisibly, so chaining `str() %>% summary() %>% head()`
# summarised NULL (printing "Length Class Mode / 0 NULL NULL") instead of the
# data.
# NOTE(review): re-knit the report so the rendered output reflects these calls.
str(gdp_dataset)
summary(gdp_dataset)
head(gdp_dataset)
## tibble [193 × 6] (S3: tbl_df/tbl/data.frame)
## $ Dataset : chr [1:193] "Grain food demand, other demand, total demand, production, and implied additional supply required for Internati"| __truncated__ "Grain food demand, other demand, total demand, production, and implied additional supply required for Internati"| __truncated__ "Grain food demand, other demand, total demand, production, and implied additional supply required for Internati"| __truncated__ "Grain food demand, other demand, total demand, production, and implied additional supply required for Internati"| __truncated__ ...
## $ Element : chr [1:193] "Food grain demand" "Other grain demand" "Grain production" "Implied additional supply required" ...
## $ Region : chr [1:193] "Asia" "Asia" "Asia" "Asia" ...
## $ Sub-region : chr [1:193] "East Asia" "East Asia" "East Asia" "East Asia" ...
## $ Year : Factor w/ 2 levels "2024","2034": 1 1 1 1 1 1 1 1 1 1 ...
## $ Millions of metric tons: num [1:193] 5.5 1.3 4.7 2.1 6.8 ...
## Length Class Mode
## 0 NULL NULL
# List the column names of the table.
names(gdp_dataset)
## [1] "Dataset" "Element"
## [3] "Region" "Sub-region"
## [5] "Year" "Millions of metric tons"
# Histogram of the tonnage values, using bins one unit wide.
ggplot(
  data = gdp_dataset,
  mapping = aes(x = `Millions of metric tons`)
) +
  geom_histogram(binwidth = 1) +
  ggtitle("Distribution of Millions of Metric Tons")
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Hypothesis Testing
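Null hypothesis (H0): The mean of “Millions of metric tons” is the same for every “Element” category.
Alternative hypothesis (H1): The mean of “Millions of metric tons” differs for at least one “Element” category, i.e. there is a significant relationship between “Element” and “Millions of metric tons”.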
# One-way ANOVA: tonnage explained by the Element category.
model <- aov(
  formula = `Millions of metric tons` ~ Element,
  data = gdp_dataset
)

# Print the ANOVA table (Df, sums of squares, F value, p-value).
model %>%
  summary()
## Df Sum Sq Mean Sq F value Pr(>F)
## Element 4 697279 174320 3.847 0.005 **
## Residuals 185 8382882 45313
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 3 observations deleted due to missingness