tidytuesday

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(tidytuesdayR)
library(janitor)

## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

# Download the TidyTuesday release for 2025, week 41.
tuesdata <- tidytuesdayR::tt_load(x = 2025, week = 41)

## ---- Compiling #TidyTuesday Information for 2025-10-14 ----
## --- There is 1 file available ---
## 
## 
## ── Downloading files ───────────────────────────────────────────────────────────
## 
##   1 of 1: "food_security.csv"

# Pull the food-security table out of the download, then inspect it:
# first in the data viewer, then as a compact column-by-column summary.
fdsec <- tuesdata$food_security
fdsec |> view()
fdsec |> glimpse()

## Rows: 171,232
## Columns: 10
## $ Year_Start <dbl> 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014,…
## $ Year_End   <dbl> 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016,…
## $ Area       <chr> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",…
## $ Item       <chr> "Average dietary energy supply adequacy (percent) (3-year a…
## $ Unit       <chr> "%", "%", "%", "%", "%", "%", "%", "%", "%", "%", "%", "%",…
## $ Value      <dbl> 97, 99, 102, 104, 105, 105, 104, 106, 107, 107, 107, 107, 1…
## $ CI_Lower   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ CI_Upper   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ Flag       <chr> "Estimated value", "Estimated value", "Estimated value", "E…
## $ Note       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…

#Couldn’t find solutions to the questions on the tidytuesday website. So randomly exploring the data.

# List the unique indicators (Item) present in the data.
distinct(fdsec, Item)

## # A tibble: 69 × 1
##    Item                                                                         
##    <chr>                                                                        
##  1 Average dietary energy supply adequacy (percent) (3-year average)            
##  2 Dietary energy supply used in the estimation of the prevalence of undernouri…
##  3 Dietary energy supply used in the estimation of the prevalence of undernouri…
##  4 Share of dietary energy supply derived from cereals, roots and tubers (perce…
##  5 Average protein supply (g/cap/day) (3-year average)                          
##  6 Average supply of protein of animal origin (g/cap/day) (3-year average)      
##  7 Gross domestic product per capita, PPP, (constant 2021 international $)      
##  8 Prevalence of undernourishment (percent) (3-year average)                    
##  9 Number of people undernourished (million) (3-year average)                   
## 10 Prevalence of severe food insecurity in the total population (percent) (3-ye…
## # ℹ 59 more rows

#69 distinct items!
# List the unique areas (Area) present in the data.
distinct(fdsec, Area)

## # A tibble: 249 × 1
##    Area               
##    <chr>              
##  1 Afghanistan        
##  2 Albania            
##  3 Algeria            
##  4 American Samoa     
##  5 Andorra            
##  6 Angola             
##  7 Antigua and Barbuda
##  8 Argentina          
##  9 Armenia            
## 10 Australia          
## # ℹ 239 more rows

#249 distinct areas (not all are countries, e.g. territories such as American Samoa).

#FAO has interactive graphs on its website. A Shiny app might help to make similar graphs.
#Plan:
#  - Make similar graphs.
#  - Separate out one item.
#  - Find its distribution across the years.

# df keeps only the rows for a single item:
# Prevalence of undernourishment (percent) (3-year average)

df <- filter(
  fdsec,
  Item == "Prevalence of undernourishment (percent) (3-year average)"
)
  
# Add a column `avg` holding the mean of Value across all areas for each
# Year_Start (missing values are ignored). The distribution should have been
# checked before choosing the mean as the summary.
# `.by` groups for this mutate() only, so df is not left grouped afterwards
# (a persistent group_by() without ungroup() would leak into later steps).
df <- df |>
  mutate(avg = mean(Value, na.rm = TRUE), .by = Year_Start)

# Now plot the means against the years.
# `avg` is repeated on every row that shares a Year_Start, so keep a single
# row per year first; otherwise each point is drawn once per area, stacked
# on top of itself.
df |>
  distinct(Year_Start, avg) |>
  ggplot(aes(x = Year_Start, y = avg)) +
  geom_point() +
  labs(
    x = "Start year of 3-year window",
    y = "Mean prevalence of undernourishment (%)"
  )

#The prevalence of undernourishment has steadily decreased over the years. An increase around 2020 might reflect the effect of the pandemic.

# Now filter this further for one country.
# Plot Value (the country's own figure) rather than avg: avg is the mean over
# all areas for the year, so it would give the same picture for any country.

df |>
  filter(Area == "Albania") |>
  ggplot(aes(x = Year_Start, y = Value)) +
  geom_point()

tidytuesday_41

AlearnsR

2025-10-15