Challenge 8

Import the different data sets

# Read the FAOSTAT cattle dairy CSV (path is relative to the working directory).
cattleDairy <- read_csv("challenge_datasets/FAOSTAT_cattle_dairy.csv")
## Rows: 36449 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): Domain Code, Domain, Area, Element, Item, Unit, Flag, Flag Description
## dbl (6): Area Code, Element Code, Item Code, Year Code, Year, Value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the cattle dairy data.
head(cattleDairy)
## # A tibble: 6 × 14
##   `Domain Code` Domain      `Area Code` Area  `Element Code` Element `Item Code`
##   <chr>         <chr>             <dbl> <chr>          <dbl> <chr>         <dbl>
## 1 QL            Livestock …           2 Afgh…           5318 Milk A…         882
## 2 QL            Livestock …           2 Afgh…           5420 Yield           882
## 3 QL            Livestock …           2 Afgh…           5510 Produc…         882
## 4 QL            Livestock …           2 Afgh…           5318 Milk A…         882
## 5 QL            Livestock …           2 Afgh…           5420 Yield           882
## 6 QL            Livestock …           2 Afgh…           5510 Produc…         882
## # ℹ 7 more variables: Item <chr>, `Year Code` <dbl>, Year <dbl>, Unit <chr>,
## #   Value <dbl>, Flag <chr>, `Flag Description` <chr>
# Read the FAOSTAT chicken egg CSV (path is relative to the working directory).
chickenEgg <- read_csv("challenge_datasets/FAOSTAT_egg_chicken.csv")
## Rows: 38170 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): Domain Code, Domain, Area, Element, Item, Unit, Flag, Flag Description
## dbl (6): Area Code, Element Code, Item Code, Year Code, Year, Value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the chicken egg data.
head(chickenEgg)
## # A tibble: 6 × 14
##   `Domain Code` Domain      `Area Code` Area  `Element Code` Element `Item Code`
##   <chr>         <chr>             <dbl> <chr>          <dbl> <chr>         <dbl>
## 1 QL            Livestock …           2 Afgh…           5313 Laying         1062
## 2 QL            Livestock …           2 Afgh…           5410 Yield          1062
## 3 QL            Livestock …           2 Afgh…           5510 Produc…        1062
## 4 QL            Livestock …           2 Afgh…           5313 Laying         1062
## 5 QL            Livestock …           2 Afgh…           5410 Yield          1062
## 6 QL            Livestock …           2 Afgh…           5510 Produc…        1062
## # ℹ 7 more variables: Item <chr>, `Year Code` <dbl>, Year <dbl>, Unit <chr>,
## #   Value <dbl>, Flag <chr>, `Flag Description` <chr>

Looking at Cattle and Chicken production over time

# Narrow the original data frame down to the "Production" records and the
# columns that are actually used, then take the median production value for
# each country (Area) and year.
cattleDairy1 <- cattleDairy %>%
  filter(Element == "Production") %>%
  select(Area, `Year Code`, Value) %>%
  group_by(Area, `Year Code`) %>%
  # `.groups = "drop"` returns an ungrouped tibble directly, so no trailing
  # ungroup() is needed and summarise() no longer prints its grouping message.
  summarise(`Cattle Median` = median(Value), .groups = "drop")
# Check that it works: preview the first rows of the summarised cattle data.
head(cattleDairy1)
## # A tibble: 6 × 3
##   Area        `Year Code` `Cattle Median`
##   <chr>             <dbl>           <dbl>
## 1 Afghanistan        1961          350000
## 2 Afghanistan        1962          350000
## 3 Afghanistan        1963          400000
## 4 Afghanistan        1964          400000
## 5 Afghanistan        1965          450000
## 6 Afghanistan        1966          500000
# Do the same for the chicken data: keep the "Production" records and the
# columns that are actually used, then take the median production value for
# each country (Area) and year.
chickenEgg1 <- chickenEgg %>%
  filter(Element == "Production") %>%
  select(Area, `Year Code`, Value) %>%
  group_by(Area, `Year Code`) %>%
  # `.groups = "drop"` returns an ungrouped tibble directly, so no trailing
  # ungroup() is needed and summarise() no longer prints its grouping message.
  summarise(`Chicken Median` = median(Value), .groups = "drop")
# Combine the two summaries, keeping only the country/year pairs that are
# present in both the cattle and the chicken data.
animalProd <- cattleDairy1 %>%
  inner_join(chickenEgg1, by = c("Area", "Year Code"))
# Preview the joined table.
head(animalProd)
## # A tibble: 6 × 4
##   Area        `Year Code` `Cattle Median` `Chicken Median`
##   <chr>             <dbl>           <dbl>            <dbl>
## 1 Afghanistan        1961          350000            10000
## 2 Afghanistan        1962          350000            11000
## 3 Afghanistan        1963          400000            11500
## 4 Afghanistan        1964          400000            12000
## 5 Afghanistan        1965          450000            12800
## 6 Afghanistan        1966          500000            13200
# Reshape to long format: one row per country, year and livestock type, with
# the two median column names moved into `Livestock Type` and their values
# into `Year Average`.
animalProdLonger <- pivot_longer(
  animalProd,
  cols = c(`Cattle Median`, `Chicken Median`),
  names_to = "Livestock Type",
  values_to = "Year Average"
)
# Preview the reshaped table.
head(animalProdLonger)
## # A tibble: 6 × 4
##   Area        `Year Code` `Livestock Type` `Year Average`
##   <chr>             <dbl> <chr>                     <dbl>
## 1 Afghanistan        1961 Cattle Median            350000
## 2 Afghanistan        1961 Chicken Median            10000
## 3 Afghanistan        1962 Cattle Median            350000
## 4 Afghanistan        1962 Chicken Median            11000
## 5 Afghanistan        1963 Cattle Median            400000
## 6 Afghanistan        1963 Chicken Median            11500

Visualizing the joined data

# I couldn't figure out `facet_wrap`, and there was such a large difference
# between the cattle and chicken medians that drawing both lines on the same
# graph seemed the best way to show that both are increasing over time in
# Afghanistan.
animalProd %>%
  # Restrict to the one country being described; without this, geom_line()
  # would connect the points of every country into a single zig-zag path.
  filter(Area == "Afghanistan") %>%
  ggplot(aes(x = `Year Code`)) +
  # `linewidth` replaces the `size` aesthetic for lines (ggplot2 >= 3.4.0).
  geom_line(aes(y = `Cattle Median`, color = "Cattle Median"),
            linewidth = 1, alpha = 0.8) +
  geom_line(aes(y = `Chicken Median`, color = "Chicken Median"),
            linewidth = 1) +
  labs(title = "Chicken and Cattle Average over time",
       y = "Year Average") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_color_okabeito(name = "Livestock Type")