Challenge 8
Importing the different data sets
# Load the FAOSTAT dairy-cattle table from the local challenge data folder.
cattleDairy <- read_csv(
  file = "challenge_datasets/FAOSTAT_cattle_dairy.csv"
)
## Rows: 36449 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): Domain Code, Domain, Area, Element, Item, Unit, Flag, Flag Description
## dbl (6): Area Code, Element Code, Item Code, Year Code, Year, Value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows of the dairy-cattle data.
head(cattleDairy, n = 6)
## # A tibble: 6 × 14
## `Domain Code` Domain `Area Code` Area `Element Code` Element `Item Code`
## <chr> <chr> <dbl> <chr> <dbl> <chr> <dbl>
## 1 QL Livestock … 2 Afgh… 5318 Milk A… 882
## 2 QL Livestock … 2 Afgh… 5420 Yield 882
## 3 QL Livestock … 2 Afgh… 5510 Produc… 882
## 4 QL Livestock … 2 Afgh… 5318 Milk A… 882
## 5 QL Livestock … 2 Afgh… 5420 Yield 882
## 6 QL Livestock … 2 Afgh… 5510 Produc… 882
## # ℹ 7 more variables: Item <chr>, `Year Code` <dbl>, Year <dbl>, Unit <chr>,
## # Value <dbl>, Flag <chr>, `Flag Description` <chr>
# Load the FAOSTAT chicken-egg table from the local challenge data folder.
chickenEgg <- read_csv(
  file = "challenge_datasets/FAOSTAT_egg_chicken.csv"
)
## Rows: 38170 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): Domain Code, Domain, Area, Element, Item, Unit, Flag, Flag Description
## dbl (6): Area Code, Element Code, Item Code, Year Code, Year, Value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows of the chicken-egg data.
head(chickenEgg, n = 6)
## # A tibble: 6 × 14
## `Domain Code` Domain `Area Code` Area `Element Code` Element `Item Code`
## <chr> <chr> <dbl> <chr> <dbl> <chr> <dbl>
## 1 QL Livestock … 2 Afgh… 5313 Laying 1062
## 2 QL Livestock … 2 Afgh… 5410 Yield 1062
## 3 QL Livestock … 2 Afgh… 5510 Produc… 1062
## 4 QL Livestock … 2 Afgh… 5313 Laying 1062
## 5 QL Livestock … 2 Afgh… 5410 Yield 1062
## 6 QL Livestock … 2 Afgh… 5510 Produc… 1062
## # ℹ 7 more variables: Item <chr>, `Year Code` <dbl>, Year <dbl>, Unit <chr>,
## # Value <dbl>, Flag <chr>, `Flag Description` <chr>
Looking at Cattle and Chicken production over time
# Narrow the original data frame down to the "Production" records and reduce
# them to one median production value per Area and year.
# No `select()` is needed beforehand: `summarise()` keeps only the grouping
# columns and the new summary column.
# `.groups = "drop"` returns an ungrouped tibble directly, so no trailing
# `ungroup()` is required and `summarise()` does not print its grouping message.
cattleDairy1 <- cattleDairy %>%
  filter(Element == "Production") %>%
  group_by(Area, `Year Code`) %>%
  summarise(`Cattle Median` = median(Value), .groups = "drop")
# Check that the summary worked by previewing its first six rows.
head(cattleDairy1, n = 6)
## # A tibble: 6 × 3
## Area `Year Code` `Cattle Median`
## <chr> <dbl> <dbl>
## 1 Afghanistan 1961 350000
## 2 Afghanistan 1962 350000
## 3 Afghanistan 1963 400000
## 4 Afghanistan 1964 400000
## 5 Afghanistan 1965 450000
## 6 Afghanistan 1966 500000
# Reduce the chicken data to one median "Production" value per Area and year.
# No `select()` is needed beforehand: `summarise()` keeps only the grouping
# columns and the new summary column.
# `.groups = "drop"` returns an ungrouped tibble directly, so no trailing
# `ungroup()` is required and `summarise()` does not print its grouping message.
chickenEgg1 <- chickenEgg %>%
  filter(Element == "Production") %>%
  group_by(Area, `Year Code`) %>%
  summarise(`Chicken Median` = median(Value), .groups = "drop")
# Join the cattle and chicken summaries, keeping only the Area/year pairs that
# appear in both tables, then preview the result.
animalProd <- cattleDairy1 %>%
  inner_join(chickenEgg1, by = c("Area", "Year Code"))
head(animalProd, n = 6)
## # A tibble: 6 × 4
## Area `Year Code` `Cattle Median` `Chicken Median`
## <chr> <dbl> <dbl> <dbl>
## 1 Afghanistan 1961 350000 10000
## 2 Afghanistan 1962 350000 11000
## 3 Afghanistan 1963 400000 11500
## 4 Afghanistan 1964 400000 12000
## 5 Afghanistan 1965 450000 12800
## 6 Afghanistan 1966 500000 13200
# Reshape to long form: the two "... Median" columns become one row per
# Area/year/livestock type, with the value stored in `Year Average`.
animalProdLonger <- pivot_longer(
  animalProd,
  cols = ends_with("Median"),
  names_to = "Livestock Type",
  values_to = "Year Average"
)
head(animalProdLonger, n = 6)
## # A tibble: 6 × 4
## Area `Year Code` `Livestock Type` `Year Average`
## <chr> <dbl> <chr> <dbl>
## 1 Afghanistan 1961 Cattle Median 350000
## 2 Afghanistan 1961 Chicken Median 10000
## 3 Afghanistan 1962 Cattle Median 350000
## 4 Afghanistan 1962 Chicken Median 11000
## 5 Afghanistan 1963 Cattle Median 400000
## 6 Afghanistan 1963 Chicken Median 11500
Visualizing the joined data
# I couldn't figure out `facet_wrap`, and the difference between the cattle and
# chicken values is so large that drawing both series on the same graph seemed
# the best way to show that both are increasing over time in Afghanistan.
#
# `animalProd` holds every Area, so it is restricted to Afghanistan first;
# without the filter each line would zig-zag through all countries' values for
# every year.
# `linewidth` is used for the line thickness because the `size` aesthetic for
# lines was deprecated in ggplot2 3.4.0.
animalProd %>%
  filter(Area == "Afghanistan") %>%
  ggplot(aes(x = `Year Code`)) +
  geom_line(
    aes(y = `Cattle Median`, color = "Cattle Median"),
    linewidth = 1,
    alpha = 0.8
  ) +
  geom_line(
    aes(y = `Chicken Median`, color = "Chicken Median"),
    linewidth = 1
  ) +
  labs(
    title = "Chicken and Cattle Average over time",
    y = "Year Average"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_color_okabeito(name = "Livestock Type")
