Challenge 9

Author

Jingyi Yang

knitr::opts_chunk$set(echo = TRUE)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(here)
here() starts at C:/8-601
library(readr)
library(readxl)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)

1. Function for importing the data

#' Import a FAOSTAT CSV file and reduce it to the columns used for analysis.
#'
#' Reads the file located at `here(df)`, removes every column whose name
#' contains "Code" together with `Element`, `Domain` and `Unit`, keeps the rows
#' whose `Flag` is not "A", and finally removes the flag columns.
#'
#' @param df Path of the CSV file, resolved through `here()`.
#' @return A tibble holding the columns that remain after the steps above.
data_import_FAOSATA <- function(df) {
  here(df) %>%
    read_csv() %>%
    select(-c(contains("Code"), Element, Domain, Unit)) %>%
    # The comparison is NA for a missing Flag, and filter() drops those rows
    # as well as the rows flagged "A".
    filter(Flag != "A") %>%
    # Flag columns are only needed for the row filter above.
    select(-contains("Flag"))
}

# Forward slashes are valid path separators on every platform, Windows included.
Cattle_dairy <- data_import_FAOSATA("challenge_datasets/FAOSTAT_cattle_dairy.csv")
Rows: 36449 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (8): Domain Code, Domain, Area, Element, Item, Unit, Flag, Flag Description
dbl (6): Area Code, Element Code, Item Code, Year Code, Year, Value

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Cattle_dairy
# A tibble: 23,335 × 4
   Area        Item                   Year  Value
   <chr>       <chr>                 <dbl>  <dbl>
 1 Afghanistan Milk, whole fresh cow  1961 700000
 2 Afghanistan Milk, whole fresh cow  1961   5000
 3 Afghanistan Milk, whole fresh cow  1961 350000
 4 Afghanistan Milk, whole fresh cow  1962 700000
 5 Afghanistan Milk, whole fresh cow  1962   5000
 6 Afghanistan Milk, whole fresh cow  1962 350000
 7 Afghanistan Milk, whole fresh cow  1963 780000
 8 Afghanistan Milk, whole fresh cow  1963   5128
 9 Afghanistan Milk, whole fresh cow  1963 400000
10 Afghanistan Milk, whole fresh cow  1964 780000
# ℹ 23,325 more rows
# Forward slashes are valid path separators on every platform, Windows included.
Egg_chicken <- data_import_FAOSATA("challenge_datasets/FAOSTAT_egg_chicken.csv")
Rows: 38170 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (8): Domain Code, Domain, Area, Element, Item, Unit, Flag, Flag Description
dbl (6): Area Code, Element Code, Item Code, Year Code, Year, Value

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Egg_chicken
# A tibble: 27,436 × 4
   Area        Item                 Year Value
   <chr>       <chr>               <dbl> <dbl>
 1 Afghanistan Eggs, hen, in shell  1961  4000
 2 Afghanistan Eggs, hen, in shell  1961 25000
 3 Afghanistan Eggs, hen, in shell  1961 10000
 4 Afghanistan Eggs, hen, in shell  1962  4400
 5 Afghanistan Eggs, hen, in shell  1962 25000
 6 Afghanistan Eggs, hen, in shell  1962 11000
 7 Afghanistan Eggs, hen, in shell  1963  4600
 8 Afghanistan Eggs, hen, in shell  1963 25000
 9 Afghanistan Eggs, hen, in shell  1963 11500
10 Afghanistan Eggs, hen, in shell  1964  4800
# ℹ 27,426 more rows
# Forward slashes are valid path separators on every platform, Windows included.
Livestock <- data_import_FAOSATA("challenge_datasets/FAOSTAT_livestock.csv")
Rows: 82116 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (8): Domain Code, Domain, Area, Element, Item, Unit, Flag, Flag Description
dbl (6): Area Code, Element Code, Item Code, Year Code, Year, Value

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Livestock
# A tibble: 31,279 × 4
   Area        Item   Year   Value
   <chr>       <chr> <dbl>   <dbl>
 1 Afghanistan Asses  1964 1150000
 2 Afghanistan Asses  1973 1250000
 3 Afghanistan Asses  1974 1250000
 4 Afghanistan Asses  1975 1250000
 5 Afghanistan Asses  1976 1250000
 6 Afghanistan Asses  1978 1300000
 7 Afghanistan Asses  1979 1300000
 8 Afghanistan Asses  1980 1295000
 9 Afghanistan Asses  1981 1315000
10 Afghanistan Asses  1982 1315000
# ℹ 31,269 more rows

2. Function for summary statistics

  #' Summary statistics of a numeric vector, ignoring missing values.
  #'
  #' @param x Numeric vector to summarise.
  #' @return A one-row tibble with the columns `sum`, `mean`, `median`,
  #'   `standard deviation`, `minimal` and `maximum`.
  summary_table <- function(x) {
    # TRUE is spelled out: the shorthand `T` is an ordinary variable that can
    # be reassigned, which would silently change every statistic below.
    tibble(
      sum = sum(x, na.rm = TRUE),
      mean = mean(x, na.rm = TRUE),
      median = median(x, na.rm = TRUE),
      `standard deviation` = sd(x, na.rm = TRUE),
      minimal = min(x, na.rm = TRUE),
      maximum = max(x, na.rm = TRUE)
    )
  }

  
  summary_table (Cattle_dairy$Value)
# A tibble: 1 × 6
         sum    mean median `standard deviation` minimal  maximum
       <dbl>   <dbl>  <dbl>                <dbl>   <dbl>    <dbl>
1 6090816208 261847.  15222             1355116.       7 33424000
  summary_table (Egg_chicken$Value)
# A tibble: 1 × 6
         sum   mean median `standard deviation` minimal  maximum
       <dbl>  <dbl>  <dbl>                <dbl>   <dbl>    <dbl>
1 1628707449 59451.  27480              346997.       1 26864000
  summary_table (Livestock$Value)
# A tibble: 1 × 6
          sum     mean median `standard deviation` minimal   maximum
        <dbl>    <dbl>  <dbl>                <dbl>   <dbl>     <dbl>
1 45330136113 1506285.  21225            10153801.       0 345754816
   #' Standardise a numeric vector to z-scores.
   #'
   #' Each element has the mean of `x` subtracted and is divided by the
   #' standard deviation of `x`; both statistics ignore missing values.
   #'
   #' @param x Numeric vector to standardise.
   #' @return A one-column tibble named `z` with one row per element of `x`.
   z_score <- function(x) {
     centre <- mean(x, na.rm = TRUE)
     spread <- sd(x, na.rm = TRUE)
     tibble(z = (x - centre) / spread)
   }
  
  z_score(Cattle_dairy$Value)
# A tibble: 23,335 × 1
         z
     <dbl>
 1  0.323 
 2 -0.190 
 3  0.0651
 4  0.323 
 5 -0.190 
 6  0.0651
 7  0.382 
 8 -0.189 
 9  0.102 
10  0.382 
# ℹ 23,325 more rows
  z_score(Egg_chicken$Value)
# A tibble: 27,436 × 1
         z
     <dbl>
 1 -0.160 
 2 -0.0993
 3 -0.143 
 4 -0.159 
 5 -0.0993
 6 -0.140 
 7 -0.158 
 8 -0.0993
 9 -0.138 
10 -0.157 
# ℹ 27,426 more rows
  z_score(Livestock$Value)
# A tibble: 31,279 × 1
         z
     <dbl>
 1 -0.0351
 2 -0.0252
 3 -0.0252
 4 -0.0252
 5 -0.0252
 6 -0.0203
 7 -0.0203
 8 -0.0208
 9 -0.0188
10 -0.0188
# ℹ 31,269 more rows

3. Functions for visualization

3.1 Simple visualization

#' Draw a histogram of `x` followed by a boxplot of `y` with base graphics.
#'
#' @param x Numeric vector for the histogram. Its x axis is labelled "Year",
#'   and the same vector is also handed to `col` as the bar colours.
#' @param y Numeric vector for the boxplot, which is titled
#'   "Boxplot for value".
#' @return The value of the closing `title()` call; the function is used for
#'   the plots it draws.
visualization_FAOSATA <- function(x, y) {
  graphics::hist(x, col = x, ylab = "Count Number", xlab = "Year")

  graphics::boxplot(y, n.breaks = 5, ylab = "Value", xlab = "Number")
  graphics::title(main = "Boxplot for value")
}

visualization_FAOSATA(Cattle_dairy$Year,Cattle_dairy$Value)

visualization_FAOSATA(Egg_chicken$Year,Egg_chicken$Value)

visualization_FAOSATA(Livestock$Year,Livestock$Value)

3.2 Advanced visualization

#' Bar chart of the number of rows per year, with one facet per item.
#'
#' @param df Data frame with the columns `Year` and `Item`.
#' @return A ggplot object.
Visualization_histogram_FAOSATA <- function(df) {
  # na.rm is an option of the layer. Written inside aes() it is taken as an
  # aesthetic mapping, so it belongs on geom_bar().
  ggplot(df, aes(x = Year, fill = Item)) +
    geom_bar(position = "dodge", na.rm = TRUE) +
    facet_wrap(vars(Item)) +
    scale_x_continuous(n.breaks = 10) +
    scale_y_continuous(name = "Count Number", n.breaks = 10) +
    ggthemes::theme_few() +
    theme(
      legend.position = "bottom",
      plot.title = element_text(hjust = 0.5),
      axis.text.x = element_text(angle = 90)
    ) +
    labs(title = "Histogram of Year")
}

Visualization_histogram_FAOSATA (Cattle_dairy)

Visualization_histogram_FAOSATA (Egg_chicken)

Visualization_histogram_FAOSATA (Livestock)

#' Boxplot of `Value` for each item, with the y axis shown in millions.
#'
#' @param df Data frame with the columns `Item` and `Value`.
#' @return A ggplot object.
Visualization_point_FAOSATA <- function(df) {
  # na.rm is an option of the layer. Written inside aes() it is taken as an
  # aesthetic mapping and the missing-value warning is still raised, so it
  # belongs on geom_boxplot().
  ggplot(df, aes(x = Item, y = Value, col = Item)) +
    geom_boxplot(na.rm = TRUE) +
    scale_y_continuous(
      labels = scales::label_number(suffix = "M", scale = 1e-6),
      n.breaks = 10
    ) +
    ggthemes::theme_few() +
    theme(
      legend.position = "bottom",
      plot.title = element_text(hjust = 0.5),
      axis.text.x = element_text(angle = 90)
    ) +
    labs(title = "Boxplot of Value")
}

Visualization_point_FAOSATA (Cattle_dairy)
Warning: Removed 74 rows containing non-finite values (`stat_boxplot()`).

Visualization_point_FAOSATA (Egg_chicken)
Warning: Removed 40 rows containing non-finite values (`stat_boxplot()`).

Visualization_point_FAOSATA (Livestock)
Warning: Removed 1185 rows containing non-finite values (`stat_boxplot()`).