Setup

Install and load the necessary packages to reproduce the report here:


# This is a chunk where you can load the necessary packages required to reproduce the report. Here are some example packages, you may add others if you require  

library(readr)
library(tidyr)
library(dplyr)
library(Hmisc)
library(outliers)

Read WHO Data

Read the WHO data using an appropriate function.


# This is an R chunk for reading the WHO data. Provide your R codes here:
# Load the WHO data set from the CSV file into a tibble.
who <- read_csv(file = "WHO.csv")
Parsed with column specification:
cols(
  .default = col_double(),
  country = col_character(),
  iso2 = col_character(),
  iso3 = col_character()
)
See spec(...) for full column specifications.

Tidy Task 1:


# This is an R chunk for tidy task 1. Provide your R codes here:

# Reshape columns 5 to 60 from wide to long: the former column names go
# into `code` and their cell values into `value`.
who1 <- gather(who, key = "code", value = "value", 5:60)
who1
NA

Tidy Task 2:


# This is an R chunk for tidy task 2. Provide your R codes here:

# Split `code` on underscores into its three parts, then split the third part
# at the boundary between a letter and the digit that follows it, giving
# `sex` and `age`.
who2 <- who1 %>%
  separate(code, c("new", "var", "sex"), sep = "_") %>%
  # The previous character class `[0-9999]` matched exactly the digits 0-9;
  # it is written as `[0-9]` so the pattern says what it means.
  separate(sex, c("sex", "age"), sep = "(?<=[A-Za-z])(?=[0-9])")
who2
NA

Tidy Task 3:


# This is an R chunk for tidy task 3. Provide your R codes here:
# Widen the data: every distinct entry of `var` becomes its own column,
# filled from `value`.
who3 <- spread(who2, key = var, value = value)
who3
NA
NA
NA

Tidy Task 4:


# This is a chunk for Task 4. Provide your R codes here: 
# Recode `age` as an ordered factor with readable age-band labels.
# The column is referenced as `age` (not `who3$age`) so that mutate() always
# works on the data flowing through the pipe.
who4 <- who3 %>%
  mutate(
    age = factor(
      age,
      levels = c("014", "1524", "2534", "3544", "4554", "5564", "65"),
      labels = c("<15", "15-24", "25-34", "35-44", "45-54", "55-64", "65>="),
      ordered = TRUE
    )
  )
who4
NA

Task 5: Filter & Select


# This is a chunk for Task 5. Provide your R codes here: 
# Remove the `iso2` and `new` columns, keeping everything else.
who5 <- select(who4, -iso2, -new)
who5

# Keep only the rows for the three countries of interest.
# `%in%` tests each row against the whole set; `==` with a length-3 vector
# recycles that vector along the rows, so most matching rows are dropped.
WHO_subset <- who5 %>% filter(country %in% c("Albania", "Japan", "Greece"))
WHO_subset
NA
NA

Read Species and Surveys data sets


# This is an R chunk for reading the Species and Surveys data sets. Provide your R codes here:
# Load the species lookup table from its CSV file.
species <- read_csv(file = "species.csv")
Parsed with column specification:
cols(
  species_id = col_character(),
  genus = col_character(),
  species = col_character(),
  taxa = col_character()
)
# Load the survey records from their CSV file.
surveys <- read_csv(file = "surveys.csv")
Parsed with column specification:
cols(
  record_id = col_double(),
  month = col_double(),
  day = col_double(),
  year = col_double(),
  species_id = col_character(),
  sex = col_character(),
  hindfoot_length = col_double(),
  weight = col_double()
)

Task 6: Join


# This is a chunk for Task 6. Provide your R codes here: 
# Attach the species details to every survey record, matching on species_id.
# left_join() takes the join columns through `by`; `key` is not one of its
# arguments, so the join column is now stated explicitly.
surveys_combined <- left_join(surveys, species, by = "species_id")
surveys_combined
NA

Task 7: Calculate


# This is a chunk for Task 7. Provide your R codes here: 
# Average weight and hindfoot length of species "DM" for each month,
# ignoring missing measurements.
surveys_combined %>%
  filter(species_id == "DM") %>%
  group_by(month) %>%
  summarise(
    "Avg Weight" = mean(weight, na.rm = TRUE),
    # Column label corrected from the misspelled "Avg Foot Lenght".
    "Avg Foot Length" = mean(hindfoot_length, na.rm = TRUE)
  )
NA

Task 8: Missing Values


# This is a chunk for Task 8. Provide your R codes here: 
# Keep only the records from the year 2000. `year` is read as a numeric
# column, so it is compared with a number rather than the string "2000".
surveys_combined_year <- surveys_combined %>% filter(year == 2000)
surveys_combined_year

# Number of missing weight values per species in the year-2000 subset
summarise(
  group_by(surveys_combined_year, species_id),
  "Weight NAs" = sum(is.na(weight))
)

# Impute missing weights with the mean weight of the same species.
# A species with no recorded weight at all has no mean to impute, so its
# rows end up as NaN rather than a number.
surveys_weight_imputed <- surveys_combined_year %>%
  group_by(species_id) %>%
  mutate(
    weight = ifelse(is.na(weight), mean(weight, na.rm = TRUE), weight)
  ) %>%
  # Drop the grouping so later steps do not silently operate per species.
  ungroup()

# Re-count the missing weights per species after imputation.
surveys_weight_imputed %>%
  group_by(species_id) %>%
  summarise("Weight NAs" = sum(is.na(weight)))
NA
NA
NA
NA

Task 9: Inconsistencies or Special Values


# This is a chunk for Task 9. Provide your R codes here: 

# Count the infinite values in the imputed weight column
surveys_weight_imputed$weight %>% is.infinite() %>% sum()
[1] 0
# Count the NaN values in the imputed weight column
surveys_weight_imputed$weight %>% is.nan() %>% sum()
[1] 68
#No infinite values were found; however, the weight column has 68 NaN values.

#This happens because some species have no recorded weight values at all, so there is no species mean to impute. For those species the code in Task 8 takes the mean of an empty set of weights, which is NaN, and writes that NaN into the missing observations.

Task 10: Outliers


# This is a chunk for Task 10. Provide your R codes here: 
# Creating a boxplot of the data
# The title argument is lower-case `main`; argument names are case-sensitive,
# so `Main` is not recognised as the plot title.
surveys_combined$hindfoot_length %>%
  boxplot(main = "Boxplot of Hindfoot Length")

#From the boxplot, 2 outliers can be observed

# z-scores of the hindfoot lengths, computed on the non-missing values only
zscores <- scores(na.omit(surveys_combined$hindfoot_length), type = "z")
# Count the observations whose absolute z-score exceeds 3, the cut-off used
# here for outliers
which(abs(zscores) > 3) %>% length()
[1] 4
#Following the z-score method, there are only 4 outliers in a dataset of 35549 observations

# To manage the outliers, we simply exclude them since they are a small
# percentage of the dataset.
# `zscores` was computed after dropping NAs, so its positions refer to the
# non-missing values only. Map them back to positions in the full column
# before removing anything; otherwise the wrong observations are dropped.
hindfoot_all <- surveys_combined$hindfoot_length
outlier_rows <- which(!is.na(hindfoot_all))[abs(zscores) > 3]
# setdiff() keeps every position when there are no outliers
# (x[-integer(0)] would return an empty vector instead).
hindfoot_new <- hindfoot_all[setdiff(seq_along(hindfoot_all), outlier_rows)]
length(surveys_combined$hindfoot_length)
[1] 35549
# Number of hindfoot-length values left after excluding the outliers
hindfoot_new %>% length()
[1] 35545



LS0tDQp0aXRsZTogIk1BVEgyMzQ5IFNlbWVzdGVyIDEsIDIwMTkiDQphdXRob3I6ICJTaGlwcmVuIEpheWFkZXYgIHMzNzQ0NDIxIg0Kc3VidGl0bGU6IEFzc2lnbm1lbnQgMg0Kb3V0cHV0Og0KICBodG1sX25vdGVib29rOiBkZWZhdWx0DQotLS0NCiMjIFNldHVwDQoNCkluc3RhbGwgYW5kIGxvYWQgdGhlIG5lY2Vzc2FyeSBwYWNrYWdlcyB0byByZXByb2R1Y2UgdGhlIHJlcG9ydCBoZXJlOg0KDQpgYGB7ciwgZWNobyA9IFRSVUUsIG1lc3NhZ2U9RkFMU0V9DQoNCiMgVGhpcyBpcyBhIGNodW5rIHdoZXJlIHlvdSBjYW4gbG9hZCB0aGUgbmVjZXNzYXJ5IHBhY2thZ2VzIHJlcXVpcmVkIHRvIHJlcHJvZHVjZSB0aGUgcmVwb3J0LiBIZXJlIGFyZSBzb21lIGV4YW1wbGUgcGFja2FnZXMsIHlvdSBtYXkgYWRkIG90aGVycyBpZiB5b3UgcmVxdWlyZSAgDQoNCmxpYnJhcnkocmVhZHIpDQpsaWJyYXJ5KHRpZHlyKQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkoSG1pc2MpDQpsaWJyYXJ5KG91dGxpZXJzKQ0KDQpgYGANCg0KDQojIyBSZWFkIFdITyBEYXRhDQoNClJlYWQgdGhlIFdITyBkYXRhIHVzaW5nIGFuIGFwcHJvcHJpYXRlIGZ1bmN0aW9uLg0KDQpgYGB7cn0NCg0KIyBUaGlzIGlzIGFuIFIgY2h1bmsgZm9yIHJlYWRpbmcgdGhlIFdITyBkYXRhLiBQcm92aWRlIHlvdXIgUiBjb2RlcyBoZXJlOg0Kd2hvIDwtIHJlYWRfY3N2KCJXSE8uY3N2IikNCg0KYGBgDQoNCg0KDQoNCiMjIFRpZHkgVGFzayAxOg0KDQpgYGB7ciwgZWNobz1UUlVFfQ0KDQojIFRoaXMgaXMgYW4gUiBjaHVuayBmb3IgdGlkeSB0YXNrIDEuIFByb3ZpZGUgeW91ciBSIGNvZGVzIGhlcmU6DQoNCndobzEgPC0gd2hvICU+JSBnYXRoZXIoY29kZSwgdmFsdWUsIDU6NjApDQp3aG8xDQoNCmBgYA0KDQoNCg0KIyMgVGlkeSBUYXNrIDI6DQoNCmBgYHtyfQ0KDQojIFRoaXMgaXMgYW4gUiBjaHVuayBmb3IgdGlkeSB0YXNrIDIuIFByb3ZpZGUgeW91ciBSIGNvZGVzIGhlcmU6DQoNCndobzIgPC0gd2hvMSAlPiUgc2VwYXJhdGUoY29kZSwgYygibmV3IiwgInZhciIsICJzZXgiKSwgc2VwID0gIl8iKQ0Kd2hvMiA8LSB3aG8yICU+JSBzZXBhcmF0ZShzZXgsIGMoInNleCIsICJhZ2UiKSwgc2VwID0gIig/PD1bQS1aYS16XSkoPz1bMC05OTk5XSkiKQ0Kd2hvMg0KDQpgYGANCg0KDQojIyBUaWR5IFRhc2sgMzoNCg0KYGBge3J9DQoNCiMgVGhpcyBpcyBhbiBSIGNodW5rIGZvciB0aWR5IHRhc2sgMy4gUHJvdmlkZSB5b3VyIFIgY29kZXMgaGVyZToNCndobzMgPC0gd2hvMiAlPiUgc3ByZWFkKHZhciwgdmFsdWUpDQp3aG8zDQoNCg0KDQpgYGANCg0KDQojIyBUaWR5IFRhc2sgNDogDQoNCmBgYHtyfQ0KDQojIFRoaXMgaXMgYSBjaHVuayBmb3IgVGFzayA0LiBQcm92aWRlIHlvdXIgUiBjb2RlcyBoZXJlOiANCndobzQgPC0gd2hvMyAlPiUgbXV0YXRlKGFnZSA9IGZhY3Rvcih3aG8zJGFnZSwgbGV2ZWxzID0gYygiMDE0IiwgIjE1MjQiLCAiMjUzNCIsICIzNTQ0IiwgIjQ1NTQiLCAi
NTU2NCIsICI2NSIpLA0KICAgICAgICAgICAgICAgICAgbGFiZWxzID0gYygiPDE1IiwgIjE1LTI0IiwgIjI1LTM0IiwgIjM1LTQ0IiwgIjQ1LTU0IiwgIjU1LTY0IiwgIjY1Pj0iKSwgb3JkZXJlZCA9IFRSVUUpKQ0Kd2hvNA0KDQpgYGANCg0KIyMgVGFzayA1OiBGaWx0ZXIgJiBTZWxlY3QNCg0KYGBge3J9DQoNCiMgVGhpcyBpcyBhIGNodW5rIGZvciBUYXNrIDUuIFByb3ZpZGUgeW91ciBSIGNvZGVzIGhlcmU6IA0Kd2hvNSA8LSB3aG80ICU+JSBzZWxlY3QoLWMoaXNvMiwgbmV3KSkNCndobzUNCg0KV0hPX3N1YnNldCA8LSB3aG81ICU+JSBmaWx0ZXIoY291bnRyeSA9PSBjKCJBbGJhbmlhIiwgIkphcGFuIiwgIkdyZWVjZSIpKQ0KV0hPX3N1YnNldA0KDQoNCmBgYA0KDQoNCg0KIyMgUmVhZCBTcGVjaWVzIGFuZCBTdXJ2ZXlzIGRhdGEgc2V0cw0KDQpgYGB7cn0NCg0KIyBUaGlzIGlzIGFuIFIgY2h1bmsgZm9yIHJlYWRpbmcgdGhlIFNwZWNpZXMgYW5kIFN1cnZleXMgZGF0YSBzZXRzLiBQcm92aWRlIHlvdXIgUiBjb2RlcyBoZXJlOg0Kc3BlY2llcyA8LSByZWFkX2Nzdigic3BlY2llcy5jc3YiKQ0Kc3VydmV5cyA8LSByZWFkX2Nzdigic3VydmV5cy5jc3YiKQ0KDQpgYGANCg0KDQoNCg0KIyMgVGFzayA2OiBKb2luICANCg0KYGBge3J9DQoNCiMgVGhpcyBpcyBhIGNodW5rIGZvciBUYXNrIDYuIFByb3ZpZGUgeW91ciBSIGNvZGVzIGhlcmU6IA0Kc3VydmV5c19jb21iaW5lZCA8LSBsZWZ0X2pvaW4oc3VydmV5cywgc3BlY2llcywga2V5ID0gInNwZWNpZXNfaWQiKQ0Kc3VydmV5c19jb21iaW5lZA0KDQpgYGANCg0KDQoNCiMjIFRhc2sgNzogQ2FsY3VsYXRlIA0KDQpgYGB7cn0NCg0KIyBUaGlzIGlzIGEgY2h1bmsgZm9yIFRhc2sgNy4gUHJvdmlkZSB5b3VyIFIgY29kZXMgaGVyZTogDQpzdXJ2ZXlzX2NvbWJpbmVkICU+JSBmaWx0ZXIoc3BlY2llc19pZCA9PSAiRE0iKSAlPiUgZ3JvdXBfYnkobW9udGgpICU+JSANCiAgc3VtbWFyaXNlKCdBdmcgV2VpZ2h0JyA9IG1lYW4od2VpZ2h0LCBuYS5ybSA9IFRSVUUpLCAnQXZnIEZvb3QgTGVuZ2h0JyA9IG1lYW4oaGluZGZvb3RfbGVuZ3RoLCBuYS5ybSA9IFRSVUUpKQ0KDQpgYGANCg0KDQoNCiMjIFRhc2sgODogTWlzc2luZyBWYWx1ZXMNCg0KYGBge3J9DQoNCiMgVGhpcyBpcyBhIGNodW5rIGZvciBUYXNrIDguIFByb3ZpZGUgeW91ciBSIGNvZGVzIGhlcmU6IA0Kc3VydmV5c19jb21iaW5lZF95ZWFyIDwtIHN1cnZleXNfY29tYmluZWQgJT4lIGZpbHRlcih5ZWFyID09ICIyMDAwIikNCnN1cnZleXNfY29tYmluZWRfeWVhcg0KDQojRGlzcGxheWluZyB0aGUgbnVtYmVyIG9mIE5BIG9ic2VydmF0aW9ucyBpbiB3ZWlnaHQgZm9yIHRoZSB5ZWFyIDIwMDANCnN1cnZleXNfY29tYmluZWRfeWVhciAlPiUgZ3JvdXBfYnkoc3BlY2llc19pZCkgJT4lIHN1bW1hcmlzZSgnV2VpZ2h0IE5BcycgPSBzdW0oaXMubmEod2VpZ2h0KSkpICANCg0KI0ltcHV0aW5nIHRoZSBtZWFucyBvZiBlYWNoIHNw
ZWNpZXMgd2VpZ2h0IGludG8gTkEgb2JzZXJ2YXRpb25zDQpzdXJ2ZXlzX3dlaWdodF9pbXB1dGVkIDwtIHN1cnZleXNfY29tYmluZWRfeWVhciAlPiUgZ3JvdXBfYnkoc3BlY2llc19pZCkgJT4lIG11dGF0ZSh3ZWlnaHQgPSBpZmVsc2UoaXMubmEod2VpZ2h0KSwgbWVhbih3ZWlnaHQsIG5hLnJtID0gVFJVRSksIHdlaWdodCkpDQpzdXJ2ZXlzX3dlaWdodF9pbXB1dGVkICU+JSBncm91cF9ieShzcGVjaWVzX2lkKSAlPiUgc3VtbWFyaXNlKCdXZWlnaHQgTkFzJyA9IHN1bShpcy5uYSh3ZWlnaHQpKSkNCg0KDQoNCg0KYGBgDQoNCg0KDQoNCg0KIyMgVGFzayA5OiBJbmNvbnNpc3RlbmNpZXMgb3IgU3BlY2lhbCBWYWx1ZXMNCg0KYGBge3J9DQoNCiMgVGhpcyBpcyBhIGNodW5rIGZvciBUYXNrIDkuIFByb3ZpZGUgeW91ciBSIGNvZGVzIGhlcmU6IA0KDQojQ2hlY2tpbmcgd2VpZ2h0IGNvbHVtbiBmb3IgaW5maW5pdGUgdmFsdWVzDQpzdW0oaXMuaW5maW5pdGUoc3VydmV5c193ZWlnaHRfaW1wdXRlZCR3ZWlnaHQpKQ0KI0NoZWNraW5nIHdlaWdodCBjb2x1bW4gZm9yIE5hTiB2YWx1ZXMNCnN1bShpcy5uYW4oc3VydmV5c193ZWlnaHRfaW1wdXRlZCR3ZWlnaHQpKQ0KDQoNCiNObyBpbmZpbml0ZSB2YWx1ZXMgd2VyZSBmb3VuZCwgaG93ZXZlciB0aGUgd2VpZ2h0IGNvbHVtbiBoYXMgNjggTmFOIHZhbHVlcy4NCg0KI1RoaXMgaXMgZHVlIHRvIHNvbWUgc3BlY2llcyBub3QgaGF2aW5nIGFueSB3ZWlnaHQgdmFsdWVzLCBzbyB0aGV5IHdlcmUgdW5hYmxlIHRvIGJlIGltcHV0ZWQgdXNpbmcgdGhlIG1lYW4gd2VpZ2h0IG9mIHRoYXQgc3BlY2llcy4gVGhlIGNvZGUgcnVuIGluIHRhc2sgOCB3b3VsZCBoYXZlIGltcHV0ZWQgdGhlIG1lYW4gb2YgTkEgaW50byBzcGVjaWVzIHdpdGhvdXQgYW55IHdlaWdodCBvYnNlcnZhdGlvbnMgcmVzdWx0aW5nIGluIE5hTi4NCg0KYGBgDQoNCg0KDQoNCiMjIFRhc2sgMTA6IE91dGxpZXJzDQoNCmBgYHtyfQ0KDQojIFRoaXMgaXMgYSBjaHVuayBmb3IgVGFzayAxMC4gUHJvdmlkZSB5b3VyIFIgY29kZXMgaGVyZTogDQojQ3JlYXRpbmcgYSBib3hwbG90IG9mIHRoZSBkYXRhDQpzdXJ2ZXlzX2NvbWJpbmVkJGhpbmRmb290X2xlbmd0aCAlPiUgYm94cGxvdChNYWluID0gIkJveHBsb3Qgb2YgSGluZGZvb3QgTGVuZ3RoIikgDQojRnJvbSB0aGUgYm94cGxvdCwgMiBvdXRsaWVycyBjYW4gYmUgb2JzZXJ2ZWQNCg0KI0NhbGN1bGF0aW5nIHotc2NvcmVzIGZvciBkYXRhDQp6c2NvcmVzIDwtIG5hLm9taXQoc3VydmV5c19jb21iaW5lZCRoaW5kZm9vdF9sZW5ndGgpICU+JSBzY29yZXModHlwZSA9ICJ6IikNCiNEaXNwbGF5aW5nIHotc2NvcmVzIGFib3ZlIDMgYXMgdGhhdCBpcyB0aGUgbGltaXQgZm9yIG91dGxpZXJzDQpsZW5ndGggKHdoaWNoKCBhYnMoenNjb3JlcykgPjMpKQ0KI0ZvbGxvd2luZyB0aGUgei1zY29yZSBtZXRob2QsIHRoZXJlIGFyZSBvbmx5IDQgb3V0bGllcnMgaW4gYSBkYXRhc2V0IG9mIDM1
NTQ5IG9ic2VydmF0aW9ucw0KDQojVG8gbWFuYWdlIHRoZSBvdXRsaWVycywgd2Ugd2lsbCBzaW1wbHkgZXhsY2x1ZGUgdGhlbSBzaW5jZSB0aGV5IGFyZSBhIHNtYWxsIHBlcmNlbnRhZ2Ugb2YgdGhlIGRhdGFzZXQNCmhpbmRmb290X25ldyA8LSBzdXJ2ZXlzX2NvbWJpbmVkJGhpbmRmb290X2xlbmd0aFsgLXdoaWNoKGFicyAoenNjb3Jlcyk+MyldDQpsZW5ndGgoc3VydmV5c19jb21iaW5lZCRoaW5kZm9vdF9sZW5ndGgpDQpsZW5ndGgoaGluZGZvb3RfbmV3KQ0KDQoNCg0KYGBgDQoNCg0KDQoNCjxicj4NCjxicj4NCg==