Setup
Install and load the necessary packages to reproduce the report here:
# This is a chunk where you can load the necessary packages required to reproduce the report. Here are some example packages, you may add others if you require
library(readr)
library(tidyr)
library(dplyr)
library(Hmisc)
library(outliers)
Read WHO Data
Read the WHO data using an appropriate function.
# Import the WHO data set from CSV; readr guesses the column types and
# reports the specification it used.
who <- readr::read_csv(file = "WHO.csv")
Parsed with column specification:
cols(
.default = col_double(),
country = col_character(),
iso2 = col_character(),
iso3 = col_character()
)
See spec(...) for full column specifications.
Tidy Task 1:
# Tidy task 1: reshape from wide to long. Columns 5 through 60 are stacked
# into a `code` column (the former column name) and a `value` column (the
# cell contents).
who1 <- gather(who, key = "code", value = "value", 5:60)
who1
NA
Tidy Task 2:
# Tidy task 2: split the combined `code` column in two passes.
#   1. Split on underscores into `new`, `var` and a combined sex/age token.
#   2. Split that token at the boundary between a letter and a digit into
#      `sex` and `age`. The lookbehind/lookahead pair is zero-width, so no
#      characters are consumed by the split; `[0-9]` matches a single digit.
who2 <- who1 %>%
  separate(code, c("new", "var", "sex"), sep = "_") %>%
  separate(sex, c("sex", "age"), sep = "(?<=[A-Za-z])(?=[0-9])")
who2
NA
Tidy Task 3:
# Tidy task 3: widen the data so that each distinct value of `var` becomes
# its own column, filled from `value`.
who3 <- spread(who2, key = var, value = value)
who3
NA
NA
NA
Tidy Task 4:
# Tidy task 4: recode `age` as an ordered factor with readable labels.
# The column is referenced by its bare name so that mutate() works on the
# data flowing through the pipe rather than on a fixed copy of `who3`.
who4 <- who3 %>%
  mutate(
    age = factor(
      age,
      levels = c("014", "1524", "2534", "3544", "4554", "5564", "65"),
      labels = c("<15", "15-24", "25-34", "35-44", "45-54", "55-64", "65>="),
      ordered = TRUE
    )
  )
who4
NA
Task 5: Filter & Select
# Task 5: drop the `iso2` and `new` columns.
who5 <- who4 %>% select(-c(iso2, new))
who5

# Keep every row belonging to one of the three countries. `%in%` tests set
# membership for each row; `==` against a length-3 vector would instead
# recycle the vector down the column and compare position by position,
# silently discarding most matching rows.
WHO_subset <- who5 %>% filter(country %in% c("Albania", "Japan", "Greece"))
WHO_subset
NA
NA
Read Species and Surveys data sets
# Import the species data set from CSV.
species <- readr::read_csv(file = "species.csv")
Parsed with column specification:
cols(
species_id = col_character(),
genus = col_character(),
species = col_character(),
taxa = col_character()
)
# Import the surveys data set from CSV.
surveys <- readr::read_csv(file = "surveys.csv")
Parsed with column specification:
cols(
record_id = col_double(),
month = col_double(),
day = col_double(),
year = col_double(),
species_id = col_character(),
sex = col_character(),
hindfoot_length = col_double(),
weight = col_double()
)
Task 6: Join
# Task 6: add the species columns to every survey record. All rows of
# `surveys` are kept (left join). The join column is named explicitly through
# `by`, the argument left_join() actually uses to select join keys.
surveys_combined <- left_join(surveys, species, by = "species_id")
surveys_combined
NA
Task 7: Calculate
# Task 7: for species "DM", report the mean weight and mean hindfoot length
# per month, ignoring missing measurements.
surveys_combined %>%
  filter(species_id == "DM") %>%
  group_by(month) %>%
  summarise(
    "Avg Weight" = mean(weight, na.rm = TRUE),
    "Avg Foot Length" = mean(hindfoot_length, na.rm = TRUE)
  )
NA
Task 8: Missing Values
# Task 8: restrict the combined data to surveys from the year 2000. `year`
# is read as a numeric column, so it is compared with a number rather than
# a string.
surveys_combined_year <- surveys_combined %>% filter(year == 2000)
surveys_combined_year

# Count the missing weights per species for the year 2000.
surveys_combined_year %>%
  group_by(species_id) %>%
  summarise("Weight NAs" = sum(is.na(weight)))

# Replace each missing weight with the mean weight of its species.
# A species with no recorded weight at all has nothing to average, so
# mean(..., na.rm = TRUE) yields NaN and those rows become NaN instead of NA.
# The grouping is removed afterwards so later steps start from an ungrouped
# table.
surveys_weight_imputed <- surveys_combined_year %>%
  group_by(species_id) %>%
  mutate(weight = ifelse(is.na(weight), mean(weight, na.rm = TRUE), weight)) %>%
  ungroup()

# Re-count per species after imputation. is.na() is also TRUE for NaN, so
# species without any recorded weight still show a non-zero count here.
surveys_weight_imputed %>%
  group_by(species_id) %>%
  summarise("Weight NAs" = sum(is.na(weight)))
NA
NA
NA
NA
Task 9: Inconsistencies or Special Values
# Task 9: count the infinite entries (Inf or -Inf) in the imputed weight
# column.
surveys_weight_imputed$weight %>%
  is.infinite() %>%
  sum()
[1] 0
# Count the NaN entries in the imputed weight column.
surveys_weight_imputed$weight %>%
  is.nan() %>%
  sum()
[1] 68
# No infinite values were found; however, the weight column contains 68 NaN values.
# This happens because some species have no recorded weight at all. For those species, the Task 8 imputation computes mean(weight, na.rm = TRUE) over an empty set of values, which returns NaN, and that NaN is then written into each of their missing observations.
Task 10: Outliers
# Task 10: draw a boxplot of hindfoot length to inspect it for outliers.
# The title argument is lowercase `main`; argument names are case-sensitive,
# so `Main` would not be recognised as the plot title.
boxplot(
  surveys_combined$hindfoot_length,
  main = "Boxplot of Hindfoot Length"
)

# From the boxplot, 2 outliers can be observed.
# Standardise the non-missing hindfoot lengths into z-scores.
zscores <- scores(na.omit(surveys_combined$hindfoot_length), type = "z")
# Count the observations whose absolute z-score exceeds 3, the cut-off used
# here for flagging outliers.
sum(abs(zscores) > 3, na.rm = TRUE)
[1] 4
# Following the z-score method, there are only 4 outliers in a dataset of
# 35549 observations.
# To manage the outliers, we simply exclude them, since they are a small
# percentage of the dataset.
#
# `zscores` was computed on the NA-stripped vector, so its positions do not
# line up with the full column. Map them back to positions in the original
# column before dropping anything.
hindfoot <- surveys_combined$hindfoot_length
observed_idx <- which(!is.na(hindfoot))
outlier_idx <- observed_idx[which(abs(zscores) > 3)]
# A logical mask is used instead of negative indexing: negative indexing with
# an empty index vector would return an empty result when nothing is flagged.
hindfoot_new <- hindfoot[!(seq_along(hindfoot) %in% outlier_idx)]
length(surveys_combined$hindfoot_length)
[1] 35549
# Number of hindfoot lengths remaining after the outliers were dropped.
hindfoot_new %>% length()
[1] 35545
LS0tDQp0aXRsZTogIk1BVEgyMzQ5IFNlbWVzdGVyIDEsIDIwMTkiDQphdXRob3I6ICJTaGlwcmVuIEpheWFkZXYgIHMzNzQ0NDIxIg0Kc3VidGl0bGU6IEFzc2lnbm1lbnQgMg0Kb3V0cHV0Og0KICBodG1sX25vdGVib29rOiBkZWZhdWx0DQotLS0NCiMjIFNldHVwDQoNCkluc3RhbGwgYW5kIGxvYWQgdGhlIG5lY2Vzc2FyeSBwYWNrYWdlcyB0byByZXByb2R1Y2UgdGhlIHJlcG9ydCBoZXJlOg0KDQpgYGB7ciwgZWNobyA9IFRSVUUsIG1lc3NhZ2U9RkFMU0V9DQoNCiMgVGhpcyBpcyBhIGNodW5rIHdoZXJlIHlvdSBjYW4gbG9hZCB0aGUgbmVjZXNzYXJ5IHBhY2thZ2VzIHJlcXVpcmVkIHRvIHJlcHJvZHVjZSB0aGUgcmVwb3J0LiBIZXJlIGFyZSBzb21lIGV4YW1wbGUgcGFja2FnZXMsIHlvdSBtYXkgYWRkIG90aGVycyBpZiB5b3UgcmVxdWlyZSAgDQoNCmxpYnJhcnkocmVhZHIpDQpsaWJyYXJ5KHRpZHlyKQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkoSG1pc2MpDQpsaWJyYXJ5KG91dGxpZXJzKQ0KDQpgYGANCg0KDQojIyBSZWFkIFdITyBEYXRhDQoNClJlYWQgdGhlIFdITyBkYXRhIHVzaW5nIGFuIGFwcHJvcHJpYXRlIGZ1bmN0aW9uLg0KDQpgYGB7cn0NCg0KIyBUaGlzIGlzIGFuIFIgY2h1bmsgZm9yIHJlYWRpbmcgdGhlIFdITyBkYXRhLiBQcm92aWRlIHlvdXIgUiBjb2RlcyBoZXJlOg0Kd2hvIDwtIHJlYWRfY3N2KCJXSE8uY3N2IikNCg0KYGBgDQoNCg0KDQoNCiMjIFRpZHkgVGFzayAxOg0KDQpgYGB7ciwgZWNobz1UUlVFfQ0KDQojIFRoaXMgaXMgYW4gUiBjaHVuayBmb3IgdGlkeSB0YXNrIDEuIFByb3ZpZGUgeW91ciBSIGNvZGVzIGhlcmU6DQoNCndobzEgPC0gd2hvICU+JSBnYXRoZXIoY29kZSwgdmFsdWUsIDU6NjApDQp3aG8xDQoNCmBgYA0KDQoNCg0KIyMgVGlkeSBUYXNrIDI6DQoNCmBgYHtyfQ0KDQojIFRoaXMgaXMgYW4gUiBjaHVuayBmb3IgdGlkeSB0YXNrIDIuIFByb3ZpZGUgeW91ciBSIGNvZGVzIGhlcmU6DQoNCndobzIgPC0gd2hvMSAlPiUgc2VwYXJhdGUoY29kZSwgYygibmV3IiwgInZhciIsICJzZXgiKSwgc2VwID0gIl8iKQ0Kd2hvMiA8LSB3aG8yICU+JSBzZXBhcmF0ZShzZXgsIGMoInNleCIsICJhZ2UiKSwgc2VwID0gIig/PD1bQS1aYS16XSkoPz1bMC05OTk5XSkiKQ0Kd2hvMg0KDQpgYGANCg0KDQojIyBUaWR5IFRhc2sgMzoNCg0KYGBge3J9DQoNCiMgVGhpcyBpcyBhbiBSIGNodW5rIGZvciB0aWR5IHRhc2sgMy4gUHJvdmlkZSB5b3VyIFIgY29kZXMgaGVyZToNCndobzMgPC0gd2hvMiAlPiUgc3ByZWFkKHZhciwgdmFsdWUpDQp3aG8zDQoNCg0KDQpgYGANCg0KDQojIyBUaWR5IFRhc2sgNDogDQoNCmBgYHtyfQ0KDQojIFRoaXMgaXMgYSBjaHVuayBmb3IgVGFzayA0LiBQcm92aWRlIHlvdXIgUiBjb2RlcyBoZXJlOiANCndobzQgPC0gd2hvMyAlPiUgbXV0YXRlKGFnZSA9IGZhY3Rvcih3aG8zJGFnZSwgbGV2ZWxzID0gYygiMDE0IiwgIjE1MjQiLCAiMjUzNCIsICIzNTQ0IiwgIjQ1NTQiLCAi
NTU2NCIsICI2NSIpLA0KICAgICAgICAgICAgICAgICAgbGFiZWxzID0gYygiPDE1IiwgIjE1LTI0IiwgIjI1LTM0IiwgIjM1LTQ0IiwgIjQ1LTU0IiwgIjU1LTY0IiwgIjY1Pj0iKSwgb3JkZXJlZCA9IFRSVUUpKQ0Kd2hvNA0KDQpgYGANCg0KIyMgVGFzayA1OiBGaWx0ZXIgJiBTZWxlY3QNCg0KYGBge3J9DQoNCiMgVGhpcyBpcyBhIGNodW5rIGZvciBUYXNrIDUuIFByb3ZpZGUgeW91ciBSIGNvZGVzIGhlcmU6IA0Kd2hvNSA8LSB3aG80ICU+JSBzZWxlY3QoLWMoaXNvMiwgbmV3KSkNCndobzUNCg0KV0hPX3N1YnNldCA8LSB3aG81ICU+JSBmaWx0ZXIoY291bnRyeSA9PSBjKCJBbGJhbmlhIiwgIkphcGFuIiwgIkdyZWVjZSIpKQ0KV0hPX3N1YnNldA0KDQoNCmBgYA0KDQoNCg0KIyMgUmVhZCBTcGVjaWVzIGFuZCBTdXJ2ZXlzIGRhdGEgc2V0cw0KDQpgYGB7cn0NCg0KIyBUaGlzIGlzIGFuIFIgY2h1bmsgZm9yIHJlYWRpbmcgdGhlIFNwZWNpZXMgYW5kIFN1cnZleXMgZGF0YSBzZXRzLiBQcm92aWRlIHlvdXIgUiBjb2RlcyBoZXJlOg0Kc3BlY2llcyA8LSByZWFkX2Nzdigic3BlY2llcy5jc3YiKQ0Kc3VydmV5cyA8LSByZWFkX2Nzdigic3VydmV5cy5jc3YiKQ0KDQpgYGANCg0KDQoNCg0KIyMgVGFzayA2OiBKb2luICANCg0KYGBge3J9DQoNCiMgVGhpcyBpcyBhIGNodW5rIGZvciBUYXNrIDYuIFByb3ZpZGUgeW91ciBSIGNvZGVzIGhlcmU6IA0Kc3VydmV5c19jb21iaW5lZCA8LSBsZWZ0X2pvaW4oc3VydmV5cywgc3BlY2llcywga2V5ID0gInNwZWNpZXNfaWQiKQ0Kc3VydmV5c19jb21iaW5lZA0KDQpgYGANCg0KDQoNCiMjIFRhc2sgNzogQ2FsY3VsYXRlIA0KDQpgYGB7cn0NCg0KIyBUaGlzIGlzIGEgY2h1bmsgZm9yIFRhc2sgNy4gUHJvdmlkZSB5b3VyIFIgY29kZXMgaGVyZTogDQpzdXJ2ZXlzX2NvbWJpbmVkICU+JSBmaWx0ZXIoc3BlY2llc19pZCA9PSAiRE0iKSAlPiUgZ3JvdXBfYnkobW9udGgpICU+JSANCiAgc3VtbWFyaXNlKCdBdmcgV2VpZ2h0JyA9IG1lYW4od2VpZ2h0LCBuYS5ybSA9IFRSVUUpLCAnQXZnIEZvb3QgTGVuZ2h0JyA9IG1lYW4oaGluZGZvb3RfbGVuZ3RoLCBuYS5ybSA9IFRSVUUpKQ0KDQpgYGANCg0KDQoNCiMjIFRhc2sgODogTWlzc2luZyBWYWx1ZXMNCg0KYGBge3J9DQoNCiMgVGhpcyBpcyBhIGNodW5rIGZvciBUYXNrIDguIFByb3ZpZGUgeW91ciBSIGNvZGVzIGhlcmU6IA0Kc3VydmV5c19jb21iaW5lZF95ZWFyIDwtIHN1cnZleXNfY29tYmluZWQgJT4lIGZpbHRlcih5ZWFyID09ICIyMDAwIikNCnN1cnZleXNfY29tYmluZWRfeWVhcg0KDQojRGlzcGxheWluZyB0aGUgbnVtYmVyIG9mIE5BIG9ic2VydmF0aW9ucyBpbiB3ZWlnaHQgZm9yIHRoZSB5ZWFyIDIwMDANCnN1cnZleXNfY29tYmluZWRfeWVhciAlPiUgZ3JvdXBfYnkoc3BlY2llc19pZCkgJT4lIHN1bW1hcmlzZSgnV2VpZ2h0IE5BcycgPSBzdW0oaXMubmEod2VpZ2h0KSkpICANCg0KI0ltcHV0aW5nIHRoZSBtZWFucyBvZiBlYWNoIHNw
ZWNpZXMgd2VpZ2h0IGludG8gTkEgb2JzZXJ2YXRpb25zDQpzdXJ2ZXlzX3dlaWdodF9pbXB1dGVkIDwtIHN1cnZleXNfY29tYmluZWRfeWVhciAlPiUgZ3JvdXBfYnkoc3BlY2llc19pZCkgJT4lIG11dGF0ZSh3ZWlnaHQgPSBpZmVsc2UoaXMubmEod2VpZ2h0KSwgbWVhbih3ZWlnaHQsIG5hLnJtID0gVFJVRSksIHdlaWdodCkpDQpzdXJ2ZXlzX3dlaWdodF9pbXB1dGVkICU+JSBncm91cF9ieShzcGVjaWVzX2lkKSAlPiUgc3VtbWFyaXNlKCdXZWlnaHQgTkFzJyA9IHN1bShpcy5uYSh3ZWlnaHQpKSkNCg0KDQoNCg0KYGBgDQoNCg0KDQoNCg0KIyMgVGFzayA5OiBJbmNvbnNpc3RlbmNpZXMgb3IgU3BlY2lhbCBWYWx1ZXMNCg0KYGBge3J9DQoNCiMgVGhpcyBpcyBhIGNodW5rIGZvciBUYXNrIDkuIFByb3ZpZGUgeW91ciBSIGNvZGVzIGhlcmU6IA0KDQojQ2hlY2tpbmcgd2VpZ2h0IGNvbHVtbiBmb3IgaW5maW5pdGUgdmFsdWVzDQpzdW0oaXMuaW5maW5pdGUoc3VydmV5c193ZWlnaHRfaW1wdXRlZCR3ZWlnaHQpKQ0KI0NoZWNraW5nIHdlaWdodCBjb2x1bW4gZm9yIE5hTiB2YWx1ZXMNCnN1bShpcy5uYW4oc3VydmV5c193ZWlnaHRfaW1wdXRlZCR3ZWlnaHQpKQ0KDQoNCiNObyBpbmZpbml0ZSB2YWx1ZXMgd2VyZSBmb3VuZCwgaG93ZXZlciB0aGUgd2VpZ2h0IGNvbHVtbiBoYXMgNjggTmFOIHZhbHVlcy4NCg0KI1RoaXMgaXMgZHVlIHRvIHNvbWUgc3BlY2llcyBub3QgaGF2aW5nIGFueSB3ZWlnaHQgdmFsdWVzLCBzbyB0aGV5IHdlcmUgdW5hYmxlIHRvIGJlIGltcHV0ZWQgdXNpbmcgdGhlIG1lYW4gd2VpZ2h0IG9mIHRoYXQgc3BlY2llcy4gVGhlIGNvZGUgcnVuIGluIHRhc2sgOCB3b3VsZCBoYXZlIGltcHV0ZWQgdGhlIG1lYW4gb2YgTkEgaW50byBzcGVjaWVzIHdpdGhvdXQgYW55IHdlaWdodCBvYnNlcnZhdGlvbnMgcmVzdWx0aW5nIGluIE5hTi4NCg0KYGBgDQoNCg0KDQoNCiMjIFRhc2sgMTA6IE91dGxpZXJzDQoNCmBgYHtyfQ0KDQojIFRoaXMgaXMgYSBjaHVuayBmb3IgVGFzayAxMC4gUHJvdmlkZSB5b3VyIFIgY29kZXMgaGVyZTogDQojQ3JlYXRpbmcgYSBib3hwbG90IG9mIHRoZSBkYXRhDQpzdXJ2ZXlzX2NvbWJpbmVkJGhpbmRmb290X2xlbmd0aCAlPiUgYm94cGxvdChNYWluID0gIkJveHBsb3Qgb2YgSGluZGZvb3QgTGVuZ3RoIikgDQojRnJvbSB0aGUgYm94cGxvdCwgMiBvdXRsaWVycyBjYW4gYmUgb2JzZXJ2ZWQNCg0KI0NhbGN1bGF0aW5nIHotc2NvcmVzIGZvciBkYXRhDQp6c2NvcmVzIDwtIG5hLm9taXQoc3VydmV5c19jb21iaW5lZCRoaW5kZm9vdF9sZW5ndGgpICU+JSBzY29yZXModHlwZSA9ICJ6IikNCiNEaXNwbGF5aW5nIHotc2NvcmVzIGFib3ZlIDMgYXMgdGhhdCBpcyB0aGUgbGltaXQgZm9yIG91dGxpZXJzDQpsZW5ndGggKHdoaWNoKCBhYnMoenNjb3JlcykgPjMpKQ0KI0ZvbGxvd2luZyB0aGUgei1zY29yZSBtZXRob2QsIHRoZXJlIGFyZSBvbmx5IDQgb3V0bGllcnMgaW4gYSBkYXRhc2V0IG9mIDM1
NTQ5IG9ic2VydmF0aW9ucw0KDQojVG8gbWFuYWdlIHRoZSBvdXRsaWVycywgd2Ugd2lsbCBzaW1wbHkgZXhsY2x1ZGUgdGhlbSBzaW5jZSB0aGV5IGFyZSBhIHNtYWxsIHBlcmNlbnRhZ2Ugb2YgdGhlIGRhdGFzZXQNCmhpbmRmb290X25ldyA8LSBzdXJ2ZXlzX2NvbWJpbmVkJGhpbmRmb290X2xlbmd0aFsgLXdoaWNoKGFicyAoenNjb3Jlcyk+MyldDQpsZW5ndGgoc3VydmV5c19jb21iaW5lZCRoaW5kZm9vdF9sZW5ndGgpDQpsZW5ndGgoaGluZGZvb3RfbmV3KQ0KDQoNCg0KYGBgDQoNCg0KDQoNCjxicj4NCjxicj4NCg==