The RMarkdown version of this file can be seen rendered here. The CSV file used in this code is available here. The code in this file further edits the bird_speed_table (CSV), which was extracted earlier from Wikipedia here; the Wiki page has since been updated. Because of this update, rather than rewriting the code to redo the initial steps, this processing phase has been broken into two parts: this second part, and the first, which is available here.

The final data resulting from these two steps is on GitHub here.

library(dplyr)

Pull the data from GitHub

# Download the first CSV that was the result of the initial clean-up phase.
# With method = "curl", download.file() returns the external program's exit
# status (0 for success) and only warns on failure, so check it explicitly
# rather than risk reading a missing or stale local file.
csv_url <- "https://raw.githubusercontent.com/dbouquin/Pigeons/master/bird_speed_table.csv"
csv_file <- "bird_speed_table.csv"
download_status <- download.file(csv_url, csv_file, method = "curl")
if (download_status != 0) {
  stop("Download of ", csv_url, " failed with status ", download_status,
       call. = FALSE)
}
# Read the downloaded table, treating the literal string "NA" as missing
bird_speeds <- read.csv(csv_file, na.strings = "NA")
# check the structure to make sure it was properly loaded
head(bird_speeds)
##   X                      bird                  species       family
## 1 1          Peregrine falcon         Falco peregrinus   Falconidae
## 2 2              Golden eagle        Aquila chrysaetos Accipitridae
## 3 3     Grey-headed albatross Thalassarche Chrysostoma  Diomedeidae
## 4 4                 Gyrfalcon         Falco rusticolus   Falconidae
## 5 5 White-throated needletail    Hirundapus caudacutus     Apodidae
## 6 6                     Swift                Apus apus     Apodidae
##   avg_h_speed_mph avg_h_speed_km max_h_speed_mph max_h_speed_km
## 1           40-56          65–90           65-71        105–115
## 2           28-32          45–51              80            129
## 3            <NA>           <NA>            78.9            127
## 4           50-68         80-110              90            145
## 5            <NA>           <NA>             105            169
## 6            <NA>           <NA>            <NA>           <NA>
##   avg_d_speed_mph avg_d_speed_km max_airspeed_mph max_airspeed_km
## 1             112            180              242             389
## 2             150            241              200             320
## 3            <NA>           <NA>               NA              NA
## 4         116-130        187–209              130             209
## 5            <NA>           <NA>               NA              NA
## 6            <NA>           <NA>              106             171
# Remove the "X" column that got created and the columns we will not need
# later. Each column name is listed exactly once.
drops <- c(
  "X", "species", "family",
  "avg_h_speed_km", "avg_h_speed_mph",
  "max_h_speed_km", "max_h_speed_mph",
  "avg_d_speed_km", "avg_d_speed_mph",
  "max_airspeed_km"
)
# drop = FALSE keeps the result a data frame even if only one column survives
bird_speeds <- bird_speeds[, !(names(bird_speeds) %in% drops), drop = FALSE]
# Show the structure of what is left
str(bird_speeds)
## 'data.frame':    13 obs. of  2 variables:
##  $ bird            : Factor w/ 13 levels "Canvasback","Eider",..: 8 5 6 7 13 11 3 4 10 9 ...
##  $ max_airspeed_mph: int  242 200 NA 130 NA 106 100 95 88 81 ...
# Keep only the rows with no missing values. complete.cases() looks at every
# remaining column, so this removes the records lacking a max airspeed (and
# any record with a missing value elsewhere).
has_all_values <- complete.cases(bird_speeds)
bird_speeds <- bird_speeds[has_all_values, ]
# Print the filtered table
bird_speeds
##                      bird max_airspeed_mph
## 1        Peregrine falcon              242
## 2            Golden eagle              200
## 4               Gyrfalcon              130
## 6                   Swift              106
## 7          Eurasian hobby              100
## 8             Frigatebird               95
## 9       Spur-winged goose               88
## 10 Red-breasted merganser               81
## 11             Canvasback               73
## 12                  Eider               70
## 13                   Teal               69

Convert max_airspeed_mph from miles per hour to yards per minute

# Add max_airspeed_ym: the max airspeed expressed in yards per minute.
# mph is multiplied by 1760 (yards in a mile) and then divided by 60
# (minutes in an hour), in that order.
yards_per_mile <- 1760
minutes_per_hour <- 60
bird_speeds <- bird_speeds %>%
  mutate(max_airspeed_ym = (max_airspeed_mph * yards_per_mile) / minutes_per_hour)
# Print the table with the new column
bird_speeds
##                      bird max_airspeed_mph max_airspeed_ym
## 1        Peregrine falcon              242        7098.667
## 2            Golden eagle              200        5866.667
## 3               Gyrfalcon              130        3813.333
## 4                   Swift              106        3109.333
## 5          Eurasian hobby              100        2933.333
## 6             Frigatebird               95        2786.667
## 7       Spur-winged goose               88        2581.333
## 8  Red-breasted merganser               81        2376.000
## 9              Canvasback               73        2141.333
## 10                  Eider               70        2053.333
## 11                   Teal               69        2024.000

Write the final file to a csv

# Write the final table to CSV. row.names = FALSE stops write.csv() from adding
# an unnamed row-number column, which would come back as a stray "X" column
# when the file is read again with read.csv().
write.csv(bird_speeds, file = "final_bird_speeds.csv", row.names = FALSE)