The RMarkdown version of this file can be seen rendered here. The CSV file used in this code is available here. The code in this file further edits the bird_speed_table (CSV), which was originally extracted from Wikipedia here; the Wiki page has since been updated. Because of this update, rather than rewriting the code to redo the initial steps, the processing phase has been broken into two parts: this second part, and the first part, available here.
The final data resulting from these two steps is on GitHub here.
library(dplyr)
Pull the data from GitHub
# Fetch the CSV produced by the initial clean-up phase and load it
csv_url <- "https://raw.githubusercontent.com/dbouquin/Pigeons/master/bird_speed_table.csv"
csv_path <- "bird_speed_table.csv"
download.file(url = csv_url, destfile = csv_path, method = "curl")
bird_speeds <- read.csv(file = csv_path, na.strings = "NA")
# Preview the first rows to confirm the table loaded as expected
head(bird_speeds)
## X bird species family
## 1 1 Peregrine falcon Falco peregrinus Falconidae
## 2 2 Golden eagle Aquila chrysaetos Accipitridae
## 3 3 Grey-headed albatross Thalassarche Chrysostoma Diomedeidae
## 4 4 Gyrfalcon Falco rusticolus Falconidae
## 5 5 White-throated needletail Hirundapus caudacutus Apodidae
## 6 6 Swift Apus apus Apodidae
## avg_h_speed_mph avg_h_speed_km max_h_speed_mph max_h_speed_km
## 1 40-56 65–90 65-71 105–115
## 2 28-32 45–51 80 129
## 3 <NA> <NA> 78.9 127
## 4 50-68 80-110 90 145
## 5 <NA> <NA> 105 169
## 6 <NA> <NA> <NA> <NA>
## avg_d_speed_mph avg_d_speed_km max_airspeed_mph max_airspeed_km
## 1 112 180 242 389
## 2 150 241 200 320
## 3 <NA> <NA> NA NA
## 4 116-130 187–209 130 209
## 5 <NA> <NA> NA NA
## 6 <NA> <NA> 106 171
# Keep only the columns needed later: discard the "X" column, the taxonomy
# columns, and every speed column other than max_airspeed_mph.
drops <- c(
  "X", "species", "family",
  "avg_h_speed_km", "avg_h_speed_mph",
  "max_h_speed_km", "max_h_speed_mph",
  "avg_d_speed_km", "avg_d_speed_mph",
  "max_airspeed_km"
)
# drop = FALSE keeps the result a data frame even if only one column survives
bird_speeds <- bird_speeds[, !(names(bird_speeds) %in% drops), drop = FALSE]
# Inspect the remaining columns and their types
str(bird_speeds)
## 'data.frame': 13 obs. of 2 variables:
## $ bird : Factor w/ 13 levels "Canvasback","Eider",..: 8 5 6 7 13 11 3 4 10 9 ...
## $ max_airspeed_mph: int 242 200 NA 130 NA 106 100 95 88 81 ...
# Keep only the rows in which every column has a value; this discards the
# birds with no recorded maximum airspeed
has_all_values <- complete.cases(bird_speeds)
bird_speeds <- bird_speeds[has_all_values, ]
bird_speeds
## bird max_airspeed_mph
## 1 Peregrine falcon 242
## 2 Golden eagle 200
## 4 Gyrfalcon 130
## 6 Swift 106
## 7 Eurasian hobby 100
## 8 Frigatebird 95
## 9 Spur-winged goose 88
## 10 Red-breasted merganser 81
## 11 Canvasback 73
## 12 Eider 70
## 13 Teal 69
Convert max_airspeed_mph from miles per hour to yards per minute
# mph -> yards per minute: 1760 yards in a mile, 60 minutes in an hour
bird_speeds <- bird_speeds %>%
  mutate(max_airspeed_ym = max_airspeed_mph * 1760 / 60)
bird_speeds
## bird max_airspeed_mph max_airspeed_ym
## 1 Peregrine falcon 242 7098.667
## 2 Golden eagle 200 5866.667
## 3 Gyrfalcon 130 3813.333
## 4 Swift 106 3109.333
## 5 Eurasian hobby 100 2933.333
## 6 Frigatebird 95 2786.667
## 7 Spur-winged goose 88 2581.333
## 8 Red-breasted merganser 81 2376.000
## 9 Canvasback 73 2141.333
## 10 Eider 70 2053.333
## 11 Teal 69 2024.000
Write the final data to a CSV file
# Save the cleaned table. row.names = FALSE keeps the data frame's row names
# out of the file, so reading it back does not produce an extra "X" column.
write.csv(bird_speeds, file = "final_bird_speeds.csv", row.names = FALSE)