The following code contains (fictional) data from 200 flights to two competing R pirate conventions taking place in Basel (BSL) and Zurich (ZRH). The flights all came from one of two airlines: Air Blackbeard (AB) and Scimitar Air (SA). The data are separated into four vectors:
# Airline code of each flight, one element per flight (200 values in total):
# "AB" (Air Blackbeard) or "SA" (Scimitar Air).
# The exercises treat airline, destination, delay and flight.num as parallel
# vectors: element i of each one describes the same flight.
airline <- c("AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "SA",
"AB", "AB", "AB", "AB", "SA", "AB", "SA", "SA", "AB", "SA", "AB",
"SA", "AB", "SA", "AB", "AB", "SA", "AB", "AB", "AB", "AB", "AB",
"AB", "AB", "SA", "SA", "SA", "SA", "SA", "AB", "SA", "AB", "AB",
"SA", "SA", "AB", "SA", "SA", "AB", "SA", "AB", "SA", "AB", "SA",
"SA", "SA", "SA", "SA", "AB", "AB", "AB", "SA", "SA", "SA", "AB",
"SA", "AB", "SA", "AB", "AB", "SA", "SA", "SA", "SA", "AB", "AB",
"SA", "AB", "AB", "SA", "AB", "SA", "AB", "SA", "AB", "SA", "SA",
"AB", "AB", "SA", "AB", "SA", "AB", "SA", "AB", "SA", "SA", "SA",
"AB", "SA", "AB", "AB", "AB", "AB", "AB", "AB", "SA", "SA", "SA",
"SA", "SA", "AB", "SA", "AB", "AB", "AB", "AB", "SA", "SA", "SA",
"SA", "AB", "AB", "SA", "SA", "SA", "AB", "AB", "SA", "SA", "SA",
"AB", "AB", "AB", "SA", "SA", "AB", "SA", "AB", "AB", "AB", "AB",
"AB", "SA", "SA", "SA", "AB", "SA", "SA", "SA", "AB", "SA", "SA",
"AB", "AB", "AB", "SA", "AB", "AB", "SA", "SA", "AB", "SA", "SA",
"AB", "SA", "AB", "AB", "SA", "SA", "SA", "SA", "AB", "AB", "SA",
"SA", "AB", "SA", "AB", "SA", "AB", "AB", "SA", "SA", "SA", "SA",
"SA", "AB", "SA", "SA", "AB", "AB", "AB", "AB", "SA", "SA", "AB",
"SA", "SA", "SA")
# Destination airport code of each flight, one element per flight (200 values):
# "ZRH" or "BSL" (the exercise text refers to these as Zurich and Basel).
destination <- c("ZRH", "ZRH", "ZRH", "ZRH", "ZRH", "ZRH", "ZRH", "ZRH", "BSL",
"BSL", "BSL", "ZRH", "ZRH", "ZRH", "BSL", "ZRH", "BSL", "BSL",
"ZRH", "BSL", "ZRH", "BSL", "ZRH", "BSL", "ZRH", "BSL", "BSL",
"ZRH", "ZRH", "ZRH", "ZRH", "ZRH", "ZRH", "ZRH", "BSL", "BSL",
"BSL", "BSL", "ZRH", "ZRH", "BSL", "ZRH", "BSL", "BSL", "BSL",
"ZRH", "BSL", "BSL", "ZRH", "ZRH", "BSL", "BSL", "BSL", "BSL",
"BSL", "ZRH", "ZRH", "ZRH", "ZRH", "ZRH", "BSL", "ZRH", "BSL",
"BSL", "ZRH", "ZRH", "ZRH", "BSL", "ZRH", "BSL", "BSL", "BSL",
"BSL", "BSL", "ZRH", "BSL", "BSL", "ZRH", "ZRH", "ZRH", "ZRH",
"BSL", "ZRH", "BSL", "ZRH", "ZRH", "BSL", "ZRH", "ZRH", "BSL",
"BSL", "BSL", "ZRH", "ZRH", "ZRH", "BSL", "BSL", "BSL", "BSL",
"BSL", "ZRH", "ZRH", "ZRH", "ZRH", "ZRH", "ZRH", "BSL", "BSL",
"BSL", "ZRH", "BSL", "ZRH", "BSL", "ZRH", "ZRH", "BSL", "ZRH",
"ZRH", "BSL", "ZRH", "ZRH", "ZRH", "BSL", "ZRH", "BSL", "BSL",
"ZRH", "ZRH", "BSL", "ZRH", "BSL", "ZRH", "BSL", "ZRH", "BSL",
"BSL", "ZRH", "BSL", "BSL", "ZRH", "ZRH", "ZRH", "ZRH", "ZRH",
"ZRH", "BSL", "ZRH", "BSL", "BSL", "BSL", "ZRH", "BSL", "BSL",
"ZRH", "BSL", "BSL", "BSL", "ZRH", "ZRH", "BSL", "BSL", "ZRH",
"BSL", "BSL", "ZRH", "BSL", "BSL", "ZRH", "BSL", "BSL", "ZRH",
"BSL", "BSL", "ZRH", "BSL", "BSL", "ZRH", "BSL", "ZRH", "BSL",
"BSL", "ZRH", "BSL", "BSL", "BSL", "BSL", "BSL", "ZRH", "BSL",
"ZRH", "ZRH", "ZRH", "ZRH", "ZRH", "BSL", "BSL", "ZRH", "BSL",
"BSL", "BSL")
# Delay of each flight, one element per flight (200 values). The exercise text
# measures these in minutes. Values here range from -1 to 14; a negative value
# presumably means the flight arrived early (not stated in the text).
delay <- c(12, 10, 8, 12, 9, 8, 10, 11, 1, 3, 1, 10, 11, 10, 3, 10, 1,
4, 11, 4, 9, 2, 10, 2, 9, -1, 3, 10, 12, 9, 10, 9, 9, 9, 3, 4,
2, 4, 13, 11, 3, 11, 1, 4, 4, 10, 3, 3, 9, 14, 0, 3, 0, 4, 3,
13, 11, 13, 10, 10, 1, 13, 2, 2, 11, 14, 8, 2, 10, 0, 3, 3, 3,
1, 12, 0, 4, 11, 10, 14, 11, 3, 9, 4, 10, 12, 2, 9, 11, 4, 1,
2, 8, 14, 11, 2, 3, 5, -1, 2, 9, 8, 10, 11, 10, 10, 2, 4, 3,
13, 3, 10, 2, 12, 13, 1, 9, 13, 3, 14, 12, 10, 1, 14, 5, 3, 12,
9, 3, 13, 3, 10, 0, 9, 4, 4, 10, 5, -1, 10, 11, 9, 10, 12, 13,
4, 10, 2, 3, 4, 11, 2, 5, 11, 0, 1, 5, 8, 10, 2, 5, 11, 3, 3,
10, 4, 0, 8, 3, 3, 14, 3, -1, 9, 3, 3, 11, 3, 8, 3, 1, 10, 2,
1, 4, 4, 3, 9, 3, 14, 10, 9, 10, 10, 2, 2, 10, 3, 3, 5)
# Flight number of each flight, one element per flight (200 values). Each
# value is a string of the form "<airline code>.<three-digit number>", and the
# prefix matches the corresponding element of airline.
flight.num <- c("AB.996", "AB.573", "AB.997", "AB.189", "AB.293", "AB.787",
"AB.193", "AB.276", "AB.577", "SA.677", "AB.712", "AB.452", "AB.473",
"AB.226", "SA.876", "AB.385", "SA.392", "SA.391", "AB.186", "SA.591",
"AB.949", "SA.347", "AB.620", "SA.775", "AB.912", "AB.678", "SA.420",
"AB.762", "AB.259", "AB.658", "AB.691", "AB.757", "AB.492", "AB.714",
"SA.767", "SA.555", "SA.425", "SA.832", "SA.986", "AB.892", "SA.640",
"AB.596", "AB.470", "SA.449", "SA.920", "AB.733", "SA.498", "SA.246",
"AB.141", "SA.679", "AB.133", "SA.626", "AB.987", "SA.265", "SA.111",
"SA.583", "SA.242", "SA.828", "AB.766", "AB.868", "AB.267", "SA.195",
"SA.329", "SA.638", "AB.754", "SA.784", "AB.332", "SA.649", "AB.147",
"AB.653", "SA.258", "SA.515", "SA.424", "SA.843", "AB.899", "AB.134",
"SA.149", "AB.476", "AB.306", "SA.514", "AB.810", "SA.523", "AB.428",
"SA.479", "AB.513", "SA.249", "SA.931", "AB.662", "AB.529", "SA.838",
"AB.668", "SA.477", "AB.695", "SA.709", "AB.971", "SA.416", "SA.444",
"SA.485", "AB.855", "SA.414", "AB.669", "AB.558", "AB.334", "AB.131",
"AB.440", "AB.884", "SA.216", "SA.801", "SA.304", "SA.795", "SA.534",
"AB.499", "SA.961", "AB.833", "AB.983", "AB.213", "AB.343", "SA.756",
"SA.364", "SA.793", "SA.374", "AB.447", "AB.540", "SA.317", "SA.910",
"SA.890", "AB.602", "AB.610", "SA.517", "SA.731", "SA.581", "AB.104",
"AB.703", "AB.120", "SA.924", "SA.785", "AB.220", "SA.275", "AB.207",
"AB.977", "AB.867", "AB.239", "AB.623", "SA.802", "SA.940", "SA.403",
"AB.158", "SA.352", "SA.646", "SA.399", "AB.652", "SA.215", "SA.121",
"AB.478", "AB.851", "AB.585", "SA.726", "AB.990", "AB.541", "SA.365",
"SA.156", "AB.686", "SA.401", "SA.282", "AB.198", "SA.445", "AB.606",
"AB.861", "SA.545", "SA.929", "SA.303", "SA.650", "AB.934", "AB.927",
"SA.720", "SA.622", "AB.436", "SA.123", "AB.139", "SA.956", "AB.281",
"AB.772", "SA.509", "SA.586", "SA.906", "SA.311", "SA.171", "AB.692",
"SA.750", "SA.616", "AB.808", "AB.660", "AB.361", "AB.339", "SA.397",
"SA.301", "AB.707", "SA.286", "SA.711", "SA.881")
What were the airlines of the 10th through the 20th flights on the list?
What was the destination of the last flight on the list? (Hint: don’t write the indexing number directly; instead, index the vector using the length() function with the appropriate argument.)
How many flights were from each airline? a) Use the table() function. b) Use logical indexing
How many flights were to each destination? a) Use the table() function. b) Use logical indexing
How many flights were there from airline Air Blackbeard? Use logical indexing
How many flights were there to Basel? Use logical indexing.
What percent of flights had no delay?
What percent of Air Blackbeard’s flights had a delay greater than 10 minutes?
What was the median delay of flights to Basel that had a delay of EITHER less than -5 minutes or greater than 5 minutes?
What were the flight numbers of all Scimitar Air flights that were late?
Based on what you’ve learned so far, if you had to book an airline, which one would you choose?
What was the mean delay of Air Blackbeard flights TO Basel?
What was the mean delay of Air Blackbeard flights TO Zurich?
What was the mean delay of Scimitar Air flights TO Basel?
What was the mean delay of Scimitar Air flights TO Zurich?
Based on what you’ve learned now, if you had to book an airline, which one would you choose?
What you’ve just seen is an example of Simpson’s Paradox. If you want to learn more, check out the Wikipedia page.
Run the following code to screw up some of the vector values:
# Deliberately corrupt four randomly chosen entries of delay and of airline so
# that the clean-up exercises have something to fix. No seed is set here, so
# the affected positions (and the bogus delay values) differ on every run.

# Overwrite four delays with implausible values drawn around -1000.
# seq_along() replaces 1:length(), which misbehaves on zero-length vectors.
delay[sample(seq_along(delay), size = 4)] <-
  round(rnorm(n = 4, mean = -1000, sd = 5), 0)

# Overwrite four airline codes with junk strings. The positions are drawn from
# airline itself rather than borrowing the length of delay, so the statement
# no longer relies on the two vectors happening to have the same length.
airline[sample(seq_along(airline), size = 4)] <-
  c("jkfl;ads", "hiuf", "niav", "jfuhio")
Well great, some of the delay and airline values are now screwed up. Why did you do that?! You shouldn’t do everything you’re told. Now you have to fix it.
Replace any delay values less than -30 minutes or greater than 30 minutes with NA values. Use logical indexing.
Replace any invalid airline values with NA. Use logical indexing.