October 16, 2015
library(data.table)
df = fread("2008.csv")
## Read 0.0% of 7009728 rows Read 10.3% of 7009728 rows Read 26.0% of 7009728 rows Read 41.8% of 7009728 rows Read 57.5% of 7009728 rows Read 73.3% of 7009728 rows Read 89.2% of 7009728 rows Read 7009728 rows and 29 (of 29) columns from 0.642 GB file in 00:00:10
# df[rows, columns, by] df[Origin == "JFK",] # select flights from JFK df[, .(Origin, Distance)] # select columns df[, .N, by = UniqueCarrier] # count flights per carrier
df[, date := {
date = paste(Year, Month, DayofMonth, sep = "-")
date = ymd(date)
}]
# get weekday and day of the year
df[, `:=` (
week.day = wday(date, label = T, abbr = T),
year.day = yday(date)
)]
df[, `:=` (
distance = Distance * 1.852,
hr = AirTime / 60
)]
# calculate speed and condense dataframe
dfx = df[, speed := distance / hr]
dfx = dfx[ , .(date, distance, hr, speed, TailNum,
UniqueCarrier, Origin, Dest)]
dfx[is.na(speed), speed := 0]
dfx[is.infinite(speed), speed := 0]
data(airports, package = "nycflights13")
setDT(airports)
dest.counts = df[Origin == "JFK", .N, by = Dest]
dest.counts = merge(dest.counts,
airports,
by.x = "Dest", by.y = "faa",
all.x = T)