巨量資料

資料集:airline

library("Rhipe")
# Start the RHIPE session before any HDFS reads.
rhinit()

# Per-airport traffic volumes read from HDFS. Each element of `counts` is
# indexed below as [[1]] (the airport code) and [[2]] (a row of three counts,
# stored as inb / oub / all -- presumably inbound, outbound and total).
counts <- rhread("/airline/volume")
ap <- read.csv("~/下載/airports.csv")

aircode <- unlist(lapply(counts, "[[", 1))
count <- do.call("rbind", lapply(counts, "[[", 2))
results <- data.frame(aircode = aircode, inb = count[, 1], oub = count[, 2],
    all = count[, 3], stringsAsFactors = FALSE)

# Busiest airports first.
results <- results[order(results$all, decreasing = TRUE), ]

# Look every code up once. match() returns NA for codes missing from the
# airports table, which keeps all derived columns exactly nrow(results) long
# (a per-row sapply() lookup returns a ragged list in that case and the
# column assignment then fails with a row-count mismatch).
idx <- match(results$aircode, ap$iata)
found <- !is.na(idx)

# Airport display name; fall back to the raw code when the airport is unknown.
results$airport <- ifelse(found, as.character(ap[idx, "airport"]),
    results$aircode)

# "lat:long" location string; NA when the airport is unknown.
results$latlong <- ifelse(found,
    paste(ap[idx, "lat"], ap[idx, "long"], sep = ":"),
    NA_character_)

library("googleVis")

機場名稱,以及各機場的進出流量

# Geo chart limited to the US region: one marker per "lat:long" location,
# sized by the `all` column, with the airport name shown on hover.
G <- gvisGeoChart(
    data = results,
    locationvar = "latlong",
    sizevar = "all",
    hovervar = "airport",
    options = list(region = "US")
)

# Print only the "chart" part of the gvis object.
print(G, tag = "chart")
# Route (airport pair) counts read from HDFS. Each element of `b` is indexed
# below as [[1]] (a pair of airport codes) and [[2]] (the count for that pair).
b <- rhread("/airline/ijjoin")
y <- do.call("rbind", lapply(b, "[[", 1))
results <- data.frame(a = y[, 1], b = y[, 2], count = do.call("rbind", lapply(b,
    "[[", 2)), stringsAsFactors = FALSE)

# Most frequent pairs first, plus the cumulative share of the total count.
results <- results[order(results$count, decreasing = TRUE), ]
results$cumprop <- cumsum(results$count)/sum(results$count)

ap <- read.csv("~/下載/airports.csv")

# Endpoint coordinates via a vectorised lookup. match() yields NA for codes
# that are missing from the airports table, so every coordinate column stays
# aligned with `results` (unlist() over per-row lookups would silently drop
# unmatched airports and shift or recycle the remaining values).
a.lat <- ap[match(results$a, ap$iata), c("lat", "long")]
results$a.lat <- a.lat[, "lat"]
results$a.long <- a.lat[, "long"]

b.lat <- ap[match(results$b, ap$iata), c("lat", "long")]
results$b.lat <- b.lat[, "lat"]
results$b.long <- b.lat[, "long"]

前二十名航線 (起點、終點)

library("maps")
# Base map of US state outlines.
map(database = "state", col = "grey")

# Top 20 rows of `results`. head() returns fewer rows when fewer exist,
# whereas results[1:20, ] would pad the table with all-NA rows.
top <- head(results, 20)

# Only routes with coordinates for both endpoints can be drawn.
coord_cols <- c("a.long", "a.lat", "b.long", "b.lat")
drawable <- top[complete.cases(top[, coord_cols]), ]

if (nrow(drawable) > 0) {
    # One black segment per route, drawn before the markers so that the
    # markers sit on top of the lines.
    segments(drawable$a.long, drawable$a.lat, drawable$b.long, drawable$b.lat,
        col = "black")

    # Equal-sized green circle at every route endpoint.
    lngs <- c(drawable$a.long, drawable$b.long)
    lats <- c(drawable$a.lat, drawable$b.lat)
    symbols(lngs, lats, lwd = 0.5, bg = "green", circles = rep(1, length(lngs)),
        inches = 0.05, add = TRUE)
}

plot of chunk unnamed-chunk-4