資料集 : airline
# Attach Rhipe and call rhinit() before reading anything
# (presumably this initialises the RHIPE session -- confirm against Rhipe docs).
library(Rhipe)
rhinit()
# Read the key/value pairs stored under /airline/volume into a list.
counts <- rhread("/airline/volume")
MapReduce處理後的資料格式
# Preview the first six key/value pairs of `counts`.
head(counts, n = 6L)
## [[1]]
## [[1]][[1]]
## [1] "ABE"
##
## [[1]][[2]]
## ABE ABE ABE
## 278 281 559
##
##
## [[2]]
## [[2]][[1]]
## [1] "ABQ"
##
## [[2]][[2]]
## ABQ ABQ ABQ
## 2863 2882 5745
##
##
## [[3]]
## [[3]][[1]]
## [1] "ACV"
##
## [[3]][[2]]
## ACV ACV ACV
## 301 299 600
##
##
## [[4]]
## [[4]][[1]]
## [1] "ACY"
##
## [[4]][[2]]
## ACY ACY ACY
## 1 0 1
##
##
## [[5]]
## [[5]][[1]]
## [1] "AEX"
##
## [[5]][[2]]
## AEX AEX AEX
## 27 28 55
##
##
## [[6]]
## [[6]][[1]]
## [1] "ALB"
##
## [[6]][[2]]
## ALB ALB ALB
## 892 896 1788
airports.csv 裡面的格式
# Load the airport reference table; the code below uses its iata, airport,
# lat and long columns.
ap <- read.csv(file = "~/下載/airports.csv")
head(ap, n = 6L)
## iata airport city state country lat long
## 1 00M Thigpen Bay Springs MS USA 31.95 -89.23
## 2 00R Livingston Municipal Livingston TX USA 30.69 -95.02
## 3 00V Meadow Lake Colorado Springs CO USA 38.95 -104.57
## 4 01G Perry-Warsaw Perry NY USA 42.74 -78.05
## 5 01J Hilliard Airpark Hilliard FL USA 30.69 -81.91
## 6 01M Tishomingo County Belmont MS USA 34.49 -88.20
# Every element of `counts` is a pair: [[1]] holds the airport code and [[2]]
# a vector of three counts (presumably inbound, outbound and their total,
# going by the column names chosen below -- confirm against the MapReduce job).
aircode <- unlist(lapply(counts, function(pair) pair[[1]]))
# Stack the count vectors into a matrix with one row per airport.
count <- do.call("rbind", lapply(counts, function(pair) pair[[2]]))
results <- data.frame(
  aircode = aircode,
  inb = count[, 1],
  oub = count[, 2],
  all = count[, 3],
  stringsAsFactors = FALSE
)
最後整理的結果
# Order airports by the `all` column, largest first, then preview the top rows.
results <- results[order(results[["all"]], decreasing = TRUE), , drop = FALSE]
head(results, n = 6L)
## aircode inb oub all
## 160 PHX 15190 15224 30414
## 154 ORD 14549 14558 29107
## 60 DEN 13586 13608 27194
## 111 LAX 11915 11821 23736
## 110 LAS 11841 11626 23467
## 47 CLT 9165 9158 18323
# Attach the airport name for every code with a single vectorised lookup.
# match() yields the first matching row of `ap` (or NA), so each code maps to
# exactly one name even when `ap` contains duplicate IATA codes; the previous
# per-element scan could return several names for one code and turn the
# column into a list.
results$airport <- local({
  idx <- match(results$aircode, ap$iata)
  # as.character() keeps real names even if read.csv() produced a factor.
  nm <- as.character(ap$airport)[idx]
  # Codes without a usable name fall back to the code itself.
  missing_name <- is.na(nm)
  nm[missing_name] <- results$aircode[missing_name]
  nm
})
加上 Airport 與 經緯度 的詳細資料
# Build the "lat:long" string for every airport code with one match() lookup
# instead of scanning `ap` twice per code.
# A code that is absent from `ap` now gets NA; previously it produced an empty
# string vector that rbind() dropped, so the column assignment failed with a
# length mismatch. The result is a plain character vector, not a 1-column matrix.
results$latlong <- local({
  idx <- match(results$aircode, ap$iata)
  coords <- paste(ap$lat[idx], ap$long[idx], sep = ":")
  coords[is.na(idx)] <- NA_character_
  coords
})
head(results)
## aircode inb oub all airport
## 160 PHX 15190 15224 30414 Phoenix Sky Harbor International
## 154 ORD 14549 14558 29107 Chicago O'Hare International
## 60 DEN 13586 13608 27194 Denver Intl
## 111 LAX 11915 11821 23736 Los Angeles International
## 110 LAS 11841 11626 23467 McCarran International
## 47 CLT 9165 9158 18323 Charlotte/Douglas International
## latlong
## 160 33.43416667:-112.0080556
## 154 41.979595:-87.90446417
## 60 39.85840806:-104.6670019
## 111 33.94253611:-118.4080744
## 110 36.08036111:-115.1523333
## 47 35.21401111:-80.94312583
# Stack the count vectors from `counts` into a matrix, one row per airport.
# NOTE(review): `count` is computed the same way earlier in this file and is
# not read again in the visible code -- confirm whether this line is needed.
count <- do.call("rbind", lapply(counts, function(pair) pair[[2]]))
使用 googleVis 套件做視覺化
library("googleVis")
機場名稱,還有機場的出入量
# Geo chart restricted to the US region: locations come from the "lat:long"
# strings, sizes from the `all` column, and hover text from the airport name.
G <- gvisGeoChart(
  data = results,
  locationvar = "latlong",
  sizevar = "all",
  hovervar = "airport",
  options = list(region = "US")
)
# Print the "chart" component of the gvis object.
print(G, "chart")
機場出入量整理結果
# Read the key/value pairs stored under /airline/ijjoin; per the preview,
# each key is a pair of airport codes and each value a single count.
b <- rhread("/airline/ijjoin")
## RHIPE: Read 1494 pairs occupying 40.852 KB, deserializing
head(b, n = 6L)
## [[1]]
## [[1]][[1]]
## [1] "ABE" "ATL"
##
## [[1]][[2]]
## [1] 56
##
##
## [[2]]
## [[2]][[1]]
## [1] "ABE" "CLE"
##
## [[2]][[2]]
## [1] 151
##
##
## [[3]]
## [[3]][[1]]
## [1] "ABE" "CLT"
##
## [[3]][[2]]
## [1] 62
##
##
## [[4]]
## [[4]][[1]]
## [1] "ABE" "CVG"
##
## [[4]][[2]]
## [1] 52
##
##
## [[5]]
## [[5]][[1]]
## [1] "ABE" "JFK"
##
## [[5]][[2]]
## [1] 1
##
##
## [[6]]
## [[6]][[1]]
## [1] "ABE" "MKE"
##
## [[6]][[2]]
## [1] 1
# Stack the two-element keys of `b` into a two-column matrix of airport codes.
y <- do.call("rbind", lapply(b, function(pair) pair[[1]]))
# One row per airport pair, with its count taken from the value of each pair.
results <- data.frame(
  a = y[, 1],
  b = y[, 2],
  count = do.call("rbind", lapply(b, function(pair) pair[[2]])),
  stringsAsFactors = FALSE
)
# Largest counts first, then the running share of the overall total.
results <- results[order(results[["count"]], decreasing = TRUE), , drop = FALSE]
results$cumprop <- cumsum(results$count) / sum(results$count)
# Attach coordinates for both endpoints of every airport pair.
ap <- read.csv("~/下載/airports.csv")
# match() gives one `ap` row per code (NA when the code is unknown), so the
# coordinate columns always have one entry per row of `results`. The previous
# per-row sapply() scan dropped unknown codes during unlist() and then failed
# on the length mismatch.
a.lat <- ap[match(results$a, ap$iata), c("lat", "long")]
results$a.lat <- a.lat$lat
results$a.long <- a.lat$long
b.lat <- ap[match(results$b, ap$iata), c("lat", "long")]
results$b.lat <- b.lat$lat
results$b.long <- b.lat$long
head(results)
## a b count cumprop a.lat a.long b.lat b.long
## 1092 LAS PHX 1984 0.006613 36.08 -115.15 33.43 -112.01
## 1070 LAS LAX 1917 0.013003 36.08 -115.15 33.94 -118.41
## 1130 LAX PHX 1876 0.019257 33.94 -118.41 33.43 -112.01
## 1139 LAX SFO 1783 0.025200 33.94 -118.41 37.62 -122.37
## 603 DAL HOU 1608 0.030560 32.85 -96.85 29.65 -95.28
## 1122 LAX OAK 1424 0.035307 33.94 -118.41 37.72 -122.22
前二十名航線 (起點、終點)
library("maps")
# Draw the US state outlines as the base layer.
map(database = "state", col = "grey")
# head() instead of results[1:20, ] so a table with fewer than 20 rows does
# not gain all-NA filler rows.
top <- head(results, 20)
if (nrow(top) > 0) {
  # One straight segment per airport pair, drawn in a single vectorised call.
  segments(top$a.long, top$a.lat, top$b.long, top$b.lat, col = "black")
  # Endpoint markers go on after all segments so they sit on top of the lines.
  # rbind() interleaves the coordinates so the drawing order stays a1, b1, a2, ...
  lngs <- c(rbind(top$a.long, top$b.long))
  lats <- c(rbind(top$a.lat, top$b.lat))
  symbols(lngs, lats, lwd = 0.5, bg = "green", circles = rep(1, length(lngs)),
    inches = 0.05, add = TRUE)
}