資料來源

105上半年台北市各地非營業用戶售電量

抓出民國105年1月份的資料

power <- read.csv("105-power.csv")
power.10501 <- filter(power, Ym == "10501")
taiwan <- readRDS("taiwan.Rds")
.tmp <- taiwan[taiwan$CODE1 %in% power.10501$code1,]
rownames(power.10501) <- power.10501$code1
power.10501.spdf <- SpatialPolygonsDataFrame(
  unionSpatialPolygons(.tmp, .tmp@data$CODE1),
  power.10501,
  match.ID = TRUE)
g.power <- mapview(power.10501.spdf, zcol = "gen")
g.power

台北市校園數位氣象網

抓出2016年1月份每間學校每天的平均溫度,再取平均

school.temperature <- readLines("school-temperature.txt")
school.temperature <- substring(school.temperature, 13, nchar(school.temperature) - 2) %>%
  lapply(jsonlite::fromJSON)
school.temperature <- lapply(school.temperature, function(x) {
  data.frame(id = x$result$Id[1], name = x$result$學校名稱[1], temperature = mean(x$result$氣溫, na.rm = TRUE))
}) %>%
  do.call(what = rbind) %>%
  dplyr::filter(!is.nan(temperature))
tw.school <- rbind(
  dplyr::select(read.csv("tw-school.csv"), 代碼, 學校名稱, 地址),
  dplyr::select(read.csv("tw-school-junior-high.csv", skip = 1), 代碼, 學校名稱, 地址)
)

school.temperature <- left_join(school.temperature, tw.school, by = c("id" = "代碼")) %>%
  # dplyr::filter(!is.na(地址)) %>%
  dplyr::select(id, temperature, address = 地址, name1 = name, name2 = 學校名稱)

# 手動作address --> longlat
# substring(school.temperature$address, 6, as.character(school.temperature$address) %>% nchar()) %>% cat(sep = "\n")
longlat <- c("25.0504764,121.57767209999997", "25.0623873,121.5623038", 
"25.0631782,121.56428570000003", "25.0566674,121.56278459999999", 
"25.0402704,121.55785270000001", "25.0257433,121.568716", "25.0388994,121.58650349999994", 
"25.036362,121.57044500000006", "25.0239215,121.53488540000001", 
"25.0322754,121.52767500000004", "25.0124078,121.54280790000007", 
"25.0298199,121.53361999999993", "25.0619756,121.52574549999997", 
"25.05049,121.54007249999995", "25.0497694,121.53016339999999", 
"25.0733222,121.53641070000003", "25.0641393,121.54220229999999", 
"25.0787761,121.54940629999999", "25.0233231,121.52260890000002", 
"25.02963,121.50634639999998", "25.0307489,121.51059029999999", 
"25.0429269,121.53153250000003", "25.0443829,121.5170412", "25.0563873,121.5157097", 
"25.0613756,121.51144929999998", "25.0648624,121.5161306", "25.073409,121.51669249999998", 
"25.0660466,121.51245440000002", "25.0639633,121.51282249999997", 
"25.0259828,121.50333720000003", "25.0340691,121.49118150000004", 
"25.038046,121.50272500000005", "25.0356648,121.49615519999998", 
"24.9895266,121.5407811", "24.9986645,121.53890760000002", "24.9875796,121.5500432", 
"24.9763942,121.58363529999997", "25.0013934,121.56761110000002", 
"25.0064295,121.55924219999997", "25.0571633,121.61038350000001", 
"25.0402449,121.61953900000003", "25.0423894,121.61723460000007", 
"25.0554594,121.60221890000003", "25.0602709,121.59048140000004", 
"25.0691597,121.6160678", "25.0680576,121.61188379999999", "25.1036423,121.5555425", 
"25.0986884,121.52959650000003", "25.0898123,121.50294810000003", 
"25.1026617,121.53526469999997", "25.1196984,121.57994859999997", 
"25.1291056,121.57651069999997", "25.1180133,121.5373439", "25.1339694,121.5000781", 
"25.1383716,121.5051631", "25.126886,121.4668792", "25.1684294,121.53877139999997", 
"25.1493002,121.52442640000004", "25.154895,121.50398300000006", 
"25.1122284,121.52182700000003", "25.1371562,121.49650739999993"
) %>%
  strsplit(split = ",", fixed = TRUE) %>%
  lapply(as.numeric) %>%
  do.call(what = rbind)
longlat <- longlat[,2:1]
school.sp <- SpatialPoints(longlat, CRS("+proj=longlat"))
school.sp <- SpatialPointsDataFrame(school.sp, school.temperature)
g.school <- mapview(school.sp, zcol = "temperature")
g.school

資料整合

  1. 利用Kriging建立氣溫的模型
  2. 找出用電資料中,每一個第一級統計區的中心點
  3. 利用Step 1的模型預測Step2的點,並且把整合的資料輸出

Step 1

if (!file.exists("target.args.Rds")) {
  # Model selection via 10-folds cross validation
  set.seed(1)
  fold.id <- rep(1:10, ceiling(nrow(school.sp) / 10)) %>%
    head(nrow(school.sp)) %>%
    sample()
  sp.train <- lapply(1:10, function(fold.target) {
    school.sp[fold.id != fold.target,]
  })
  sp.valid <- lapply(1:10, function(fold.target) {
    school.sp[fold.id == fold.target,]
  })
  sp.vgm <- lapply(sp.train, function(sp.df) {
    variogram(temperature ~ 1, sp.df)
  })
  
  cross.validate <- function(fitter) {
    sp.fit <- lapply(sp.vgm, function(vgm) {
      fitter(vgm)
    })
    izip(train = sp.train, valid = sp.valid, vgm = sp.vgm, fit = sp.fit) %>%
      sapply(function(x) {
        pred <- krige(temperature ~ 1, x$train, x$valid, x$fit)@data$var1.pred
        y <- x$valid@data$temperature
        sum((y - pred)^2)
      }) %>%
      sum()
  }
  results <- lapply(1:1000, function(i) {
    model <- sample(vgm()$short, 1) %>% as.character()
    range <- rpois(1, 10)
    kappa <- rexp(1, 1 / 10)
    nugget <- rexp(1, 1 / 10)
    list(
      fitter = function(vgm) {
        fit.variogram(vgm, vgm(model = model, range = range, nugget = nugget, kappa = kappa))
      },
      args = list(model = model, range = range, nugget = nugget, kappa = kappa)
    )
  }) %>%
    lapply(function(x) {
      result <- try(cross.validate(x$fitter), silent = TRUE)
      if (class(result)[1] == "try-error") x$args$cv.sse <- NA else x$args$cv.sse <- result
      data.frame(x$args)
    }) %>%
    do.call(what = rbind)
  target.args <- results[which.min(results$cv.sse),]
  attr(target.args, "results") <- results
  saveRDS(target.args, "target.args.Rds")
} else {
  target.args <- readRDS("target.args.Rds")
}
vgm <- variogram(temperature ~ 1, school.sp)
fit <- fit.variogram(vgm, vgm(model = target.args$model, range = target.args$range, nugget = target.args$nugget, kappa = target.args$kappa))
plot(vgm, fit)

Step 2

power.10501.spdf.centroid <- gCentroid(power.10501.spdf, byid = TRUE)
g.centroid <- mapView(power.10501.spdf.centroid, layer.name = "centroid")
## Warning in cbind(`Feature ID` = fid, mat): number of rows of result is not
## a multiple of vector length (arg 1)
g.centroid + g.power

Step 3

power.10501.spdf@data$temperature <- krige(temperature ~ 1, school.sp, gCentroid(power.10501.spdf, byid = TRUE), fit)@data$var1.pred

[using ordinary kriging]

knitr::kable(head(power.10501.spdf@data))
Ym code1 gen cunli.code cunli area.code area city.code city temperature
A6301-01-001 10501 A6301-01-001 21076 2 莊敬里 63000010 松山區 63000 臺北市 15.98264
A6301-02-001 10501 A6301-02-001 11987 10 精忠里 63000010 松山區 63000 臺北市 16.01474
A6301-03-001 10501 A6301-03-001 89674 2 莊敬里 63000010 松山區 63000 臺北市 15.96465
A6301-03-002 10501 A6301-03-002 99717 2 莊敬里 63000010 松山區 63000 臺北市 15.96580
A6301-03-003 10501 A6301-03-003 22550 2 莊敬里 63000010 松山區 63000 臺北市 15.96730
A6301-03-004 10501 A6301-03-004 34976 2 莊敬里 63000010 松山區 63000 臺北市 15.96853