Last update: 2015-06-16 16:03:50
We want to find cases of imputing missing quantities in Comtrade data for Israel
Getting data from TF_SOURCE*
tfsource <- cbind(gettfsource(reporter = 105,
year = 2011),
year = 2011)
head(tfsource, 10)
## reporter partner flow hs qty qty2 value year
## 1 105 106 1 01019000 0 1 3 2011
## 2 105 106 2 01061900 0 0 46 2011
## 3 105 106 2 01063200 0 43 20 2011
## 4 105 106 1 01063919 0 8410 65 2011
## 5 105 106 1 01069000 0 0 4 2011
## 6 105 106 2 01069010 0 0 243 2011
## 7 105 106 2 01069090 0 0 33 2011
## 8 105 106 1 02023000 0 50212 362 2011
## 9 105 106 2 02072700 0 217245 1002 2011
## 10 105 106 2 02089000 0 0 95 2011
tfsource <- tfsource %>%
mutate(hs = stringr::str_extract(hs, "^.{6}")) %>%
group_by(year, reporter, partner, flow, hs) %>%
summarize(qty = sum(qty),
qty2 = sum(qty2),
value = sum(value))
# Remove non agri items
tfsource <- data.frame(hs = getAgriHSCodes(), stringsAsFactors = F) %>%
inner_join(tfsource, by = "hs")
any(!(unique(tfsource$hs) %in% getAgriHSCodes()))
## [1] FALSE
# Filter flows without quantity
no_quant10 <- tfsource %>%
mutate(no_quant = qty == 0 & qty2 == 0 & value > 0) %>%
filter(no_quant) %>%
top_n(10, (value))
And for comparison similar data from SWS.
swsdata <- readRDS("/home/sas/sws/sws_r_api/USGEIL_agri.rds") %>%
filter(reportingCountryM49 == unctarea(105)) %>%
mutate_(reporter = ~faoarea(reportingCountryM49),
partner = ~faoarea(partnerCountryM49),
flow = ~as.integer(getElementCodeDesc(measuredElementTrade)$direction),
unit = ~getElementCodeDesc(measuredElementTrade)$unit,
group = ~getElementCodeDesc(measuredElementTrade)$unitgroup,
year = ~as.integer(timePointYears)) %>%
select(reporter, partner, flow, group, year, hs = measuredItemHS, value = Value, flag = flagTrade) %>%
reshape2::dcast(... ~ group) %>%
rename(value_sws = cost, heads_sws = head, weight_sws = weight, volume_sws = volume)
no_quant10 <- no_quant10 %>%
left_join(swsdata, by = c("reporter", "partner", "year", "flow", "hs")) %>%
select(-no_quant) %>%
select(reporter, year, partner, hs, flow, qty, qty2, value, value_sws, heads_sws, weight_sws, volume_sws, flag_sws = flag)
no_quant10$qty_api <- NA
no_quant10$weight_api <- NA
no_quant10$value_api <- NA
no_quant10$flag_api <- NA
for (i in seq_len(nrow(no_quant10))) {
api <- getunct(area = unctarea(no_quant10$reporter[i]),
year = no_quant10$year[i],
partner = unctarea(no_quant10$partner[i]),
code = no_quant10$hs[i],
flow = no_quant10$flow[i],
desc = T,
compact = F)
no_quant10[i, "qty_api"] <- api$TradeQuantity
no_quant10[i, "weight_api"] <- api$NetWeight
no_quant10[i, "value_api"] <- api$TradeValue
no_quant10[i, "flag_api"] <- api$estCode
Sys.sleep(1)
}
no_quant10
## reporter year partner hs flow qty qty2 value value_sws heads_sws
## 1 105 2011 104 210690 1 0 0 80435 80435000 NA
## 2 105 2011 231 210690 2 0 0 37515 37515000 NA
## 3 105 2011 231 190590 2 0 0 31171 31171000 NA
## 4 105 2011 106 190219 1 0 0 24974 24974000 NA
## 5 105 2011 229 170199 1 0 0 103716 103716000 NA
## 6 105 2011 231 120991 2 0 0 27620 27620000 NA
## 7 105 2011 150 070960 2 0 0 66757 66757000 NA
## 8 105 2011 185 070960 2 0 0 47014 47014000 NA
## 9 105 2011 198 070960 2 0 0 50575 50575000 NA
## 10 105 2011 185 070610 2 0 0 48295 48295000 NA
## weight_sws volume_sws flag_sws qty_api weight_api value_api flag_api
## 1 13229802 NA 13229802 13229802 80435000 6
## 2 7749110 NA 7749110 7749110 37515000 6
## 3 10618854 NA 10618854 10618854 31171000 6
## 4 18207897 NA 18207897 18207897 24974000 6
## 5 148496946 NA 148496946 148496946 103716000 6
## 6 NA NA NA NA 27620000 0
## 7 39571840 NA 39571840 39571840 66757000 6
## 8 27868695 NA 27868695 27868695 47014000 6
## 9 29979564 NA 29979564 29979564 50575000 6
## 10 113912569 NA 113912569 113912569 48295000 6
Flag refers to quantity/netweight estimation:
0 = no estimation, 2 = quantity, 4 = netweight, 6 = both quantity and netweight