Last update: 2015-06-12 14:54:51
We want to compare ComTrade data from TF_SOURCE and SWS
Getting data from TF_SOURCE*
tfsource <- bind_rows(
cbind(gettfsource(reporter = c(231, 105, 79), year = 2010), year = 2010),
cbind(gettfsource(reporter = c(231, 105, 79), year = 2011), year = 2011))
head(tfsource, 10)
## Source: local data frame [10 x 8]
##
## reporter partner flow hs qty qty2 value year
## 1 79 1 2 61013090 0.0 60 1.31 2010
## 2 79 1 2 61022090 0.0 4 0.04 2010
## 3 79 1 2 61023090 0.0 55 1.30 2010
## 4 79 1 2 61033300 0.0 126 2.69 2010
## 5 79 1 2 61033900 0.0 8 0.52 2010
## 6 79 1 2 61034200 0.2 350 5.60 2010
## 7 79 1 2 61034300 0.1 437 3.08 2010
## 8 79 1 1 61043100 0.2 164 2.48 2010
## 9 79 1 2 61043100 0.0 7 0.74 2010
## 10 79 1 1 61043300 1.1 1251 21.50 2010
How to calculate missing values? Is there NAs?
tfsource %>%
summarize(qty_na = sum(is.na(qty)),
qty2_na = sum(is.na(qty2)),
qty_0 = sum(qty == 0),
qty2_0 = sum(qty2 == 0))
## Source: local data frame [1 x 4]
##
## qty_na qty2_na qty_0 qty2_0
## 1 0 0 110661 467448
Only zeros and no NAs. So we’ll use zeros as missing values.
tfsource %>%
select(hs) %>%
mutate(hsdigits = stringr::str_length(hs)) %>%
group_by(hsdigits) %>%
summarize(flowstotal = n())
## Source: local data frame [2 x 2]
##
## hsdigits flowstotal
## 1 8 289867
## 2 10 440699
OK, so to compare with SWS we need to aggregate down to 6 digits.
tfsource <- tfsource %>%
mutate(hs = stringr::str_extract(hs, "^.{6}")) %>%
group_by(year, reporter, partner, flow, hs) %>%
summarize(qty = sum(qty),
qty2 = sum(qty2),
value = sum(value))
any(!(unique(tfsource$hs) %in% getAgriHSCodes()))
## [1] TRUE
OK, they are. So we need them to be filter out.
tfsource <- data.frame(hs = getAgriHSCodes(), stringsAsFactors = F) %>%
inner_join(tfsource, by = "hs")
any(!(unique(tfsource$hs) %in% getAgriHSCodes()))
## [1] FALSE
tfsource_agg <- tfsource %>%
group_by(year, reporter) %>%
summarize(totalflows = n(),
noqty = sum(qty == 0 & qty2 == 0 & value > 0),
noqty_prcnt = round(100 * noqty / totalflows, 2),
novl = sum(qty > 0 & qty2 > 0 & value == 0),
novl_prcnt = round(100 * novl / totalflows, 2))
And for comparison similar data from SWS.
swsdata <- readRDS("/home/sas/sws/sws_r_api/USGEIL_agri.rds") %>%
humanizeComtrade() %>%
select(-unit) %>%
reshape2::dcast(... ~ group) %>%
group_by(reporter) %>%
summarize(
totalflows = n(),
noquant = sum(((weight == 0 | is.na(weight)) &
(volume == 0 | is.na(volume)) &
(head == 0 | is.na(head))) &
cost > 0,
na.rm = T),
noquantprcnt = round(100 * noquant / totalflows, 2),
novalue = sum((weight > 0 | volume > 0 | head > 0) &
(is.na(cost) | cost == 0), na.rm = T),
novalueprcnt = round(100 * novalue / totalflows, 2))
SWS data only for 2011 year. I’ve added 2010 year in TF_SOURCE table to be sure that the absence of Germany’s data is year-specific.
swsdata
## Source: local data frame [3 x 6]
##
## reporter totalflows noquant noquantprcnt novalue novalueprcnt
## 1 Germany(1991-) 44269 30 0.07 0 0
## 2 Israel 7903 650 8.22 0 0
## 3 USA(1981-) 37998 419 1.10 0 0
tfsource_agg
## Source: local data frame [5 x 7]
## Groups: year
##
## year reporter totalflows noqty noqty_prcnt novl novl_prcnt
## 1 2010 79 46570 4813 10.33 1 0
## 2 2010 105 7843 4626 58.98 0 0
## 3 2010 231 38113 497 1.30 0 0
## 4 2011 105 7989 4482 56.10 0 0
## 5 2011 231 38353 499 1.30 0 0
We observe similar amounts of trade flows for Israel (105) and USA (231). Amount of trade flow with no quantity differs much for Israel.
In TFSOURCE there is no data for Germany (79) from 2011! Let’s check other sources on small example (Bovine).
Data from UN ComTrade public API.
unct <- getunct(area = paste(unctarea(79), unctarea(231), sep = ","),
year = "2011, 2010",
partner = unctarea(fclhs:::findfaoareacode("Canada")),
code = "010210",
desc = T,
compact = F)
unct
## pfCode yr period periodDesc aggrLevel IsLeaf rgCode rgDesc rtCode
## 1 H3 2010 2010 2010 6 1 1 Import 842
## 2 H3 2010 2010 2010 6 1 2 Export 842
## 3 H3 2010 2010 2010 6 1 3 Re-Export 842
## 4 H3 2011 2011 2011 6 1 1 Import 276
## 5 H3 2011 2011 2011 6 1 1 Import 842
## 6 H3 2011 2011 2011 6 1 2 Export 842
## 7 H3 2011 2011 2011 6 1 3 Re-Export 842
## rtTitle rt3ISO ptCode ptTitle pt3ISO cmdCode
## 1 USA USA 124 Canada CAN 010210
## 2 USA USA 124 Canada CAN 010210
## 3 USA USA 124 Canada CAN 010210
## 4 Germany DEU 124 Canada CAN 010210
## 5 USA USA 124 Canada CAN 010210
## 6 USA USA 124 Canada CAN 010210
## 7 USA USA 124 Canada CAN 010210
## cmdDescE qtCode qtDesc
## 1 Live bovine animals: pure-bred breeding animals 5 Number of items
## 2 Live bovine animals: pure-bred breeding animals 5 Number of items
## 3 Live bovine animals: pure-bred breeding animals 5 Number of items
## 4 Live bovine animals: pure-bred breeding animals 5 Number of items
## 5 Live bovine animals: pure-bred breeding animals 5 Number of items
## 6 Live bovine animals: pure-bred breeding animals 5 Number of items
## 7 Live bovine animals: pure-bred breeding animals 5 Number of items
## TradeQuantity NetWeight TradeValue estCode
## 1 3522 1184592 4831908 4
## 2 2522 1290253 4619438 4
## 3 5 1779 6371 4
## 4 50 6300 99239 2
## 5 3420 1271785 5065340 4
## 6 3791 2035585 6998112 4
## 7 54 51114 175723 4
There is data for Germany only for 2011 year and its flow has flag 2. I didn’t find description of the flags.
Similar data from TF_SOURCE.
tfsource %>%
filter(reporter %in% c(231, 79),
stringr::str_detect(hs, "^010210"),
partner == fclhs:::findfaoareacode("Canada"))
## hs year reporter partner flow qty qty2 value
## 1 010210 2010 231 33 1 3522 0 4831908
## 2 010210 2010 231 33 2 2522 0 4619438
## 3 010210 2011 231 33 1 3420 0 5065340
## 4 010210 2011 231 33 2 3791 0 6998112
It has 10 digits in HS code. Let’s aggregate to 6 digits.
tfsource %>%
filter(reporter %in% c(231, 79),
stringr::str_detect(hs, "^010210"),
partner == fclhs:::findfaoareacode("Canada")) %>%
mutate(hs = stringr::str_extract(hs, "^.{6}")) %>%
group_by(year, reporter, partner, flow, hs) %>%
summarize(qty = sum(qty),
qty2 = sum(qty2),
value = sum(value))
## Source: local data frame [4 x 8]
## Groups: year, reporter, partner, flow
##
## year reporter partner flow hs qty qty2 value
## 1 2010 231 33 1 010210 3522 0 4831908
## 2 2010 231 33 2 010210 2522 0 4619438
## 3 2011 231 33 1 010210 3420 0 5065340
## 4 2011 231 33 2 010210 3791 0 6998112
# And public API Comtrade
unct %>% select(yr, flow = rgDesc, reporter = rtTitle, partner = ptTitle, hs = cmdCode, qtDesc,
qty = TradeQuantity, kg = NetWeight, value = TradeValue, flag = estCode)
## yr flow reporter partner hs qtDesc qty kg
## 1 2010 Import USA Canada 010210 Number of items 3522 1184592
## 2 2010 Export USA Canada 010210 Number of items 2522 1290253
## 3 2010 Re-Export USA Canada 010210 Number of items 5 1779
## 4 2011 Import Germany Canada 010210 Number of items 50 6300
## 5 2011 Import USA Canada 010210 Number of items 3420 1271785
## 6 2011 Export USA Canada 010210 Number of items 3791 2035585
## 7 2011 Re-Export USA Canada 010210 Number of items 54 51114
## value flag
## 1 4831908 4
## 2 4619438 4
## 3 6371 4
## 4 99239 2
## 5 5065340 4
## 6 6998112 4
## 7 175723 4
getComtradeData(reporter = c(842, 276), partner = 124, year = 2010:2011, item = "010210",
element = selectElems(unit %in% c("kg", "US$", "t", "1000 t", "head", "1000 head"))) %>%
humanizeComtrade() %>%
group_by(year, reporter, dir, unit)
## Source: local data table [18 x 10]
## Groups: year, reporter, dir, unit
##
## reporter partner year dir back hs value unit group
## 1 Germany(1991-) Canada 2011 in FALSE 010210 50 head head
## 2 Germany(1991-) Canada 2011 in FALSE 010210 6300 kg weight
## 3 Germany(1991-) Canada 2011 in FALSE 010210 99239 US$ cost
## 4 USA(1981-) Canada 2011 in FALSE 010210 3420 head head
## 5 USA(1981-) Canada 2010 in FALSE 010210 3522 head head
## 6 USA(1981-) Canada 2011 out FALSE 010210 3791 head head
## 7 USA(1981-) Canada 2010 out FALSE 010210 2522 head head
## 8 USA(1981-) Canada 2011 out TRUE 010210 54 head head
## 9 USA(1981-) Canada 2011 in FALSE 010210 1271785 kg weight
## 10 USA(1981-) Canada 2010 in FALSE 010210 1184592 kg weight
## 11 USA(1981-) Canada 2011 in FALSE 010210 5065340 US$ cost
## 12 USA(1981-) Canada 2010 in FALSE 010210 4831908 US$ cost
## 13 USA(1981-) Canada 2011 out FALSE 010210 2035585 kg weight
## 14 USA(1981-) Canada 2010 out FALSE 010210 1290253 kg weight
## 15 USA(1981-) Canada 2011 out FALSE 010210 6998112 US$ cost
## 16 USA(1981-) Canada 2010 out FALSE 010210 4619438 US$ cost
## 17 USA(1981-) Canada 2011 out TRUE 010210 51114 kg weight
## 18 USA(1981-) Canada 2011 out TRUE 010210 175723 US$ cost
## Variables not shown: item (chr)