Last update: 2015-07-14 14:59:24
Codes of reporting countries which are absent among partner countries.
# http://explainextended.com/2009/09/16/not-in-vs-not-exists-vs-left-join-is-null-postgresql/
absent_reps <- tbl(trade_src, sql("select distinct reporter
from tariffline tr
where not exists (select 1
from tariffline tp
where tr.reporter = tp.partner)")) %>% collect() %>%
'[['("reporter")
# absent_prts <- tbl(trade_src, sql("select distinct partner from tariffline
# left join
# (select distinct reporter from tariffline) as tbl1
# on tariffline.partner = tbl1.reporter
# where tbl1.reporter is null ")) %>% collect() %>%
# '[['("partner")
getCountryName(absent_reps, drop = F)
## Source: local data table [5 x 2]
##
## code description
## 1 381 Italy
## 2 842 USA(1981-)
## 3 579 Norway
## 4 757 Switzerland
## 5 251 France(1996-)
# getCountryName(absent_prts, drop = F) %>% arrange(code) %>% as.data.frame()
Codes from official M491 which are absent in Comtrade M492.
officialm49 <- getOfficialM49()
comtradem49 <- getComtradeM49()
officialm49[!is.element(as.integer(officialm49$code),
as.integer(comtradem49$code)), ] %>%
arrange(code)
## code name iso3
## 1 016 American Samoa ASM
## 2 248 Ã\u0085land Islands ALA
## 3 250 France FRA
## 4 316 Guam GUM
## 5 380 Italy ITA
## 6 438 Liechtenstein LIE
## 7 492 Monaco MCO
## 8 520 Nauru NRU
## 9 570 Niue NIU
## 10 574 Norfolk Island NFK
## 11 578 Norway NOR
## 12 612 Pitcairn PCN
## 13 630 Puerto Rico PRI
## 14 652 Saint Barthélemy BLM
## 15 663 Saint Martin (French part) MAF
## 16 680 Sark <NA>
## 17 732 Western Sahara ESH
## 18 744 Svalbard and Jan Mayen Islands SJM
## 19 756 Switzerland CHE
## 20 830 Channel Islands <NA>
## 21 831 Guernsey GGY
## 22 832 Jersey JEY
## 23 833 Isle of Man IMN
## 24 840 United States of America USA
Codes from Comtrade M49 which are absent in Official M49.
comtradem49[!is.element(as.integer(comtradem49$code),
as.integer(officialm49$code)), ] %>%
arrange(code)
## code name
## 1 200 Czechoslovakia
## 2 230 Fmr Ethiopia
## 3 251 France
## 4 278 Fmr Dem. Rep. of Germany
## 5 280 Fmr Fed. Rep. of Germany
## 6 381 Italy
## 7 457 Sarawak
## 8 459 Peninsula Malaysia
## 9 461 Sabah
## 10 490 Other Asia, nes
## 11 530 Neth. Antilles
## 12 532 Neth. Antilles and Aruba
## 13 579 Norway
## 14 58 Belgium-Luxembourg
## 15 582 Fmr Pacific Isds
## 16 588 East and West Pakistan
## 17 590 Fmr Panama, excl.Canal Zone
## 18 592 Fmr Panama-Canal-Zone
## 19 647 Ryukyu Isd
## 20 658 Saint Kitts, Nevis and Anguilla
## 21 699 India
## 22 711 So. African Customs Union
## 23 717 Fmr Rhodesia Nyas
## 24 720 Fmr Dem. Yemen
## 25 729 Sudan
## 26 736 Fmr Sudan
## 27 757 Switzerland
## 28 810 Fmr USSR
## 29 835 Fmr Tanganyika
## 30 836 Fmr Zanzibar and Pemba Isd
## 31 841 USA (before 1981)
## 32 842 USA
## 33 866 Fmr Dem. Rep. of Vietnam
## 34 868 Fmr Rep. of Vietnam
## 35 886 Fmr Arab Rep. of Yemen
## 36 890 Fmr Yugoslavia
## 37 891 Serbia and Montenegro
## 38 97 EU-28
It would be useful to check matching of codes based on strings comparing.
reps <- tbl(trade_src, sql("select distinct reporter from tariffline")) %>%
collect()
reps %>%
mutate(reporter = as.integer(reporter)) %>%
anti_join(getComtradeM49() %>%
mutate(code = as.integer(code)),
by = c("reporter" = "code"))
## Source: local data frame [0 x 1]
##
## Variables not shown: reporter (int)
No such partners. So column with partners is standardized in Tariffline.
prts <- tbl(trade_src, sql("select distinct partner from tariffline")) %>%
collect()
prts_not_official <- prts %>%
mutate(partner = as.integer(partner)) %>%
anti_join(getOfficialM49() %>%
mutate(code = as.integer(code)),
by = c("partner" = "code")) %>%
unlist() %>% unname() %>% sort()
prts_not_official
## [1] 10 74 86 158 162 166 239 260 334 473 488 490 527 530 536 568 577 637 699 729 736 837 838 839 849 872 899
prts_not_comtrade <- prts %>%
mutate(partner = as.integer(partner)) %>%
anti_join(getComtradeM49() %>%
mutate(code = as.integer(code)),
by = c("partner" = "code")) %>%
unlist() %>% unname() %>% sort()
prts_not_comtrade
## [1] 10 16 74 86 158 162 166 239 250 260 316 334 380 438 473 488 492 520 527 536 568 570 574 577 578 612 630 637 652
## [30] 663 732 744 756 837 838 839 840 849 872 899
Column with reporters is not standardized in Tariffline, but official M49 look better.
prts_not_sws <- getCountryName(prts$partner, drop = F) %>%
filter(is.na(description)) %>%
select(code) %>%
arrange(code) %>%
unlist() %>% unname()
prts_not_sws
## [1] "158" "250" "380" "438" "488" "578" "630" "652" "663" "744" "756" "840" "872"
Wow! SWS is a champion!
intersect(prts_not_comtrade,
intersect(prts_not_sws,
prts_not_official))
## [1] 158 488 872
http://unstats.un.org/unsd/methods/m49/m49alpha.htm Countries or areas, codes and abbreviations↩
http://comtrade.un.org/data/cache/reporterAreas.json The UN Comtrade data extraction API↩