Last update: 2015-06-16 16:03:50

Introduction

We want to find cases of imputing missing quantities in Comtrade data for Israel

Getting data from TF_SOURCE*

tfsource <- cbind(gettfsource(reporter = 105, 
                              year = 2011),
                  year = 2011)

head(tfsource, 10)
##    reporter partner flow       hs qty   qty2 value year
## 1       105     106    1 01019000   0      1     3 2011
## 2       105     106    2 01061900   0      0    46 2011
## 3       105     106    2 01063200   0     43    20 2011
## 4       105     106    1 01063919   0   8410    65 2011
## 5       105     106    1 01069000   0      0     4 2011
## 6       105     106    2 01069010   0      0   243 2011
## 7       105     106    2 01069090   0      0    33 2011
## 8       105     106    1 02023000   0  50212   362 2011
## 9       105     106    2 02072700   0 217245  1002 2011
## 10      105     106    2 02089000   0      0    95 2011

Trimming to 6-digits HS codes and removing nonagricult items

tfsource <- tfsource %>% 
  mutate(hs = stringr::str_extract(hs, "^.{6}")) %>% 
  group_by(year, reporter, partner, flow, hs) %>% 
  summarize(qty = sum(qty),
            qty2 = sum(qty2),
            value = sum(value))


# Remove non agri items
tfsource <- data.frame(hs = getAgriHSCodes(), stringsAsFactors = F) %>% 
  inner_join(tfsource, by = "hs")
any(!(unique(tfsource$hs) %in% getAgriHSCodes()))
## [1] FALSE
# Filter flows without quantity

no_quant10 <- tfsource %>% 
  mutate(no_quant = qty == 0 & qty2 == 0 & value > 0) %>% 
  filter(no_quant) %>% 
  top_n(10, (value))

And for comparison similar data from SWS.

swsdata <- readRDS("/home/sas/sws/sws_r_api/USGEIL_agri.rds") %>% 
  filter(reportingCountryM49 == unctarea(105)) %>%
  mutate_(reporter = ~faoarea(reportingCountryM49),
          partner  = ~faoarea(partnerCountryM49),
          flow     = ~as.integer(getElementCodeDesc(measuredElementTrade)$direction),
          unit     = ~getElementCodeDesc(measuredElementTrade)$unit,
          group    = ~getElementCodeDesc(measuredElementTrade)$unitgroup,
          year     = ~as.integer(timePointYears)) %>% 
  select(reporter, partner, flow, group, year, hs = measuredItemHS, value = Value, flag = flagTrade) %>% 
  reshape2::dcast(... ~ group) %>% 
  rename(value_sws = cost, heads_sws = head, weight_sws = weight, volume_sws = volume) 
no_quant10 <- no_quant10 %>% 
  left_join(swsdata, by = c("reporter", "partner", "year", "flow", "hs")) %>% 
  select(-no_quant) %>% 
  select(reporter, year, partner, hs, flow, qty, qty2, value, value_sws, heads_sws, weight_sws, volume_sws, flag_sws = flag)
no_quant10$qty_api <- NA
no_quant10$weight_api <- NA
no_quant10$value_api <- NA
no_quant10$flag_api <- NA

for (i in seq_len(nrow(no_quant10))) {
  api <- getunct(area = unctarea(no_quant10$reporter[i]),
                 year = no_quant10$year[i],
                 partner = unctarea(no_quant10$partner[i]),
                 code = no_quant10$hs[i],
                 flow = no_quant10$flow[i],
                 desc = T,
                 compact = F)
  
  no_quant10[i, "qty_api"] <- api$TradeQuantity
  no_quant10[i, "weight_api"] <- api$NetWeight
  no_quant10[i, "value_api"] <- api$TradeValue
  no_quant10[i, "flag_api"] <- api$estCode
  
  Sys.sleep(1)
                 
}

no_quant10
##    reporter year partner     hs flow qty qty2  value value_sws heads_sws
## 1       105 2011     104 210690    1   0    0  80435  80435000        NA
## 2       105 2011     231 210690    2   0    0  37515  37515000        NA
## 3       105 2011     231 190590    2   0    0  31171  31171000        NA
## 4       105 2011     106 190219    1   0    0  24974  24974000        NA
## 5       105 2011     229 170199    1   0    0 103716 103716000        NA
## 6       105 2011     231 120991    2   0    0  27620  27620000        NA
## 7       105 2011     150 070960    2   0    0  66757  66757000        NA
## 8       105 2011     185 070960    2   0    0  47014  47014000        NA
## 9       105 2011     198 070960    2   0    0  50575  50575000        NA
## 10      105 2011     185 070610    2   0    0  48295  48295000        NA
##    weight_sws volume_sws flag_sws   qty_api weight_api value_api flag_api
## 1    13229802         NA           13229802   13229802  80435000        6
## 2     7749110         NA            7749110    7749110  37515000        6
## 3    10618854         NA           10618854   10618854  31171000        6
## 4    18207897         NA           18207897   18207897  24974000        6
## 5   148496946         NA          148496946  148496946 103716000        6
## 6          NA         NA                 NA         NA  27620000        0
## 7    39571840         NA           39571840   39571840  66757000        6
## 8    27868695         NA           27868695   27868695  47014000        6
## 9    29979564         NA           29979564   29979564  50575000        6
## 10  113912569         NA          113912569  113912569  48295000        6

Flag refers to quantity/netweight estimation:

0 = no estimation, 2 = quantity, 4 = netweight, 6 = both quantity and netweight