Last update: 2015-07-09 11:48:00

Wheat in HS

##      code                          description
## 1:   1001                     Wheat and meslin
## 2: 100110                          Durum wheat
## 3: 100111                                 Seed
## 4: 100119                                Other
## 5: 100190 Wheat except durum wheat, and meslin
## 6: 100191                                 Seed
## 7: 100199                                Other

Which HS-code shoud we select?

getHSCode("boneless|bone in|wheat|palm") %>% arrange(code)
##       code                                           description
##  1: 020120                 Bovine cuts bone in, fresh or chilled
##  2: 020130                Bovine cuts boneless, fresh or chilled
##  3: 020220                           Bovine cuts bone in, frozen
##  4: 020230                          Bovine cuts boneless, frozen
##  5: 020312 Swine hams, shoulders & cuts bone in, fresh or chille
##  6: 020322   Hams, shoulders and cuts, of swine, bone in, frozen
##  7: 020422                 Sheep cuts, bone in, fresh or chilled
##  8: 020423                Sheep cuts, boneless, fresh or chilled
##  9: 020442                           Sheep cuts, bone in, frozen
## 10: 020443                          Sheep cuts, boneless, frozen
## 11:   1001                                      Wheat and meslin
## 12: 100110                                           Durum wheat
## 13: 100190                  Wheat except durum wheat, and meslin
## 14:   1008      Buckwheat, millet and canary seed, other cereals
## 15: 100810                                             Buckwheat
## 16:     11 Milling products, malt, starches, inulin, wheat glute
## 17:   1101                                 Wheat or meslin flour
## 18: 110100                                 Wheat or meslin flour
## 19:   1102           Cereal flours other than of wheat or meslin
## 20: 110290   Cereal flour except wheat, meslin, rye, maize, rice
## 21: 110311                                            Wheat meal
## 22: 110319 Cereal groats or meal except wheat, maize, rice, oats
## 23: 110321                                         Wheat pellets
## 24: 110329                           Cereal pellets except wheat
## 25: 110811                                         Wheat, starch
## 26: 110819          Starches except wheat, maize, potato, manioc
## 27:   1109                                          Wheat gluten
## 28: 110900                                          Wheat gluten
## 29: 120710                                 Palm nuts and kernels
## 30:   1511   Palm oil and its fractions, not chemically modified
## 31: 151110                                       Palm oil, crude
## 32: 151190                  Palm oil or fractions simply refined
## 33:   1513 Coconut, palm kernel, babassu oil, fractions, refined
## 34: 151321                     Palm kernel or babassu oil, crude
## 35: 151329  Palm kernel & babassu oil, fractions, simply refined
## 36: 190430                                          Bulgur wheat
## 37: 200891          Palm hearts, otherwise prepared or preserved
## 38: 230230                    Wheat bran, sharps, other residues
## 39: 230240 Cereal bran, sharps, residue except maize, wheat, ric
## 40: 230660  Palm nut or kernel oil cake and other solid residues
## 41: 291570     Palmitic acid, stearic acid, their salts & esters
##       code                                           description

Let’s take 020230.

US country code

getCountryCode("USA\\(1981") 
##    code description    type startYear endYear
## 1:  842  USA(1981-) country      1981      NA

Data on import reported by USA

## Source: postgres 9.4.4 [trade@localhost:5432/sws_data]
## From: <derived table> [?? x 9]
## 
##    year reporter partner         hs flow   weight      qty qunit    value
## 1  2011      842     558 0202300200    1    17690    17690     8    73499
## 2  2011      842     124 0202300400    1    12789    12789     8    24784
## 3  2011      842     554 0202301000    1    57120    57120     8   298149
## 4  2011      842     124 0202303000    1    66052    66052     8   426843
## 5  2011      842     152 0202303000    1    69485    69485     8   433472
## 6  2011      842     484 0202303000    1  1183759  1183759     8  4626408
## 7  2011      842     558 0202303000    1     7696     7696     8    25420
## 8  2011      842     124 0202305000    1 11127535 11127535     8 33246533
## 9  2011      842     152 0202305000    1   232115   232115     8  1225883
## 10 2011      842     188 0202305000    1  5682447  5682447     8 22764955
## ..  ...      ...     ...        ...  ...      ...      ...   ...      ...

Data on export in USA reported by trade partners

There is no 842 country code!

## Source: postgres 9.4.4 [trade@localhost:5432/sws_data]
## From: <derived table> [?? x 1]
## 
##    number_of_flows
## 1                0
## ..             ...

Let’s check trade leaders.

## Source: postgres 9.4.4 [trade@localhost:5432/sws_data]
## From: <derived table> [?? x 2]
## 
##    partner number_of_flows
## 1      840          669349
## 2      276          652626
## 3      156          644593
## 4      380          518403
## 5      250          504044
## 6      826          490448
## 7      528          395815
## 8      724          382516
## 9       56          352054
## 10     392          333738
## ..     ...             ...

Leader is 840. And not far from 842. USA?

##    year reporter partner         hs flow    weight       qty qunit        value
## 1  2011      554     840 0202300011    2  59088756  57738217     8 2.517855e+08
## 2  2011      554     840 0202300009    2   4426483   4327766     8 1.825160e+07
## 3  2011      554     840 0202300001    2  80361261  78570573     8 3.246708e+08
## 4  2011      554     840 0202300021    2   7004757   6822617     8 2.759765e+07
## 5  2011      558     840 0202300099    2  32499729  32499729     8 1.166152e+08
## 6  2011      702     840     020230    2      1170      1170     8 4.887079e+03
## 7  2011       76     840   02023000    2     51469     51469     8 2.096130e+05
## 8  2011      826     840   02023090    2     95579     95579     8 1.218344e+06
## 9  2011      858     840     020230    2  12652924  12652924     8 6.578924e+07
## 10 2011      124     840   02023000    2  12753663  12753663     8 3.858783e+07
## 11 2011      152     840   02023000    2    601853    601853     8 3.344636e+06
## 12 2011      188     840 0202300099    2   5681649   5681649     8 2.135941e+07
## 13 2011      276     840     020230    2      6770      6770     8 6.832462e+04
## 14 2011      340     840 0202300000    2   1695547   1695547     8 6.596075e+06
## 15 2011       36     840     020230    2 142661023 139568371     8 5.789559e+08
## 16 2011      484     840     020230    2   1740054   1740054     8 7.085281e+06
## 17 2011      516     840     020230    2        10        10     8 1.019138e+02
## 18 2011      554     840 0202300029    2     36045     35026     8 1.353669e+05

Units of measurement (preparation for aggregation)

Do we have different units for one HS6 code?

Units reported by the US.

##   qunit
## 1     8

Units reported by partners of the US.

##   qunit
## 1     8

OK, we have only one unit. It’s 8 - kg.

Differencies in sets of countries

## [1] 0

The are no cases, when the US report import from a partner, but the partner doesn’t report such export to the US.

##   area   country_name
## 1  516        Namibia
## 2  276 Germany(1991-)
## 3  826 United Kingdom
## 4   76         Brazil
## 5  702      Singapore

There are 5 cases, when a partner reports export to the US, but the US don’t report import from the partner. We could apply mirroring here to fill missed data, but it’s possible that partners have used other HS-codes for these commodities.

Trade flows with zero quantity

##   no_quantity
## 1           0

Differencies between HS schemes

Harmonized system classification is globally declared up to 6 digits. At more detailed level every country can extend HS to better respond to local circumstances. These are country-specific descriptions under 0202 heading

get_madb_export("US", "020230") 
##               hs                                                                                        desc
## 1             02                                                        CHAPTER 2 MEAT AND EDIBLE MEAT OFFAL
## 2           0202                                                             Meat of bovine animals, frozen:
## 3        0202.10                                                                 Carcases and half-carcases:
## 4     0202.10.05 Described in general note 15 of the tariff schedule and entered pursuant to its provisions:
## 5  0202.10.05.10                                                                                        Veal
## 6  0202.10.05.90                                                                                       Other
## 7     0202.10.10 Described in additional U.S. note 3 to this chapter and entered pursuant to its provisions:
## 8  0202.10.10.10                                                                                        Veal
## 9  0202.10.10.90                                                                                       Other
## 10    0202.10.50                                                                                      Other:
## 11 0202.10.50.10                                                                                        Veal
## 12 0202.10.50.90                                                                                       Other
## 13       0202.20                                                                    Other cuts with bone in:
## 14               Described in general note 15 of the tariff schedule and entered pursuant to its provisions:
## 15                                                                                                Processed:
## 16    0202.20.02                                                                      High-quality beef cuts
## 17    0202.20.04                                                                                       Other
## 18    0202.20.06                                                                                       Other
## 19               Described in additional U.S. note 3 to this chapter and entered pursuant to its provisions:
## 20                                                                                                Processed:
## 21    0202.20.10                                                                      High-quality beef cuts
## 22    0202.20.30                                                                                       Other
## 23    0202.20.50                                                                                       Other
## 24    0202.20.80                                                                                       Other
## 25       0202.30                                                                                   Boneless:
## 26               Described in general note 15 of the tariff schedule and entered pursuant to its provisions:
## 27                                                                                                Processed:
## 28    0202.30.02                                                                      High-quality beef cuts
## 29    0202.30.04                                                                                       Other
## 30    0202.30.06                                                                                       Other
## 31               Described in additional U.S. note 3 to this chapter and entered pursuant to its provisions:
## 32                                                                                                Processed:
## 33    0202.30.10                                                                      High-quality beef cuts
## 34    0202.30.30                                                                                       Other
## 35    0202.30.50                                                                                       Other
## 36    0202.30.80                                                                                       Other
get_madb_export("BR", "020230") 
##           hs                                 desc
## 1         02 CHAPTER 2 MEAT AND EDIBLE MEAT OFFAL
## 2       0202      Meat of bovine animals, frozen:
## 3    0202.10           Carcases and half-carcases
## 4    0202.20             Other cuts with bone in:
## 5 0202.20.10                         Forequarters
## 6 0202.20.20                         Hindquarters
## 7 0202.20.90                                Other
## 8    0202.30                             Boneless

Calculation of unit value and outlier detection

usimp6 <- usimp %>% 
  group_by(year, reporter, partner, flow, hs6, qunit) %>% 
  summarize(weight = sum(weight),
            qty = sum(qty),
            value = sum(value)) %>% 
  mutate(unitvalue = value / weight) %>% 
  ungroup()

usimp6 %>% 
  arrange(desc(unitvalue)) 
## Source: postgres 9.4.4 [trade@localhost:5432/sws_data]
## From: <derived table> [?? x 10]
## Arrange: desc(unitvalue) 
## 
##    year reporter partner flow    hs6 qunit    weight       qty     value unitvalue
## 1  2011      842     152    1 020230     8    775861    775861   4441157  5.724166
## 2  2011      842     858    1 020230     8   9284286   9284286  52052660  5.606533
## 3  2011      842     554    1 020230     8 147326735 147326735 666526935  4.524141
## 4  2011      842      36    1 020230     8 123842289 123842289 555510467  4.485628
## 5  2011      842     340    1 020230     8   4763903   4763903  19141703  4.018072
## 6  2011      842     188    1 020230     8   5682447   5682447  22764955  4.006189
## 7  2011      842     484    1 020230     8   1199356   1199356   4760383  3.969116
## 8  2011      842     558    1 020230     8  32774516  32774516 125955026  3.843078
## 9  2011      842     124    1 020230     8  12754652  12754652  38612592  3.027334
## ..  ...      ...     ...  ...    ...   ...       ...       ...       ...       ...
usimp6 %>% 
  select(unitvalue) %>% 
  collect() %>% 
  ggplot() + geom_density(aes(unitvalue))

Let’s assume that unit value more than $5000/tn is outlier. Usually value is reported by customs more accurately than quantity. In case of bigger than usual unit value quantity could be underestimated.

usimp6 %>% collect() %>% 
  filter(unitvalue > 5)
## Source: local data frame [2 x 10]
## 
##   year reporter partner flow    hs6 qunit  weight     qty    value unitvalue
## 1 2011      842     858    1 020230     8 9284286 9284286 52052660  5.606533
## 2 2011      842     152    1 020230     8  775861  775861  4441157  5.724166

We could estimate quantity using country-specific median unit value.

Replacement of outliers

usimp6 %>% collect() %>% 
  summarize(unitvalue = median(unitvalue))
## Source: local data frame [1 x 1]
## 
##   unitvalue
## 1  4.018072
usimp6 %>% collect() %>% 
  select(-year, -reporter, -partner, -flow, -hs6, -qunit, -qty) %>% 
  mutate(median_uv = median(unitvalue)) %>% 
  filter(unitvalue > 5) %>% 
  mutate(new_weight = round(value / median_uv),
         weight_diff = new_weight - weight)
## Source: local data frame [2 x 6]
## 
##    weight    value unitvalue median_uv new_weight weight_diff
## 1 9284286 52052660  5.606533  4.018072   12954637     3670351
## 2  775861  4441157  5.724166  4.018072    1105296      329435

Else we can use for computing of quantity global unit value, unit value estimated from time series or take quantity reported by trade partner.

Range of unit values for all reporters

bovine_global_imp <- tbl(trade_src, sql("select * from (select 
year, reporter, partner, hs, flow, weight, qty, qunit, value, 
substring(hs from 1 for 6) as hs6 from tariffline
where flow = '2' and year = '2011') as tbl1 
where hs6 in ('020230')"))
bovine_global_imp %>% 
  group_by(year, reporter, partner, flow, hs6, qunit) %>% 
  summarize(weight = sum(weight),
            qty = sum(qty),
            value = sum(value)) %>% 
  mutate(unitvalue = value / weight) %>% 
  ungroup() %>% 
  select(reporter, unitvalue) %>% 
  collect() %>% 
  ggplot() + geom_boxplot(aes(reporter, unitvalue, fill = reporter == '842')) +
  scale_y_log10()
## Warning: Removed 13 rows containing non-finite values (stat_boxplot).