Reading sample data ...

Here's how we read the sample data ...

# The real data ...
# Load the sample from the Stata 12 file and show its structure.
# The workspace is deliberately NOT wiped with rm(list = ls()): that call
# deletes the caller's objects without giving a clean session (attached
# packages and options persist). Restart R for a truly fresh start.
library(foreign)
real2000 <- read.dta("source_all_sample2000_stata12.dta")
str(real2000)
## 'data.frame':    2000 obs. of  21 variables:
##  $ rec          : num  449859 160403 1292026 1422720 1307634 ...
##  $ file_id      : num  61 52 68 70 69 60 90 51 16 37 ...
##  $ source       : Factor w/ 2 levels "Transporters",..: 1 1 1 1 1 1 2 1 1 1 ...
##  $ flowtype     : Factor w/ 3 levels "In","Out","- (filiaal)": 1 2 2 2 1 2 3 1 2 1 ...
##  $ id           : chr  "75020" "75020" "75020" "75020" ...
##  $ fromid       : chr  "1101AG75020" "7575AV75020" "7575AV75020" "7575AV75020" ...
##  $ pc6_from     : chr  "1101AG" "7575AV" "7575AV" "7575AV" ...
##  $ pc4_from     : chr  "1101" "7575" "7575" "7575" ...
##  $ pc3_from     : chr  "110" "757" "757" "757" ...
##  $ pc2_from     : chr  "11" "75" "75" "75" ...
##  $ datestring   : chr  "28-08-07" "18-4-2007" "09-10-07" "21-11-07" ...
##  $ toid         : chr  "7575AV75020" "9471AK75020" "2511BC75020" "1101AH75020" ...
##  $ pc6_to       : chr  "7575AV" "9471AK" "2511BC" "1101AH" ...
##  $ pc4_to       : chr  "7575" "9471" "2511" "1101" ...
##  $ pc3_to       : chr  "757" "947" "251" "110" ...
##  $ pc2_to       : chr  "75" "94" "25" "11" ...
##  $ packaging    : chr  "DO" "DO" "HA" "HA" ...
##  $ packagingcode: num  1 1 2 2 2 1 4 1 1 2 ...
##  $ quantity     : num  2 1 18 2 72 4 0.5 1 2 11 ...
##  $ starttime    : chr  "0:00" "9:00" "9:00" "9:00" ...
##  $ endtime      : chr  "23:59" "11:00" "11:00" "11:00" ...
##  - attr(*, "datalabel")= chr ""
##  - attr(*, "time.stamp")= chr "14 Mar 2021 15:02"
##  - attr(*, "formats")= chr [1:21] "%9.0g" "%22.0f" "%18.0f" "%1.0f" ...
##  - attr(*, "types")= int [1:21] 254 254 254 254 5 12 6 4 3 2 ...
##  - attr(*, "val.labels")= chr [1:21] "" "file_id" "source" "flowtype" ...
##  - attr(*, "var.labels")= chr [1:21] "Record number" "File_id" "Source (transporters/other)" "Type of flow (in/out)" ...
##  - attr(*, "expansion.fields")=List of 13
##   ..$ : chr [1:3] "_dta" "note2" "data from Hennie Jordaan, on AMFI project"
##   ..$ : chr [1:3] "_dta" "note0" "2"
##   ..$ : chr [1:3] "_dta" "note1" "data from Hennie Jordaan, on AMFI project"
##   ..$ : chr [1:3] "id" "_de_col_width_" "                   12"
##   ..$ : chr [1:3] "flowtype" "_de_col_width_" "                   14"
##   ..$ : chr [1:3] "source" "_de_col_width_" "                   14"
##   ..$ : chr [1:3] "packagingcode" "_de_col_width_" "                   18"
##   ..$ : chr [1:3] "fromid" "_de_col_width_" "                   20"
##   ..$ : chr [1:3] "quantity" "destring_cmd" "destring q2c, gen(q3)"
##   ..$ : chr [1:3] "quantity" "destring" "Characters removed were:"
##   ..$ : chr [1:3] "packagingcode" "destring_cmd" "destring packagingcode, gen(pcode)"
##   ..$ : chr [1:3] "packagingcode" "destring" "Characters removed were:"
##   ..$ : chr [1:3] "datestring" "_de_col_width_" "                   20"
##  - attr(*, "version")= int 12
##  - attr(*, "label.table")=List of 3
##   ..$ file_id : Named int [1:73] 1 2 3 4 5 6 7 8 9 10 ...
##   .. ..- attr(*, "names")= chr [1:73] "50010 HA 01.02 IN.csv" "50010 HA 01.02 UIT.csv" "50010 HA 03.04 UIT.csv" "50010 HA 05.06 UIT.csv" ...
##   ..$ flowtype: Named int [1:3] 1 2 3
##   .. ..- attr(*, "names")= chr [1:3] "In" "Out" "- (filiaal)"
##   ..$ source  : Named int [1:2] 1 2
##   .. ..- attr(*, "names")= chr [1:2] "Transporters" "Branche deliveries"
# Second read of the same sample, this time with convert.factors = FALSE so
# that value-labelled variables are not turned into factors.
real2000n <- read.dta(
  file = "source_all_sample2000_stata12.dta",
  convert.factors = FALSE
)

# Read the conversion table and print it.
m3 <- read.dta(file = "m3_stata12.dta")
m3

# Cross-tabulate the packaging label against its numeric code.
table(real2000[["packaging"]], real2000[["packagingcode"]])
##     
##        1   2   3   4   5   6
##   DI   0   0   0   0   0   2
##   DO 712   0   0   0   0   0
##   HA   0 872   0   0   0   0
##   KR   0   0 257   0   0   0
##   PA   0   0   0  62   0   0
##   RO   0   0   0   0  95   0

Subparagraph: use numeric codes

Converting factors to numeric codes:

# Read the sample with convert.factors = FALSE so that value-labelled
# variables are not turned into factors.
real2000n <- read.dta(
  file = "source_all_sample2000_stata12.dta",
  convert.factors = FALSE
)

# Read the conversion table and print it.
m3 <- read.dta(file = "m3_stata12.dta")
m3

# Cross-tabulate the packaging label against its numeric code.
table(real2000[["packaging"]], real2000[["packagingcode"]])
##     
##        1   2   3   4   5   6
##   DI   0   0   0   0   0   2
##   DO 712   0   0   0   0   0
##   HA   0 872   0   0   0   0
##   KR   0   0 257   0   0   0
##   PA   0   0   0  62   0   0
##   RO   0   0   0   0  95   0

Read conversion table (packagingcode to cubic meters).

Table of packaging by packagingcode

# Read the conversion table and print it.
m3 <- read.dta(file = "m3_stata12.dta")
m3

# Cross-tabulate the packaging label against its numeric code.
table(real2000[["packaging"]], real2000[["packagingcode"]])
##     
##        1   2   3   4   5   6
##   DI   0   0   0   0   0   2
##   DO 712   0   0   0   0   0
##   HA   0 872   0   0   0   0
##   KR   0   0 257   0   0   0
##   PA   0   0   0  62   0   0
##   RO   0   0   0   0  95   0

End!