Here's how we read the sample data ...
# Load the sample of the real data from its Stata 12 file and show its
# structure.
# NOTE(review): rm(list = ls()) only removes objects from the global
# environment; it does not detach packages or reset options. Running the
# script in a fresh R session is the more reliable way to start clean.
rm(list = ls())
library(foreign)
real2000 <- read.dta(file = "source_all_sample2000_stata12.dta")
str(object = real2000)
## 'data.frame': 2000 obs. of 21 variables:
## $ rec : num 449859 160403 1292026 1422720 1307634 ...
## $ file_id : num 61 52 68 70 69 60 90 51 16 37 ...
## $ source : Factor w/ 2 levels "Transporters",..: 1 1 1 1 1 1 2 1 1 1 ...
## $ flowtype : Factor w/ 3 levels "In","Out","- (filiaal)": 1 2 2 2 1 2 3 1 2 1 ...
## $ id : chr "75020" "75020" "75020" "75020" ...
## $ fromid : chr "1101AG75020" "7575AV75020" "7575AV75020" "7575AV75020" ...
## $ pc6_from : chr "1101AG" "7575AV" "7575AV" "7575AV" ...
## $ pc4_from : chr "1101" "7575" "7575" "7575" ...
## $ pc3_from : chr "110" "757" "757" "757" ...
## $ pc2_from : chr "11" "75" "75" "75" ...
## $ datestring : chr "28-08-07" "18-4-2007" "09-10-07" "21-11-07" ...
## $ toid : chr "7575AV75020" "9471AK75020" "2511BC75020" "1101AH75020" ...
## $ pc6_to : chr "7575AV" "9471AK" "2511BC" "1101AH" ...
## $ pc4_to : chr "7575" "9471" "2511" "1101" ...
## $ pc3_to : chr "757" "947" "251" "110" ...
## $ pc2_to : chr "75" "94" "25" "11" ...
## $ packaging : chr "DO" "DO" "HA" "HA" ...
## $ packagingcode: num 1 1 2 2 2 1 4 1 1 2 ...
## $ quantity : num 2 1 18 2 72 4 0.5 1 2 11 ...
## $ starttime : chr "0:00" "9:00" "9:00" "9:00" ...
## $ endtime : chr "23:59" "11:00" "11:00" "11:00" ...
## - attr(*, "datalabel")= chr ""
## - attr(*, "time.stamp")= chr "14 Mar 2021 15:02"
## - attr(*, "formats")= chr [1:21] "%9.0g" "%22.0f" "%18.0f" "%1.0f" ...
## - attr(*, "types")= int [1:21] 254 254 254 254 5 12 6 4 3 2 ...
## - attr(*, "val.labels")= chr [1:21] "" "file_id" "source" "flowtype" ...
## - attr(*, "var.labels")= chr [1:21] "Record number" "File_id" "Source (transporters/other)" "Type of flow (in/out)" ...
## - attr(*, "expansion.fields")=List of 13
## ..$ : chr [1:3] "_dta" "note2" "data from Hennie Jordaan, on AMFI project"
## ..$ : chr [1:3] "_dta" "note0" "2"
## ..$ : chr [1:3] "_dta" "note1" "data from Hennie Jordaan, on AMFI project"
## ..$ : chr [1:3] "id" "_de_col_width_" " 12"
## ..$ : chr [1:3] "flowtype" "_de_col_width_" " 14"
## ..$ : chr [1:3] "source" "_de_col_width_" " 14"
## ..$ : chr [1:3] "packagingcode" "_de_col_width_" " 18"
## ..$ : chr [1:3] "fromid" "_de_col_width_" " 20"
## ..$ : chr [1:3] "quantity" "destring_cmd" "destring q2c, gen(q3)"
## ..$ : chr [1:3] "quantity" "destring" "Characters removed were:"
## ..$ : chr [1:3] "packagingcode" "destring_cmd" "destring packagingcode, gen(pcode)"
## ..$ : chr [1:3] "packagingcode" "destring" "Characters removed were:"
## ..$ : chr [1:3] "datestring" "_de_col_width_" " 20"
## - attr(*, "version")= int 12
## - attr(*, "label.table")=List of 3
## ..$ file_id : Named int [1:73] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..- attr(*, "names")= chr [1:73] "50010 HA 01.02 IN.csv" "50010 HA 01.02 UIT.csv" "50010 HA 03.04 UIT.csv" "50010 HA 05.06 UIT.csv" ...
## ..$ flowtype: Named int [1:3] 1 2 3
## .. ..- attr(*, "names")= chr [1:3] "In" "Out" "- (filiaal)"
## ..$ source : Named int [1:2] 1 2
## .. ..- attr(*, "names")= chr [1:2] "Transporters" "Branche deliveries"
# Second copy of the sample, read with convert.factors = FALSE so that
# labelled Stata variables are not turned into factors.
real2000n <- read.dta(
  file = "source_all_sample2000_stata12.dta",
  convert.factors = FALSE
)
# Load the m3 table and print it.
m3 <- read.dta(file = "m3_stata12.dta")
print(m3)
# Cross-tabulate the packaging label against its numeric code.
table(real2000[["packaging"]], real2000[["packagingcode"]])
##
## 1 2 3 4 5 6
## DI 0 0 0 0 0 2
## DO 712 0 0 0 0 0
## HA 0 872 0 0 0 0
## KR 0 0 257 0 0 0
## PA 0 0 0 62 0 0
## RO 0 0 0 0 95 0
Converting factors to numeric codes:
# Read the sample again with convert.factors = FALSE, which leaves labelled
# Stata variables as numeric codes instead of building factors.
real2000n <- read.dta(
  file = "source_all_sample2000_stata12.dta",
  convert.factors = FALSE
)
# Read the m3 table, then display it.
m3 <- read.dta(file = "m3_stata12.dta")
print(m3)
# Two-way frequency table: packaging (rows) by packagingcode (columns).
table(real2000[["packaging"]], real2000[["packagingcode"]])
##
## 1 2 3 4 5 6
## DI 0 0 0 0 0 2
## DO 712 0 0 0 0 0
## HA 0 872 0 0 0 0
## KR 0 0 257 0 0 0
## PA 0 0 0 62 0 0
## RO 0 0 0 0 95 0
Read conversion table (packagingcode to cubic meters).
Table of packaging by packagingcode
# Read the m3 table from its Stata file and print it.
m3 <- read.dta(file = "m3_stata12.dta")
print(m3)
# Counts of each packaging label within each packagingcode value.
table(real2000[["packaging"]], real2000[["packagingcode"]])
##
## 1 2 3 4 5 6
## DI 0 0 0 0 0 2
## DO 712 0 0 0 0 0
## HA 0 872 0 0 0 0
## KR 0 0 257 0 0 0
## PA 0 0 0 62 0 0
## RO 0 0 0 0 95 0
End!