ECON565-Assignment 1-Production Function Estimation

loading the data needed for estimation

## load.fun(x) will load library 'x' if it is installed. If 'x' has not been
## installed, it will install it. Understanding this code is not necessary
## source:
## http://r.789695.n4.nabble.com/Install-package-automatically-if-not-there-tp2267532p2267659.html
load.fun <- function(x) {
    x <- as.character(substitute(x))
    if (isTRUE(x %in% .packages(all.available = TRUE))) {
        eval(parse(text = paste("require(", x, ")", sep = "")))
    } else {
        # update.packages() ## good idea, but may take some time. can usually be
        # safely skipped
        eval(parse(text = paste("install.packages('", x, "')", sep = "")))
        eval(parse(text = paste("require(", x, ")", sep = "")))
    }
}

################################################################################ Download the data if necessary
if (!file.exists("exports_oct05.dta")) {
    if (!file.exists("cfldjae.zip")) {
        ## Long-term, we should not rely on the data always being available at the
        ## same url, but writing code to automatically download the data at least
        ## makes sure we document where the data originally came from.
        url <- "http://www.csae.ox.ac.uk/datasets/cfld/cfldjae.zip"
        download.file(url, "cfldjae.zip", mode = "wb")
    }
    unzip(zipfile = "cfldjae.zip", files = "exports_oct05.dta")
}
################################################################################ 

################################################################################ Load the data
load.fun(foreign)  # package for reading foreign data formats,

## Loading required package: foreign

# including Stata .dta files

df <- read.dta("exports_oct05.dta")

# Data cleaning - remove redundant variables. We could leave them in, but
# since this data did not come with documentation saying what each variable
# means, and the variable names are not very descriptive, I prefer to remove
# unnecessary variables to avoid confusion.

# This part repeatedly uses 'grep', to search the array of strings
# colnames(df) for anything matching the specified pattern and returns the
# indices of the the matches.  It also relies on the fact that for any
# array, a negative indice means the array with that entry/row/column/slice
# removed

# remove year dummies
df <- df[, -grep("ydum", colnames(df))]
# remove lagged variables
df <- df[, -grep("_1", colnames(df))]
df <- df[, -grep("_2", colnames(df))]

# fix country label
df$country[df$wbcode == "NGA"] = "Nigeria"
df$country[df$wbcode == "GHA"] = "Ghana"
df$country[df$wbcode == "KEN"] = "Kenya"
df$country[df$wbcode == "TZA"] = "Tanzania"
df$country[df$wbcode == "ZAF"] = "South Africa"
df$country <- as.factor(df$country)
# remove redundant country identifiers
df <- df[, -grep("^wbcode$", colnames(df), perl = TRUE)]
df <- df[, -grep("^kenya|^ghana|^tanzania|^nigeria|^sa$", colnames(df), perl = TRUE)]
df <- df[, -grep("^texgarsa$", colnames(df), perl = TRUE)]

df$industry <- NA
df$industry[df$textile == 1] <- "textile"
df$industry[df$garment == 1] <- "garment"
df$industry[df$wood == 1] <- "wood"
df$industry[df$furn == 1] <- "furniture"  # ? not certain?
df$industry[df$foods_bak == 1] <- "food and baking"
df$industry[df$met_mac == 1] <- "metals and machinery"  # ? what ?
df$industry <- as.factor(df$industry)
df <- df[, -grep("textile|garment|wood|furn|foods_bak|met_mac", colnames(df), 
    perl = TRUE)]

## recover output, materials, capital from lrl=log(output/labor), etc
df$labor <- exp(df$ll)
df$output <- exp(df$lrl) * df$labor
df$capital <- exp(df$lkl) * df$labor
df$materials <- exp(df$lml) * df$labor
df$indirect.costs <- exp(df$lol) * df$labor  # !? what does that mean ?
df <- df[, -grep("^l.l$", colnames(df), perl = TRUE)]
df <- df[, -grep("^ll$", colnames(df), perl = TRUE)]
df <- df[, -grep("^emp$", colnames(df), perl = TRUE)]
df <- df[, -grep("^lprateus$", colnames(df), perl = TRUE)]
df <- df[, -grep("^lrmawus$", colnames(df), perl = TRUE)]
df <- df[, -grep("^pexp", colnames(df), perl = TRUE)]


## rename some variables
names(df)[grep("anyfor", names(df))] <- "any.foreign.own"
names(df)[grep("fmage", names(df))] <- "firm.age"
#################################################################################

Problem 1

1.1 Descriptive Statistics

summary(df)  ## show some summary statistics of df

##       firm         any.foreign.own      year         firm.age    
##  Min.   :   1001   Min.   :0.00    Min.   :1991   Min.   :  1.0  
##  1st Qu.:   1548   1st Qu.:0.00    1st Qu.:1994   1st Qu.:  9.0  
##  Median :   3156   Median :0.00    Median :1997   Median : 17.0  
##  Mean   :  86038   Mean   :0.18    Mean   :1996   Mean   : 18.8  
##  3rd Qu.:  50418   3rd Qu.:0.00    3rd Qu.:1999   3rd Qu.: 25.0  
##  Max.   :1009999   Max.   :1.00    Max.   :2003   Max.   :102.0  
##                    NA's   :257                    NA's   :1691   
##     exports        exportna       exporta        rmawagus     
##  Min.   :0      Min.   :0      Min.   :0      Min.   :  -729  
##  1st Qu.:0      1st Qu.:0      1st Qu.:0      1st Qu.:    22  
##  Median :0      Median :0      Median :0      Median :    51  
##  Mean   :0      Mean   :0      Mean   :0      Mean   :   158  
##  3rd Qu.:0      3rd Qu.:0      3rd Qu.:0      3rd Qu.:   110  
##  Max.   :1      Max.   :1      Max.   :1      Max.   :117383  
##  NA's   :5178   NA's   :6106   NA's   :6103   NA's   :5357    
##      eduwgt         agewgt         tenwgt         ernus     
##  Min.   : 0     Min.   : 0     Min.   : 0     Min.   :   0  
##  1st Qu.: 7     1st Qu.:26     1st Qu.: 3     1st Qu.:  38  
##  Median : 9     Median :32     Median : 6     Median :  61  
##  Mean   : 9     Mean   :31     Mean   : 7     Mean   :  73  
##  3rd Qu.:10     3rd Qu.:36     3rd Qu.: 9     3rd Qu.:  91  
##  Max.   :21     Max.   :65     Max.   :30     Max.   :1310  
##  NA's   :6190   NA's   :6190   NA's   :6190   NA's   :6522  
##          country        prateus        kus_routus       entex       
##  Ghana       :3390   Min.   :-1578   Min.   :  0    Min.   :0.0000  
##  Kenya       :3240   1st Qu.:    0   1st Qu.:  0    1st Qu.:0.0000  
##  Nigeria     : 700   Median :    0   Median :  0    Median :0.0000  
##  South Africa: 404   Mean   :    1   Mean   :  2    Mean   :0.0088  
##  Tanzania    :2625   3rd Qu.:    1   3rd Qu.:  1    3rd Qu.:0.0000  
##                      Max.   :  633   Max.   :191    Max.   :1.0000  
##                      NA's   :5580    NA's   :5274                   
##      exitex                       industry        labor      
##  Min.   :0.0000   food and baking     :2265   Min.   :    1  
##  1st Qu.:0.0000   furniture           :1820   1st Qu.:    8  
##  Median :0.0000   garment             :1741   Median :   25  
##  Mean   :0.0079   metals and machinery:3084   Mean   :  125  
##  3rd Qu.:0.0000   textile             : 648   3rd Qu.:   81  
##  Max.   :1.0000   wood                : 781   Max.   :13000  
##                   NA's                :  20   NA's   :4888   
##      output            capital           materials       
##  Min.   :8.70e+01   Min.   :2.30e+01   Min.   :3.00e+00  
##  1st Qu.:1.87e+04   1st Qu.:3.74e+03   1st Qu.:8.71e+03  
##  Median :1.15e+05   Median :9.74e+04   Median :5.29e+04  
##  Mean   :4.09e+06   Mean   :3.24e+06   Mean   :2.38e+06  
##  3rd Qu.:1.02e+06   3rd Qu.:8.06e+05   3rd Qu.:4.78e+05  
##  Max.   :5.15e+08   Max.   :5.70e+08   Max.   :3.84e+08  
##  NA's   :5149       NA's   :5120       NA's   :5277      
##  indirect.costs    
##  Min.   :       8  
##  1st Qu.:    1280  
##  Median :   10527  
##  Mean   :  458566  
##  3rd Qu.:   98569  
##  Max.   :68251868  
##  NA's   :5226

sd(df$output, na.rm = TRUE)  ## to calculate the standard deviation of variable 'output'

## [1] 22008558

apply(df, 2, FUN = function(x) {
    sd(x, na.rm = TRUE)
})  ## to calculate the standard deviation to all the colum variables

## Warning: ǿ?Ƹı??????в?????NA
## Warning: ǿ?Ƹı??????в?????NA

##            firm any.foreign.own            year        firm.age 
##       2.487e+05       3.801e-01       3.097e+00       1.324e+01 
##         exports        exportna         exporta        rmawagus 
##       4.134e-01       3.317e-01       3.856e-01       1.708e+03 
##          eduwgt          agewgt          tenwgt           ernus 
##       2.575e+00       7.882e+00       4.540e+00       6.244e+01 
##         country         prateus      kus_routus           entex 
##              NA       3.075e+01       6.326e+00       9.332e-02 
##          exitex        industry           labor          output 
##       8.862e-02              NA       4.773e+02       2.201e+07 
##         capital       materials  indirect.costs 
##       2.146e+07       1.512e+07       2.795e+06

install.packages("Hmisc")  ## install a package speciallized to do the descriptive summary

## Installing package into 'C:/Users/WANG Kun/Documents/R/win-library/3.0'
## (as 'lib' is unspecified)

## Error: trying to use CRAN without setting a mirror

library(Hmisc)  ## cite the function of Hmisc

## Loading required package: grid
## Loading required package: lattice
## Loading required package: survival
## Loading required package: splines
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## 
## ???ж??????��???from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units

describe(df)  ## Hmisc's function to describe the data

## df 
## 
##  23  Variables      10359  Observations
## ---------------------------------------------------------------------------
## firm 
##       n missing  unique    Mean     .05     .10     .25     .50     .75 
##   10359       0    1447   86038    1070    1136    1548    3156   50418 
##     .90     .95 
##  100097 1003051 
## 
## lowest :    1001    1002    1003    1004    1005
## highest: 1008022 1009082 1009152 1009998 1009999 
## ---------------------------------------------------------------------------
## any.foreign.own 
##       n missing  unique     Sum    Mean 
##   10102     257       2    1769  0.1751 
## ---------------------------------------------------------------------------
## year 
##       n missing  unique    Mean     .05     .10     .25     .50     .75 
##   10359       0      13    1997    1992    1992    1994    1997    1999 
##     .90     .95 
##    2000    2002 
## 
##           1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003
## Frequency  265 1045 1045  679 1054  670 1237 1362 1180  805  381  383  253
## %            3   10   10    7   10    6   12   13   11    8    4    4    2
## ---------------------------------------------------------------------------
## firm.age 
##       n missing  unique    Mean     .05     .10     .25     .50     .75 
##    8668    1691      87   18.77       2       4       9      17      25 
##     .90     .95 
##      35      44 
## 
## lowest :   1   2   3   4   5, highest:  97  99 100 101 102 
## ---------------------------------------------------------------------------
## exports 
##       n missing  unique     Sum    Mean 
##    5181    5178       2    1133  0.2187 
## ---------------------------------------------------------------------------
## exportna 
##       n missing  unique     Sum    Mean 
##    4253    6106       2     535  0.1258 
## ---------------------------------------------------------------------------
## exporta 
##       n missing  unique     Sum    Mean 
##    4256    6103       2     773  0.1816 
## ---------------------------------------------------------------------------
## rmawagus 
##       n missing  unique    Mean     .05     .10     .25     .50     .75 
##    5002    5357    4594   157.7   1.548   6.395  21.647  50.971 110.079 
##     .90     .95 
## 286.582 565.259 
## 
## lowest : -7.295e+02 -5.840e+02  0.000e+00  1.898e-02  7.252e-02
## highest:  5.581e+03  5.710e+03  1.450e+04  1.613e+04  1.174e+05 
## ---------------------------------------------------------------------------
## eduwgt 
##       n missing  unique    Mean     .05     .10     .25     .50     .75 
##    4169    6190    2945   8.753   4.667   5.706   7.200   8.982  10.413 
##     .90     .95 
##  11.727  12.570 
## 
## lowest :  0.0000  0.6711  0.7500  1.1237  1.2500
## highest: 17.2904 20.0000 20.9728 21.0926 21.0998 
## ---------------------------------------------------------------------------
## agewgt 
##       n missing  unique    Mean     .05     .10     .25     .50     .75 
##    4169    6190    3141   31.35   19.21   21.31   26.00   31.95   36.02 
##     .90     .95 
##   40.80   44.14 
## 
## lowest :  0.00  6.00  8.00  9.00 10.00
## highest: 60.00 61.27 61.65 63.00 65.00 
## ---------------------------------------------------------------------------
## tenwgt 
##       n missing  unique    Mean     .05     .10     .25     .50     .75 
##    4169    6190    3160   6.731   1.083   1.707   3.375   6.148   8.634 
##     .90     .95 
##  12.680  15.796 
## 
## lowest :  0.00000  0.08333  0.11111  0.12037  0.12500
## highest: 27.44974 27.68838 28.33333 29.00000 30.00000 
## ---------------------------------------------------------------------------
## ernus 
##       n missing  unique    Mean     .05     .10     .25     .50     .75 
##    3837    6522    3456   73.18   10.78   20.72   38.20   61.11   91.00 
##     .90     .95 
##  132.52  167.69 
## 
## lowest :    0.0000    0.0199    0.4939    1.0208    1.3781
## highest:  625.6652  725.2505  816.6256  848.7915 1310.4449 
## ---------------------------------------------------------------------------
## country 
##       n missing  unique 
##   10359       0       5 
## 
##           Ghana Kenya Nigeria South Africa Tanzania
## Frequency  3390  3240     700          404     2625
## %            33    31       7            4       25
## ---------------------------------------------------------------------------
## prateus 
##        n  missing   unique     Mean      .05      .10      .25      .50 
##     4779     5580     4764    1.473 -0.16114 -0.02289  0.05345  0.28957 
##      .75      .90      .95 
##  1.28035  4.26932  7.94606 
## 
## lowest : -1577.67  -726.20  -694.92  -149.92   -26.69
## highest:   113.39   204.54   301.54   537.84   632.60 
## ---------------------------------------------------------------------------
## kus_routus 
##       n missing  unique    Mean     .05     .10     .25     .50     .75 
##    5085    5274    5077   1.767 0.02484 0.04830 0.13877 0.46772 1.42857 
##     .90     .95 
## 3.72958 6.54954 
## 
## lowest : 4.148e-04 9.916e-04 1.169e-03 1.895e-03 3.069e-03
## highest: 9.570e+01 9.681e+01 1.534e+02 1.714e+02 1.908e+02 
## ---------------------------------------------------------------------------
## entex 
##        n  missing   unique      Sum     Mean 
##    10359        0        2       91 0.008785 
## ---------------------------------------------------------------------------
## exitex 
##        n  missing   unique      Sum     Mean 
##    10359        0        2       82 0.007916 
## ---------------------------------------------------------------------------
## industry 
##       n missing  unique 
##   10339      20       6 
## 
## food and baking (2265, 22%) 
## furniture (1820, 18%), garment (1741, 17%) 
## metals and machinery (3084, 30%) 
## textile (648, 6%), wood (781, 8%) 
## ---------------------------------------------------------------------------
## labor 
##       n missing  unique    Mean     .05     .10     .25     .50     .75 
##    5471    4888     557   124.9     3.0     4.0     8.0    25.0    81.0 
##     .90     .95 
##   249.0   434.5 
## 
## lowest :     1     2     3     4     5
## highest:  8115  8326  8338 12000 13000 
## ---------------------------------------------------------------------------
## output 
##        n  missing   unique     Mean      .05      .10      .25      .50 
##     5210     5149     5098  4086075     3309     5912    18709   114985 
##      .75      .90      .95 
##  1023184  5859787 14671149 
## 
## lowest : 8.702e+01 1.070e+02 1.666e+02 2.385e+02 2.660e+02
## highest: 3.708e+08 3.936e+08 4.173e+08 4.600e+08 5.154e+08 
## ---------------------------------------------------------------------------
## capital 
##         n   missing    unique      Mean       .05       .10       .25 
##      5239      5120      5117   3240458 3.839e+02 7.935e+02 3.738e+03 
##       .50       .75       .90       .95 
## 9.740e+04 8.060e+05 4.452e+06 1.018e+07 
## 
## lowest : 2.324e+01 2.429e+01 2.620e+01 3.145e+01 3.763e+01
## highest: 3.350e+08 3.418e+08 5.556e+08 5.601e+08 5.701e+08 
## ---------------------------------------------------------------------------
## materials 
##       n missing  unique    Mean     .05     .10     .25     .50     .75 
##    5082    5277    4977 2376439    1286    2577    8714   52936  477915 
##     .90     .95 
## 2799931 7503468 
## 
## lowest : 3.431e+00 1.693e+01 2.031e+01 3.806e+01 4.129e+01
## highest: 2.845e+08 3.194e+08 3.436e+08 3.478e+08 3.841e+08 
## ---------------------------------------------------------------------------
## indirect.costs 
##         n   missing    unique      Mean       .05       .10       .25 
##      5133      5226      5092    458566     171.6     344.1    1280.0 
##       .50       .75       .90       .95 
##   10526.7   98568.5  536135.4 1587255.4 
## 
## lowest : 7.565e+00 1.024e+01 1.084e+01 1.136e+01 1.224e+01
## highest: 5.474e+07 5.781e+07 6.092e+07 6.408e+07 6.825e+07 
## ---------------------------------------------------------------------------

Problem 1.2. Descriptive plots

ist(log(df$output))  ## plot the histogram of the logoutput##

## Error: û??"ist"????????

plot(x = log(df$labor), y = log(df$output))  ## scatter plot of log(output) and log(labor)

plot of chunk unnamed-chunk-3

install.packages("ggplot2")  ## install the ggplot package

## Installing package into 'C:/Users/WANG Kun/Documents/R/win-library/3.0'
## (as 'lib' is unspecified)

## Error: trying to use CRAN without setting a mirror

load.fun(ggplot2)

## Loading required package: ggplot2

library(ggplot2)
output.histogram <- ggplot(df, aes(x = log(output), fill = country)) + geom_histogram() + 
    facet_grid(country ~ industry) + theme_minimal() + scale_fill_brewer(type = "qual", 
    guide = FALSE)
output.histogram  ## the histogram by country and indsutry

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-3

Problem 2: Create scatter plots of log output with log capital, log labor, log materials, and log indirect costs

scatter.capital <- ggplot(df, aes(x = log(capital), y = log(output), color = country)) + 
    geom_point() + facet_grid(country ~ industry) + theme_bw() + scale_color_brewer(type = "qual", 
    guide = FALSE)
scatter.capital

## Warning: Removed 384 rows containing missing values (geom_point).
## Warning: Removed 296 rows containing missing values (geom_point).
## Warning: Removed 300 rows containing missing values (geom_point).
## Warning: Removed 354 rows containing missing values (geom_point).
## Warning: Removed 75 rows containing missing values (geom_point).
## Warning: Removed 127 rows containing missing values (geom_point).
## Warning: Removed 538 rows containing missing values (geom_point).
## Warning: Removed 318 rows containing missing values (geom_point).
## Warning: Removed 398 rows containing missing values (geom_point).
## Warning: Removed 572 rows containing missing values (geom_point).
## Warning: Removed 156 rows containing missing values (geom_point).
## Warning: Removed 183 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 340 rows containing missing values (geom_point).
## Warning: Removed 336 rows containing missing values (geom_point).
## Warning: Removed 203 rows containing missing values (geom_point).
## Warning: Removed 509 rows containing missing values (geom_point).
## Warning: Removed 106 rows containing missing values (geom_point).
## Warning: Removed 112 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-4

scatter.labor <- ggplot(df, aes(x = log(labor), y = log(output), color = country)) + 
    geom_point() + facet_grid(country ~ industry) + theme_bw() + scale_color_brewer(type = "qual", 
    guide = FALSE)
scatter.labor

## Warning: Removed 372 rows containing missing values (geom_point).
## Warning: Removed 291 rows containing missing values (geom_point).
## Warning: Removed 293 rows containing missing values (geom_point).
## Warning: Removed 312 rows containing missing values (geom_point).
## Warning: Removed 74 rows containing missing values (geom_point).
## Warning: Removed 123 rows containing missing values (geom_point).
## Warning: Removed 524 rows containing missing values (geom_point).
## Warning: Removed 316 rows containing missing values (geom_point).
## Warning: Removed 385 rows containing missing values (geom_point).
## Warning: Removed 563 rows containing missing values (geom_point).
## Warning: Removed 154 rows containing missing values (geom_point).
## Warning: Removed 181 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 337 rows containing missing values (geom_point).
## Warning: Removed 322 rows containing missing values (geom_point).
## Warning: Removed 198 rows containing missing values (geom_point).
## Warning: Removed 485 rows containing missing values (geom_point).
## Warning: Removed 106 rows containing missing values (geom_point).
## Warning: Removed 107 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-4

scatter.materials <- ggplot(df, aes(x = log(materials), y = log(output), color = country)) + 
    geom_point() + facet_grid(country ~ industry) + theme_bw() + scale_color_brewer(type = "qual", 
    guide = FALSE)
scatter.materials

## Warning: Removed 374 rows containing missing values (geom_point).
## Warning: Removed 296 rows containing missing values (geom_point).
## Warning: Removed 295 rows containing missing values (geom_point).
## Warning: Removed 320 rows containing missing values (geom_point).
## Warning: Removed 75 rows containing missing values (geom_point).
## Warning: Removed 126 rows containing missing values (geom_point).
## Warning: Removed 555 rows containing missing values (geom_point).
## Warning: Removed 327 rows containing missing values (geom_point).
## Warning: Removed 405 rows containing missing values (geom_point).
## Warning: Removed 577 rows containing missing values (geom_point).
## Warning: Removed 158 rows containing missing values (geom_point).
## Warning: Removed 187 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 34 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 337 rows containing missing values (geom_point).
## Warning: Removed 322 rows containing missing values (geom_point).
## Warning: Removed 198 rows containing missing values (geom_point).
## Warning: Removed 485 rows containing missing values (geom_point).
## Warning: Removed 106 rows containing missing values (geom_point).
## Warning: Removed 107 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-4

scatter.indirect.costs <- ggplot(df, aes(x = log(indirect.costs), y = log(output), 
    color = country)) + geom_point() + facet_grid(country ~ industry) + theme_bw() + 
    scale_color_brewer(type = "qual", guide = FALSE)
scatter.indirect.costs

## Warning: Removed 377 rows containing missing values (geom_point).
## Warning: Removed 299 rows containing missing values (geom_point).
## Warning: Removed 298 rows containing missing values (geom_point).
## Warning: Removed 317 rows containing missing values (geom_point).
## Warning: Removed 74 rows containing missing values (geom_point).
## Warning: Removed 123 rows containing missing values (geom_point).
## Warning: Removed 542 rows containing missing values (geom_point).
## Warning: Removed 333 rows containing missing values (geom_point).
## Warning: Removed 399 rows containing missing values (geom_point).
## Warning: Removed 585 rows containing missing values (geom_point).
## Warning: Removed 158 rows containing missing values (geom_point).
## Warning: Removed 185 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).
## Warning: Removed 27 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 337 rows containing missing values (geom_point).
## Warning: Removed 322 rows containing missing values (geom_point).
## Warning: Removed 198 rows containing missing values (geom_point).
## Warning: Removed 485 rows containing missing values (geom_point).
## Warning: Removed 106 rows containing missing values (geom_point).
## Warning: Removed 107 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-4

create 3d plot

load.fun(rgl)

## Loading required package: rgl

open3d()

## wgl 
##   1

rgl.spheres(log(df$labor), log(df$capital), log(df$output), color = "grey", 
    radius = 0.05, specular = "white")
axes3d(c("x", "y", "z"))
title3d("", "", "log labor", "log capital", "log output")
grid3d(c("x", "y", "z"))

OLS estimation of the production function

summary(lm(log(output) ~ log(capital) + log(labor) + log(materials), data = df))

## 
## Call:
## lm(formula = log(output) ~ log(capital) + log(labor) + log(materials), 
##     data = df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.389 -0.259 -0.041  0.203  5.601 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.84128    0.03364    54.7   <2e-16 ***
## log(capital)    0.08649    0.00403    21.4   <2e-16 ***
## log(labor)      0.21919    0.00842    26.0   <2e-16 ***
## log(materials)  0.75205    0.00481   156.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.462 on 4898 degrees of freedom
##   (5457 observations deleted due to missingness)
## Multiple R-squared:  0.969,  Adjusted R-squared:  0.969 
## F-statistic: 5.13e+04 on 3 and 4898 DF,  p-value: <2e-16

load ClusterFunctions.R

## These functions also produce heteroskedasticity robust clustered standard
## errors. They are adapted from http://people.su.se/~ma/mcluster.R
require(lmtest)

## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## ???ж??????��???from 'package:base':
## 
##     as.Date, as.Date.numeric


cl <- function(dat, fm, cluster) {
    ef <- estfun(fm)
    if (length(cluster) != nrow(ef)) {
        cluster <- cluster[as.numeric(rownames(ef))]
    }

    M <- length(unique(cluster))
    N <- length(cluster)
    dfc <- (M/(M - 1)) * ((N - 1)/(N - fm$rank))
    u <- apply(estfun(fm), 2, function(x) tapply(x, cluster, sum))
    vcovCL <- dfc * sandwich(fm, meat = crossprod(u)/N)
    coeftest(fm, vcovCL)
}

cl.plm <- function(dat, fm, cluster) {
    require(sandwich, quietly = TRUE)
    require(lmtest, quietly = TRUE)
    ef <- estfun.plm(fm)
    if (length(cluster) != nrow(ef)) {
        cluster <- cluster[as.numeric(rownames(ef))]
    }
    M <- length(unique(cluster))
    N <- length(cluster)
    K <- ncol(ef)
    dfc <- (M/(M - 1)) * ((N - 1)/(N - K))

    uj <- apply(ef, 2, function(x) tapply(x, cluster, sum))
    bread <- bread.plm(fm)
    meat <- crossprod(uj)
    vcovCL <- dfc * bread %*% meat %*% bread
    coeftest(fm, vcovCL)
}

estfun.plm <- function(x, ...) {
    model <- x$args$model
    formula <- formula(x)
    if (length(formula)[2] > 1) {
        ## 2SLS
        data <- model.frame(x)
        effect <- x$args$effect
        X <- model.matrix(formula, data, rhs = 1, model = model, effect = effect, 
            theta = theta)
        if (length(formula)[2] == 2) 
            W <- model.matrix(formula, data, rhs = 2, model = model, effect = effect, 
                theta = theta) else W <- model.matrix(formula, data, rhs = c(2, 3), model = model, 
            effect = effect, theta = theta)
        Xhat <- lm(X ~ W)$fit
        return(x$residuals * Xhat)
    } else {
        ## OLS
        X <- model.matrix(x, model = model)
        return(x$residuals * X)
    }
}


bread.plm <- function(x, ...) {
    model <- x$args$model
    formula <- formula(x)
    if (length(formula)[2] > 1) {
        ## 2SLS
        data <- model.frame(x)
        effect <- x$args$effect
        X <- model.matrix(formula, data, rhs = 1, model = model, effect = effect, 
            theta = theta)
        if (length(formula)[2] == 2) 
            W <- model.matrix(formula, data, rhs = 2, model = model, effect = effect, 
                theta = theta) else W <- model.matrix(formula, data, rhs = c(2, 3), model = model, 
            effect = effect, theta = theta)
        Xhat <- lm(X ~ W)$fit
        return(solve(crossprod(Xhat)))
    } else {
        ## OLS
        X <- model.matrix(x, model = model)
        return(solve(crossprod(X)))
    }
}
load.fun(plm)

## Loading required package: plm

install.packages("lmtest")

## Installing package into 'C:/Users/WANG Kun/Documents/R/win-library/3.0'
## (as 'lib' is unspecified)

## Error: trying to use CRAN without setting a mirror

library(lmtest)
install.packages("zoo")

## Installing package into 'C:/Users/WANG Kun/Documents/R/win-library/3.0'
## (as 'lib' is unspecified)

## Error: trying to use CRAN without setting a mirror

library(zoo)
prod.ols <- plm(log(output) ~ log(capital) + log(labor) + log(materials), data = df, 
    model = "pooling", index = c("firm", "year"))
cl.plm(df, prod.ols, df$firm)

## 
## t test of coefficients:
## 
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.84128    0.04931    37.3   <2e-16 ***
## log(capital)    0.08649    0.00576    15.0   <2e-16 ***
## log(labor)      0.21919    0.01159    18.9   <2e-16 ***
## log(materials)  0.75205    0.01007    74.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Problem 3: What are estimated returns to scale

plm(log(output) ~ log(capital) + log(labor) + log(materials) + industry + as.factor(year), 
    data = df, model = "pooling", index = c("firm", "year"))  ## regression including industry and year dummies

## 
## Model Formula: log(output) ~ log(capital) + log(labor) + log(materials) + industry + 
##     as.factor(year)
## 
## Coefficients:
##                  (Intercept)                 log(capital) 
##                      1.80807                      0.08669 
##                   log(labor)               log(materials) 
##                      0.22834                      0.74506 
##            industryfurniture              industrygarment 
##                     -0.03341                      0.00559 
## industrymetals and machinery              industrytextile 
##                      0.01836                     -0.13178 
##                 industrywood          as.factor(year)1992 
##                      0.02545                      0.02694 
##          as.factor(year)1993          as.factor(year)1994 
##                      0.01228                      0.11514 
##          as.factor(year)1995          as.factor(year)1996 
##                      0.05481                      0.01640 
##          as.factor(year)1997          as.factor(year)1998 
##                      0.09882                      0.12022 
##          as.factor(year)1999          as.factor(year)2000 
##                      0.06475                      0.12641 
##          as.factor(year)2001          as.factor(year)2002 
##                      0.17281                      0.15764 
##          as.factor(year)2003 
##                      0.16231

prod.industry <- plm(log(output) ~ (log(capital) + log(labor) + log(materials)) * 
    industry + as.factor(year), data = df, model = "pooling", index = c("firm", 
    "year"))

using these regressions to answer Problem 4

cl.plm(df, prod.industry, df$firm)

## 
## t test of coefficients:
## 
##                                             Estimate Std. Error t value
## (Intercept)                                  2.04844    0.19778   10.36
## log(capital)                                 0.08101    0.01054    7.69
## log(labor)                                   0.30347    0.04555    6.66
## log(materials)                               0.70894    0.02840   24.96
## industryfurniture                           -0.67985    0.21258   -3.20
## industrygarment                              0.25828    0.25428    1.02
## industrymetals and machinery                -0.45267    0.21063   -2.15
## industrytextile                              0.04843    0.39404    0.12
## industrywood                                 0.00372    0.25500    0.01
## as.factor(year)1992                          0.02692    0.03761    0.72
## as.factor(year)1993                          0.02431    0.03940    0.62
## as.factor(year)1994                          0.12678    0.04330    2.93
## as.factor(year)1995                          0.06689    0.03826    1.75
## as.factor(year)1996                          0.02113    0.04091    0.52
## as.factor(year)1997                          0.08191    0.03622    2.26
## as.factor(year)1998                          0.11471    0.03414    3.36
## as.factor(year)1999                          0.07384    0.03474    2.13
## as.factor(year)2000                          0.12925    0.03755    3.44
## as.factor(year)2001                          0.17004    0.04506    3.77
## as.factor(year)2002                          0.15283    0.04180    3.66
## as.factor(year)2003                          0.14251    0.05852    2.44
## log(capital):industryfurniture               0.00243    0.01488    0.16
## log(capital):industrygarment                 0.02749    0.02007    1.37
## log(capital):industrymetals and machinery   -0.00578    0.01326   -0.44
## log(capital):industrytextile                 0.02598    0.04007    0.65
## log(capital):industrywood                    0.00119    0.01975    0.06
## log(labor):industryfurniture                -0.11669    0.05278   -2.21
## log(labor):industrygarment                  -0.12899    0.05096   -2.53
## log(labor):industrymetals and machinery     -0.10381    0.04933   -2.10
## log(labor):industrytextile                  -0.03428    0.07574   -0.45
## log(labor):industrywood                     -0.02645    0.05937   -0.45
## log(materials):industryfurniture             0.09160    0.03086    2.97
## log(materials):industrygarment              -0.02544    0.04326   -0.59
## log(materials):industrymetals and machinery  0.07557    0.03210    2.35
## log(materials):industrytextile              -0.03375    0.08335   -0.40
## log(materials):industrywood                  0.00360    0.03579    0.10
##                                             Pr(>|t|)    
## (Intercept)                                  < 2e-16 ***
## log(capital)                                 1.8e-14 ***
## log(labor)                                   3.0e-11 ***
## log(materials)                               < 2e-16 ***
## industryfurniture                            0.00139 ** 
## industrygarment                              0.30982    
## industrymetals and machinery                 0.03167 *  
## industrytextile                              0.90219    
## industrywood                                 0.98837    
## as.factor(year)1992                          0.47415    
## as.factor(year)1993                          0.53729    
## as.factor(year)1994                          0.00343 ** 
## as.factor(year)1995                          0.08050 .  
## as.factor(year)1996                          0.60550    
## as.factor(year)1997                          0.02376 *  
## as.factor(year)1998                          0.00079 ***
## as.factor(year)1999                          0.03361 *  
## as.factor(year)2000                          0.00058 ***
## as.factor(year)2001                          0.00016 ***
## as.factor(year)2002                          0.00026 ***
## as.factor(year)2003                          0.01492 *  
## log(capital):industryfurniture               0.87026    
## log(capital):industrygarment                 0.17078    
## log(capital):industrymetals and machinery    0.66319    
## log(capital):industrytextile                 0.51682    
## log(capital):industrywood                    0.95189    
## log(labor):industryfurniture                 0.02709 *  
## log(labor):industrygarment                   0.01140 *  
## log(labor):industrymetals and machinery      0.03540 *  
## log(labor):industrytextile                   0.65089    
## log(labor):industrywood                      0.65598    
## log(materials):industryfurniture             0.00301 ** 
## log(materials):industrygarment               0.55656    
## log(materials):industrymetals and machinery  0.01858 *  
## log(materials):industrytextile               0.68555    
## log(materials):industrywood                  0.91997    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

including country dummies

prod.country1 <- plm(log(output) ~ log(capital) + log(labor) + log(materials) + 
    industry + country + as.factor(year), data = df, model = "pooling", index = c("firm", 
    "year"))  ## regression including country dummy
`?`(cl.plm(df, prod.country1, df$firm), `?`(`?`(prod.country2 <- plm(log(output) ~ 
    (log(capital) + log(labor) + log(materials)) * country + industry + as.factor(year), 
    data = df, model = "pooling", index = c("firm", "year"))  ## regression allowing coefficients varying with country
)))

## Error: argument 'pattern' must be a single character string


cl.plm(df, prod.country2, df$firm)

## Error: ?Ҳ???????'prod.country2'

Fixed effect estimation

prod.fe <- plm(log(output) ~ log(capital) + log(labor) + log(materials) + industry + 
    as.factor(year), data = df, model = "within", index = c("firm", "year"))
cl.plm(df, prod.fe, df$firm)

## 
## t test of coefficients:
## 
##                     Estimate Std. Error t value Pr(>|t|)    
## log(capital)          0.0302     0.0133    2.27  0.02338 *  
## log(labor)            0.1870     0.0191    9.79  < 2e-16 ***
## log(materials)        0.6407     0.0196   32.76  < 2e-16 ***
## as.factor(year)1992   0.1469     0.0394    3.73  0.00019 ***
## as.factor(year)1993   0.1526     0.0400    3.82  0.00014 ***
## as.factor(year)1994   0.2132     0.0418    5.10  3.6e-07 ***
## as.factor(year)1995   0.1601     0.0377    4.25  2.2e-05 ***
## as.factor(year)1996   0.0763     0.0401    1.90  0.05727 .  
## as.factor(year)1997   0.0613     0.0372    1.65  0.09980 .  
## as.factor(year)1998   0.0984     0.0373    2.64  0.00840 ** 
## as.factor(year)1999   0.1257     0.0380    3.30  0.00096 ***
## as.factor(year)2000   0.1415     0.0382    3.71  0.00021 ***
## as.factor(year)2001   0.1273     0.0414    3.08  0.00210 ** 
## as.factor(year)2002   0.1112     0.0410    2.72  0.00664 ** 
## as.factor(year)2003   0.0792     0.0487    1.63  0.10418    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Problem 5 OP Estimation

df <- df[order(df$firm, df$year), ]
df$invest <- c(df$capital[2:nrow(df)] - df$capital[1:(nrow(df) - 1)], NA)
df$invest[c(df$firm[2:nrow(df)] != df$firm[1:(nrow(df) - 1)], FALSE)] <- NA
notmissing <- !is.na(df$capital) & !is.na(df$invest)  ##remove the missing values
df.nm <- subset(df, notmissing)
df.nm <- df.nm[order(df.nm$firm, df.nm$year), ]

first step regression

op1 <- lm(log(output) ~ log(labor) + log(materials) + poly(cbind(log(capital), 
    invest), degree = 4), data = df.nm)
summary(op1)

## 
## Call:
## lm(formula = log(output) ~ log(labor) + log(materials) + poly(cbind(log(capital), 
##     invest), degree = 4), data = df.nm)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.315 -0.255 -0.039  0.207  5.254 
## 
## Coefficients:
##                                                   Estimate Std. Error
## (Intercept)                                       2.94e+00   9.20e-02
## log(labor)                                        1.97e-01   9.86e-03
## log(materials)                                    7.50e-01   5.62e-03
## poly(cbind(log(capital), invest), degree = 4)1.0  1.48e+01   4.24e+00
## poly(cbind(log(capital), invest), degree = 4)2.0  3.59e+00   1.78e+00
## poly(cbind(log(capital), invest), degree = 4)3.0 -2.05e+00   6.29e-01
## poly(cbind(log(capital), invest), degree = 4)4.0  6.47e-01   5.09e-01
## poly(cbind(log(capital), invest), degree = 4)0.1  7.47e+01   9.17e+01
## poly(cbind(log(capital), invest), degree = 4)1.1 -4.40e+03   5.06e+03
## poly(cbind(log(capital), invest), degree = 4)2.1  1.97e+03   1.94e+03
## poly(cbind(log(capital), invest), degree = 4)3.1 -5.16e+02   3.99e+02
## poly(cbind(log(capital), invest), degree = 4)0.2 -1.63e+01   4.35e+01
## poly(cbind(log(capital), invest), degree = 4)1.2  9.36e+02   1.96e+03
## poly(cbind(log(capital), invest), degree = 4)2.2 -2.74e+02   4.66e+02
## poly(cbind(log(capital), invest), degree = 4)0.3  1.71e+01   1.02e+01
## poly(cbind(log(capital), invest), degree = 4)1.3 -4.59e+02   2.76e+02
## poly(cbind(log(capital), invest), degree = 4)0.4  2.28e+00   1.08e+00
##                                                  t value Pr(>|t|)    
## (Intercept)                                        31.98  < 2e-16 ***
## log(labor)                                         19.99  < 2e-16 ***
## log(materials)                                    133.53  < 2e-16 ***
## poly(cbind(log(capital), invest), degree = 4)1.0    3.50  0.00048 ***
## poly(cbind(log(capital), invest), degree = 4)2.0    2.02  0.04353 *  
## poly(cbind(log(capital), invest), degree = 4)3.0   -3.26  0.00112 ** 
## poly(cbind(log(capital), invest), degree = 4)4.0    1.27  0.20369    
## poly(cbind(log(capital), invest), degree = 4)0.1    0.81  0.41540    
## poly(cbind(log(capital), invest), degree = 4)1.1   -0.87  0.38486    
## poly(cbind(log(capital), invest), degree = 4)2.1    1.01  0.31091    
## poly(cbind(log(capital), invest), degree = 4)3.1   -1.29  0.19560    
## poly(cbind(log(capital), invest), degree = 4)0.2   -0.38  0.70694    
## poly(cbind(log(capital), invest), degree = 4)1.2    0.48  0.63235    
## poly(cbind(log(capital), invest), degree = 4)2.2   -0.59  0.55760    
## poly(cbind(log(capital), invest), degree = 4)0.3    1.68  0.09369 .  
## poly(cbind(log(capital), invest), degree = 4)1.3   -1.66  0.09645 .  
## poly(cbind(log(capital), invest), degree = 4)0.4    2.12  0.03433 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.445 on 3439 degrees of freedom
##   (248 observations deleted due to missingness)
## Multiple R-squared:  0.97,   Adjusted R-squared:  0.97 
## F-statistic: 6.93e+03 on 16 and 3439 DF,  p-value: <2e-16

Second regression

b1 <- op1$coefficients[c("log(labor)", "log(materials)")]
xb1 <- log(as.matrix(df.nm[, c("labor", "materials")])) %*% b1
fhat <- predict(op1, df.nm) - xb1

function to lag fhat and capital

lag <- function(x, i = df.nm$firm, t = df.nm$year) {
    if (length(i) != length(x) || length(i) != length(t)) {
        stop("Inputs not same length")
    }
    x.lag <- x[1:(length(x) - 1)]
    x.lag[i[1:(length(i) - 1)] != i[2:length(i)]] <- NA
    x.lag[t[1:(length(i) - 1)] + 1 != t[2:length(i)]] <- NA
    return(c(NA, x.lag))
}

Create data frame for step 2 regression

df.step2 <- data.frame(lhs = (log(df.nm$output - xb1)), year = df.nm$year, firm = df.nm$firm, 
    k = log(df.nm$capital), fhat = fhat, k.lag = log(lag(df.nm$capital)), f.lag = lag(fhat))

droping missing data

df.step2 <- subset(df.step2, !apply(df.step2, 1, function(x) any(is.na(x))))

objective function = sum of residuals²

objective <- function(betaK, degree = 4) {
    op2 <- lm(I(lhs - betaK * k) ~ poly(I(f.lag - betaK * k.lag), degree), data = df.step2)
    return(sum(residuals(op2)^2))
}

plot the objective before minimization

fig.df <- data.frame(bk = seq(from = -0.3, to = 0.3, by = 0.005))
fig.df$obj <- sapply(fig.df$bk, objective)
load.fun(ggplot2)
ggplot(data = fig.df, aes(x = bk, y = obj)) + geom_point()

plot of chunk unnamed-chunk-19

minimization problem

opt.out <- optim(prod.ols$coefficients["log(capital)"], fn = objective, method = "Brent", 
    lower = -1, upper = 1)
betaK <- opt.out$par

betaK

## [1] -0.01778

problem 8-Levinsohn and Petrin (2003) using the material cost to construct the control function

nomi <- !is.na(df$capital) & !is.na(df$materials)  ##remove the missing values
df.mater <- subset(df, nomi)
df.mater <- df.mater[order(df.mater$firm, df.mater$year), ]

first step OLS

op3 <- lm(log(output) ~ log(labor) + poly(cbind(log(capital), log(materials)), 
    degree = 4), data = df.mater)
op3

## 
## Call:
## lm(formula = log(output) ~ log(labor) + poly(cbind(log(capital), 
##     log(materials)), degree = 4), data = df.mater)
## 
## Coefficients:
##                                              (Intercept)  
##                                                   11.519  
##                                               log(labor)  
##                                                    0.209  
## poly(cbind(log(capital), log(materials)), degree = 4)1.0  
##                                                   39.524  
## poly(cbind(log(capital), log(materials)), degree = 4)2.0  
##                                                   17.642  
## poly(cbind(log(capital), log(materials)), degree = 4)3.0  
##                                                    9.268  
## poly(cbind(log(capital), log(materials)), degree = 4)4.0  
##                                                    1.046  
## poly(cbind(log(capital), log(materials)), degree = 4)0.1  
##                                                  131.393  
## poly(cbind(log(capital), log(materials)), degree = 4)1.1  
##                                                -2044.675  
## poly(cbind(log(capital), log(materials)), degree = 4)2.1  
##                                                -1285.827  
## poly(cbind(log(capital), log(materials)), degree = 4)3.1  
##                                                 -175.867  
## poly(cbind(log(capital), log(materials)), degree = 4)0.2  
##                                                   24.518  
## poly(cbind(log(capital), log(materials)), degree = 4)1.2  
##                                                  994.363  
## poly(cbind(log(capital), log(materials)), degree = 4)2.2  
##                                                  603.850  
## poly(cbind(log(capital), log(materials)), degree = 4)0.3  
##                                                   -8.984  
## poly(cbind(log(capital), log(materials)), degree = 4)1.3  
##                                                 -735.279  
## poly(cbind(log(capital), log(materials)), degree = 4)0.4  
##                                                    3.957

Second regression

bl <- op3$coefficients[c("log(labor)")]
xbl <- log(as.matrix(df.mater[, c("labor")])) %*% bl
fbar <- predict(op3, df.mater) - xbl

create data

lag1 <- function(x, i = df.mater$firm, t = df.mater$year) {
    if (length(i) != length(x) || length(i) != length(t)) {
        stop("Inputs not same length")
    }
    x.lag1 <- x[1:(length(x) - 1)]
    x.lag1[i[1:(length(i) - 1)] != i[2:length(i)]] <- NA
    x.lag1[t[1:(length(i) - 1)] + 1 != t[2:length(i)]] <- NA
    return(c(NA, x.lag1))
}
df.mat2 <- data.frame(lhs = log(df.mater$output - xbl), k = log(df.mater$capital), 
    fbar = fbar, k.lag = log(lag1(df.mater$capital)), f.lag = lag1(fbar), M.lag = log(lag1(df.mater$materials)), 
    M = log(df.mater$materials))
df.mat2 <- subset(df.mat2, !apply(df.mat2, 1, function(x) any(is.na(x))))
objective2 <- function(x, degree = 4) {
    betaK = x[1]
    betaM = x[2]
    op2 <- lm(I(lhs - betaK * k - betaM * M) ~ poly(I(f.lag - betaK * k.lag - 
        betaM * M.lag), degree), data = df.mat2)
    return(sum(residuals(op2)^2))
}
fig.df2 <- data.frame(bk2 = seq(from = -0.3, to = 0.3, by = 0.005), bk3 = seq(from = -0.3, 
    to = 0.3, by = 0.005))
x <- list(betak = seq(from = -0.3, to = 0.3, by = 0.005), betaM = seq(from = -0.3, 
    to = 0.3, by = 0.005))
n = 1000
x = runif(n, min = -1, max = 1)
y = runif(n, min = -1, max = 1)
var = cbind(x, y)
z = rep(NA, n)
for (i in 1:n) {
    z[i] = objective2(var[i, ])
}
plot3d(x, y, z, col = 1)
Z

## Error: ?Ҳ???????'Z'

library(rgl)
open3d()

## wgl 
##   2

plot3d(x, y, z, "betak", "betaM", "resi_Squ", type = "s")
open3d()

## wgl 
##   3

x <- sort(rnorm(1000))
y <- rnorm(1000)
z <- rnorm(1000) + atan2(x, y)
plot3d(x, y, z, col = rainbow(1000))

solving betaK and betaM

opt.out2 <- nlm(objective2, c(0.08648541, 0.7520484))
opt.out2

## $minimum
## [1] 735.6
## 
## $estimate
## [1] -0.04886  0.65325
## 
## $gradient
## [1]  5.161e-05 -3.110e-04
## 
## $code
## [1] 1
## 
## $iterations
## [1] 11