ECON565-Assignment 1-Production Function Estimation

Kun Wang (86756137) Feb13-2004

Problem 1 Descriptive Statistics and plots

1.1 Some Descriptive Statistics

summary(df)  ## show some summary statistics of df
##       firm         any.foreign.own      year         firm.age    
##  Min.   :   1001   Min.   :0.00    Min.   :1991   Min.   :  1.0  
##  1st Qu.:   1548   1st Qu.:0.00    1st Qu.:1994   1st Qu.:  9.0  
##  Median :   3156   Median :0.00    Median :1997   Median : 17.0  
##  Mean   :  86038   Mean   :0.18    Mean   :1996   Mean   : 18.8  
##  3rd Qu.:  50418   3rd Qu.:0.00    3rd Qu.:1999   3rd Qu.: 25.0  
##  Max.   :1009999   Max.   :1.00    Max.   :2003   Max.   :102.0  
##                    NA's   :257                    NA's   :1691   
##     exports        exportna       exporta        rmawagus     
##  Min.   :0      Min.   :0      Min.   :0      Min.   :  -729  
##  1st Qu.:0      1st Qu.:0      1st Qu.:0      1st Qu.:    22  
##  Median :0      Median :0      Median :0      Median :    51  
##  Mean   :0      Mean   :0      Mean   :0      Mean   :   158  
##  3rd Qu.:0      3rd Qu.:0      3rd Qu.:0      3rd Qu.:   110  
##  Max.   :1      Max.   :1      Max.   :1      Max.   :117383  
##  NA's   :5178   NA's   :6106   NA's   :6103   NA's   :5357    
##      eduwgt         agewgt         tenwgt         ernus     
##  Min.   : 0     Min.   : 0     Min.   : 0     Min.   :   0  
##  1st Qu.: 7     1st Qu.:26     1st Qu.: 3     1st Qu.:  38  
##  Median : 9     Median :32     Median : 6     Median :  61  
##  Mean   : 9     Mean   :31     Mean   : 7     Mean   :  73  
##  3rd Qu.:10     3rd Qu.:36     3rd Qu.: 9     3rd Qu.:  91  
##  Max.   :21     Max.   :65     Max.   :30     Max.   :1310  
##  NA's   :6190   NA's   :6190   NA's   :6190   NA's   :6522  
##          country        prateus        kus_routus       entex       
##  Ghana       :3390   Min.   :-1578   Min.   :  0    Min.   :0.0000  
##  Kenya       :3240   1st Qu.:    0   1st Qu.:  0    1st Qu.:0.0000  
##  Nigeria     : 700   Median :    0   Median :  0    Median :0.0000  
##  South Africa: 404   Mean   :    1   Mean   :  2    Mean   :0.0088  
##  Tanzania    :2625   3rd Qu.:    1   3rd Qu.:  1    3rd Qu.:0.0000  
##                      Max.   :  633   Max.   :191    Max.   :1.0000  
##                      NA's   :5580    NA's   :5274                   
##      exitex                       industry        labor      
##  Min.   :0.0000   food and baking     :2265   Min.   :    1  
##  1st Qu.:0.0000   furniture           :1820   1st Qu.:    8  
##  Median :0.0000   garment             :1741   Median :   25  
##  Mean   :0.0079   metals and machinery:3084   Mean   :  125  
##  3rd Qu.:0.0000   textile             : 648   3rd Qu.:   81  
##  Max.   :1.0000   wood                : 781   Max.   :13000  
##                   NA's                :  20   NA's   :4888   
##      output            capital           materials       
##  Min.   :8.70e+01   Min.   :2.30e+01   Min.   :3.00e+00  
##  1st Qu.:1.87e+04   1st Qu.:3.74e+03   1st Qu.:8.71e+03  
##  Median :1.15e+05   Median :9.74e+04   Median :5.29e+04  
##  Mean   :4.09e+06   Mean   :3.24e+06   Mean   :2.38e+06  
##  3rd Qu.:1.02e+06   3rd Qu.:8.06e+05   3rd Qu.:4.78e+05  
##  Max.   :5.15e+08   Max.   :5.70e+08   Max.   :3.84e+08  
##  NA's   :5149       NA's   :5120       NA's   :5277      
##  indirect.costs    
##  Min.   :       8  
##  1st Qu.:    1280  
##  Median :   10527  
##  Mean   :  458566  
##  3rd Qu.:   98569  
##  Max.   :68251868  
##  NA's   :5226

1.2. Descriptive plots

hist(log(df$output))  ## plot the histogram of the logoutput##

plot of chunk unnamed-chunk-3

plot(x = log(df$labor), y = log(df$output))  ## scatter plot of log(output) and log(labor)

plot of chunk unnamed-chunk-3

plot(x = log(df$capital), y = log(df$output))  ## scatter plot of log(output) and log(capital)

plot of chunk unnamed-chunk-3

plot(x = log(df$materials), y = log(df$output))  ## scatter plot of log(output) and log(materials)

plot of chunk unnamed-chunk-3

output.histogram <- ggplot(df, aes(x = log(output), fill = country)) + geom_histogram() + 
    facet_grid(country ~ industry) + theme_minimal() + scale_fill_brewer(type = "qual", 
    guide = FALSE)
output.histogram  ## the histogram by country and indsutry

plot of chunk unnamed-chunk-5

1.3: Create scatter plots of log output with log capital, log labor, log materials, and log indirect costs

(1) Are there any strange patterns or other obvious problems with the data?

(2) Does it appear that Cobb-Douglas will be a good approximation for these firm's production functions ? why or why not?


scatter.capital <- ggplot(df, aes(x = log(capital), y = log(output), color = country)) + 
    geom_point() + facet_grid(country ~ industry) + theme_bw() + scale_color_brewer(type = "qual", 
    guide = FALSE)
scatter.capital

plot of chunk unnamed-chunk-6



scatter.labor <- ggplot(df, aes(x = log(labor), y = log(output), color = country)) + 
    geom_point() + facet_grid(country ~ industry) + theme_bw() + scale_color_brewer(type = "qual", 
    guide = FALSE)
scatter.labor

plot of chunk unnamed-chunk-6



scatter.materials <- ggplot(df, aes(x = log(materials), y = log(output), color = country)) + 
    geom_point() + facet_grid(country ~ industry) + theme_bw() + scale_color_brewer(type = "qual", 
    guide = FALSE)
scatter.materials

plot of chunk unnamed-chunk-6



scatter.indirect.costs <- ggplot(df, aes(x = log(indirect.costs), y = log(output), 
    color = country)) + geom_point() + facet_grid(country ~ industry) + theme_bw() + 
    scale_color_brewer(type = "qual", guide = FALSE)
scatter.indirect.costs

plot of chunk unnamed-chunk-6

1.4 create 3d plot

** The 3-d plot confirms the general positive linear relationship between “log labor” and “log output”, “log capital” and “log output” **

This 3-d plot also demonstrates obvious linear relation between “log output” and the “log inputs”.

rgl.spheres(log(df$labor), log(df$capital), log(df$output), color = "grey", 
    radius = 0.05, specular = "white")
axes3d(c("x", "y", "z"))
title3d("", "", "log labor", "log capital", "log output")
grid3d(c("x", "y", "z"))

Problem 2 and 3 OLS estimation of the production function, and estimated return to scale

From the OLS, whe have following resutls

(we installed the ClusterFunctions.R to calculate the heterosckedasticity robust s.d.)

prod.ols <- plm(log(output) ~ log(capital) + log(labor) + log(materials), data = df, 
    model = "pooling", index = c("firm", "year"))
cl.plm(df, prod.ols, df$firm)
## 
## t test of coefficients:
## 
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.84128    0.04931    37.3   <2e-16 ***
## log(capital)    0.08649    0.00576    15.0   <2e-16 ***
## log(labor)      0.21919    0.01159    18.9   <2e-16 ***
## log(materials)  0.75205    0.01007    74.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Problem 4: What are estimated returns to scale by controlling “industry, year and countries” and “fixed effect estimation”

4.1 by including the “industry and year” dummies, the estimated returns to scale does not vary too much.

prod.indyear <- plm(log(output) ~ log(capital) + log(labor) + log(materials) + 
    industry + as.factor(year), data = df, model = "pooling", index = c("firm", 
    "year"))  ## regression including industry and year dummies

cl.plm(df, prod.indyear, df$firm)
## 
## t test of coefficients:
## 
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   1.80807    0.07159   25.25  < 2e-16 ***
## log(capital)                  0.08669    0.00572   15.16  < 2e-16 ***
## log(labor)                    0.22834    0.01343   17.00  < 2e-16 ***
## log(materials)                0.74506    0.01125   66.26  < 2e-16 ***
## industryfurniture            -0.03341    0.02778   -1.20  0.22917    
## industrygarment               0.00559    0.02717    0.21  0.83710    
## industrymetals and machinery  0.01836    0.02066    0.89  0.37426    
## industrytextile              -0.13178    0.03402   -3.87  0.00011 ***
## industrywood                  0.02545    0.03642    0.70  0.48467    
## as.factor(year)1992           0.02694    0.03871    0.70  0.48643    
## as.factor(year)1993           0.01228    0.04029    0.30  0.76049    
## as.factor(year)1994           0.11514    0.04495    2.56  0.01046 *  
## as.factor(year)1995           0.05481    0.03921    1.40  0.16227    
## as.factor(year)1996           0.01640    0.04194    0.39  0.69569    
## as.factor(year)1997           0.09882    0.03721    2.66  0.00793 ** 
## as.factor(year)1998           0.12022    0.03516    3.42  0.00063 ***
## as.factor(year)1999           0.06475    0.03569    1.81  0.06975 .  
## as.factor(year)2000           0.12641    0.03962    3.19  0.00143 ** 
## as.factor(year)2001           0.17281    0.04712    3.67  0.00025 ***
## as.factor(year)2002           0.15764    0.04301    3.67  0.00025 ***
## as.factor(year)2003           0.16231    0.06052    2.68  0.00735 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

allowing the coefficients to be different for each industry

prod.industry <- plm(log(output) ~ (log(capital) + log(labor) + log(materials)) * 
    industry + as.factor(year), data = df, model = "pooling", index = c("firm", 
    "year"))

cl.plm(df, prod.industry, df$firm)
## 
## t test of coefficients:
## 
##                                             Estimate Std. Error t value
## (Intercept)                                  2.04844    0.19778   10.36
## log(capital)                                 0.08101    0.01054    7.69
## log(labor)                                   0.30347    0.04555    6.66
## log(materials)                               0.70894    0.02840   24.96
## industryfurniture                           -0.67985    0.21258   -3.20
## industrygarment                              0.25828    0.25428    1.02
## industrymetals and machinery                -0.45267    0.21063   -2.15
## industrytextile                              0.04843    0.39404    0.12
## industrywood                                 0.00372    0.25500    0.01
## as.factor(year)1992                          0.02692    0.03761    0.72
## as.factor(year)1993                          0.02431    0.03940    0.62
## as.factor(year)1994                          0.12678    0.04330    2.93
## as.factor(year)1995                          0.06689    0.03826    1.75
## as.factor(year)1996                          0.02113    0.04091    0.52
## as.factor(year)1997                          0.08191    0.03622    2.26
## as.factor(year)1998                          0.11471    0.03414    3.36
## as.factor(year)1999                          0.07384    0.03474    2.13
## as.factor(year)2000                          0.12925    0.03755    3.44
## as.factor(year)2001                          0.17004    0.04506    3.77
## as.factor(year)2002                          0.15283    0.04180    3.66
## as.factor(year)2003                          0.14251    0.05852    2.44
## log(capital):industryfurniture               0.00243    0.01488    0.16
## log(capital):industrygarment                 0.02749    0.02007    1.37
## log(capital):industrymetals and machinery   -0.00578    0.01326   -0.44
## log(capital):industrytextile                 0.02598    0.04007    0.65
## log(capital):industrywood                    0.00119    0.01975    0.06
## log(labor):industryfurniture                -0.11669    0.05278   -2.21
## log(labor):industrygarment                  -0.12899    0.05096   -2.53
## log(labor):industrymetals and machinery     -0.10381    0.04933   -2.10
## log(labor):industrytextile                  -0.03428    0.07574   -0.45
## log(labor):industrywood                     -0.02645    0.05937   -0.45
## log(materials):industryfurniture             0.09160    0.03086    2.97
## log(materials):industrygarment              -0.02544    0.04326   -0.59
## log(materials):industrymetals and machinery  0.07557    0.03210    2.35
## log(materials):industrytextile              -0.03375    0.08335   -0.40
## log(materials):industrywood                  0.00360    0.03579    0.10
##                                             Pr(>|t|)    
## (Intercept)                                  < 2e-16 ***
## log(capital)                                 1.8e-14 ***
## log(labor)                                   3.0e-11 ***
## log(materials)                               < 2e-16 ***
## industryfurniture                            0.00139 ** 
## industrygarment                              0.30982    
## industrymetals and machinery                 0.03167 *  
## industrytextile                              0.90219    
## industrywood                                 0.98837    
## as.factor(year)1992                          0.47415    
## as.factor(year)1993                          0.53729    
## as.factor(year)1994                          0.00343 ** 
## as.factor(year)1995                          0.08050 .  
## as.factor(year)1996                          0.60550    
## as.factor(year)1997                          0.02376 *  
## as.factor(year)1998                          0.00079 ***
## as.factor(year)1999                          0.03361 *  
## as.factor(year)2000                          0.00058 ***
## as.factor(year)2001                          0.00016 ***
## as.factor(year)2002                          0.00026 ***
## as.factor(year)2003                          0.01492 *  
## log(capital):industryfurniture               0.87026    
## log(capital):industrygarment                 0.17078    
## log(capital):industrymetals and machinery    0.66319    
## log(capital):industrytextile                 0.51682    
## log(capital):industrywood                    0.95189    
## log(labor):industryfurniture                 0.02709 *  
## log(labor):industrygarment                   0.01140 *  
## log(labor):industrymetals and machinery      0.03540 *  
## log(labor):industrytextile                   0.65089    
## log(labor):industrywood                      0.65598    
## log(materials):industryfurniture             0.00301 ** 
## log(materials):industrygarment               0.55656    
## log(materials):industrymetals and machinery  0.01858 *  
## log(materials):industrytextile               0.68555    
## log(materials):industrywood                  0.91997    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.2 Including Country dummies / and allowing the coefficients to be different for each country

By including the country dummies, it shows that Nigeria, South Africa have higher output level, and Tanzania has lower output level (the base is Ghana), while the estimated return to scale is consistent with previous results.

prod.country1 <- plm(log(output) ~ log(capital) + log(labor) + log(materials) + 
    industry + country + as.factor(year), data = df, model = "pooling", index = c("firm", 
    "year"))  ## regression including country dummy
cl.plm(df, prod.country1, df$firm)
## 
## t test of coefficients:
## 
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   2.01810    0.08460   23.85  < 2e-16 ***
## log(capital)                  0.08633    0.00552   15.63  < 2e-16 ***
## log(labor)                    0.23461    0.01416   16.57  < 2e-16 ***
## log(materials)                0.72636    0.01212   59.91  < 2e-16 ***
## industryfurniture            -0.07251    0.02929   -2.48  0.01333 *  
## industrygarment              -0.05417    0.02958   -1.83  0.06708 .  
## industrymetals and machinery -0.04321    0.02322   -1.86  0.06282 .  
## industrytextile              -0.12688    0.03526   -3.60  0.00032 ***
## industrywood                  0.01884    0.03648    0.52  0.60557    
## countryKenya                 -0.04032    0.02313   -1.74  0.08133 .  
## countryNigeria                0.08735    0.02883    3.03  0.00246 ** 
## countrySouth Africa           0.36701    0.03274   11.21  < 2e-16 ***
## countryTanzania              -0.10227    0.01758   -5.82  6.3e-09 ***
## as.factor(year)1992           0.08112    0.04029    2.01  0.04411 *  
## as.factor(year)1993           0.06789    0.04043    1.68  0.09317 .  
## as.factor(year)1994           0.15277    0.04420    3.46  0.00055 ***
## as.factor(year)1995           0.09884    0.03969    2.49  0.01280 *  
## as.factor(year)1996           0.03003    0.04227    0.71  0.47744    
## as.factor(year)1997           0.03639    0.03731    0.98  0.32953    
## as.factor(year)1998           0.08615    0.03638    2.37  0.01794 *  
## as.factor(year)1999           0.10205    0.03736    2.73  0.00633 ** 
## as.factor(year)2000           0.15002    0.04038    3.72  0.00021 ***
## as.factor(year)2001           0.15207    0.04737    3.21  0.00133 ** 
## as.factor(year)2002           0.13605    0.04471    3.04  0.00235 ** 
## as.factor(year)2003           0.11353    0.06264    1.81  0.06998 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

If we allow the coefficients to be different among countries, the results shows that the return to scale does NOT vary significantly among different industries. Using Ghana as benchmark base, Nigeria and Tanazania has lower return to capital; South Africa and Tanazaniahas lower return to materials.

prod.country2 <- plm(log(output) ~ (log(capital) + log(labor) + log(materials)) * 
    country + industry + as.factor(year), data = df, model = "pooling", index = c("firm", 
    "year"))  ## regression allowing coefficients varying with country

cl.plm(df, prod.country2, df$firm)
## 
## t test of coefficients:
## 
##                                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)                         1.81362    0.11321   16.02  < 2e-16
## log(capital)                        0.10454    0.00814   12.84  < 2e-16
## log(labor)                          0.24201    0.01895   12.77  < 2e-16
## log(materials)                      0.72733    0.01756   41.43  < 2e-16
## countryKenya                        0.32776    0.18368    1.78  0.07442
## countryNigeria                      0.60194    0.19257    3.13  0.00178
## countrySouth Africa                 1.60332    0.29213    5.49  4.3e-08
## countryTanzania                     0.03695    0.11500    0.32  0.74798
## industryfurniture                  -0.07657    0.02815   -2.72  0.00654
## industrygarment                    -0.05191    0.02802   -1.85  0.06398
## industrymetals and machinery       -0.04309    0.02199   -1.96  0.05012
## industrytextile                    -0.10769    0.03470   -3.10  0.00192
## industrywood                       -0.00334    0.03498   -0.10  0.92389
## as.factor(year)1992                 0.07616    0.04049    1.88  0.06003
## as.factor(year)1993                 0.06196    0.04104    1.51  0.13116
## as.factor(year)1994                 0.14075    0.04476    3.14  0.00167
## as.factor(year)1995                 0.09720    0.03979    2.44  0.01460
## as.factor(year)1996                 0.01236    0.04184    0.30  0.76771
## as.factor(year)1997                 0.02528    0.03771    0.67  0.50265
## as.factor(year)1998                 0.06953    0.03700    1.88  0.06031
## as.factor(year)1999                 0.07940    0.03783    2.10  0.03589
## as.factor(year)2000                 0.12403    0.04039    3.07  0.00215
## as.factor(year)2001                 0.12793    0.04704    2.72  0.00656
## as.factor(year)2002                 0.11418    0.04459    2.56  0.01047
## as.factor(year)2003                 0.08302    0.06239    1.33  0.18335
## log(capital):countryKenya          -0.02219    0.02269   -0.98  0.32824
## log(capital):countryNigeria        -0.04490    0.01350   -3.33  0.00089
## log(capital):countrySouth Africa   -0.02276    0.01685   -1.35  0.17670
## log(capital):countryTanzania       -0.05144    0.01127   -4.56  5.1e-06
## log(labor):countryKenya             0.00452    0.03250    0.14  0.88951
## log(labor):countryNigeria           0.01642    0.04630    0.35  0.72280
## log(labor):countrySouth Africa      0.06353    0.04564    1.39  0.16398
## log(labor):countryTanzania         -0.04614    0.02333   -1.98  0.04807
## log(materials):countryKenya        -0.01343    0.03715   -0.36  0.71768
## log(materials):countryNigeria      -0.00569    0.03511   -0.16  0.87121
## log(materials):countrySouth Africa -0.08886    0.03178   -2.80  0.00519
## log(materials):countryTanzania      0.05136    0.01945    2.64  0.00829
##                                       
## (Intercept)                        ***
## log(capital)                       ***
## log(labor)                         ***
## log(materials)                     ***
## countryKenya                       .  
## countryNigeria                     ** 
## countrySouth Africa                ***
## countryTanzania                       
## industryfurniture                  ** 
## industrygarment                    .  
## industrymetals and machinery       .  
## industrytextile                    ** 
## industrywood                          
## as.factor(year)1992                .  
## as.factor(year)1993                   
## as.factor(year)1994                ** 
## as.factor(year)1995                *  
## as.factor(year)1996                   
## as.factor(year)1997                   
## as.factor(year)1998                .  
## as.factor(year)1999                *  
## as.factor(year)2000                ** 
## as.factor(year)2001                ** 
## as.factor(year)2002                *  
## as.factor(year)2003                   
## log(capital):countryKenya             
## log(capital):countryNigeria        ***
## log(capital):countrySouth Africa      
## log(capital):countryTanzania       ***
## log(labor):countryKenya               
## log(labor):countryNigeria             
## log(labor):countrySouth Africa        
## log(labor):countryTanzania         *  
## log(materials):countryKenya           
## log(materials):countryNigeria         
## log(materials):countrySouth Africa ** 
## log(materials):countryTanzania     ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.3.Fixed effect estimation

The time-invariant fixed effects (firm, industry and country) are controlled by runing following "fixed-effect” regression. The “within model” was used.

The estimated return to scale has some changes in magnitude:

prod.fe <- plm(log(output) ~ log(capital) + log(labor) + log(materials) + industry + 
    as.factor(year), data = df, model = "within", index = c("firm", "year"))
cl.plm(df, prod.fe, df$firm)
## 
## t test of coefficients:
## 
##                     Estimate Std. Error t value Pr(>|t|)    
## log(capital)          0.0302     0.0133    2.27  0.02338 *  
## log(labor)            0.1870     0.0191    9.79  < 2e-16 ***
## log(materials)        0.6407     0.0196   32.76  < 2e-16 ***
## as.factor(year)1992   0.1469     0.0394    3.73  0.00019 ***
## as.factor(year)1993   0.1526     0.0400    3.82  0.00014 ***
## as.factor(year)1994   0.2132     0.0418    5.10  3.6e-07 ***
## as.factor(year)1995   0.1601     0.0377    4.25  2.2e-05 ***
## as.factor(year)1996   0.0763     0.0401    1.90  0.05727 .  
## as.factor(year)1997   0.0613     0.0372    1.65  0.09980 .  
## as.factor(year)1998   0.0984     0.0373    2.64  0.00840 ** 
## as.factor(year)1999   0.1257     0.0380    3.30  0.00096 ***
## as.factor(year)2000   0.1415     0.0382    3.71  0.00021 ***
## as.factor(year)2001   0.1273     0.0414    3.08  0.00210 ** 
## as.factor(year)2002   0.1112     0.0410    2.72  0.00664 ** 
## as.factor(year)2003   0.0792     0.0487    1.63  0.10418    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

In a summary, we have following results regarding OLS and fixed effect regressions

Problem 5 Olley-Pakes Estimation of the Production Function

df <- df[order(df$firm, df$year), ]
df$invest <- c(df$capital[2:nrow(df)] - df$capital[1:(nrow(df) - 1)], NA)
df$invest[c(df$firm[2:nrow(df)] != df$firm[1:(nrow(df) - 1)], FALSE)] <- NA
notmissing <- !is.na(df$capital) & !is.na(df$invest)  ##remove the missing values
df.nm <- subset(df, notmissing)
df.nm <- df.nm[order(df.nm$firm, df.nm$year), ]
op1 <- lm(log(output) ~ log(labor) + log(materials) + poly(cbind(log(capital), 
    invest), degree = 4), data = df.nm)
summary(op1)
## 
## Call:
## lm(formula = log(output) ~ log(labor) + log(materials) + poly(cbind(log(capital), 
##     invest), degree = 4), data = df.nm)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.315 -0.255 -0.039  0.207  5.254 
## 
## Coefficients:
##                                                   Estimate Std. Error
## (Intercept)                                       2.94e+00   9.20e-02
## log(labor)                                        1.97e-01   9.86e-03
## log(materials)                                    7.50e-01   5.62e-03
## poly(cbind(log(capital), invest), degree = 4)1.0  1.48e+01   4.24e+00
## poly(cbind(log(capital), invest), degree = 4)2.0  3.59e+00   1.78e+00
## poly(cbind(log(capital), invest), degree = 4)3.0 -2.05e+00   6.29e-01
## poly(cbind(log(capital), invest), degree = 4)4.0  6.47e-01   5.09e-01
## poly(cbind(log(capital), invest), degree = 4)0.1  7.47e+01   9.17e+01
## poly(cbind(log(capital), invest), degree = 4)1.1 -4.40e+03   5.06e+03
## poly(cbind(log(capital), invest), degree = 4)2.1  1.97e+03   1.94e+03
## poly(cbind(log(capital), invest), degree = 4)3.1 -5.16e+02   3.99e+02
## poly(cbind(log(capital), invest), degree = 4)0.2 -1.63e+01   4.35e+01
## poly(cbind(log(capital), invest), degree = 4)1.2  9.36e+02   1.96e+03
## poly(cbind(log(capital), invest), degree = 4)2.2 -2.74e+02   4.66e+02
## poly(cbind(log(capital), invest), degree = 4)0.3  1.71e+01   1.02e+01
## poly(cbind(log(capital), invest), degree = 4)1.3 -4.59e+02   2.76e+02
## poly(cbind(log(capital), invest), degree = 4)0.4  2.28e+00   1.08e+00
##                                                  t value Pr(>|t|)    
## (Intercept)                                        31.98  < 2e-16 ***
## log(labor)                                         19.99  < 2e-16 ***
## log(materials)                                    133.53  < 2e-16 ***
## poly(cbind(log(capital), invest), degree = 4)1.0    3.50  0.00048 ***
## poly(cbind(log(capital), invest), degree = 4)2.0    2.02  0.04353 *  
## poly(cbind(log(capital), invest), degree = 4)3.0   -3.26  0.00112 ** 
## poly(cbind(log(capital), invest), degree = 4)4.0    1.27  0.20369    
## poly(cbind(log(capital), invest), degree = 4)0.1    0.81  0.41540    
## poly(cbind(log(capital), invest), degree = 4)1.1   -0.87  0.38486    
## poly(cbind(log(capital), invest), degree = 4)2.1    1.01  0.31091    
## poly(cbind(log(capital), invest), degree = 4)3.1   -1.29  0.19560    
## poly(cbind(log(capital), invest), degree = 4)0.2   -0.38  0.70694    
## poly(cbind(log(capital), invest), degree = 4)1.2    0.48  0.63235    
## poly(cbind(log(capital), invest), degree = 4)2.2   -0.59  0.55760    
## poly(cbind(log(capital), invest), degree = 4)0.3    1.68  0.09369 .  
## poly(cbind(log(capital), invest), degree = 4)1.3   -1.66  0.09645 .  
## poly(cbind(log(capital), invest), degree = 4)0.4    2.12  0.03433 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.445 on 3439 degrees of freedom
##   (248 observations deleted due to missingness)
## Multiple R-squared:  0.97,   Adjusted R-squared:  0.97 
## F-statistic: 6.93e+03 on 16 and 3439 DF,  p-value: <2e-16

Problem 6. How do the OP estimates of betaM and betaL compare to the OLS and fixed effects estimates.

The OP estimates is smaller than OLS. This is consistent with expectation. According to Aguirregabiria (2012), the OLS creates upward bias to betaM and betaL. Second, the OP estimates is larger than fixed effect estimation. This is also consistent with expectation. This is because the fixed effect largely reduces the variation of variables and it introduces significant measurement error problem, thus creating smaller estimates of betaL and betaM. .

b1 <- op1$coefficients[c("log(labor)", "log(materials)")]
xb1 <- log(as.matrix(df.nm[, c("labor", "materials")])) %*% b1
fhat <- predict(op1, df.nm) - xb1

## construction of a lag function later used to calculate lag capital and
## fhat

lag <- function(x, i = df.nm$firm, t = df.nm$year) {
    if (length(i) != length(x) || length(i) != length(t)) {
        stop("Inputs not same length")
    }
    x.lag <- x[1:(length(x) - 1)]
    x.lag[i[1:(length(i) - 1)] != i[2:length(i)]] <- NA
    x.lag[t[1:(length(i) - 1)] + 1 != t[2:length(i)]] <- NA
    return(c(NA, x.lag))
}

## Create data frame for step 2 regression

df.step2 <- data.frame(lhs = log(df.nm$output) - xb1, year = df.nm$year, firm = df.nm$firm, 
    k = log(df.nm$capital), fhat = fhat, k.lag = log(lag(df.nm$capital)), f.lag = lag(fhat))

## droping missing data
df.step2 <- subset(df.step2, !apply(df.step2, 1, function(x) any(is.na(x))))
objective <- function(betaK, degree = 4) {
    op2 <- lm(I(lhs - betaK * k) ~ poly(I(f.lag - betaK * k.lag), degree), data = df.step2)
    return(sum(residuals(op2)^2))
}
fig.df <- data.frame(bk = seq(from = -0.1, to = 0.3, by = 0.005))
fig.df$obj <- sapply(fig.df$bk, objective)
load.fun(ggplot2)
ggplot(data = fig.df, aes(x = bk, y = obj)) + geom_point()

plot of chunk unnamed-chunk-20

opt.out <- optim(prod.ols$coefficients["log(capital)"], fn = objective, method = "Brent", 
    lower = -1, upper = 1)
betaK <- opt.out$par

betaK
## [1] 0.1031

Problem 7. How do the OP estimates of betaK compare to the OLS and fixed effects estimates.

The OP estimates is a little larger than OLS, because of OLS estimation is biased. Second, the OP estimates is larger than fixed effect estimation. This is consistent with expectation. This is because the fixed effect largely reduces the variation of variables and it introduces significant measurement error problem, thus creating smaller estimates of betaK. .

Problem 8-Levinsohn and Petrin (2003) using the material cost to construct the control function

nomi <- !is.na(df$capital) & !is.na(df$materials)  ##remove the missing values
df.mater <- subset(df, nomi)
df.mater <- df.mater[order(df.mater$firm, df.mater$year), ]

op3 <- lm(log(output) ~ log(labor) + poly(cbind(log(capital), log(materials)), 
    degree = 4), data = df.mater)
op3
## 
## Call:
## lm(formula = log(output) ~ log(labor) + poly(cbind(log(capital), 
##     log(materials)), degree = 4), data = df.mater)
## 
## Coefficients:
##                                              (Intercept)  
##                                                   11.519  
##                                               log(labor)  
##                                                    0.209  
## poly(cbind(log(capital), log(materials)), degree = 4)1.0  
##                                                   39.524  
## poly(cbind(log(capital), log(materials)), degree = 4)2.0  
##                                                   17.642  
## poly(cbind(log(capital), log(materials)), degree = 4)3.0  
##                                                    9.268  
## poly(cbind(log(capital), log(materials)), degree = 4)4.0  
##                                                    1.046  
## poly(cbind(log(capital), log(materials)), degree = 4)0.1  
##                                                  131.393  
## poly(cbind(log(capital), log(materials)), degree = 4)1.1  
##                                                -2044.675  
## poly(cbind(log(capital), log(materials)), degree = 4)2.1  
##                                                -1285.827  
## poly(cbind(log(capital), log(materials)), degree = 4)3.1  
##                                                 -175.867  
## poly(cbind(log(capital), log(materials)), degree = 4)0.2  
##                                                   24.518  
## poly(cbind(log(capital), log(materials)), degree = 4)1.2  
##                                                  994.363  
## poly(cbind(log(capital), log(materials)), degree = 4)2.2  
##                                                  603.850  
## poly(cbind(log(capital), log(materials)), degree = 4)0.3  
##                                                   -8.984  
## poly(cbind(log(capital), log(materials)), degree = 4)1.3  
##                                                 -735.279  
## poly(cbind(log(capital), log(materials)), degree = 4)0.4  
##                                                    3.957
bl <- op3$coefficients[c("log(labor)")]
xbl <- log(as.matrix(df.mater[, c("labor")])) %*% bl
fbar <- predict(op3, df.mater) - xbl

## create another lag function to get lag capital, materials and fbar

lag1 <- function(x, i = df.mater$firm, t = df.mater$year) {
    if (length(i) != length(x) || length(i) != length(t)) {
        stop("Inputs not same length")
    }
    x.lag1 <- x[1:(length(x) - 1)]
    x.lag1[i[1:(length(i) - 1)] != i[2:length(i)]] <- NA
    x.lag1[t[1:(length(i) - 1)] + 1 != t[2:length(i)]] <- NA
    return(c(NA, x.lag1))
}
## create the dataframe for second stage regression to retrieve betaK and
## betaM

df.mat2 <- data.frame(lhs = log(df.mater$output) - xbl, k = log(df.mater$capital), 
    M = log(df.mater$materials), fbar = fbar, k.lag = log(lag1(df.mater$capital)), 
    f.lag = lag1(fbar), M.lag = log(lag1(df.mater$materials)))
df.mat2 <- subset(df.mat2, !apply(df.mat2, 1, function(x) any(is.na(x))))

. .

objective2 <- function(x, degree = 4) {
    betaK = x[1]
    betaM = x[2]

    op2 <- lm(I(lhs - betaK * k - betaM * M) ~ poly(I(f.lag - betaK * k.lag - 
        betaM * M.lag), degree), data = df.mat2)
    return(sum(residuals(op2)^2))
}
# I randomly draw thetaK and thetaM from uniform distribution [-1,1] and
# calculate the objective.
n = 1000
x = runif(n, min = -1, max = 1)
y = runif(n, min = -1, max = 1)
var = cbind(x, y)
z = rep(NA, n)
for (i in 1:n) {
    z[i] = objective2(var[i, ])
}
plot3d(x, y, z, type = "p", col = 2)
opt.out2 <- nlm(objective2, c(0.08648541, 0.7520484))
opt.out2
## $minimum
## [1] 627.8
## 
## $estimate
## [1] 0.1465 0.6609
## 
## $gradient
## [1]  0.0005689 -0.0001473
## 
## $code
## [1] 1
## 
## $iterations
## [1] 8