library(readr)
library(rlang)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ stringr 1.4.0
## ✓ tidyr   1.1.4     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x purrr::%@%()         masks rlang::%@%()
## x purrr::as_function() masks rlang::as_function()
## x dplyr::filter()      masks stats::filter()
## x purrr::flatten()     masks rlang::flatten()
## x purrr::flatten_chr() masks rlang::flatten_chr()
## x purrr::flatten_dbl() masks rlang::flatten_dbl()
## x purrr::flatten_int() masks rlang::flatten_int()
## x purrr::flatten_lgl() masks rlang::flatten_lgl()
## x purrr::flatten_raw() masks rlang::flatten_raw()
## x purrr::invoke()      masks rlang::invoke()
## x dplyr::lag()         masks stats::lag()
## x purrr::list_along()  masks rlang::list_along()
## x purrr::modify()      masks rlang::modify()
## x purrr::prepend()     masks rlang::prepend()
## x purrr::splice()      masks rlang::splice()
library(ggplot2)
library(frontier)
## Loading required package: micEcon
## 
## If you have questions, suggestions, or comments regarding one of the 'micEcon' packages, please use a forum or 'tracker' at micEcon's R-Forge site:
## https://r-forge.r-project.org/projects/micecon/
## Loading required package: lmtest
## Warning: package 'lmtest' was built under R version 4.0.5
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Please cite the 'frontier' package as:
## Tim Coelli and Arne Henningsen (2013). frontier: Stochastic Frontier Analysis. R package version 1.1. http://CRAN.R-Project.org/package=frontier.
## 
## If you have questions, suggestions, or comments regarding the 'frontier' package, please use a forum or 'tracker' at frontier's R-Forge site:
## https://r-forge.r-project.org/projects/frontier/
library(plm)
## 
## Attaching package: 'plm'
## The following objects are masked from 'package:dplyr':
## 
##     between, lag, lead
library(cattonum)
## cattonum is seeking a new maintainer; please respond if interested: https://github.com/bfgray3/cattonum/issues/40
cl.plm   <- function(dat,fm, cluster)
{
  require(sandwich, quietly = TRUE)
  require(lmtest, quietly = TRUE)
  ef <- estfun.plm(fm)
  if (length(cluster)!=nrow(ef)) {
    cluster <- cluster[as.numeric(rownames(ef))]
  }
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- ncol(ef)
  dfc <- (M/(M-1))*((N-1)/(N-K))

  uj  <- apply(ef,2, function(x) tapply(x, cluster, sum))
  bread <- bread.plm(fm)
  meat <- crossprod(uj)
  vcovCL <- dfc*bread %*% meat %*% bread
  coeftest(fm, vcovCL)
}

estfun.plm <- function(x, ...) {
  model <- x$args$model
  formula <- formula(x)
  if (length(formula)[2]>1) { ## 2SLS
    data <- model.frame(x)
    effect <- x$args$effect
    X <- model.matrix(formula, data, rhs = 1, model = model,
                      effect = effect, theta = theta)
    if (length(formula)[2] == 2)
      W <- model.matrix(formula, data, rhs = 2, model = model,
                        effect = effect, theta = theta)
    else
      W <- model.matrix(formula, data, rhs = c(2,3), model = model,
                        effect = effect, theta = theta)
    Xhat <- lm(X ~ W)$fit
    return(x$residuals*Xhat)
  } else { ## OLS
    X <- model.matrix(x, model = model)
    return( x$residuals * X)
  }
}

bread.plm <- function(x, ...) {
  model <- x$args$model
  formula <- formula(x)
  if (length(formula)[2]>1) { ## 2SLS
    data <- model.frame(x)
    effect <- x$args$effect
    X <- model.matrix(formula, data, rhs = 1, model = model,
                      effect = effect, theta = theta)
    if (length(formula)[2] == 2)
      W <- model.matrix(formula, data, rhs = 2, model = model,
                        effect = effect, theta = theta)
    else
      W <- model.matrix(formula, data, rhs = c(2,3), model = model,
                        effect = effect, theta = theta)
    Xhat <- lm(X ~ W)$fit
    return( solve(crossprod(Xhat)) )
  } else { ## OLS
    X <- model.matrix(x, model = model)
    return( solve(crossprod(X)) )
  }
}

data <- read_csv(file="./raw_data.csv", show_col_types = FALSE, locale = locale(encoding = "UTF-8"))
data
## # A tibble: 17,278 × 89
##    EDINETコード 証券コード 提出日     会計期間終了日 従業員数 平均臨時雇用人員
##    <chr>             <dbl> <date>     <date>            <dbl>            <dbl>
##  1 E00008            13790 2016-06-24 2016-03-31         1253             2546
##  2 E00008            13790 2017-06-23 2017-03-31         1326             2602
##  3 E00008            13790 2018-06-22 2018-03-31         4006               NA
##  4 E00008            13790 2019-06-21 2019-03-31         4166               NA
##  5 E00008            13790 2020-06-26 2020-03-31         4181               NA
##  6 E00024            57060 2016-06-29 2016-03-31        11132             1298
##  7 E00024            57060 2017-06-29 2017-03-31        11630             1260
##  8 E00024            57060 2018-06-28 2018-03-31        12276             1267
##  9 E00024            57060 2019-06-27 2019-03-31        12498             1213
## 10 E00024            57060 2020-06-26 2020-03-31        12197             1317
## # … with 17,268 more rows, and 83 more variables: 所有株式数 <dbl>,
## #   発行済株式(自己株式を除く。)の総数に対する所有株式数の割合 <dbl>,
## #   発行済株式総数 <lgl>, 連結子会社の数 <dbl>, 1株当たり純資産 <dbl>,
## #   自己資本比率 <dbl>, 現金及び現金同等物の残高 <dbl>, 資産 <dbl>,
## #   流動資産 <dbl>, 固定資産 <dbl>, 有形固定資産 <dbl>, 無形固定資産 <dbl>,
## #   ソフトウエア <dbl>, 投資その他の資産 <dbl>, 負債 <dbl>, 流動負債 <dbl>,
## #   短期借入金 <dbl>, 1年内償還予定の社債 <dbl>, …
data$ind <- data$業種
data$year <- as.factor(format(data$会計期間終了日, format = "%Y"))
df <- data[,c("EDINETコード", "year", "ind", "従業員数", "売上高", "資本金", "有形固定資産")]
df
## # A tibble: 17,278 × 7
##    EDINETコード year  ind          従業員数       売上高     資本金 有形固定資産
##    <chr>        <fct> <chr>           <dbl>        <dbl>      <dbl>        <dbl>
##  1 E00008       2016  水産・農林業     1253  60987000000    5.5 e 9  60098000000
##  2 E00008       2017  水産・農林業     1326  63119000000    5.5 e 9  68350000000
##  3 E00008       2018  水産・農林業     4006  66907000000    5.5 e 9  69191000000
##  4 E00008       2019  水産・農林業     4166  70183000000    5.5 e 9  72365000000
##  5 E00008       2020  水産・農林業     4181  71220000000    5.5 e 9  67271000000
##  6 E00024       2016  非鉄金属        11132 450553000000    4.21e10 162931000000
##  7 E00024       2017  非鉄金属        11630 436330000000    4.21e10 169397000000
##  8 E00024       2018  非鉄金属        12276 519215000000    4.21e10 183369000000
##  9 E00024       2019  非鉄金属        12498 497701000000    4.21e10 189857000000
## 10 E00024       2020  非鉄金属        12197 473109000000    4.21e10 189124000000
## # … with 17,268 more rows
names(df) <- c("firm_id", "year", "ind", "employee", "sales", "capital", "fixed_assets")
df[df==0] <- NA
df <- df[complete.cases(df),]
df
## # A tibble: 13,506 × 7
##    firm_id year  ind          employee        sales     capital fixed_assets
##    <chr>   <fct> <chr>           <dbl>        <dbl>       <dbl>        <dbl>
##  1 E00008  2016  水産・農林業     1253  60987000000  5500000000  60098000000
##  2 E00008  2017  水産・農林業     1326  63119000000  5500000000  68350000000
##  3 E00008  2018  水産・農林業     4006  66907000000  5500000000  69191000000
##  4 E00008  2019  水産・農林業     4166  70183000000  5500000000  72365000000
##  5 E00008  2020  水産・農林業     4181  71220000000  5500000000  67271000000
##  6 E00024  2016  非鉄金属        11132 450553000000 42129000000 162931000000
##  7 E00024  2017  非鉄金属        11630 436330000000 42129000000 169397000000
##  8 E00024  2018  非鉄金属        12276 519215000000 42129000000 183369000000
##  9 E00024  2019  非鉄金属        12498 497701000000 42129000000 189857000000
## 10 E00024  2020  非鉄金属        12197 473109000000 42129000000 189124000000
## # … with 13,496 more rows
prod.ols <- plm(log(sales) ~ log(capital) + log(fixed_assets) + log(employee),
                data = df, model = "pooling", index = index("firm_id", "year"))
cl.plm(df, prod.ols, df$firm_id)
## 
## t test of coefficients:
## 
##                     Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)       11.7970580  0.2149327  54.887 < 2.2e-16 ***
## log(capital)       0.1911991  0.0151607  12.611 < 2.2e-16 ***
## log(fixed_assets)  0.1968581  0.0097622  20.165 < 2.2e-16 ***
## log(employee)      0.5758034  0.0151981  37.886 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
df <- df %>% catto_label(ind)
df$ind <- as.factor(df$ind)
prod.ind <- plm(log(sales) ~ log(capital) + log(fixed_assets) + log(employee) + ind + year,
                data = df, model = "pooling", index = index("firm_id", "year"))
cl.plm(df, prod.ind, df$firm_id)
## 
## t test of coefficients:
## 
##                    Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)       11.847358   0.242996 48.7554 < 2.2e-16 ***
## log(capital)       0.186708   0.013112 14.2391 < 2.2e-16 ***
## log(fixed_assets)  0.148505   0.010997 13.5048 < 2.2e-16 ***
## log(employee)      0.676087   0.015018 45.0172 < 2.2e-16 ***
## ind2               0.900544   0.158710  5.6741 1.423e-08 ***
## ind3               1.119094   0.269286  4.1558 3.262e-05 ***
## ind4               0.566540   0.181334  3.1243 0.0017861 ** 
## ind5               0.481318   0.229168  2.1003 0.0357227 *  
## ind6               0.656797   0.128985  5.0920 3.590e-07 ***
## ind7              -0.201491   0.124083 -1.6238 0.1044319    
## ind8               0.293874   0.096202  3.0547 0.0022569 ** 
## ind9               0.195399   0.108858  1.7950 0.0726785 .  
## ind10             -0.207308   0.112232 -1.8471 0.0647475 .  
## ind11              0.335599   0.130606  2.5695 0.0101939 *  
## ind12             -0.021549   0.162183 -0.1329 0.8943016    
## ind13              0.373826   0.089692  4.1679 3.094e-05 ***
## ind14             -0.232037   0.120514 -1.9254 0.0542005 .  
## ind15              0.117429   0.094522  1.2423 0.2141293    
## ind16              0.011113   0.099698  0.1115 0.9112449    
## ind17              0.191679   0.084029  2.2811 0.0225574 *  
## ind18              0.145161   0.085103  1.7057 0.0880860 .  
## ind19              0.130542   0.087774  1.4872 0.1369726    
## ind20              1.059377   0.110460  9.5906 < 2.2e-16 ***
## ind21              0.682594   0.085787  7.9568 1.905e-15 ***
## ind22              0.804237   0.081103  9.9162 < 2.2e-16 ***
## ind23              0.258798   0.077728  3.3295 0.0008722 ***
## ind24              0.076130   0.078296  0.9723 0.3309027    
## ind25             -0.043807   0.080740 -0.5426 0.5874332    
## ind26              0.724976   0.080646  8.9896 < 2.2e-16 ***
## ind27              1.230460   0.086891 14.1609 < 2.2e-16 ***
## ind28              0.240806   0.083368  2.8885 0.0038773 ** 
## ind29              0.206799   0.082168  2.5168 0.0118551 *  
## year2016           0.037154   0.038746  0.9589 0.3376093    
## year2017           0.030251   0.039150  0.7727 0.4397262    
## year2018           0.056730   0.039214  1.4467 0.1480115    
## year2019           0.072546   0.039484  1.8374 0.0661779 .  
## year2020           0.047556   0.041874  1.1357 0.2561030    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
prod.ind <- plm(log(sales) ~ (log(capital) + log(fixed_assets) + log(employee)) * ind + year,
                data = df, model = "pooling", index = index("firm_id", "year"))
cl.plm(df, prod.ind, df$firm_id)
## 
## t test of coefficients:
## 
##                            Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept)              1.5558e+01  2.9202e+00  5.3276 1.011e-07 ***
## log(capital)            -2.1482e-01  1.0696e-01 -2.0084 0.0446176 *  
## log(fixed_assets)        2.0214e-01  1.9707e-01  1.0257 0.3050433    
## log(employee)            1.1915e+00  4.9927e-01  2.3865 0.0170225 *  
## ind2                    -5.3205e+00  2.9346e+00 -1.8130 0.0698501 .  
## ind3                    -1.4171e+01  4.4500e+00 -3.1845 0.0014533 ** 
## ind4                    -1.1611e+01  3.4094e+00 -3.4056 0.0006622 ***
## ind5                    -1.8930e+00  4.7202e+00 -0.4010 0.6884019    
## ind6                    -5.2728e+00  3.9670e+00 -1.3291 0.1838214    
## ind7                    -1.0287e+01  3.2851e+00 -3.1315 0.0017427 ** 
## ind8                    -4.3685e+00  3.3915e+00 -1.2881 0.1977396    
## ind9                     4.5591e-01  3.4004e+00  0.1341 0.8933448    
## ind10                   -4.0804e+00  3.4891e+00 -1.1695 0.2422290    
## ind11                   -7.8898e+00  3.2164e+00 -2.4530 0.0141792 *  
## ind12                    1.5338e+00  3.9475e+00  0.3886 0.6976151    
## ind13                   -7.1215e+00  3.1756e+00 -2.2426 0.0249403 *  
## ind14                   -1.9358e+00  3.9711e+00 -0.4875 0.6259318    
## ind15                   -6.6915e+00  3.2483e+00 -2.0600 0.0394203 *  
## ind16                   -2.8476e+00  3.3368e+00 -0.8534 0.3934519    
## ind17                   -7.1524e+00  3.0839e+00 -2.3193 0.0203956 *  
## ind18                   -7.9262e+00  3.2241e+00 -2.4584 0.0139671 *  
## ind19                   -4.4548e+00  3.1517e+00 -1.4135 0.1575414    
## ind20                    7.7714e-01  3.0352e+00  0.2560 0.7979249    
## ind21                   -7.6250e+00  3.2142e+00 -2.3723 0.0176934 *  
## ind22                   -4.1708e+00  3.0092e+00 -1.3860 0.1657752    
## ind23                   -7.5485e+00  3.0286e+00 -2.4924 0.0127002 *  
## ind24                   -3.3359e+00  3.0911e+00 -1.0792 0.2805207    
## ind25                   -2.6350e+00  3.0727e+00 -0.8575 0.3911604    
## ind26                   -4.1225e+00  2.9924e+00 -1.3776 0.1683406    
## ind27                   -2.4523e+00  3.0842e+00 -0.7951 0.4265631    
## ind28                   -3.5207e+00  2.9597e+00 -1.1896 0.2342433    
## ind29                   -2.7787e+00  2.9705e+00 -0.9354 0.3495917    
## year2016                 2.4627e-02  3.5918e-02  0.6856 0.4929552    
## year2017                 1.7256e-02  3.6284e-02  0.4756 0.6343873    
## year2018                 4.1316e-02  3.6509e-02  1.1317 0.2577930    
## year2019                 5.7919e-02  3.6583e-02  1.5832 0.1133954    
## year2020                 3.2299e-02  3.9060e-02  0.8269 0.4083061    
## log(capital):ind2        3.2426e-01  1.1061e-01  2.9317 0.0033771 ** 
## log(capital):ind3        4.7883e-01  2.6361e-01  1.8164 0.0693300 .  
## log(capital):ind4        5.5775e-01  1.6841e-01  3.3118 0.0009293 ***
## log(capital):ind5        7.5494e-01  2.0907e-01  3.6109 0.0003063 ***
## log(capital):ind6        6.1412e-01  2.0586e-01  2.9832 0.0028579 ** 
## log(capital):ind7        4.9821e-01  1.4576e-01  3.4180 0.0006328 ***
## log(capital):ind8        4.1846e-01  1.5136e-01  2.7647 0.0057057 ** 
## log(capital):ind9        2.1408e-01  1.5952e-01  1.3420 0.1796042    
## log(capital):ind10       3.9097e-01  1.7618e-01  2.2192 0.0264895 *  
## log(capital):ind11       6.4732e-01  1.3475e-01  4.8037 1.574e-06 ***
## log(capital):ind12       8.2471e-02  1.5040e-01  0.5484 0.5834607    
## log(capital):ind13       3.2242e-01  1.2610e-01  2.5569 0.0105708 *  
## log(capital):ind14       2.4242e-01  1.6575e-01  1.4625 0.1436228    
## log(capital):ind15       1.4140e-01  1.4534e-01  0.9729 0.3306035    
## log(capital):ind16       3.9501e-01  1.3250e-01  2.9811 0.0028776 ** 
## log(capital):ind17       5.1459e-01  1.1371e-01  4.5254 6.079e-06 ***
## log(capital):ind18       3.9783e-01  1.1875e-01  3.3501 0.0008099 ***
## log(capital):ind19       4.1158e-01  1.1778e-01  3.4945 0.0004765 ***
## log(capital):ind20       4.0456e-01  1.1886e-01  3.4036 0.0006669 ***
## log(capital):ind21       4.4461e-01  1.2055e-01  3.6882 0.0002267 ***
## log(capital):ind22       4.5552e-01  1.1728e-01  3.8841 0.0001032 ***
## log(capital):ind23       3.3862e-01  1.1257e-01  3.0082 0.0026330 ** 
## log(capital):ind24       3.3484e-01  1.1299e-01  2.9634 0.0030484 ** 
## log(capital):ind25       3.4838e-01  1.1911e-01  2.9248 0.0034521 ** 
## log(capital):ind26       3.1965e-01  1.1201e-01  2.8538 0.0043265 ** 
## log(capital):ind27       4.0768e-01  1.1838e-01  3.4437 0.0005755 ***
## log(capital):ind28       3.7591e-01  1.1069e-01  3.3961 0.0006855 ***
## log(capital):ind29       4.5262e-01  1.1189e-01  4.0453 5.256e-05 ***
## log(fixed_assets):ind2   2.2553e-01  1.9795e-01  1.1393 0.2545939    
## log(fixed_assets):ind3   4.3361e-01  2.3479e-01  1.8468 0.0647960 .  
## log(fixed_assets):ind4   2.2980e-01  2.1994e-01  1.0449 0.2961092    
## log(fixed_assets):ind5  -5.8103e-01  2.4852e-01 -2.3380 0.0194016 *  
## log(fixed_assets):ind6  -1.0804e-01  1.9888e-01 -0.5432 0.5869704    
## log(fixed_assets):ind7   2.3823e-01  2.0006e-01  1.1908 0.2337517    
## log(fixed_assets):ind8   2.3235e-03  2.6763e-01  0.0087 0.9930732    
## log(fixed_assets):ind9  -8.7900e-02  2.4314e-01 -0.3615 0.7177177    
## log(fixed_assets):ind10 -3.8335e-02  2.3719e-01 -0.1616 0.8716079    
## log(fixed_assets):ind11  3.5507e-02  2.1292e-01  0.1668 0.8675585    
## log(fixed_assets):ind12 -1.6061e-01  2.2129e-01 -0.7258 0.4679814    
## log(fixed_assets):ind13  2.6183e-01  2.2250e-01  1.1768 0.2393082    
## log(fixed_assets):ind14  3.1106e-02  2.2353e-01  0.1392 0.8893271    
## log(fixed_assets):ind15  4.1614e-01  2.2293e-01  1.8667 0.0619641 .  
## log(fixed_assets):ind16 -8.1971e-02  2.0609e-01 -0.3977 0.6908278    
## log(fixed_assets):ind17  2.8436e-02  2.0844e-01  0.1364 0.8914865    
## log(fixed_assets):ind18  2.1598e-01  2.0692e-01  1.0438 0.2965933    
## log(fixed_assets):ind19 -1.0925e-02  2.0508e-01 -0.0533 0.9575159    
## log(fixed_assets):ind20 -2.2744e-01  2.0118e-01 -1.1305 0.2582748    
## log(fixed_assets):ind21  1.8347e-01  2.2176e-01  0.8274 0.4080439    
## log(fixed_assets):ind22 -6.6745e-02  2.0139e-01 -0.3314 0.7403336    
## log(fixed_assets):ind23  2.4427e-01  2.0228e-01  1.2076 0.2272362    
## log(fixed_assets):ind24  2.9514e-04  2.0518e-01  0.0014 0.9988523    
## log(fixed_assets):ind25 -4.4709e-02  2.0310e-01 -0.2201 0.8257702    
## log(fixed_assets):ind26  9.9508e-02  2.0038e-01  0.4966 0.6194847    
## log(fixed_assets):ind27 -9.0862e-02  2.0263e-01 -0.4484 0.6538681    
## log(fixed_assets):ind28 -1.6733e-02  1.9816e-01 -0.0844 0.9327055    
## log(fixed_assets):ind29 -1.3505e-01  1.9789e-01 -0.6825 0.4949577    
## log(employee):ind2      -9.1085e-01  4.9959e-01 -1.8232 0.0682963 .  
## log(employee):ind3      -8.4189e-01  5.8174e-01 -1.4472 0.1478668    
## log(employee):ind4      -8.2763e-01  5.0930e-01 -1.6250 0.1041812    
## log(employee):ind5      -7.1677e-02  5.8164e-01 -0.1232 0.9019256    
## log(employee):ind6      -6.9279e-01  5.3243e-01 -1.3012 0.1932129    
## log(employee):ind7      -8.4929e-01  5.0567e-01 -1.6795 0.0930713 .  
## log(employee):ind8      -6.1828e-01  5.1933e-01 -1.1905 0.2338573    
## log(employee):ind9      -3.8888e-01  5.0513e-01 -0.7699 0.4413982    
## log(employee):ind10     -5.1214e-01  5.2666e-01 -0.9724 0.3308579    
## log(employee):ind11     -9.3118e-01  5.0566e-01 -1.8415 0.0655671 .  
## log(employee):ind12      1.3794e-01  5.4377e-01  0.2537 0.7997548    
## log(employee):ind13     -7.9335e-01  5.0466e-01 -1.5720 0.1159633    
## log(employee):ind14     -5.7123e-01  5.1685e-01 -1.1052 0.2690891    
## log(employee):ind15     -8.1999e-01  5.0482e-01 -1.6243 0.1043353    
## log(employee):ind16     -5.2082e-01  5.0425e-01 -1.0329 0.3016894    
## log(employee):ind17     -6.2342e-01  5.0388e-01 -1.2372 0.2160216    
## log(employee):ind18     -7.7374e-01  5.0373e-01 -1.5360 0.1245591    
## log(employee):ind19     -5.5906e-01  5.0343e-01 -1.1105 0.2668005    
## log(employee):ind20     -4.5014e-01  5.0316e-01 -0.8946 0.3710021    
## log(employee):ind21     -7.9444e-01  5.0744e-01 -1.5656 0.1174713    
## log(employee):ind22     -4.6026e-01  5.0198e-01 -0.9169 0.3592161    
## log(employee):ind23     -7.2773e-01  5.0187e-01 -1.4500 0.1470764    
## log(employee):ind24     -5.2349e-01  5.0235e-01 -1.0421 0.2973973    
## log(employee):ind25     -5.3234e-01  5.0165e-01 -1.0612 0.2886269    
## log(employee):ind26     -6.0289e-01  5.0253e-01 -1.1997 0.2302707    
## log(employee):ind27     -4.0955e-01  5.0386e-01 -0.8128 0.4163247    
## log(employee):ind28     -5.3760e-01  5.0105e-01 -1.0730 0.2833127    
## log(employee):ind29     -5.2356e-01  5.0016e-01 -1.0468 0.2952153    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
prod.fe <- plm(log(sales) ~ log(capital) + log(fixed_assets) + log(employee) + ind + year,
               data = df, model = "within", index = index("firm_id", "year"))
cl.plm(df, prod.fe, df$firm_id)
## 
## t test of coefficients:
## 
##                    Estimate Std. Error t value  Pr(>|t|)    
## log(capital)       0.186708   0.013112 14.2397 < 2.2e-16 ***
## log(fixed_assets)  0.148505   0.010996 13.5053 < 2.2e-16 ***
## log(employee)      0.676087   0.015018 45.0188 < 2.2e-16 ***
## ind2               0.900544   0.158704  5.6744 1.421e-08 ***
## ind3               1.119094   0.269276  4.1559 3.260e-05 ***
## ind4               0.566540   0.181327  3.1244 0.0017854 ** 
## ind5               0.481318   0.229160  2.1004 0.0357158 *  
## ind6               0.656797   0.128980  5.0922 3.587e-07 ***
## ind7              -0.201491   0.124078 -1.6239 0.1044190    
## ind8               0.293874   0.096199  3.0549 0.0022561 ** 
## ind9               0.195399   0.108854  1.7951 0.0726679 .  
## ind10             -0.207308   0.112227 -1.8472 0.0647376 .  
## ind11              0.335599   0.130602  2.5696 0.0101911 *  
## ind12             -0.021549   0.162177 -0.1329 0.8942977    
## ind13              0.373826   0.089689  4.1680 3.092e-05 ***
## ind14             -0.232037   0.120509 -1.9255 0.0541915 .  
## ind15              0.117429   0.094518  1.2424 0.2141123    
## ind16              0.011113   0.099694  0.1115 0.9112416    
## ind17              0.191679   0.084026  2.2812 0.0225524 *  
## ind18              0.145161   0.085100  1.7058 0.0880742 .  
## ind19              0.130542   0.087771  1.4873 0.1369581    
## ind20              1.059377   0.110456  9.5909 < 2.2e-16 ***
## ind21              0.682594   0.085784  7.9571 1.901e-15 ***
## ind22              0.804237   0.081100  9.9166 < 2.2e-16 ***
## ind23              0.258798   0.077725  3.3297 0.0008718 ***
## ind24              0.076130   0.078293  0.9724 0.3308848    
## ind25             -0.043807   0.080737 -0.5426 0.5874194    
## ind26              0.724976   0.080643  8.9899 < 2.2e-16 ***
## ind27              1.230460   0.086888 14.1614 < 2.2e-16 ***
## ind28              0.240806   0.083365  2.8886 0.0038760 ** 
## ind29              0.206799   0.082165  2.5169 0.0118520 *  
## year2016           0.037154   0.038744  0.9590 0.3375914    
## year2017           0.030251   0.039149  0.7727 0.4397092    
## year2018           0.056730   0.039213  1.4467 0.1479964    
## year2019           0.072546   0.039482  1.8374 0.0661679 .  
## year2020           0.047556   0.041872  1.1357 0.2560853    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
df$year <- as.numeric(df$year)
df$ind <- as.numeric(df$ind)
df <- df[order(df$firm_id, df$year), ]
df$invest <- c(df$capital[2:nrow(df)] - df$capital[1:(nrow(df) - 1)], NA)
df$invest[c(df$firm_id[2:nrow(df)] != df$firm_id[1:(nrow(df) - 1)], FALSE)] <- NA
notmissing <- !is.na(df$capital) & !is.na(df$invest)  ##remove the missing values
df.nm <- subset(df, notmissing)
df.nm <- df.nm[order(df.nm$firm_id, df.nm$year), ]

op1 <- lm(log(sales) ~ poly(cbind(log(capital), invest), degree = 4)+log(fixed_assets)+log(employee), data = df.nm)
summary(op1)
## 
## Call:
## lm(formula = log(sales) ~ poly(cbind(log(capital), invest), degree = 4) + 
##     log(fixed_assets) + log(employee), data = df.nm)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.0100 -0.4378 -0.0782  0.3670  3.8206 
## 
## Coefficients:
##                                                    Estimate Std. Error t value
## (Intercept)                                       1.609e+01  9.883e-02 162.758
## poly(cbind(log(capital), invest), degree = 4)1.0  3.102e+01  1.257e+00  24.674
## poly(cbind(log(capital), invest), degree = 4)2.0  5.133e+00  7.615e-01   6.740
## poly(cbind(log(capital), invest), degree = 4)3.0 -2.645e+00  6.978e-01  -3.790
## poly(cbind(log(capital), invest), degree = 4)4.0  1.447e+00  6.947e-01   2.083
## poly(cbind(log(capital), invest), degree = 4)0.1  2.935e+01  9.686e+01   0.303
## poly(cbind(log(capital), invest), degree = 4)1.1 -1.346e+03  4.129e+03  -0.326
## poly(cbind(log(capital), invest), degree = 4)2.1 -2.371e+02  1.066e+03  -0.223
## poly(cbind(log(capital), invest), degree = 4)3.1  4.622e+02  2.168e+02   2.132
## poly(cbind(log(capital), invest), degree = 4)0.2  1.544e+01  7.450e+01   0.207
## poly(cbind(log(capital), invest), degree = 4)1.2 -9.873e+02  3.081e+03  -0.320
## poly(cbind(log(capital), invest), degree = 4)2.2  3.346e+02  8.378e+02   0.399
## poly(cbind(log(capital), invest), degree = 4)0.3  1.146e+01  3.741e+01   0.306
## poly(cbind(log(capital), invest), degree = 4)1.3 -5.314e+02  1.391e+03  -0.382
## poly(cbind(log(capital), invest), degree = 4)0.4 -7.819e-01  3.186e+00  -0.245
## log(fixed_assets)                                 1.967e-01  4.998e-03  39.349
## log(employee)                                     5.581e-01  7.505e-03  74.370
##                                                  Pr(>|t|)    
## (Intercept)                                       < 2e-16 ***
## poly(cbind(log(capital), invest), degree = 4)1.0  < 2e-16 ***
## poly(cbind(log(capital), invest), degree = 4)2.0 1.67e-11 ***
## poly(cbind(log(capital), invest), degree = 4)3.0 0.000152 ***
## poly(cbind(log(capital), invest), degree = 4)4.0 0.037316 *  
## poly(cbind(log(capital), invest), degree = 4)0.1 0.761901    
## poly(cbind(log(capital), invest), degree = 4)1.1 0.744444    
## poly(cbind(log(capital), invest), degree = 4)2.1 0.823881    
## poly(cbind(log(capital), invest), degree = 4)3.1 0.033049 *  
## poly(cbind(log(capital), invest), degree = 4)0.2 0.835774    
## poly(cbind(log(capital), invest), degree = 4)1.2 0.748653    
## poly(cbind(log(capital), invest), degree = 4)2.2 0.689604    
## poly(cbind(log(capital), invest), degree = 4)0.3 0.759305    
## poly(cbind(log(capital), invest), degree = 4)1.3 0.702449    
## poly(cbind(log(capital), invest), degree = 4)0.4 0.806108    
## log(fixed_assets)                                 < 2e-16 ***
## log(employee)                                     < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6913 on 10520 degrees of freedom
## Multiple R-squared:  0.8183, Adjusted R-squared:  0.8181 
## F-statistic:  2962 on 16 and 10520 DF,  p-value: < 2.2e-16
b1 <- op1$coefficients[c("log(employee)", "log(fixed_assets)")]
xb1 <- log(as.matrix(df.nm[, c("employee", "fixed_assets")])) %*% b1
fhat <- predict(op1, df.nm) - xb1

## construction of a lag function later used to calculate lag capital and
## fhat
lag <- function(x, i = df.nm$firm_id, t = df.nm$year) {
  if (length(i) != length(x) || length(i) != length(t)) {
    stop("Inputs not same length")
  }
  x.lag <- x[1:(length(x) - 1)]
  x.lag[i[1:(length(i) - 1)] != i[2:length(i)]] <- NA
  x.lag[t[1:(length(i) - 1)] + 1 != t[2:length(i)]] <- NA
  return(c(NA, x.lag))
}

## Create data frame for step 2 regression
df.step2 <- data.frame(lhs = log(df.nm$sales) - xb1,
                       year = df.nm$year, firm_id = df.nm$firm_id,
                       k = log(df.nm$capital), fhat = fhat,
                       k.lag = log(lag(df.nm$capital)),
                       f.lag = lag(fhat))

## droping missing data
df.step2 <- subset(df.step2, !apply(df.step2, 1, function(x) any(is.na(x))))
objective <- function(betaK, degree = 4) {
  op2 <- lm(I(lhs - betaK * k) ~ poly(I(f.lag - betaK * k.lag), degree), data = df.step2)
  return(sum(residuals(op2)^2))
}

fig.df <- data.frame(bk = seq(from = -0.1, to = 0.3, by = 0.005))
fig.df$obj <- sapply(fig.df$bk, objective)
ggplot(data = fig.df, aes(x = bk, y = obj)) + geom_point()

opt.out <- optim(prod.ols$coefficients["log(capital)"], fn = objective, method = "Brent",
                 lower = -1, upper = 1)
betaK <- opt.out$par

betaK
## [1] 0.08126592