I am using data from Greene Econometric Analysis book of Christensen and Greene (1976), JPE 84. Purpose of this post is to learn R for Econometric for testing of hypothesis, testing hetroscedasticity, Chow break point test I am using tidyverse package and some other packages to carry out this analysis.
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
setwd("C:/Users/hp/OneDrive - Higher Education Commission/Econometrics_R")
cgdata<-read.csv("http://people.stern.nyu.edu/wgreene/Text/Edition7/TableF4-4.csv")# data are uploaded from the web
#cgdata<-dput(cgdata) # to read data even if net is not working
#cgdata<-write.csv(cgdata,file = "Cobb-Douglas.csv")
#CD<-read.csv("Cobb-Doughlas.csv",header = TRUE)
library(skimr)
To creat variables log total cost, log total output, log wage rate, log price of capital, log price of fuel, we have transformed variable as below.
skim(cgdata) # summarise data
| Name | cgdata |
| Number of rows | 158 |
| Number of columns | 10 |
| _______________________ | |
| Column type frequency: | |
| numeric | 10 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| id | 0 | 1 | 127.21 | 60.79 | 1.00 | 78.50 | 138.50 | 178.75 | 218.00 | ▃▅▆▇▇ |
| YEAR | 0 | 1 | 1970.00 | 0.00 | 1970.00 | 1970.00 | 1970.00 | 1970.00 | 1970.00 | ▁▁▇▁▁ |
| COST | 0 | 1 | 53.27 | 87.06 | 0.13 | 10.22 | 25.55 | 55.32 | 737.41 | ▇▁▁▁▁ |
| Q | 0 | 1 | 10469.41 | 15187.52 | 4.00 | 1971.00 | 5645.50 | 12365.75 | 115500.00 | ▇▁▁▁▁ |
| PL | 0 | 1 | 8001.86 | 1398.84 | 5063.49 | 6975.18 | 7890.18 | 8855.23 | 13044.00 | ▂▇▅▁▁ |
| SL | 0 | 1 | 0.14 | 0.05 | 0.05 | 0.10 | 0.12 | 0.17 | 0.33 | ▅▇▃▁▁ |
| PK | 0 | 1 | 71.42 | 11.98 | 31.73 | 67.61 | 74.12 | 78.79 | 92.65 | ▁▁▃▇▃ |
| SK | 0 | 1 | 0.23 | 0.06 | 0.09 | 0.19 | 0.22 | 0.25 | 0.46 | ▂▇▃▁▁ |
| PF | 0 | 1 | 30.75 | 7.94 | 9.00 | 24.48 | 30.66 | 36.00 | 51.46 | ▁▇▇▆▂ |
| SF | 0 | 1 | 0.63 | 0.08 | 0.24 | 0.59 | 0.64 | 0.69 | 0.81 | ▁▁▂▇▂ |
cgdata1<- cgdata %>% mutate(lnc=log(COST),lnq=log(Q),lnpl=log(PL),lnpk=log(PK),lnpf=log(PF),lnqq=0.5*lnq^2,lnqpl=lnq*lnpl,lnqpk=lnq*lnpk,lnqpf=lnq*lnpf)
cg128<-head(cgdata1,n=128) # Select first 128 observations
# general model
# general model
cgmodel.1<- lm(lnc~lnpl+lnpk+lnpf+lnq+lnqq+lnqpl+lnqpk+lnqpf,data =cg128)
# cgmodel.1<-lm(lnc~(lnpl+lnpk+lnpf)*lnq+lnqq)
summary(cgmodel.1)
##
## Call:
## lm(formula = lnc ~ lnpl + lnpk + lnpf + lnq + lnqq + lnqpl +
## lnqpk + lnqpf, data = cg128)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.43003 -0.07982 0.00020 0.08193 0.37121
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.840845 3.801594 1.010 0.3144
## lnpl -0.666531 0.359624 -1.853 0.0663 .
## lnpk -0.298407 0.350720 -0.851 0.3966
## lnpf 0.235684 0.245856 0.959 0.3397
## lnq -0.866401 0.451067 -1.921 0.0572 .
## lnqq 0.054582 0.005412 10.085 <2e-16 ***
## lnqpl 0.104898 0.042583 2.463 0.0152 *
## lnqpk 0.045753 0.047428 0.965 0.3367
## lnqpf 0.053572 0.030611 1.750 0.0827 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1378 on 119 degrees of freedom
## Multiple R-squared: 0.9929, Adjusted R-squared: 0.9925
## F-statistic: 2092 on 8 and 119 DF, p-value: < 2.2e-16
# cgmodel.1<-lm(lnc~(lnpl+lnpk+lnpf)*lnq+lnqq)
summary(cgmodel.1)
##
## Call:
## lm(formula = lnc ~ lnpl + lnpk + lnpf + lnq + lnqq + lnqpl +
## lnqpk + lnqpf, data = cg128)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.43003 -0.07982 0.00020 0.08193 0.37121
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.840845 3.801594 1.010 0.3144
## lnpl -0.666531 0.359624 -1.853 0.0663 .
## lnpk -0.298407 0.350720 -0.851 0.3966
## lnpf 0.235684 0.245856 0.959 0.3397
## lnq -0.866401 0.451067 -1.921 0.0572 .
## lnqq 0.054582 0.005412 10.085 <2e-16 ***
## lnqpl 0.104898 0.042583 2.463 0.0152 *
## lnqpk 0.045753 0.047428 0.965 0.3367
## lnqpf 0.053572 0.030611 1.750 0.0827 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1378 on 119 degrees of freedom
## Multiple R-squared: 0.9929, Adjusted R-squared: 0.9925
## F-statistic: 2092 on 8 and 119 DF, p-value: < 2.2e-16
# linear hypothesis testing: linear homogeneity in prices
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
lht(cgmodel.1,c("lnpl+lnpk+lnpf=1","lnqpl+lnqpk+lnqpf=0"))
## Linear hypothesis test
##
## Hypothesis:
## lnpl + lnpk + lnpf = 1
## lnqpl + lnqpk + lnqpf = 0
##
## Model 1: restricted model
## Model 2: lnc ~ lnpl + lnpk + lnpf + lnq + lnqq + lnqpl + lnqpk + lnqpf
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 121 2.4453
## 2 119 2.2606 2 0.18467 4.8606 0.009351 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#test lnpl+lnpk+lnpf=1,lnqpl+lnqpk+lnqpf=0
R<-rbind(c(0,1,1,1,0,0,0,0,0),c(0,0,0,0,0,0,1,1,1))
# linearHypothesis(cgmodel.1,R,c(1,0))
# linearHypothesis(cgmodel.1,R,c(1,0))
linearHypothesis(cgmodel.1,R,c(1,0))
## Linear hypothesis test
##
## Hypothesis:
## lnpl + lnpk + lnpf = 1
## lnqpl + lnqpk + lnqpf = 0
##
## Model 1: restricted model
## Model 2: lnc ~ lnpl + lnpk + lnpf + lnq + lnqq + lnqpl + lnqpk + lnqpf
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 121 2.4453
## 2 119 2.2606 2 0.18467 4.8606 0.009351 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# data normalized by fuel price: lnpf, lnqpf drop out
cg_norm<-cgdata1 %>% mutate(lncpf=lnc-lnpf, lnplpf=lnpl-lnpf, lnpkpf=lnpk-lnpf, lnqplpf=lnq*lnplpf, lnqpkpf=lnq*lnpkpf)
cg_norm128<-head(cg_norm,n=128)
# restricted model
cgmodel.2<-lm(lncpf~lnplpf+lnpkpf+lnq+lnqq+lnqplpf+lnqpkpf,data = cg_norm128)
# cgmodel.2<-lm(lncpf~(lnplpf+lnpkpf)*lnq+lnqq)
summary(cgmodel.2)
##
## Call:
## lm(formula = lncpf ~ lnplpf + lnpkpf + lnq + lnqq + lnqplpf +
## lnqpkpf, data = cg_norm128)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.48530 -0.08965 -0.00140 0.08496 0.40995
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.513186 1.125459 -6.676 7.88e-10 ***
## lnplpf 0.225136 0.224238 1.004 0.31738
## lnpkpf 0.458969 0.249167 1.842 0.06792 .
## lnq 0.442643 0.138053 3.206 0.00172 **
## lnqq 0.059165 0.005369 11.021 < 2e-16 ***
## lnqplpf 0.002761 0.027699 0.100 0.92076
## lnqpkpf -0.049460 0.032132 -1.539 0.12635
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1422 on 121 degrees of freedom
## Multiple R-squared: 0.9921, Adjusted R-squared: 0.9917
## F-statistic: 2518 on 6 and 121 DF, p-value: < 2.2e-16
# restricted model
cgmodel.0<-lm(lnc~lnpl+lnpk+lnpf+lnq,data=cg_norm128)
summary(cgmodel.0)
##
## Call:
## lm(formula = lnc ~ lnpl + lnpk + lnpf + lnq, data = cg_norm128)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.53444 -0.13267 -0.00883 0.09207 0.95204
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.39480 1.31704 -6.374 3.35e-09 ***
## lnpl 0.13244 0.13250 1.000 0.319
## lnpk 0.20326 0.13481 1.508 0.134
## lnpf 0.74007 0.07491 9.880 < 2e-16 ***
## lnq 0.83618 0.01092 76.585 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2199 on 123 degrees of freedom
## Multiple R-squared: 0.9814, Adjusted R-squared: 0.9808
## F-statistic: 1626 on 4 and 123 DF, p-value: < 2.2e-16
# model comparison
cgmodel.3<-lm(lnc~lnplpf+lnpkpf+lnq+lnqq+lnqplpf+lnqpkpf,offset=lnpf,data = cg_norm128)
summary(cgmodel.3)
##
## Call:
## lm(formula = lnc ~ lnplpf + lnpkpf + lnq + lnqq + lnqplpf + lnqpkpf,
## data = cg_norm128, offset = lnpf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.48530 -0.08965 -0.00140 0.08496 0.40995
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.513186 1.125459 -6.676 7.88e-10 ***
## lnplpf 0.225136 0.224238 1.004 0.31738
## lnpkpf 0.458969 0.249167 1.842 0.06792 .
## lnq 0.442643 0.138053 3.206 0.00172 **
## lnqq 0.059165 0.005369 11.021 < 2e-16 ***
## lnqplpf 0.002761 0.027699 0.100 0.92076
## lnqpkpf -0.049460 0.032132 -1.539 0.12635
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1422 on 121 degrees of freedom
## Multiple R-squared: 0.9924, Adjusted R-squared: 0.992
## F-statistic: 2623 on 6 and 121 DF, p-value: < 2.2e-16
anova(cgmodel.3,cgmodel.1)
## Analysis of Variance Table
##
## Model 1: lnc ~ lnplpf + lnpkpf + lnq + lnqq + lnqplpf + lnqpkpf
## Model 2: lnc ~ lnpl + lnpk + lnpf + lnq + lnqq + lnqpl + lnqpk + lnqpf
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 121 2.4453
## 2 119 2.2606 2 0.18467 4.8606 0.009351 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# cost function: linear homogeneity in prices
cgmodel1<-lm(lncpf~lnplpf+lnpkpf+lnq+lnqq+lnqplpf+lnqpkpf, data = cg_norm)
# cgmodel<-lm(lncpf~(lnplpf+lnpkpf)*lnq+lnqq)
summary(cgmodel1)
##
## Call:
## lm(formula = lncpf ~ lnplpf + lnpkpf + lnq + lnqq + lnqplpf +
## lnqpkpf, data = cg_norm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.46034 -0.07956 -0.00052 0.08586 0.37748
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.240779 1.014113 -7.140 3.67e-11 ***
## lnplpf 0.191299 0.195304 0.979 0.328902
## lnpkpf 0.389430 0.169332 2.300 0.022830 *
## lnq 0.458892 0.124021 3.700 0.000301 ***
## lnqq 0.060083 0.004414 13.612 < 2e-16 ***
## lnqplpf -0.004618 0.023639 -0.195 0.845371
## lnqpkpf -0.029544 0.021249 -1.390 0.166463
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1375 on 151 degrees of freedom
## Multiple R-squared: 0.9924, Adjusted R-squared: 0.992
## F-statistic: 3266 on 6 and 151 DF, p-value: < 2.2e-16
cgmodel<-lm(lncpf~lnplpf+lnpkpf+lnq+lnqq,data = cg_norm)
summary(cgmodel)
##
## Call:
## lm(formula = lncpf ~ lnplpf + lnpkpf + lnq + lnqq, data = cg_norm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.42576 -0.08891 -0.00223 0.08404 0.37363
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.818163 0.252439 -27.009 < 2e-16 ***
## lnplpf 0.152445 0.046597 3.272 0.00132 **
## lnpkpf 0.162034 0.040406 4.010 9.46e-05 ***
## lnq 0.402745 0.031483 12.792 < 2e-16 ***
## lnqq 0.060895 0.004325 14.079 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1378 on 153 degrees of freedom
## Multiple R-squared: 0.9922, Adjusted R-squared: 0.992
## F-statistic: 4880 on 4 and 153 DF, p-value: < 2.2e-16
anova(cgmodel,cgmodel1)
## Analysis of Variance Table
##
## Model 1: lncpf ~ lnplpf + lnpkpf + lnq + lnqq
## Model 2: lncpf ~ lnplpf + lnpkpf + lnq + lnqq + lnqplpf + lnqpkpf
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 153 2.9049
## 2 151 2.8563 2 0.048647 1.2859 0.2794
# test for homoscedasticity
library("lmtest")
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
bptest(cgmodel)
##
## studentized Breusch-Pagan test
##
## data: cgmodel
## BP = 42.427, df = 4, p-value = 1.361e-08
coeftest(cgmodel)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.8181633 0.2524392 -27.0091 < 2.2e-16 ***
## lnplpf 0.1524447 0.0465973 3.2715 0.001322 **
## lnpkpf 0.1620338 0.0404056 4.0102 9.458e-05 ***
## lnq 0.4027454 0.0314831 12.7924 < 2.2e-16 ***
## lnqq 0.0608951 0.0043253 14.0788 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# correct for heteroscedasticity
library("car")
vcov(cgmodel) # assuming homoscedasticity
## (Intercept) lnplpf lnpkpf lnq
## (Intercept) 0.0637255474 -1.047129e-02 3.994585e-03 -0.0023838181
## lnplpf -0.0104712922 2.171313e-03 -1.019813e-03 -0.0001995679
## lnpkpf 0.0039945854 -1.019813e-03 1.632609e-03 0.0001002255
## lnq -0.0023838181 -1.995679e-04 1.002255e-04 0.0009911868
## lnqq 0.0003104204 2.453652e-05 -1.493338e-05 -0.0001335824
## lnqq
## (Intercept) 3.104204e-04
## lnplpf 2.453652e-05
## lnpkpf -1.493338e-05
## lnq -1.335824e-04
## lnqq 1.870819e-05
hccm(cgmodel) # assuming heteroscedasticity (White-Huber estimator)
## (Intercept) lnplpf lnpkpf lnq
## (Intercept) 0.162377183 -1.684195e-02 0.0079443104 -0.0193385959
## lnplpf -0.016841951 2.838030e-03 -0.0009389112 0.0005525213
## lnpkpf 0.007944310 -9.389112e-04 0.0018421413 -0.0009639384
## lnq -0.019338596 5.525213e-04 -0.0009639384 0.0043604051
## lnqq 0.002391483 -7.610844e-05 0.0001061293 -0.0005363602
## lnqq
## (Intercept) 2.391483e-03
## lnplpf -7.610844e-05
## lnpkpf 1.061293e-04
## lnq -5.363602e-04
## lnqq 6.704117e-05
coeftest(cgmodel,df=Inf,vcov.=hccm(cgmodel))
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.8181633 0.4029605 -16.9202 < 2.2e-16 ***
## lnplpf 0.1524447 0.0532732 2.8616 0.0042155 **
## lnpkpf 0.1620338 0.0429202 3.7752 0.0001599 ***
## lnq 0.4027454 0.0660334 6.0991 1.067e-09 ***
## lnqq 0.0608951 0.0081879 7.4372 1.028e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(sandwich)
coeftest(cgmodel,df=Inf,vcov.=vcovHC)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.8181633 0.4029605 -16.9202 < 2.2e-16 ***
## lnplpf 0.1524447 0.0532732 2.8616 0.0042155 **
## lnpkpf 0.1620338 0.0429202 3.7752 0.0001599 ***
## lnq 0.4027454 0.0660334 6.0991 1.067e-09 ***
## lnqq 0.0608951 0.0081879 7.4372 1.028e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1