On this page I will be posting solutions to Gujarati’s “Basic Econometrics” 4th edition using R.
Table 3.2 Hypothetical Data on Weekly Family Consumption Expenditure and Weekly Family Income
t3.2 <- read.table("/Users/ttueconomics/Dropbox/Econometrics/Gujarati Econometrics/gujarati tables/Table 3.2.txt", skip = 6, header = T)
y <- t3.2[,1]
x <- t3.2[,2]
mod3.2 <- lm(y~x)
summary(mod3.2)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.364 -4.977 1.409 4.364 8.364
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.45455 6.41382 3.813 0.00514 **
## x 0.50909 0.03574 14.243 5.75e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.493 on 8 degrees of freedom
## Multiple R-squared: 0.9621, Adjusted R-squared: 0.9573
## F-statistic: 202.9 on 1 and 8 DF, p-value: 5.753e-07
Monte Carlo study classroom assignment: Refer to the 10 X values given in Table 3.2 Let \(\beta_1 = 25\) and \(\beta_2 = 0.5\). Assume \(u_i \approx N(0, 9)\), that is, \(u_i\) are normally distributed with mean 0 and variance 9. Generate 100 samples using these values, obtaining 100 estimates of \(\beta_1\) and \(\beta_2\). Graph these estimates. What conclusions can you draw from the Monte Carlo study?
Note: Most statistical packages now can generate random variables from most well-known probability distributions. Ask your instructor for help, in case you have difficulty generating such variables.
Solution
data <- read.table("/Users/ttueconomics/Dropbox/Econometrics/Gujarati Econometrics/gujarati tables/Table 3.2.txt", skip = 6, header = T)
set.seed(1)
e <- rnorm(100, 0, 3)
x <- sample(data[,2], 100, replace = TRUE)
y <- 25 + 0.5*x + e
mod <- lm(y ~ x)
mod
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x
## 24.6827 0.5041
plot(x,y)
abline(mod, col=2, lwd=1)
Now we can repeat the same simulation 100 times to get 100 values of coefficients:
monte.carlic <- function(){
e <- rnorm(100, 0, 3)
x <- sample(data[,2], 100, replace = TRUE)
y <- 25 + 0.5*x + e
mod <- lm(y ~ x)
beta <- c(beta_1=coef(mod)[1], beta_2=coef(mod)[2])
}
replicate(100, monte.carlic())
## [,1] [,2] [,3] [,4] [,5]
## beta_1.(Intercept) 25.540043 24.7589385 24.2356897 25.2038243 26.8694326
## beta_2.x 0.497332 0.5023676 0.5045014 0.4952342 0.4886385
## [,6] [,7] [,8] [,9] [,10]
## beta_1.(Intercept) 25.7227579 25.7198855 26.7746756 23.7674583 25.59403
## beta_2.x 0.4956354 0.4941385 0.4933398 0.5069359 0.49628
## [,11] [,12] [,13] [,14] [,15]
## beta_1.(Intercept) 23.570685 25.8803419 25.2414472 25.1470334 25.5120882
## beta_2.x 0.507833 0.4970334 0.4982868 0.5001065 0.4983652
## [,16] [,17] [,18] [,19] [,20]
## beta_1.(Intercept) 24.167693 24.2693547 25.9985260 24.794198 24.2041273
## beta_2.x 0.505989 0.5045617 0.4955019 0.503275 0.5069336
## [,21] [,22] [,23] [,24] [,25]
## beta_1.(Intercept) 24.3044939 23.2444834 24.3495675 24.921229 25.3086794
## beta_2.x 0.5059076 0.5109606 0.5035905 0.498556 0.4974154
## [,26] [,27] [,28] [,29] [,30]
## beta_1.(Intercept) 25.5679716 24.2831362 24.6729789 24.8061652 24.3614856
## beta_2.x 0.4959591 0.5031065 0.5011272 0.5019222 0.5008543
## [,31] [,32] [,33] [,34] [,35]
## beta_1.(Intercept) 25.1358322 24.4228408 23.3064137 24.4900966 24.6339908
## beta_2.x 0.5019912 0.5025612 0.5061959 0.5040538 0.5037937
## [,36] [,37] [,38] [,39] [,40]
## beta_1.(Intercept) 24.0385941 25.287297 24.73422 25.3115795 24.9480917
## beta_2.x 0.5055963 0.497263 0.50320 0.4966668 0.4993182
## [,41] [,42] [,43] [,44] [,45]
## beta_1.(Intercept) 23.3768738 24.4669723 25.9785145 26.7680615 26.331323
## beta_2.x 0.5086019 0.5038233 0.4928039 0.4924449 0.490958
## [,46] [,47] [,48] [,49] [,50]
## beta_1.(Intercept) 25.5319282 25.1068213 23.3482633 24.2937005 25.4627894
## beta_2.x 0.4947648 0.4977284 0.5065666 0.5055285 0.4983332
## [,51] [,52] [,53] [,54] [,55]
## beta_1.(Intercept) 24.4800422 26.9171246 26.7989315 26.0771232 25.7528458
## beta_2.x 0.5040227 0.4893905 0.4867282 0.4928013 0.4929831
## [,56] [,57] [,58] [,59] [,60]
## beta_1.(Intercept) 25.4310546 25.160758 24.6228205 25.7183326 24.9613522
## beta_2.x 0.4935776 0.500846 0.5029817 0.4978954 0.5003102
## [,61] [,62] [,63] [,64] [,65]
## beta_1.(Intercept) 26.3400194 23.205556 23.6908560 24.1410070 26.6862350
## beta_2.x 0.4927246 0.508798 0.5075319 0.5044259 0.4889292
## [,66] [,67] [,68] [,69] [,70]
## beta_1.(Intercept) 26.0628575 25.3343123 23.7915908 24.2761832 24.3880756
## beta_2.x 0.4934838 0.4994237 0.5057005 0.5018442 0.5033131
## [,71] [,72] [,73] [,74] [,75]
## beta_1.(Intercept) 25.7316100 24.6039538 25.3054656 24.6157502 24.3682422
## beta_2.x 0.4947144 0.5016904 0.4968086 0.5016653 0.5037999
## [,76] [,77] [,78] [,79] [,80]
## beta_1.(Intercept) 25.3407190 24.6627444 23.7705152 26.0107785 25.926791
## beta_2.x 0.4985775 0.5019887 0.5055682 0.4929544 0.494745
## [,81] [,82] [,83] [,84] [,85]
## beta_1.(Intercept) 24.6662138 25.8465880 23.3365536 25.0977437 24.9152928
## beta_2.x 0.4998677 0.4957464 0.5083263 0.5002863 0.4985858
## [,86] [,87] [,88] [,89] [,90]
## beta_1.(Intercept) 24.0338541 25.4375906 25.4683870 25.6966621 24.7170353
## beta_2.x 0.5065274 0.4981872 0.4971136 0.4958402 0.5015877
## [,91] [,92] [,93] [,94] [,95]
## beta_1.(Intercept) 23.6861738 25.5174078 25.0317557 26.2837332 25.408880
## beta_2.x 0.5071825 0.4977167 0.4999832 0.4906258 0.498659
## [,96] [,97] [,98] [,99] [,100]
## beta_1.(Intercept) 25.0268019 25.1308016 24.1370992 25.7284915 22.9401391
## beta_2.x 0.4975581 0.5005732 0.5031502 0.4959727 0.5100315
Here is another simulation with different commands:
sims <- 100 # Set the number of simulations at the top of the script
beta_1 <- numeric(sims) # Empty vector for storing the simulated intercepts
beta_2 <- numeric(sims) # Empty vector for storing the simulated slopes
n <- 100 # sample size
a <- 25 # True value for the intercept
b <- .5 # True value for the slope
e <- rnorm(n, 0, 3)
X <- sample(data[,2], n, replace = TRUE) # Create a sample of n observations on the variable X.
# Note that this variable is outside the loop, because X
# should be fixed in repeated samples.
for(i in 1:sims){ # Start the loop
Y <- a + b*X + rnorm(n, 0, 3) # The true DGP, with N(0, 1) error
model <- lm(Y ~ X) # Estimate OLS Model
beta_1[i] <- model$coef[1] # Put the estimate for the intercept
# in the vector $\beta_1$
beta_2[i] <- model$coef[2] # Put the estimate for X in the vector "*beta[2]*"
} # End loop
beta_1
## [1] 23.79906 24.70302 26.66095 24.90002 26.85557 24.25931 24.72455
## [8] 23.89814 24.48193 24.10518 23.38273 24.85860 25.59687 25.75908
## [15] 26.01534 25.99116 26.36297 25.70014 26.52839 24.59356 24.69084
## [22] 24.06654 27.49589 26.12195 23.83379 26.30565 24.44464 25.73610
## [29] 23.73999 25.80671 25.12979 24.92624 25.11428 24.64331 26.39082
## [36] 23.50362 25.14857 25.39298 25.43060 23.66115 25.02324 25.51146
## [43] 23.34763 25.46079 25.07709 24.85393 24.71334 24.15694 25.95537
## [50] 26.08856 24.43961 26.45758 27.52519 24.93742 23.76086 23.72852
## [57] 25.60792 24.65452 24.58952 26.33250 25.57180 22.30906 25.48110
## [64] 25.93661 25.75951 23.78776 24.87865 25.13830 24.74739 25.48236
## [71] 25.17168 24.40320 23.09348 25.13627 25.98191 24.51055 24.15362
## [78] 25.65366 25.87493 24.02988 23.26313 23.94838 25.28797 24.04412
## [85] 24.10514 24.76613 23.94501 25.27879 24.76245 23.97829 24.74217
## [92] 26.16253 25.57518 25.03727 25.30108 26.19680 24.70048 25.77405
## [99] 25.19405 23.69542
beta_2
## [1] 0.5089421 0.5009488 0.4894183 0.4991839 0.4900699 0.5047770 0.5014337
## [8] 0.5068714 0.5034572 0.5050237 0.5121626 0.5004322 0.4949356 0.4967502
## [15] 0.4958628 0.4965223 0.4960439 0.4963943 0.4919068 0.5037544 0.5012703
## [22] 0.5057193 0.4868311 0.4952464 0.5056748 0.4933709 0.5024456 0.4957832
## [29] 0.5057581 0.4980707 0.4994429 0.4972036 0.4997763 0.5020080 0.4940858
## [36] 0.5087335 0.5004514 0.4931413 0.4950294 0.5070240 0.5005335 0.4990504
## [43] 0.5099473 0.5002004 0.4984099 0.5005132 0.4995080 0.5061568 0.4975269
## [50] 0.4924868 0.5069168 0.4943670 0.4845270 0.4992294 0.5057317 0.5090749
## [57] 0.4996980 0.5015582 0.5054418 0.4916829 0.5006004 0.5141886 0.4961971
## [64] 0.4972152 0.4968338 0.5048530 0.4981214 0.4963674 0.5012625 0.4991259
## [71] 0.4990319 0.5036955 0.5091039 0.5001308 0.4944555 0.5036927 0.5082128
## [78] 0.4963661 0.4956794 0.5037470 0.5081541 0.5083577 0.4990005 0.5052043
## [85] 0.5061852 0.5042689 0.5089698 0.5006228 0.5016890 0.5057241 0.4985889
## [92] 0.4938808 0.4963437 0.5005058 0.5003337 0.4929618 0.5012174 0.4983583
## [99] 0.4969467 0.5083118
plot(beta_1, beta_2, xlab=expression(beta[l]), ylab=expression(beta[2]), main=expression("Plot of "*beta[1]*" and "*beta[2]*""))
The author uses MINITAB 13 to create the figure 5.7. I’d like to accomplish the same task using R.
We apply the lm function to a formula that describes the variable consumption expenditure by the variable income, and save the linear regression model in a new variable mod5.12. Then we compute the standardized residual with the rstandard function.
t3.2 <- read.table("/Users/ttueconomics/Dropbox/Econometrics/Gujarati Econometrics/gujarati tables/Table 3.2.txt", skip = 6, header = T)
y <- t3.2[,1]
x <- t3.2[,2]
mod5.12 <- lm(y~x)
mod5.12.stdres = rstandard(mod5.12)
We now create the normal probability plot with the qqnorm function, and add the qqline for further comparison.
qqnorm(mod5.12.stdres, ylab="Standardized Residuals", xlab="Normal Scores", main="Residuals from consumption-income regression")
qqline(mod5.12.stdres)
mod5.12.resid <- resid(mod5.12)
qqnorm(mod5.12.resid)
qqline(mod5.12.resid)
library("nortest")
ad.test(mod5.12.resid)
##
## Anderson-Darling normality test
##
## data: mod5.12.resid
## A = 0.39366, p-value = 0.3047
shapiro.test(mod5.12.resid)
##
## Shapiro-Wilk normality test
##
## data: mod5.12.resid
## W = 0.9267, p-value = 0.4162
#get probability distribution for residuals
probDist <- pnorm(mod5.12.resid)
#create PP plot
plot(ppoints(length(mod5.12.resid)), sort(probDist), main = "PP Plot", xlab = "Observed Probability", ylab = "Expected Probability")
#add diagonal line
abline(0,1)
library("tseries")
jarque.bera.test(mod5.12.resid)
##
## Jarque Bera Test
##
## data: mod5.12.resid
## X-squared = 0.77692, df = 2, p-value = 0.6781
t6.1 <- read.table("/Users/ttueconomics/Dropbox/Econometrics/Gujarati Econometrics/gujarati tables/Table 6.1.txt", skip = 7, header = T)
t6.1
## YEAR Y X
## 1 1971 67.5 19.5
## 2 1972 19.2 8.5
## 3 1973 -35.2 -29.3
## 4 1974 -42.0 -26.5
## 5 1975 63.7 61.9
## 6 1976 19.3 45.5
## 7 1977 3.6 9.5
## 8 1978 20.0 14.0
## 9 1979 40.3 35.3
## 10 1980 37.5 31.0
y<-t6.1[,2]
x<-t6.1[,3]
mod6.1ols<-lm(y~x)
summary(mod6.1ols)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.623 -7.166 -1.237 3.585 45.373
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2797 7.6886 0.166 0.87194
## x 1.0691 0.2383 4.486 0.00204 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.69 on 8 degrees of freedom
## Multiple R-squared: 0.7155, Adjusted R-squared: 0.68
## F-statistic: 20.12 on 1 and 8 DF, p-value: 0.00204
mod6.1<-lm(y~0+x)
summary(mod6.1)
##
## Call:
## lm(formula = y ~ 0 + x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.291 -6.007 -0.720 4.484 46.247
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## x 1.0899 0.1916 5.69 0.000298 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.54 on 9 degrees of freedom
## Multiple R-squared: 0.7825, Adjusted R-squared: 0.7583
## F-statistic: 32.38 on 1 and 9 DF, p-value: 0.0002981
mod6.1a<-lm(y~x-1)
summary(mod6.1a)
##
## Call:
## lm(formula = y ~ x - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.291 -6.007 -0.720 4.484 46.247
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## x 1.0899 0.1916 5.69 0.000298 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.54 on 9 degrees of freedom
## Multiple R-squared: 0.7825, Adjusted R-squared: 0.7583
## F-statistic: 32.38 on 1 and 9 DF, p-value: 0.0002981
library(lm.beta)
t6.2 <- read.table("/Users/ttueconomics/Dropbox/Econometrics/Gujarati Econometrics/gujarati tables/Table 6.2.txt", skip = 6)
t6.2
## V1 V2 V3 V4 V5
## 1 1988 828.2 828200 5865.2 5865200
## 2 1989 863.5 863500 6062.0 6062000
## 3 1990 815.0 815000 6136.3 6136300
## 4 1991 738.1 738100 6079.4 6079400
## 5 1992 790.4 790400 6244.4 6244400
## 6 1993 863.6 863600 6389.6 6389600
## 7 1994 975.7 975700 6610.7 6610700
## 8 1995 996.1 996100 6761.6 6761600
## 9 1996 1084.1 1084100 6994.8 6994800
## 10 1997 1206.4 1206400 7269.8 7269800
y<-t6.2[,2]
x<-t6.2[,4]
mod6.2<-lm(y~x)
mod6.2.beta<-lm.beta(mod6.2)
summary(mod6.2.beta)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -68.843 -31.816 -4.008 32.471 85.856
##
## Coefficients:
## Estimate Standardized Std. Error t value Pr(>|t|)
## (Intercept) -1026.4980 0.0000 257.5874 -3.985 0.00403 **
## x 0.3016 0.9366 0.0399 7.558 6.56e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 54.49 on 8 degrees of freedom
## Multiple R-squared: 0.8772, Adjusted R-squared: 0.8618
## F-statistic: 57.13 on 1 and 8 DF, p-value: 6.556e-05