Birinci veri seti: 401k. 401k veri seti, emekli maaşlarına ilişkin kesitsel verilerden oluşur.
bağımsız değişkenler:
İkinci veri seti: airfare. airfare veri seti, uçak biletlerine ilişkin kesitsel verilerden oluşur.
bağımsız değişkenler:
Üçüncü veri seti: athlet1. athlet1 veri seti, okulların spor programlarına ilişkin kesitsel bireysel verilerden oluşur.
bağımsız değişkenler:
Dördüncü veri seti: bwght. bwght veri seti, doğum ağırlığına ilişkin kesitsel bireysel verilerden oluşur.
bağımsız değişkenler:
Beşinci veri seti: jtrain. jtrain veri seti, iş eğitimine ilişkin panel bireysel verilerden oluşur.
bağımsız değişkenler:
# Attach the wooldridge package and load its `jtrain` data set.
library(wooldridge)
data("jtrain")
# Print per-column summary statistics, including NA counts.
summary(jtrain)
## year fcode employ sales
## Min. :1987 Min. :410032 Min. : 4.00 Min. : 110000
## 1st Qu.:1987 1st Qu.:410604 1st Qu.: 15.00 1st Qu.: 1550000
## Median :1988 Median :418084 Median : 30.00 Median : 3000000
## Mean :1988 Mean :415709 Mean : 59.32 Mean : 6116037
## 3rd Qu.:1989 3rd Qu.:419309 3rd Qu.: 72.00 3rd Qu.: 7700000
## Max. :1989 Max. :419486 Max. :525.00 Max. :54000000
## NA's :31 NA's :98
## avgsal scrap rework tothrs
## Min. : 4237 Min. : 0.0100 Min. : 0.000 Min. : 0.0
## 1st Qu.:14102 1st Qu.: 0.5925 1st Qu.: 0.350 1st Qu.: 0.0
## Median :17773 Median : 1.4150 Median : 1.160 Median : 12.0
## Mean :18873 Mean : 3.8436 Mean : 3.474 Mean : 29.2
## 3rd Qu.:22360 3rd Qu.: 4.0000 3rd Qu.: 4.000 3rd Qu.: 40.0
## Max. :42583 Max. :30.0000 Max. :40.000 Max. :320.0
## NA's :65 NA's :309 NA's :348 NA's :56
## union grant d89 d88
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1975 Mean :0.1401 Mean :0.3333 Mean :0.3333
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
##
## totrain hrsemp lscrap lemploy
## Min. : 0.00 Min. : 0.000 Min. :-4.6052 Min. :1.386
## 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.:-0.5234 1st Qu.:2.708
## Median : 8.00 Median : 3.308 Median : 0.3471 Median :3.401
## Mean : 23.09 Mean : 14.968 Mean : 0.3937 Mean :3.531
## 3rd Qu.: 25.00 3rd Qu.: 18.663 3rd Qu.: 1.3863 3rd Qu.:4.277
## Max. :350.00 Max. :163.917 Max. : 3.4012 Max. :6.263
## NA's :6 NA's :81 NA's :309 NA's :31
## lsales lrework lhrsemp lscrap_1
## Min. :11.61 Min. :-4.6052 Min. :0.000 Min. :-4.6052
## 1st Qu.:14.25 1st Qu.:-0.9163 1st Qu.:0.000 1st Qu.:-0.2675
## Median :14.91 Median : 0.1823 Median :1.460 Median : 0.4414
## Mean :15.03 Mean : 0.1642 Mean :1.650 Mean : 0.5129
## 3rd Qu.:15.86 3rd Qu.: 1.3863 3rd Qu.:2.979 3rd Qu.: 1.6094
## Max. :17.80 Max. : 3.6889 Max. :5.105 Max. : 3.4012
## NA's :98 NA's :350 NA's :81 NA's :363
## grant_1 clscrap cgrant clemploy
## Min. :0.00000 Min. :-3.3142 Min. :-1.00000 Min. :-0.98083
## 1st Qu.:0.00000 1st Qu.:-0.3975 1st Qu.: 0.00000 1st Qu.:-0.02899
## Median :0.00000 Median :-0.1411 Median : 0.00000 Median : 0.07066
## Mean :0.07643 Mean :-0.2211 Mean : 0.06369 Mean : 0.08202
## 3rd Qu.:0.00000 3rd Qu.: 0.0093 3rd Qu.: 0.00000 3rd Qu.: 0.18232
## Max. :1.00000 Max. : 2.3979 Max. : 1.00000 Max. : 1.67398
## NA's :363 NA's :181
## clsales lavgsal clavgsal cgrant_1
## Min. :-1.98287 Min. : 8.352 Min. :-0.40547 Min. :0.0000
## 1st Qu.:-0.01101 1st Qu.: 9.554 1st Qu.: 0.02228 1st Qu.:0.0000
## Median : 0.10711 Median : 9.785 Median : 0.05716 Median :0.0000
## Mean : 0.11587 Mean : 9.785 Mean : 0.06026 Mean :0.1147
## 3rd Qu.: 0.22314 3rd Qu.:10.015 3rd Qu.: 0.09076 3rd Qu.:0.0000
## Max. : 2.89670 Max. :10.659 Max. : 0.56891 Max. :1.0000
## NA's :226 NA's :65 NA's :204 NA's :157
## chrsemp clhrsemp
## Min. :-88.62255 Min. :-4.02535
## 1st Qu.: -0.07257 1st Qu.:-0.01493
## Median : 0.19860 Median : 0.03479
## Mean : 5.93591 Mean : 0.50370
## 3rd Qu.: 11.00952 3rd Qu.: 1.36811
## Max. :142.00000 Max. : 4.39445
## NA's :220 NA's :220
# Unrestricted model: regress sales on union, employ, totrain and avgsal.
# Rows with missing values in any of these columns are dropped by lm().
jtrain_reg <- lm(
  sales ~ union + employ + totrain + avgsal,
  data = jtrain
)
summary(jtrain_reg)
##
## Call:
## lm(formula = sales ~ union + employ + totrain + avgsal, data = jtrain)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21152164 -2003541 -359164 794550 33167083
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.356e+06 8.050e+05 -4.170 3.89e-05 ***
## union 2.262e+06 6.551e+05 3.454 0.000623 ***
## employ 8.412e+04 6.024e+03 13.963 < 2e-16 ***
## totrain 2.688e+04 7.772e+03 3.458 0.000614 ***
## avgsal 1.923e+02 3.784e+01 5.082 6.21e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4800000 on 336 degrees of freedom
## (130 observations deleted due to missingness)
## Multiple R-squared: 0.6617, Adjusted R-squared: 0.6577
## F-statistic: 164.3 on 4 and 336 DF, p-value: < 2.2e-16
part2
# 95th percentile of the t distribution (one-sided 5% critical value).
# The degrees of freedom are taken from the fitted model (336 in the summary
# above) instead of a hard-coded 92, which does not belong to this regression.
qt(0.95, df.residual(jtrain_reg))
## [1] 1.649401
%95 güven düzeyinde:
Dört bağımsız değişkenin tamamı istatistiksel olarak anlamlıdır, çünkü t değerlerinin hepsi yukarıda hesaplanan kritik değerden büyüktür.
# 95% confidence intervals for every coefficient (0.95 is confint's default).
confint(jtrain_reg, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) -4939942.0107 -1773033.7519
## union 973941.8965 3551014.8391
## employ 72268.3225 95968.7469
## totrain 11589.6945 42165.5620
## avgsal 117.8553 266.7059
# 99% confidence intervals for every coefficient.
confint(jtrain_reg, level = 0.99)
## 0.5 % 99.5 %
## (Intercept) -5.441846e+06 -1271129.9710
## union 5.655175e+05 3959439.2542
## employ 6.851219e+04 99724.8812
## totrain 6.743914e+03 47011.3429
## avgsal 9.426485e+01 290.2963
library(coefplot)
## Zorunlu paket yükleniyor: ggplot2
## Loading required package: ggplot2
# Draw the coefficient plot for the fitted model.
coefplot(jtrain_reg)
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
# Unrestricted model for the joint F test: sales explained by union, employ,
# totrain and avgsal.
jtrain_reg <- lm(
  sales ~ union + employ + totrain + avgsal,
  data = jtrain
)
summary(jtrain_reg)
##
## Call:
## lm(formula = sales ~ union + employ + totrain + avgsal, data = jtrain)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21152164 -2003541 -359164 794550 33167083
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.356e+06 8.050e+05 -4.170 3.89e-05 ***
## union 2.262e+06 6.551e+05 3.454 0.000623 ***
## employ 8.412e+04 6.024e+03 13.963 < 2e-16 ***
## totrain 2.688e+04 7.772e+03 3.458 0.000614 ***
## avgsal 1.923e+02 3.784e+01 5.082 6.21e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4800000 on 336 degrees of freedom
## (130 observations deleted due to missingness)
## Multiple R-squared: 0.6617, Adjusted R-squared: 0.6577
## F-statistic: 164.3 on 4 and 336 DF, p-value: < 2.2e-16
t testine bakacak olursak katsayıların hepsi anlamlıdır.
Şimdi sıfır hipotezimizi kuralım.
H0: b1 = 0, b2 = 0, b3 = 0
Bu hipotez altında kısıtlı modelimizi oluşturalım.
sales = b0 + b4·avgsal + u
# Restricted model under H0: only avgsal is kept as a regressor.
# It is fitted on the model frame of the unrestricted model so that both
# R-squared values used in the F test come from the same observations.
# Fitting on `jtrain` directly keeps 343 rows instead of 341, because lm()
# drops incomplete cases separately for each formula.
# NOTE(review): the printed summary below was produced before this change;
# re-run the script to refresh it.
jtrain_kısıtlı <- lm(sales ~ avgsal, data = model.frame(jtrain_reg))
summary(jtrain_kısıtlı)
##
## Call:
## lm(formula = sales ~ avgsal, data = jtrain)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8676510 -4328568 -3185828 1587164 48176928
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.908e+06 1.281e+06 2.271 0.02377 *
## avgsal 1.774e+02 6.383e+01 2.779 0.00575 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8102000 on 341 degrees of freedom
## (128 observations deleted due to missingness)
## Multiple R-squared: 0.02215, Adjusted R-squared: 0.01928
## F-statistic: 7.723 on 1 and 341 DF, p-value: 0.005754
# R-squared of the unrestricted (ur) and restricted (r) models.
# The full component name is used: `$r.sq` only resolves through partial
# matching of `$` on the summary list.
r2_ur <- summary(jtrain_reg)$r.squared
r2_r <- summary(jtrain_kısıtlı)$r.squared
r2_ur
## [1] 0.6616917
r2_r
## [1] 0.02214663
Üç değişken eklediğimizde kısıtsız modelin (ur) R2’si kısıtlı modelin (r) R2’sine göre çok artmıştır.
# Number of observations actually used to fit the unrestricted model.
n <- nobs(jtrain_reg)
n
## [1] 341
k, kısıtsız modelimizde kaç tane bağımsız değişkenimiz olduğunu söyler. union, employ, totrain ve avgsal olmak üzere 4 bağımsız değişken kullandık.
# Number of regressors in the unrestricted model (intercept excluded).
k <- 4
Son olarak q, kaç tane kısıt kullandığımızı gösterir. Toplam 3 kısıt kullandık.
# Number of restrictions imposed under H0.
q <- 3
# F statistic in R-squared form:
#   F = [(R2_ur - R2_r) / (1 - R2_ur)] * [(n - k - 1) / q]
# The two factors are computed inside local() so no extra globals are created.
F_jtrain_reg <- local({
  r2_ratio <- (r2_ur - r2_r) / (1 - r2_ur)
  df_ratio <- (n - k - 1) / q
  r2_ratio * df_ratio
})
F_jtrain_reg
## [1] 211.7272
# 99th percentile of the F distribution with q and n - k - 1 degrees of
# freedom (critical value at the 1% level).
qf(0.99, df1 = q, df2 = n - k - 1)
## [1] 3.840401
Test istatistiği kritik değerin çok üstünde olduğundan H0 hipotezi reddedilecektir.
# p-value of the F test, computed directly as the upper-tail probability.
# `1 - pf(...)` loses the result to floating-point cancellation and prints 0;
# `lower.tail = FALSE` returns the tail probability itself.
# NOTE(review): re-run to refresh the printed value below.
pf(F_jtrain_reg, q, n - k - 1, lower.tail = FALSE)
## [1] 0
H0 hipotezi reddedilir.