#Ekonometri Final Ödevi
#A Şıkkı
library(wooldridge)
library(discrim)
## Loading required package: parsnip
data("discrim")
head(discrim)
## psoda pfries pentree wagest nmgrs nregs hrsopen emp psoda2 pfries2 pentree2
## 1 1.12 1.06 1.02 4.25 3 5 16.0 27.5 1.11 1.11 1.05
## 2 1.06 0.91 0.95 4.75 3 3 16.5 21.5 1.05 0.89 0.95
## 3 1.06 0.91 0.98 4.25 3 5 18.0 30.0 1.05 0.94 0.98
## 4 1.12 1.02 1.06 5.00 4 5 16.0 27.5 1.15 1.05 1.05
## 5 1.12 NA 0.49 5.00 3 3 16.0 5.0 1.04 1.01 0.58
## 6 1.06 0.95 1.01 4.25 4 4 15.0 17.5 1.05 0.94 1.00
## wagest2 nmgrs2 nregs2 hrsopen2 emp2 compown chain density crmrte state
## 1 5.05 5 5 15.0 27.0 1 3 4030 0.0528866 1
## 2 5.05 4 3 17.5 24.5 0 1 4030 0.0528866 1
## 3 5.05 4 5 17.5 25.0 0 1 11400 0.0360003 1
## 4 5.05 4 5 16.0 NA 0 3 8345 0.0484232 1
## 5 5.05 3 3 16.0 12.0 0 1 720 0.0615890 1
## 6 5.05 3 4 15.0 28.0 0 1 4424 0.0334823 1
## prpblck prppov prpncar hseval nstores income county lpsoda
## 1 0.1711542 0.0365789 0.0788428 148300 3 44534 18 0.11332869
## 2 0.1711542 0.0365789 0.0788428 148300 3 44534 18 0.05826885
## 3 0.0473602 0.0879072 0.2694298 169200 3 41164 12 0.05826885
## 4 0.0528394 0.0591227 0.1366903 171600 3 50366 10 0.11332869
## 5 0.0344800 0.0254145 0.0738020 249100 1 72287 10 0.11332869
## 6 0.0591327 0.0835001 0.1151341 148000 2 44515 18 0.05826885
## lpfries lhseval lincome ldensity NJ BK KFC RR
## 1 0.05826885 11.90699 10.70401 8.301521 1 0 0 1
## 2 -0.09431065 11.90699 10.70401 8.301521 1 1 0 0
## 3 -0.09431065 12.03884 10.62532 9.341369 1 1 0 0
## 4 0.01980261 12.05292 10.82707 9.029418 1 0 0 1
## 5 NA 12.42561 11.18840 6.579251 1 1 0 0
## 6 -0.05129331 11.90497 10.70358 8.394799 1 1 0 0
help(discrim)
## Help on topic 'discrim' was found in the following packages:
##
## Package Library
## discrim /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library
## wooldridge /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library
##
##
## Using the first match ...
#Değişkenlerin Türkçe Tanımları
psoda: orta soda fiyatı, 1. dalga
pfries: küçük patates kızartmasının fiyatı, 1. dalga
pentree: fiyat ana yemek (burger veya tavuk), 1. dalga
wagest: başlangıç ücreti, 1. dalga
nmgrs: yönetici sayısı, 1. dalga
nregs: kayıt sayısı, 1. dalga
hrsopen:açılış saatleri, 1. dalga
emp: çalışan sayısı, 1. dalga
psoda2: orta soda fiyatı, 2. dalga
pfries2: küçük patates kızartmasının fiyatı, 2. dalga
pentree2: fiyat girişi, 2. dalga
wagest2: başlangıç ücreti, 2. dalga
nmgrs2: yönetici sayısı, 2. dalga
nregs2: kayıt sayısı, 2. dalga
hrsopen2: açılış saatleri, 2. dalga
emp2: çalışan sayısı, 2. dalga
compown: =1 eğer şirket sahibiyse
chain: BK = 1, KFC = 2, Roy Rogers = 3, Wendy’s = 4
density: nüfus yoğunluğu, kasaba
crmrte: suç oranı, kasaba
state: NJ = 1, PA = 2
prpblck: oran siyah, posta kodu
prppov: yoksulluk oranı, posta kodu
prpncar: oran araç yok, posta kodu
hseval: ortalama konut değeri, posta kodu
nstores: mağaza sayısı, posta kodu
income: ortalama aile geliri, posta kodu
county: ilçe etiketi
lpsoda: günlük(psoda)
lpfries: log(pfries)
lhseval: log(hseval)
lincome: log(gelir)
ldensity: log(yoğunluk)
NJ: New Jersey için =1
BK: =1 eğer Burger King ise
KFC: =1 Kentucky Fried Chicken ise
RR: =1 eğer Roy Rogers ise
#B Şıkkı
mean(discrim$prpblck)
## [1] NA
sd(discrim$prpblck)
## [1] NA
mean(discrim$income)
## [1] NA
sd(discrim$income)
## [1] NA
sum(is.na(discrim$prpblck))
## [1] 1
sum(is.na(discrim$income))
## [1] 1
mean(discrim$prpblck,na.rm = TRUE)
## [1] 0.1134864
sd(discrim$prpblck,na.rm = TRUE)
## [1] 0.1824165
mean(discrim$income, na.rm = TRUE)
## [1] 47053.78
sd(discrim$income, na.rm = TRUE)
## [1] 13179.29
library(vtable)
## Loading required package: kableExtra
sumtable(discrim, summ=c('notNA(x)', 'countNA(x)', 'mean(x)','sd(x)'),out='return')
## Variable NotNA CountNA Mean Sd
## 1 psoda 402 8 1 0.089
## 2 pfries 393 17 0.92 0.11
## 3 pentree 398 12 1.3 0.64
## 4 wagest 390 20 4.6 0.35
## 5 nmgrs 404 6 3.4 1
## 6 nregs 388 22 3.6 1.2
## 7 hrsopen 410 0 14 2.8
## 8 emp 404 6 18 9.4
## 9 psoda2 388 22 1 0.094
## 10 pfries2 382 28 0.94 0.11
## 11 pentree2 386 24 1.4 0.65
## 12 wagest2 389 21 5 0.25
## 13 nmgrs2 404 6 3.5 1.1
## 14 nregs2 388 22 3.6 1.2
## 15 hrsopen2 399 11 14 2.8
## 16 emp2 397 13 18 8.6
## 17 compown 410 0 0.34 0.48
## 18 chain 410 0 2.1 1.1
## 19 density 409 1 4562 5132
## 20 crmrte 409 1 0.053 0.047
## 21 state 410 0 1.2 0.39
## 22 prpblck 409 1 0.11 0.18
## 23 prppov 409 1 0.071 0.067
## 24 prpncar 409 1 0.11 0.12
## 25 hseval 409 1 147399 56070
## 26 nstores 410 0 3.1 1.8
## 27 income 409 1 47054 13179
## 28 county 410 0 14 8
## 29 lpsoda 402 8 0.04 0.085
## 30 lpfries 393 17 -0.088 0.12
## 31 lhseval 409 1 12 0.39
## 32 lincome 409 1 11 0.28
## 33 ldensity 409 1 8 1
## 34 NJ 410 0 0.81 0.39
## 35 BK 410 0 0.42 0.49
## 36 KFC 410 0 0.2 0.4
## 37 RR 410 0 0.24 0.43
#C Şıkkı
discrimreg <- lm(psoda~prpblck+income, data = discrim)
summary(discrimreg)
##
## Call:
## lm(formula = psoda ~ prpblck + income, data = discrim)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.29401 -0.05242 0.00333 0.04231 0.44322
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.563e-01 1.899e-02 50.354 < 2e-16 ***
## prpblck 1.150e-01 2.600e-02 4.423 1.26e-05 ***
## income 1.603e-06 3.618e-07 4.430 1.22e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08611 on 398 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.06422, Adjusted R-squared: 0.05952
## F-statistic: 13.66 on 2 and 398 DF, p-value: 1.835e-06
#D Şıkkı
basitdiscrimreg <- lm(psoda~prpblck, data = discrim)
summary(basitdiscrimreg)
##
## Call:
## lm(formula = psoda ~ prpblck, data = discrim)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.30884 -0.05963 0.01135 0.03206 0.44840
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.03740 0.00519 199.87 < 2e-16 ***
## prpblck 0.06493 0.02396 2.71 0.00702 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0881 on 399 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.01808, Adjusted R-squared: 0.01561
## F-statistic: 7.345 on 1 and 399 DF, p-value: 0.007015
#E Şıkkı
logdiscrimreg <- lm(log(psoda)~prpblck+log(income), data = discrim)
summary(logdiscrimreg)
##
## Call:
## lm(formula = log(psoda) ~ prpblck + log(income), data = discrim)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.33563 -0.04695 0.00658 0.04334 0.35413
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.79377 0.17943 -4.424 1.25e-05 ***
## prpblck 0.12158 0.02575 4.722 3.24e-06 ***
## log(income) 0.07651 0.01660 4.610 5.43e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0821 on 398 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.06809, Adjusted R-squared: 0.06341
## F-statistic: 14.54 on 2 and 398 DF, p-value: 8.039e-07
paste( (0.2*100)*0.122, "yüzdelik artış")
## [1] "2.44 yüzdelik artış"
#F Şıkkı
logdiscrimregprpov <- lm(log(psoda)~prpblck+log(income)+prppov, data = discrim)
summary(logdiscrimregprpov)
##
## Call:
## lm(formula = log(psoda) ~ prpblck + log(income) + prppov, data = discrim)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.32218 -0.04648 0.00651 0.04272 0.35622
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.46333 0.29371 -4.982 9.4e-07 ***
## prpblck 0.07281 0.03068 2.373 0.0181 *
## log(income) 0.13696 0.02676 5.119 4.8e-07 ***
## prppov 0.38036 0.13279 2.864 0.0044 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08137 on 397 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.08696, Adjusted R-squared: 0.08006
## F-statistic: 12.6 on 3 and 397 DF, p-value: 6.917e-08
#G Şıkkı
cor(log(discrim$income), discrim$prppov, use = "complete.obs")
## [1] -0.838467
#H Şıkkı
Yüksek düzeyde ilişkili olmalarına rağmen, her ikisinin de dahil edilmesi mükemmel bir doğrusallık ile sonuçlanmaz ve bunun yerine, ayırt edici etkiyi izole etmeye yardımcı olan başka bir kontrol değişkeni ekleyerek modeli tamamlar.
#F testi
f_test <- var.test(log(discrim$psoda),discrim$prpblck)
print(f_test)
##
## F test to compare two variances
##
## data: log(discrim$psoda) and discrim$prpblck
## F = 0.21575, num df = 401, denom df = 408, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1775050 0.2622688
## sample estimates:
## ratio of variances
## 0.2157458
#F testi sonuç F testi sonucunda, log(psoda) ve prpblck değişkenlerinin varyanslarının eşit olmadığı bulunmuştur.
F istatistiği: 0.21575, yani varyans oranı 1’den küçük. P-değeri: 2.2e-16 (çok küçük), bu da null hipotezinin (varyansların eşit olduğu) reddedildiğini ve varyansların farklı olduğunu gösterir. %95 güven aralığı: Varyans oranının 0.1775 ile 0.2623 arasında olduğu bulunmuştur. Sonuç olarak, varyanslar arasında anlamlı bir fark vardır.