1 rmarkdown (R programımızda neden rmarkdown kullanıyoruz? -> Elimizdeki verileri tablolaştırmak istediğimizde, örneğin paged_table için, rmarkdown kullanıyoruz.)
library(wooldridge)
## Warning: package 'wooldridge' was built under R version 4.2.2
library(rmarkdown)
# Load the `alcohol` data set and display it as a paged table.
data("alcohol")
paged_table(alcohol)
ŞİMDİ NEDEN RMARKDOWN KULLANDIĞIMIZI ANLIYORUZ..
BİR VERİDEN 2 FARKLI YOLLA REGRESYON ÇIKTISI ALMAK İSTEDİĞİMİZDE AŞAĞIDAKİ ADIMLARI UYGULUYORUZ.
# Regress abuse on age, educ, married and famsize and print the full summary.
summary(lm(abuse ~ age + educ + married + famsize, data = alcohol))
##
## Call:
## lm(formula = abuse ~ age + educ + married + famsize, data = alcohol)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.18888 -0.11455 -0.09666 -0.07384 0.99322
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1909951 0.0208919 9.142 < 2e-16 ***
## age 0.0001842 0.0003293 0.559 0.576
## educ -0.0041901 0.0010463 -4.005 6.26e-05 ***
## married -0.0093980 0.0089139 -1.054 0.292
## famsize -0.0129875 0.0021864 -5.940 2.95e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.298 on 9817 degrees of freedom
## Multiple R-squared: 0.006398, Adjusted R-squared: 0.005993
## F-statistic: 15.8 on 4 and 9817 DF, p-value: 6.719e-13
BUNU YAPABİLMEMİZ İÇİN İKİNCİ BİR YOLUMUZ DAHA VAR: DEĞİŞKENLERİ alcohol$ İLE DOĞRUDAN VERİ SETİNDEN ÇAĞIRMAK
# Same regression written with explicit `alcohol$` column references; only
# the coefficients are printed because summary() is not called.
# NOTE(review): because every term is spelled `alcohol$...`, the coefficient
# labels carry that prefix and the `data` argument looks redundant here.
lm(alcohol$abuse ~ alcohol$age + alcohol$educ + alcohol$married + alcohol$famsize, data = alcohol )
##
## Call:
## lm(formula = alcohol$abuse ~ alcohol$age + alcohol$educ + alcohol$married +
## alcohol$famsize, data = alcohol)
##
## Coefficients:
## (Intercept) alcohol$age alcohol$educ alcohol$married
## 0.1909951 0.0001842 -0.0041901 -0.0093980
## alcohol$famsize
## -0.0129875
İncelediğimiz diğer bir konu ise stargazer paketi. stargazer'ın işlevi, çıktıyı düzgün tablolara aktarmak ve bir veya daha fazla modeli yan yana karşılaştırmaktır. stargazer sonuçları iki şekilde dışa aktarabilir: sonuçların doğrudan görüntülenmesini sağlayan metin (type = "text") olarak ve Word belgelerinde düzenlenebilen HTML (type = "html") olarak. BİR ÖRNEKLE BUNU AÇIKLAYALIM
# Two versions of the same regression for the comparison table: data1 uses
# scale(educ), data2 uses educ as-is.
# NOTE(review): the unary `+` in front of `age` in data1 does not change the
# fit (the coefficient is still labelled `age`); it looks like a typo.
data1 <- lm(abuse ~ +age + scale(educ) + married + famsize, data = alcohol)
data2 <- lm(abuse ~ age + educ + married + famsize,data = alcohol)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Print data1 and data2 side by side as a plain-text regression table.
stargazer(data1, data2, type = "text")
##
## ============================================================
## Dependent variable:
## ----------------------------
## abuse
## (1) (2)
## ------------------------------------------------------------
## age 0.0002 0.0002
## (0.0003) (0.0003)
##
## scale(educ) -0.012***
## (0.003)
##
## educ -0.004***
## (0.001)
##
## married -0.009 -0.009
## (0.009) (0.009)
##
## famsize -0.013*** -0.013***
## (0.002) (0.002)
##
## Constant 0.135*** 0.191***
## (0.014) (0.021)
##
## ------------------------------------------------------------
## Observations 9,822 9,822
## R2 0.006 0.006
## Adjusted R2 0.006 0.006
## Residual Std. Error (df = 9817) 0.298 0.298
## F Statistic (df = 4; 9817) 15.804*** 15.804***
## ============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Regresyonumuzdaki değişkenleri scale() fonksiyonuna alarak standardizasyon yapabiliriz, aşağıdaki örneğe bakalım
# Same regression with the response and every regressor wrapped in scale();
# only the coefficients are printed because summary() is not called.
lm(scale(abuse) ~ scale(age) + scale(educ) + scale(married) + scale(famsize), data = alcohol )
##
## Call:
## lm(formula = scale(abuse) ~ scale(age) + scale(educ) + scale(married) +
## scale(famsize), data = alcohol)
##
## Coefficients:
## (Intercept) scale(age) scale(educ) scale(married) scale(famsize)
## 1.265e-14 5.939e-03 -4.064e-02 -1.217e-02 -6.622e-02
Logaritmik regresyonda logaritma kullanımı
# Level-log specification: famsize enters as log(famsize), the remaining
# regressors enter in levels.
summary(lm(abuse ~ age + educ + married +log(famsize),data = alcohol ))
##
## Call:
## lm(formula = abuse ~ age + educ + married + log(famsize), data = alcohol)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.19151 -0.11404 -0.09263 -0.07140 0.96017
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1839099 0.0203831 9.023 < 2e-16 ***
## age 0.0001288 0.0003288 0.392 0.695
## educ -0.0041984 0.0010445 -4.019 5.88e-05 ***
## married 0.0024569 0.0093421 0.263 0.793
## log(famsize) -0.0426261 0.0058570 -7.278 3.65e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2977 on 9817 degrees of freedom
## Multiple R-squared: 0.008178, Adjusted R-squared: 0.007774
## F-statistic: 20.24 on 4 and 9817 DF, p-value: < 2.2e-16
DİĞER DEĞİŞKENLER SABİT TUTULDUĞUNDA FAMSIZE %1 ARTARSA ABUSE YAKLAŞIK 0.0426/100 = 0.000426 BİRİM DÜŞER (DÜZEY-LOG MODELİNDE KATSAYI 100'E BÖLÜNEREK YORUMLANIR)
# Store the level-log model (log(famsize)) as model11.
model11 <- lm(abuse ~ age + educ + married + log(famsize), data = alcohol)
# NOTE(review): this summarises data1 (the scale(educ) model), not the
# model11 object created on the previous line; confirm which one was intended.
summary(data1)
##
## Call:
## lm(formula = abuse ~ +age + scale(educ) + married + famsize,
## data = alcohol)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.18888 -0.11455 -0.09666 -0.07384 0.99322
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1352270 0.0139512 9.693 < 2e-16 ***
## age 0.0001842 0.0003293 0.559 0.576
## scale(educ) -0.0121459 0.0030329 -4.005 6.26e-05 ***
## married -0.0093980 0.0089139 -1.054 0.292
## famsize -0.0129875 0.0021864 -5.940 2.95e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.298 on 9817 degrees of freedom
## Multiple R-squared: 0.006398, Adjusted R-squared: 0.005993
## F-statistic: 15.8 on 4 and 9817 DF, p-value: 6.719e-13
T DEĞERİ -2 İLE 2 ARASINDA OLAN DEĞİŞKENLER (AGE VE MARRIED) İSTATİSTİKSEL OLARAK ANLAMSIZDIR
# Overwrites the earlier data2: famsize now enters as a raw second-degree
# polynomial (famsize and famsize^2), then the full summary is printed.
data2 <- lm(abuse ~ (age) + (educ) + married + poly(famsize,2, raw = TRUE), data = alcohol)
summary(data2)
##
## Call:
## lm(formula = abuse ~ (age) + (educ) + married + poly(famsize,
## 2, raw = TRUE), data = alcohol)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.33252 -0.11475 -0.09032 -0.07174 0.95011
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2180994 0.0217744 10.016 < 2e-16 ***
## age 0.0001140 0.0003294 0.346 0.729380
## educ -0.0039658 0.0010466 -3.789 0.000152 ***
## married 0.0031421 0.0093564 0.336 0.737010
## poly(famsize, 2, raw = TRUE)1 -0.0409462 0.0067595 -6.058 1.43e-09 ***
## poly(famsize, 2, raw = TRUE)2 0.0039716 0.0009087 4.371 1.25e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2977 on 9816 degrees of freedom
## Multiple R-squared: 0.008328, Adjusted R-squared: 0.007823
## F-statistic: 16.49 on 5 and 9816 DF, p-value: 3.043e-16
YUKARIDA DA GÖRÜYORUZ Kİ BENZER BİR SONUÇ ELDE ETTİK
ANOVA TABLOSU BİZE HANGİ DEĞİŞKENİN DAHA FAZLA AÇIKLAMA GÜCÜ KATTIĞINI GÖSTERİR
library(car)
## Zorunlu paket yükleniyor: carData
# Analysis-of-variance table for data2 (the quadratic-famsize model).
# NOTE(review): anova() here is the base stats function; nothing from the
# car package loaded just before appears to be used by this call.
anova(data2)
## Analysis of Variance Table
##
## Response: abuse
## Df Sum Sq Mean Sq F value Pr(>F)
## age 1 0.06 0.06097 0.6879 0.4069103
## educ 1 1.04 1.03974 11.7297 0.0006176 ***
## married 1 1.38 1.37984 15.5665 8.021e-05 ***
## poly(famsize, 2, raw = TRUE) 2 4.83 2.41337 27.2262 1.616e-12 ***
## Residuals 9816 870.11 0.08864
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FAMSIZE'IN BU MODELDE EN FAZLA AÇIKLAMA GÜCÜ KATAN DEĞİŞKEN OLDUĞUNU GÖREBİLİRİZ
SABİT TERİMLİ (KESMELİ) DOĞRUSAL REGRESYON VE SABİT TERİMSİZ (KESMESİZ) DOĞRUSAL REGRESYON
# Regression with the default intercept term, followed by its summary.
data111 <- lm(formula = abuse ~ age + educ + married + famsize, data = alcohol)
summary(data111)
##
## Call:
## lm(formula = abuse ~ age + educ + married + famsize, data = alcohol)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.18888 -0.11455 -0.09666 -0.07384 0.99322
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1909951 0.0208919 9.142 < 2e-16 ***
## age 0.0001842 0.0003293 0.559 0.576
## educ -0.0041901 0.0010463 -4.005 6.26e-05 ***
## married -0.0093980 0.0089139 -1.054 0.292
## famsize -0.0129875 0.0021864 -5.940 2.95e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.298 on 9817 degrees of freedom
## Multiple R-squared: 0.006398, Adjusted R-squared: 0.005993
## F-statistic: 15.8 on 4 and 9817 DF, p-value: 6.719e-13
# Same regressors, but `- 1` in the formula removes the intercept, so the
# summary has no (Intercept) row.
data222 <- lm(formula = abuse ~ age + educ + married + famsize - 1, data = alcohol)
summary(data222)
##
## Call:
## lm(formula = abuse ~ age + educ + married + famsize - 1, data = alcohol)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.16432 -0.10961 -0.09029 -0.07113 0.98562
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## age 0.0020522 0.0002594 7.912 2.81e-15 ***
## educ 0.0029733 0.0006963 4.270 1.97e-05 ***
## married -0.0058074 0.0089426 -0.649 0.516090
## famsize -0.0073023 0.0021049 -3.469 0.000524 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2993 on 9818 degrees of freedom
## Multiple R-squared: 0.09731, Adjusted R-squared: 0.09694
## F-statistic: 264.6 on 4 and 9818 DF, p-value: < 2.2e-16
şimdi lineer regresyonu çizmek için çalışalım
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.2
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## Warning: package 'readr' was built under R version 4.2.2
## Warning: package 'dplyr' was built under R version 4.2.2
## Warning: package 'forcats' was built under R version 4.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some() masks car::some()
# Scatter plot with abuse on the x-axis and age on the y-axis.
qplot(alcohol$abuse, alcohol$age)
# Same plot with a fitted straight line from method = "lm"; the confidence
# band is switched off. FALSE is spelled out because the shorthand F is an
# ordinary variable that can be reassigned.
qplot(alcohol$abuse, alcohol$age) + geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'
# Fit abuse on age, educ and married (famsize left out) and show the
# coefficient vector. `data` is passed by name instead of relying on
# positional matching after the named `formula` argument.
data4 <- lm(formula = abuse ~ age + educ + married, data = alcohol)
coef(data4)
## (Intercept) age educ married
## 0.1556981519 0.0004911728 -0.0037408631 -0.0318282684
# NOTE(review): data5 repeats the data4 fit verbatim and is not used in the
# visible part of the file.
data5 <- lm(formula = abuse ~ age + educ + married, alcohol)
# Correlation between abuse and age, using cor()'s default method.
cor(alcohol$abuse, alcohol$age)
## [1] 0.008336228
ŞİMDİ LİNEER REGRESYONUMUZU YORUMLAYALIM
library(wooldridge)
data("alcohol")
# Fit abuse on age, educ and married once and print the summary. The model
# was previously wrapped in a second, redundant lm() call; the estimates are
# the same as coef(data4).
summary(lm(formula = abuse ~ age + educ + married, data = alcohol))
##
## Call:
## lm(formula = abuse ~ age + educ + married, data = alcohol)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.18468 -0.10599 -0.09507 -0.08317 0.92971
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1556982 0.0200640 7.760 9.33e-15 ***
## age 0.0004912 0.0003258 1.507 0.131728
## educ -0.0037409 0.0010454 -3.578 0.000347 ***
## married -0.0318283 0.0080886 -3.935 8.38e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2985 on 9818 degrees of freedom
## Multiple R-squared: 0.002827, Adjusted R-squared: 0.002522
## F-statistic: 9.278 on 3 and 9818 DF, p-value: 4.006e-06
BU VERİLER ÜLKE DÜZEYİNDE DEĞİL, BİREY DÜZEYİNDEDİR: 9.822 GÖZLEM İÇİN ALKOL KÖTÜYE KULLANIMI (ABUSE) İLE YAŞ, EĞİTİM, MEDENİ DURUM VE AİLE BÜYÜKLÜĞÜ GİBİ DEĞİŞKENLERİ İÇERMEKTEDİR.
WOOLDRIDGE PAKETİNDEKİ BU VERİLERDE AGE, EDUC, MARRIED VE FAMSIZE DEĞİŞKENLERİNİ KULLANARAK BUNLARIN ALKOL KÖTÜYE KULLANIMINA (ABUSE) OLAN ETKİLERİNİ GÖRÜYORUZ.
BURADA SABİT TERİMİN T İSTATİSTİĞİ 7.76 ÇIKTI ve artık standart hatamız 0.2985; bu, değişkenlerimiz arasında bir bağlantı olduğunu gösteriyor.
F İSTATİSTİĞİMİZİN 9.278 VE p-value'nun 4.006e-06 (yani 0.05'ten çok küçük) ÇIKMASI DA bizi doğrular nitelikte