R Markdown

R Markdown ne için kullanılır?

R Markdown, R ile dinamik belgeler oluşturmak için bir dosya biçimidir. Bir R Markdown belgesi, markdown (yazması kolay bir düz metin biçimi) ile yazılır ve aşağıdaki belge gibi gömülü R kodu parçaları içerir.

# Attach wooldridge (presumably the package that ships the crime3 data set
# loaded below) and rmarkdown.
library(wooldridge)
library(rmarkdown)
# Load the crime3 data set into the workspace; every model below fits on it.
data("crime3")

Şimdi neden R Markdown kullandığımızı anlıyoruz.

# Show the crime3 data frame as a paged table in the rendered document.
rmarkdown::paged_table(crime3)

summary(): R’deki bir vektör, veri çerçevesi, regresyon modeli veya ANOVA modelindeki değerleri hızlı bir şekilde özetlemek için kullanılabilir.

# Fit an OLS regression of district on the listed crime3 columns and print
# the full summary (residual quantiles, coefficient table, fit statistics).
summary(
  lm(
    district ~ year + crime + clrprc1 + clrprc2 + d78 + avgclr +
      lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2,
    data = crime3
  )
)
## 
## Call:
## lm(formula = district ~ year + crime + clrprc1 + clrprc2 + d78 + 
##     avgclr + lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2, 
##     data = crime3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.234  -7.268   0.316   7.229  30.640 
## 
## Coefficients: (4 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 14.32066   27.53759   0.520   0.6056  
## year              NA         NA      NA       NA  
## crime        0.09654    0.44968   0.215   0.8310  
## clrprc1      0.83773    0.32305   2.593   0.0128 *
## clrprc2     -0.22270    0.30842  -0.722   0.4740  
## d78               NA         NA      NA       NA  
## avgclr            NA         NA      NA       NA  
## lcrime      -4.67199    9.96486  -0.469   0.6414  
## clcrime     -2.82100    5.88658  -0.479   0.6341  
## cavgclr      0.53268    0.56600   0.941   0.3517  
## cclrprc1    -0.84517    0.49026  -1.724   0.0916 .
## cclrprc2          NA         NA      NA       NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.73 on 45 degrees of freedom
##   (53 observations deleted due to missingness)
## Multiple R-squared:  0.3163, Adjusted R-squared:   0.21 
## F-statistic: 2.974 on 7 and 45 DF,  p-value: 0.01189

stargazer: stargazer komutu, birkaç modelden elde edilen regresyon analizi sonuçlarını yan yana gösteren iyi biçimlendirilmiş tablolar için LaTeX kodu, HTML kodu ve ASCII metni üretir. Ayrıca özet istatistikleri ve veri çerçevesi içeriğini de verebilir. stargazer, çeşitli paketlerden çok sayıda model nesnesini destekler.

# Two OLS specifications on crime3 that differ in a single regressor:
# model1 uses log(d78), model2 uses log(clrprc2).
model1 <- lm(
  district ~ year + crime + clrprc1 + clrprc2 + log(d78) + avgclr +
    lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2,
  data = crime3
)
model2 <- lm(
  district ~ year + crime + clrprc1 + log(clrprc2) + d78 + avgclr +
    lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2,
  data = crime3
)
# NOTE(review): stargazer is attached here, but no stargazer() call on
# model1/model2 is visible in this document -- confirm whether one is missing.
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer

STANDARDİZASYON

Regresyonumuzdaki değişkenlere scale() fonksiyonunu uygulayarak standardizasyon yapabiliriz.

# Standardised regression: the response and every regressor are wrapped in
# scale() inside the formula, and the fitted model is printed directly.
lm(
  scale(district) ~ scale(year) + scale(crime) + scale(clrprc1) +
    scale(clrprc2) + scale(d78) + scale(avgclr) + scale(lcrime) +
    scale(clcrime) + scale(cavgclr) + scale(cclrprc1) + scale(cclrprc2),
  data = crime3
)
## 
## Call:
## lm(formula = scale(district) ~ scale(year) + scale(crime) + scale(clrprc1) + 
##     scale(clrprc2) + scale(d78) + scale(avgclr) + scale(lcrime) + 
##     scale(clcrime) + scale(cavgclr) + scale(cclrprc1) + scale(cclrprc2), 
##     data = crime3)
## 
## Coefficients:
##     (Intercept)      scale(year)     scale(crime)   scale(clrprc1)  
##         0.15074               NA          0.06955          0.69868  
##  scale(clrprc2)       scale(d78)    scale(avgclr)    scale(lcrime)  
##        -0.18603               NA               NA         -0.18740  
##  scale(clcrime)   scale(cavgclr)  scale(cclrprc1)  scale(cclrprc2)  
##        -0.06905          0.32733         -0.63318               NA

Logarithmic

Regresyonumuza logaritma eklemek, bize bağımlı değişkendeki (yaklaşık) yüzde değişimi verir.

# Log specification: log(district) regressed on log(year), log(d78) and the
# remaining regressors in levels; the fitted model is printed directly.
lm(
  log(district) ~ log(year) + crime + clrprc1 + clrprc2 + log(d78) +
    avgclr + lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2,
  data = crime3
)
## 
## Call:
## lm(formula = log(district) ~ log(year) + crime + clrprc1 + clrprc2 + 
##     log(d78) + avgclr + lcrime + clcrime + cavgclr + cclrprc1 + 
##     cclrprc2, data = crime3)
## 
## Coefficients:
## (Intercept)    log(year)        crime      clrprc1      clrprc2     log(d78)  
##     2.36782           NA     -0.03176      0.03713     -0.01941           NA  
##      avgclr       lcrime      clcrime      cavgclr     cclrprc1     cclrprc2  
##          NA      0.21866     -0.02287      0.03929     -0.04605           NA
# Same log specification with an added quadratic term in clrprc1; I() keeps
# the ^ as arithmetic rather than formula syntax. The model is stored as
# model11 and then summarised.
model11 <- lm(
  log(district) ~ log(year) + crime + clrprc1 + I(clrprc1^2) + clrprc2 +
    log(d78) + avgclr + lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2,
  data = crime3
)
summary(model11)
## 
## Call:
## lm(formula = log(district) ~ log(year) + crime + clrprc1 + I(clrprc1^2) + 
##     clrprc2 + log(d78) + avgclr + lcrime + clcrime + cavgclr + 
##     cclrprc1 + cclrprc2, data = crime3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2425 -0.3197  0.1315  0.4969  1.2820 
## 
## Coefficients: (4 not defined because of singularities)
##                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   3.0929371  1.7818580   1.736   0.0896 .
## log(year)            NA         NA      NA       NA  
## crime        -0.0434064  0.0290252  -1.495   0.1419  
## clrprc1      -0.0312925  0.0690101  -0.453   0.6525  
## I(clrprc1^2)  0.0008743  0.0008469   1.032   0.3075  
## clrprc2      -0.0160044  0.0186357  -0.859   0.3951  
## log(d78)             NA         NA      NA       NA  
## avgclr               NA         NA      NA       NA  
## lcrime        0.4237108  0.6249832   0.678   0.5013  
## clcrime      -0.0126662  0.3501966  -0.036   0.9713  
## cavgclr       0.0358945  0.0338187   1.061   0.2943  
## cclrprc1     -0.0449767  0.0291726  -1.542   0.1303  
## cclrprc2             NA         NA      NA       NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8163 on 44 degrees of freedom
##   (53 observations deleted due to missingness)
## Multiple R-squared:  0.293,  Adjusted R-squared:  0.1644 
## F-statistic: 2.279 on 8 and 44 DF,  p-value: 0.03898

ANOVA

ANOVA (Analysis of Variance), kategorik bir değişkeni bağımsız değişken, nümerik bir değişkeni de bağımlı değişken olarak alır. Yani kategorik değişkenin seviyeleri (kategorileri) değiştikçe nümerik değişkenin de anlamlı olarak değişip değişmediğini araştırır.

library(car)
## Zorunlu paket yükleniyor: carData
# ANOVA table for the quadratic log model, computed with car's Anova().
car::Anova(model11)
## Note: model has aliased coefficients
##       sums of squares computed by model comparison
## Anova Table (Type II tests)
## 
## Response: log(district)
##               Sum Sq Df F value Pr(>F)
## log(year)             0               
## crime         1.4902  1  2.2364 0.1419
## clrprc1               0               
## I(clrprc1^2)  0.7101  1  1.0657 0.3075
## clrprc2               0               
## log(d78)              0               
## avgclr                0               
## lcrime        0.3063  1  0.4596 0.5013
## clcrime       0.0009  1  0.0013 0.9713
## cavgclr               0               
## cclrprc1              0               
## cclrprc2              0               
## Residuals    29.3178 44

INTERCEPT

# Regression with the default intercept term: district on the listed
# regressors (d78 in logs), stored as model111 and then summarised.
model111 <- lm(
  district ~ year + crime + clrprc1 + clrprc2 + log(d78) + avgclr +
    lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2,
  data = crime3
)
summary(model111)
## 
## Call:
## lm(formula = district ~ year + crime + clrprc1 + clrprc2 + log(d78) + 
##     avgclr + lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2, 
##     data = crime3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.234  -7.268   0.316   7.229  30.640 
## 
## Coefficients: (4 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 14.32066   27.53759   0.520   0.6056  
## year              NA         NA      NA       NA  
## crime        0.09654    0.44968   0.215   0.8310  
## clrprc1      0.83773    0.32305   2.593   0.0128 *
## clrprc2     -0.22270    0.30842  -0.722   0.4740  
## log(d78)          NA         NA      NA       NA  
## avgclr            NA         NA      NA       NA  
## lcrime      -4.67199    9.96486  -0.469   0.6414  
## clcrime     -2.82100    5.88658  -0.479   0.6341  
## cavgclr      0.53268    0.56600   0.941   0.3517  
## cclrprc1    -0.84517    0.49026  -1.724   0.0916 .
## cclrprc2          NA         NA      NA       NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.73 on 45 degrees of freedom
##   (53 observations deleted due to missingness)
## Multiple R-squared:  0.3163, Adjusted R-squared:   0.21 
## F-statistic: 2.974 on 7 and 45 DF,  p-value: 0.01189

linear regression without intercept

# Same regressors without an intercept: the trailing "- 1" in the formula
# removes the constant term. Stored as model22 and then summarised.
model22 <- lm(
  district ~ year + crime + clrprc1 + clrprc2 + log(d78) + avgclr +
    lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2 - 1,
  data = crime3
)
summary(model22)
## 
## Call:
## lm(formula = district ~ year + crime + clrprc1 + clrprc2 + log(d78) + 
##     avgclr + lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2 - 
##     1, data = crime3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.234  -7.268   0.316   7.229  30.640 
## 
## Coefficients: (3 not defined because of singularities)
##          Estimate Std. Error t value Pr(>|t|)  
## year      0.18360    0.35305   0.520   0.6056  
## crime     0.09654    0.44968   0.215   0.8310  
## clrprc1   0.83773    0.32305   2.593   0.0128 *
## clrprc2  -0.22270    0.30842  -0.722   0.4740  
## log(d78)       NA         NA      NA       NA  
## avgclr         NA         NA      NA       NA  
## lcrime   -4.67199    9.96486  -0.469   0.6414  
## clcrime  -2.82100    5.88658  -0.479   0.6341  
## cavgclr   0.53268    0.56600   0.941   0.3517  
## cclrprc1 -0.84517    0.49026  -1.724   0.0916 .
## cclrprc2       NA         NA      NA       NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.73 on 45 degrees of freedom
##   (53 observations deleted due to missingness)
## Multiple R-squared:  0.8339, Adjusted R-squared:  0.8043 
## F-statistic: 28.23 on 8 and 45 DF,  p-value: 4.196e-15

plot

Burada da doğrusal (lineer) regresyon doğrusunu çiziyoruz.

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some()   masks car::some()
# Scatter plot of year against district. Built with ggplot() + geom_point()
# because qplot() is deprecated as of ggplot2 3.4.0; mapping the columns
# through aes() also gives plain axis labels ("district", "year") instead of
# "crime3$district" / "crime3$year".
ggplot(crime3, aes(x = district, y = year)) +
  geom_point()

# Scatter plot of year against district with a fitted least-squares line and
# no confidence band. Built with ggplot() because qplot() is deprecated as of
# ggplot2 3.4.0; se is spelled FALSE because the shorthand F can be reassigned.
ggplot(crime3, aes(x = district, y = year)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

comment

library(wooldridge)
data("crime3")
# Fit the baseline regression once and print its summary. A single lm() call
# is sufficient: passing a fitted lm object back into lm() only refits the
# same model and nests the call inside the printed "Call:" section.
summary(
  lm(
    formula = district ~ year + crime + clrprc1 + clrprc2 + d78 + avgclr +
      lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2,
    data = crime3
  )
)
## 
## Call:
## lm(formula = district ~ year + crime + clrprc1 + clrprc2 + d78 + 
##     avgclr + lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2, 
##     data = crime3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.234  -7.268   0.316   7.229  30.640 
## 
## Coefficients: (4 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 14.32066   27.53759   0.520   0.6056  
## year              NA         NA      NA       NA  
## crime        0.09654    0.44968   0.215   0.8310  
## clrprc1      0.83773    0.32305   2.593   0.0128 *
## clrprc2     -0.22270    0.30842  -0.722   0.4740  
## d78               NA         NA      NA       NA  
## avgclr            NA         NA      NA       NA  
## lcrime      -4.67199    9.96486  -0.469   0.6414  
## clcrime     -2.82100    5.88658  -0.479   0.6341  
## cavgclr      0.53268    0.56600   0.941   0.3517  
## cclrprc1    -0.84517    0.49026  -1.724   0.0916 .
## cclrprc2          NA         NA      NA       NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.73 on 45 degrees of freedom
##   (53 observations deleted due to missingness)
## Multiple R-squared:  0.3163, Adjusted R-squared:   0.21 
## F-statistic: 2.974 on 7 and 45 DF,  p-value: 0.01189