R Markdown, R ile dinamik belgeler oluşturmak için bir dosya biçimidir. Bir R Markdown belgesi, markdown (yazması kolay bir düz metin biçimi) ile yazılır ve aşağıdaki belge gibi gömülü R kodu parçaları içerir.
library(wooldridge)
library(rmarkdown)
# Load the crime3 data set shipped with the wooldridge package.
data("crime3", package = "wooldridge")
Şimdi neden R Markdown kullandığımızı anlıyoruz.
# Show the crime3 data frame as a paged table in the rendered document.
rmarkdown::paged_table(crime3)
## summary
`summary()`, R’deki bir vektör, veri çerçevesi, regresyon modeli veya ANOVA modelindeki değerleri hızlı bir şekilde özetlemek için kullanılabilir.
# Baseline OLS fit of district on the remaining crime3 regressors; summary()
# prints the residual quantiles, the coefficient table and the fit statistics.
summary(
  lm(
    formula = district ~ year + crime + clrprc1 + clrprc2 + d78 + avgclr +
      lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2,
    data = crime3
  )
)
##
## Call:
## lm(formula = district ~ year + crime + clrprc1 + clrprc2 + d78 +
## avgclr + lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2,
## data = crime3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.234 -7.268 0.316 7.229 30.640
##
## Coefficients: (4 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.32066 27.53759 0.520 0.6056
## year NA NA NA NA
## crime 0.09654 0.44968 0.215 0.8310
## clrprc1 0.83773 0.32305 2.593 0.0128 *
## clrprc2 -0.22270 0.30842 -0.722 0.4740
## d78 NA NA NA NA
## avgclr NA NA NA NA
## lcrime -4.67199 9.96486 -0.469 0.6414
## clcrime -2.82100 5.88658 -0.479 0.6341
## cavgclr 0.53268 0.56600 0.941 0.3517
## cclrprc1 -0.84517 0.49026 -1.724 0.0916 .
## cclrprc2 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.73 on 45 degrees of freedom
## (53 observations deleted due to missingness)
## Multiple R-squared: 0.3163, Adjusted R-squared: 0.21
## F-statistic: 2.974 on 7 and 45 DF, p-value: 0.01189
## stargazer
stargazer komutu, birkaç modelin regresyon analizi sonuçlarını yan yana gösteren iyi biçimlendirilmiş tablolar için LaTeX kodu, HTML kodu ve ASCII metni üretir. Ayrıca özet istatistikleri ve veri çerçevesi içeriğini de verebilir. stargazer, çeşitli paketlerden çok sayıda model nesnesini destekler.
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Two variants of the baseline model: model1 takes the log of d78,
# model2 takes the log of clrprc2. All other regressors are identical.
model1 <- lm(
  district ~ year + crime + clrprc1 + clrprc2 + log(d78) + avgclr +
    lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2,
  data = crime3
)
model2 <- lm(
  district ~ year + crime + clrprc1 + log(clrprc2) + d78 + avgclr +
    lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2,
  data = crime3
)
# Print both fits side by side as a plain-text table. Without this call the
# two models above were fitted but never displayed.
stargazer(model1, model2, type = "text")
Regresyonumuzdaki değişkenlere `scale()` uygulayarak standardizasyon yapabiliriz.
# Same regressors as the baseline model, with the response and every regressor
# passed through scale() inside the formula. Printing the fit shows only the
# call and the coefficients.
lm(
  scale(district) ~ scale(year) + scale(crime) + scale(clrprc1) +
    scale(clrprc2) + scale(d78) + scale(avgclr) + scale(lcrime) +
    scale(clcrime) + scale(cavgclr) + scale(cclrprc1) + scale(cclrprc2),
  data = crime3
)
##
## Call:
## lm(formula = scale(district) ~ scale(year) + scale(crime) + scale(clrprc1) +
## scale(clrprc2) + scale(d78) + scale(avgclr) + scale(lcrime) +
## scale(clcrime) + scale(cavgclr) + scale(cclrprc1) + scale(cclrprc2),
## data = crime3)
##
## Coefficients:
## (Intercept) scale(year) scale(crime) scale(clrprc1)
## 0.15074 NA 0.06955 0.69868
## scale(clrprc2) scale(d78) scale(avgclr) scale(lcrime)
## -0.18603 NA NA -0.18740
## scale(clcrime) scale(cavgclr) scale(cclrprc1) scale(cclrprc2)
## -0.06905 0.32733 -0.63318 NA
Regresyonumuza logaritma eklemek, bize bağımlı değişkendeki (yaklaşık) yüzde değişimi verir.
# Log-transformed variant: log(district) regressed on log(year), log(d78) and
# the remaining regressors in levels.
# NOTE(review): d78 looks like a 0/1 year dummy, so log(d78) would be -Inf or 0;
# confirm that this transform is intended.
lm(
  log(district) ~ log(year) + crime + clrprc1 + clrprc2 + log(d78) +
    avgclr + lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2,
  data = crime3
)
##
## Call:
## lm(formula = log(district) ~ log(year) + crime + clrprc1 + clrprc2 +
## log(d78) + avgclr + lcrime + clcrime + cavgclr + cclrprc1 +
## cclrprc2, data = crime3)
##
## Coefficients:
## (Intercept) log(year) crime clrprc1 clrprc2 log(d78)
## 2.36782 NA -0.03176 0.03713 -0.01941 NA
## avgclr lcrime clcrime cavgclr cclrprc1 cclrprc2
## NA 0.21866 -0.02287 0.03929 -0.04605 NA
# Log model extended with a quadratic term in clrprc1. I() makes ^ act as
# arithmetic squaring instead of a formula operator.
model11 <- lm(
  log(district) ~ log(year) + crime + clrprc1 + I(clrprc1^2) + clrprc2 +
    log(d78) + avgclr + lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2,
  data = crime3
)
summary(model11)
##
## Call:
## lm(formula = log(district) ~ log(year) + crime + clrprc1 + I(clrprc1^2) +
## clrprc2 + log(d78) + avgclr + lcrime + clcrime + cavgclr +
## cclrprc1 + cclrprc2, data = crime3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.2425 -0.3197 0.1315 0.4969 1.2820
##
## Coefficients: (4 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.0929371 1.7818580 1.736 0.0896 .
## log(year) NA NA NA NA
## crime -0.0434064 0.0290252 -1.495 0.1419
## clrprc1 -0.0312925 0.0690101 -0.453 0.6525
## I(clrprc1^2) 0.0008743 0.0008469 1.032 0.3075
## clrprc2 -0.0160044 0.0186357 -0.859 0.3951
## log(d78) NA NA NA NA
## avgclr NA NA NA NA
## lcrime 0.4237108 0.6249832 0.678 0.5013
## clcrime -0.0126662 0.3501966 -0.036 0.9713
## cavgclr 0.0358945 0.0338187 1.061 0.2943
## cclrprc1 -0.0449767 0.0291726 -1.542 0.1303
## cclrprc2 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8163 on 44 degrees of freedom
## (53 observations deleted due to missingness)
## Multiple R-squared: 0.293, Adjusted R-squared: 0.1644
## F-statistic: 2.279 on 8 and 44 DF, p-value: 0.03898
ANOVA (Analysis of Variance), kategorik bir değişkeni bağımsız değişken olarak ve nümerik bir değişkeni de bağımlı değişken olarak alır. Yani, kategorik değişkenin seviyeleri (kategorileri) değiştikçe nümerik değişken de anlamlı olarak değişiyor mu, bunu araştırır.
library(car)
## Zorunlu paket yükleniyor: carData
# ANOVA table for the quadratic log model, using Anova() from the car package.
car::Anova(model11)
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Anova Table (Type II tests)
##
## Response: log(district)
## Sum Sq Df F value Pr(>F)
## log(year) 0
## crime 1.4902 1 2.2364 0.1419
## clrprc1 0
## I(clrprc1^2) 0.7101 1 1.0657 0.3075
## clrprc2 0
## log(d78) 0
## avgclr 0
## lcrime 0.3063 1 0.4596 0.5013
## clcrime 0.0009 1 0.0013 0.9713
## cavgclr 0
## cclrprc1 0
## cclrprc2 0
## Residuals 29.3178 44
# Level model with log(d78) in place of d78, fitted with an intercept.
model111 <- lm(
  formula = district ~ year + crime + clrprc1 + clrprc2 + log(d78) +
    avgclr + lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2,
  data = crime3
)
summary(model111)
##
## Call:
## lm(formula = district ~ year + crime + clrprc1 + clrprc2 + log(d78) +
## avgclr + lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2,
## data = crime3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.234 -7.268 0.316 7.229 30.640
##
## Coefficients: (4 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.32066 27.53759 0.520 0.6056
## year NA NA NA NA
## crime 0.09654 0.44968 0.215 0.8310
## clrprc1 0.83773 0.32305 2.593 0.0128 *
## clrprc2 -0.22270 0.30842 -0.722 0.4740
## log(d78) NA NA NA NA
## avgclr NA NA NA NA
## lcrime -4.67199 9.96486 -0.469 0.6414
## clcrime -2.82100 5.88658 -0.479 0.6341
## cavgclr 0.53268 0.56600 0.941 0.3517
## cclrprc1 -0.84517 0.49026 -1.724 0.0916 .
## cclrprc2 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.73 on 45 degrees of freedom
## (53 observations deleted due to missingness)
## Multiple R-squared: 0.3163, Adjusted R-squared: 0.21
## F-statistic: 2.974 on 7 and 45 DF, p-value: 0.01189
# Same regressors as model111, but the trailing "- 1" removes the intercept
# from the formula.
model22 <- lm(
  formula = district ~ year + crime + clrprc1 + clrprc2 + log(d78) +
    avgclr + lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2 - 1,
  data = crime3
)
summary(model22)
##
## Call:
## lm(formula = district ~ year + crime + clrprc1 + clrprc2 + log(d78) +
## avgclr + lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2 -
## 1, data = crime3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.234 -7.268 0.316 7.229 30.640
##
## Coefficients: (3 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## year 0.18360 0.35305 0.520 0.6056
## crime 0.09654 0.44968 0.215 0.8310
## clrprc1 0.83773 0.32305 2.593 0.0128 *
## clrprc2 -0.22270 0.30842 -0.722 0.4740
## log(d78) NA NA NA NA
## avgclr NA NA NA NA
## lcrime -4.67199 9.96486 -0.469 0.6414
## clcrime -2.82100 5.88658 -0.479 0.6341
## cavgclr 0.53268 0.56600 0.941 0.3517
## cclrprc1 -0.84517 0.49026 -1.724 0.0916 .
## cclrprc2 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.73 on 45 degrees of freedom
## (53 observations deleted due to missingness)
## Multiple R-squared: 0.8339, Adjusted R-squared: 0.8043
## F-statistic: 28.23 on 8 and 45 DF, p-value: 4.196e-15
Burada da saçılım grafiğini ve doğrusal (lineer) regresyon doğrusunu çiziyoruz.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some() masks car::some()
# Scatter plot of year against district. Built with ggplot() because qplot()
# is deprecated as of ggplot2 3.4.0.
ggplot(crime3, aes(x = district, y = year)) +
  geom_point()
# Same scatter plot with a fitted linear-model line and no confidence band.
# se is spelled FALSE because F is an ordinary variable that can be reassigned.
ggplot(crime3, aes(x = district, y = year)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
library(wooldridge)
# Reload crime3 (it was already loaded earlier in the visible source).
data("crime3")
# Fit the baseline model once and summarise it. The previous lm(lm(...))
# nesting fitted the same model twice and produced identical estimates.
summary(
  lm(
    formula = district ~ year + crime + clrprc1 + clrprc2 + d78 + avgclr +
      lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2,
    data = crime3
  )
)
##
## Call:
## lm(formula = district ~ year + crime + clrprc1 + clrprc2 + d78 +
## avgclr + lcrime + clcrime + cavgclr + cclrprc1 + cclrprc2,
## data = crime3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.234 -7.268 0.316 7.229 30.640
##
## Coefficients: (4 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.32066 27.53759 0.520 0.6056
## year NA NA NA NA
## crime 0.09654 0.44968 0.215 0.8310
## clrprc1 0.83773 0.32305 2.593 0.0128 *
## clrprc2 -0.22270 0.30842 -0.722 0.4740
## d78 NA NA NA NA
## avgclr NA NA NA NA
## lcrime -4.67199 9.96486 -0.469 0.6414
## clcrime -2.82100 5.88658 -0.479 0.6341
## cavgclr 0.53268 0.56600 0.941 0.3517
## cclrprc1 -0.84517 0.49026 -1.724 0.0916 .
## cclrprc2 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.73 on 45 degrees of freedom
## (53 observations deleted due to missingness)
## Multiple R-squared: 0.3163, Adjusted R-squared: 0.21
## F-statistic: 2.974 on 7 and 45 DF, p-value: 0.01189