library(wooldridge)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rmarkdown)
Wooldridge, veri setimizi içinden aldığımız ana paketimizdir. Dplyr ise veri setlerinin düzenlenmesini, filtrelenmesini, sıralanmasını ve belirli değişkenlerin hesaplanmasını mümkün kılan 6 ayrı fiile (fonksiyona) sahip bir pakettir. Bunlar: select(), mutate(), filter(), arrange(), summarize() ve group_by(). R programlama dilini öğrenmek bu fiilleri kullanmaktan geçiyor.
# Load the traffic1 data set shipped with the wooldridge package.
data(traffic1, package = "wooldridge")
# Show the first six rows.
head(traffic1, n = 6L)
## state admn90 admn85 open90 open85 dthrte90 dthrte85 speed90 speed85
## 1 AL 0 0 0 0 2.6 2.9 1 0
## 2 AK 1 1 1 0 2.1 3.2 0 0
## 3 AZ 1 0 0 0 2.5 4.4 1 0
## 4 AR 0 0 0 0 2.9 3.4 1 0
## 5 CA 1 0 1 1 2.0 2.6 1 0
## 6 CO 1 1 0 0 1.9 2.4 1 0
## cdthrte cadmn copen cspeed
## 1 -0.3000002 0 0 1
## 2 -1.1000001 0 1 0
## 3 -1.9000001 1 0 1
## 4 -0.5000000 0 0 1
## 5 -0.5999999 1 0 1
## 6 -0.5000001 0 0 1
# Show the last six rows.
tail(traffic1, n = 6L)
## state admn90 admn85 open90 open85 dthrte90 dthrte85 speed90 speed85
## 46 VT 1 0 0 0 1.5 2.5 1 0
## 47 VA 0 0 0 0 1.8 2.1 1 0
## 48 WA 0 1 1 1 1.9 2.3 1 0
## 49 WV 1 1 0 0 3.2 3.6 1 0
## 50 WI 1 0 1 1 1.8 2.1 1 0
## 51 WY 1 1 0 0 2.2 2.7 1 0
## cdthrte cadmn copen cspeed
## 46 -1.0000000 1 0 1
## 47 -0.3000000 0 0 1
## 48 -0.4000000 -1 0 1
## 49 -0.3999999 0 0 1
## 50 -0.3000000 1 0 1
## 51 -0.5000000 0 0 1
head() ve tail() komutları ile veri setimizin ilk 6 ve son 6 gözlemini (satırını) görebiliyoruz.
# Render the whole data frame as a paged table in the knitted document.
rmarkdown::paged_table(traffic1)
paged_table() komutu, veri setinin tamamını sayfalara bölünmüş bir tablo halinde görmemizi sağlıyor.
# Per-column summary statistics (min, quartiles, median, mean, max).
summary(object = traffic1)
## state admn90 admn85 open90
## Length:51 Min. :0.0000 Min. :0.0000 Min. :0.0000
## Class :character 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Mode :character Median :1.0000 Median :0.0000 Median :0.0000
## Mean :0.5686 Mean :0.4118 Mean :0.4314
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## open85 dthrte90 dthrte85 speed90 speed85
## Min. :0.0000 Min. :1.300 Min. :1.90 Min. :0.0000 Min. :0
## 1st Qu.:0.0000 1st Qu.:1.850 1st Qu.:2.30 1st Qu.:1.0000 1st Qu.:0
## Median :0.0000 Median :2.000 Median :2.60 Median :1.0000 Median :0
## Mean :0.3725 Mean :2.155 Mean :2.70 Mean :0.7843 Mean :0
## 3rd Qu.:1.0000 3rd Qu.:2.500 3rd Qu.:3.05 3rd Qu.:1.0000 3rd Qu.:0
## Max. :1.0000 Max. :3.600 Max. :4.40 Max. :1.0000 Max. :0
## cdthrte cadmn copen cspeed
## Min. :-1.9000 Min. :-1.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:-0.7000 1st Qu.: 0.0000 1st Qu.:0.00000 1st Qu.:1.0000
## Median :-0.5000 Median : 0.0000 Median :0.00000 Median :1.0000
## Mean :-0.5451 Mean : 0.1569 Mean :0.05882 Mean :0.7843
## 3rd Qu.:-0.3000 3rd Qu.: 0.0000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. : 0.3000 Max. : 1.0000 Max. :1.00000 Max. :1.0000
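summary() her sütun için özet istatistikleri (en küçük değer, çeyrekler, medyan, ortalama, en büyük değer) verir. summarise() ise bir sütundaki verileri tek bir değere indirgiyor, özetliyor. Bu değer, örneğin bir değişkenin ortalaması (mean) olabilir.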
# Mean of cadmn within each value of cspeed.
# group_by() must receive the bare column name: a quoted string such as
# "speed" is treated as a constant, which puts every row into one group.
traffic1 %>%
  group_by(cspeed) %>%
  summarise("yönetici(cadmn)" = mean(cadmn))
## # A tibble: 2 × 2
## cspeed `yönetici(cadmn)`
## <int> <dbl>
## 1 0 0.182
## 2 1 0.15
Belirli değişkenleri / sütunları seçme
# Keep only the three change variables, in this order.
traffic1 %>%
  select(cspeed, cadmn, copen)
## cspeed cadmn copen
## 1 1 0 0
## 2 0 0 1
## 3 1 1 0
## 4 1 0 0
## 5 1 1 0
## 6 1 0 0
## 7 0 1 0
## 8 0 0 0
## 9 0 0 0
## 10 1 1 1
## 11 1 0 0
## 12 0 0 0
## 13 1 0 0
## 14 1 1 0
## 15 1 0 0
## 16 1 0 0
## 17 1 1 0
## 18 1 0 0
## 19 1 0 0
## 20 1 0 0
## 21 0 1 0
## 22 0 0 0
## 23 1 0 0
## 24 1 0 0
## 25 1 0 0
## 26 1 0 0
## 27 1 0 0
## 28 1 0 0
## 29 1 0 0
## 30 1 0 0
## 31 0 0 0
## 32 1 0 1
## 33 0 0 0
## 34 1 0 0
## 35 1 0 0
## 36 1 0 0
## 37 1 0 0
## 38 1 0 0
## 39 0 0 0
## 40 0 0 0
## 41 1 0 0
## 42 1 0 0
## 43 1 0 0
## 44 1 0 0
## 45 1 0 0
## 46 1 1 0
## 47 1 0 0
## 48 1 -1 0
## 49 1 0 0
## 50 1 1 0
## 51 1 0 0
# mutate() with existing column names and no new expression: each column is
# reassigned to itself, so the printed data frame is the same as traffic1.
traffic1 %>%
  mutate(open90 = open90, open85 = open85)
## state admn90 admn85 open90 open85 dthrte90 dthrte85 speed90 speed85
## 1 AL 0 0 0 0 2.6 2.9 1 0
## 2 AK 1 1 1 0 2.1 3.2 0 0
## 3 AZ 1 0 0 0 2.5 4.4 1 0
## 4 AR 0 0 0 0 2.9 3.4 1 0
## 5 CA 1 0 1 1 2.0 2.6 1 0
## 6 CO 1 1 0 0 1.9 2.4 1 0
## 7 CT 1 0 0 0 1.5 2.0 0 0
## 8 DE 1 1 0 0 2.2 2.2 0 0
## 9 DC 1 1 0 0 1.6 3.0 0 0
## 10 FL 1 0 1 0 2.7 3.4 1 0
## 11 GA 0 0 0 0 2.0 2.7 1 0
## 12 HI 0 0 1 1 2.3 2.0 0 0
## 13 ID 0 0 1 1 2.9 3.5 1 0
## 14 IL 1 0 1 1 1.9 2.3 1 0
## 15 IN 1 1 0 0 1.8 2.6 1 0
## 16 IA 1 1 1 1 2.1 2.4 1 0
## 17 KS 1 0 1 1 2.1 2.6 1 0
## 18 KY 0 0 0 0 2.6 2.6 1 0
## 19 LA 1 1 0 0 2.5 3.0 1 0
## 20 ME 1 1 0 0 1.8 2.4 1 0
## 21 MD 1 0 1 1 1.9 2.3 0 0
## 22 MA 0 0 0 0 1.3 1.9 0 0
## 23 MI 0 0 1 1 1.9 2.4 1 0
## 24 MN 1 1 1 1 1.5 2.0 1 0
## 25 MS 1 1 0 0 3.2 3.6 1 0
## 26 MO 1 1 0 0 2.3 2.6 1 0
## 27 MT 0 0 1 1 2.5 3.1 1 0
## 28 NE 0 0 0 0 1.9 2.1 1 0
## 29 NV 1 1 0 0 3.6 3.9 1 0
## 30 NH 0 0 0 0 1.6 2.6 1 0
## 31 NJ 0 0 0 0 1.5 1.9 0 0
## 32 NM 1 1 1 0 3.1 4.2 1 0
## 33 NY 0 0 0 0 2.0 2.3 0 0
## 34 NC 1 1 1 1 2.3 3.1 1 0
## 35 ND 1 1 1 1 1.9 2.2 1 0
## 36 OH 0 0 0 0 1.8 2.1 1 0
## 37 OK 1 1 1 1 2.0 2.5 1 0
## 38 OR 1 1 1 1 2.2 2.8 1 0
## 39 PA 0 0 0 0 1.9 2.4 0 0
## 40 RI 0 0 0 0 1.3 2.1 0 0
## 41 SC 0 0 1 1 2.9 3.5 1 0
## 42 SD 0 0 1 1 2.3 2.3 1 0
## 43 TN 0 0 0 0 2.6 3.4 1 0
## 44 TX 0 0 0 0 2.0 2.7 1 0
## 45 UT 1 1 1 1 2.0 2.8 1 0
## 46 VT 1 0 0 0 1.5 2.5 1 0
## 47 VA 0 0 0 0 1.8 2.1 1 0
## 48 WA 0 1 1 1 1.9 2.3 1 0
## 49 WV 1 1 0 0 3.2 3.6 1 0
## 50 WI 1 0 1 1 1.8 2.1 1 0
## 51 WY 1 1 0 0 2.2 2.7 1 0
## cdthrte cadmn copen cspeed
## 1 -0.3000002 0 0 1
## 2 -1.1000001 0 1 0
## 3 -1.9000001 1 0 1
## 4 -0.5000000 0 0 1
## 5 -0.5999999 1 0 1
## 6 -0.5000001 0 0 1
## 7 -0.5000000 1 0 0
## 8 0.0000000 0 0 0
## 9 -1.4000000 0 0 0
## 10 -0.7000000 1 1 1
## 11 -0.7000000 0 0 1
## 12 0.3000000 0 0 0
## 13 -0.5999999 0 0 1
## 14 -0.4000000 1 0 1
## 15 -0.8000000 0 0 1
## 16 -0.3000002 0 0 1
## 17 -0.5000000 1 0 1
## 18 0.0000000 0 0 1
## 19 -0.5000000 0 0 1
## 20 -0.6000001 0 0 1
## 21 -0.4000000 1 0 0
## 22 -0.6000000 0 0 0
## 23 -0.5000001 0 0 1
## 24 -0.5000000 0 0 1
## 25 -0.3999999 0 0 1
## 26 -0.3000000 0 0 1
## 27 -0.5999999 0 0 1
## 28 -0.1999999 0 0 1
## 29 -0.3000002 0 0 1
## 30 -0.9999999 0 0 1
## 31 -0.4000000 0 0 0
## 32 -1.0999999 0 1 1
## 33 -0.3000000 0 0 0
## 34 -0.8000000 0 0 1
## 35 -0.3000001 0 0 1
## 36 -0.3000000 0 0 1
## 37 -0.5000000 0 0 1
## 38 -0.5999999 0 0 1
## 39 -0.5000001 0 0 0
## 40 -0.8000000 0 0 0
## 41 -0.5999999 0 0 1
## 42 0.0000000 0 0 1
## 43 -0.8000002 0 0 1
## 44 -0.7000000 0 0 1
## 45 -0.8000000 0 0 1
## 46 -1.0000000 1 0 1
## 47 -0.3000000 0 0 1
## 48 -0.4000000 -1 0 1
## 49 -0.3999999 0 0 1
## 50 -0.3000000 1 0 1
## 51 -0.5000000 0 0 1
Bağlantı (pipe) operatörünü ( %>% ) kullandığımızda veri setini her seferinde fiillerin içerisine argüman olarak yazmamıza gerek kalmıyor.
# Rows whose change in death rate equals -0.5.
# cdthrte is a floating-point column, so compare with near() instead of `==`.
# At the default tolerance the rows printed as -0.5000001 are still excluded;
# pass a larger `tol` to near() if those should count as -0.5 as well.
Tablom <- traffic1 %>%
  filter(near(cdthrte, -0.5))

# OLS regression of admn90 on open90, dthrte90, speed90 and admn85, fitted on
# the full traffic1 data (lm() includes an intercept by default).
summary(
  lm(admn90 ~ open90 + dthrte90 + speed90 + admn85, data = traffic1)
)
##
## Call:
## lm(formula = admn90 ~ open90 + dthrte90 + speed90 + admn85, data = traffic1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.05449 -0.25455 -0.04904 0.13376 0.77598
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.33416 0.23853 1.401 0.168
## open90 0.15060 0.11152 1.350 0.183
## dthrte90 -0.05454 0.11630 -0.469 0.641
## speed90 0.02622 0.14538 0.180 0.858
## admn85 0.64714 0.11270 5.742 7.04e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3883 on 46 degrees of freedom
## Multiple R-squared: 0.4455, Adjusted R-squared: 0.3973
## F-statistic: 9.241 on 4 and 46 DF, p-value: 1.446e-05
# Rows whose change in death rate equals -1.
# cdthrte is a floating-point column, so compare with near() instead of `==`.
Tablom2 <- traffic1 %>%
  filter(near(cdthrte, -1))
Tablom2
## state admn90 admn85 open90 open85 dthrte90 dthrte85 speed90 speed85 cdthrte
## 1 VT 1 0 0 0 1.5 2.5 1 0 -1
## cadmn copen cspeed
## 1 1 0 1
lm() fonksiyonu kullanılarak sabit terimli (intercept'li) bir regresyon tahmin edilmiştir. Sabit terim, bağımlı değişken olan admn90'a göre yorumlanmıştır.
# Same dependent variable, but the intercept is dropped (the `- 1` term) and
# speed85 is added as an extra regressor.
summary(
  lm(
    admn90 ~ open90 + dthrte90 + speed85 + speed90 + admn85 - 1,
    data = traffic1
  )
)
##
## Call:
## lm(formula = admn90 ~ open90 + dthrte90 + speed85 + speed90 +
## admn85 - 1, data = traffic1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.01624 -0.20104 -0.03314 0.15761 0.87326
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## open90 0.16962 0.11182 1.517 0.136
## dthrte90 0.08449 0.06126 1.379 0.174
## speed85 NA NA NA NA
## speed90 0.03627 0.14668 0.247 0.806
## admn85 0.64980 0.11383 5.708 7.42e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3923 on 47 degrees of freedom
## Multiple R-squared: 0.7506, Adjusted R-squared: 0.7294
## F-statistic: 35.37 on 4 and 47 DF, p-value: 1.25e-13
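Bu regresyonumuzda ise hem Intercepti çıkardık hem de speed85 değişkenini ekledik. Ancak speed85 bütün gözlemlerde 0 değerini aldığı için katsayısı tahmin edilememiş (NA) ve çıktıda "1 not defined because of singularities" notu oluşmuştur.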
# Copy of traffic1 with two extra columns:
#   sabit - the constant 6 in every row
#   cile  - 8 where open85 equals 1, otherwise 2
Cilebülbül <- mutate(
  traffic1,
  sabit = 6,
  cile = ifelse(open85 == 1, 8, 2)
)
print(Cilebülbül)
## state admn90 admn85 open90 open85 dthrte90 dthrte85 speed90 speed85
## 1 AL 0 0 0 0 2.6 2.9 1 0
## 2 AK 1 1 1 0 2.1 3.2 0 0
## 3 AZ 1 0 0 0 2.5 4.4 1 0
## 4 AR 0 0 0 0 2.9 3.4 1 0
## 5 CA 1 0 1 1 2.0 2.6 1 0
## 6 CO 1 1 0 0 1.9 2.4 1 0
## 7 CT 1 0 0 0 1.5 2.0 0 0
## 8 DE 1 1 0 0 2.2 2.2 0 0
## 9 DC 1 1 0 0 1.6 3.0 0 0
## 10 FL 1 0 1 0 2.7 3.4 1 0
## 11 GA 0 0 0 0 2.0 2.7 1 0
## 12 HI 0 0 1 1 2.3 2.0 0 0
## 13 ID 0 0 1 1 2.9 3.5 1 0
## 14 IL 1 0 1 1 1.9 2.3 1 0
## 15 IN 1 1 0 0 1.8 2.6 1 0
## 16 IA 1 1 1 1 2.1 2.4 1 0
## 17 KS 1 0 1 1 2.1 2.6 1 0
## 18 KY 0 0 0 0 2.6 2.6 1 0
## 19 LA 1 1 0 0 2.5 3.0 1 0
## 20 ME 1 1 0 0 1.8 2.4 1 0
## 21 MD 1 0 1 1 1.9 2.3 0 0
## 22 MA 0 0 0 0 1.3 1.9 0 0
## 23 MI 0 0 1 1 1.9 2.4 1 0
## 24 MN 1 1 1 1 1.5 2.0 1 0
## 25 MS 1 1 0 0 3.2 3.6 1 0
## 26 MO 1 1 0 0 2.3 2.6 1 0
## 27 MT 0 0 1 1 2.5 3.1 1 0
## 28 NE 0 0 0 0 1.9 2.1 1 0
## 29 NV 1 1 0 0 3.6 3.9 1 0
## 30 NH 0 0 0 0 1.6 2.6 1 0
## 31 NJ 0 0 0 0 1.5 1.9 0 0
## 32 NM 1 1 1 0 3.1 4.2 1 0
## 33 NY 0 0 0 0 2.0 2.3 0 0
## 34 NC 1 1 1 1 2.3 3.1 1 0
## 35 ND 1 1 1 1 1.9 2.2 1 0
## 36 OH 0 0 0 0 1.8 2.1 1 0
## 37 OK 1 1 1 1 2.0 2.5 1 0
## 38 OR 1 1 1 1 2.2 2.8 1 0
## 39 PA 0 0 0 0 1.9 2.4 0 0
## 40 RI 0 0 0 0 1.3 2.1 0 0
## 41 SC 0 0 1 1 2.9 3.5 1 0
## 42 SD 0 0 1 1 2.3 2.3 1 0
## 43 TN 0 0 0 0 2.6 3.4 1 0
## 44 TX 0 0 0 0 2.0 2.7 1 0
## 45 UT 1 1 1 1 2.0 2.8 1 0
## 46 VT 1 0 0 0 1.5 2.5 1 0
## 47 VA 0 0 0 0 1.8 2.1 1 0
## 48 WA 0 1 1 1 1.9 2.3 1 0
## 49 WV 1 1 0 0 3.2 3.6 1 0
## 50 WI 1 0 1 1 1.8 2.1 1 0
## 51 WY 1 1 0 0 2.2 2.7 1 0
## cdthrte cadmn copen cspeed sabit cile
## 1 -0.3000002 0 0 1 6 2
## 2 -1.1000001 0 1 0 6 2
## 3 -1.9000001 1 0 1 6 2
## 4 -0.5000000 0 0 1 6 2
## 5 -0.5999999 1 0 1 6 8
## 6 -0.5000001 0 0 1 6 2
## 7 -0.5000000 1 0 0 6 2
## 8 0.0000000 0 0 0 6 2
## 9 -1.4000000 0 0 0 6 2
## 10 -0.7000000 1 1 1 6 2
## 11 -0.7000000 0 0 1 6 2
## 12 0.3000000 0 0 0 6 8
## 13 -0.5999999 0 0 1 6 8
## 14 -0.4000000 1 0 1 6 8
## 15 -0.8000000 0 0 1 6 2
## 16 -0.3000002 0 0 1 6 8
## 17 -0.5000000 1 0 1 6 8
## 18 0.0000000 0 0 1 6 2
## 19 -0.5000000 0 0 1 6 2
## 20 -0.6000001 0 0 1 6 2
## 21 -0.4000000 1 0 0 6 8
## 22 -0.6000000 0 0 0 6 2
## 23 -0.5000001 0 0 1 6 8
## 24 -0.5000000 0 0 1 6 8
## 25 -0.3999999 0 0 1 6 2
## 26 -0.3000000 0 0 1 6 2
## 27 -0.5999999 0 0 1 6 8
## 28 -0.1999999 0 0 1 6 2
## 29 -0.3000002 0 0 1 6 2
## 30 -0.9999999 0 0 1 6 2
## 31 -0.4000000 0 0 0 6 2
## 32 -1.0999999 0 1 1 6 2
## 33 -0.3000000 0 0 0 6 2
## 34 -0.8000000 0 0 1 6 8
## 35 -0.3000001 0 0 1 6 8
## 36 -0.3000000 0 0 1 6 2
## 37 -0.5000000 0 0 1 6 8
## 38 -0.5999999 0 0 1 6 8
## 39 -0.5000001 0 0 0 6 2
## 40 -0.8000000 0 0 0 6 2
## 41 -0.5999999 0 0 1 6 8
## 42 0.0000000 0 0 1 6 8
## 43 -0.8000002 0 0 1 6 2
## 44 -0.7000000 0 0 1 6 2
## 45 -0.8000000 0 0 1 6 8
## 46 -1.0000000 1 0 1 6 2
## 47 -0.3000000 0 0 1 6 2
## 48 -0.4000000 -1 0 1 6 8
## 49 -0.3999999 0 0 1 6 2
## 50 -0.3000000 1 0 1 6 8
## 51 -0.5000000 0 0 1 6 2
Cilebülbül isimli yeni bir veri seti oluşturduk. Bu yeni veri setimizin içerisinde traffic1 verileri yer almaktadır. Buna ek olarak sabit ve cile isimli iki yeni değişken ekledik. cile değişkenini ifelse komutu ile open85 değişkenini kullanarak oluşturduk: open85 değeri 1 ise cile 8, değilse 2 değerini alıyor.
# Simple regression of admn90 on admn85 using the rows kept in Tablom.
# Passing the data frame through `data =` (instead of Tablom$... inside the
# formula) keeps the coefficient names clean and lets predict() accept newdata.
Model1 <- lm(admn90 ~ admn85, data = Tablom)
Model1
##
## Call:
## lm(formula = admn90 ~ admn85, data = Tablom)
##
## Coefficients:
## (Intercept) admn85
## 0.6667 0.3333
# Regression of dthrte90 on speed90 using the rows kept in Tablom2.
# Passing the data frame through `data =` keeps the coefficient names clean.
# NOTE: Tablom2 contains a single row here, so the slope cannot be estimated
# and is reported as NA; only the intercept (that row's dthrte90) is defined.
Model2 <- lm(dthrte90 ~ speed90, data = Tablom2)
Model2
##
## Call:
## lm(formula = dthrte90 ~ speed90, data = Tablom2)
##
## Coefficients:
## (Intercept) speed90
## 1.5 NA
# Attach stargazer with library(): unlike require(), it stops with an error
# when the package is not installed instead of returning FALSE with a warning.
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
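require() da library() gibi bir paketi yükler. Ancak paket kurulu değilse library() hata verirken require() yalnızca uyarı verip FALSE döndürür; eksik paketleri kendiliğinden indirmez. Kurulu olmayan bir paketi önce install.packages() ile kurmak gerekir.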
# Side-by-side text table of the two fitted models.
stargazer(Model1, Model2, type = "text")
##
## ================================================
## Dependent variable:
## ----------------------------
## admn90 dthrte90
## (1) (2)
## ------------------------------------------------
## admn85 0.333
## (0.279)
##
## speed90
##
##
## Constant 0.667** 1.500
## (0.211)
##
## ------------------------------------------------
## Observations 7 1
## R2 0.222 0.000
## Adjusted R2 0.067 0.000
## Residual Std. Error 0.365 (df = 5)
## F Statistic 1.429 (df = 1; 5)
## ================================================
## Note: *p<0.1; **p<0.05; ***p<0.01