library(MASS)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
# Load the Boston data set from the attached MASS package, then print a
# compact column-by-column overview (type and first values of each column).
data("Boston", package = "MASS")
Boston |>
  glimpse()
## Rows: 506
## Columns: 14
## $ crim <dbl> 0.00632, 0.02731, 0.02729, 0.03237, 0.06905, 0.02985, 0.08829,…
## $ zn <dbl> 18.0, 0.0, 0.0, 0.0, 0.0, 0.0, 12.5, 12.5, 12.5, 12.5, 12.5, 1…
## $ indus <dbl> 2.31, 7.07, 7.07, 2.18, 2.18, 2.18, 7.87, 7.87, 7.87, 7.87, 7.…
## $ chas <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nox <dbl> 0.538, 0.469, 0.469, 0.458, 0.458, 0.458, 0.524, 0.524, 0.524,…
## $ rm <dbl> 6.575, 6.421, 7.185, 6.998, 7.147, 6.430, 6.012, 6.172, 5.631,…
## $ age <dbl> 65.2, 78.9, 61.1, 45.8, 54.2, 58.7, 66.6, 96.1, 100.0, 85.9, 9…
## $ dis <dbl> 4.0900, 4.9671, 4.9671, 6.0622, 6.0622, 6.0622, 5.5605, 5.9505…
## $ rad <int> 1, 2, 2, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,…
## $ tax <dbl> 296, 242, 242, 222, 222, 222, 311, 311, 311, 311, 311, 311, 31…
## $ ptratio <dbl> 15.3, 17.8, 17.8, 18.7, 18.7, 18.7, 15.2, 15.2, 15.2, 15.2, 15…
## $ black <dbl> 396.90, 396.90, 392.83, 394.63, 396.90, 394.12, 395.60, 396.90…
## $ lstat <dbl> 4.98, 9.14, 4.03, 2.94, 5.33, 5.21, 12.43, 19.15, 29.93, 17.10…
## $ medv <dbl> 24.0, 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15…
# List the column names before renaming them.
colnames(Boston)
## [1] "crim" "zn" "indus" "chas" "nox" "rm" "age"
## [8] "dis" "rad" "tax" "ptratio" "black" "lstat" "medv"
# Copy of Boston with Turkish names for the variables used in this analysis.
# dplyr::rename() takes new_name = old_name pairs and keeps column order.
boston_tr <- rename(
  Boston,
  nehir_kenari = chas,   # "riverside"
  oda_sayisi = rm,       # "number of rooms"
  emlak_vergisi = tax,   # "property tax"
  dusuk_sosyoek = lstat, # "low socio-economic status"
  konut_degeri = medv    # "house value"
)

# Descriptive statistics (min, quartiles, mean, max) for every column.
summary(boston_tr)
## crim zn indus nehir_kenari
## Min. : 0.00632 Min. : 0.00 Min. : 0.46 Min. :0.00000
## 1st Qu.: 0.08205 1st Qu.: 0.00 1st Qu.: 5.19 1st Qu.:0.00000
## Median : 0.25651 Median : 0.00 Median : 9.69 Median :0.00000
## Mean : 3.61352 Mean : 11.36 Mean :11.14 Mean :0.06917
## 3rd Qu.: 3.67708 3rd Qu.: 12.50 3rd Qu.:18.10 3rd Qu.:0.00000
## Max. :88.97620 Max. :100.00 Max. :27.74 Max. :1.00000
## nox oda_sayisi age dis
## Min. :0.3850 Min. :3.561 Min. : 2.90 Min. : 1.130
## 1st Qu.:0.4490 1st Qu.:5.886 1st Qu.: 45.02 1st Qu.: 2.100
## Median :0.5380 Median :6.208 Median : 77.50 Median : 3.207
## Mean :0.5547 Mean :6.285 Mean : 68.57 Mean : 3.795
## 3rd Qu.:0.6240 3rd Qu.:6.623 3rd Qu.: 94.08 3rd Qu.: 5.188
## Max. :0.8710 Max. :8.780 Max. :100.00 Max. :12.127
## rad emlak_vergisi ptratio black
## Min. : 1.000 Min. :187.0 Min. :12.60 Min. : 0.32
## 1st Qu.: 4.000 1st Qu.:279.0 1st Qu.:17.40 1st Qu.:375.38
## Median : 5.000 Median :330.0 Median :19.05 Median :391.44
## Mean : 9.549 Mean :408.2 Mean :18.46 Mean :356.67
## 3rd Qu.:24.000 3rd Qu.:666.0 3rd Qu.:20.20 3rd Qu.:396.23
## Max. :24.000 Max. :711.0 Max. :22.00 Max. :396.90
## dusuk_sosyoek konut_degeri
## Min. : 1.73 Min. : 5.00
## 1st Qu.: 6.95 1st Qu.:17.02
## Median :11.36 Median :21.20
## Mean :12.65 Mean :22.53
## 3rd Qu.:16.95 3rd Qu.:25.00
## Max. :37.97 Max. :50.00
# Konut değerinin ortalaması 22.53'tür.
# Konut değerinin medyanı 21.20'dir.
# Konut değerinin ranjı (maksimum - minimum = 50.00 - 5.00) 45'tir.
# Frequency of each nehir_kenari value (0 / 1).
table(boston_tr[["nehir_kenari"]])
##
## 0 1
## 471 35
# Same counts expressed as percentages of all rows.
100 * proportions(table(boston_tr[["nehir_kenari"]]))
##
## 0 1
## 93.083004 6.916996
# Keep only the two variables needed for the correlation/regression part and
# drop any incomplete rows. NOTE: this overwrites boston_tr, so the other
# renamed columns are no longer available after this point.
# dplyr::select is spelled out because MASS also exports a select().
boston_tr <- na.omit(
  dplyr::select(boston_tr, oda_sayisi, konut_degeri)
)
# Scatter plot: room count on the x-axis, house value on the y-axis.
ggplot(data = boston_tr, mapping = aes(x = oda_sayisi, y = konut_degeri)) +
  geom_point() +
  labs(
    title = "Oda sayısı ile konut değerinin ilişkisi",
    x = "oda_sayisi (rm)",
    y = "konut_degeri (medv)"
  )
# Correlation between room count and house value (cor()'s default method is
# Pearson); pairwise.complete.obs restricts the computation to rows where
# both values are present.
with(
  boston_tr,
  cor(oda_sayisi, konut_degeri, use = "pairwise.complete.obs")
)
## [1] 0.6953599
# Korelasyon katsayısı yaklaşık 0.70'tir (r = 0.695). Oda sayısı ile konut değeri arasında pozitif yönlü, orta-yüksek düzeyde doğrusal bir ilişki vardır.
# Simple linear regression predicting house value from room count.
# In an R formula the response goes on the LEFT of `~`. Every plot in this
# script puts konut_degeri on the y-axis, and geom_smooth(method = "lm") fits
# y ~ x, so the matching model is konut_degeri ~ oda_sayisi. The previous
# formula (oda_sayisi ~ konut_degeri) regressed rooms on price instead, so its
# slope and intercept did not describe the plotted regression line.
boston_mod <- lm(konut_degeri ~ oda_sayisi, data = boston_tr)
summary(boston_mod)
# NOTE(review): the console output that used to be pasted here came from the
# reversed model and was removed; re-knit the document to regenerate it.
#
# Interpretation (values to confirm against the regenerated output):
# - R-squared is unchanged by swapping the two variables: with one predictor
#   it equals the squared correlation, 0.6954^2 = 0.4835 (adjusted: 0.4825),
#   so room count explains about 48% of the variance in house value.
# - Slope: about 9.10 (= 0.4835 / 0.053122, the old reversed slope), i.e. one
#   additional room is associated with a ~9.1 unit higher house value.
# - Intercept: about -34.7 (= 22.53 - 9.10 * 6.285, from the column means).
# Same scatter plot with the fitted least-squares line overlaid in red;
# se = FALSE hides the confidence band around the line.
ggplot(data = boston_tr, mapping = aes(x = oda_sayisi, y = konut_degeri)) +
  geom_point() +
  geom_smooth(method = "lm", colour = "red", se = FALSE) +
  labs(
    title = "Oda sayısı ile konut değeri arasındaki ilişkiyi gösteren basit doğrusal regresyon çizgisi",
    x = "oda_sayisi(rm)",
    y = "konut_degeri (medv)"
  )
## `geom_smooth()` using formula = 'y ~ x'
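# Soru: geom_jitter fonksiyonunun kullanım amacı nedir?
# Cevap: Veri görselleştirmede noktaların üst üste binmesi (overplotting) sorununu çözmek için kullanılan önemli bir araçtır; noktalara küçük rastgele kaydırmalar ekleyerek çakışan gözlemleri görünür hale getirir.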