23.03.2026_5.ders

Bu haftaki konumuz “Lojistik Regresyon”

Dersteki Uygulama

library(readr)
heart_transplant <- read_csv("heart_transplant.csv")
## Rows: 103 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): survived, prior, transplant
## dbl (5): id, acceptyear, age, survtime, wait
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(heart_transplant)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
ggplot(data = heart_transplant, aes(x = age, y = survived)) + 
  geom_jitter(width = 0, height = 0.05, alpha = 0.5)

glm(formula = survived=="alive" ~ age, family = binomial, data = heart_transplant)
## 
## Call:  glm(formula = survived == "alive" ~ age, family = binomial, data = heart_transplant)
## 
## Coefficients:
## (Intercept)          age  
##     1.56438     -0.05847  
## 
## Degrees of Freedom: 102 Total (i.e. Null);  101 Residual
## Null Deviance:       120.5 
## Residual Deviance: 113.7     AIC: 117.7
library(tidyverse)
## Warning: package 'dplyr' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ stringr   1.6.0
## ✔ forcats   1.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
heart_transplant <- heart_transplant |>
  mutate(is_alive = ifelse(survived == "alive", 1, 0))
data_space <- ggplot(data = heart_transplant, aes(x = age, y = is_alive)) + 
  geom_jitter(width = 0, height = 0.05, alpha = 0.5)
data_space

data_space +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

glm(is_alive ~ age, data = heart_transplant, family = binomial)
## 
## Call:  glm(formula = is_alive ~ age, family = binomial, data = heart_transplant)
## 
## Coefficients:
## (Intercept)          age  
##     1.56438     -0.05847  
## 
## Degrees of Freedom: 102 Total (i.e. Null);  101 Residual
## Null Deviance:       120.5 
## Residual Deviance: 113.7     AIC: 117.7
binomial()
## 
## Family: binomial 
## Link function: logit
exp(-0.05847)
## [1] 0.9432065
data_space <- ggplot(data = heart_transplant, aes(x = age, y = is_alive)) + 
  geom_jitter(width = 0, height = 0.05, alpha = 0.5)
data_space

data_space + 
  geom_smooth(method = "lm", se = FALSE)+
    geom_smooth(method = "glm", se = FALSE, color = "red", 
              method.args = list(family = "binomial"))
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

heart_breaks <- heart_transplant |>
  pull(age) |>
  quantile(probs = 0:7/10)

data_binned_space <- data_space + 
  stat_summary_bin(
    fun = "mean", color = "red",
    geom = "line", breaks = heart_breaks
  )

data_binned_space
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

library(broom)
## Warning: package 'broom' was built under R version 4.5.3
mod_heart <- glm(is_alive ~ age, data = heart_transplant, family = binomial)

data_binned_space + 
  geom_line(
    data = augment(mod_heart, type.predict = "response"), 
    aes(y = .fitted), color = "blue"
  )
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

heart_transplant_plus <- mod_heart |>
  augment(type.predict = "response") |>
  mutate(y_hat = .fitted)
ggplot(heart_transplant_plus, aes(x = age, y = y_hat)) + 
  geom_point() + 
  geom_line() + 
  scale_y_continuous("Probability of being alive", limits = c(0, 1))

heart_transplant_plus <- heart_transplant_plus |>
  mutate(odds_hat = y_hat / (1 - y_hat))
ggplot(heart_transplant_plus, aes(x = age, y = odds_hat)) + 
  geom_point() + 
  geom_line() + 
  scale_y_continuous("Odds of being alive")

heart_transplant_plus <- heart_transplant_plus |>
  mutate(log_odds_hat = log(odds_hat))
ggplot(heart_transplant_plus, aes(x = age, y = log_odds_hat)) + 
  geom_point() + 
  geom_line() + 
  scale_y_continuous("Log(odds) of being alive")

exp(coef(mod_heart))
## (Intercept)         age 
##   4.7797050   0.9432099

Kod Üreterek Deneme Uygulaması

veri <- mtcars

# Bağımlı değişken: am (0: Otomatik, 1: Manuel vites) -> Faktöre çeviriyoruz
veri$am <- as.factor(veri$am)


set.seed(42) 


satir_sayisi   <- nrow(veri)
secilen_satirlar <- sample(1:satir_sayisi, size = round(0.8 * satir_sayisi))

egitim_seti <- veri[secilen_satirlar, ]
test_seti   <- veri[-secilen_satirlar, ]


# am: Bağımlı değişken / mpg ve hp: Bağımsız değişkenler

model <- glm(am ~ mpg + hp, data = egitim_seti, family = binomial)


summary(model)
## 
## Call:
## glm(formula = am ~ mpg + hp, family = binomial, data = egitim_seti)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -36.33712   18.38827  -1.976   0.0481 *
## mpg           1.40766    0.71576   1.967   0.0492 *
## hp            0.05444    0.02971   1.832   0.0669 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 34.646  on 25  degrees of freedom
## Residual deviance: 16.003  on 23  degrees of freedom
## AIC: 22.003
## 
## Number of Fisher Scoring iterations: 7
tahmin_olasilik <- predict(model, newdata = test_seti, type = "response")


tahmin_sinif <- ifelse(tahmin_olasilik > 0.5, 1, 0)


matris <- table(Gercek = test_seti$am, Tahmin = tahmin_sinif)
print("--- Karisiklik Matrisi ---")
## [1] "--- Karisiklik Matrisi ---"
print(matris)
##       Tahmin
## Gercek 0 1
##      0 2 1
##      1 0 3
dogruluk <- sum(diag(matris)) / sum(matris)
print(paste("Modelin Dogruluk Orani (Accuracy):", round(dogruluk, 4)))
## [1] "Modelin Dogruluk Orani (Accuracy): 0.8333"

Öğrenme Günlüğü

Kategorik bağımlı değişkenlerle de regresyon yapabildiğimizi ve bu regresyonunun adının lojistik regresyon olduğunu ve bu uygulamayı R üzerinden nasıl yapabileceğimizi öğrendik.