Bu haftaki konumuz “Lojistik Regresyon”
library(readr)
heart_transplant <- read_csv("heart_transplant.csv")
## Rows: 103 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): survived, prior, transplant
## dbl (5): id, acceptyear, age, survtime, wait
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(heart_transplant)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
ggplot(data = heart_transplant, aes(x = age, y = survived)) +
geom_jitter(width = 0, height = 0.05, alpha = 0.5)
glm(formula = survived=="alive" ~ age, family = binomial, data = heart_transplant)
##
## Call: glm(formula = survived == "alive" ~ age, family = binomial, data = heart_transplant)
##
## Coefficients:
## (Intercept) age
## 1.56438 -0.05847
##
## Degrees of Freedom: 102 Total (i.e. Null); 101 Residual
## Null Deviance: 120.5
## Residual Deviance: 113.7 AIC: 117.7
library(tidyverse)
## Warning: package 'dplyr' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ stringr 1.6.0
## ✔ forcats 1.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
heart_transplant <- heart_transplant |>
mutate(is_alive = ifelse(survived == "alive", 1, 0))
data_space <- ggplot(data = heart_transplant, aes(x = age, y = is_alive)) +
geom_jitter(width = 0, height = 0.05, alpha = 0.5)
data_space
data_space +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
glm(is_alive ~ age, data = heart_transplant, family = binomial)
##
## Call: glm(formula = is_alive ~ age, family = binomial, data = heart_transplant)
##
## Coefficients:
## (Intercept) age
## 1.56438 -0.05847
##
## Degrees of Freedom: 102 Total (i.e. Null); 101 Residual
## Null Deviance: 120.5
## Residual Deviance: 113.7 AIC: 117.7
binomial()
##
## Family: binomial
## Link function: logit
exp(-0.05847)
## [1] 0.9432065
data_space <- ggplot(data = heart_transplant, aes(x = age, y = is_alive)) +
geom_jitter(width = 0, height = 0.05, alpha = 0.5)
data_space
data_space +
geom_smooth(method = "lm", se = FALSE)+
geom_smooth(method = "glm", se = FALSE, color = "red",
method.args = list(family = "binomial"))
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
heart_breaks <- heart_transplant |>
pull(age) |>
quantile(probs = 0:7/10)
data_binned_space <- data_space +
stat_summary_bin(
fun = "mean", color = "red",
geom = "line", breaks = heart_breaks
)
data_binned_space
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
library(broom)
## Warning: package 'broom' was built under R version 4.5.3
mod_heart <- glm(is_alive ~ age, data = heart_transplant, family = binomial)
data_binned_space +
geom_line(
data = augment(mod_heart, type.predict = "response"),
aes(y = .fitted), color = "blue"
)
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
heart_transplant_plus <- mod_heart |>
augment(type.predict = "response") |>
mutate(y_hat = .fitted)
ggplot(heart_transplant_plus, aes(x = age, y = y_hat)) +
geom_point() +
geom_line() +
scale_y_continuous("Probability of being alive", limits = c(0, 1))
heart_transplant_plus <- heart_transplant_plus |>
mutate(odds_hat = y_hat / (1 - y_hat))
ggplot(heart_transplant_plus, aes(x = age, y = odds_hat)) +
geom_point() +
geom_line() +
scale_y_continuous("Odds of being alive")
heart_transplant_plus <- heart_transplant_plus |>
mutate(log_odds_hat = log(odds_hat))
ggplot(heart_transplant_plus, aes(x = age, y = log_odds_hat)) +
geom_point() +
geom_line() +
scale_y_continuous("Log(odds) of being alive")
exp(coef(mod_heart))
## (Intercept) age
## 4.7797050 0.9432099
veri <- mtcars
# Bağımlı değişken: am (0: Otomatik, 1: Manuel vites) -> Faktöre çeviriyoruz
veri$am <- as.factor(veri$am)
set.seed(42)
satir_sayisi <- nrow(veri)
secilen_satirlar <- sample(1:satir_sayisi, size = round(0.8 * satir_sayisi))
egitim_seti <- veri[secilen_satirlar, ]
test_seti <- veri[-secilen_satirlar, ]
# am: Bağımlı değişken / mpg ve hp: Bağımsız değişkenler
model <- glm(am ~ mpg + hp, data = egitim_seti, family = binomial)
summary(model)
##
## Call:
## glm(formula = am ~ mpg + hp, family = binomial, data = egitim_seti)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -36.33712 18.38827 -1.976 0.0481 *
## mpg 1.40766 0.71576 1.967 0.0492 *
## hp 0.05444 0.02971 1.832 0.0669 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 34.646 on 25 degrees of freedom
## Residual deviance: 16.003 on 23 degrees of freedom
## AIC: 22.003
##
## Number of Fisher Scoring iterations: 7
tahmin_olasilik <- predict(model, newdata = test_seti, type = "response")
tahmin_sinif <- ifelse(tahmin_olasilik > 0.5, 1, 0)
matris <- table(Gercek = test_seti$am, Tahmin = tahmin_sinif)
print("--- Karisiklik Matrisi ---")
## [1] "--- Karisiklik Matrisi ---"
print(matris)
## Tahmin
## Gercek 0 1
## 0 2 1
## 1 0 3
dogruluk <- sum(diag(matris)) / sum(matris)
print(paste("Modelin Dogruluk Orani (Accuracy):", round(dogruluk, 4)))
## [1] "Modelin Dogruluk Orani (Accuracy): 0.8333"
Kategorik bağımlı değişkenlerle de regresyon yapabildiğimizi ve bu regresyonunun adının lojistik regresyon olduğunu ve bu uygulamayı R üzerinden nasıl yapabileceğimizi öğrendik.