ロジスティック回帰分析.knit

d <- read.csv(file = 'https://stats.dip.jp/01_ds/data/gender.csv')

library(DT)
datatable(d, caption = '性別データ')

d$性別 <- ifelse(d$性別 == '男性', 1, 0)
fit <- glm(性別 ~ 身長, data = d, family = 'binomial')
summary(fit)

## 
## Call:
## glm(formula = 性別 ~ 身長, family = "binomial", data = d)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -105.1286    18.8522  -5.576 2.45e-08 ***
## 身長           0.6092     0.1092   5.577 2.45e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 186.965  on 134  degrees of freedom
## Residual deviance:  41.452  on 133  degrees of freedom
## AIC: 45.452
## 
## Number of Fisher Scoring iterations: 7

xp <- seq(150, 200, 1) # 
yp <- predict(fit, type = 'response',
              newdata = data.frame(身長 = xp))

matplot(x = d$身長, y = d$性別, pch = 1,
        ylim = c(0, 1), xlim = c(150, 200),
        main = 'ロジスティック回帰分析',
        xlab = '身長',
        ylab = '男性である確率')
grid()

matlines(x = xp, y = yp, lty = 1, col = 2, lwd = 2)

library(latex2exp)
text(x = 3.8, y = 0.4, adj = 0, 
     labels = TeX('$\\leftarrow \\hat{p}=\\frac{1}{1+\\exp\\{-(b_0 + b_1 x)\\}}$'))
legend('topleft', pch = c(1, NA), lty = c(NA, 1), col = c(1, 2),
       legend = c('男性(1)，女性(0)', 'ロジスティック回帰モデル'))

library(plotly)

##  要求されたパッケージ ggplot2 をロード中です

## 
##  次のパッケージを付け加えます: 'plotly'

##  以下のオブジェクトは 'package:ggplot2' からマスクされています:
## 
##     last_plot

##  以下のオブジェクトは 'package:latex2exp' からマスクされています:
## 
##     TeX

##  以下のオブジェクトは 'package:stats' からマスクされています:
## 
##     filter

##  以下のオブジェクトは 'package:graphics' からマスクされています:
## 
##     layout

plot_ly(type = 'scatter') |>
  add_trace(x = d$身長, y = d$性別,
            mode = 'markers', name = '男性(1)，女性(0)') |>
  add_trace(x = xp, y = yp, 
            mode = 'lines',   name = 'ロジスティック回帰モデル') |> 
  layout(title = 'ロジスティック回帰分析', 
         xaxis = list(title = '身長'),
         yaxis = list(title = '男性である確率'))

## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

yp <- predict(fit, type = 'response', newdata = data.frame(身長 = 170))
sprintf('男性である確率：%2.1f％', yp * 100)

## [1] "男性である確率：17.2％"