library(DT)
library(gapminder)
library(gghighlight)
## Loading required package: ggplot2
library(ggrepel)
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(corrplot)
## corrplot 0.95 loaded
library(jtools)
library(margins)
library(ROCR)
## 
## Attaching package: 'ROCR'
## The following object is masked from 'package:margins':
## 
##     prediction
library(patchwork)
library(prediction)
## 
## Attaching package: 'prediction'
## The following object is masked from 'package:ROCR':
## 
##     prediction
library(stargazer)
library(tidyverse)
library(stargazer)
df <- read_csv("data.csv",
               na = ".")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 152 Columns: 30
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): prefecture, district, name, yomi, lastname, firstname, last_kana, ...
## dbl (17): dist_no, age, duplicate, win_smd, win_pr, votes, vshare, prop, qrc...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
names(df)
##  [1] "prefecture"  "dist_no"     "district"    "name"        "yomi"       
##  [6] "lastname"    "firstname"   "last_kana"   "first_kana"  "age"        
## [11] "party"       "recommended" "status"      "previous"    "duplicate"  
## [16] "win_smd"     "win_pr"      "votes"       "vshare"      "prop"       
## [21] "qrcode"      "area"        "population"  "density"     "scale"      
## [26] "teeth"       "gender"      "anage"       "last"        "first"
str(df)
## spc_tbl_ [152 × 30] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ prefecture : chr [1:152] "北海道" "北海道" "北海道" "北海道" ...
##  $ dist_no    : num [1:152] 1 1 1 1 1 2 2 2 2 3 ...
##  $ district   : chr [1:152] "北海道1区" "北海道1区" "北海道1区" "北海道1区" ...
##  $ name       : chr [1:152] "小林 悟" "道下 大樹" "田中 義人" "千葉 尚子" ...
##  $ yomi       : chr [1:152] "こばやし さとる" "みちした だいき" "たなか よしひと" "ちば なおこ" ...
##  $ lastname   : chr [1:152] "小林" "道下" "田中" "千葉" ...
##  $ firstname  : chr [1:152] "悟" "大樹" "義人" "尚子" ...
##  $ last_kana  : chr [1:152] "こばやし" "みちした" "たなか" "ちば" ...
##  $ first_kana : chr [1:152] "さとる" "だいき" "よしひと" "なおこ" ...
##  $ age        : num [1:152] 60 48 52 44 41 51 32 65 44 70 ...
##  $ party      : chr [1:152] "維新" "立憲" "参政" "共産" ...
##  $ recommended: chr [1:152] "NA" "NA" "NA" "NA" ...
##  $ status     : chr [1:152] "新" "前" "新" "新" ...
##  $ previous   : chr [1:152] "NA" "2" "NA" "NA" ...
##  $ duplicate  : num [1:152] 1 1 1 0 1 1 1 1 1 0 ...
##  $ win_smd    : num [1:152] 0 1 0 0 0 0 0 1 0 0 ...
##  $ win_pr     : num [1:152] 0 0 0 0 0 0 0 0 0 0 ...
##  $ votes      : num [1:152] 20000 108394 20097 21451 80133 ...
##  $ vshare     : num [1:152] 8 43.34 8.04 8.58 32.04 ...
##  $ prop       : num [1:152] 0 1 1 1 0 0 1 0 0 0 ...
##  $ qrcode     : num [1:152] 1 1 1 1 1 0 1 1 1 0 ...
##  $ area       : num [1:152] 769 769 769 769 769 ...
##  $ population : num [1:152] 454514 454514 454514 454514 454514 ...
##  $ density    : num [1:152] 591 591 591 591 591 ...
##  $ scale      : chr [1:152] "No" "No" "No" "No" ...
##  $ teeth      : num [1:152] 0 1 0 1 0 1 1 1 1 0 ...
##  $ gender     : num [1:152] 1 1 1 0 1 1 0 1 1 1 ...
##  $ anage      : num [1:152] 0 0 1 1 1 0 1 0 0 0 ...
##  $ last       : num [1:152] 0 0 0 0 1 0 0 0 0 0 ...
##  $ first      : num [1:152] 1 0 1 1 0 0 1 1 1 0 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   prefecture = col_character(),
##   ..   dist_no = col_double(),
##   ..   district = col_character(),
##   ..   name = col_character(),
##   ..   yomi = col_character(),
##   ..   lastname = col_character(),
##   ..   firstname = col_character(),
##   ..   last_kana = col_character(),
##   ..   first_kana = col_character(),
##   ..   age = col_double(),
##   ..   party = col_character(),
##   ..   recommended = col_character(),
##   ..   status = col_character(),
##   ..   previous = col_character(),
##   ..   duplicate = col_double(),
##   ..   win_smd = col_double(),
##   ..   win_pr = col_double(),
##   ..   votes = col_double(),
##   ..   vshare = col_double(),
##   ..   prop = col_double(),
##   ..   qrcode = col_double(),
##   ..   area = col_double(),
##   ..   population = col_double(),
##   ..   density = col_double(),
##   ..   scale = col_character(),
##   ..   teeth = col_double(),
##   ..   gender = col_double(),
##   ..   anage = col_double(),
##   ..   last = col_double(),
##   ..   first = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
stargazer(as.data.frame(df),
          type = "html",
          digits = 2)
Statistic N Mean St. Dev. Min Max
dist_no 152 3.09 2.51 1 12
age 152 54.56 10.95 26 82
duplicate 152 0.62 0.49 0 1
win_smd 152 0.30 0.46 0 1
win_pr 152 0.12 0.32 0 1
votes 152 56,149.90 37,049.57 2,210 132,361
vshare 152 30.26 19.44 1.23 74.66
prop 152 0.42 0.50 0 1
qrcode 151 0.64 0.48 0 1
area 152 3,240.19 3,149.21 118.52 15,318.04
population 152 345,026.40 68,005.92 225,023 460,689
density 152 471.60 871.37 17.75 3,875.17
teeth 152 0.45 0.50 0 1
gender 152 0.80 0.40 0 1
anage 152 0.29 0.46 0 1
last 152 0.36 0.48 0 1
first 152 0.55 0.50 0 1
df_1 <- df %>%
   group_by(party, gender, age, anage, teeth, first, last, vshare) %>%  # 政党別、性別、当落別に立候補者数 N を計算
   summarise(N = n())
## `summarise()` has grouped output by 'party', 'gender', 'age', 'anage', 'teeth',
## 'first', 'last'. You can override using the `.groups` argument.
df_1 <- df_1 |> 
  mutate(party = factor(party,
                        levels = c("自民",
                                   "立憲",
                                   "共産",
                                   "維新",
                                   "無所",
                                   "国民",
                                   "参政",
                                   "れい",
                                   "公明")))
df_1 <- df_1 |>
  mutate(teeth_chr = if_else(teeth == 1, "yes", "no")) |> 
  mutate(gender_chr = if_else(gender == 1, "male", "female"))
df_1 |> 
  ggplot() +
  geom_bar(aes(x = gender_chr,
               y = N,
               fill = teeth_chr),
           stat = "identity") +
      labs(x = "候補者の性別", y = "選挙公報で歯を見せているかどうか") +
  ggtitle("候補者の性別と歯を見せているかどうかの関係",
          "2024年総選挙における北海道・東北・北陸地方を例にして") +
   theme_bw(base_family = "HiraKakuProN-W3") # 文字化け対策

plot1 <- df |> 
  ggplot(aes(x = age, y = teeth)) +
  geom_jitter(size = 1,
              alpha = 1/3,
              width = 0,
              height = 0.05) +
  geom_smooth(method = "glm", 
    color = "red",
    method.args = list(family = binomial(link = "logit"))) +
  labs(x = "候補者の年齢",
       y = "選挙公報で候補者が歯を出している確率") +
  theme_bw(base_family = "HiraKakuProN-W3")

print(plot1)
## `geom_smooth()` using formula = 'y ~ x'

model_1 <- glm(teeth ~ age + gender,
               data = df_1, 
            family = binomial(link = "logit")) # 係数を「オッズの対数」に指定
tidy(model_1, conf.int = TRUE)
## # A tibble: 3 × 7
##   term        estimate std.error statistic   p.value conf.low conf.high
##   <chr>          <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)  1.41       0.964      1.47  0.143      -0.442     3.36  
## 2 age          0.00463    0.0164     0.282 0.778      -0.0275    0.0371
## 3 gender      -2.27       0.530     -4.27  0.0000192  -3.42     -1.30
stargazer::stargazer(model_1, type = "html")
Dependent variable:
teeth
age 0.005
(0.016)
gender -2.266***
(0.530)
Constant 1.413
(0.964)
Observations 152
Log Likelihood -92.392
Akaike Inf. Crit. 190.784
Note: p<0.1; p<0.05; p<0.01

仮説の設定

帰無仮説:年齢と年齢を公表するかどうかには関係がない 対立仮説:年齢と年齢を公表するかどうかには関係がある

stargazer::stargazer(as.data.frame(df),
                     type = "html")
Statistic N Mean St. Dev. Min Max
dist_no 152 3.086 2.505 1 12
age 152 54.559 10.945 26 82
duplicate 152 0.625 0.486 0 1
win_smd 152 0.303 0.461 0 1
win_pr 152 0.118 0.324 0 1
votes 152 56,149.900 37,049.570 2,210 132,361
vshare 152 30.263 19.439 1.230 74.660
prop 152 0.421 0.495 0 1
qrcode 151 0.642 0.481 0 1
area 152 3,240.185 3,149.206 118.520 15,318.040
population 152 345,026.400 68,005.920 225,023 460,689
density 152 471.597 871.373 17.751 3,875.169
teeth 152 0.454 0.500 0 1
gender 152 0.796 0.404 0 1
anage 152 0.289 0.455 0 1
last 152 0.362 0.482 0 1
first 152 0.553 0.499 0 1
plot2 <- df |> 
  ggplot(aes(x = age, y = anage)) +
  geom_jitter(size = 1,
              alpha = 1/3,
              width = 0,
              height = 0.05) +
  geom_smooth(method = "glm", 
    color = "red",
    method.args = list(family = binomial(link = "logit"))) +
  labs(x = "候補者の年齢",
       y = "選挙公報で候補者の年齢を公表しているかどうかの確率") +
  theme_bw(base_family = "HiraKakuProN-W3")

print(plot2)
## `geom_smooth()` using formula = 'y ~ x'

model_2 <- glm(anage ~ age,
               data = df_1, 
            family = binomial(link = "logit")) # 係数を「オッズの対数」に指定
tidy(model_2, conf.int = TRUE)
## # A tibble: 2 × 7
##   term        estimate std.error statistic  p.value conf.low conf.high
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
## 1 (Intercept)   2.51      0.965       2.60 0.00942     0.655    4.46  
## 2 age          -0.0642    0.0183     -3.50 0.000458   -0.102   -0.0295
stargazer::stargazer(model_2, type = "html")
Dependent variable:
anage
age -0.064***
(0.018)
Constant 2.506***
(0.965)
Observations 152
Log Likelihood -84.577
Akaike Inf. Crit. 173.155
Note: p<0.1; p<0.05; p<0.01
p_48 <- predict(model_2, type = "response", #予測当選確率を表示したい時の指定  
               newdata = data_frame(age = 48))
## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## ℹ Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p_48
##         1 
## 0.3603702

ロジスティック回帰分析の係数.

→表示される回帰式の係数 estimate は 「オッズの対数」 = Log Odds.
*Log Odds は解釈しにくい → 確率に変換する

Marginal Effects

margins(model_2, variables = "age",
        at = list(age = 48))
## Average marginal effects at specified values
## glm(formula = anage ~ age, family = binomial(link = "logit"),     data = df_1)
##  at(age)      age
##       48 -0.01479
df_pred <- cplot(model_2, 
                  x = "age", 
                  what = "prediction") %>% # Y軸に予測当選確率を表示させる設定  
  as_data_frame() %>%
  ggplot(aes(x = xvals, 
    y = yvals, 
    ymin = lower, 
    ymax = upper)) +
  geom_ribbon(fill = "gray") +
  geom_line() +
  labs(x = "候補者の年齢(歳)", 
     y = "候補者が年齢を公表する確率の予測値", 
     title = "候補者が年齢を公表する確率の予測値",
     "2024年総選挙における北海道・東北・北陸地方を例にして") +
theme_bw(base_family = "HiraKakuProN-W3")
## Warning: `as_data_frame()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` (with slightly different semantics) to convert to a
##   tibble, or `as.data.frame()` to convert to a data frame.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

df_me <- cplot(model_2,  
                  x = "age",    # x軸に据える変数
                  dx = "age",   # 説明変数
                  what = "effect") %>% # Y軸に限界効果を表示させる設定  
  as_data_frame() %>%
  ggplot(aes(x = xvals, 
    y = yvals, 
    ymin = lower, 
    ymax = upper)) +
  geom_ribbon(fill = "gray") +
  geom_line() +
  geom_abline(intercept = 0, slope = 0, linetype = "dashed", color = "red") +
  labs(x = "候補者の年齢(歳)", 
     y = "候補者の年齢の限界効果", 
     title = "候補者の年齢の限界効果",
     "2024年総選挙における北海道・東北・北陸地方を例にして") +
  theme_bw(base_family = "HiraKakuProN-W3")

df_pred + df_me