library(DT)
library(gapminder)
library(gghighlight)
## Loading required package: ggplot2
library(ggrepel)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(corrplot)
## corrplot 0.95 loaded
library(jtools)
library(margins)
library(ROCR)
##
## Attaching package: 'ROCR'
## The following object is masked from 'package:margins':
##
## prediction
library(patchwork)
library(prediction)
##
## Attaching package: 'prediction'
## The following object is masked from 'package:ROCR':
##
## prediction
library(stargazer)
library(tidyverse)
library(stargazer)
df <- read_csv("data.csv",
na = ".")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 152 Columns: 30
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): prefecture, district, name, yomi, lastname, firstname, last_kana, ...
## dbl (17): dist_no, age, duplicate, win_smd, win_pr, votes, vshare, prop, qrc...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
names(df)
## [1] "prefecture" "dist_no" "district" "name" "yomi"
## [6] "lastname" "firstname" "last_kana" "first_kana" "age"
## [11] "party" "recommended" "status" "previous" "duplicate"
## [16] "win_smd" "win_pr" "votes" "vshare" "prop"
## [21] "qrcode" "area" "population" "density" "scale"
## [26] "teeth" "gender" "anage" "last" "first"
str(df)
## spc_tbl_ [152 × 30] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ prefecture : chr [1:152] "北海道" "北海道" "北海道" "北海道" ...
## $ dist_no : num [1:152] 1 1 1 1 1 2 2 2 2 3 ...
## $ district : chr [1:152] "北海道1区" "北海道1区" "北海道1区" "北海道1区" ...
## $ name : chr [1:152] "小林 悟" "道下 大樹" "田中 義人" "千葉 尚子" ...
## $ yomi : chr [1:152] "こばやし さとる" "みちした だいき" "たなか よしひと" "ちば なおこ" ...
## $ lastname : chr [1:152] "小林" "道下" "田中" "千葉" ...
## $ firstname : chr [1:152] "悟" "大樹" "義人" "尚子" ...
## $ last_kana : chr [1:152] "こばやし" "みちした" "たなか" "ちば" ...
## $ first_kana : chr [1:152] "さとる" "だいき" "よしひと" "なおこ" ...
## $ age : num [1:152] 60 48 52 44 41 51 32 65 44 70 ...
## $ party : chr [1:152] "維新" "立憲" "参政" "共産" ...
## $ recommended: chr [1:152] "NA" "NA" "NA" "NA" ...
## $ status : chr [1:152] "新" "前" "新" "新" ...
## $ previous : chr [1:152] "NA" "2" "NA" "NA" ...
## $ duplicate : num [1:152] 1 1 1 0 1 1 1 1 1 0 ...
## $ win_smd : num [1:152] 0 1 0 0 0 0 0 1 0 0 ...
## $ win_pr : num [1:152] 0 0 0 0 0 0 0 0 0 0 ...
## $ votes : num [1:152] 20000 108394 20097 21451 80133 ...
## $ vshare : num [1:152] 8 43.34 8.04 8.58 32.04 ...
## $ prop : num [1:152] 0 1 1 1 0 0 1 0 0 0 ...
## $ qrcode : num [1:152] 1 1 1 1 1 0 1 1 1 0 ...
## $ area : num [1:152] 769 769 769 769 769 ...
## $ population : num [1:152] 454514 454514 454514 454514 454514 ...
## $ density : num [1:152] 591 591 591 591 591 ...
## $ scale : chr [1:152] "No" "No" "No" "No" ...
## $ teeth : num [1:152] 0 1 0 1 0 1 1 1 1 0 ...
## $ gender : num [1:152] 1 1 1 0 1 1 0 1 1 1 ...
## $ anage : num [1:152] 0 0 1 1 1 0 1 0 0 0 ...
## $ last : num [1:152] 0 0 0 0 1 0 0 0 0 0 ...
## $ first : num [1:152] 1 0 1 1 0 0 1 1 1 0 ...
## - attr(*, "spec")=
## .. cols(
## .. prefecture = col_character(),
## .. dist_no = col_double(),
## .. district = col_character(),
## .. name = col_character(),
## .. yomi = col_character(),
## .. lastname = col_character(),
## .. firstname = col_character(),
## .. last_kana = col_character(),
## .. first_kana = col_character(),
## .. age = col_double(),
## .. party = col_character(),
## .. recommended = col_character(),
## .. status = col_character(),
## .. previous = col_character(),
## .. duplicate = col_double(),
## .. win_smd = col_double(),
## .. win_pr = col_double(),
## .. votes = col_double(),
## .. vshare = col_double(),
## .. prop = col_double(),
## .. qrcode = col_double(),
## .. area = col_double(),
## .. population = col_double(),
## .. density = col_double(),
## .. scale = col_character(),
## .. teeth = col_double(),
## .. gender = col_double(),
## .. anage = col_double(),
## .. last = col_double(),
## .. first = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
stargazer(as.data.frame(df),
type = "html",
digits = 2)
|
|
|
Statistic
|
N
|
Mean
|
St. Dev.
|
Min
|
Max
|
|
|
|
dist_no
|
152
|
3.09
|
2.51
|
1
|
12
|
|
age
|
152
|
54.56
|
10.95
|
26
|
82
|
|
duplicate
|
152
|
0.62
|
0.49
|
0
|
1
|
|
win_smd
|
152
|
0.30
|
0.46
|
0
|
1
|
|
win_pr
|
152
|
0.12
|
0.32
|
0
|
1
|
|
votes
|
152
|
56,149.90
|
37,049.57
|
2,210
|
132,361
|
|
vshare
|
152
|
30.26
|
19.44
|
1.23
|
74.66
|
|
prop
|
152
|
0.42
|
0.50
|
0
|
1
|
|
qrcode
|
151
|
0.64
|
0.48
|
0
|
1
|
|
area
|
152
|
3,240.19
|
3,149.21
|
118.52
|
15,318.04
|
|
population
|
152
|
345,026.40
|
68,005.92
|
225,023
|
460,689
|
|
density
|
152
|
471.60
|
871.37
|
17.75
|
3,875.17
|
|
teeth
|
152
|
0.45
|
0.50
|
0
|
1
|
|
gender
|
152
|
0.80
|
0.40
|
0
|
1
|
|
anage
|
152
|
0.29
|
0.46
|
0
|
1
|
|
last
|
152
|
0.36
|
0.48
|
0
|
1
|
|
first
|
152
|
0.55
|
0.50
|
0
|
1
|
|
|
df_1 <- df %>%
group_by(party, gender, age, anage, teeth, first, last, vshare) %>% # 政党別、性別、当落別に立候補者数 N を計算
summarise(N = n())
## `summarise()` has grouped output by 'party', 'gender', 'age', 'anage', 'teeth',
## 'first', 'last'. You can override using the `.groups` argument.
df_1 <- df_1 |>
mutate(party = factor(party,
levels = c("自民",
"立憲",
"共産",
"維新",
"無所",
"国民",
"参政",
"れい",
"公明")))
df_1 <- df_1 |>
mutate(teeth_chr = if_else(teeth == 1, "yes", "no")) |>
mutate(gender_chr = if_else(gender == 1, "male", "female"))
df_1 |>
ggplot() +
geom_bar(aes(x = gender_chr,
y = N,
fill = teeth_chr),
stat = "identity") +
labs(x = "候補者の性別", y = "選挙公報で歯を見せているかどうか") +
ggtitle("候補者の性別と歯を見せているかどうかの関係",
"2024年総選挙における北海道・東北・北陸地方を例にして") +
theme_bw(base_family = "HiraKakuProN-W3") # 文字化け対策

plot1 <- df |>
ggplot(aes(x = age, y = teeth)) +
geom_jitter(size = 1,
alpha = 1/3,
width = 0,
height = 0.05) +
geom_smooth(method = "glm",
color = "red",
method.args = list(family = binomial(link = "logit"))) +
labs(x = "候補者の年齢",
y = "選挙公報で候補者が歯を出している確率") +
theme_bw(base_family = "HiraKakuProN-W3")
print(plot1)
## `geom_smooth()` using formula = 'y ~ x'

model_1 <- glm(teeth ~ age + gender,
data = df_1,
family = binomial(link = "logit")) # 係数を「オッズの対数」に指定
tidy(model_1, conf.int = TRUE)
## # A tibble: 3 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 1.41 0.964 1.47 0.143 -0.442 3.36
## 2 age 0.00463 0.0164 0.282 0.778 -0.0275 0.0371
## 3 gender -2.27 0.530 -4.27 0.0000192 -3.42 -1.30
stargazer::stargazer(model_1, type = "html")
|
|
|
|
Dependent variable:
|
|
|
|
|
|
teeth
|
|
|
|
age
|
0.005
|
|
|
(0.016)
|
|
|
|
|
gender
|
-2.266***
|
|
|
(0.530)
|
|
|
|
|
Constant
|
1.413
|
|
|
(0.964)
|
|
|
|
|
|
|
Observations
|
152
|
|
Log Likelihood
|
-92.392
|
|
Akaike Inf. Crit.
|
190.784
|
|
|
|
Note:
|
p<0.1; p<0.05;
p<0.01
|
仮説の設定
帰無仮説:年齢と年齢を公表するかどうかには関係がない
対立仮説:年齢と年齢を公表するかどうかには関係がある
stargazer::stargazer(as.data.frame(df),
type = "html")
|
|
|
Statistic
|
N
|
Mean
|
St. Dev.
|
Min
|
Max
|
|
|
|
dist_no
|
152
|
3.086
|
2.505
|
1
|
12
|
|
age
|
152
|
54.559
|
10.945
|
26
|
82
|
|
duplicate
|
152
|
0.625
|
0.486
|
0
|
1
|
|
win_smd
|
152
|
0.303
|
0.461
|
0
|
1
|
|
win_pr
|
152
|
0.118
|
0.324
|
0
|
1
|
|
votes
|
152
|
56,149.900
|
37,049.570
|
2,210
|
132,361
|
|
vshare
|
152
|
30.263
|
19.439
|
1.230
|
74.660
|
|
prop
|
152
|
0.421
|
0.495
|
0
|
1
|
|
qrcode
|
151
|
0.642
|
0.481
|
0
|
1
|
|
area
|
152
|
3,240.185
|
3,149.206
|
118.520
|
15,318.040
|
|
population
|
152
|
345,026.400
|
68,005.920
|
225,023
|
460,689
|
|
density
|
152
|
471.597
|
871.373
|
17.751
|
3,875.169
|
|
teeth
|
152
|
0.454
|
0.500
|
0
|
1
|
|
gender
|
152
|
0.796
|
0.404
|
0
|
1
|
|
anage
|
152
|
0.289
|
0.455
|
0
|
1
|
|
last
|
152
|
0.362
|
0.482
|
0
|
1
|
|
first
|
152
|
0.553
|
0.499
|
0
|
1
|
|
|
plot2 <- df |>
ggplot(aes(x = age, y = anage)) +
geom_jitter(size = 1,
alpha = 1/3,
width = 0,
height = 0.05) +
geom_smooth(method = "glm",
color = "red",
method.args = list(family = binomial(link = "logit"))) +
labs(x = "候補者の年齢",
y = "選挙公報で候補者の年齢を公表しているかどうかの確率") +
theme_bw(base_family = "HiraKakuProN-W3")
print(plot2)
## `geom_smooth()` using formula = 'y ~ x'

model_2 <- glm(anage ~ age,
data = df_1,
family = binomial(link = "logit")) # 係数を「オッズの対数」に指定
tidy(model_2, conf.int = TRUE)
## # A tibble: 2 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 2.51 0.965 2.60 0.00942 0.655 4.46
## 2 age -0.0642 0.0183 -3.50 0.000458 -0.102 -0.0295
stargazer::stargazer(model_2, type = "html")
|
|
|
|
Dependent variable:
|
|
|
|
|
|
anage
|
|
|
|
age
|
-0.064***
|
|
|
(0.018)
|
|
|
|
|
Constant
|
2.506***
|
|
|
(0.965)
|
|
|
|
|
|
|
Observations
|
152
|
|
Log Likelihood
|
-84.577
|
|
Akaike Inf. Crit.
|
173.155
|
|
|
|
Note:
|
p<0.1; p<0.05;
p<0.01
|
p_48 <- predict(model_2, type = "response", #予測当選確率を表示したい時の指定
newdata = data_frame(age = 48))
## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## ℹ Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p_48
## 1
## 0.3603702
ロジスティック回帰分析の係数.
→表示される回帰式の係数 estimate は 「オッズの対数」 = Log
Odds.
*Log Odds は解釈しにくい → 確率に変換する
Marginal Effects
margins(model_2, variables = "age",
at = list(age = 48))
## Average marginal effects at specified values
## glm(formula = anage ~ age, family = binomial(link = "logit"), data = df_1)
## at(age) age
## 48 -0.01479
df_pred <- cplot(model_2,
x = "age",
what = "prediction") %>% # Y軸に予測当選確率を表示させる設定
as_data_frame() %>%
ggplot(aes(x = xvals,
y = yvals,
ymin = lower,
ymax = upper)) +
geom_ribbon(fill = "gray") +
geom_line() +
labs(x = "候補者の年齢(歳)",
y = "候補者が年齢を公表する確率の予測値",
title = "候補者が年齢を公表する確率の予測値",
"2024年総選挙における北海道・東北・北陸地方を例にして") +
theme_bw(base_family = "HiraKakuProN-W3")
## Warning: `as_data_frame()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` (with slightly different semantics) to convert to a
## tibble, or `as.data.frame()` to convert to a data frame.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

df_me <- cplot(model_2,
x = "age", # x軸に据える変数
dx = "age", # 説明変数
what = "effect") %>% # Y軸に限界効果を表示させる設定
as_data_frame() %>%
ggplot(aes(x = xvals,
y = yvals,
ymin = lower,
ymax = upper)) +
geom_ribbon(fill = "gray") +
geom_line() +
geom_abline(intercept = 0, slope = 0, linetype = "dashed", color = "red") +
labs(x = "候補者の年齢(歳)",
y = "候補者の年齢の限界効果",
title = "候補者の年齢の限界効果",
"2024年総選挙における北海道・東北・北陸地方を例にして") +
theme_bw(base_family = "HiraKakuProN-W3")

df_pred + df_me
