library(DT)
library(gapminder)
library(gghighlight)
## Loading required package: ggplot2
library(ggrepel)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(corrplot)
## corrplot 0.95 loaded
library(jtools)
library(margins)
library(ROCR)
##
## Attaching package: 'ROCR'
## The following object is masked from 'package:margins':
##
## prediction
library(patchwork)
library(prediction)
##
## Attaching package: 'prediction'
## The following object is masked from 'package:ROCR':
##
## prediction
library(stargazer)
library(tidyverse)
# Load the candidate-level data set from data.csv into `df`.
#
# `na` REPLACES readr's default missing-value markers (c("", "NA")) rather than
# extending them. With only na = "." the literal string "NA" is kept as data,
# which is why `recommended` and `previous` showed up as character columns
# full of "NA" strings. List all three markers so they become real NA values.
# NOTE(review): column types and any echoed output produced with the old call
# (e.g. `previous` being chr) may change after this fix -- re-knit to refresh.
df <- read_csv("data.csv",
               na = c(".", "NA", ""))
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 152 Columns: 32
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): prefecture, district, name, yomi, lastname, firstname, last_kana, ...
## dbl (19): dist_no, age, duplicate, win_smd, win_pr, votes, vshare, prop, qrc...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# List the column names of the loaded data.
df |> names()
## [1] "prefecture" "dist_no" "district" "name" "yomi"
## [6] "lastname" "firstname" "last_kana" "first_kana" "age"
## [11] "party" "recommended" "status" "previous" "duplicate"
## [16] "win_smd" "win_pr" "votes" "vshare" "prop"
## [21] "qrcode" "area" "population" "density" "scale"
## [26] "teeth" "gender" "anage" "last" "first"
## [31] "n_first" "n_last"
# Inspect the type and the first few values of every column.
df |> str()
## spc_tbl_ [152 × 32] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ prefecture : chr [1:152] "北海道" "北海道" "北海道" "北海道" ...
## $ dist_no : num [1:152] 1 1 1 1 1 2 2 2 2 3 ...
## $ district : chr [1:152] "北海道1区" "北海道1区" "北海道1区" "北海道1区" ...
## $ name : chr [1:152] "小林 悟" "道下 大樹" "田中 義人" "千葉 尚子" ...
## $ yomi : chr [1:152] "こばやし さとる" "みちした だいき" "たなか よしひと" "ちば なおこ" ...
## $ lastname : chr [1:152] "小林" "道下" "田中" "千葉" ...
## $ firstname : chr [1:152] "悟" "大樹" "義人" "尚子" ...
## $ last_kana : chr [1:152] "こばやし" "みちした" "たなか" "ちば" ...
## $ first_kana : chr [1:152] "さとる" "だいき" "よしひと" "なおこ" ...
## $ age : num [1:152] 60 48 52 44 41 51 32 65 44 70 ...
## $ party : chr [1:152] "維新" "立憲" "参政" "共産" ...
## $ recommended: chr [1:152] "NA" "NA" "NA" "NA" ...
## $ status : chr [1:152] "新" "前" "新" "新" ...
## $ previous : chr [1:152] "NA" "2" "NA" "NA" ...
## $ duplicate : num [1:152] 1 1 1 0 1 1 1 1 1 0 ...
## $ win_smd : num [1:152] 0 1 0 0 0 0 0 1 0 0 ...
## $ win_pr : num [1:152] 0 0 0 0 0 0 0 0 0 0 ...
## $ votes : num [1:152] 20000 108394 20097 21451 80133 ...
## $ vshare : num [1:152] 8 43.34 8.04 8.58 32.04 ...
## $ prop : num [1:152] 0 1 1 1 0 0 1 0 0 0 ...
## $ qrcode : num [1:152] 1 1 1 1 1 0 1 1 1 0 ...
## $ area : num [1:152] 769 769 769 769 769 ...
## $ population : num [1:152] 454514 454514 454514 454514 454514 ...
## $ density : num [1:152] 591 591 591 591 591 ...
## $ scale : chr [1:152] "No" "No" "No" "No" ...
## $ teeth : num [1:152] 0 1 0 1 0 1 1 1 1 0 ...
## $ gender : num [1:152] 1 1 1 0 1 1 0 1 1 1 ...
## $ anage : num [1:152] 0 0 1 1 1 0 1 0 0 0 ...
## $ last : num [1:152] 0 0 0 0 1 0 0 0 0 0 ...
## $ first : num [1:152] 1 0 1 1 0 0 1 1 1 0 ...
## $ n_first : num [1:152] 11 16 9 14 20 14 9 12 25 17 ...
## $ n_last : num [1:152] 10 19 9 11 20 9 19 18 11 14 ...
## - attr(*, "spec")=
## .. cols(
## .. prefecture = col_character(),
## .. dist_no = col_double(),
## .. district = col_character(),
## .. name = col_character(),
## .. yomi = col_character(),
## .. lastname = col_character(),
## .. firstname = col_character(),
## .. last_kana = col_character(),
## .. first_kana = col_character(),
## .. age = col_double(),
## .. party = col_character(),
## .. recommended = col_character(),
## .. status = col_character(),
## .. previous = col_character(),
## .. duplicate = col_double(),
## .. win_smd = col_double(),
## .. win_pr = col_double(),
## .. votes = col_double(),
## .. vshare = col_double(),
## .. prop = col_double(),
## .. qrcode = col_double(),
## .. area = col_double(),
## .. population = col_double(),
## .. density = col_double(),
## .. scale = col_character(),
## .. teeth = col_double(),
## .. gender = col_double(),
## .. anage = col_double(),
## .. last = col_double(),
## .. first = col_double(),
## .. n_first = col_double(),
## .. n_last = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Descriptive statistics for `df`, rendered as an HTML table with two decimals.
# The tibble is converted to a plain data.frame before being handed to
# stargazer.
library(stargazer)
df |>
  as.data.frame() |>
  stargazer(type = "html", digits = 2)
| Statistic  | N   | Mean       | St. Dev.  | Min     | Max       |
|:-----------|----:|-----------:|----------:|--------:|----------:|
| dist_no    | 152 | 3.09       | 2.51      | 1       | 12        |
| age        | 152 | 54.56      | 10.95     | 26      | 82        |
| duplicate  | 152 | 0.62       | 0.49      | 0       | 1         |
| win_smd    | 152 | 0.30       | 0.46      | 0       | 1         |
| win_pr     | 152 | 0.12       | 0.32      | 0       | 1         |
| votes      | 152 | 56,149.90  | 37,049.57 | 2,210   | 132,361   |
| vshare     | 152 | 30.26      | 19.44     | 1.23    | 74.66     |
| prop       | 152 | 0.42       | 0.50      | 0       | 1         |
| qrcode     | 151 | 0.64       | 0.48      | 0       | 1         |
| area       | 152 | 3,240.19   | 3,149.21  | 118.52  | 15,318.04 |
| population | 152 | 345,026.40 | 68,005.92 | 225,023 | 460,689   |
| density    | 152 | 471.60     | 871.37    | 17.75   | 3,875.17  |
| teeth      | 152 | 0.45       | 0.50      | 0       | 1         |
| gender     | 152 | 0.80       | 0.40      | 0       | 1         |
| anage      | 152 | 0.29       | 0.46      | 0       | 1         |
| last       | 152 | 0.36       | 0.48      | 0       | 1         |
| first      | 152 | 0.55       | 0.50      | 0       | 1         |
| n_first    | 152 | 15.35      | 5.28      | 3       | 32        |
| n_last     | 152 | 14.86      | 5.04      | 0       | 29        |
# Scatter plot of vote share (`vshare`) against the `first` indicator, with a
# fitted linear regression line and its confidence band.
ggplot(df, aes(first, vshare)) +
  geom_point() +
  geom_smooth(method = "lm") + # draw the regression line
  labs(
    title    = "候補者の名字表記と得票率の散布図",
    subtitle = "2024年総選挙における北海道・東北・北陸地方を例にして",
    x        = "候補者の名字",
    y        = "得票率"
  ) +
  theme_bw(base_family = "HiraKakuProN-W3")
## `geom_smooth()` using formula = 'y ~ x'

# Scatter plot of vote share (`vshare`) against the `last` indicator, with a
# fitted linear regression line and its confidence band.
ggplot(df, aes(last, vshare)) +
  geom_point() +
  geom_smooth(method = "lm") + # draw the regression line
  labs(
    title    = "候補者の名前表記と得票率の散布図",
    subtitle = "2024年総選挙における北海道・東北・北陸地方を例にして",
    x        = "候補者の名前",
    y        = "得票率"
  ) +
  theme_bw(base_family = "HiraKakuProN-W3")
## `geom_smooth()` using formula = 'y ~ x'

# Model 1: simple OLS regression of vote share on the `first` indicator.
model_1 <- lm(formula = vshare ~ first, data = df)
# Print coefficients, standard errors and fit statistics.
summary(model_1)
##
## Call:
## lm(formula = vshare ~ first, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.632 -19.319 3.909 15.471 40.798
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.862 2.332 14.521 <2e-16 ***
## first -6.513 3.137 -2.076 0.0396 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.23 on 150 degrees of freedom
## Multiple R-squared: 0.02794, Adjusted R-squared: 0.02146
## F-statistic: 4.311 on 1 and 150 DF, p-value: 0.03957
# Render the model 1 regression table as HTML.
stargazer::stargazer(model_1, type = "html")
|                     | Dependent variable: vshare |
|:--------------------|:---------------------------|
| first               | -6.513** (3.137)           |
| Constant            | 33.862*** (2.332)          |
| Observations        | 152                        |
| R²                  | 0.028                      |
| Adjusted R²         | 0.021                      |
| Residual Std. Error | 19.230 (df = 150)          |
| F Statistic         | 4.311** (df = 1; 150)      |

Note: *p<0.1; **p<0.05; ***p<0.01 (standard errors in parentheses)
# Model 2: simple OLS regression of vote share on the `last` indicator.
model_2 <- lm(vshare ~ last, data = df)
# Summarise model_2 itself. The previous code called summary(model_1) here
# (copy-paste slip), so any console output captured from that run shows the
# model 1 results and must be regenerated.
summary(model_2)
##
## Call:
## lm(formula = vshare ~ first, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.632 -19.319 3.909 15.471 40.798
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.862 2.332 14.521 <2e-16 ***
## first -6.513 3.137 -2.076 0.0396 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.23 on 150 degrees of freedom
## Multiple R-squared: 0.02794, Adjusted R-squared: 0.02146
## F-statistic: 4.311 on 1 and 150 DF, p-value: 0.03957
# Render the model 2 regression table as HTML.
stargazer::stargazer(model_2, type = "html")
|                     | Dependent variable: vshare |
|:--------------------|:---------------------------|
| last                | 3.163 (3.282)              |
| Constant            | 29.118*** (1.974)          |
| Observations        | 152                        |
| R²                  | 0.006                      |
| Adjusted R²         | -0.0005                    |
| Residual Std. Error | 19.444 (df = 150)          |
| F Statistic         | 0.929 (df = 1; 150)        |

Note: *p<0.1; **p<0.05; ***p<0.01 (standard errors in parentheses)
# Jittered scatter of the binary `first` indicator against `n_first`, with a
# logistic-regression curve (binomial GLM, logit link) overlaid in red.
plot1 <- df |>
  ggplot(aes(x = n_first, y = first)) +
  geom_jitter(size = 1,
              alpha = 1/3,
              width = 0,       # no horizontal jitter
              height = 0.05) + # small vertical jitter to separate the 0/1 points
  geom_smooth(method = "glm",
              color = "red",
              method.args = list(family = binomial(link = "logit"))) +
  # y label fixed: the stray "の" in "書かれているの確率" was a typo; the
  # wording now matches the corresponding label used for plot2.
  labs(x = "名字の画数",
       y = "候補者の名字が漢字で書かれている確率") +
  theme_bw(base_family = "HiraKakuProN-W3")
print(plot1)
## `geom_smooth()` using formula = 'y ~ x'

# Model 3: logistic regression of the `first` indicator on `n_first`, `n_last`
# and their interaction.
model_3 <- glm(
  first ~ n_first * n_last,
  family = binomial(link = "logit"), # coefficients are on the log-odds scale
  data = df
)
# Render the model 3 table as HTML.
stargazer::stargazer(model_3, type = "html")
|                   | Dependent variable: first |
|:------------------|:--------------------------|
| n_first           | -0.003 (0.096)            |
| n_last            | 0.050 (0.097)             |
| n_first:n_last    | -0.003 (0.006)            |
| Constant          | 0.245 (1.543)             |
| Observations      | 152                       |
| Log Likelihood    | -103.057                  |
| Akaike Inf. Crit. | 214.114                   |

Note: *p<0.1; **p<0.05; ***p<0.01 (standard errors in parentheses)
# Jittered scatter of the binary `last` indicator against `n_last`, with a
# logistic-regression curve (binomial GLM, logit link) overlaid in red.
plot2 <- ggplot(df, aes(n_last, last)) +
  # vertical-only jitter so the 0/1 observations do not overplot
  geom_jitter(width = 0, height = 0.05, size = 1, alpha = 1/3) +
  geom_smooth(
    method = "glm",
    method.args = list(family = binomial(link = "logit")),
    color = "red"
  ) +
  labs(
    x = "名前の画数",
    y = "候補者の名前が漢字で書かれている確率"
  ) +
  theme_bw(base_family = "HiraKakuProN-W3")
print(plot2)
## `geom_smooth()` using formula = 'y ~ x'

# Model 4: logistic regression of the `last` indicator on `n_last`, `n_first`
# and their interaction.
model_4 <- glm(
  last ~ n_last * n_first,
  family = binomial(link = "logit"), # coefficients are on the log-odds scale
  data = df
)
# Render the model 4 table as HTML.
stargazer::stargazer(model_4, type = "html")
|                   | Dependent variable: last |
|:------------------|:-------------------------|
| n_last            | -0.075 (0.116)           |
| n_first           | 0.057 (0.108)            |
| n_last:n_first    | 0.005 (0.007)            |
| Constant          | -1.508 (1.785)           |
| Observations      | 152                      |
| Log Likelihood    | -91.806                  |
| Akaike Inf. Crit. | 191.612                  |

Note: *p<0.1; **p<0.05; ***p<0.01 (standard errors in parentheses)