library(DT)
library(gapminder)
library(gghighlight)
## Loading required package: ggplot2
library(ggrepel)
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(corrplot)
## corrplot 0.95 loaded
library(jtools)
library(margins)
library(ROCR)
## 
## Attaching package: 'ROCR'
## The following object is masked from 'package:margins':
## 
##     prediction
library(patchwork)
library(prediction)
## 
## Attaching package: 'prediction'
## The following object is masked from 'package:ROCR':
## 
##     prediction
library(stargazer)
library(tidyverse)
# Load the candidate-level data set.
# Supplying `na` replaces readr's default missing-value markers ("" and "NA")
# rather than adding to them. With only "." listed, literal NA entries were
# read as the text "NA" (visible in `recommended` and `previous`), which also
# forces those columns to character. List every marker explicitly.
# If readr still reports parsing issues on load, inspect them with problems(df).
df <- read_csv("data.csv",
               na = c("", ".", "NA"))
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 152 Columns: 32
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): prefecture, district, name, yomi, lastname, firstname, last_kana, ...
## dbl (19): dist_no, age, duplicate, win_smd, win_pr, votes, vshare, prop, qrc...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# List the column names of the loaded data.
df |> names()
##  [1] "prefecture"  "dist_no"     "district"    "name"        "yomi"       
##  [6] "lastname"    "firstname"   "last_kana"   "first_kana"  "age"        
## [11] "party"       "recommended" "status"      "previous"    "duplicate"  
## [16] "win_smd"     "win_pr"      "votes"       "vshare"      "prop"       
## [21] "qrcode"      "area"        "population"  "density"     "scale"      
## [26] "teeth"       "gender"      "anage"       "last"        "first"      
## [31] "n_first"     "n_last"
# Inspect column types and the first few values of each column.
df |> str()
## spc_tbl_ [152 × 32] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ prefecture : chr [1:152] "北海道" "北海道" "北海道" "北海道" ...
##  $ dist_no    : num [1:152] 1 1 1 1 1 2 2 2 2 3 ...
##  $ district   : chr [1:152] "北海道1区" "北海道1区" "北海道1区" "北海道1区" ...
##  $ name       : chr [1:152] "小林 悟" "道下 大樹" "田中 義人" "千葉 尚子" ...
##  $ yomi       : chr [1:152] "こばやし さとる" "みちした だいき" "たなか よしひと" "ちば なおこ" ...
##  $ lastname   : chr [1:152] "小林" "道下" "田中" "千葉" ...
##  $ firstname  : chr [1:152] "悟" "大樹" "義人" "尚子" ...
##  $ last_kana  : chr [1:152] "こばやし" "みちした" "たなか" "ちば" ...
##  $ first_kana : chr [1:152] "さとる" "だいき" "よしひと" "なおこ" ...
##  $ age        : num [1:152] 60 48 52 44 41 51 32 65 44 70 ...
##  $ party      : chr [1:152] "維新" "立憲" "参政" "共産" ...
##  $ recommended: chr [1:152] "NA" "NA" "NA" "NA" ...
##  $ status     : chr [1:152] "新" "前" "新" "新" ...
##  $ previous   : chr [1:152] "NA" "2" "NA" "NA" ...
##  $ duplicate  : num [1:152] 1 1 1 0 1 1 1 1 1 0 ...
##  $ win_smd    : num [1:152] 0 1 0 0 0 0 0 1 0 0 ...
##  $ win_pr     : num [1:152] 0 0 0 0 0 0 0 0 0 0 ...
##  $ votes      : num [1:152] 20000 108394 20097 21451 80133 ...
##  $ vshare     : num [1:152] 8 43.34 8.04 8.58 32.04 ...
##  $ prop       : num [1:152] 0 1 1 1 0 0 1 0 0 0 ...
##  $ qrcode     : num [1:152] 1 1 1 1 1 0 1 1 1 0 ...
##  $ area       : num [1:152] 769 769 769 769 769 ...
##  $ population : num [1:152] 454514 454514 454514 454514 454514 ...
##  $ density    : num [1:152] 591 591 591 591 591 ...
##  $ scale      : chr [1:152] "No" "No" "No" "No" ...
##  $ teeth      : num [1:152] 0 1 0 1 0 1 1 1 1 0 ...
##  $ gender     : num [1:152] 1 1 1 0 1 1 0 1 1 1 ...
##  $ anage      : num [1:152] 0 0 1 1 1 0 1 0 0 0 ...
##  $ last       : num [1:152] 0 0 0 0 1 0 0 0 0 0 ...
##  $ first      : num [1:152] 1 0 1 1 0 0 1 1 1 0 ...
##  $ n_first    : num [1:152] 11 16 9 14 20 14 9 12 25 17 ...
##  $ n_last     : num [1:152] 10 19 9 11 20 9 19 18 11 14 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   prefecture = col_character(),
##   ..   dist_no = col_double(),
##   ..   district = col_character(),
##   ..   name = col_character(),
##   ..   yomi = col_character(),
##   ..   lastname = col_character(),
##   ..   firstname = col_character(),
##   ..   last_kana = col_character(),
##   ..   first_kana = col_character(),
##   ..   age = col_double(),
##   ..   party = col_character(),
##   ..   recommended = col_character(),
##   ..   status = col_character(),
##   ..   previous = col_character(),
##   ..   duplicate = col_double(),
##   ..   win_smd = col_double(),
##   ..   win_pr = col_double(),
##   ..   votes = col_double(),
##   ..   vshare = col_double(),
##   ..   prop = col_double(),
##   ..   qrcode = col_double(),
##   ..   area = col_double(),
##   ..   population = col_double(),
##   ..   density = col_double(),
##   ..   scale = col_character(),
##   ..   teeth = col_double(),
##   ..   gender = col_double(),
##   ..   anage = col_double(),
##   ..   last = col_double(),
##   ..   first = col_double(),
##   ..   n_first = col_double(),
##   ..   n_last = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Descriptive statistics table, rendered as HTML with two decimal places.
# The tibble is converted to a plain data.frame before being handed to
# stargazer().
library(stargazer)
df |>
  as.data.frame() |>
  stargazer(type = "html", digits = 2)
Statistic N Mean St. Dev. Min Max
dist_no 152 3.09 2.51 1 12
age 152 54.56 10.95 26 82
duplicate 152 0.62 0.49 0 1
win_smd 152 0.30 0.46 0 1
win_pr 152 0.12 0.32 0 1
votes 152 56,149.90 37,049.57 2,210 132,361
vshare 152 30.26 19.44 1.23 74.66
prop 152 0.42 0.50 0 1
qrcode 151 0.64 0.48 0 1
area 152 3,240.19 3,149.21 118.52 15,318.04
population 152 345,026.40 68,005.92 225,023 460,689
density 152 471.60 871.37 17.75 3,875.17
teeth 152 0.45 0.50 0 1
gender 152 0.80 0.40 0 1
anage 152 0.29 0.46 0 1
last 152 0.36 0.48 0 1
first 152 0.55 0.50 0 1
n_first 152 15.35 5.28 3 32
n_last 152 14.86 5.04 0 29
# Scatter plot of vote share (`vshare`) against `first`, with a fitted
# linear regression line and its confidence band.
# Axis labels: x = "candidate's surname", y = "vote share".
ggplot(df, aes(first, vshare)) +
  geom_point() +
  geom_smooth(method = "lm") + # draw the regression line
  labs(
    title = "候補者の名字表記と得票率の散布図",
    subtitle = "2024年総選挙における北海道・東北・北陸地方を例にして",
    x = "候補者の名字",
    y = "得票率"
  ) +
  theme_bw(base_family = "HiraKakuProN-W3")
## `geom_smooth()` using formula = 'y ~ x'

# Scatter plot of vote share (`vshare`) against `last`, with a fitted
# linear regression line and its confidence band.
# Axis labels: x = "candidate's given name", y = "vote share".
ggplot(df, aes(last, vshare)) +
  geom_point() +
  geom_smooth(method = "lm") + # draw the regression line
  labs(
    title = "候補者の名前表記と得票率の散布図",
    subtitle = "2024年総選挙における北海道・東北・北陸地方を例にして",
    x = "候補者の名前",
    y = "得票率"
  ) +
  theme_bw(base_family = "HiraKakuProN-W3")
## `geom_smooth()` using formula = 'y ~ x'

# Model 1: simple OLS regression of vote share on the `first` indicator.
model_1 <- lm(formula = vshare ~ first, data = df)

# Print coefficients, standard errors and fit statistics.
model_1 |> summary()
## 
## Call:
## lm(formula = vshare ~ first, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.632 -19.319   3.909  15.471  40.798 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   33.862      2.332  14.521   <2e-16 ***
## first         -6.513      3.137  -2.076   0.0396 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.23 on 150 degrees of freedom
## Multiple R-squared:  0.02794,    Adjusted R-squared:  0.02146 
## F-statistic: 4.311 on 1 and 150 DF,  p-value: 0.03957
# Regression table for model 1, rendered as HTML.
model_1 |> stargazer(type = "html")
Dependent variable:
vshare
first -6.513**
(3.137)
Constant 33.862***
(2.332)
Observations 152
R2 0.028
Adjusted R2 0.021
Residual Std. Error 19.230 (df = 150)
F Statistic 4.311** (df = 1; 150)
Note: *p<0.1; **p<0.05; ***p<0.01
# Model 2: simple OLS regression of vote share on the `last` indicator.
model_2 <- lm(vshare ~ last, data = df)

# Summarise the model that was just fitted. This call previously passed
# model_1 (copy-paste slip), so any output rendered before this fix repeats
# the model 1 summary and must be regenerated.
summary(model_2)
## 
## Call:
## lm(formula = vshare ~ first, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.632 -19.319   3.909  15.471  40.798 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   33.862      2.332  14.521   <2e-16 ***
## first         -6.513      3.137  -2.076   0.0396 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.23 on 150 degrees of freedom
## Multiple R-squared:  0.02794,    Adjusted R-squared:  0.02146 
## F-statistic: 4.311 on 1 and 150 DF,  p-value: 0.03957
# Regression table for model 2, rendered as HTML.
model_2 |> stargazer(type = "html")
Dependent variable:
vshare
last 3.163
(3.282)
Constant 29.118***
(1.974)
Observations 152
R2 0.006
Adjusted R2 -0.0005
Residual Std. Error 19.444 (df = 150)
F Statistic 0.929 (df = 1; 150)
Note: *p<0.1; **p<0.05; ***p<0.01
# Plot the binary `first` indicator against the surname stroke count
# (`n_first`) and overlay a fitted logistic curve.
# Axis labels: x = "stroke count of the surname",
#              y = "probability that the candidate's surname is written in kanji".
# The y label previously read "書かれているの確率" (stray "の"); it now matches
# the wording used for the parallel given-name plot.
plot1 <- df |>
  ggplot(aes(x = n_first, y = first)) +
  # Jitter vertically only (width = 0) so overlapping 0/1 points stay visible
  # without distorting the stroke counts on the x axis.
  geom_jitter(size = 1,
              alpha = 1/3,
              width = 0,
              height = 0.05) +
  # Logistic regression fit (binomial GLM with logit link).
  geom_smooth(method = "glm",
              color = "red",
              method.args = list(family = binomial(link = "logit"))) +
  labs(x = "名字の画数",
       y = "候補者の名字が漢字で書かれている確率") +
  theme_bw(base_family = "HiraKakuProN-W3")

print(plot1)
## `geom_smooth()` using formula = 'y ~ x'

# Model 3: logistic regression of the `first` indicator on both stroke counts
# and their interaction. With the logit link the coefficients are reported on
# the log-odds scale.
model_3 <- glm(
  first ~ n_first * n_last,
  family = binomial(link = "logit"),
  data = df
)

# Regression table for model 3, rendered as HTML.
model_3 |> stargazer::stargazer(type = "html")
Dependent variable:
first
n_first -0.003
(0.096)
n_last 0.050
(0.097)
n_first:n_last -0.003
(0.006)
Constant 0.245
(1.543)
Observations 152
Log Likelihood -103.057
Akaike Inf. Crit. 214.114
Note: *p<0.1; **p<0.05; ***p<0.01
# Plot the binary `last` indicator against the given-name stroke count
# (`n_last`) and overlay a fitted logistic curve.
# Axis labels: x = "stroke count of the given name",
#              y = "probability that the candidate's given name is written in kanji".
plot2 <- ggplot(df, aes(n_last, last)) +
  # Vertical-only jitter keeps overlapping 0/1 observations distinguishable.
  geom_jitter(
    width = 0,
    height = 0.05,
    size = 1,
    alpha = 1 / 3
  ) +
  # Logistic regression fit (binomial GLM with logit link).
  geom_smooth(
    method = "glm",
    method.args = list(family = binomial(link = "logit")),
    color = "red"
  ) +
  labs(
    x = "名前の画数",
    y = "候補者の名前が漢字で書かれている確率"
  ) +
  theme_bw(base_family = "HiraKakuProN-W3")

print(plot2)
## `geom_smooth()` using formula = 'y ~ x'

# Model 4: logistic regression of the `last` indicator on both stroke counts
# and their interaction. With the logit link the coefficients are reported on
# the log-odds scale.
model_4 <- glm(
  last ~ n_last * n_first,
  family = binomial(link = "logit"),
  data = df
)

# Regression table for model 4, rendered as HTML.
model_4 |> stargazer::stargazer(type = "html")
Dependent variable:
last
n_last -0.075
(0.116)
n_first 0.057
(0.108)
n_last:n_first 0.005
(0.007)
Constant -1.508
(1.785)
Observations 152
Log Likelihood -91.806
Akaike Inf. Crit. 191.612
Note: *p<0.1; **p<0.05; ***p<0.01