name

library(DT)
library(gapminder)
library(gghighlight)

## Loading required package: ggplot2

library(ggrepel)
library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(corrplot)

## corrplot 0.95 loaded

library(jtools)
library(margins)
library(ROCR)

## 
## Attaching package: 'ROCR'

## The following object is masked from 'package:margins':
## 
##     prediction

library(patchwork)
library(prediction)

## 
## Attaching package: 'prediction'

## The following object is masked from 'package:ROCR':
## 
##     prediction

library(stargazer)
library(tidyverse)

# Read the analysis data set from data.csv.
# `na` REPLACES readr's default missing-value markers (c("", "NA")) rather
# than adding to them, so the defaults are listed explicitly next to ".".
# With only "." given, cells containing NA were kept as the literal string
# "NA" (visible in `recommended` and `previous`).
# NOTE(review): columns that only held digits plus "NA" (e.g. `previous`)
# will presumably now be guessed as numeric -- confirm downstream code
# expects that.
df <- read_csv(
  "data.csv",
  na = c("", "NA", ".")
)

## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)

## Rows: 152 Columns: 32
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): prefecture, district, name, yomi, lastname, firstname, last_kana, ...
## dbl (19): dist_no, age, duplicate, win_smd, win_pr, votes, vshare, prop, qrc...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# List the column names of the loaded data.
colnames(df)

##  [1] "prefecture"  "dist_no"     "district"    "name"        "yomi"       
##  [6] "lastname"    "firstname"   "last_kana"   "first_kana"  "age"        
## [11] "party"       "recommended" "status"      "previous"    "duplicate"  
## [16] "win_smd"     "win_pr"      "votes"       "vshare"      "prop"       
## [21] "qrcode"      "area"        "population"  "density"     "scale"      
## [26] "teeth"       "gender"      "anage"       "last"        "first"      
## [31] "n_first"     "n_last"

# Compact overview of every column: type plus the first few values.
df |> str()

## spc_tbl_ [152 × 32] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ prefecture : chr [1:152] "北海道" "北海道" "北海道" "北海道" ...
##  $ dist_no    : num [1:152] 1 1 1 1 1 2 2 2 2 3 ...
##  $ district   : chr [1:152] "北海道1区" "北海道1区" "北海道1区" "北海道1区" ...
##  $ name       : chr [1:152] "小林 悟" "道下 大樹" "田中 義人" "千葉 尚子" ...
##  $ yomi       : chr [1:152] "こばやし さとる" "みちした だいき" "たなか よしひと" "ちば なおこ" ...
##  $ lastname   : chr [1:152] "小林" "道下" "田中" "千葉" ...
##  $ firstname  : chr [1:152] "悟" "大樹" "義人" "尚子" ...
##  $ last_kana  : chr [1:152] "こばやし" "みちした" "たなか" "ちば" ...
##  $ first_kana : chr [1:152] "さとる" "だいき" "よしひと" "なおこ" ...
##  $ age        : num [1:152] 60 48 52 44 41 51 32 65 44 70 ...
##  $ party      : chr [1:152] "維新" "立憲" "参政" "共産" ...
##  $ recommended: chr [1:152] "NA" "NA" "NA" "NA" ...
##  $ status     : chr [1:152] "新" "前" "新" "新" ...
##  $ previous   : chr [1:152] "NA" "2" "NA" "NA" ...
##  $ duplicate  : num [1:152] 1 1 1 0 1 1 1 1 1 0 ...
##  $ win_smd    : num [1:152] 0 1 0 0 0 0 0 1 0 0 ...
##  $ win_pr     : num [1:152] 0 0 0 0 0 0 0 0 0 0 ...
##  $ votes      : num [1:152] 20000 108394 20097 21451 80133 ...
##  $ vshare     : num [1:152] 8 43.34 8.04 8.58 32.04 ...
##  $ prop       : num [1:152] 0 1 1 1 0 0 1 0 0 0 ...
##  $ qrcode     : num [1:152] 1 1 1 1 1 0 1 1 1 0 ...
##  $ area       : num [1:152] 769 769 769 769 769 ...
##  $ population : num [1:152] 454514 454514 454514 454514 454514 ...
##  $ density    : num [1:152] 591 591 591 591 591 ...
##  $ scale      : chr [1:152] "No" "No" "No" "No" ...
##  $ teeth      : num [1:152] 0 1 0 1 0 1 1 1 1 0 ...
##  $ gender     : num [1:152] 1 1 1 0 1 1 0 1 1 1 ...
##  $ anage      : num [1:152] 0 0 1 1 1 0 1 0 0 0 ...
##  $ last       : num [1:152] 0 0 0 0 1 0 0 0 0 0 ...
##  $ first      : num [1:152] 1 0 1 1 0 0 1 1 1 0 ...
##  $ n_first    : num [1:152] 11 16 9 14 20 14 9 12 25 17 ...
##  $ n_last     : num [1:152] 10 19 9 11 20 9 19 18 11 14 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   prefecture = col_character(),
##   ..   dist_no = col_double(),
##   ..   district = col_character(),
##   ..   name = col_character(),
##   ..   yomi = col_character(),
##   ..   lastname = col_character(),
##   ..   firstname = col_character(),
##   ..   last_kana = col_character(),
##   ..   first_kana = col_character(),
##   ..   age = col_double(),
##   ..   party = col_character(),
##   ..   recommended = col_character(),
##   ..   status = col_character(),
##   ..   previous = col_character(),
##   ..   duplicate = col_double(),
##   ..   win_smd = col_double(),
##   ..   win_pr = col_double(),
##   ..   votes = col_double(),
##   ..   vshare = col_double(),
##   ..   prop = col_double(),
##   ..   qrcode = col_double(),
##   ..   area = col_double(),
##   ..   population = col_double(),
##   ..   density = col_double(),
##   ..   scale = col_character(),
##   ..   teeth = col_double(),
##   ..   gender = col_double(),
##   ..   anage = col_double(),
##   ..   last = col_double(),
##   ..   first = col_double(),
##   ..   n_first = col_double(),
##   ..   n_last = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

library(stargazer)

# Descriptive-statistics table, rendered as HTML with two decimal places.
# The tibble is converted to a plain data.frame before being handed to
# stargazer.
df |>
  as.data.frame() |>
  stargazer(type = "html", digits = 2)


Statistic	N	Mean	St. Dev.	Min	Max

dist_no	152	3.09	2.51	1	12
age	152	54.56	10.95	26	82
duplicate	152	0.62	0.49	0	1
win_smd	152	0.30	0.46	0	1
win_pr	152	0.12	0.32	0	1
votes	152	56,149.90	37,049.57	2,210	132,361
vshare	152	30.26	19.44	1.23	74.66
prop	152	0.42	0.50	0	1
qrcode	151	0.64	0.48	0	1
area	152	3,240.19	3,149.21	118.52	15,318.04
population	152	345,026.40	68,005.92	225,023	460,689
density	152	471.60	871.37	17.75	3,875.17
teeth	152	0.45	0.50	0	1
gender	152	0.80	0.40	0	1
anage	152	0.29	0.46	0	1
last	152	0.36	0.48	0	1
first	152	0.55	0.50	0	1
n_first	152	15.35	5.28	3	32
n_last	152	14.86	5.04	0	29

# Scatter plot of vote share (`vshare`) against the 0/1 indicator `first`,
# with a linear-model fit overlaid.
# Only ASCII whitespace is used between tokens: ideographic spaces (U+3000)
# outside of strings are not reliably accepted by the R parser.
df |>
  ggplot(aes(x = first, y = vshare)) +
  geom_point() +
  geom_smooth(method = "lm") + # draw the fitted regression line
  labs(
    x = "候補者の名字",
    y = "得票率"
  ) +
  ggtitle(
    "候補者の名字表記と得票率の散布図",
    "2024年総選挙における北海道・東北・北陸地方を例にして"
  ) +
  theme_bw(base_family = "HiraKakuProN-W3")

## `geom_smooth()` using formula = 'y ~ x'

# Scatter plot of vote share (`vshare`) against the 0/1 indicator `last`,
# with a linear-model fit overlaid.
# Only ASCII whitespace is used between tokens: ideographic spaces (U+3000)
# outside of strings are not reliably accepted by the R parser.
df |>
  ggplot(aes(x = last, y = vshare)) +
  geom_point() +
  geom_smooth(method = "lm") + # draw the fitted regression line
  labs(
    x = "候補者の名前",
    y = "得票率"
  ) +
  ggtitle(
    "候補者の名前表記と得票率の散布図",
    "2024年総選挙における北海道・東北・北陸地方を例にして"
  ) +
  theme_bw(base_family = "HiraKakuProN-W3")

## `geom_smooth()` using formula = 'y ~ x'

# OLS regression of vote share on the `first` indicator, followed by the
# standard coefficient / fit summary.
model_1 <- lm(formula = vshare ~ first, data = df)
summary(model_1)

## 
## Call:
## lm(formula = vshare ~ first, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.632 -19.319   3.909  15.471  40.798 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   33.862      2.332  14.521   <2e-16 ***
## first         -6.513      3.137  -2.076   0.0396 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.23 on 150 degrees of freedom
## Multiple R-squared:  0.02794,    Adjusted R-squared:  0.02146 
## F-statistic: 4.311 on 1 and 150 DF,  p-value: 0.03957

# Regression table for model_1, emitted as HTML.
stargazer::stargazer(model_1, type = "html")


	Dependent variable:

	vshare

first	-6.513^**
	(3.137)

Constant	33.862^***
	(2.332)


Observations	152
R²	0.028
Adjusted R²	0.021
Residual Std. Error	19.230 (df = 150)
F Statistic	4.311^** (df = 1; 150)

Note:	^*p<0.1; ^**p<0.05; ^***p<0.01

# OLS regression of vote share on the `last` indicator.
model_2 <- lm(vshare ~ last, data = df)

# Summarise the model that was just fitted (model_2, not model_1).
summary(model_2)

## 
## Call:
## lm(formula = vshare ~ first, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.632 -19.319   3.909  15.471  40.798 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   33.862      2.332  14.521   <2e-16 ***
## first         -6.513      3.137  -2.076   0.0396 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.23 on 150 degrees of freedom
## Multiple R-squared:  0.02794,    Adjusted R-squared:  0.02146 
## F-statistic: 4.311 on 1 and 150 DF,  p-value: 0.03957

# Regression table for model_2, emitted as HTML.
model_2 |> stargazer(type = "html")


	Dependent variable:

	vshare

last	3.163
	(3.282)

Constant	29.118^***
	(1.974)


Observations	152
R²	0.006
Adjusted R²	-0.0005
Residual Std. Error	19.444 (df = 150)
F Statistic	0.929 (df = 1; 150)

Note:	^*p<0.1; ^**p<0.05; ^***p<0.01

# Plot the 0/1 indicator `first` against `n_first`, with a logistic
# regression curve overlaid.
# Points are jittered vertically only (width = 0), so the x values stay
# exact while overlapping 0/1 observations remain distinguishable.
plot1 <- df |>
  ggplot(aes(x = n_first, y = first)) +
  geom_jitter(
    size = 1,
    alpha = 1 / 3,
    width = 0,
    height = 0.05
  ) +
  geom_smooth(
    method = "glm",
    color = "red",
    method.args = list(family = binomial(link = "logit"))
  ) +
  labs(
    x = "名字の画数",
    # Stray particle removed so the label matches the wording used in plot2.
    y = "候補者の名字が漢字で書かれている確率"
  ) +
  theme_bw(base_family = "HiraKakuProN-W3")

print(plot1)

## `geom_smooth()` using formula = 'y ~ x'

# Logistic regression of `first` on `n_first`, `n_last` and their
# interaction. With the logit link the coefficients are on the log-odds
# scale.
model_3 <- glm(
  formula = first ~ n_first * n_last,
  family = binomial(link = "logit"),
  data = df
)

# Regression table for model_3, emitted as HTML.
stargazer::stargazer(model_3, type = "html")


	Dependent variable:

	first

n_first	-0.003
	(0.096)

n_last	0.050
	(0.097)

n_first:n_last	-0.003
	(0.006)

Constant	0.245
	(1.543)


Observations	152
Log Likelihood	-103.057
Akaike Inf. Crit.	214.114

Note:	^*p<0.1; ^**p<0.05; ^***p<0.01

# Plot the 0/1 indicator `last` against `n_last`, with a logistic
# regression curve overlaid. Jitter is vertical only (width = 0).
plot2 <- ggplot(df, aes(x = n_last, y = last)) +
  geom_jitter(
    width = 0,
    height = 0.05,
    size = 1,
    alpha = 1 / 3
  ) +
  geom_smooth(
    method = "glm",
    method.args = list(family = binomial(link = "logit")),
    color = "red"
  ) +
  labs(
    x = "名前の画数",
    y = "候補者の名前が漢字で書かれている確率"
  ) +
  theme_bw(base_family = "HiraKakuProN-W3")

print(plot2)

## `geom_smooth()` using formula = 'y ~ x'

# Logistic regression of `last` on `n_last`, `n_first` and their
# interaction. With the logit link the coefficients are on the log-odds
# scale.
model_4 <- glm(
  formula = last ~ n_last * n_first,
  family = binomial(link = "logit"),
  data = df
)

# Regression table for model_4, emitted as HTML.
stargazer::stargazer(model_4, type = "html")


	Dependent variable:

	last

n_last	-0.075
	(0.116)

n_first	0.057
	(0.108)

n_last:n_first	0.005
	(0.007)

Constant	-1.508
	(1.785)


Observations	152
Log Likelihood	-91.806
Akaike Inf. Crit.	191.612

Note:	^*p<0.1; ^**p<0.05; ^***p<0.01