age

library(DT)
library(gapminder)
library(gghighlight)

## Loading required package: ggplot2

library(ggrepel)
library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(corrplot)

## corrplot 0.95 loaded

library(jtools)
library(margins)
library(ROCR)

## 
## Attaching package: 'ROCR'

## The following object is masked from 'package:margins':
## 
##     prediction

library(patchwork)
library(prediction)

## 
## Attaching package: 'prediction'

## The following object is masked from 'package:ROCR':
## 
##     prediction

library(stargazer)
library(tidyverse)

library(stargazer)

df <- read_csv("data.csv",
               na = ".")

## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)

## Rows: 152 Columns: 30
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): prefecture, district, name, yomi, lastname, firstname, last_kana, ...
## dbl (17): dist_no, age, duplicate, win_smd, win_pr, votes, vshare, prop, qrc...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

names(df)

##  [1] "prefecture"  "dist_no"     "district"    "name"        "yomi"       
##  [6] "lastname"    "firstname"   "last_kana"   "first_kana"  "age"        
## [11] "party"       "recommended" "status"      "previous"    "duplicate"  
## [16] "win_smd"     "win_pr"      "votes"       "vshare"      "prop"       
## [21] "qrcode"      "area"        "population"  "density"     "scale"      
## [26] "teeth"       "gender"      "anage"       "last"        "first"

str(df)

## spc_tbl_ [152 × 30] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ prefecture : chr [1:152] "北海道" "北海道" "北海道" "北海道" ...
##  $ dist_no    : num [1:152] 1 1 1 1 1 2 2 2 2 3 ...
##  $ district   : chr [1:152] "北海道1区" "北海道1区" "北海道1区" "北海道1区" ...
##  $ name       : chr [1:152] "小林 悟" "道下 大樹" "田中 義人" "千葉 尚子" ...
##  $ yomi       : chr [1:152] "こばやし さとる" "みちした だいき" "たなか よしひと" "ちば なおこ" ...
##  $ lastname   : chr [1:152] "小林" "道下" "田中" "千葉" ...
##  $ firstname  : chr [1:152] "悟" "大樹" "義人" "尚子" ...
##  $ last_kana  : chr [1:152] "こばやし" "みちした" "たなか" "ちば" ...
##  $ first_kana : chr [1:152] "さとる" "だいき" "よしひと" "なおこ" ...
##  $ age        : num [1:152] 60 48 52 44 41 51 32 65 44 70 ...
##  $ party      : chr [1:152] "維新" "立憲" "参政" "共産" ...
##  $ recommended: chr [1:152] "NA" "NA" "NA" "NA" ...
##  $ status     : chr [1:152] "新" "前" "新" "新" ...
##  $ previous   : chr [1:152] "NA" "2" "NA" "NA" ...
##  $ duplicate  : num [1:152] 1 1 1 0 1 1 1 1 1 0 ...
##  $ win_smd    : num [1:152] 0 1 0 0 0 0 0 1 0 0 ...
##  $ win_pr     : num [1:152] 0 0 0 0 0 0 0 0 0 0 ...
##  $ votes      : num [1:152] 20000 108394 20097 21451 80133 ...
##  $ vshare     : num [1:152] 8 43.34 8.04 8.58 32.04 ...
##  $ prop       : num [1:152] 0 1 1 1 0 0 1 0 0 0 ...
##  $ qrcode     : num [1:152] 1 1 1 1 1 0 1 1 1 0 ...
##  $ area       : num [1:152] 769 769 769 769 769 ...
##  $ population : num [1:152] 454514 454514 454514 454514 454514 ...
##  $ density    : num [1:152] 591 591 591 591 591 ...
##  $ scale      : chr [1:152] "No" "No" "No" "No" ...
##  $ teeth      : num [1:152] 0 1 0 1 0 1 1 1 1 0 ...
##  $ gender     : num [1:152] 1 1 1 0 1 1 0 1 1 1 ...
##  $ anage      : num [1:152] 0 0 1 1 1 0 1 0 0 0 ...
##  $ last       : num [1:152] 0 0 0 0 1 0 0 0 0 0 ...
##  $ first      : num [1:152] 1 0 1 1 0 0 1 1 1 0 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   prefecture = col_character(),
##   ..   dist_no = col_double(),
##   ..   district = col_character(),
##   ..   name = col_character(),
##   ..   yomi = col_character(),
##   ..   lastname = col_character(),
##   ..   firstname = col_character(),
##   ..   last_kana = col_character(),
##   ..   first_kana = col_character(),
##   ..   age = col_double(),
##   ..   party = col_character(),
##   ..   recommended = col_character(),
##   ..   status = col_character(),
##   ..   previous = col_character(),
##   ..   duplicate = col_double(),
##   ..   win_smd = col_double(),
##   ..   win_pr = col_double(),
##   ..   votes = col_double(),
##   ..   vshare = col_double(),
##   ..   prop = col_double(),
##   ..   qrcode = col_double(),
##   ..   area = col_double(),
##   ..   population = col_double(),
##   ..   density = col_double(),
##   ..   scale = col_character(),
##   ..   teeth = col_double(),
##   ..   gender = col_double(),
##   ..   anage = col_double(),
##   ..   last = col_double(),
##   ..   first = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

stargazer(as.data.frame(df),
          type = "html",
          digits = 2)


Statistic	N	Mean	St. Dev.	Min	Max

dist_no	152	3.09	2.51	1	12
age	152	54.56	10.95	26	82
duplicate	152	0.62	0.49	0	1
win_smd	152	0.30	0.46	0	1
win_pr	152	0.12	0.32	0	1
votes	152	56,149.90	37,049.57	2,210	132,361
vshare	152	30.26	19.44	1.23	74.66
prop	152	0.42	0.50	0	1
qrcode	151	0.64	0.48	0	1
area	152	3,240.19	3,149.21	118.52	15,318.04
population	152	345,026.40	68,005.92	225,023	460,689
density	152	471.60	871.37	17.75	3,875.17
teeth	152	0.45	0.50	0	1
gender	152	0.80	0.40	0	1
anage	152	0.29	0.46	0	1
last	152	0.36	0.48	0	1
first	152	0.55	0.50	0	1

df_1 <- df %>%
   group_by(party, gender, age, anage, teeth, first, last, vshare) %>%  # 政党別、性別、当落別に立候補者数 N を計算
   summarise(N = n())

## `summarise()` has grouped output by 'party', 'gender', 'age', 'anage', 'teeth',
## 'first', 'last'. You can override using the `.groups` argument.

df_1 <- df_1 |> 
  mutate(party = factor(party,
                        levels = c("自民",
                                   "立憲",
                                   "共産",
                                   "維新",
                                   "無所",
                                   "国民",
                                   "参政",
                                   "れい",
                                   "公明")))

df_1 <- df_1 |>
  mutate(teeth_chr = if_else(teeth == 1, "yes", "no")) |> 
  mutate(gender_chr = if_else(gender == 1, "male", "female"))

df_1 |> 
  ggplot() +
  geom_bar(aes(x = gender_chr,
               y = N,
               fill = teeth_chr),
           stat = "identity") +
      labs(x = "候補者の性別", y = "選挙公報で歯を見せているかどうか") +
  ggtitle("候補者の性別と歯を見せているかどうかの関係",
          "2024年総選挙における北海道・東北・北陸地方を例にして") +
   theme_bw(base_family = "HiraKakuProN-W3") # 文字化け対策

plot1 <- df |> 
  ggplot(aes(x = age, y = teeth)) +
  geom_jitter(size = 1,
              alpha = 1/3,
              width = 0,
              height = 0.05) +
  geom_smooth(method = "glm", 
    color = "red",
    method.args = list(family = binomial(link = "logit"))) +
  labs(x = "候補者の年齢",
       y = "選挙公報で候補者が歯を出している確率") +
  theme_bw(base_family = "HiraKakuProN-W3")

print(plot1)

## `geom_smooth()` using formula = 'y ~ x'

model_1 <- glm(teeth ~ age + gender,
               data = df_1, 
            family = binomial(link = "logit")) # 係数を「オッズの対数」に指定

tidy(model_1, conf.int = TRUE)

## # A tibble: 3 × 7
##   term        estimate std.error statistic   p.value conf.low conf.high
##   <chr>          <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)  1.41       0.964      1.47  0.143      -0.442     3.36  
## 2 age          0.00463    0.0164     0.282 0.778      -0.0275    0.0371
## 3 gender      -2.27       0.530     -4.27  0.0000192  -3.42     -1.30

stargazer::stargazer(model_1, type = "html")


	Dependent variable:

	teeth

age	0.005
	(0.016)

gender	-2.266^***
	(0.530)

Constant	1.413
	(0.964)


Observations	152
Log Likelihood	-92.392
Akaike Inf. Crit.	190.784

Note:	p<0.1; p<0.05; p<0.01

仮説の設定

帰無仮説：年齢と年齢を公表するかどうかには関係がない対立仮説：年齢と年齢を公表するかどうかには関係がある

stargazer::stargazer(as.data.frame(df),
                     type = "html")


Statistic	N	Mean	St. Dev.	Min	Max

dist_no	152	3.086	2.505	1	12
age	152	54.559	10.945	26	82
duplicate	152	0.625	0.486	0	1
win_smd	152	0.303	0.461	0	1
win_pr	152	0.118	0.324	0	1
votes	152	56,149.900	37,049.570	2,210	132,361
vshare	152	30.263	19.439	1.230	74.660
prop	152	0.421	0.495	0	1
qrcode	151	0.642	0.481	0	1
area	152	3,240.185	3,149.206	118.520	15,318.040
population	152	345,026.400	68,005.920	225,023	460,689
density	152	471.597	871.373	17.751	3,875.169
teeth	152	0.454	0.500	0	1
gender	152	0.796	0.404	0	1
anage	152	0.289	0.455	0	1
last	152	0.362	0.482	0	1
first	152	0.553	0.499	0	1

plot2 <- df |> 
  ggplot(aes(x = age, y = anage)) +
  geom_jitter(size = 1,
              alpha = 1/3,
              width = 0,
              height = 0.05) +
  geom_smooth(method = "glm", 
    color = "red",
    method.args = list(family = binomial(link = "logit"))) +
  labs(x = "候補者の年齢",
       y = "選挙公報で候補者の年齢を公表しているかどうかの確率") +
  theme_bw(base_family = "HiraKakuProN-W3")

print(plot2)

## `geom_smooth()` using formula = 'y ~ x'

model_2 <- glm(anage ~ age,
               data = df_1, 
            family = binomial(link = "logit")) # 係数を「オッズの対数」に指定

tidy(model_2, conf.int = TRUE)

## # A tibble: 2 × 7
##   term        estimate std.error statistic  p.value conf.low conf.high
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
## 1 (Intercept)   2.51      0.965       2.60 0.00942     0.655    4.46  
## 2 age          -0.0642    0.0183     -3.50 0.000458   -0.102   -0.0295

stargazer::stargazer(model_2, type = "html")


	Dependent variable:

	anage

age	-0.064^***
	(0.018)

Constant	2.506^***
	(0.965)


Observations	152
Log Likelihood	-84.577
Akaike Inf. Crit.	173.155

Note:	p<0.1; p<0.05; p<0.01

p_48 <- predict(model_2, type = "response", #予測当選確率を表示したい時の指定  
               newdata = data_frame(age = 48))

## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## ℹ Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

p_48

##         1 
## 0.3603702

ロジスティック回帰分析の係数.

→表示される回帰式の係数 estimate は「オッズの対数」 = Log Odds.
＊Log Odds は解釈しにくい → 確率に変換する

Marginal Effects

margins(model_2, variables = "age",
        at = list(age = 48))

## Average marginal effects at specified values

## glm(formula = anage ~ age, family = binomial(link = "logit"),     data = df_1)

##  at(age)      age
##       48 -0.01479

df_pred <- cplot(model_2, 
                  x = "age", 
                  what = "prediction") %>% # Y軸に予測当選確率を表示させる設定  
  as_data_frame() %>%
  ggplot(aes(x = xvals, 
    y = yvals, 
    ymin = lower, 
    ymax = upper)) +
  geom_ribbon(fill = "gray") +
  geom_line() +
  labs(x = "候補者の年齢（歳）", 
     y = "候補者が年齢を公表する確率の予測値", 
     title = "候補者が年齢を公表する確率の予測値",
     "2024年総選挙における北海道・東北・北陸地方を例にして") +
theme_bw(base_family = "HiraKakuProN-W3")

## Warning: `as_data_frame()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` (with slightly different semantics) to convert to a
##   tibble, or `as.data.frame()` to convert to a data frame.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

df_me <- cplot(model_2,  
                  x = "age",    # x軸に据える変数
                  dx = "age",   # 説明変数
                  what = "effect") %>%　# Y軸に限界効果を表示させる設定  
  as_data_frame() %>%
  ggplot(aes(x = xvals, 
    y = yvals, 
    ymin = lower, 
    ymax = upper)) +
  geom_ribbon(fill = "gray") +
  geom_line() +
  geom_abline(intercept = 0, slope = 0, linetype = "dashed", color = "red") +
  labs(x = "候補者の年齢（歳）", 
     y = "候補者の年齢の限界効果", 
     title = "候補者の年齢の限界効果",
     "2024年総選挙における北海道・東北・北陸地方を例にして") +
  theme_bw(base_family = "HiraKakuProN-W3")

df_pred + df_me