2017-12-02

モチベーション

  • 論文やレポートに分析結果を綺麗に出力したい

  • 複数の回帰結果をまとめて扱いたい

使用するデータ

library(tidyverse)
library(gapminder)

gapminder = gapminder %>% 
    mutate(pop = as.double(pop))

gapminder
## # A tibble: 1,704 x 6
##        country continent  year lifeExp      pop gdpPercap
##         <fctr>    <fctr> <int>   <dbl>    <dbl>     <dbl>
##  1 Afghanistan      Asia  1952  28.801  8425333  779.4453
##  2 Afghanistan      Asia  1957  30.332  9240934  820.8530
##  3 Afghanistan      Asia  1962  31.997 10267083  853.1007
##  4 Afghanistan      Asia  1967  34.020 11537966  836.1971
##  5 Afghanistan      Asia  1972  36.088 13079460  739.9811
##  6 Afghanistan      Asia  1977  38.438 14880372  786.1134
##  7 Afghanistan      Asia  1982  39.854 12881816  978.0114
##  8 Afghanistan      Asia  1987  40.822 13867957  852.3959
##  9 Afghanistan      Asia  1992  41.674 16317921  649.3414
## 10 Afghanistan      Asia  1997  41.763 22227415  635.3414
## # ... with 1,694 more rows

stargazerパッケージ

  • rmdでhtmlとして出力するときはresults='asis'にする
  • type = "latex"とするとtex形式で,"text"とするとコンソール上で見やすい形に出力できる
library(stargazer)

gapminder %>% 
    lm(lifeExp ~ year + pop + gdpPercap, data = .) %>% 
    stargazer(type = "html")
Dependent variable:
lifeExp
year 0.235***
(0.014)
pop 0.000***
(0.000)
gdpPercap 0.001***
(0.00002)
Constant -411.450***
(27.666)
Observations 1,704
R2 0.440
Adjusted R2 0.439
Residual Std. Error 9.673 (df = 1700)
F Statistic 445.560*** (df = 3; 1700)
Note: p<0.1; p<0.05; p<0.01

国レベルでデータをネスト

gapminder = gapminder %>% 
    mutate_if(is.factor, as.character)

by_country = gapminder %>% 
    group_by(continent, country) %>% 
    nest()

by_country
## # A tibble: 142 x 3
##    continent     country              data
##        <chr>       <chr>            <list>
##  1      Asia Afghanistan <tibble [12 x 4]>
##  2    Europe     Albania <tibble [12 x 4]>
##  3    Africa     Algeria <tibble [12 x 4]>
##  4    Africa      Angola <tibble [12 x 4]>
##  5  Americas   Argentina <tibble [12 x 4]>
##  6   Oceania   Australia <tibble [12 x 4]>
##  7    Europe     Austria <tibble [12 x 4]>
##  8      Asia     Bahrain <tibble [12 x 4]>
##  9      Asia  Bangladesh <tibble [12 x 4]>
## 10    Europe     Belgium <tibble [12 x 4]>
## # ... with 132 more rows

国ごとに回帰

library(sandwich)

lin_reg = function(data) {glm(lifeExp ~ year + pop + gdpPercap, data = data)}
het_se = function(reg) {reg %>% vcovHC() %>% sqrt() %>% diag()}

by_country_with_reg = by_country %>% 
    mutate(reg = map(data, lin_reg), se = map(reg, het_se))

by_country_with_reg
## # A tibble: 142 x 5
##    continent     country              data       reg        se
##        <chr>       <chr>            <list>    <list>    <list>
##  1      Asia Afghanistan <tibble [12 x 4]> <S3: glm> <dbl [4]>
##  2    Europe     Albania <tibble [12 x 4]> <S3: glm> <dbl [4]>
##  3    Africa     Algeria <tibble [12 x 4]> <S3: glm> <dbl [4]>
##  4    Africa      Angola <tibble [12 x 4]> <S3: glm> <dbl [4]>
##  5  Americas   Argentina <tibble [12 x 4]> <S3: glm> <dbl [4]>
##  6   Oceania   Australia <tibble [12 x 4]> <S3: glm> <dbl [4]>
##  7    Europe     Austria <tibble [12 x 4]> <S3: glm> <dbl [4]>
##  8      Asia     Bahrain <tibble [12 x 4]> <S3: glm> <dbl [4]>
##  9      Asia  Bangladesh <tibble [12 x 4]> <S3: glm> <dbl [4]>
## 10    Europe     Belgium <tibble [12 x 4]> <S3: glm> <dbl [4]>
## # ... with 132 more rows

複数の回帰結果を一気に出力

library(magrittr)

by_country_with_reg %>% 
    slice(1:8) %$% 
    stargazer(reg, se = se, type = "html")
Dependent variable:
lifeExp
(1) (2) (3) (4) (5) (6) (7) (8)
year 0.388*** -0.290 0.476*** 0.527*** 0.153 0.188 0.336** 1.106***
(0.023) (0.298) (0.115) (0.071) (0.193) (0.967) (0.136) (0.161)
pop -0.00000*** 0.00001* 0.00000 -0.00000*** 0.00000 -0.00000 -0.00000 -0.00004***
(0.00000) (0.00001) (0.00000) (0.00000) (0.00000) (0.00000) (0.00000) (0.00001)
gdpPercap 0.003 0.002*** 0.001* 0.0005 -0.00001 0.0002 -0.0001 -0.001***
(0.002) (0.001) (0.001) (0.0003) (0.0001) (0.0002) (0.0002) (0.0001)
Constant -728.300*** 607.408 -888.968*** -991.385*** -239.891 -298.845 -583.919** -2,098.114***
(43.360) (573.055) (221.367) (134.694) (368.459) (1,850.263) (255.851) (313.112)
Observations 12 12 12 12 12 12 12 12
Log Likelihood -12.993 -22.293 -18.783 -8.590 -1.532 -9.650 -5.691 -12.933
Akaike Inf. Crit. 33.985 52.587 45.566 25.180 11.064 27.300 19.383 33.865
Note: p<0.1; p<0.05; p<0.01

stargazerのオプション例

by_country_with_reg %>% 
    slice(1:8) %$% 
    stargazer(reg, se = se, type = "html",
        covariate.labels = c("Year", "Population", "GdpPerCap"),
        omit.stat = c("ll", "aic"), omit.table.layout = "bl",
        add.lines = list(c("Continent", continent), c("Country", country)))
lifeExp
(1) (2) (3) (4) (5) (6) (7) (8)
Year 0.388*** -0.290 0.476*** 0.527*** 0.153 0.188 0.336** 1.106***
(0.023) (0.298) (0.115) (0.071) (0.193) (0.967) (0.136) (0.161)
Population -0.00000*** 0.00001* 0.00000 -0.00000*** 0.00000 -0.00000 -0.00000 -0.00004***
(0.00000) (0.00001) (0.00000) (0.00000) (0.00000) (0.00000) (0.00000) (0.00001)
GdpPerCap 0.003 0.002*** 0.001* 0.0005 -0.00001 0.0002 -0.0001 -0.001***
(0.002) (0.001) (0.001) (0.0003) (0.0001) (0.0002) (0.0002) (0.0001)
Constant -728.300*** 607.408 -888.968*** -991.385*** -239.891 -298.845 -583.919** -2,098.114***
(43.360) (573.055) (221.367) (134.694) (368.459) (1,850.263) (255.851) (313.112)
Continent Asia Europe Africa Africa Americas Oceania Europe Asia
Country Afghanistan Albania Algeria Angola Argentina Australia Austria Bahrain
Observations 12 12 12 12 12 12 12 12
Note: p<0.1; p<0.05; p<0.01

異なる水準でネストする場合

by_continent = gapminder %>% 
    group_by(continent) %>% 
    nest() %>% 
    add_column(country = "All")

by_country_with_continent = by_continent %>% 
    bind_rows(by_country) %>% 
    mutate(reg = map(data, lin_reg), se = map(reg, het_se))

by_country_with_continent
## # A tibble: 147 x 5
##    continent               data     country       reg        se
##        <chr>             <list>       <chr>    <list>    <list>
##  1      Asia <tibble [396 x 5]>         All <S3: glm> <dbl [4]>
##  2    Europe <tibble [360 x 5]>         All <S3: glm> <dbl [4]>
##  3    Africa <tibble [624 x 5]>         All <S3: glm> <dbl [4]>
##  4  Americas <tibble [300 x 5]>         All <S3: glm> <dbl [4]>
##  5   Oceania  <tibble [24 x 5]>         All <S3: glm> <dbl [4]>
##  6      Asia  <tibble [12 x 4]> Afghanistan <S3: glm> <dbl [4]>
##  7    Europe  <tibble [12 x 4]>     Albania <S3: glm> <dbl [4]>
##  8    Africa  <tibble [12 x 4]>     Algeria <S3: glm> <dbl [4]>
##  9    Africa  <tibble [12 x 4]>      Angola <S3: glm> <dbl [4]>
## 10  Americas  <tibble [12 x 4]>   Argentina <S3: glm> <dbl [4]>
## # ... with 137 more rows

回帰結果

by_country_with_continent %>% 
    slice(1:8) %$% 
    stargazer(reg, se = se, type = "html",
        covariate.labels = c("Year", "Population", "GdpPerCap"),
        omit.stat = c("ll", "aic"), omit.table.layout = "bl",
        add.lines = list(c("Continent", continent), c("Country", country)))
lifeExp
(1) (2) (3) (4) (5) (6) (7) (8)
Year 0.425*** 0.116*** 0.261*** 0.300*** 0.142*** 0.388*** -0.290 0.476***
(0.027) (0.011) (0.018) (0.021) (0.024) (0.023) (0.298) (0.115)
Population 0.000 -0.000 -0.000 -0.00000** 0.000 -0.00000*** 0.00001* 0.00000
(0.000) (0.000) (0.00000) (0.000) (0.00000) (0.00000) (0.00001) (0.00000)
GdpPerCap 0.0003*** 0.0003*** 0.001*** 0.001*** 0.0002*** 0.003 0.002*** 0.001*
(0.0001) (0.00002) (0.0002) (0.0001) (0.0001) (0.002) (0.001) (0.001)
Constant -783.335*** -161.348*** -470.281*** -533.313*** -209.702*** -728.300*** 607.408 -888.968***
(53.269) (21.736) (34.825) (41.817) (46.967) (43.360) (573.055) (221.367)
Continent Asia Europe Africa Americas Oceania Asia Europe Africa
Country All All All All All Afghanistan Albania Algeria
Observations 396 360 624 300 24 12 12 12
Note: p<0.1; p<0.05; p<0.01

異なるモデルを当てはめる場合

lin_reg2 = function(data) {glm(lifeExp ~ year*pop*gdpPercap, data = data)}

by_continent_with_reg = by_continent %>% 
    mutate(reg1 = map(data, lin_reg), reg2 = map(data, lin_reg2)) %>% 
    gather("model", "reg", starts_with("reg")) %>% 
    mutate(se = map(reg, het_se)) %>% 
    arrange(continent, model)

by_continent_with_reg
## # A tibble: 10 x 6
##    continent               data country model       reg        se
##        <chr>             <list>   <chr> <chr>    <list>    <list>
##  1    Africa <tibble [624 x 5]>     All  reg1 <S3: glm> <dbl [4]>
##  2    Africa <tibble [624 x 5]>     All  reg2 <S3: glm> <dbl [8]>
##  3  Americas <tibble [300 x 5]>     All  reg1 <S3: glm> <dbl [4]>
##  4  Americas <tibble [300 x 5]>     All  reg2 <S3: glm> <dbl [8]>
##  5      Asia <tibble [396 x 5]>     All  reg1 <S3: glm> <dbl [4]>
##  6      Asia <tibble [396 x 5]>     All  reg2 <S3: glm> <dbl [8]>
##  7    Europe <tibble [360 x 5]>     All  reg1 <S3: glm> <dbl [4]>
##  8    Europe <tibble [360 x 5]>     All  reg2 <S3: glm> <dbl [8]>
##  9   Oceania  <tibble [24 x 5]>     All  reg1 <S3: glm> <dbl [4]>
## 10   Oceania  <tibble [24 x 5]>     All  reg2 <S3: glm> <dbl [8]>

回帰結果

by_continent_with_reg %>% slice(1:8) %$% 
    stargazer(reg, se = se, type = "html",
        covariate.labels = c("Year", "Population", "GdpPerCap",
            "Year * Pop", "Year * GdpPerCap", "Pop * GdpPerCap",
            "Year * Pop * GdpPerCap"),
        omit.stat = c("ll", "aic"), omit.table.layout = "bl",
        add.lines = list(c("Continent", continent), c("Model", model)))
lifeExp
(1) (2) (3) (4) (5) (6) (7) (8)
Year 0.261*** 0.256*** 0.300*** 0.465*** 0.425*** 0.355*** 0.116*** 0.233***
(0.018) (0.027) (0.021) (0.037) (0.027) (0.031) (0.011) (0.021)
Population -0.000 0.00000 -0.00000** 0.00000 0.000 -0.00000*** -0.000 0.00000
(0.00000) (0.00000) (0.000) (0.00000) (0.000) (0.00000) (0.000) (0.00000)
GdpPerCap 0.001*** -0.018 0.001*** 0.063*** 0.0003*** -0.014*** 0.0003*** 0.020***
(0.0002) (0.025) (0.0001) (0.009) (0.0001) (0.003) (0.00002) (0.003)
Year * Pop -0.000 -0.000 0.000*** -0.000
(0.000) (0.000) (0.000) (0.000)
Year * GdpPerCap 0.00001 -0.00003*** 0.00001*** -0.00001***
(0.00001) (0.00000) (0.00000) (0.00000)
Pop * GdpPerCap 0.000 -0.000*** 0.000*** -0.000
(0.000) (0.000) (0.000) (0.000)
Year * Pop * GdpPerCap -0.000 0.000*** -0.000*** 0.000
(0.000) (0.000) (0.000) (0.000)
Constant -470.281*** -459.055*** -533.313*** -862.615*** -783.335*** -644.691*** -161.348*** -393.772***
(34.825) (54.114) (41.817) (73.482) (53.269) (61.268) (21.736) (42.134)
Continent Africa Africa Americas Americas Asia Asia Europe Europe
Model reg1 reg2 reg1 reg2 reg1 reg2 reg1 reg2
Observations 624 624 300 300 396 396 360 360
Note: p<0.1; p<0.05; p<0.01

要約統計量の出力

library(desctable)

stats_list = list(
  "Mean" = is.factor ~ percent | (is.double ~ mean),
  "SD"  = is.factor ~ NA | (is.double ~ sd)
)

gapminder %>%
    select(-c(country, year)) %>%
    filter(continent %in% c("Asia", "Europe")) %>%
    group_by(continent) %>%
    desctable(stats = stats_list) %>%
    datatable()