library(readr)
income = read_csv("income.csv")
## Parsed with column specification:
## cols(
##   id = col_character(),
##   sex = col_character(),
##   age = col_double(),
##   height = col_double(),
##   weight = col_double(),
##   income = col_number(),
##   generation = col_character()
## )
head(income)
## # A tibble: 6 x 7
##   id    sex      age height weight income generation
##   <chr> <chr>  <dbl>  <dbl>  <dbl>  <dbl> <chr>     
## 1 AU    male      70   160.   58.3    201 elder     
## 2 AY    female    70   156.   44      487 elder     
## 3 AB    male      69   173.   75.7    424 elder     
## 4 AM    male      67   166.   69.3   1735 elder     
## 5 M     male      66   171.   76.5    929 elder     
## 6 CM    female    66   164.   67.3    397 elder
dim(income)
## [1] 100   7
summary(income)
##       id                sex                 age            height     
##  Length:100         Length:100         Min.   :20.00   Min.   :148.0  
##  Class :character   Class :character   1st Qu.:36.00   1st Qu.:158.1  
##  Mode  :character   Mode  :character   Median :45.00   Median :162.9  
##                                        Mean   :45.96   Mean   :163.7  
##                                        3rd Qu.:57.25   3rd Qu.:170.2  
##                                        Max.   :70.00   Max.   :180.5  
##      weight          income        generation       
##  Min.   :28.30   Min.   :  24.0   Length:100        
##  1st Qu.:48.95   1st Qu.: 134.8   Class :character  
##  Median :59.95   Median : 298.5   Mode  :character  
##  Mean   :59.18   Mean   : 434.4                     
##  3rd Qu.:67.33   3rd Qu.: 607.2                     
##  Max.   :85.60   Max.   :2351.0
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
stargazer(as.data.frame(income),
type = "html")
Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
age 100 45.960 13.331 20 36 57.2 70
height 100 163.746 7.692 148.000 158.100 170.175 180.500
weight 100 59.179 12.647 28.300 48.950 67.325 85.600
income 100 434.400 445.777 24 134.8 607.2 2,351
hist(income$height,freq = FALSE,
     xlab = "身長",
     ylab = "確率密度",
     main = "身長の分布")

library(ggplot2) #ggplotパッケージをロード
ggplot(income,aes(height))+
geom_histogram(aes(y = ..density..),
bins = 10,
colour = "gray",
fill = "blue")+
  theme_classic()

ggplot(income,aes(height))+
geom_histogram(aes(y = ..density..),
bins = 10,
colour = "gray",
fill = "blue")+
 labs(title = "身長の分布",
x="身長",
y="確率密度")+
  theme_classic()

stem(income$height)#チャンクオプションにcomment=""を指定
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   14 | 88
##   15 | 001112334
##   15 | 555667777788888899999999
##   16 | 00001112222233333444
##   16 | 556666777778999999
##   17 | 0001111222233333344
##   17 | 5555889
##   18 | 1
stem(income$height,scale=0.5)#チャンクオプションにcomment=""を指定
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   14 | 88
##   15 | 001112334555667777788888899999999
##   16 | 00001112222233333444556666777778999999
##   17 | 00011112222333333445555889
##   18 | 1
ggplot(income,aes(x = sex,y = height)) + 
  geom_boxplot() +
  labs(x="性別",y="身長")+
    scale_x_discrete(labels=c("女性","男性"))+
    theme_classic()

ggplot(income,aes(x = sex,y = height)) + 
  geom_boxplot() +
  coord_flip()+
  labs(y="身長",x="性別")+
    scale_x_discrete(labels=c("女性","男性"))+
    theme_classic()

##p113

ggplot(income,aes(x = sex,y = height)) + 
  geom_violin() +
  geom_point()+
    labs(x="性別",y="身長")+
    scale_x_discrete(labels=c("女性","男性"))+
    theme_classic()

##p114

ggplot(income,aes(height,weight)) + 
  geom_point()+
  labs(x="身長",y="体重")+
    theme_classic()

##p115間違いstat_smoothnの引数を入れないとこうなる

ggplot(income,aes(height,weight)) + 
  geom_point()+
  stat_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

  labs(x="身長",y="体重") +
    theme_classic()
## NULL

##p115-116 身長と体重の相関図 ##ggplot2のstat_smooth()のmethod引数で直線回帰を指定。 ##https://rpubs.com/uri-sy/ggplot2_stat_smooth参照 ##エラー箇所を= lmを= “lm”に修正したら回帰直線がでるようになった。

ggplot(income,aes(height,weight)) + 
  geom_point()+
  stat_smooth(method = "lm", se = FALSE, colour = "black", size = 1) +
  labs(x="身長",y="体重") +
    theme_classic()

#se = TRUEにするこで初期設定で95%信頼区間も表示される。

ggplot(income,aes(height,weight)) + 
  geom_point()+
  stat_smooth(method = "lm", se = TRUE, colour = "black", size = 1) +
  labs(x="身長",y="体重") +
    theme_classic()

##p117 身長と体重の観測点95%信頼区間有

ggplot(income,aes(height,weight)) +
    geom_point()+
  stat_smooth(method = "lm", se = TRUE, colour = "black", size = 1)  +
  geom_text(aes(y=weight+1,label=id),size=3,vjust=0)+
  labs(x="身長(身長と体重の観測点95%信頼区間有)",y="体重") +
    theme_classic()

##p117 身長と体重の観測点p2

ggplot(income,aes(height,weight)) +
    geom_point()+
  stat_smooth(method = "lm", se = FALSE, colour = "black", size = 1)  +
  geom_text(aes(y=weight+1,label=id),size=3,vjust=0)+
  labs(x="身長",y="体重") +
    theme_classic()

##p117 身長と体重の観測点 間違いサンプル

ggplot(income,aes(height,weight)) + 
  geom_point(aes(color=sex))+
  geom_smooth(se=FALSE,aes(color=sex))+
  scale_color_grey()+
  labs(x="身長",y="体重") +
    theme_classic()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("ggplot2")
library("DT")
library("broom")

##stat_smoothwをgeom_smoothnにしても同じ直線が引ける ##なお、ハッチのかかっている範囲は「母集団から標本を取ってきて、その平均から95%信頼区間を求める、という作業を100回やったときに、95回はその区間の中に母平均が含まれる」という意味です。 https://bellcurve.jp/statistics/course/8891.html

ggplot(income,aes(height,weight)) + 
  geom_point()+
  geom_smooth(method = "lm", se = TRUE, colour = "black", size = 1) +
  labs(x="身長",y="体重") +
    theme_classic()

##p118図4.46 身長と体重の散布図(男女色別) scale_color_grey()+

ggplot(income,aes(x = height,y = weight,shape = sex)) + 
  geom_point(aes(color=sex))+
  stat_smooth(method = "lm", se = FALSE,aes(color=sex))+
  labs(x="身長",y="体重") +
    theme_classic()

##p119 図4.46 身長と体重の散布図(男女ドットの濃淡比) scale_color_grey()+で色を消す

ggplot(income,aes(height,weight)) + 
  geom_point(aes(color=sex))+
  stat_smooth(method = "lm", se = FALSE,aes(color=sex))+
  scale_color_grey()+
  labs(x="身長",y="体重") +
    theme_classic()

##p120 折れ線グラフ

library(gapminder)
data("gapminder")
glimpse(gapminder)
## Observations: 1,704
## Variables: 6
## $ country   <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Afgha...
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asi...
## $ year      <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 199...
## $ lifeExp   <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 4...
## $ pop       <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372,...
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.113...
head(gapminder)
## # A tibble: 6 x 6
##   country     continent  year lifeExp      pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Afghanistan Asia       1952    28.8  8425333      779.
## 2 Afghanistan Asia       1957    30.3  9240934      821.
## 3 Afghanistan Asia       1962    32.0 10267083      853.
## 4 Afghanistan Asia       1967    34.0 11537966      836.
## 5 Afghanistan Asia       1972    36.1 13079460      740.
## 6 Afghanistan Asia       1977    38.4 14880372      786.
tail(gapminder)
## # A tibble: 6 x 6
##   country  continent  year lifeExp      pop gdpPercap
##   <fct>    <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Zimbabwe Africa     1982    60.4  7636524      789.
## 2 Zimbabwe Africa     1987    62.4  9216418      706.
## 3 Zimbabwe Africa     1992    60.4 10704340      693.
## 4 Zimbabwe Africa     1997    46.8 11404948      792.
## 5 Zimbabwe Africa     2002    40.0 11926563      672.
## 6 Zimbabwe Africa     2007    43.5 12311143      470.
summary(gapminder)
##         country        continent        year         lifeExp     
##  Afghanistan:  12   Africa  :624   Min.   :1952   Min.   :23.60  
##  Albania    :  12   Americas:300   1st Qu.:1966   1st Qu.:48.20  
##  Algeria    :  12   Asia    :396   Median :1980   Median :60.71  
##  Angola     :  12   Europe  :360   Mean   :1980   Mean   :59.47  
##  Argentina  :  12   Oceania : 24   3rd Qu.:1993   3rd Qu.:70.85  
##  Australia  :  12                  Max.   :2007   Max.   :82.60  
##  (Other)    :1632                                                
##       pop              gdpPercap       
##  Min.   :6.001e+04   Min.   :   241.2  
##  1st Qu.:2.794e+06   1st Qu.:  1202.1  
##  Median :7.024e+06   Median :  3531.8  
##  Mean   :2.960e+07   Mean   :  7215.3  
##  3rd Qu.:1.959e+07   3rd Qu.:  9325.5  
##  Max.   :1.319e+09   Max.   :113523.1  
## 

##p123 図4.50

table(gapminder$country)
## 
##              Afghanistan                  Albania                  Algeria 
##                       12                       12                       12 
##                   Angola                Argentina                Australia 
##                       12                       12                       12 
##                  Austria                  Bahrain               Bangladesh 
##                       12                       12                       12 
##                  Belgium                    Benin                  Bolivia 
##                       12                       12                       12 
##   Bosnia and Herzegovina                 Botswana                   Brazil 
##                       12                       12                       12 
##                 Bulgaria             Burkina Faso                  Burundi 
##                       12                       12                       12 
##                 Cambodia                 Cameroon                   Canada 
##                       12                       12                       12 
## Central African Republic                     Chad                    Chile 
##                       12                       12                       12 
##                    China                 Colombia                  Comoros 
##                       12                       12                       12 
##         Congo, Dem. Rep.              Congo, Rep.               Costa Rica 
##                       12                       12                       12 
##            Cote d'Ivoire                  Croatia                     Cuba 
##                       12                       12                       12 
##           Czech Republic                  Denmark                 Djibouti 
##                       12                       12                       12 
##       Dominican Republic                  Ecuador                    Egypt 
##                       12                       12                       12 
##              El Salvador        Equatorial Guinea                  Eritrea 
##                       12                       12                       12 
##                 Ethiopia                  Finland                   France 
##                       12                       12                       12 
##                    Gabon                   Gambia                  Germany 
##                       12                       12                       12 
##                    Ghana                   Greece                Guatemala 
##                       12                       12                       12 
##                   Guinea            Guinea-Bissau                    Haiti 
##                       12                       12                       12 
##                 Honduras         Hong Kong, China                  Hungary 
##                       12                       12                       12 
##                  Iceland                    India                Indonesia 
##                       12                       12                       12 
##                     Iran                     Iraq                  Ireland 
##                       12                       12                       12 
##                   Israel                    Italy                  Jamaica 
##                       12                       12                       12 
##                    Japan                   Jordan                    Kenya 
##                       12                       12                       12 
##         Korea, Dem. Rep.              Korea, Rep.                   Kuwait 
##                       12                       12                       12 
##                  Lebanon                  Lesotho                  Liberia 
##                       12                       12                       12 
##                    Libya               Madagascar                   Malawi 
##                       12                       12                       12 
##                 Malaysia                     Mali               Mauritania 
##                       12                       12                       12 
##                Mauritius                   Mexico                 Mongolia 
##                       12                       12                       12 
##               Montenegro                  Morocco               Mozambique 
##                       12                       12                       12 
##                  Myanmar                  Namibia                    Nepal 
##                       12                       12                       12 
##              Netherlands              New Zealand                Nicaragua 
##                       12                       12                       12 
##                    Niger                  Nigeria                   Norway 
##                       12                       12                       12 
##                     Oman                 Pakistan                   Panama 
##                       12                       12                       12 
##                 Paraguay                     Peru              Philippines 
##                       12                       12                       12 
##                   Poland                 Portugal              Puerto Rico 
##                       12                       12                       12 
##                  Reunion                  Romania                   Rwanda 
##                       12                       12                       12 
##    Sao Tome and Principe             Saudi Arabia                  Senegal 
##                       12                       12                       12 
##                   Serbia             Sierra Leone                Singapore 
##                       12                       12                       12 
##          Slovak Republic                 Slovenia                  Somalia 
##                       12                       12                       12 
##             South Africa                    Spain                Sri Lanka 
##                       12                       12                       12 
##                    Sudan                Swaziland                   Sweden 
##                       12                       12                       12 
##              Switzerland                    Syria                   Taiwan 
##                       12                       12                       12 
##                 Tanzania                 Thailand                     Togo 
##                       12                       12                       12 
##      Trinidad and Tobago                  Tunisia                   Turkey 
##                       12                       12                       12 
##                   Uganda           United Kingdom            United States 
##                       12                       12                       12 
##                  Uruguay                Venezuela                  Vietnam 
##                       12                       12                       12 
##       West Bank and Gaza              Yemen, Rep.                   Zambia 
##                       12                       12                       12 
##                 Zimbabwe 
##                       12
library(dplyr)
Japan <- gapminder %>%
  filter(country=="Japan") %>%
  select(year,lifeExp) #yearとlifeexpだけを抜き出す

##p126

ggplot(Japan,aes(x=year,y=lifeExp))+
  geom_point()+
  geom_line()+
  ggtitle("日本人の平均寿命")+
  labs(x="年",y="平均寿命")+
    theme_classic()

##p128 filter(country==“Japan” | ##country==“chaina”)の中の|はorを表す:https://kazutan.github.io/kazutanR/hands_on_170730/filter.html

jpn.chi <- gapminder %>%
 filter(country=="China" | country=="Japan") %>%
  select(year,country,lifeExp) #year,country,lifeExpだけを抜き出す
jpn.chi
## # A tibble: 24 x 3
##     year country lifeExp
##    <int> <fct>     <dbl>
##  1  1952 China      44  
##  2  1957 China      50.5
##  3  1962 China      44.5
##  4  1967 China      58.4
##  5  1972 China      63.1
##  6  1977 China      64.0
##  7  1982 China      65.5
##  8  1987 China      67.3
##  9  1992 China      68.7
## 10  1997 China      70.4
## # ... with 14 more rows
tail(jpn.chi)
## # A tibble: 6 x 3
##    year country lifeExp
##   <int> <fct>     <dbl>
## 1  1982 Japan      77.1
## 2  1987 Japan      78.7
## 3  1992 Japan      79.4
## 4  1997 Japan      80.7
## 5  2002 Japan      82  
## 6  2007 Japan      82.6

##p129 図4.57日本人と中国人の平均寿命の折れ線グラフ

ggplot(jpn.chi, aes(x=year, y=lifeExp, group=country)) +
  geom_point() +
  geom_line(aes(linetype=country)) +
  scale_linetype_manual(values=c("twodash","dottend")) +
  ggtitle("平均寿命(日本と中国)1952-2007") +
  labs(x="年", y="平均寿命") +
  scale_linetype_discrete(name="国名",labels = c("日本","中国"))+
  theme_classic()
## Scale for 'linetype' is already present. Adding another scale for 'linetype',
## which will replace the existing scale.

##p131 図4.58衆院選データの最初の6行

library(readr)
hr <- read_csv("hr96-17.csv",na = ".")
## Parsed with column specification:
## cols(
##   year = col_double(),
##   ku = col_character(),
##   kun = col_double(),
##   party = col_character(),
##   party_code = col_double(),
##   name = col_character(),
##   age = col_double(),
##   nocand = col_double(),
##   vote = col_double(),
##   voteshare = col_double(),
##   eligible = col_double()
## )
head(hr)
## # A tibble: 6 x 11
##    year ku      kun party party_code name    age nocand  vote voteshare eligible
##   <dbl> <chr> <dbl> <chr>      <dbl> <chr> <dbl>  <dbl> <dbl>     <dbl>    <dbl>
## 1  1996 aichi     1 NFP            8 KAWA~    47      7 66876      40     346774
## 2  1996 aichi     1 LDP            1 IMAE~    72      7 42969      25.7   346774
## 3  1996 aichi     1 DPJ            3 SATO~    53      7 33503      20.1   346774
## 4  1996 aichi     1 JCP            2 IWAN~    43      7 22209      13.3   346774
## 5  1996 aichi     1 othe~        100 ITO,~    51      7   616       0.4   346774
## 6  1996 aichi     1 koku~         22 YAMA~    51      7   566       0.3   346774
library(dplyr)
shinzo <- hr %>%
filter(name=="ABE, SHINZO") %>%
select(year,ku,kun,party,age,nocand,vote,voteshare)
shinzo
## # A tibble: 8 x 8
##    year ku          kun party   age nocand   vote voteshare
##   <dbl> <chr>     <dbl> <chr> <dbl>  <dbl>  <dbl>     <dbl>
## 1  1996 yamaguchi     4 LDP      42      3  93459      54.3
## 2  2000 yamaguchi     4 LDP      45      2 121835      71.7
## 3  2003 yamaguchi     4 LDP      49      3 140347      79.7
## 4  2005 yamaguchi     4 LDP      50      3 137701      73.6
## 5  2009 yamaguchi     4 LDP      54      3 121365      64.3
## 6  2012 yamaguchi     4 LDP      58      3 118696      78.2
## 7  2014 yamaguchi     4 LDP      60      3 100829      76.3
## 8  2017 yamaguchi     4 LDP      63      5 104825      72.6
mean(shinzo$voteshare)
## [1] 71.33375

##p133 図4.61 安倍晋三氏の得票率

ggplot(shinzo,aes(x = year, y = voteshare))+
  geom_point()+
  geom_line()+
  ggtitle("安倍晋三氏の得票率:総選挙(1966-2017)")+
geom_hline(yintercept = 71.33,#安倍氏の得票率平均に点線を入れる
          col = "black",
          linetype = "dotted",
          size = 1) +
  geom_text(aes(y = voteshare + 1, label = voteshare), size = 4, vjust =
              0)+
  labs(x="衆院選挙年", y="得票率")+
  theme_classic()

##p134 図4.62

shigeru <- hr %>%
filter(name=="ISHIBA, SHIGERU") %>%
select(year,ku,kun,party,age,nocand,vote,voteshare)
shigeru#取り出したデータを表示する
## # A tibble: 8 x 8
##    year ku        kun party         age nocand   vote voteshare
##   <dbl> <chr>   <dbl> <chr>       <dbl>  <dbl>  <dbl>     <dbl>
## 1  1996 tottori     1 independent    39      4  94147      62.5
## 2  2000 tottori     1 LDP            43      4  91163      49.1
## 3  2003 tottori     1 LDP            46      3 114283      71.6
## 4  2005 tottori     1 LDP            48      4 106805      59.2
## 5  2009 tottori     1 LDP            52      4 118121      62  
## 6  2012 tottori     1 LDP            55      3 124746      84.5
## 7  2014 tottori     1 LDP            57      2  93105      80.3
## 8  2017 tottori     1 LDP            60      2 106425      83.6

##p135 図4.63 石橋氏得票率平均

mean(shigeru$voteshare)
## [1] 69.10375
ggplot(shigeru,aes(x = year, y = voteshare))+
  geom_point()+
  geom_line()+
  ggtitle("石破茂氏の得票率:総選挙(1996-2017)")+
geom_hline(yintercept = 71.33,#石破氏の得票率平均に点線を入れる
          col = "black",
          linetype = "dotted",
          size = 1) +
  geom_text(aes(y = voteshare + 1, label = voteshare), size = 4, vjust =
              0)+
  labs(x="衆院選挙年", y="得票率(%)")+
    theme_classic()

##図4.65安倍氏と石破氏の選挙結果データ

abe_ishiba <- hr %>%
  filter(name=="ISHIBA, SHIGERU" | name=="ABE, SHINZO")%>%
  select(year,ku,kun,party,age,nocand,vote,voteshare)
abe_ishiba
## # A tibble: 16 x 8
##     year ku          kun party         age nocand   vote voteshare
##    <dbl> <chr>     <dbl> <chr>       <dbl>  <dbl>  <dbl>     <dbl>
##  1  1996 tottori       1 independent    39      4  94147      62.5
##  2  2000 tottori       1 LDP            43      4  91163      49.1
##  3  2003 tottori       1 LDP            46      3 114283      71.6
##  4  2005 tottori       1 LDP            48      4 106805      59.2
##  5  2009 tottori       1 LDP            52      4 118121      62  
##  6  2012 tottori       1 LDP            55      3 124746      84.5
##  7  2014 tottori       1 LDP            57      2  93105      80.3
##  8  2017 tottori       1 LDP            60      2 106425      83.6
##  9  1996 yamaguchi     4 LDP            42      3  93459      54.3
## 10  2000 yamaguchi     4 LDP            45      2 121835      71.7
## 11  2003 yamaguchi     4 LDP            49      3 140347      79.7
## 12  2005 yamaguchi     4 LDP            50      3 137701      73.6
## 13  2009 yamaguchi     4 LDP            54      3 121365      64.3
## 14  2012 yamaguchi     4 LDP            58      3 118696      78.2
## 15  2014 yamaguchi     4 LDP            60      3 100829      76.3
## 16  2017 yamaguchi     4 LDP            63      5 104825      72.6

##p137図4.66安倍氏と石破氏の得票率 ##レジェンドの位置は他にも選べる:legend.position=“none”, “left”, “right”,“bottom”など ##この面台は、nameがネックとなりでいなかった。原因 ##◆◆誤り:nameは、abe_ishibanの中にない。二つをグループ化するものはkuである。

ggplot(data=abe_ishiba, aes(x = year, y=voteshare, group=ku))+
  geom_line(aes(linetype=ku))+
  geom_point()+
  scale_linetype_manual(values=c("twodash", "dotted")) +
  ggtitle("安倍晋三氏と石破茂氏の得票率:総選挙(1996-2017)")+
  geom_text(aes(y = voteshare + 1, label = voteshare), size = 4,
            vjust = 0)+
labs(x="衆院選挙年", y="得票率(%)")+
theme(legend.position=c(0.9, 0.15))+
scale_linetype_discrete(name="候補者名",
labels = c("安倍晋三","石破茂"))+
theme_classic()
## Scale for 'linetype' is already present. Adding another scale for 'linetype',
## which will replace the existing scale.

##p139図4.67衆院選データの最初の6行

hr <- read_csv("hr96-17.csv",na = ".")
## Parsed with column specification:
## cols(
##   year = col_double(),
##   ku = col_character(),
##   kun = col_double(),
##   party = col_character(),
##   party_code = col_double(),
##   name = col_character(),
##   age = col_double(),
##   nocand = col_double(),
##   vote = col_double(),
##   voteshare = col_double(),
##   eligible = col_double()
## )
head(hr)
## # A tibble: 6 x 11
##    year ku      kun party party_code name    age nocand  vote voteshare eligible
##   <dbl> <chr> <dbl> <chr>      <dbl> <chr> <dbl>  <dbl> <dbl>     <dbl>    <dbl>
## 1  1996 aichi     1 NFP            8 KAWA~    47      7 66876      40     346774
## 2  1996 aichi     1 LDP            1 IMAE~    72      7 42969      25.7   346774
## 3  1996 aichi     1 DPJ            3 SATO~    53      7 33503      20.1   346774
## 4  1996 aichi     1 JCP            2 IWAN~    43      7 22209      13.3   346774
## 5  1996 aichi     1 othe~        100 ITO,~    51      7   616       0.4   346774
## 6  1996 aichi     1 koku~         22 YAMA~    51      7   566       0.3   346774

##p140図4.68データフレームldp自民党の中身を表示する。

ldp <- hr %>%
filter(party=="LDP") %>%
  group_by(year) %>%
  summarise(no.obs = n(),
            vsmeans = mean(voteshare)) %>%
            mutate(party=c(rep("LDP",8))) %>%
as.data.frame()
ldp           
##   year no.obs  vsmeans party
## 1 1996    288 40.90590   LDP
## 2 2000    271 46.23764   LDP
## 3 2003    277 48.12310   LDP
## 4 2005    290 49.42276   LDP
## 5 2009    291 40.55911   LDP
## 6 2012    289 45.74083   LDP
## 7 2014    283 50.91095   LDP
## 8 2017    277 50.37884   LDP

##p140図4.69(voteshare:得票率)の平均値

mean(ldp$vsmean)
## [1] 46.53489

##p141図 衆院選での自民党得票率 ##◆◆誤りvsmeanー>正解vsmeans

ggplot(ldp,aes(x=year,y =vsmeans))+
geom_point()+
geom_line()+
geom_hline(yintercept = 46.53,
           col = "lightblue",
           type = "dotted",
           size = 1)+
ggtitle("自民党平均得票率") +
  labs(x="衆院選挙年", y="得票率(%)")+
theme_classic()
## Warning: Ignoring unknown parameters: type

##p142民主党得票率

head(hr)
## # A tibble: 6 x 11
##    year ku      kun party party_code name    age nocand  vote voteshare eligible
##   <dbl> <chr> <dbl> <chr>      <dbl> <chr> <dbl>  <dbl> <dbl>     <dbl>    <dbl>
## 1  1996 aichi     1 NFP            8 KAWA~    47      7 66876      40     346774
## 2  1996 aichi     1 LDP            1 IMAE~    72      7 42969      25.7   346774
## 3  1996 aichi     1 DPJ            3 SATO~    53      7 33503      20.1   346774
## 4  1996 aichi     1 JCP            2 IWAN~    43      7 22209      13.3   346774
## 5  1996 aichi     1 othe~        100 ITO,~    51      7   616       0.4   346774
## 6  1996 aichi     1 koku~         22 YAMA~    51      7   566       0.3   346774
dpj <- hr %>%
  filter(party=="DPJ") %>%
  group_by(year) %>%
  summarise(no.obs = n(),
  vsmean = mean(voteshare)) %>%
mutate(party=c(rep("DPJ",7))) %>%
  as.data.frame()
dpj
##   year no.obs   vsmean party
## 1 1996    143 22.07483   DPJ
## 2 2000    242 33.56322   DPJ
## 3 2003    267 40.49438   DPJ
## 4 2005    289 37.51107   DPJ
## 5 2009    271 51.77638   DPJ
## 6 2012    264 25.59242   DPJ
## 7 2014    178 36.00449   DPJ

##P145自民・民主得票率:エラーのため完成せず。vsmeanがない? ldp_dpj <- union(ldp, dpj) %>% arrange(desc(party), year)

##4.4回帰分析とその結果の解釈 ##p148

stargazer(as.data.frame(income),
type = "html")
Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
age 100 45.960 13.331 20 36 57.2 70
height 100 163.746 7.692 148.000 158.100 170.175 180.500
weight 100 59.179 12.647 28.300 48.950 67.325 85.600
income 100 434.400 445.777 24 134.8 607.2 2,351

##単回帰分析

model_1 <- lm(weight ~ age, data = income)#単回帰分析 

##単回帰分析2

model_2 <- lm(weight ~ age+height, data = income)#単回帰分析2

##重回帰分析

model_3 <- lm(weight ~ age+height+income, data = income)#重回帰分析

##model_4の回帰分析のため、sexをダミー変数:男1、女0に置き換える

income <- income %>% 
mutate(sex = as.numeric(sex=="male"))

##model_4重回帰分析

model_4 <- lm(weight ~ age+height+income+sex, data = income)#重回帰分析4

##回帰分析結果

stargazer(model_1,model_2,model_3,model_4,
          type = "html")
Dependent variable:
weight
(1) (2) (3) (4)
age -0.028 -0.043 -0.049 -0.048
(0.096) (0.066) (0.067) (0.067)
height 1.201*** 1.194*** 1.220***
(0.114) (0.115) (0.153)
income 0.001 0.001
(0.002) (0.002)
sex -0.619
(2.340)
Constant 60.475*** -135.481*** -134.539*** -138.591***
(4.582) (18.862) (18.999) (24.471)
Observations 100 100 100 100
R2 0.001 0.534 0.536 0.536
Adjusted R2 -0.009 0.525 0.521 0.516
Residual Std. Error 12.705 (df = 98) 8.721 (df = 97) 8.751 (df = 96) 8.794 (df = 95)
F Statistic 0.087 (df = 1; 98) 55.602*** (df = 2; 97) 36.919*** (df = 3; 96) 27.439*** (df = 4; 95)
Note: p<0.1; p<0.05; p<0.01
library(jtools)
library(ggstance)
## 
## Attaching package: 'ggstance'
## The following objects are masked from 'package:ggplot2':
## 
##     geom_errorbarh, GeomErrorbarh

##p150重回帰分析結果プロット(モデル4)→多軸グラフにプロットする方法はないか?

plot_summs(model_4)

##p153モンティー・ホールシミュレーション#ドア1,2,3を無作為に100回選ぶなおset.seed()関数は、()が同じなら同じ乱数を発生させるset.seed(1)https://to-kei.net/r-beginner/set-seed/

set.seed(1)
trials <- 100
prize <- sample(1:3,trials,replace = TRUE)
prize
##   [1] 1 3 1 2 1 3 3 2 2 3 3 1 1 1 2 2 2 2 3 1 3 1 1 1 1 2 1 1 2 2 2 1 3 1 3 2 2
##  [38] 2 2 3 2 1 3 2 1 1 3 2 2 3 3 2 2 2 2 1 2 2 2 2 1 3 3 2 3 3 2 3 3 1 1 1 1 3
##  [75] 2 3 1 1 2 1 1 1 1 3 2 1 1 3 3 3 2 2 2 3 2 2 3 3 3 1
stay <- prize == 1
head(stay)
## [1]  TRUE FALSE  TRUE FALSE  TRUE FALSE
move <- prize !=1
head(move)
## [1] FALSE  TRUE FALSE  TRUE FALSE  TRUE

##p155 モンティー・ホールの提案を受けずに「ドア1」のとどまった場合、に高級車が当たるprobstayと「ドア1」から選択しなおした場合にあたるprob.move

prob.stay <- prov.move <- rep(NA, trials)
prov.move
##   [1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
##  [26] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
##  [51] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
##  [76] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA

##p159-162 birthday paradox

qbirthday(0.5,classes=365,coincident =2 )
## [1] 23
qbirthday(0.99,classes=365,coincident =2 )
## [1] 57
pbirthday(23,classes =365,coincident =2)
## [1] 0.5072972
pbirthday(58,classes =365,coincident =2)
## [1] 0.991665
 x <- 1:10
 y <- 1:10                # plot(x 軸のデータ, y 軸のデータ, オプション)
 plot(x, y)               # 範囲は自動で決まる(xlim=c(1,10)を指定した場合と同じ)

 plot(x, y, xlim=c(10,1)) # x 軸の正の向きを左向きにすることも出来る

 plot(sin, -pi, 2*pi)                                   # plot(関数名, 下限, 上限)

 gauss.density <- function(x) 1/sqrt(2*pi)*exp(-x^2/2)  # 標準正規分布の密度
 plot(gauss.density,-3,3)

 x <- seq(-3,3,length=50)     # x 方向の分点
 y <- x                       # y 方向の分点
 rho <- 0.9                   # 2次元正規分布の定数
 gauss3d <- function(x,y) {   # 2次元正規分布の関数
   1/(2*pi*sqrt(1-rho^2))*exp(-(x^2-2*rho*x*y+y^2) / (2*(1-rho^2)))
 }
 z <- outer(x,y,gauss3d)      # 外積をとって z 方向の大きさを求める
 z[is.na(z)] <- 1             # 欠損値を1で補う
 persp(x, y, z, theta = 30, phi = 30, expand = 0.5, col = "lightblue")

##p159 birthday paradox 作りたい基本形はこれ。これに対し後は修飾

dob <- 1:365
prob <- sapply(dob, pbirthday)
 plot(dob, prob,xlim= c(0, 100))

##修飾 type=“l”:線プロット(折れ線グラフ)、xlim= c(0, 100)):xの定義域を1→100で定義、lwd = 1:線分の幅 (line width) を番号で指定する.値が大きいほど太い線になる. ##http://cse.naro.affrc.go.jp/takezawa/r-tips/r/48.html ##http://cse.naro.affrc.go.jp/takezawa/r-tips/r/53.html

dob <- 1:365
prob <- sapply(dob, pbirthday)
 plot(dob, prob,type = "l", col ="tomato","lwd" = 2,xlim= c(0, 100),
xlab="the number of people in a group",
ylab="probability",
main="prob. that at least two people have the same DOB:pbirthday()")
abline(h = c(0,1),col ="gray")
abline(v = c(23,58),col ="gray")