library(readr)
income = read_csv("income.csv")
## Parsed with column specification:
## cols(
## id = col_character(),
## sex = col_character(),
## age = col_double(),
## height = col_double(),
## weight = col_double(),
## income = col_number(),
## generation = col_character()
## )
head(income)
## # A tibble: 6 x 7
## id sex age height weight income generation
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 AU male 70 160. 58.3 201 elder
## 2 AY female 70 156. 44 487 elder
## 3 AB male 69 173. 75.7 424 elder
## 4 AM male 67 166. 69.3 1735 elder
## 5 M male 66 171. 76.5 929 elder
## 6 CM female 66 164. 67.3 397 elder
dim(income)
## [1] 100 7
summary(income)
## id sex age height
## Length:100 Length:100 Min. :20.00 Min. :148.0
## Class :character Class :character 1st Qu.:36.00 1st Qu.:158.1
## Mode :character Mode :character Median :45.00 Median :162.9
## Mean :45.96 Mean :163.7
## 3rd Qu.:57.25 3rd Qu.:170.2
## Max. :70.00 Max. :180.5
## weight income generation
## Min. :28.30 Min. : 24.0 Length:100
## 1st Qu.:48.95 1st Qu.: 134.8 Class :character
## Median :59.95 Median : 298.5 Mode :character
## Mean :59.18 Mean : 434.4
## 3rd Qu.:67.33 3rd Qu.: 607.2
## Max. :85.60 Max. :2351.0
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
stargazer(as.data.frame(income),
type = "html")
Statistic | N | Mean | St. Dev. | Min | Pctl(25) | Pctl(75) | Max |
age | 100 | 45.960 | 13.331 | 20 | 36 | 57.2 | 70 |
height | 100 | 163.746 | 7.692 | 148.000 | 158.100 | 170.175 | 180.500 |
weight | 100 | 59.179 | 12.647 | 28.300 | 48.950 | 67.325 | 85.600 |
income | 100 | 434.400 | 445.777 | 24 | 134.8 | 607.2 | 2,351 |
hist(income$height,freq = FALSE,
xlab = "身長",
ylab = "確率密度",
main = "身長の分布")
library(ggplot2) #ggplotパッケージをロード
ggplot(income,aes(height))+
geom_histogram(aes(y = ..density..),
bins = 10,
colour = "gray",
fill = "blue")+
theme_classic()
ggplot(income,aes(height))+
geom_histogram(aes(y = ..density..),
bins = 10,
colour = "gray",
fill = "blue")+
labs(title = "身長の分布",
x="身長",
y="確率密度")+
theme_classic()
stem(income$height)#チャンクオプションにcomment=""を指定
##
## The decimal point is 1 digit(s) to the right of the |
##
## 14 | 88
## 15 | 001112334
## 15 | 555667777788888899999999
## 16 | 00001112222233333444
## 16 | 556666777778999999
## 17 | 0001111222233333344
## 17 | 5555889
## 18 | 1
stem(income$height,scale=0.5)#チャンクオプションにcomment=""を指定
##
## The decimal point is 1 digit(s) to the right of the |
##
## 14 | 88
## 15 | 001112334555667777788888899999999
## 16 | 00001112222233333444556666777778999999
## 17 | 00011112222333333445555889
## 18 | 1
ggplot(income,aes(x = sex,y = height)) +
geom_boxplot() +
labs(x="性別",y="身長")+
scale_x_discrete(labels=c("女性","男性"))+
theme_classic()
ggplot(income,aes(x = sex,y = height)) +
geom_boxplot() +
coord_flip()+
labs(y="身長",x="性別")+
scale_x_discrete(labels=c("女性","男性"))+
theme_classic()
##p113
ggplot(income,aes(x = sex,y = height)) +
geom_violin() +
geom_point()+
labs(x="性別",y="身長")+
scale_x_discrete(labels=c("女性","男性"))+
theme_classic()
##p114
ggplot(income,aes(height,weight)) +
geom_point()+
labs(x="身長",y="体重")+
theme_classic()
##p115間違いstat_smoothnの引数を入れないとこうなる
ggplot(income,aes(height,weight)) +
geom_point()+
stat_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
labs(x="身長",y="体重") +
theme_classic()
## NULL
##p115-116 身長と体重の相関図 ##ggplot2のstat_smooth()のmethod引数で直線回帰を指定。 ##https://rpubs.com/uri-sy/ggplot2_stat_smooth参照 ##エラー箇所を= lmを= “lm”に修正したら回帰直線がでるようになった。
ggplot(income,aes(height,weight)) +
geom_point()+
stat_smooth(method = "lm", se = FALSE, colour = "black", size = 1) +
labs(x="身長",y="体重") +
theme_classic()
#se = TRUEにするこで初期設定で95%信頼区間も表示される。
ggplot(income,aes(height,weight)) +
geom_point()+
stat_smooth(method = "lm", se = TRUE, colour = "black", size = 1) +
labs(x="身長",y="体重") +
theme_classic()
##p117 身長と体重の観測点95%信頼区間有
ggplot(income,aes(height,weight)) +
geom_point()+
stat_smooth(method = "lm", se = TRUE, colour = "black", size = 1) +
geom_text(aes(y=weight+1,label=id),size=3,vjust=0)+
labs(x="身長(身長と体重の観測点95%信頼区間有)",y="体重") +
theme_classic()
##p117 身長と体重の観測点p2
ggplot(income,aes(height,weight)) +
geom_point()+
stat_smooth(method = "lm", se = FALSE, colour = "black", size = 1) +
geom_text(aes(y=weight+1,label=id),size=3,vjust=0)+
labs(x="身長",y="体重") +
theme_classic()
##p117 身長と体重の観測点 間違いサンプル
ggplot(income,aes(height,weight)) +
geom_point(aes(color=sex))+
geom_smooth(se=FALSE,aes(color=sex))+
scale_color_grey()+
labs(x="身長",y="体重") +
theme_classic()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("ggplot2")
library("DT")
library("broom")
##stat_smoothwをgeom_smoothnにしても同じ直線が引ける ##なお、ハッチのかかっている範囲は「母集団から標本を取ってきて、その平均から95%信頼区間を求める、という作業を100回やったときに、95回はその区間の中に母平均が含まれる」という意味です。 https://bellcurve.jp/statistics/course/8891.html
ggplot(income,aes(height,weight)) +
geom_point()+
geom_smooth(method = "lm", se = TRUE, colour = "black", size = 1) +
labs(x="身長",y="体重") +
theme_classic()
##p118図4.46 身長と体重の散布図(男女色別) scale_color_grey()+
ggplot(income,aes(x = height,y = weight,shape = sex)) +
geom_point(aes(color=sex))+
stat_smooth(method = "lm", se = FALSE,aes(color=sex))+
labs(x="身長",y="体重") +
theme_classic()
##p119 図4.46 身長と体重の散布図(男女ドットの濃淡比) scale_color_grey()+で色を消す
ggplot(income,aes(height,weight)) +
geom_point(aes(color=sex))+
stat_smooth(method = "lm", se = FALSE,aes(color=sex))+
scale_color_grey()+
labs(x="身長",y="体重") +
theme_classic()
##p120 折れ線グラフ
library(gapminder)
data("gapminder")
glimpse(gapminder)
## Observations: 1,704
## Variables: 6
## $ country <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Afgha...
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asi...
## $ year <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 199...
## $ lifeExp <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 4...
## $ pop <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372,...
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.113...
head(gapminder)
## # A tibble: 6 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 Afghanistan Asia 1977 38.4 14880372 786.
tail(gapminder)
## # A tibble: 6 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Zimbabwe Africa 1982 60.4 7636524 789.
## 2 Zimbabwe Africa 1987 62.4 9216418 706.
## 3 Zimbabwe Africa 1992 60.4 10704340 693.
## 4 Zimbabwe Africa 1997 46.8 11404948 792.
## 5 Zimbabwe Africa 2002 40.0 11926563 672.
## 6 Zimbabwe Africa 2007 43.5 12311143 470.
summary(gapminder)
## country continent year lifeExp
## Afghanistan: 12 Africa :624 Min. :1952 Min. :23.60
## Albania : 12 Americas:300 1st Qu.:1966 1st Qu.:48.20
## Algeria : 12 Asia :396 Median :1980 Median :60.71
## Angola : 12 Europe :360 Mean :1980 Mean :59.47
## Argentina : 12 Oceania : 24 3rd Qu.:1993 3rd Qu.:70.85
## Australia : 12 Max. :2007 Max. :82.60
## (Other) :1632
## pop gdpPercap
## Min. :6.001e+04 Min. : 241.2
## 1st Qu.:2.794e+06 1st Qu.: 1202.1
## Median :7.024e+06 Median : 3531.8
## Mean :2.960e+07 Mean : 7215.3
## 3rd Qu.:1.959e+07 3rd Qu.: 9325.5
## Max. :1.319e+09 Max. :113523.1
##
##p123 図4.50
table(gapminder$country)
##
## Afghanistan Albania Algeria
## 12 12 12
## Angola Argentina Australia
## 12 12 12
## Austria Bahrain Bangladesh
## 12 12 12
## Belgium Benin Bolivia
## 12 12 12
## Bosnia and Herzegovina Botswana Brazil
## 12 12 12
## Bulgaria Burkina Faso Burundi
## 12 12 12
## Cambodia Cameroon Canada
## 12 12 12
## Central African Republic Chad Chile
## 12 12 12
## China Colombia Comoros
## 12 12 12
## Congo, Dem. Rep. Congo, Rep. Costa Rica
## 12 12 12
## Cote d'Ivoire Croatia Cuba
## 12 12 12
## Czech Republic Denmark Djibouti
## 12 12 12
## Dominican Republic Ecuador Egypt
## 12 12 12
## El Salvador Equatorial Guinea Eritrea
## 12 12 12
## Ethiopia Finland France
## 12 12 12
## Gabon Gambia Germany
## 12 12 12
## Ghana Greece Guatemala
## 12 12 12
## Guinea Guinea-Bissau Haiti
## 12 12 12
## Honduras Hong Kong, China Hungary
## 12 12 12
## Iceland India Indonesia
## 12 12 12
## Iran Iraq Ireland
## 12 12 12
## Israel Italy Jamaica
## 12 12 12
## Japan Jordan Kenya
## 12 12 12
## Korea, Dem. Rep. Korea, Rep. Kuwait
## 12 12 12
## Lebanon Lesotho Liberia
## 12 12 12
## Libya Madagascar Malawi
## 12 12 12
## Malaysia Mali Mauritania
## 12 12 12
## Mauritius Mexico Mongolia
## 12 12 12
## Montenegro Morocco Mozambique
## 12 12 12
## Myanmar Namibia Nepal
## 12 12 12
## Netherlands New Zealand Nicaragua
## 12 12 12
## Niger Nigeria Norway
## 12 12 12
## Oman Pakistan Panama
## 12 12 12
## Paraguay Peru Philippines
## 12 12 12
## Poland Portugal Puerto Rico
## 12 12 12
## Reunion Romania Rwanda
## 12 12 12
## Sao Tome and Principe Saudi Arabia Senegal
## 12 12 12
## Serbia Sierra Leone Singapore
## 12 12 12
## Slovak Republic Slovenia Somalia
## 12 12 12
## South Africa Spain Sri Lanka
## 12 12 12
## Sudan Swaziland Sweden
## 12 12 12
## Switzerland Syria Taiwan
## 12 12 12
## Tanzania Thailand Togo
## 12 12 12
## Trinidad and Tobago Tunisia Turkey
## 12 12 12
## Uganda United Kingdom United States
## 12 12 12
## Uruguay Venezuela Vietnam
## 12 12 12
## West Bank and Gaza Yemen, Rep. Zambia
## 12 12 12
## Zimbabwe
## 12
library(dplyr)
Japan <- gapminder %>%
filter(country=="Japan") %>%
select(year,lifeExp) #yearとlifeexpだけを抜き出す
##p126
ggplot(Japan,aes(x=year,y=lifeExp))+
geom_point()+
geom_line()+
ggtitle("日本人の平均寿命")+
labs(x="年",y="平均寿命")+
theme_classic()
##p128 filter(country==“Japan” | ##country==“chaina”)の中の|はorを表す:https://kazutan.github.io/kazutanR/hands_on_170730/filter.html
jpn.chi <- gapminder %>%
filter(country=="China" | country=="Japan") %>%
select(year,country,lifeExp) #year,country,lifeExpだけを抜き出す
jpn.chi
## # A tibble: 24 x 3
## year country lifeExp
## <int> <fct> <dbl>
## 1 1952 China 44
## 2 1957 China 50.5
## 3 1962 China 44.5
## 4 1967 China 58.4
## 5 1972 China 63.1
## 6 1977 China 64.0
## 7 1982 China 65.5
## 8 1987 China 67.3
## 9 1992 China 68.7
## 10 1997 China 70.4
## # ... with 14 more rows
tail(jpn.chi)
## # A tibble: 6 x 3
## year country lifeExp
## <int> <fct> <dbl>
## 1 1982 Japan 77.1
## 2 1987 Japan 78.7
## 3 1992 Japan 79.4
## 4 1997 Japan 80.7
## 5 2002 Japan 82
## 6 2007 Japan 82.6
##p129 図4.57日本人と中国人の平均寿命の折れ線グラフ
ggplot(jpn.chi, aes(x=year, y=lifeExp, group=country)) +
geom_point() +
geom_line(aes(linetype=country)) +
scale_linetype_manual(values=c("twodash","dottend")) +
ggtitle("平均寿命(日本と中国)1952-2007") +
labs(x="年", y="平均寿命") +
scale_linetype_discrete(name="国名",labels = c("日本","中国"))+
theme_classic()
## Scale for 'linetype' is already present. Adding another scale for 'linetype',
## which will replace the existing scale.
##p131 図4.58衆院選データの最初の6行
library(readr)
hr <- read_csv("hr96-17.csv",na = ".")
## Parsed with column specification:
## cols(
## year = col_double(),
## ku = col_character(),
## kun = col_double(),
## party = col_character(),
## party_code = col_double(),
## name = col_character(),
## age = col_double(),
## nocand = col_double(),
## vote = col_double(),
## voteshare = col_double(),
## eligible = col_double()
## )
head(hr)
## # A tibble: 6 x 11
## year ku kun party party_code name age nocand vote voteshare eligible
## <dbl> <chr> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1996 aichi 1 NFP 8 KAWA~ 47 7 66876 40 346774
## 2 1996 aichi 1 LDP 1 IMAE~ 72 7 42969 25.7 346774
## 3 1996 aichi 1 DPJ 3 SATO~ 53 7 33503 20.1 346774
## 4 1996 aichi 1 JCP 2 IWAN~ 43 7 22209 13.3 346774
## 5 1996 aichi 1 othe~ 100 ITO,~ 51 7 616 0.4 346774
## 6 1996 aichi 1 koku~ 22 YAMA~ 51 7 566 0.3 346774
library(dplyr)
shinzo <- hr %>%
filter(name=="ABE, SHINZO") %>%
select(year,ku,kun,party,age,nocand,vote,voteshare)
shinzo
## # A tibble: 8 x 8
## year ku kun party age nocand vote voteshare
## <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1996 yamaguchi 4 LDP 42 3 93459 54.3
## 2 2000 yamaguchi 4 LDP 45 2 121835 71.7
## 3 2003 yamaguchi 4 LDP 49 3 140347 79.7
## 4 2005 yamaguchi 4 LDP 50 3 137701 73.6
## 5 2009 yamaguchi 4 LDP 54 3 121365 64.3
## 6 2012 yamaguchi 4 LDP 58 3 118696 78.2
## 7 2014 yamaguchi 4 LDP 60 3 100829 76.3
## 8 2017 yamaguchi 4 LDP 63 5 104825 72.6
mean(shinzo$voteshare)
## [1] 71.33375
##p133 図4.61 安倍晋三氏の得票率
ggplot(shinzo,aes(x = year, y = voteshare))+
geom_point()+
geom_line()+
ggtitle("安倍晋三氏の得票率:総選挙(1966-2017)")+
geom_hline(yintercept = 71.33,#安倍氏の得票率平均に点線を入れる
col = "black",
linetype = "dotted",
size = 1) +
geom_text(aes(y = voteshare + 1, label = voteshare), size = 4, vjust =
0)+
labs(x="衆院選挙年", y="得票率")+
theme_classic()
##p134 図4.62
shigeru <- hr %>%
filter(name=="ISHIBA, SHIGERU") %>%
select(year,ku,kun,party,age,nocand,vote,voteshare)
shigeru#取り出したデータを表示する
## # A tibble: 8 x 8
## year ku kun party age nocand vote voteshare
## <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1996 tottori 1 independent 39 4 94147 62.5
## 2 2000 tottori 1 LDP 43 4 91163 49.1
## 3 2003 tottori 1 LDP 46 3 114283 71.6
## 4 2005 tottori 1 LDP 48 4 106805 59.2
## 5 2009 tottori 1 LDP 52 4 118121 62
## 6 2012 tottori 1 LDP 55 3 124746 84.5
## 7 2014 tottori 1 LDP 57 2 93105 80.3
## 8 2017 tottori 1 LDP 60 2 106425 83.6
##p135 図4.63 石橋氏得票率平均
mean(shigeru$voteshare)
## [1] 69.10375
ggplot(shigeru,aes(x = year, y = voteshare))+
geom_point()+
geom_line()+
ggtitle("石破茂氏の得票率:総選挙(1996-2017)")+
geom_hline(yintercept = 71.33,#石破氏の得票率平均に点線を入れる
col = "black",
linetype = "dotted",
size = 1) +
geom_text(aes(y = voteshare + 1, label = voteshare), size = 4, vjust =
0)+
labs(x="衆院選挙年", y="得票率(%)")+
theme_classic()
##図4.65安倍氏と石破氏の選挙結果データ
abe_ishiba <- hr %>%
filter(name=="ISHIBA, SHIGERU" | name=="ABE, SHINZO")%>%
select(year,ku,kun,party,age,nocand,vote,voteshare)
abe_ishiba
## # A tibble: 16 x 8
## year ku kun party age nocand vote voteshare
## <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1996 tottori 1 independent 39 4 94147 62.5
## 2 2000 tottori 1 LDP 43 4 91163 49.1
## 3 2003 tottori 1 LDP 46 3 114283 71.6
## 4 2005 tottori 1 LDP 48 4 106805 59.2
## 5 2009 tottori 1 LDP 52 4 118121 62
## 6 2012 tottori 1 LDP 55 3 124746 84.5
## 7 2014 tottori 1 LDP 57 2 93105 80.3
## 8 2017 tottori 1 LDP 60 2 106425 83.6
## 9 1996 yamaguchi 4 LDP 42 3 93459 54.3
## 10 2000 yamaguchi 4 LDP 45 2 121835 71.7
## 11 2003 yamaguchi 4 LDP 49 3 140347 79.7
## 12 2005 yamaguchi 4 LDP 50 3 137701 73.6
## 13 2009 yamaguchi 4 LDP 54 3 121365 64.3
## 14 2012 yamaguchi 4 LDP 58 3 118696 78.2
## 15 2014 yamaguchi 4 LDP 60 3 100829 76.3
## 16 2017 yamaguchi 4 LDP 63 5 104825 72.6
##p137図4.66安倍氏と石破氏の得票率 ##レジェンドの位置は他にも選べる:legend.position=“none”, “left”, “right”,“bottom”など ##この面台は、nameがネックとなりでいなかった。原因 ##◆◆誤り:nameは、abe_ishibanの中にない。二つをグループ化するものはkuである。
ggplot(data=abe_ishiba, aes(x = year, y=voteshare, group=ku))+
geom_line(aes(linetype=ku))+
geom_point()+
scale_linetype_manual(values=c("twodash", "dotted")) +
ggtitle("安倍晋三氏と石破茂氏の得票率:総選挙(1996-2017)")+
geom_text(aes(y = voteshare + 1, label = voteshare), size = 4,
vjust = 0)+
labs(x="衆院選挙年", y="得票率(%)")+
theme(legend.position=c(0.9, 0.15))+
scale_linetype_discrete(name="候補者名",
labels = c("安倍晋三","石破茂"))+
theme_classic()
## Scale for 'linetype' is already present. Adding another scale for 'linetype',
## which will replace the existing scale.
##p139図4.67衆院選データの最初の6行
hr <- read_csv("hr96-17.csv",na = ".")
## Parsed with column specification:
## cols(
## year = col_double(),
## ku = col_character(),
## kun = col_double(),
## party = col_character(),
## party_code = col_double(),
## name = col_character(),
## age = col_double(),
## nocand = col_double(),
## vote = col_double(),
## voteshare = col_double(),
## eligible = col_double()
## )
head(hr)
## # A tibble: 6 x 11
## year ku kun party party_code name age nocand vote voteshare eligible
## <dbl> <chr> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1996 aichi 1 NFP 8 KAWA~ 47 7 66876 40 346774
## 2 1996 aichi 1 LDP 1 IMAE~ 72 7 42969 25.7 346774
## 3 1996 aichi 1 DPJ 3 SATO~ 53 7 33503 20.1 346774
## 4 1996 aichi 1 JCP 2 IWAN~ 43 7 22209 13.3 346774
## 5 1996 aichi 1 othe~ 100 ITO,~ 51 7 616 0.4 346774
## 6 1996 aichi 1 koku~ 22 YAMA~ 51 7 566 0.3 346774
##p140図4.68データフレームldp自民党の中身を表示する。
ldp <- hr %>%
filter(party=="LDP") %>%
group_by(year) %>%
summarise(no.obs = n(),
vsmeans = mean(voteshare)) %>%
mutate(party=c(rep("LDP",8))) %>%
as.data.frame()
ldp
## year no.obs vsmeans party
## 1 1996 288 40.90590 LDP
## 2 2000 271 46.23764 LDP
## 3 2003 277 48.12310 LDP
## 4 2005 290 49.42276 LDP
## 5 2009 291 40.55911 LDP
## 6 2012 289 45.74083 LDP
## 7 2014 283 50.91095 LDP
## 8 2017 277 50.37884 LDP
##p140図4.69(voteshare:得票率)の平均値
mean(ldp$vsmean)
## [1] 46.53489
##p141図 衆院選での自民党得票率 ##◆◆誤りvsmeanー>正解vsmeans
ggplot(ldp,aes(x=year,y =vsmeans))+
geom_point()+
geom_line()+
geom_hline(yintercept = 46.53,
col = "lightblue",
type = "dotted",
size = 1)+
ggtitle("自民党平均得票率") +
labs(x="衆院選挙年", y="得票率(%)")+
theme_classic()
## Warning: Ignoring unknown parameters: type
##p142民主党得票率
head(hr)
## # A tibble: 6 x 11
## year ku kun party party_code name age nocand vote voteshare eligible
## <dbl> <chr> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1996 aichi 1 NFP 8 KAWA~ 47 7 66876 40 346774
## 2 1996 aichi 1 LDP 1 IMAE~ 72 7 42969 25.7 346774
## 3 1996 aichi 1 DPJ 3 SATO~ 53 7 33503 20.1 346774
## 4 1996 aichi 1 JCP 2 IWAN~ 43 7 22209 13.3 346774
## 5 1996 aichi 1 othe~ 100 ITO,~ 51 7 616 0.4 346774
## 6 1996 aichi 1 koku~ 22 YAMA~ 51 7 566 0.3 346774
dpj <- hr %>%
filter(party=="DPJ") %>%
group_by(year) %>%
summarise(no.obs = n(),
vsmean = mean(voteshare)) %>%
mutate(party=c(rep("DPJ",7))) %>%
as.data.frame()
dpj
## year no.obs vsmean party
## 1 1996 143 22.07483 DPJ
## 2 2000 242 33.56322 DPJ
## 3 2003 267 40.49438 DPJ
## 4 2005 289 37.51107 DPJ
## 5 2009 271 51.77638 DPJ
## 6 2012 264 25.59242 DPJ
## 7 2014 178 36.00449 DPJ
##P145自民・民主得票率:エラーのため完成せず。vsmeanがない? ldp_dpj <- union(ldp, dpj) %>% arrange(desc(party), year)
##4.4回帰分析とその結果の解釈 ##p148
stargazer(as.data.frame(income),
type = "html")
Statistic | N | Mean | St. Dev. | Min | Pctl(25) | Pctl(75) | Max |
age | 100 | 45.960 | 13.331 | 20 | 36 | 57.2 | 70 |
height | 100 | 163.746 | 7.692 | 148.000 | 158.100 | 170.175 | 180.500 |
weight | 100 | 59.179 | 12.647 | 28.300 | 48.950 | 67.325 | 85.600 |
income | 100 | 434.400 | 445.777 | 24 | 134.8 | 607.2 | 2,351 |
##単回帰分析
model_1 <- lm(weight ~ age, data = income)#単回帰分析
##単回帰分析2
model_2 <- lm(weight ~ age+height, data = income)#単回帰分析2
##重回帰分析
model_3 <- lm(weight ~ age+height+income, data = income)#重回帰分析
##model_4の回帰分析のため、sexをダミー変数:男1、女0に置き換える
income <- income %>%
mutate(sex = as.numeric(sex=="male"))
##model_4重回帰分析
model_4 <- lm(weight ~ age+height+income+sex, data = income)#重回帰分析4
##回帰分析結果
stargazer(model_1,model_2,model_3,model_4,
type = "html")
Dependent variable: | ||||
weight | ||||
(1) | (2) | (3) | (4) | |
age | -0.028 | -0.043 | -0.049 | -0.048 |
(0.096) | (0.066) | (0.067) | (0.067) | |
height | 1.201*** | 1.194*** | 1.220*** | |
(0.114) | (0.115) | (0.153) | ||
income | 0.001 | 0.001 | ||
(0.002) | (0.002) | |||
sex | -0.619 | |||
(2.340) | ||||
Constant | 60.475*** | -135.481*** | -134.539*** | -138.591*** |
(4.582) | (18.862) | (18.999) | (24.471) | |
Observations | 100 | 100 | 100 | 100 |
R2 | 0.001 | 0.534 | 0.536 | 0.536 |
Adjusted R2 | -0.009 | 0.525 | 0.521 | 0.516 |
Residual Std. Error | 12.705 (df = 98) | 8.721 (df = 97) | 8.751 (df = 96) | 8.794 (df = 95) |
F Statistic | 0.087 (df = 1; 98) | 55.602*** (df = 2; 97) | 36.919*** (df = 3; 96) | 27.439*** (df = 4; 95) |
Note: | p<0.1; p<0.05; p<0.01 |
library(jtools)
library(ggstance)
##
## Attaching package: 'ggstance'
## The following objects are masked from 'package:ggplot2':
##
## geom_errorbarh, GeomErrorbarh
##p150重回帰分析結果プロット(モデル4)→多軸グラフにプロットする方法はないか?
plot_summs(model_4)
##p153モンティー・ホールシミュレーション#ドア1,2,3を無作為に100回選ぶなおset.seed()関数は、()が同じなら同じ乱数を発生させるset.seed(1)https://to-kei.net/r-beginner/set-seed/
set.seed(1)
trials <- 100
prize <- sample(1:3,trials,replace = TRUE)
prize
## [1] 1 3 1 2 1 3 3 2 2 3 3 1 1 1 2 2 2 2 3 1 3 1 1 1 1 2 1 1 2 2 2 1 3 1 3 2 2
## [38] 2 2 3 2 1 3 2 1 1 3 2 2 3 3 2 2 2 2 1 2 2 2 2 1 3 3 2 3 3 2 3 3 1 1 1 1 3
## [75] 2 3 1 1 2 1 1 1 1 3 2 1 1 3 3 3 2 2 2 3 2 2 3 3 3 1
stay <- prize == 1
head(stay)
## [1] TRUE FALSE TRUE FALSE TRUE FALSE
move <- prize !=1
head(move)
## [1] FALSE TRUE FALSE TRUE FALSE TRUE
##p155 モンティー・ホールの提案を受けずに「ドア1」のとどまった場合、に高級車が当たるprobstayと「ドア1」から選択しなおした場合にあたるprob.move
prob.stay <- prov.move <- rep(NA, trials)
prov.move
## [1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [26] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [51] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [76] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
##p159-162 birthday paradox
qbirthday(0.5,classes=365,coincident =2 )
## [1] 23
qbirthday(0.99,classes=365,coincident =2 )
## [1] 57
pbirthday(23,classes =365,coincident =2)
## [1] 0.5072972
pbirthday(58,classes =365,coincident =2)
## [1] 0.991665
x <- 1:10
y <- 1:10 # plot(x 軸のデータ, y 軸のデータ, オプション)
plot(x, y) # 範囲は自動で決まる(xlim=c(1,10)を指定した場合と同じ)
plot(x, y, xlim=c(10,1)) # x 軸の正の向きを左向きにすることも出来る
plot(sin, -pi, 2*pi) # plot(関数名, 下限, 上限)
gauss.density <- function(x) 1/sqrt(2*pi)*exp(-x^2/2) # 標準正規分布の密度
plot(gauss.density,-3,3)
x <- seq(-3,3,length=50) # x 方向の分点
y <- x # y 方向の分点
rho <- 0.9 # 2次元正規分布の定数
gauss3d <- function(x,y) { # 2次元正規分布の関数
1/(2*pi*sqrt(1-rho^2))*exp(-(x^2-2*rho*x*y+y^2) / (2*(1-rho^2)))
}
z <- outer(x,y,gauss3d) # 外積をとって z 方向の大きさを求める
z[is.na(z)] <- 1 # 欠損値を1で補う
persp(x, y, z, theta = 30, phi = 30, expand = 0.5, col = "lightblue")
##p159 birthday paradox 作りたい基本形はこれ。これに対し後は修飾
dob <- 1:365
prob <- sapply(dob, pbirthday)
plot(dob, prob,xlim= c(0, 100))
##修飾 type=“l”:線プロット(折れ線グラフ)、xlim= c(0, 100)):xの定義域を1→100で定義、lwd = 1:線分の幅 (line width) を番号で指定する.値が大きいほど太い線になる. ##http://cse.naro.affrc.go.jp/takezawa/r-tips/r/48.html ##http://cse.naro.affrc.go.jp/takezawa/r-tips/r/53.html
dob <- 1:365
prob <- sapply(dob, pbirthday)
plot(dob, prob,type = "l", col ="tomato","lwd" = 2,xlim= c(0, 100),
xlab="the number of people in a group",
ylab="probability",
main="prob. that at least two people have the same DOB:pbirthday()")
abline(h = c(0,1),col ="gray")
abline(v = c(23,58),col ="gray")