`sep = "\t"` 을 사용한 것에 유의.
library(knitr)
library(magrittr)
# Read the tab-separated Gini tables (before and after tax). The files have no
# header row, so the columns are auto-named V1, V2, ...
# `stringsAsFactors = TRUE` is spelled out because the default changed to FALSE
# in R 4.0.0; the printed output below shows V1 (country) as a factor.
Gini_b_tax <- read.table(file = "../data/Gini_before_tax.txt",
                         header = FALSE,
                         sep = "\t",
                         stringsAsFactors = TRUE)
Gini_a_tax <- read.table(file = "../data/Gini_after_tax.txt",
                         header = FALSE,
                         sep = "\t",
                         stringsAsFactors = TRUE)
# Inspect the structure of the before-tax table.
Gini_b_tax %>% str
## 'data.frame': 34 obs. of 8 variables:
## $ V1: Factor w/ 34 levels "Australia","Austria",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ V2: num NA NA NA 0.385 NA NA NA NA 0.343 NA ...
## $ V3: num NA NA 0.449 0.395 NA NA 0.373 NA 0.387 0.38 ...
## $ V4: num NA NA NA 0.403 NA NA 0.396 NA NA 0.37 ...
## $ V5: num 0.467 NA 0.472 0.43 0.441 0.442 0.417 NA 0.479 0.473 ...
## $ V6: num 0.476 NA 0.464 0.44 NA 0.472 0.415 NA 0.478 0.49 ...
## $ V7: num 0.465 0.433 0.494 0.436 0.414 0.474 0.417 0.504 0.483 0.485 ...
## $ V8: num 0.468 0.472 0.469 0.441 0.426 0.444 0.416 0.458 0.465 0.483 ...
# Inspect the structure of the after-tax table in the same way.
Gini_a_tax %>% str
## 'data.frame': 34 obs. of 8 variables:
## $ V1: Factor w/ 34 levels "Australia","Austria",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ V2: num NA NA NA 0.304 NA NA NA NA 0.235 NA ...
## $ V3: num NA 0.236 0.274 0.293 NA NA 0.221 NA 0.209 0.3 ...
## $ V4: num NA NA NA 0.287 NA 0.232 0.226 NA NA 0.29 ...
## $ V5: num 0.309 0.238 0.287 0.289 0.427 0.257 0.215 NA 0.218 0.277 ...
## $ V6: num 0.317 0.252 0.289 0.318 NA 0.26 0.226 NA 0.247 0.287 ...
## $ V7: num 0.315 0.265 0.271 0.317 0.403 0.268 0.232 0.349 0.254 0.288 ...
## $ V8: num 0.336 0.261 0.259 0.324 0.394 0.256 0.248 0.315 0.259 0.293 ...
# Pair each country (V1) with the before- and after-tax Gini values taken from
# the last column (V8) of each table, then display the combined data frame.
Gini_b_a <- data.frame(
  Country = Gini_b_tax[["V1"]],
  Before = Gini_b_tax[["V8"]],
  After = Gini_a_tax[["V8"]]
)
print(Gini_b_a)
## Country Before After
## 1 Australia 0.468 0.336
## 2 Austria 0.472 0.261
## 3 Belgium 0.469 0.259
## 4 Canada 0.441 0.324
## 5 Chile 0.426 0.394
## 6 Czech_Republic 0.444 0.256
## 7 Denmark 0.416 0.248
## 8 Estonia 0.458 0.315
## 9 Finland 0.465 0.259
## 10 France 0.483 0.293
## 11 Germany 0.504 0.295
## 12 Greece 0.436 0.307
## 13 Hungary 0.466 0.272
## 14 Iceland 0.382 0.301
## 15 Ireland NA 0.293
## 16 Israel 0.498 0.371
## 17 Italy 0.534 0.337
## 18 Japan 0.462 0.329
## 19 Luxembourg 0.482 0.288
## 20 Mexico 0.494 0.476
## 21 Netherlands 0.426 0.294
## 22 New_Zealand 0.455 0.330
## 23 Norway 0.410 0.250
## 24 Poland 0.470 0.305
## 25 Portugal 0.521 0.353
## 26 Slovak_Republic 0.416 0.257
## 27 Slovenia 0.423 0.236
## 28 South_Korea 0.344 0.315
## 29 Spain 0.461 0.317
## 30 Sweden 0.426 0.259
## 31 Switzerland 0.409 0.303
## 32 Turkey 0.470 0.409
## 33 United_Kingdom 0.456 0.345
## 34 United_States 0.486 0.378
# Improvement = Before - After, i.e. how far the Gini coefficient drops after
# tax. The result is NA wherever either value is missing.
Gini_b_a$Improvement <- with(Gini_b_a, Before - After)
Gini_b_a
## Country Before After Improvement
## 1 Australia 0.468 0.336 0.132
## 2 Austria 0.472 0.261 0.211
## 3 Belgium 0.469 0.259 0.210
## 4 Canada 0.441 0.324 0.117
## 5 Chile 0.426 0.394 0.032
## 6 Czech_Republic 0.444 0.256 0.188
## 7 Denmark 0.416 0.248 0.168
## 8 Estonia 0.458 0.315 0.143
## 9 Finland 0.465 0.259 0.206
## 10 France 0.483 0.293 0.190
## 11 Germany 0.504 0.295 0.209
## 12 Greece 0.436 0.307 0.129
## 13 Hungary 0.466 0.272 0.194
## 14 Iceland 0.382 0.301 0.081
## 15 Ireland NA 0.293 NA
## 16 Israel 0.498 0.371 0.127
## 17 Italy 0.534 0.337 0.197
## 18 Japan 0.462 0.329 0.133
## 19 Luxembourg 0.482 0.288 0.194
## 20 Mexico 0.494 0.476 0.018
## 21 Netherlands 0.426 0.294 0.132
## 22 New_Zealand 0.455 0.330 0.125
## 23 Norway 0.410 0.250 0.160
## 24 Poland 0.470 0.305 0.165
## 25 Portugal 0.521 0.353 0.168
## 26 Slovak_Republic 0.416 0.257 0.159
## 27 Slovenia 0.423 0.236 0.187
## 28 South_Korea 0.344 0.315 0.029
## 29 Spain 0.461 0.317 0.144
## 30 Sweden 0.426 0.259 0.167
## 31 Switzerland 0.409 0.303 0.106
## 32 Turkey 0.470 0.409 0.061
## 33 United_Kingdom 0.456 0.345 0.111
## 34 United_States 0.486 0.378 0.108
# Rows in ascending order of Improvement; order() places the NA row last by
# default.
Gini_b_a[order(Gini_b_a$Improvement), ]
## Country Before After Improvement
## 20 Mexico 0.494 0.476 0.018
## 28 South_Korea 0.344 0.315 0.029
## 5 Chile 0.426 0.394 0.032
## 32 Turkey 0.470 0.409 0.061
## 14 Iceland 0.382 0.301 0.081
## 31 Switzerland 0.409 0.303 0.106
## 34 United_States 0.486 0.378 0.108
## 33 United_Kingdom 0.456 0.345 0.111
## 4 Canada 0.441 0.324 0.117
## 22 New_Zealand 0.455 0.330 0.125
## 16 Israel 0.498 0.371 0.127
## 12 Greece 0.436 0.307 0.129
## 1 Australia 0.468 0.336 0.132
## 21 Netherlands 0.426 0.294 0.132
## 18 Japan 0.462 0.329 0.133
## 8 Estonia 0.458 0.315 0.143
## 29 Spain 0.461 0.317 0.144
## 26 Slovak_Republic 0.416 0.257 0.159
## 23 Norway 0.410 0.250 0.160
## 24 Poland 0.470 0.305 0.165
## 30 Sweden 0.426 0.259 0.167
## 7 Denmark 0.416 0.248 0.168
## 25 Portugal 0.521 0.353 0.168
## 27 Slovenia 0.423 0.236 0.187
## 6 Czech_Republic 0.444 0.256 0.188
## 10 France 0.483 0.293 0.190
## 13 Hungary 0.466 0.272 0.194
## 19 Luxembourg 0.482 0.288 0.194
## 17 Italy 0.534 0.337 0.197
## 9 Finland 0.465 0.259 0.206
## 11 Germany 0.504 0.295 0.209
## 3 Belgium 0.469 0.259 0.210
## 2 Austria 0.472 0.261 0.211
## 15 Ireland NA 0.293 NA
# Same ascending sort written as a pipeline: `.` stands for the data frame
# piped in from the left-hand side.
Gini_b_a %>%
.[.$Improvement %>% order, ]
## Country Before After Improvement
## 20 Mexico 0.494 0.476 0.018
## 28 South_Korea 0.344 0.315 0.029
## 5 Chile 0.426 0.394 0.032
## 32 Turkey 0.470 0.409 0.061
## 14 Iceland 0.382 0.301 0.081
## 31 Switzerland 0.409 0.303 0.106
## 34 United_States 0.486 0.378 0.108
## 33 United_Kingdom 0.456 0.345 0.111
## 4 Canada 0.441 0.324 0.117
## 22 New_Zealand 0.455 0.330 0.125
## 16 Israel 0.498 0.371 0.127
## 12 Greece 0.436 0.307 0.129
## 1 Australia 0.468 0.336 0.132
## 21 Netherlands 0.426 0.294 0.132
## 18 Japan 0.462 0.329 0.133
## 8 Estonia 0.458 0.315 0.143
## 29 Spain 0.461 0.317 0.144
## 26 Slovak_Republic 0.416 0.257 0.159
## 23 Norway 0.410 0.250 0.160
## 24 Poland 0.470 0.305 0.165
## 30 Sweden 0.426 0.259 0.167
## 7 Denmark 0.416 0.248 0.168
## 25 Portugal 0.521 0.353 0.168
## 27 Slovenia 0.423 0.236 0.187
## 6 Czech_Republic 0.444 0.256 0.188
## 10 France 0.483 0.293 0.190
## 13 Hungary 0.466 0.272 0.194
## 19 Luxembourg 0.482 0.288 0.194
## 17 Italy 0.534 0.337 0.197
## 9 Finland 0.465 0.259 0.206
## 11 Germany 0.504 0.295 0.209
## 3 Belgium 0.469 0.259 0.210
## 2 Austria 0.472 0.261 0.211
## 15 Ireland NA 0.293 NA
`decreasing = TRUE` 추가.
Gini_b_a %>%
.[.$Improvement %>% order(decreasing = TRUE), ]
## Country Before After Improvement
## 2 Austria 0.472 0.261 0.211
## 3 Belgium 0.469 0.259 0.210
## 11 Germany 0.504 0.295 0.209
## 9 Finland 0.465 0.259 0.206
## 17 Italy 0.534 0.337 0.197
## 13 Hungary 0.466 0.272 0.194
## 19 Luxembourg 0.482 0.288 0.194
## 10 France 0.483 0.293 0.190
## 6 Czech_Republic 0.444 0.256 0.188
## 27 Slovenia 0.423 0.236 0.187
## 25 Portugal 0.521 0.353 0.168
## 7 Denmark 0.416 0.248 0.168
## 30 Sweden 0.426 0.259 0.167
## 24 Poland 0.470 0.305 0.165
## 23 Norway 0.410 0.250 0.160
## 26 Slovak_Republic 0.416 0.257 0.159
## 29 Spain 0.461 0.317 0.144
## 8 Estonia 0.458 0.315 0.143
## 18 Japan 0.462 0.329 0.133
## 1 Australia 0.468 0.336 0.132
## 21 Netherlands 0.426 0.294 0.132
## 12 Greece 0.436 0.307 0.129
## 16 Israel 0.498 0.371 0.127
## 22 New_Zealand 0.455 0.330 0.125
## 4 Canada 0.441 0.324 0.117
## 33 United_Kingdom 0.456 0.345 0.111
## 34 United_States 0.486 0.378 0.108
## 31 Switzerland 0.409 0.303 0.106
## 14 Iceland 0.382 0.301 0.081
## 32 Turkey 0.470 0.409 0.061
## 5 Chile 0.426 0.394 0.032
## 28 South_Korea 0.344 0.315 0.029
## 20 Mexico 0.494 0.476 0.018
## 15 Ireland NA 0.293 NA
`barplot()` 이 적합함. `barplot(height, ...)` 에서 `height` 가 매트릭스일 때는 막대는 열의 각 요소를 크기대로 쌓아놓은 형태가 되므로, `t()` 를 이용하여 transpose시킨 후 `barplot()` 을 적용. `t()` 는 data frame 을 매트릭스로 변환하여 반환하지만, `barplot()` 에 매트릭스를 넘긴다는 점을 분명히 하기 위해 `as.matrix` 를 한 번 더 적용함. 세전, 세후 비교를 위해 쌓아 놓기보다는 옆에 늘어세우는 게 나으므로 `beside = TRUE` 를 적용하고 각 막대의 이름으로 나라 이름을 사용.
Gini_b_a %>%
.[, 2:3] %>%
t %>%
as.matrix %>%
barplot(beside = TRUE,
names.arg = Gini_b_a$Country)
`o_improvement` 로 저장하여 지속적으로 활용.
o_improvement <- Gini_b_a %>%
.$Improvement %>%
order(decreasing = TRUE)
# Country names in descending order of improvement; the country whose
# improvement is NA comes last.
Gini_b_a$Country[o_improvement]
## [1] Austria Belgium Germany Finland
## [5] Italy Hungary Luxembourg France
## [9] Czech_Republic Slovenia Portugal Denmark
## [13] Sweden Poland Norway Slovak_Republic
## [17] Spain Estonia Japan Australia
## [21] Netherlands Greece Israel New_Zealand
## [25] Canada United_Kingdom United_States Switzerland
## [29] Iceland Turkey Chile South_Korea
## [33] Mexico Ireland
## 34 Levels: Australia Austria Belgium Canada Chile ... United_States
# Side-by-side Before/After bars, countries in descending order of improvement.
# t() already returns a matrix for a data frame, so no extra coercion is needed.
Gini_b_a[o_improvement, c("Before", "After")] %>%
  t %>%
  barplot(beside = TRUE,
          names.arg = Gini_b_a$Country[o_improvement])
`las = 2` 를 이용하여 막대 이름을 눕힘.
Gini_b_a %>%
.[o_improvement, 2:3] %>%
t %>%
as.matrix %>%
barplot(beside = TRUE,
names.arg = Gini_b_a$Country[o_improvement],
las = 2)
`par("mai")` 를 조정.
old_par <- par(no.readonly = TRUE)
# Show the current margins in inches (bottom, left, top, right).
par("mai")
## [1] 1.02 0.82 0.82 0.42
# Enlarge the bottom margin to leave more room for the las = 2 country labels.
par(mai = c(1.5, 0.8, 0.8, 0.4))
Gini_b_a[o_improvement, c("Before", "After")] %>%
  t %>%
  barplot(beside = TRUE,
          names.arg = Gini_b_a$Country[o_improvement],
          las = 2)
# Put back the graphics parameters saved in old_par.
par(old_par)
# Save the modifiable graphics parameters so they can be restored afterwards.
old_par <- par(no.readonly = TRUE)
par("mai")
## [1] 1.02 0.82 0.82 0.42
# Larger bottom margin for the las = 2 country labels.
par(mai = c(1.5, 0.8, 0.8, 0.4))
Gini_b_a[o_improvement, c("Before", "After")] %>%
  t %>%
  barplot(beside = TRUE,
          names.arg = Gini_b_a$Country[o_improvement],
          las = 2)
# Dashed red reference line at a Gini coefficient of 0.4.
abline(h = 0.4, lty = 2, col = "red")
par(old_par)
# Final vertical chart: ordered bars, legend, reference line and title.
old_par <- par(no.readonly = TRUE)
par("mai")
## [1] 1.02 0.82 0.82 0.42
par(mai = c(1.5, 0.8, 0.8, 0.4))
Gini_b_a[o_improvement, c("Before", "After")] %>%
  t %>%
  barplot(beside = TRUE,
          names.arg = Gini_b_a$Country[o_improvement],
          las = 2,
          # Legend position is given in plot (user) coordinates.
          legend.text = c("Before Tax", "After Tax"),
          args.legend = list(x = 105, y = 0.62))
abline(h = 0.4, lty = 2, col = "red")
title(main = "Gini Coefficients of OECD Countries")
# Restore the graphics parameters saved at the top of this chunk.
par(old_par)
`las = 1` 로 설정하면,
Gini_b_a %>%
.[o_improvement, 2:3] %>%
t %>%
as.matrix %>%
barplot(beside = TRUE,
horiz = TRUE,
names.arg = Gini_b_a$Country[o_improvement],
las = 1)
`par("mai")` 를 조정.
old_par <- par(no.readonly = TRUE)
# Show the current margins in inches (bottom, left, top, right).
par("mai")
## [1] 1.02 0.82 0.82 0.42
# Enlarge the left margin to leave more room for the horizontal country labels.
par(mai = c(1.0, 1.5, 0.8, 0.4))
Gini_b_a[o_improvement, c("Before", "After")] %>%
  t %>%
  barplot(beside = TRUE,
          horiz = TRUE,
          names.arg = Gini_b_a$Country[o_improvement],
          las = 1)
# Put back the graphics parameters saved in old_par.
par(old_par)
# Horizontal chart drawn in ascending order of improvement.
old_par <- par(no.readonly = TRUE)
par("mai")
## [1] 1.02 0.82 0.82 0.42
par(mai = c(1.0, 1.5, 0.8, 0.4))
# Ascending order with the NA row placed first (na.last = FALSE).
o_improvement_2 <- order(Gini_b_a$Improvement, na.last = FALSE)
Gini_b_a[o_improvement_2, c("Before", "After")] %>%
  t %>%
  barplot(beside = TRUE,
          horiz = TRUE,
          names.arg = Gini_b_a$Country[o_improvement_2],
          las = 1)
par(old_par)
`na.last = FALSE` 를 추가한 것임.
# Horizontal chart in ascending order with a vertical reference line.
old_par <- par(no.readonly = TRUE)
par("mai")
## [1] 1.02 0.82 0.82 0.42
par(mai = c(1.0, 1.5, 0.8, 0.4))
Gini_b_a[o_improvement_2, c("Before", "After")] %>%
  t %>%
  barplot(beside = TRUE,
          horiz = TRUE,
          names.arg = Gini_b_a$Country[o_improvement_2],
          las = 1)
# Dashed red reference line at a Gini coefficient of 0.4.
abline(v = 0.4, lty = 2, col = "red")
par(old_par)
# Final horizontal chart: ordered bars, legend, reference line and title.
old_par <- par(no.readonly = TRUE)
par("mai")
## [1] 1.02 0.82 0.82 0.42
# Left margin widened for the country labels; right margin widened to 0.8.
par(mai = c(1.0, 1.5, 0.8, 0.8))
Gini_b_a[o_improvement_2, c("Before", "After")] %>%
  t %>%
  barplot(beside = TRUE,
          horiz = TRUE,
          names.arg = Gini_b_a$Country[o_improvement_2],
          las = 1,
          # Legend position is given in plot (user) coordinates.
          legend.text = c("Before Tax", "After Tax"),
          args.legend = list(x = 0.67, y = 110))
abline(v = 0.4, lty = 2, col = "red")
title(main = "Gini Coefficients of OECD Countries")
par(old_par)
`reshape2` package 를 검색 목록에 등록
library(tidyverse)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
## Registered S3 method overwritten by 'rvest':
## method from
## read_xml.response xml2
## ── Attaching packages ─────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::extract() masks magrittr::extract()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::set_names() masks magrittr::set_names()
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Long format via reshape2::melt: one row per (Country, Tax) pair with the
# value stored in Gini_Coef. The result is then displayed.
Gini_b_a_melt <- melt(
  Gini_b_a,
  id.vars = "Country",
  measure.vars = c("Before", "After"),
  variable.name = "Tax",
  value.name = "Gini_Coef"
)
print(Gini_b_a_melt)
## Country Tax Gini_Coef
## 1 Australia Before 0.468
## 2 Austria Before 0.472
## 3 Belgium Before 0.469
## 4 Canada Before 0.441
## 5 Chile Before 0.426
## 6 Czech_Republic Before 0.444
## 7 Denmark Before 0.416
## 8 Estonia Before 0.458
## 9 Finland Before 0.465
## 10 France Before 0.483
## 11 Germany Before 0.504
## 12 Greece Before 0.436
## 13 Hungary Before 0.466
## 14 Iceland Before 0.382
## 15 Ireland Before NA
## 16 Israel Before 0.498
## 17 Italy Before 0.534
## 18 Japan Before 0.462
## 19 Luxembourg Before 0.482
## 20 Mexico Before 0.494
## 21 Netherlands Before 0.426
## 22 New_Zealand Before 0.455
## 23 Norway Before 0.410
## 24 Poland Before 0.470
## 25 Portugal Before 0.521
## 26 Slovak_Republic Before 0.416
## 27 Slovenia Before 0.423
## 28 South_Korea Before 0.344
## 29 Spain Before 0.461
## 30 Sweden Before 0.426
## 31 Switzerland Before 0.409
## 32 Turkey Before 0.470
## 33 United_Kingdom Before 0.456
## 34 United_States Before 0.486
## 35 Australia After 0.336
## 36 Austria After 0.261
## 37 Belgium After 0.259
## 38 Canada After 0.324
## 39 Chile After 0.394
## 40 Czech_Republic After 0.256
## 41 Denmark After 0.248
## 42 Estonia After 0.315
## 43 Finland After 0.259
## 44 France After 0.293
## 45 Germany After 0.295
## 46 Greece After 0.307
## 47 Hungary After 0.272
## 48 Iceland After 0.301
## 49 Ireland After 0.293
## 50 Israel After 0.371
## 51 Italy After 0.337
## 52 Japan After 0.329
## 53 Luxembourg After 0.288
## 54 Mexico After 0.476
## 55 Netherlands After 0.294
## 56 New_Zealand After 0.330
## 57 Norway After 0.250
## 58 Poland After 0.305
## 59 Portugal After 0.353
## 60 Slovak_Republic After 0.257
## 61 Slovenia After 0.236
## 62 South_Korea After 0.315
## 63 Spain After 0.317
## 64 Sweden After 0.259
## 65 Switzerland After 0.303
## 66 Turkey After 0.409
## 67 United_Kingdom After 0.345
## 68 United_States After 0.378
str(Gini_b_a_melt)
## 'data.frame': 68 obs. of 3 variables:
## $ Country : Factor w/ 34 levels "Australia","Austria",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Tax : Factor w/ 2 levels "Before","After": 1 1 1 1 1 1 1 1 1 1 ...
## $ Gini_Coef: num 0.468 0.472 0.469 0.441 0.426 0.444 0.416 0.458 0.465 0.483 ...
# Long-format table built with dplyr/tidyr: keep Country, Before and After,
# then stack the two Gini columns into key `Tax` and value `Gini_Coef`.
# gather() leaves `Tax` as a character column here.
Gini_b_a_tbl <- gather(
  select(Gini_b_a, Country, Before, After),
  key = "Tax",
  value = "Gini_Coef",
  -Country
)
`ggplot2` 등록 후 `geom_bar()` 수행 (`tidyverse` 에 기 탑재)
ggplot(data = Gini_b_a_tbl,
mapping = aes(x = Country,
y = Gini_Coef,
fill = Tax)) +
geom_bar(stat = "identity",
position = position_dodge2(reverse = TRUE),
na.rm = TRUE) +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
#> x축에 생기는 공간 없앰
scale_y_continuous(expand = c(0, 0))
# coord_flip()
`level` 로 갖는 `factor` 로 만들어야 함. `o_improvement` 가 내림차순으로 정리되어 있는 순서이기 때문에 `rev(o_improvement)` 는 오름차순으로 정리되어 있는 순서임. 따라서,
Gini_b_a$Country_order <- factor(Gini_b_a$Country,
levels = Gini_b_a$Country[rev(o_improvement)])
# Long format keyed by the improvement-ordered factor Country_order, followed
# by a structure check of the result.
Gini_b_a_order_melt <- melt(
  Gini_b_a,
  id.vars = "Country_order",
  measure.vars = c("Before", "After"),
  variable.name = "Tax",
  value.name = "Gini_Coef"
)
Gini_b_a_order_melt %>% str
## 'data.frame': 68 obs. of 3 variables:
## $ Country_order: Factor w/ 34 levels "Ireland","Mexico",..: 15 34 33 10 4 26 23 17 31 27 ...
## $ Tax : Factor w/ 2 levels "Before","After": 1 1 1 1 1 1 1 1 1 1 ...
## $ Gini_Coef : num 0.468 0.472 0.469 0.441 0.426 0.444 0.416 0.458 0.465 0.483 ...
# dplyr/tidyr version of the ordered long table. `Tax` is converted to a factor
# with levels Before, After so it no longer sorts alphabetically.
Gini_b_a_order_tbl <- Gini_b_a %>%
  select(Before, After, Country_order) %>%
  gather(key = "Tax", value = "Gini_Coef", Before, After) %>%
  mutate(Tax = factor(Tax, levels = c("Before", "After")))
`Gini_b_a_order_tbl` 의 `Country_order` 가 개선도 오름차순으로 정리되어 있는 `factor` 이기 때문에 그대로 활용하면 됨. 이 데이터는 `Before` 가 `After` 보다 항상 큰 값을 갖기 때문에 `position = position_identity()` 를 적용하기 매우 좋은 사례임.
ggplot(data = Gini_b_a_order_tbl,
mapping = aes(x = Country_order,
y = Gini_Coef,
fill = Tax)) +
geom_bar(stat = "identity",
position = "identity",
width = 0.7,
na.rm = TRUE) +
geom_hline(yintercept = 0.4,
color = "red",
linetype = 2,
size = 1) +
scale_fill_manual(values = c("darkgrey", "blue")) +
# scale_fill_brewer(type = "qual", palette = "Set1", direction = -1) +
scale_y_continuous(expand = c(0, 0)) +
labs(title = "OECD Gini Coefficient",
subtitle = "Before and After Tax",
x = "Country",
y = "Gini Coefficient") +
theme(axis.ticks.y = element_blank(),
plot.title = element_text(size = 15, hjust = 0.5),
plot.subtitle = element_text(size = 10, hjust = 0.5)) +
coord_flip()
# Save every object in the current workspace to Gini_OECD.RData.
save.image(file = "Gini_OECD.RData")