중앙투자심사 2013 - 2021 분석 프로젝트 첫번째

library(readxl)
library(dplyr)
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(reshape2)

cli <- read_xlsx("C:\\_work\\2022\\alookso\\li\\cli_13_21.xlsx", sheet = "raw")

## 연도별 투자심사 대상 건수 및  총액 추이

t01 <- cli %>%
  group_by(year, pf) %>%
  summarize(n = length(total),
            sum = sum(total))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
head(t01)
## # A tibble: 6 x 4
## # Groups:   year [3]
##    year pf        n    sum
##   <dbl> <chr> <int>  <dbl>
## 1  2013 fail     91  95625
## 2  2013 pass    196 138218
## 3  2014 fail    155 130794
## 4  2014 pass    327 246455
## 5  2015 fail    159 151941
## 6  2015 pass    288 222882
t01_1 <- dcast(t01, year ~ pf, value.var = "n")
t01_2 <- dcast(t01, year ~ pf, value.var = "sum")


## 건수 기준
t01_1
##   year fail pass
## 1 2013   91  196
## 2 2014  155  327
## 3 2015  159  288
## 4 2016  194  339
## 5 2017  170  422
## 6 2018  108  264
## 7 2019  105  392
## 8 2020  142  330
## 9 2021  111  271
## 금액 기준
t01_2
##   year   fail   pass
## 1 2013  95625 138218
## 2 2014 130794 246455
## 3 2015 151941 222882
## 4 2016 105921 150058
## 5 2017  58055 166667
## 6 2018  54469 174738
## 7 2019  63797 249687
## 8 2020 114339 342475
## 9 2021  86492 317293
t01_1$sum_n <- with(t01_1, pass + fail)
t01_1$pass_ratio_n <- with(t01_1, pass/sum_n)

t01_2$sum_total <- with(t01_2, pass + fail)
t01_2$pass_ratio_total <- with(t01_2, pass / sum_total)

t01_3 <- merge(t01_1, t01_2, by = "year")

t01_3
##   year fail.x pass.x sum_n pass_ratio_n fail.y pass.y sum_total
## 1 2013     91    196   287    0.6829268  95625 138218    233843
## 2 2014    155    327   482    0.6784232 130794 246455    377249
## 3 2015    159    288   447    0.6442953 151941 222882    374823
## 4 2016    194    339   533    0.6360225 105921 150058    255979
## 5 2017    170    422   592    0.7128378  58055 166667    224722
## 6 2018    108    264   372    0.7096774  54469 174738    229207
## 7 2019    105    392   497    0.7887324  63797 249687    313484
## 8 2020    142    330   472    0.6991525 114339 342475    456814
## 9 2021    111    271   382    0.7094241  86492 317293    403785
##   pass_ratio_total
## 1        0.5910718
## 2        0.6532953
## 3        0.5946327
## 4        0.5862122
## 5        0.7416586
## 6        0.7623589
## 7        0.7964904
## 8        0.7497034
## 9        0.7857969
t01_4 <- subset(t01_3, select = c(year, pass.x, sum_n))

t01_4
##   year pass.x sum_n
## 1 2013    196   287
## 2 2014    327   482
## 3 2015    288   447
## 4 2016    339   533
## 5 2017    422   592
## 6 2018    264   372
## 7 2019    392   497
## 8 2020    330   472
## 9 2021    271   382
t01_4t <- melt(t01_4, id.vars = "year")

t01_4t
##    year variable value
## 1  2013   pass.x   196
## 2  2014   pass.x   327
## 3  2015   pass.x   288
## 4  2016   pass.x   339
## 5  2017   pass.x   422
## 6  2018   pass.x   264
## 7  2019   pass.x   392
## 8  2020   pass.x   330
## 9  2021   pass.x   271
## 10 2013    sum_n   287
## 11 2014    sum_n   482
## 12 2015    sum_n   447
## 13 2016    sum_n   533
## 14 2017    sum_n   592
## 15 2018    sum_n   372
## 16 2019    sum_n   497
## 17 2020    sum_n   472
## 18 2021    sum_n   382
## 건수 기준 그림
ggplot(t01_4t, aes(x = year, y = value, fill = variable)) + 
  geom_bar(position = "dodge", stat = "identity", color = "black") +
  scale_fill_manual(values = c("grey70", "grey50"), labels = c("추진", "전체")) +
  geom_text(aes(label = value), position = position_dodge(.9), size = 4, vjust = -.5) +
  labs(x = "연도", y = "추진건수(건)", fill = "구분") +
  scale_x_continuous(breaks = 2013:2021) +
  scale_y_continuous(limits = c(0, 700), breaks = c(0, 200, 400, 600)) +
  theme(axis.text.x = element_text(size = 10), axis.text.y = element_text(size = 10), 
        axis.title.x = element_text(size = 18), axis.title.y = element_text(size = 18),
        legend.title = element_text(size = 18)) 

## 금액 기준 그림

t01_5 <- subset(t01_3, select = c(year, pass.y, sum_total))

t01_5t <- melt(t01_5, id.vars = "year")

t01_5t
##    year  variable  value
## 1  2013    pass.y 138218
## 2  2014    pass.y 246455
## 3  2015    pass.y 222882
## 4  2016    pass.y 150058
## 5  2017    pass.y 166667
## 6  2018    pass.y 174738
## 7  2019    pass.y 249687
## 8  2020    pass.y 342475
## 9  2021    pass.y 317293
## 10 2013 sum_total 233843
## 11 2014 sum_total 377249
## 12 2015 sum_total 374823
## 13 2016 sum_total 255979
## 14 2017 sum_total 224722
## 15 2018 sum_total 229207
## 16 2019 sum_total 313484
## 17 2020 sum_total 456814
## 18 2021 sum_total 403785
t01_5t$jo <- round(t01_5t$value / 10000, 1)

## 금액 기준 그림
ggplot(t01_5t, aes(x = year, y = jo, fill = variable)) + 
  geom_bar(position = "dodge", stat = "identity", color = "black") +
  scale_fill_manual(values = c("grey70", "grey50"), labels = c("추진", "전체")) +
  geom_text(aes(label = jo), position = position_dodge(.9), size = 4, vjust = -.5) +
  labs(x = "연도", y = "금액(조원)", fill = "구분") +
  scale_x_continuous(breaks = 2013:2021) +
  scale_y_continuous(limits = c(0, 50), breaks = c(0, 15, 30, 45)) +
  theme(axis.text.x = element_text(size = 10), axis.text.y = element_text(size = 10), 
        axis.title.x = element_text(size = 18), axis.title.y = element_text(size = 18),
        legend.title = element_text(size = 18)) 

## 통과율

t01_3
##   year fail.x pass.x sum_n pass_ratio_n fail.y pass.y sum_total
## 1 2013     91    196   287    0.6829268  95625 138218    233843
## 2 2014    155    327   482    0.6784232 130794 246455    377249
## 3 2015    159    288   447    0.6442953 151941 222882    374823
## 4 2016    194    339   533    0.6360225 105921 150058    255979
## 5 2017    170    422   592    0.7128378  58055 166667    224722
## 6 2018    108    264   372    0.7096774  54469 174738    229207
## 7 2019    105    392   497    0.7887324  63797 249687    313484
## 8 2020    142    330   472    0.6991525 114339 342475    456814
## 9 2021    111    271   382    0.7094241  86492 317293    403785
##   pass_ratio_total
## 1        0.5910718
## 2        0.6532953
## 3        0.5946327
## 4        0.5862122
## 5        0.7416586
## 6        0.7623589
## 7        0.7964904
## 8        0.7497034
## 9        0.7857969
t01_6 <- subset(t01_3, select = c(year, pass_ratio_n, pass_ratio_total))

t01_6t <- melt(t01_6, id.vars = "year")

t01_6t$ratio = round(t01_6t$value * 100, 1)

t01_6t
##    year         variable     value ratio
## 1  2013     pass_ratio_n 0.6829268  68.3
## 2  2014     pass_ratio_n 0.6784232  67.8
## 3  2015     pass_ratio_n 0.6442953  64.4
## 4  2016     pass_ratio_n 0.6360225  63.6
## 5  2017     pass_ratio_n 0.7128378  71.3
## 6  2018     pass_ratio_n 0.7096774  71.0
## 7  2019     pass_ratio_n 0.7887324  78.9
## 8  2020     pass_ratio_n 0.6991525  69.9
## 9  2021     pass_ratio_n 0.7094241  70.9
## 10 2013 pass_ratio_total 0.5910718  59.1
## 11 2014 pass_ratio_total 0.6532953  65.3
## 12 2015 pass_ratio_total 0.5946327  59.5
## 13 2016 pass_ratio_total 0.5862122  58.6
## 14 2017 pass_ratio_total 0.7416586  74.2
## 15 2018 pass_ratio_total 0.7623589  76.2
## 16 2019 pass_ratio_total 0.7964904  79.6
## 17 2020 pass_ratio_total 0.7497034  75.0
## 18 2021 pass_ratio_total 0.7857969  78.6
ggplot(t01_6t, aes(x = year, y = ratio, color = variable)) + 
  geom_line(size = 1.2) +
  scale_color_manual(values = c("grey70", "grey50"), labels = c("건수", "금액")) +
  labs(x = "연도", y = "추진건수(건)", color = "기준") +
  scale_x_continuous(breaks = 2013:2021) +
  scale_y_continuous(limits = c(0, 100), breaks = c(0, 25, 50, 75, 100)) +
  theme(axis.text.x = element_text(size = 10), axis.text.y = element_text(size = 10), 
        axis.title.x = element_text(size = 18), axis.title.y = element_text(size = 18),
        legend.title = element_text(size = 18))