# Data Cleaning
# Data: downloaded from 공공데이터포털 (the Korean public data portal)
# Load the raw population table and build the cleaned frame `df_one`.
#
# NOTE: setwd() changes the working directory of the whole R session; it is
# kept because the CSV is referenced by a bare, relative file name.
setwd("C:/Users/Administrator/Desktop/BIG DATA")
data <- read.csv("popul_korea.csv")

# Steps:
#   1. keep the year / total / male / female columns;
#   2. give them English names;
#   3. keep only rows whose year is after 1963;
#   4. coerce the three count columns to numeric.
#
# The year test must refer to the column itself. Comparing the quoted name
# ("시점" > 1963) is a string-vs-number comparison that yields a single
# constant value, so it never filters by year. The column is passed through
# as.numeric(as.character(.)) inside the test so the comparison also works
# if read.csv() delivered it as a factor or character vector; the stored
# Year column itself is left unchanged.
df_one <- data %>%
  select("시점", "한국인인구", "한국인남자", "한국인여자") %>%
  setNames(c("Year", "Total", "Male", "Female")) %>%
  filter(as.numeric(as.character(Year)) > 1963) %>%
  mutate(
    # as.character() first: calling as.numeric() directly on a factor would
    # return the internal level codes rather than the printed values.
    Total = as.numeric(as.character(Total)),
    Male = as.numeric(as.character(Male)),
    Female = as.numeric(as.character(Female))
  )
# Build `df_two`: `df_one` plus derived columns.
#   Male_2, Female_2 : Male / Female divided by 10000
#   Percent_total    : each row's Total as a percentage of the sum of Total
#                      over all rows, rounded to 1 decimal and stored as
#                      text with a trailing "%"
#   Percent_male     : Male as a percentage of Total (2 decimals)
#   Percent_Female   : Female as a percentage of Total (2 decimals)
df_two <- mutate(
  df_one,
  Male_2 = Male / 10000,
  Female_2 = Female / 10000,
  Percent_total = paste0(round(Total / sum(Total) * 100, 1), "%"),
  Percent_male = round(Male / Total * 100, 2),
  Percent_Female = round(Female / Total * 100, 2)
)

# Alternative palettes, currently disabled:
# my_colors <- c("#770A1F", "#EC1D27", "#F56F52", "#F9B297")
# my_colors <- c("#014d64", "#01a2d9")

# Font family used by the plot theme.
my_font <- "Roboto Condensed"

# Open the derived table in the data viewer for a visual check.
View(df_two)
# Build `Stage_label`: `df_two` plus a character column `Stage` that assigns
# each row to a period by Year:
#   Year < 1980          -> "Stage 1"
#   1980 <= Year < 2000  -> "Stage 2"
#   anything else        -> "Stage 3" (this includes rows where Year is NA)
#
# Year is referenced through the data mask (bare column name) rather than
# as df_two$Year, so the conditions always line up with the rows flowing
# through the pipe. case_when() takes the first matching branch, so the
# second branch only needs the upper bound. Every branch yields a string,
# so no extra as.character() conversion is needed.
Stage_label <- df_two %>%
  mutate(
    Stage = case_when(
      Year < 1980 ~ "Stage 1",
      Year < 2000 ~ "Stage 2",
      TRUE ~ "Stage 3"
    )
  )
#c("#fff5f0", "#fcbba1", "#fc9272", "#fb6a4a")
#("#f15b40", "#eca221", "#00526d", "#b0c6d2")
# Line chart of Male_2 and Female_2 against Year, drawn over three shaded
# period bands ("Stage 1" to "Stage 3"). The plot is not assigned, so it is
# printed when this statement is evaluated at top level.
ggplot(df_two) +
  # Background bands. annotate("rect") draws each band exactly once.
  # A geom_rect() with constant aes() values would inherit df_two and draw
  # one identical rectangle per data row, stacking the translucent fills so
  # the requested alpha is not what appears on screen.
  annotate("rect", xmin = 1960, xmax = 1980, ymin = 70, ymax = Inf,
           fill = "#fff5f0", alpha = 0.9) +
  annotate("rect", xmin = 1980, xmax = 2000, ymin = 70, ymax = Inf,
           fill = "#b0c6d2", alpha = 0.3) +
  annotate("rect", xmin = 2000, xmax = 2014, ymin = 70, ymax = Inf,
           fill = "grey90", alpha = 0.5) +
  # The two series. `size` is kept (rather than `linewidth`) so the script
  # keeps working on older ggplot2 releases.
  geom_line(aes(Year, Male_2), col = "#014d64", size = 1.2) +
  geom_line(aes(Year, Female_2), col = "#770A1F", size = 1.2) +
  # Band captions.
  annotate("text", x = 1970, y = 101, label = "Stage 1", color = "Black",
           size = 4.5) +
  annotate("text", x = 1990, y = 101, label = "Stage 2", color = "Black",
           size = 4.5) +
  annotate("text", x = 2006, y = 101, label = "Stage 3", color = "Black",
           size = 4.5) +
  # Hand-drawn horizontal guide lines at y = 70, 80, 90, 100 (curvature 0
  # makes each "curve" a straight line); they stand in for the grid lines
  # removed in the theme below.
  annotate("curve", curvature = 0, x = 1964, xend = 2012,
           y = seq(70, 100, 10), yend = seq(70, 100, 10),
           color = "grey80", size = 0.5) +
  # Direct series labels, coloured like their lines, in place of a legend.
  annotate("text", x = 2013, y = 78, label = "Male", color = "#014d64") +
  annotate("text", x = 2013, y = 75, label = "Female", color = "#770A1F") +
  labs(y = "Number of population (in Million)",
       x = "Year",
       title = "The Number of population from 1970 to 2012",
       subtitle = "Male Population is over Female population",
       caption = "Data Source : 공공데이터포털") +
  # Base theme first, then the overrides in a single theme() call.
  theme_fivethirtyeight() +
  theme(
    panel.grid = element_blank(),
    legend.key.width = unit(0.9, "cm"),
    legend.key.height = unit(0.0, "cm"),
    text = element_text(family = my_font, size = 12, color = "gray30"),
    plot.caption = element_text(size = 10, color = "grey40",
                                family = my_font, face = "bold"),
    plot.title = element_text(size = 15, face = "bold"),
    plot.subtitle = element_text(color = "gray30", size = 10,
                                 family = my_font)
  )