# 250p

#1 파일 및 패키지 준비

#install.packages("foreign")


library(foreign)
# Read the SPSS (.sav) file from the URL below into a data frame.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
raw_welfare <- read.spss(
  "https://www.dropbox.com/s/m2s7p3c5bvhvf5o/Koweps_hpc10_2015_beta1.sav?dl=1",
  to.data.frame = TRUE
)
## Warning in
## read.spss("https://www.dropbox.com/s/m2s7p3c5bvhvf5o/Koweps_hpc10_2015_beta1.sav?dl=1",
## : C:\Users\s\AppData\Local\Temp\RtmpSGzdZR\file4ac86a64475f: Compression bias
## (0) is not the usual value of 100

# Work on a copy so the raw import stays untouched.
welfare <- raw_welfare
library(dplyr)
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Replace the coded survey column names with readable ones.
welfare <- welfare %>%
  rename(
    sex = h10_g3,            # sex
    birth = h10_g4,          # year of birth
    marriage = h10_g10,      # marital status
    religion = h10_g11,      # religion
    income = p1002_8aq1,     # monthly wage
    code_job = h10_eco9,     # occupation code
    code_region = h10_reg7   # region code
  )

#2 age 순대로 나누기

# A birth year of 9999 is recoded to NA before deriving age.
welfare[["birth"]] <- ifelse(
  welfare[["birth"]] == 9999,
  NA,
  welfare[["birth"]]
)

# Age is 2015 minus the birth year, plus one.
welfare[["age"]] <- 2015 - welfare[["birth"]] + 1
summary(welfare[["age"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00   28.00   50.00   48.43   70.00  109.00

#3 age group 짜기

# Bucket age into three groups: under 30 -> "young", 30 to 59 -> "middle",
# everything above -> "old". A missing age stays missing.
welfare <- welfare %>%
  mutate(
    ageg = case_when(
      is.na(age) ~ NA_character_,
      age < 30 ~ "young",
      age <= 59 ~ "middle",
      TRUE ~ "old"
    )
  )

#4 결혼 나누기 및 이혼율 표 만들기

# Label marital status: code 1 -> "marriage", code 3 -> "divorce".
# Every other code (and a missing value) becomes NA.
welfare$group_marriage <- ifelse(
  welfare$marriage == 1, "marriage",
  ifelse(welfare$marriage == 3, "divorce", NA)
)

# Count married/divorced respondents per age group and compute each status's
# share (in percent, one decimal) within its age group.
ageg_marriage <- welfare %>%
  filter(!is.na(group_marriage)) %>%
  group_by(ageg, group_marriage) %>%
  # Keep the result grouped by `ageg` explicitly: the mutate() below relies on
  # it so that sum(n) is the per-age-group total. Stating `.groups` also
  # silences the "grouped output" message summarise() would otherwise emit.
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(
    tot_group = sum(n),
    pct = round(n / tot_group * 100, 1)
  )

# 그래프

#install.packages("ggplot2")
library(ggplot2)

# Keep only the divorce percentages of the non-"young" age groups.
ageg_divorce <- ageg_marriage %>%
  filter(ageg != "young", group_marriage == "divorce") %>%
  select(ageg, pct)

# Bar chart of divorce percentage by age group.
ggplot(ageg_divorce, aes(x = ageg, y = pct)) +
  geom_col()

# 09-9

# 지역 코드 목록

# Lookup table mapping region codes 1-7 to region names.
list_region <- data.frame(
  code_region = 1:7,
  region = c(
    "서울",
    "수도권(인천/경기)",
    "부산/경남/울산",
    "대구/경북",
    "대전/충남",
    "강원/충북",
    "광주/전남/전북/제주도"
  )
)

# Attach the region name to every row via its region code.
welfare <- welfare %>%
  left_join(list_region, by = "code_region")

# Preview the first rows of the code/name pair to check the join.
head(select(welfare, code_region, region))
##   code_region region
## 1           1   서울
## 2           1   서울
## 3           1   서울
## 4           1   서울
## 5           1   서울
## 6           1   서울
# Count respondents per region and age group, then compute each age group's
# share (in percent, two decimals) within its region.
region_ageg <- welfare %>%
  group_by(region, ageg) %>%
  # Keep the result grouped by `region` explicitly: the mutate() below relies
  # on it so that sum(n) is the per-region total. Stating `.groups` also
  # silences the "grouped output" message summarise() would otherwise emit.
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(
    tot_group = sum(n),
    pct = round(n / tot_group * 100, 2)
  )
# Rows for the "old" age group, sorted by ascending percentage.
list_older_old <- region_ageg %>%
  filter(ageg == "old") %>%
  arrange(pct)

# Region names in that sorted order, used as the axis order below.
# The previous code passed the base function `order` as `limits`, so no region
# matched the scale and every bar was dropped from the plot.
region_order <- list_older_old$region

# Stacked bars of age-group share per region, flipped to horizontal and
# ordered by the share of the "old" group.
ggplot(data = region_ageg, aes(x = region, y = pct, fill = ageg)) +
  geom_col() +
  coord_flip() +
  scale_x_discrete(limits = region_order)