# Install any dependency that is not available yet, then attach all of them.
# requireNamespace() reports whether a package can be loaded without attaching
# it, so packages that are already present are not reinstalled on every run.
required_pkgs <- c("foreign", "dplyr", "ggplot2", "readxl")
is_available <- vapply(
  required_pkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (any(!is_available)) {
  install.packages(required_pkgs[!is_available])
}

library(foreign)
library(dplyr)
library(ggplot2)
library(readxl)
# Load the Koweps SPSS file into a data frame.
# The object keeps the spelling `raw_walfare` because the code that follows
# refers to it by that name.
data_file <- "Koweps_hpc10_2015_beta1.sav"
# NOTE(review): the URL in the original script was garbled ("...sav=1");
# "?dl=1" is the usual Dropbox direct-download suffix -- confirm it.
data_url <- paste0(
  "https://www.dropbox.com/s/d9l0kan6oen1x94/",
  "Koweps_hpc10_2015_beta1.sav?dl=1"
)

getwd()
# Machine-specific project folder: switch to it only when it exists, so the
# script does not abort on other machines.
project_dir <- "C:\\Users\\Administrator\\rlang_weekend"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

# Prefer the local copy; otherwise fall back to the (quoted) URL.
# NOTE(review): read.spss() documents `file` as a file name or URL -- confirm
# with the installed version of `foreign`.
data_source <- if (file.exists(data_file)) data_file else data_url
raw_walfare <- read.spss(file = data_source, to.data.frame = TRUE)

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(head(raw_walfare))
}
# Work on a copy so the imported data stays untouched.
welfare <- raw_walfare
View(head(welfare))

# Give the survey columns used below readable names, then keep only those
# columns (in this order).
welfare <- welfare %>%
  dplyr::rename(
    sex = h10_g3,            # sex
    birth = h10_g4,          # year of birth
    marriage = h10_g10,      # marital status
    religion = h10_g11,      # religion
    income = p1002_8aq1,     # monthly wage
    code_job = h10_eco9,     # job code
    code_region = h10_reg7   # region code
  ) %>%
  subset(
    select = c(sex, birth, marriage, religion, income, code_job, code_region)
  )
View(head(welfare))
# Question: does the monthly wage differ by sex?
# Inspect the variable first.
class(welfare$sex)
table(welfare$sex)

# Treat the code 9 as missing (NA).
# NOTE(review): 9 is presumably a non-response code -- confirm in the codebook.
welfare$sex <- ifelse(welfare$sex == 9, NA, welfare$sex)
table(is.na(welfare$sex))

# Label the categories: code 1 becomes "남성" (male), every other non-missing
# code becomes "여성" (female).
welfare$sex <- ifelse(welfare$sex == 1, "남성", "여성")
table(welfare$sex)

# Bar chart of the number of rows per sex. Written with ggplot() + geom_bar()
# because qplot() is deprecated in current ggplot2 releases.
ggplot(data = welfare, aes(x = sex)) +
  geom_bar()

### Inspect and preprocess the income variable
class(welfare$income)
summary(welfare$income)

# Histogram of income, with the x axis limited to 0-1000. Written with
# ggplot() + geom_histogram() because qplot() is deprecated in current
# ggplot2 releases.
ggplot(data = welfare, aes(x = income)) +
  geom_histogram() +
  xlim(0, 1000)

# Recode 0 and 9999 to NA.
# NOTE(review): presumably "no income" / non-response codes -- confirm in the
# codebook.
welfare$income <- ifelse(
  welfare$income %in% c(0, 9999),
  NA,
  welfare$income
)
table(is.na(welfare$income))

# Mean income per sex, using only rows that have an income value.
sex_income <- welfare %>%
  dplyr::filter(!is.na(income)) %>%
  dplyr::group_by(sex) %>%
  dplyr::summarise(mean_income = mean(income))
sex_income

ggplot(data = sex_income, aes(x = sex, y = mean_income)) +
  geom_col()

### Relationship between age and income
# Recode the birth year 9999 to NA.
# NOTE(review): presumably a non-response code -- confirm in the codebook.
welfare$birth <- ifelse(welfare$birth == 9999, NA, welfare$birth)
table(is.na(welfare$birth))

# Age relative to 2015, counting the birth year itself as age 1.
welfare$age <- 2015 - welfare$birth + 1

# Histogram of age. Written with ggplot() + geom_histogram() because qplot()
# is deprecated in current ggplot2 releases.
ggplot(data = welfare, aes(x = age)) +
  geom_histogram()

# Mean income per age, using only rows that have an income value. The verbs
# are namespace-qualified so they cannot resolve to stats::filter().
age_income <- welfare %>%
  dplyr::filter(!is.na(income)) %>%
  dplyr::group_by(age) %>%
  dplyr::summarise(mean_income = mean(income))
age_income

ggplot(data = age_income, aes(x = age, y = mean_income)) +
  geom_line()
# Derived variable with mutate(): age group (`ageg`).
# age < 30 -> "초년", age 30-59 -> "중년", otherwise "노년"; a missing age
# stays NA.
welfare <- welfare %>%
  dplyr::mutate(
    ageg = ifelse(
      age < 30,
      "초년",
      ifelse(age <= 59, "중년", "노년")
    )
  )
# Show only the first rows instead of printing the whole data frame.
head(welfare)
table(welfare$ageg)

# Bar chart of the number of rows per age group. Written with ggplot() +
# geom_bar() because qplot() is deprecated in current ggplot2 releases.
ggplot(data = welfare, aes(x = ageg)) +
  geom_bar()

# Mean income per age group and sex, using only rows that have an income
# value. This reuses the name `sex_income`, replacing any earlier value.
# ungroup() drops the grouping that summarise() leaves on `ageg`.
sex_income <- welfare %>%
  dplyr::filter(!is.na(income)) %>%
  dplyr::group_by(ageg, sex) %>%
  dplyr::summarise(mean_income = mean(income)) %>%
  dplyr::ungroup()

sex_income

# Bars are placed side by side: stacking two group means would display their
# sum, which has no meaning. The x axis follows the listed age-group order.
ggplot(
  data = sex_income,
  aes(x = ageg, y = mean_income, fill = sex)
) +
  geom_col(position = "dodge") +
  scale_x_discrete(limits = c("초년", "중년", "노년"))

















  
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmluc3RhbGxlZC5wYWNrYWdlcygiZm9yZWlnbiIpDQpsaWJyYXJ5KGZvcmVpZ24pDQppZigiZHBseXIiICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCJkcGx5ciIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoImRwbHlyIikNCmxpYnJhcnkoZHBseXIpDQppZigiZ2dwbG90MiIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoImdncGxvdDIiKSA9PSBGQUxTRSlpbnN0YWxsLnBhY2thZ2VzKCJnZ3Bsb3QyIikNCmxpYnJhcnkoZ2dwbG90MikNCmluc3RhbGwucGFja2FnZXMoInJlYWR4bCIpDQpsaWJyYXJ5KHJlYWR4bCkNCnJhd193ZWxmYXJlIDwtIHJlYWQuc3BzcyhodHRwczovL3d3dy5kcm9wYm94LmNvbS9zL2Q5bDBrYW42b2VuMXg5NC9Lb3dlcHNfaHBjMTBfMjAxNV9iZXRhMS5zYXY9MSkNClZpZXcocmF3X3dhbGZhcmUpDQpnZXR3ZCgpDQpzZXR3ZCgiQzpcXFVzZXJzXFxBZG1pbmlzdHJhdG9yXFxybGFuZ193ZWVrZW5kIikNCnJhd193YWxmYXJlIDwtIHJlYWQuc3BzcyhmaWxlID0gIktvd2Vwc19ocGMxMF8yMDE1X2JldGExLnNhdiIsDQogICAgICAgICAgICAgICAgICAgICAgICAgdG8uZGF0YS5mcmFtZSA9IFQNCiAgICAgICAgICAgICAgICAgICAgICAgICApDQpWaWV3KGhlYWQocmF3X3dhbGZhcmUpKQ0Kd2VsZmFyZSA8LSByYXdfd2FsZmFyZQ0KVmlldyhoZWFkKHdlbGZhcmUpKQ0KICB3ZWxmYXJlIDwtIGRwbHlyOjpyZW5hbWUoDQogICAgd2VsZmFyZSwNCiAgICBzZXggPSBoMTBfZzMsICMg7ISx67OEDQogICAgYmlydGggPSBoMTBfZzQsICMg7YOc7Ja064KcIOyXsOuPhA0KICAgIG1hcnJpYWdlID0gaDEwX2cxMCwgIyDtmLzsnbgg7IOB7YOcDQogICAgcmVsaWdpb24gPSBoMTBfZzExLCAjIOyiheq1kA0KICAgIGluY29tZSA9IHAxMDAyXzhhcTEsICMg7JuU6riJDQogICAgY29kZV9qb2IgPSBoMTBfZWNvOSwgIyDsp4Hsl4Ug7L2U65OcDQogICAgY29kZV9yZWdpb24gPSBoMTBfcmVnNyAjIOyngOyXrSDsvZTrk5wNCiAgKQ0Kd2VsZmFyZSA8LSBzdWJzZXQod2VsZmFyZSwNCiAgICAgICAgICAgICAgICAgIHNlbGVjdCA9IGMoc2V4LGJpcnRoLG1hcnJpYWdlLHJlbGlnaW9uLGluY29tZSxjb2RlX2pvYixjb2RlX3JlZ2lvbikNCiAgICAgICAgICAgICAgICAgICkNClZpZXcoaGVhZCh3ZWxmYXJlKSkNCiMg7ISx67OE7JeQIOuUsOuluCDsm5TquInssKjsnbQgPSDshLHrs4Tsl5Ag65Sw6528IOyblOq4ieydtCDri6TrpbzquYw/DQojIOuzgOyImCDqsoDsiJjtlZjquLANCmNsYXNzKHdlbGZhcmUkc2V4KQ0KdGFibGUod2VsZmFyZSRzZXgpDQojIOydtOyDgey5mChPdXRsaWVyKSDqsrDsuKEoTkEpIOyymOumrA0Kd2VsZmFyZSRzZXggPC0gaWZlbHNlKHdlbGZhcmUkc2V4ID09IDksIE5BLCB3ZWxmYXJlJHNleCkNCnRhYmxlKGlzLm5hKHdlbGZhcmUkc2V4KSkNCiMg7ISx67OEIO2VreuqqSDsnbTrpoQg67aA7Jes
DQp3ZWxmYXJlJHNleCA8LSBpZmVsc2Uod2VsZmFyZSRzZXggPT0gMSwgIuuCqOyEsSIsIuyXrOyEsSIpDQp0YWJsZSh3ZWxmYXJlJHNleCkNCnFwbG90KHdlbGZhcmUkc2V4KQ0KDQojIyMg7JuU6riJIOuzgOyImCDqsoDthqAg67CPIOyghOyymOumrA0KY2xhc3Mod2VsZmFyZSRpbmNvbWUpDQpzdW1tYXJ5KHdlbGZhcmUkaW5jb21lKQ0KDQpxcGxvdCh3ZWxmYXJlJGluY29tZSkreGxpbSgwLDEwMDApDQoNCndlbGZhcmUkaW5jb21lIDwtIGlmZWxzZSgNCiAgd2VsZmFyZSRpbmNvbWUgJWluJSBjKDAsOTk5OSksDQogIE5BLA0KICB3ZWxmYXJlJGluY29tZQ0KKQ0KdGFibGUoaXMubmEod2VsZmFyZSRpbmNvbWUpKQ0KDQpzZXhfaW5jb21lIDwtIHdlbGZhcmUgJT4lIA0KICBkcGx5cjo6ZmlsdGVyKCFpcy5uYShpbmNvbWUpKSAlPiUgDQogIGRwbHlyOjpncm91cF9ieShzZXgpICU+JSANCiAgZHBseXI6OnN1bW1hcmlzZShtZWFuX2luY29tZSA9IG1lYW4oaW5jb21lKSkNCnNleF9pbmNvbWUNCmdncGxvdCgNCiAgZGF0YSA9IHNleF9pbmNvbWUsDQogIGFlcyh4PXNleCwgeT1tZWFuX2luY29tZSkNCikrZ2VvbV9jb2woKQ0KDQojIyMg64KY7J207JmAIOyblOq4ieydmCDqtIDqs4QNCndlbGZhcmUkYmlydGggPC0gaWZlbHNlKHdlbGZhcmUkYmlydGggPT0gOTk5OSwgTkEsIHdlbGZhcmUkYmlydGgpDQp0YWJsZShpcy5uYSh3ZWxmYXJlJGJpcnRoKSkNCg0Kd2VsZmFyZSRhZ2UgPC0gMjAxNSAtIHdlbGZhcmUkYmlydGggKyAxDQpxcGxvdCh3ZWxmYXJlJGFnZSkNCmFnZV9pbmNvbWUgPC0gd2VsZmFyZSAlPiUgDQogIGZpbHRlcighaXMubmEoaW5jb21lKSkgJT4lIA0KICBncm91cF9ieShhZ2UpICU+JSANCiAgc3VtbWFyaXNlKG1lYW5faW5jb21lID0gbWVhbihpbmNvbWUpKQ0KYWdlX2luY29tZQ0KZ2dwbG90KA0KICBkYXRhID0gYWdlX2luY29tZSwNCiAgYWVzKA0KICAgIHggPSBhZ2UsDQogICAgeSA9IG1lYW5faW5jb21lDQogICkNCikrZ2VvbV9saW5lKCkNCiMg7YyM7IOd67OA7IiYIOunjOuTpOq4sCA6IG11dGF0ZSgpDQp3ZWxmYXJlIDwtIHdlbGZhcmUgJT4lDQogIGRwbHlyOjptdXRhdGUoYWdlZyA9IGlmZWxzZShhZ2UgPCAzMCwgIuy0iOuFhCIsDQogICAgICAgICAgICAgICAgaWZlbHNlKGFnZSA8PTU5LCLspJHrhYQiLCLrhbjrhYQiKSkpDQp3ZWxmYXJlDQp0YWJsZSh3ZWxmYXJlJGFnZWcpDQpxcGxvdCh3ZWxmYXJlJGFnZWcpDQpzZXhfaW5jb21lIDwtIHdlbGZhcmUgJT4lIA0KICBkcGx5cjo6ZmlsdGVyKCFpcy5uYShpbmNvbWUpKSAlPiUgDQogIGdyb3VwX2J5KGFnZWcsc2V4KSAlPiUgDQogIHN1bW1hcmlzZShtZWFuX2luY29tZSA9IG1lYW4oaW5jb21lKSkNCg0Kc2V4X2luY29tZQ0KDQpnZ3Bsb3QoDQogIGRhdGEgPSBzZXhfaW5jb21lLA0KICBhZXMoDQogICAgeCA9IGFnZWcsDQogICAgeSA9IG1lYW5faW5jb21lLA0KICAgIGZpbGwgPSBzZXgNCiAgKQ0KKStnZW9tX2NvbCgpKw0KICBzY2FsZV94X2Rpc2NyZXRl
KGxpbWl0cyA9IGMoIuy0iOuFhCIsIuykkeuFhCIsIuuFuOuFhCIpKQ0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCiAgDQoNCmBgYA0K