패키지 실행 & 데이터 가져오기 (dropbox 인식 불가하여 로컬로 진행)
# Load packages and import the survey data ----

# Install a package only when it is missing.
# The original test `"dplyr" %in% installed.packages("dplyr")` passed the
# package name as `lib.loc` (a library directory), so it never checked whether
# the package was installed. requireNamespace() performs that check directly.
for (pkg in c("foreign", "dplyr", "ggplot2")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(foreign)
library(dplyr)
library(ggplot2)
library(readxl)

# Locate the SPSS file without changing the working directory: try the course
# data folder first, then the current directory.
sav_name <- "Koweps_hpc10_2015_beta1.sav"
sav_dir <- "C:\\Users\\Administrator\\rlang_weekend2\\Data_R_180929"
sav_candidates <- c(file.path(sav_dir, sav_name), sav_name)
sav_path <- sav_candidates[file.exists(sav_candidates)][1]

# read.spss() is given a local file name, so the Dropbox copy is downloaded to
# a temporary file first instead of passing the URL to read.spss() directly
# (the original direct-URL call is what this file's heading reports as failing).
if (is.na(sav_path)) {
  sav_path <- file.path(tempdir(), sav_name)
  download.file(
    "https://www.dropbox.com/s/1kipxmhaoo0e5yj/Koweps_hpc10_2015_beta1.sav?dl=1",
    destfile = sav_path,
    mode = "wb" # binary mode, so the .sav file is not altered on Windows
  )
}

raw_welfare <- read.spss(file = sav_path, to.data.frame = TRUE)
View(head(raw_welfare))
복사본 만들기
# Work on a copy so the imported data frame stays untouched.
welfare <- raw_welfare
names(welfare)

# Replace the coded survey column names with readable ones, then keep only
# those seven columns (a column subset of the data).
welfare <- welfare %>%
  dplyr::rename(
    sex = h10_g3,           # sex
    birth = h10_g4,         # year of birth
    marriage = h10_g10,     # marital status
    religion = h10_g11,     # religion
    income = p1002_8aq1,    # monthly wage
    code_job = h10_eco9,    # job code
    code_region = h10_reg7  # region code
  ) %>%
  subset(
    select = c(sex, birth, marriage, religion, income, code_job, code_region)
  )

View(head(welfare))
성별에 따른 월급차이 - 성별에 따라 월급이 다를까?
# Inspect the variable
class(welfare$sex)
table(welfare$sex)

# Outlier handling: recode the value 9 as missing (NA).
# ifelse(test, value when TRUE, value when FALSE)
welfare <- welfare %>%
  dplyr::mutate(sex = ifelse(sex == 9, NA, sex))
table(is.na(welfare$sex))

# Give the sex codes readable labels: 1 becomes "남성", anything else "여성"
# (missing values stay missing).
welfare <- welfare %>%
  dplyr::mutate(sex = ifelse(sex == 1, "남성", "여성"))
table(welfare$sex)
qplot(welfare$sex)
월급 변수 검토 및 전처리
# Inspect the income variable
class(welfare$income)
summary(welfare$income)

# The `+` has to end the line; it cannot be moved to the start of the next one.
qplot(welfare$income) +
  xlim(0, 1000)

# Recode the values 0 and 9999 as missing (NA).
welfare <- welfare %>%
  dplyr::mutate(income = ifelse(income %in% c(0, 9999), NA, income))
table(is.na(welfare$income))

# Mean income per sex, computed only over rows that have an income value.
sex_income <- welfare %>%
  dplyr::filter(!is.na(income)) %>%
  dplyr::group_by(sex) %>%
  dplyr::summarise(mean_income = mean(income))
sex_income

# Bar chart of the mean income for each sex
ggplot(data = sex_income, aes(x = sex, y = mean_income)) +
  geom_col()
나이와 월급의 관계
# Recode the birth-year value 9999 as missing (NA).
welfare <- welfare %>%
  dplyr::mutate(birth = ifelse(birth == 9999, NA, birth))
table(is.na(welfare$birth))

# Derive age from the birth year: 2015 - birth + 1.
welfare <- welfare %>%
  dplyr::mutate(age = 2015 - birth + 1)
qplot(welfare$age)

# Mean income per age, computed only over rows that have an income value.
age_income <- welfare %>%
  dplyr::filter(!is.na(income)) %>%
  dplyr::group_by(age) %>%
  dplyr::summarise(mean_income = mean(income))
age_income

# Line chart of mean income against age
ggplot(data = age_income, aes(x = age, y = mean_income)) +
  geom_line()
# Derived variable with mutate(): label each row with an age group.
# age < 30 -> "초년", age < 40 -> "중년", otherwise "노년"; a missing age stays NA.
# NOTE(review): every age of 40 and above is labelled "노년" -- confirm that a
# cut-off of 40 is what was intended.
welfare <- welfare %>%
  dplyr::mutate(
    ageg = ifelse(
      age < 30, "초년",
      ifelse(age < 40, "중년", "노년")
    )
  )

welfare
# Fixed typo: the original called `tabel()`, which does not exist and stopped
# the script here.
table(welfare$ageg)
qplot(welfare$ageg)

# Mean income per age group and sex (this reassigns `sex_income`).
# ungroup() so the summary is not left grouped by `ageg`.
sex_income <- welfare %>%
  dplyr::filter(!is.na(income)) %>%
  dplyr::group_by(ageg, sex) %>%
  dplyr::summarise(mean_income = mean(income)) %>%
  dplyr::ungroup()

# Both bar charts share the same data, aesthetics and x-axis order, so the
# common part is built once.
ageg_order <- c("초년", "중년", "노년")
ageg_base_plot <- ggplot(
  data = sex_income,
  aes(x = ageg, y = mean_income, fill = sex)
) +
  scale_x_discrete(limits = ageg_order)

# Bar chart with the sexes combined in one bar per age group
ageg_base_plot + geom_col()

# Bar chart with the sexes in separate (dodged) bars
ageg_base_plot + geom_col(position = "dodge")
나이 및 성별 월급 평균표
# Mean income for every age/sex combination, computed only over rows that have
# an income value. ungroup() so the summary is not left grouped by `age`.
sex_age <- welfare %>%
  dplyr::filter(!is.na(income)) %>%
  dplyr::group_by(age, sex) %>%
  dplyr::summarise(mean_income = mean(income)) %>%
  dplyr::ungroup()

# Line chart of mean income against age, one coloured line per sex
ggplot(data = sex_age, aes(x = age, y = mean_income, col = sex)) +
  geom_line()
LS0tDQp0aXRsZTogIu2VnOq1reuzteyngO2MqOuEkOuNsOydtO2EsCDrtoTshJ0oUGFnZSAyMDkpIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCiMjIyDtjKjtgqTsp4Ag7Iuk7ZaJICYg642w7J207YSwIOqwgOyguOyYpOq4sCAoZHJvcGJveCDsnbjsi50g67aI6rCA7ZWY7JesIOuhnOy7rOuhnCDsp4TtlokpDQoNCmBgYHtyfQ0KaW5zdGFsbC5wYWNrYWdlcygiZm9yZWlnbiIpDQpsaWJyYXJ5KGZvcmVpZ24pDQppZigiZHBseXIiICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCJkcGx5ciIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoImRwbHlyIikNCmxpYnJhcnkoZHBseXIpDQppZigiZ2dwbG90MiIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoImdncGxvdDIiKSA9PSBGQUxTRSlpbnN0YWxsLnBhY2thZ2VzKCJnZ3Bsb3QyIikNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkocmVhZHhsKQ0KDQpyYXdfd2VsZmFyZSA8LSByZWFkLnNwc3MoZmlsZT0iaHR0cHM6Ly93d3cuZHJvcGJveC5jb20vcy8xa2lweG1oYW9vMGU1eWovS293ZXBzX2hwYzEwXzIwMTVfYmV0YTEuc2F2P2RsPTEiLCB0by5kYXRhLmZyYW1lID0gVCkNCmdldHdkKCkNCnNldHdkKCJDOlxcVXNlcnNcXEFkbWluaXN0cmF0b3JcXHJsYW5nX3dlZWtlbmQyXFxEYXRhX1JfMTgwOTI5IikNCnJhd193ZWxmYXJlIDwtIHJlYWQuc3BzcyhmaWxlPSJLb3dlcHNfaHBjMTBfMjAxNV9iZXRhMS5zYXYiLCANCiAgICAgICAgICAgICAgICAgICAgICAgICB0by5kYXRhLmZyYW1lPVQpDQpWaWV3KGhlYWQocmF3X3dlbGZhcmUpKQ0KYGBgDQojIyMg67O17IKs67O4IOunjOuTpOq4sA0KDQpgYGB7cn0NCndlbGZhcmUgPC0gcmF3X3dlbGZhcmUNCm5hbWVzKHdlbGZhcmUpDQp3ZWxmYXJlIDwtIGRwbHlyOjpyZW5hbWUod2VsZmFyZSwNCiAgICAgICAgICAgICAgICAgIHNleCA9IGgxMF9nMywgIyDshLHrs4QNCiAgICAgICAgICAgICAgICAgIGJpcnRoID0gaDEwX2c0LCAjIO2DnOyWtOuCnCDsl7Drj4QNCiAgICAgICAgICAgICAgICAgIG1hcnJpYWdlID0gaDEwX2cxMCwgIyDtmLzsnbgg7IOB7YOcDQogICAgICAgICAgICAgICAgICByZWxpZ2lvbiA9IGgxMF9nMTEsICMg7KKF6rWQDQogICAgICAgICAgICAgICAgICBpbmNvbWUgPSBwMTAwMl84YXExLCAjIOyblOq4iQ0KICAgICAgICAgICAgICAgICAgY29kZV9qb2IgPSBoMTBfZWNvOSwgIyDsp4Hsl4Ug7L2U65OcDQogICAgICAgICAgICAgICAgICBjb2RlX3JlZ2lvbiA9IGgxMF9yZWc3ICMg7KeA7JetIOy9lOuTnA0KICAgICAgICAgICAgICAgICAgKQ0KIyDrjbDsnbTthLDsnZgg7J2867aAIOy7rOufvOunjCDrsJzst4ztlZjripQg6rKDICjrtoDrtoTsp5Htlakgc3Vic2V0KQ0Kd2VsZmFyZSA8LSBzdWJzZXQod2VsZmFyZSwgDQogICAgICAgICAgICAgICAgICBzZWxlY3QgPSBjKHNleCwgYmlydGgsIG1hcnJpYWdlLCByZWxpZ2lvbiwgaW5jb21lLCBjb2RlX2pvYiwgY29kZV9yZWdpb24pKQ0KVmlldyhoZWFkKHdlbGZh
cmUpKQ0KYGBgDQoNCiMjIyDshLHrs4Tsl5Ag65Sw66W4IOyblOq4ieywqOydtCAtIOyEseuzhOyXkCDrlLDrnbwg7JuU6riJ7J20IOuLpOulvOq5jD8NCmBgYHtyfQ0KIyDrs4DsiJgg6rKA7Yag7ZWY6riwDQpjbGFzcyh3ZWxmYXJlJHNleCkNCnRhYmxlKHdlbGZhcmUkc2V4KQ0KIyDsnbTsg4HsuZgoT3V0bGllcikg6rKw7LihIChOQSkNCndlbGZhcmUkc2V4IDwtIGlmZWxzZSh3ZWxmYXJlJHNleCA9PSA5LCBOQSwgd2VsZmFyZSRzZXgpICAjIGlmZWxzZSjsobDqsbQsIOyhsOqxtCDssLjsnbwg6rK97JqwIOqwkiwg6rGw7KeT7J28IOqyveyasCDqsJIpDQp0YWJsZShpcy5uYSh3ZWxmYXJlJHNleCkpDQojIOyEseuzhCDtla3rqqkg7J2066aEIOu2gOyXrA0Kd2VsZmFyZSRzZXggPC0gIGlmZWxzZSh3ZWxmYXJlJHNleCA9PSAxLCAi64Ko7ISxIiwgIuyXrOyEsSIpDQp0YWJsZSh3ZWxmYXJlJHNleCkNCnFwbG90KHdlbGZhcmUkc2V4KQ0KYGBgDQojIyMg7JuU6riJIOuzgOyImCDqsoDthqAg67CPIOyghOyymOumrA0KYGBge3J9DQpjbGFzcyh3ZWxmYXJlJGluY29tZSkNCnN1bW1hcnkod2VsZmFyZSRpbmNvbWUpDQpxcGxvdCh3ZWxmYXJlJGluY29tZSkrICMgKyDquLDtmLjripQg64+Z7J28IOykhOyXkCDsnITsuZjtlbTslbztlagg64uk7J2M7J2MIOykhOuhnCDrgrTrprTsiJgg7JeG7J2MDQogIHhsaW0oMCwxMDAwKQ0Kd2VsZmFyZSRpbmNvbWUgPC0gIGlmZWxzZSgNCiAgd2VsZmFyZSRpbmNvbWUgJWluJSBjKDAsIDk5OTkpLA0KICBOQSwNCiAgd2VsZmFyZSRpbmNvbWUNCikNCnRhYmxlKGlzLm5hKHdlbGZhcmUkaW5jb21lKSkNCg0Kc2V4X2luY29tZSA8LSB3ZWxmYXJlICU+JSANCiAgZHBseXI6OmZpbHRlcighaXMubmEoaW5jb21lKSkgJT4lIA0KICBkcGx5cjo6Z3JvdXBfYnkoc2V4KSAlPiUNCiAgZHBseXI6OnN1bW1hcmlzZShtZWFuX2luY29tZSA9IG1lYW4oaW5jb21lKSkNCg0Kc2V4X2luY29tZQ0KZ2dwbG90KA0KICBkYXRhID0gc2V4X2luY29tZSwNCiAgYWVzKHggPSBzZXgsIHkgPSBtZWFuX2luY29tZSkNCikrZ2VvbV9jb2woKQ0KYGBgDQoNCiMjIyDrgpjsnbTsmYAg7JuU6riJ7J2YIOq0gOqzhA0KYGBge3J9DQp3ZWxmYXJlJGJpcnRoIDwtIGlmZWxzZSh3ZWxmYXJlJGJpcnRoID09IDk5OTksIE5BLCB3ZWxmYXJlJGJpcnRoKQ0KdGFibGUoaXMubmEod2VsZmFyZSRiaXJ0aCkpDQoNCndlbGZhcmUkYWdlIDwtICAyMDE1IC0gd2VsZmFyZSRiaXJ0aCArIDENCnFwbG90KHdlbGZhcmUkYWdlKQ0KYWdlX2luY29tZSA8LSB3ZWxmYXJlICU+JSANCiAgZmlsdGVyKCFpcy5uYShpbmNvbWUpKSAlPiUgDQogIGdyb3VwX2J5KGFnZSkgJT4lIA0KICBzdW1tYXJpc2UobWVhbl9pbmNvbWUgPSBtZWFuKGluY29tZSkpDQphZ2VfaW5jb21lDQpnZ3Bsb3QoDQogIGRhdGEgPSBhZ2VfaW5jb21lLA0KICBhZXMoDQogICAgeCA9IGFnZSwgDQogICAgeSA9IG1lYW5faW5jb21lDQogICkNCikrZ2VvbV9saW5lKCkNCg0KIyDtjIzsg53r
s4DsiJgg66eM65Ok6riwIDogbXV0YXRlKCkNCndlbGZhcmUgPC0gd2VsZmFyZSAlPiUgDQogICAgZHBseXI6Om11dGF0ZSgNCiAgICBhZ2VnID0gaWZlbHNlKGFnZSA8IDMwLCAi7LSI64WEIiwgDQogICAgICAgICAgICAgICAgaWZlbHNlKGFnZSA8IDQwLCAi7KSR64WEIiwgIuuFuOuFhCIpKQ0KICAgICkNCg0Kd2VsZmFyZQ0KdGFiZWwod2VsZmFyZSRhZ2VnKQ0KcXBsb3Qod2VsZmFyZSRhZ2VnKQ0Kc2V4X2luY29tZSA8LSB3ZWxmYXJlICU+JSANCiAgZHBseXI6OmZpbHRlcighaXMubmEoaW5jb21lKSkgJT4lIA0KICBncm91cF9ieShhZ2VnLCBzZXgpICU+JSANCiAgc3VtbWFyaXNlKG1lYW5faW5jb21lID0gbWVhbihpbmNvbWUpKQ0KDQojIOyEseuzhCDtmLztlakg66eJ64yA6re4656Y7ZSEDQpnZ3Bsb3QoDQogIGRhdGEgPSBzZXhfaW5jb21lLA0KICBhZXMoDQogICAgeCA9IGFnZWcsDQogICAgeSA9IG1lYW5faW5jb21lLA0KICAgIGZpbGwgPSBzZXgNCiAgKQ0KKStnZW9tX2NvbCgpKw0KICBzY2FsZV94X2Rpc2NyZXRlKGxpbWl0cyA9IGMoIuy0iOuFhCIsICLspJHrhYQiLCAi64W464WEIikNCiAgKQ0KDQojIOyEseuzhCDrtoTrpqwg66eJ64yA6re4656Y7ZSEDQpnZ3Bsb3QoDQogIGRhdGEgPSBzZXhfaW5jb21lLA0KICBhZXMoDQogICAgeCA9IGFnZWcsDQogICAgeSA9IG1lYW5faW5jb21lLA0KICAgIGZpbGwgPSBzZXgNCiAgKQ0KKStnZW9tX2NvbChwb3NpdGlvbiA9ICJkb2RnZSIpKw0KICBzY2FsZV94X2Rpc2NyZXRlKGxpbWl0cyA9IGMoIuy0iOuFhCIsICLspJHrhYQiLCAi64W464WEIikNCiAgKQ0KYGBgDQoNCiMjIyDrgpjsnbQg67CPIOyEseuzhCDsm5TquIkg7Y+J6reg7ZGcDQpgYGB7cn0NCnNleF9hZ2UgPC0gd2VsZmFyZSAlPiUgDQogIGZpbHRlcighaXMubmEoaW5jb21lKSkgJT4lIA0KICBncm91cF9ieShhZ2UsIHNleCkgJT4lIA0KICBzdW1tYXJpc2UobWVhbl9pbmNvbWUgPSBtZWFuKGluY29tZSkpDQoNCmdncGxvdCgNCiAgZGF0YSA9IHNleF9hZ2UsDQogIGFlcygNCiAgICB4ID0gYWdlLCANCiAgICB5ID0gbWVhbl9pbmNvbWUsDQogICAgY29sID0gc2V4DQogICkNCikrZ2VvbV9saW5lKCkNCg0KYGBgDQoNCg0K