# 데이터 정규화는 변숫값의 분포를 표준화하는 것을 의미한다.
# 표준화는 변수에서 데이터의 평균을 빼고,
# 그 값을 전체 데이터의 표준편차로 나누는 작업이다.
# 이렇게 하면 변숫값의 평균이 0이 되고 값의
# 퍼짐정도(분포) 또한 일정해진다.
# R에서 데이터를 정규화 하는 함수는 scale()이다.
# <임금격차 해소를 위한 현상황 분석예제>
## 성별 임금 소득에 대한 통계를 구하시오.
# 1. 급여에 대한 기술통계를 서술하시오.
# 2. 남녀성별에 따른 평균임금을 막대그래프로 구현
# 3. 가장 고소득인 연령대를 구하시오.
# 4. 그룹간 임금격차가 커지는 연령대를 구하시오.
# Step 1: read the example salary CSV from the URL below.
# Argument names are spelled out in full (no partial matching): strings stay
# character vectors and "-" cells are read as NA.
csv <- "https://www.dropbox.com/s/9gchq4nbt67lpxu/example_salary.csv?dl=1"
salary <- read.csv(
  csv,
  stringsAsFactors = FALSE,
  na.strings = "-"
)
# Step 2: inspect the structure and the original (Korean) column names.
str(salary)
names(salary)
# Original column names as printed by the call above:
# [1] "연령"
# [2] "월급여액..원."
# [3] "연간특별급여액..원."
# [4] "근로시간..시간."
# [5] "근로자수..명."
# [6] "경력구분"
# [7] "성별"
# Step 3: replace the Korean column names with English ones so the columns
# are easier to reference in later computations.
salary <- setNames(
  salary,
  c(
    "age", "wage", "special_wage", "working_time",
    "worker_count", "career", "gender"
  )
)
names(salary)
#salary$wage
# step 4 : 검색목록에 올리기.
# salary$age 를 하지 않도록 조치
# salary dataframe 을 디폴트값으로 지정
# detach(salary)
# attach(salary)
# Step 5: descriptive statistics for the monthly wage column
# (mean, median, range, quartiles). Missing wages are ignored throughout.
salary$wage
wage_mean <- mean(salary$wage, na.rm = TRUE)
wage_mean # [1] 2171578
# Median
wage_mid <- median(salary$wage, na.rm = TRUE)
wage_mid
# Range (minimum, maximum)
wage_range <- range(salary$wage, na.rm = TRUE)
wage_range # 1117605 4064286
# Rows that earn the highest wage. The maximum is taken from wage_range
# instead of being typed in by hand, so it stays correct if the data changes.
highest_wage <- which(salary$wage == wage_range[2])
salary[highest_wage, ]
# Quartiles
qnt <- quantile(salary$wage, na.rm = TRUE)
qnt
# Step 6: collect the statistics in one list. The element names are Korean
# for: mean wage, median wage, wage range, wage quartiles.
sal_list <- list(
  평균월급 = wage_mean,
  월급중앙값 = wage_mid,
  월급범위 = wage_range,
  월급사분위 = qnt
)
sal_list
# Wage gap by gender: mean monthly wage per gender, ignoring missing wages.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
wage_avg_per_gender <- tapply(
  salary$wage,
  salary$gender,
  mean,
  na.rm = TRUE
)
wage_avg_per_gender
# Printed result (남 = male, 여 = female):
# 남 여
# 2477332 1865823
# Packages: reshape2 provides melt(); ggplot2 provides ggplot() and the geoms
# used by the charts in this script.
# reshape2 is installed only when it is missing, not on every run.
if (!requireNamespace("reshape2", quietly = TRUE)) {
  install.packages("reshape2")
}
library(reshape2)
library(ggplot2)
# Reshape the per-gender means into long form; the plot below uses the
# resulting Var1 (group label) and value (mean wage) columns.
temp <- melt(wage_avg_per_gender)
temp
# Bar chart of mean wage per gender, one fill colour per group.
ggplot(temp, aes(Var1, value, fill = Var1)) +
  geom_bar(stat = "identity")
# Wage gap by career bracket: mean monthly wage per value of salary$career,
# ignoring missing wages.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
wage_avg_per_career <- tapply(
  salary$wage,
  salary$career,
  mean,
  na.rm = TRUE
)
wage_avg_per_career
# Reshape into long form; the plot below uses the resulting Var1 (career
# bracket) and value (mean wage) columns.
temp <- melt(wage_avg_per_career)
temp
# Bar chart of mean wage per career bracket, one fill colour per bracket.
ggplot(
  data = temp,
  aes(
    x = Var1,
    y = value,
    fill = Var1
  )
) +
  geom_bar(stat = "identity")
# Polar line chart of the mean wage per career bracket.
# The long-form table gets its own name so it no longer shadows melt().
career_wage_long <- melt(wage_avg_per_career)
ggplot(
  career_wage_long,
  aes(
    x = Var1,
    y = value,
    group = 1 # a single group, so all brackets are joined by one line
  )
) +
  geom_line(
    colour = "blue", # was misspelled `colr`, so the colour was never applied
    size = 2 # NOTE(review): newer ggplot2 prefers `linewidth` for lines
  ) +
  coord_polar() +
  ylim(0, max(career_wage_long$value))
# Lowest-paid group within each career bracket.
# Reference values (presumably the printed wage_avg_per_career means):
# 1~3년미만 10년이상 1년미만
# 1905012 2907119 1730835
# 3~5년미만 5~10년미만
# 2028015 2360463
# Minimum and maximum wage per career bracket.
tapply(
  salary$wage,
  salary$career,
  range,
  na.rm = TRUE
)
# Printed result:
# $`1~3년미만`
# [1] 1172399 2619221
#
# $`10년이상`
# [1] 1685204 4064286
#
# $`1년미만`
# [1] 1117605 2414345
#
# $`3~5년미만`
# [1] 1245540 2827420
#
# $`5~10년미만`
# [1] 1548036 3309231

# Return the rows of `data` whose wage equals the lowest non-missing wage
# within the career bracket `career_label`. Returns zero rows when the bracket
# has no rows or no non-missing wage.
lowest_wage_rows <- function(data, career_label) {
  in_career <- !is.na(data$career) & data$career == career_label
  career_wages <- data$wage[in_career]
  if (all(is.na(career_wages))) {
    return(data[0, , drop = FALSE])
  }
  lowest <- min(career_wages, na.rm = TRUE)
  data[which(in_career & data$wage == lowest), , drop = FALSE]
}

# The minima are computed per bracket instead of being typed in by hand, and
# each lookup is restricted to its own bracket, so a row from another bracket
# that happens to have the same wage is not picked up.
year_1 <- lowest_wage_rows(salary, "1년미만")
year_1_3 <- lowest_wage_rows(salary, "1~3년미만")
year_3_5 <- lowest_wage_rows(salary, "3~5년미만")
year_5_10 <- lowest_wage_rows(salary, "5~10년미만")
year_10 <- lowest_wage_rows(salary, "10년이상")
career_list <- list(
  year_1, year_1_3, year_3_5, year_5_10, year_10
)
career_list
## 2번답
# 경력별 가장 낮은 월급을 받는 집단은 대부분 60대이상 여자.
# 특이점은 10년이상 경력에서 가장 낮은 월급을 받는 집단은
# 20대 초반여성
# 1886명. 이들은 10년이나 경력을 쌓고도 168만원을 수령함
# Part 3: standardise the wage column.
# scale() with its defaults centres the values on their mean and divides by
# their standard deviation; the result is a one-column matrix.
wage_scale <- scale(salary$wage)
head(wage_scale, 10)
# Printed result:
# [,1]
# [1,] -1.28886999
# [2,] -0.91757018
# [3,] -0.38981924
# [4,] -0.06340878
# [5,] 0.37924689
# [6,] 0.31343053
# [7,] 0.28505815
# [8,] -0.04016661
# [9,] -0.13812959
# [10,] -0.78222571
## The mean is 0 and the values are spread around 0.
# Store the z-scores as a plain numeric column. Assigning by name (rather than
# cbind) keeps a single `scale` column even if this section is run again.
salary$scale <- as.numeric(wage_scale)
str(salary)
# Plot the standardised wage (x) per age group (y). Columns are referenced by
# bare name inside aes() so ggplot2 looks them up in `salary` and labels the
# axes and legends with the column names.
g1 <- ggplot(salary, aes(x = scale, y = age))
# Horizontal segment from each point back to x = 0 (the mean wage).
g2 <- geom_segment(aes(yend = age), xend = 0)
g3 <- g1 + g2 +
  geom_point(
    size = 7,
    aes(color = gender, shape = career)
  ) +
  theme_minimal()
g3
## 해석
# 10년이상된 45~54세 남성이 가장 고소득자.
# 25 ~ 29세 그룹은 격차가 크지 않다
# 45세 이상부터는 그룹간 격차가 크다
# 저임금은 주로 여성그룹에서 나타난다
# 고임금은 주로 남성그룹에서 나타난다
LS0tDQp0aXRsZTogIsDTsd2w3cL3utC8riINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCg0KYGBge3J9DQojILWlwMzFzSDBpLHUyK20wiC6r7z9sKrAxyC60Mb3uKYgx6XB2Mitx8+0wiCwzcC7IMDHuczH0bTZLg0KIyDHpcHYyK20wiC6r7z2v6G8rSC1pcDMxc3AxyDG8rHVwLsgu6mwxbOqDQojILqvvPa4piDA/MO8ILWlwMzFzcDHIMelwdjG7cL3t84gs6q0qbTCIMDbvvfAuyDG98fUx9G02S4NCiMgwMy3uLDUIMfPuOkguq+8/bCqwMcgxvKx1cDMIDDAzCC1x7DtILCqwMcNCiMgxtvB/MGktbUoutDG9ykgtsfH0SDAz8Gkx9jB+LTZLg0KIyBSv6G8rSC1pcDMxc24piDBpLHUyK0gx8+0wiDH1Lz2tMIgc2NhbGUoKcDMtNkuDQoNCg0KIyA8wNOx3bDdwvcgx9i80rimIMCnx9Egx/a788iyILrQvK6/ucGmPg0KICANCiAgIyMgvLq6sCDA07HdILzStea/oSC068fRIMXrsOi4piCxuMfPvcO/wC4NCiAgIyAxLiCx3r+pv6EgtOvH0SCx4rz6xeuw6LimILytvPrHz73Dv8AuDQogICMgMi4gs7Kz4Ly6urC/oSC1+7ilIMbysdXA07HdwLsguLe067HXt6HHwbfOILG4x/YNCiAgIyAzLiCwocDlILDtvNK15sDOIL+st8m067imILG4x8+9w7/ALg0KICAjIDQuILHXt+ywoyDA07HdsN3C97ChIMS/wfa0wiC/rLfJtOu4piCxuMfPvcO/wC4NCiAgIyBzdGVwIDENCiAgY3N2ID0gImh0dHBzOi8vd3d3LmRyb3Bib3guY29tL3MvOWdjaHE0bmJ0NjdscHh1L2V4YW1wbGVfc2FsYXJ5LmNzdj9kbD0xIg0KICBzYWxhcnkgPC0gcmVhZC5jc3YoY3N2LA0KICAgICAgICAgICAgICAgICAgICAgc3RyaW5nc0FzRmFjdG9yID0gRiwNCiAgICAgICAgICAgICAgICAgICAgIG5hID0gIi0iKQ0KICAjIHN0ZXAgMg0KICBzdHIoc2FsYXJ5KQ0KICBjb2xuYW1lcyhzYWxhcnkpDQogICMgWzFdICK/rLfJIiAgICAgICAgICAgICAgIA0KICAjIFsyXSAiv/mx3r+pvtcuLr/4LiIgICAgICANCiAgIyBbM10gIr+ssKPGr7qwsd6/qb7XLi6/+C4iDQogICMgWzRdICKx2bfOvcOwoy4uvcOwoy4iICAgIA0KICAjIFs1XSAisdm3zsDavPYuLrjtLiIgICAgICANCiAgIyBbNl0gIrDmt8KxuLrQIiAgICAgICAgICAgDQogICMgWzddICK8urqwIiAgDQogICMgc3RlcCAzIL+su+rAuyDHz7HiIMCnx9ggx9Gx27jtwLsgv7W+7rfOILqvyK8NCiAgY29sbmFtZXMoc2FsYXJ5KSA8LSBjKA0KICAgICJhZ2UiLCJ3YWdlIiwic3BlY2lhbF93YWdlIiwid29ya2luZ190aW1lIiwid29ya2VyX2NvdW50IiwiY2FyZWVyIiwiZ2VuZGVyIg0KICApDQogIGNvbG5hbWVzKHNhbGFyeSkNCiAgI3NhbGFyeSR3YWdlDQogICMgc3RlcCA0IDogsMu79rjxt8+/oSC/w7iuseIuDQogICMgc2FsYXJ5JGFnZSC4piDHz8H2IL7KtbW3zyDBtsShDQogICMgc2FsYXJ5IGRhdGFmcmFtZSDAuyC18Mb6xq6wqsC4t84gwfbBpA0KICAjIGRldGFjaChzYWxhcnkpDQogICMgYXR0YWNoKHNhbGFyeSkNCiAgIyBzdGVwIDUgOrHivPrF67DoIDo6IMbysdUsIMHfvtOwqiwgw9a687CqDQogICMgbWVhbiwgbWVkaWFu
LCBtb2RlDQogIHNhbGFyeSR3YWdlDQogIHdhZ2VfbWVhbiA8LSBtZWFuKHNhbGFyeSR3YWdlLCBuYS5ybSA9IFQpDQogIHdhZ2VfbWVhbiAgIyBbMV0gMjE3MTU3OA0KICAjIMHfvtOwqiBtZWRpYW4gDQogIHdhZ2VfbWlkIDwtIG1lZGlhbihzYWxhcnkkd2FnZSwgbmEucm0gPSBUKQ0KICB3YWdlX21pZA0KICAjILn8wKcgsbjHz7HiDQogIHdhZ2VfcmFuZ2UgPC0gcmFuZ2Uoc2FsYXJ5JHdhZ2UsIG5hLnJtID0gVCkNCiAgd2FnZV9yYW5nZSAjIDExMTc2MDUgNDA2NDI4Ng0KICAjIMPWsO3A07HdwLsgud60wiC757b3wMcgwaS6uA0KICBoaWdoZXN0X3dhZ2UgIDwtIHdoaWNoKHNhbGFyeSR3YWdlID09IDQwNjQyODYpDQogIHNhbGFyeVtoaWdoZXN0X3dhZ2UsXQ0KICAjIDS60MCnILG4x8+x4g0KICBxbnQgPC0gcXVhbnRpbGUoc2FsYXJ5JHdhZ2UsbmEucm09VCkNCiAgcW50DQogICMgc3RlcCA2ILiuvbrGrr+hILTjseINCiAgc2FsX2xpc3QgPC0gbGlzdCgNCiAgICDG8rHVv/mx3iA9IHdhZ2VfbWVhbiwNCiAgICC/+bHewd++07CqID0gd2FnZV9taWQsDQogICAgv/mx3rn8wKcgPSB3YWdlX3JhbmdlLA0KICAgIL/5sd6757rQwKcgPSBxbnQNCiAgKQ0KICBzYWxfbGlzdA0KICAjILy6urC/oSC1+7ilIMDTsd2w3cL3DQogIHdhZ2VfYXZnX3Blcl9nZW5kZXIgPC0gdGFwcGx5KA0KICAgIHNhbGFyeSR3YWdlLHNhbGFyeSRnZW5kZXIsbWVhbixuYS5ybT1UDQogICkNCiAgd2FnZV9hdmdfcGVyX2dlbmRlcg0KICAjILOyICAgICAgv6kgDQogICMgMjQ3NzMzMiAxODY1ODIzIA0KICAjIHJlc2hhcGUyDQogIGluc3RhbGwucGFja2FnZXMoInJlc2hhcGUyIikNCiAgbGlicmFyeShyZXNoYXBlMikNCiAgdGVtcCA8LSBtZWx0KHdhZ2VfYXZnX3Blcl9nZW5kZXIpDQogIHRlbXANCiAgZ2dwbG90KA0KICAgIGRhdGEgPSB0ZW1wLA0KICAgIGFlcygNCiAgICAgIHggPSBWYXIxLCAjIG1lbHS/oSCzu8DltcggeCCwqg0KICAgICAgeSA9IHZhbHVlLA0KICAgICAgZmlsbCA9IFZhcjENCiAgICApDQogICkrZ2VvbV9iYXIoDQogICAgc3RhdCA9ICJpZGVudGl0eSINCiAgKQ0KICAjIMS/uK6+7r+hILX7uKUgwNOx3bDdwvcNCiAgIyBzYWxhcnkkY2FyZWVyDQogIHdhZ2VfYXZnX3Blcl9jYXJlZXIgPC0gdGFwcGx5KA0KICAgIHNhbGFyeSR3YWdlLHNhbGFyeSRjYXJlZXIsbWVhbixuYS5ybT1UDQogICkNCiAgd2FnZV9hdmdfcGVyX2NhcmVlcg0KICB0ZW1wIDwtIG1lbHQod2FnZV9hdmdfcGVyX2NhcmVlcikNCiAgdGVtcA0KICBnZ3Bsb3QoDQogICAgZGF0YSA9IHRlbXAsDQogICAgYWVzKA0KICAgICAgeCA9IFZhcjEsICMgbWVsdL+hILO7wOW1yCB4ILCqDQogICAgICB5ID0gdmFsdWUsDQogICAgICBmaWxsID0gVmFyMQ0KICAgICkNCiAgKStnZW9tX2JhcigNCiAgICBzdGF0ID0gImlkZW50aXR5Ig0KICApDQogIG1lbHQgPC0gbWVsdCh3YWdlX2F2Z19wZXJfY2FyZWVyKQ0KICBnZ3Bsb3QoDQogICAgbWVsdCwNCiAgICBhZXMoDQogICAgICB4ID0g
VmFyMSwNCiAgICAgIHkgPSB2YWx1ZSwNCiAgICAgIGdyb3VwID0gMQ0KICAgICkNCiAgKStnZW9tX2xpbmUoDQogICAgY29sciA9ICdibHVlJywNCiAgICBzaXplID0gMg0KICApKyBjb29yZF9wb2xhcigpKw0KICAgIHlsaW0oMCxtYXgobWVsdCR2YWx1ZSkpDQogIA0KICAjILCiILDmt8K6sLfOIMGmwM8gwPuw1CC53rTCIL/5sd4gwf203A0KICAjIDF+M7Piucy4uCAgIDEws+LAzLvzICAgIDGz4rnMuLggDQogICMgMTkwNTAxMiAgICAyOTA3MTE5ICAgIDE3MzA4MzUgDQogICMgM341s+K5zLi4IDV+MTCz4rnMuLggDQogICMgMjAyODAxNSAgICAyMzYwNDYzDQogIHRhcHBseSgNCiAgICBzYWxhcnkkd2FnZSwNCiAgICBzYWxhcnkkY2FyZWVyLA0KICAgIHJhbmdlLA0KICAgIG5hLnJtID0gVA0KICApDQogICMgJGAxfjOz4rnMuLhgDQogICMgWzFdIDExNzIzOTkgMjYxOTIyMQ0KICAjIA0KICAjICRgMTCz4sDMu/NgDQogICMgWzFdIDE2ODUyMDQgNDA2NDI4Ng0KICAjIA0KICAjICRgMbPiucy4uGANCiAgIyBbMV0gMTExNzYwNSAyNDE0MzQ1DQogICMgDQogICMgJGAzfjWz4rnMuLhgDQogICMgWzFdIDEyNDU1NDAgMjgyNzQyMA0KICAjIA0KICAjICRgNX4xMLPiucy4uGANCiAgIyBbMV0gMTU0ODAzNiAzMzA5MjMxDQogIHllYXJfMSA8LSBzYWxhcnlbd2hpY2goc2FsYXJ5JHdhZ2UgPT0gMTExNzYwNSksXQ0KICB5ZWFyXzFfMyA8LSBzYWxhcnlbd2hpY2goc2FsYXJ5JHdhZ2UgPT0gMTE3MjM5OSksXQ0KICB5ZWFyXzNfNSA8LSBzYWxhcnlbd2hpY2goc2FsYXJ5JHdhZ2UgPT0gMTI0NTU0MCksXQ0KICB5ZWFyXzVfMTAgPC0gc2FsYXJ5W3doaWNoKHNhbGFyeSR3YWdlID09IDE1NDgwMzYpLF0NCiAgeWVhcl8xMCA8LSBzYWxhcnlbd2hpY2goc2FsYXJ5JHdhZ2UgPT0gMTY4NTIwNCksXQ0KICANCiAgY2FyZWVyX2xpc3QgPC0gbGlzdCgNCiAgICB5ZWFyXzEseWVhcl8xXzMseWVhcl8zXzUseWVhcl81XzEwLHllYXJfMTANCiAgKQ0KICBjYXJlZXJfbGlzdA0KICANCiAgIyMgMrn4tOQNCiAgIyCw5rfCurAgsKHA5SCzt8C6IL/5sd7AuyC53rTCIMH9tNzAuiC067rOutAgNjC068DMu/Mgv6nA2i4NCiAgIyDGr8DMwaHAuiAxMLPiwMy78yCw5rfCv6G8rSCwocDlILO3wLogv/mx3sC7ILnetMIgwf203MC6DQogICMgMjC06yDDyrndv6m8ug0KICAjIDE4ODa47S4gwMy16cC6IDEws+LAzLOqILDmt8LAuyC917DttbUgMTY4uLi/+MC7ILz2t8nH1A0KICANCiAgIyAgM7n4LiDHpcHYyK0gvcPFsLHiDQogIHdhZ2Vfc2NhbGUgPC0gc2NhbGUoc2FsYXJ5JHdhZ2UpDQogIGhlYWQod2FnZV9zY2FsZSwgMTApDQogICMgWywxXQ0KICAjIFsxLF0gLTEuMjg4ODY5OTkNCiAgIyBbMixdIC0wLjkxNzU3MDE4DQogICMgWzMsXSAtMC4zODk4MTkyNA0KICAjIFs0LF0gLTAuMDYzNDA4NzgNCiAgIyBbNSxdICAwLjM3OTI0Njg5DQogICMgWzYsXSAgMC4zMTM0MzA1Mw0KICAjIFs3LF0gIDAuMjg1MDU4MTUNCiAgIyBbOCxdIC0wLjA0MDE2NjYxDQogICMg
WzksXSAtMC4xMzgxMjk1OQ0KICAjIFsxMCxdIC0wLjc4MjIyNTcxDQogICMjIMbysdXAzCAwwMyw7SwgMMC7ILHiwdjAuLfOILrQu+q1yCCwqrXpwMwgwNa02Q0KICBzYWxhcnkgPC0gY2JpbmQoc2FsYXJ5LHNjYWxlID0gd2FnZV9zY2FsZSkNCiAgc3RyKHNhbGFyeSkNCiAgZzEgPC0gZ2dwbG90KHNhbGFyeSxhZXMoeD1zYWxhcnkkc2NhbGUseT1zYWxhcnkkYWdlKSkNCiAgZzIgPC0gZ2VvbV9zZWdtZW50KGFlcyh5ZW5kPXNhbGFyeSRhZ2UpLHhlbmQ9MCkNCiAgZzMgPC0gIGcxICsgZzIgKyBnZW9tX3BvaW50KA0KICAgIHNpemUgPSA3LA0KICAgIGFlcyhjb2xvcj1zYWxhcnkkZ2VuZGVyLHNoYXBlPXNhbGFyeSRjYXJlZXIpDQogICkrdGhlbWVfbWluaW1hbCgpDQogIA0KICBnMw0KICAjIyDH2LyuDQogICMgMTCz4sDMu/O1yCA0NX41NLy8ILOyvLrAzCCwocDlILDtvNK15sDaLg0KICAjIDI1IH4gMjm8vCCx17fswLogsN3C97ChIMWpwfYgvsq02Q0KICAjIDQ1vLwgwMy787rOxc20wiCx17fssKMgsN3C97ChIMWptNkNCiAgIyDA+sDTsd3AuiDB1rfOIL+pvLqx17fsv6G8rSCzqsW4s6202Q0KICAjILDtwNOx3cC6IMHWt84gs7K8urHXt+y/obytILOqxbizrbTZDQogIA0KYGBgDQoNCg0K