salary

## 성별 임금 소득에 대한 통계를 구하시오.
# 1. 급여에 대한 기술통계를 서술하시오.
# 2. 남녀성별에 따른 평균임금을 막대그래프로 구현
# 3. 가장 고소득인 연령대를 구하시오.
# 4. 그룹간 임금격차가 커지는 연령대를 구하시오.
# step 1
csv = "https://www.dropbox.com/s/9gchq4nbt67lpxu/example_salary.csv?dl=1"
salary <- read.csv(csv,
                   stringsAsFactor = F,
                   na = "-")
# step 2
str(salary)
colnames(salary)
# [1] "연령"               
# [2] "월급여액..원."      
# [3] "연간특별급여액..원."
# [4] "근로시간..시간."    
# [5] "근로자수..명."      
# [6] "경력구분"           
# [7] "성별"  
# step 3 연산을 하기 위해 한글명을 영어로 변환
colnames(salary) <- c(
  "age","wage","special_wage","working_time","worker_count","career","gender"
)
colnames(salary)
#salary$wage
# step 4 : 검색목록에 올리기.
# salary$age 를 하지 않도록 조치
# salary dataframe 을 디폴트값으로 지정
# detach(salary)
# attach(salary)
# step 5 :기술통계 :: 평균, 중앙값, 최빈값
# mean, median, mode
salary$wage
wage_mean <- mean(salary$wage, na.rm = T)
wage_mean  # [1] 2171578
# 중앙값 median 
wage_mid <- median(salary$wage, na.rm = T)
wage_mid
# 범위 구하기
wage_range <- range(salary$wage, na.rm = T)
wage_range # 1117605 4064286
# 최고임금을 받는 사람의 정보
highest_wage  <- which(salary$wage == 4064286)
salary[highest_wage,]
# 4분위 구하기
qnt <- quantile(salary$wage,na.rm=T)
qnt
# step 6 리스트에 담기
sal_list <- list(
  평균월급 = wage_mean,
  월급중앙값 = wage_mid,
  월급범위 = wage_range,
  월급사분위 = qnt
)
sal_list
# 성별에 따른 임금격차
wage_avg_per_gender <- tapply(
  salary$wage,salary$gender,mean,na.rm=T
)
wage_avg_per_gender
# 남      여 
# 2477332 1865823 
# reshape2
install.packages("reshape2")
library(reshape2)
temp <- melt(wage_avg_per_gender)
temp
ggplot(
  data = temp,
  aes(
    x = Var1, # melt에 내장된 x 값
    y = value,
    fill = Var1
  )
)+geom_bar(
  stat = "identity"
)
# 커리어에 따른 임금격차
# salary$career
wage_avg_per_career <- tapply(
  salary$wage,salary$career,mean,na.rm=T
)
wage_avg_per_career
temp <- melt(wage_avg_per_career)
temp
ggplot(
  data = temp,
  aes(
    x = Var1, # melt에 내장된 x 값
    y = value,
    fill = Var1
  )
)+geom_bar(
  stat = "identity"
)
melt <- melt(wage_avg_per_career)
ggplot(
  melt,
  aes(
    x = Var1,
    y = value,
    group = 1
  )
)+geom_line(
  colr = 'blue',
  size = 2
)+ coord_polar()+
  ylim(0,max(melt$value))
  
# 각 경력별로 제일 적게 받는 월급 집단
# 1~3년미만   10년이상    1년미만 
# 1905012    2907119    1730835 
# 3~5년미만 5~10년미만 
# 2028015    2360463
tapply(
  salary$wage,
  salary$career,
  range,
  na.rm = T
)
# $`1~3년미만`
# [1] 1172399 2619221
# 
# $`10년이상`
# [1] 1685204 4064286
# 
# $`1년미만`
# [1] 1117605 2414345
# 
# $`3~5년미만`
# [1] 1245540 2827420
# 
# $`5~10년미만`
# [1] 1548036 3309231
 year_1 <- salary[which(salary$wage == 1117605),]
 year_1_3 <- salary[which(salary$wage == 1172399),]
 year_3_5 <- salary[which(salary$wage == 1245540),]
 year_5_10 <- salary[which(salary$wage == 1548036),]
 year_10 <- salary[which(salary$wage == 1685204),]

career_list <- list(
  year_1,year_1_3,year_3_5,year_5_10,year_10
)
career_list

## 2번답
# 경력별 가장 낮은 월급을 받는 집단은 대부분 60대이상 여자.
# 특이점은 10년이상 경력에서 가장 낮은 월급을 받는 집단은
# 20대 초반여성
# 1886명. 이들은 10년이나 경력을 쌓고도 168만원을 수령함

#  3번. 표준화 시키기
wage_scale <- scale(salary$wage)
head(wage_scale, 10)
# [,1]
# [1,] -1.28886999
# [2,] -0.91757018
# [3,] -0.38981924
# [4,] -0.06340878
# [5,]  0.37924689
# [6,]  0.31343053
# [7,]  0.28505815
# [8,] -0.04016661
# [9,] -0.13812959
# [10,] -0.78222571
## 평균이 0이고, 0을 기준으로 분산된 값들이 있다
salary <- cbind(salary,scale = wage_scale)
str(salary)
g1 <- ggplot(salary,aes(x=salary$scale,y=salary$age))
g2 <- geom_segment(aes(yend=salary$age),xend=0)
g3 <-  g1 + g2 + geom_point(
    size = 7,
    aes(color=salary$gender,shape=salary$career)
  )+theme_minimal()

g3
## 해석
# 10년이상된 45~54세 남성이 가장 고소득자.
# 25 ~ 29세 그룹은 격차가 크지 않다
# 45세 이상부터는 그룹간 격차가 크다
# 저임금은 주로 여성그룹에서 나타난다
# 고임금은 주로 남성그룹에서 나타난다```
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojc2FsYXJ5IA0KIA0KDQpgYGB7cn0NCiMjIOyEseuzhCDsnoTquIgg7IaM65Od7JeQIOuMgO2VnCDthrXqs4Trpbwg6rWs7ZWY7Iuc7JikLg0KIyAxLiDquInsl6zsl5Ag64yA7ZWcIOq4sOyIoO2GteqzhOulvCDshJzsiKDtlZjsi5zsmKQuDQojIDIuIOuCqOuFgOyEseuzhOyXkCDrlLDrpbgg7Y+J6reg7J6E6riI7J2EIOunieuMgOq3uOuemO2UhOuhnCDqtaztmIQNCiMgMy4g6rCA7J6lIOqzoOyGjOuTneyduCDsl7DroLnrjIDrpbwg6rWs7ZWY7Iuc7JikLg0KIyA0LiDqt7jro7nqsIQg7J6E6riI6rKp7LCo6rCAIOy7pOyngOuKlCDsl7DroLnrjIDrpbwg6rWs7ZWY7Iuc7JikLg0KIyBzdGVwIDENCmNzdiA9ICJodHRwczovL3d3dy5kcm9wYm94LmNvbS9zLzlnY2hxNG5idDY3bHB4dS9leGFtcGxlX3NhbGFyeS5jc3Y/ZGw9MSINCnNhbGFyeSA8LSByZWFkLmNzdihjc3YsDQogICAgICAgICAgICAgICAgICAgc3RyaW5nc0FzRmFjdG9yID0gRiwNCiAgICAgICAgICAgICAgICAgICBuYSA9ICItIikNCiMgc3RlcCAyDQpzdHIoc2FsYXJ5KQ0KY29sbmFtZXMoc2FsYXJ5KQ0KIyBbMV0gIuyXsOuguSIgICAgICAgICAgICAgICANCiMgWzJdICLsm5TquInsl6zslaEuLuybkC4iICAgICAgDQojIFszXSAi7Jew6rCE7Yq567OE6riJ7Jes7JWhLi7sm5AuIg0KIyBbNF0gIuq3vOuhnOyLnOqwhC4u7Iuc6rCELiIgICAgDQojIFs1XSAi6re866Gc7J6Q7IiYLi7rqoUuIiAgICAgIA0KIyBbNl0gIuqyveugpeq1rOu2hCIgICAgICAgICAgIA0KIyBbN10gIuyEseuzhCIgIA0KIyBzdGVwIDMg7Jew7IKw7J2EIO2VmOq4sCDsnITtlbQg7ZWc6riA66qF7J2EIOyYgeyWtOuhnCDrs4DtmZgNCmNvbG5hbWVzKHNhbGFyeSkgPC0gYygNCiAgImFnZSIsIndhZ2UiLCJzcGVjaWFsX3dhZ2UiLCJ3b3JraW5nX3RpbWUiLCJ3b3JrZXJfY291bnQiLCJjYXJlZXIiLCJnZW5kZXIiDQopDQpjb2xuYW1lcyhzYWxhcnkpDQojc2FsYXJ5JHdhZ2UNCiMgc3RlcCA0IDog6rKA7IOJ66qp66Gd7JeQIOyYrOumrOq4sC4NCiMgc2FsYXJ5JGFnZSDrpbwg7ZWY7KeAIOyViuuPhOuhnSDsobDsuZgNCiMgc2FsYXJ5IGRhdGFmcmFtZSDsnYQg65SU7Y+07Yq46rCS7Jy866GcIOyngOyglQ0KIyBkZXRhY2goc2FsYXJ5KQ0KIyBhdHRhY2goc2FsYXJ5KQ0KIyBzdGVwIDUgOuq4sOyIoO2GteqzhCA6OiDtj4nqt6AsIOykkeyVmeqwkiwg7LWc67mI6rCSDQojIG1lYW4sIG1lZGlhbiwgbW9kZQ0Kc2FsYXJ5JHdhZ2UNCndhZ2VfbWVhbiA8LSBtZWFuKHNhbGFyeSR3YWdlLCBuYS5ybSA9IFQpDQp3YWdlX21lYW4gICMgWzFdIDIxNzE1NzgNCiMg7KSR7JWZ6rCSIG1lZGlhbiANCndhZ2VfbWlkIDwtIG1lZGlhbihzYWxhcnkkd2FnZSwgbmEucm0gPSBUKQ0Kd2FnZV9taWQNCiMg67KU7JyEIOq1rO2VmOq4sA0Kd2FnZV9yYW5nZSA8LSByYW5nZShzYWxhcnkkd2FnZSwgbmEucm0gPSBUKQ0Kd2FnZV9yYW5nZSAjIDExMTc2MDUgNDA2NDI4Ng0KIyDstZzqs6DsnoTquIjsnYQg67Cb64qUIOyCrOuejOydmCDsoJXrs7QNCmhpZ2hlc3Rfd2FnZSAgPC0gd2hpY2goc2FsYXJ5JHdhZ2UgPT0gNDA2NDI4NikNCnNhbGFyeVtoaWdoZXN0X3dhZ2UsXQ0KIyA067aE7JyEIOq1rO2VmOq4sA0KcW50IDwtIHF1YW50aWxlKHNhbGFyeSR3YWdlLG5hLnJtPVQpDQpxbnQNCiMgc3RlcCA2IOumrOyKpO2KuOyXkCDri7TquLANCnNhbF9saXN0IDwtIGxpc3QoDQogIO2Pieq3oOyblOq4iSA9IHdhZ2VfbWVhbiwNCiAg7JuU6riJ7KSR7JWZ6rCSID0gd2FnZV9taWQsDQogIOyblOq4ieuylOychCA9IHdhZ2VfcmFuZ2UsDQogIOyblOq4ieyCrOu2hOychCA9IHFudA0KKQ0Kc2FsX2xpc3QNCiMg7ISx67OE7JeQIOuUsOuluCDsnoTquIjqsqnssKgNCndhZ2VfYXZnX3Blcl9nZW5kZXIgPC0gdGFwcGx5KA0KICBzYWxhcnkkd2FnZSxzYWxhcnkkZ2VuZGVyLG1lYW4sbmEucm09VA0KKQ0Kd2FnZV9hdmdfcGVyX2dlbmRlcg0KIyDrgqggICAgICDsl6wgDQojIDI0NzczMzIgMTg2NTgyMyANCiMgcmVzaGFwZTINCmluc3RhbGwucGFja2FnZXMoInJlc2hhcGUyIikNCmxpYnJhcnkocmVzaGFwZTIpDQp0ZW1wIDwtIG1lbHQod2FnZV9hdmdfcGVyX2dlbmRlcikNCnRlbXANCmdncGxvdCgNCiAgZGF0YSA9IHRlbXAsDQogIGFlcygNCiAgICB4ID0gVmFyMSwgIyBtZWx07JeQIOuCtOyepeuQnCB4IOqwkg0KICAgIHkgPSB2YWx1ZSwNCiAgICBmaWxsID0gVmFyMQ0KICApDQopK2dlb21fYmFyKA0KICBzdGF0ID0gImlkZW50aXR5Ig0KKQ0KIyDsu6TrpqzslrTsl5Ag65Sw66W4IOyehOq4iOqyqeywqA0KIyBzYWxhcnkkY2FyZWVyDQp3YWdlX2F2Z19wZXJfY2FyZWVyIDwtIHRhcHBseSgNCiAgc2FsYXJ5JHdhZ2Usc2FsYXJ5JGNhcmVlcixtZWFuLG5hLnJtPVQNCikNCndhZ2VfYXZnX3Blcl9jYXJlZXINCnRlbXAgPC0gbWVsdCh3YWdlX2F2Z19wZXJfY2FyZWVyKQ0KdGVtcA0KZ2dwbG90KA0KICBkYXRhID0gdGVtcCwNCiAgYWVzKA0KICAgIHggPSBWYXIxLCAjIG1lbHTsl5Ag64K07J6l65CcIHgg6rCSDQogICAgeSA9IHZhbHVlLA0KICAgIGZpbGwgPSBWYXIxDQogICkNCikrZ2VvbV9iYXIoDQogIHN0YXQgPSAiaWRlbnRpdHkiDQopDQptZWx0IDwtIG1lbHQod2FnZV9hdmdfcGVyX2NhcmVlcikNCmdncGxvdCgNCiAgbWVsdCwNCiAgYWVzKA0KICAgIHggPSBWYXIxLA0KICAgIHkgPSB2YWx1ZSwNCiAgICBncm91cCA9IDENCiAgKQ0KKStnZW9tX2xpbmUoDQogIGNvbHIgPSAnYmx1ZScsDQogIHNpemUgPSAyDQopKyBjb29yZF9wb2xhcigpKw0KICB5bGltKDAsbWF4KG1lbHQkdmFsdWUpKQ0KICANCiMg6rCBIOqyveugpeuzhOuhnCDsoJzsnbwg7KCB6rKMIOuwm+uKlCDsm5TquIkg7KeR64uoDQojIDF+M+uFhOuvuOunjCAgIDEw64WE7J207IOBICAgIDHrhYTrr7jrp4wgDQojIDE5MDUwMTIgICAgMjkwNzExOSAgICAxNzMwODM1IA0KIyAzfjXrhYTrr7jrp4wgNX4xMOuFhOuvuOunjCANCiMgMjAyODAxNSAgICAyMzYwNDYzDQp0YXBwbHkoDQogIHNhbGFyeSR3YWdlLA0KICBzYWxhcnkkY2FyZWVyLA0KICByYW5nZSwNCiAgbmEucm0gPSBUDQopDQojICRgMX4z64WE66+466eMYA0KIyBbMV0gMTE3MjM5OSAyNjE5MjIxDQojIA0KIyAkYDEw64WE7J207IOBYA0KIyBbMV0gMTY4NTIwNCA0MDY0Mjg2DQojIA0KIyAkYDHrhYTrr7jrp4xgDQojIFsxXSAxMTE3NjA1IDI0MTQzNDUNCiMgDQojICRgM34164WE66+466eMYA0KIyBbMV0gMTI0NTU0MCAyODI3NDIwDQojIA0KIyAkYDV+MTDrhYTrr7jrp4xgDQojIFsxXSAxNTQ4MDM2IDMzMDkyMzENCiB5ZWFyXzEgPC0gc2FsYXJ5W3doaWNoKHNhbGFyeSR3YWdlID09IDExMTc2MDUpLF0NCiB5ZWFyXzFfMyA8LSBzYWxhcnlbd2hpY2goc2FsYXJ5JHdhZ2UgPT0gMTE3MjM5OSksXQ0KIHllYXJfM181IDwtIHNhbGFyeVt3aGljaChzYWxhcnkkd2FnZSA9PSAxMjQ1NTQwKSxdDQogeWVhcl81XzEwIDwtIHNhbGFyeVt3aGljaChzYWxhcnkkd2FnZSA9PSAxNTQ4MDM2KSxdDQogeWVhcl8xMCA8LSBzYWxhcnlbd2hpY2goc2FsYXJ5JHdhZ2UgPT0gMTY4NTIwNCksXQ0KDQpjYXJlZXJfbGlzdCA8LSBsaXN0KA0KICB5ZWFyXzEseWVhcl8xXzMseWVhcl8zXzUseWVhcl81XzEwLHllYXJfMTANCikNCmNhcmVlcl9saXN0DQoNCiMjIDLrsojri7UNCiMg6rK966Cl67OEIOqwgOyepSDrgq7snYAg7JuU6riJ7J2EIOuwm+uKlCDsp5Hri6jsnYAg64yA67aA67aEIDYw64yA7J207IOBIOyXrOyekC4NCiMg7Yq57J207KCQ7J2AIDEw64WE7J207IOBIOqyveugpeyXkOyEnCDqsIDsnqUg64Ku7J2AIOyblOq4ieydhCDrsJvripQg7KeR64uo7J2ADQojIDIw64yAIOy0iOuwmOyXrOyEsQ0KIyAxODg266qFLiDsnbTrk6TsnYAgMTDrhYTsnbTrgpgg6rK966Cl7J2EIOyMk+qzoOuPhCAxNjjrp4zsm5DsnYQg7IiY66C57ZWoDQoNCiMgIDPrsoguIO2RnOykgO2ZlCDsi5ztgqTquLANCndhZ2Vfc2NhbGUgPC0gc2NhbGUoc2FsYXJ5JHdhZ2UpDQpoZWFkKHdhZ2Vfc2NhbGUsIDEwKQ0KIyBbLDFdDQojIFsxLF0gLTEuMjg4ODY5OTkNCiMgWzIsXSAtMC45MTc1NzAxOA0KIyBbMyxdIC0wLjM4OTgxOTI0DQojIFs0LF0gLTAuMDYzNDA4NzgNCiMgWzUsXSAgMC4zNzkyNDY4OQ0KIyBbNixdICAwLjMxMzQzMDUzDQojIFs3LF0gIDAuMjg1MDU4MTUNCiMgWzgsXSAtMC4wNDAxNjY2MQ0KIyBbOSxdIC0wLjEzODEyOTU5DQojIFsxMCxdIC0wLjc4MjIyNTcxDQojIyDtj4nqt6DsnbQgMOydtOqzoCwgMOydhCDquLDspIDsnLzroZwg67aE7IKw65CcIOqwkuuTpOydtCDsnojri6QNCnNhbGFyeSA8LSBjYmluZChzYWxhcnksc2NhbGUgPSB3YWdlX3NjYWxlKQ0Kc3RyKHNhbGFyeSkNCmcxIDwtIGdncGxvdChzYWxhcnksYWVzKHg9c2FsYXJ5JHNjYWxlLHk9c2FsYXJ5JGFnZSkpDQpnMiA8LSBnZW9tX3NlZ21lbnQoYWVzKHllbmQ9c2FsYXJ5JGFnZSkseGVuZD0wKQ0KZzMgPC0gIGcxICsgZzIgKyBnZW9tX3BvaW50KA0KICAgIHNpemUgPSA3LA0KICAgIGFlcyhjb2xvcj1zYWxhcnkkZ2VuZGVyLHNoYXBlPXNhbGFyeSRjYXJlZXIpDQogICkrdGhlbWVfbWluaW1hbCgpDQoNCmczDQojIyDtlbTshJ0NCiMgMTDrhYTsnbTsg4HrkJwgNDV+NTTshLgg64Ko7ISx7J20IOqwgOyepSDqs6Dshozrk53snpAuDQojIDI1IH4gMjnshLgg6re466O57J2AIOqyqeywqOqwgCDtgazsp4Ag7JWK64ukDQojIDQ17IS4IOydtOyDgeu2gO2EsOuKlCDqt7jro7nqsIQg6rKp7LCo6rCAIO2BrOuLpA0KIyDsoIDsnoTquIjsnYAg7KO866GcIOyXrOyEseq3uOujueyXkOyEnCDrgpjtg4Drgpzri6QNCiMg6rOg7J6E6riI7J2AIOyjvOuhnCDrgqjshLHqt7jro7nsl5DshJwg64KY7YOA64Kc64ukYGBgDQoNCg==