# 데이터 정규화는 변숫값의 분포를 표준화하는 것을 의미한다.
# 표준화는 각 값에서 변수의 평균을 빼고(중심화),
# 그 결과를 변수의 표준편차로 나누는(척도화) 작업이다.
# 이렇게 하면 변숫값의 평균이 0, 표준편차가 1이 되어
# 퍼짐 정도(분포)가 일정해진다.
# R에서 데이터를 정규화하는 함수는 scale()이다.


# <임금격차 해소를 위한 현상황 분석예제>
  
  ## 성별 임금 소득에 대한 통계를 구하시오.
  # 1. 급여에 대한 기술통계를 서술하시오.
  # 2. 남녀성별에 따른 평균임금을 막대그래프로 구현
  # 3. 가장 고소득인 연령대를 구하시오.
  # 4. 그룹간 임금격차가 커지는 연령대를 구하시오.
  # Step 1: download the example salary CSV.
  # The argument names are spelled out in full (stringsAsFactors, na.strings)
  # instead of relying on partial argument matching; "-" is read as NA.
  csv <- "https://www.dropbox.com/s/9gchq4nbt67lpxu/example_salary.csv?dl=1"
  salary <- read.csv(
    csv,
    stringsAsFactors = FALSE,
    na.strings = "-"
  )
  # Step 2: inspect the structure and the original (Korean) column names.
  str(salary)
  colnames(salary)
  # [1] "연령"                  (age group)
  # [2] "월급여액..원."         (monthly wage, KRW)
  # [3] "연간특별급여액..원."   (annual special wage, KRW)
  # [4] "근로시간..시간."       (working time, hours)
  # [5] "근로자수..명."         (number of workers)
  # [6] "경력구분"              (career bracket)
  # [7] "성별"                  (gender)
  # Step 3: rename the columns to English so they are easy to use in code.
  colnames(salary) <- c(
    "age", "wage", "special_wage", "working_time", "worker_count",
    "career", "gender"
  )
  colnames(salary)
  # salary$wage
  # Step 4 (intentionally left disabled): attaching `salary` to the search
  # path would allow writing `age` instead of `salary$age`, i.e. it would make
  # the data frame the default lookup scope. Columns are referenced explicitly
  # with `salary$...` instead.
  # detach(salary)
  # attach(salary)
  # Step 5: descriptive statistics for the monthly wage.
  # Mean, median, range and quartiles are computed below; missing wages are
  # dropped with na.rm = TRUE. (The original heading also mentions the mode,
  # but no mode is computed in this step.)
  salary$wage
  wage_mean <- mean(salary$wage, na.rm = TRUE)
  wage_mean  # [1] 2171578
  # Median
  wage_mid <- median(salary$wage, na.rm = TRUE)
  wage_mid
  # Range: c(minimum, maximum)
  wage_range <- range(salary$wage, na.rm = TRUE)
  wage_range # 1117605 4064286
  # Row(s) of the group earning the highest wage. The maximum is taken from
  # wage_range instead of a hard-coded literal, so the lookup stays correct
  # when the data change.
  highest_wage <- which(salary$wage == wage_range[2])
  salary[highest_wage, ]
  # Quartiles
  qnt <- quantile(salary$wage, na.rm = TRUE)
  qnt
  # Step 6: collect the summary statistics in a named list.
  sal_list <- list(
    평균월급 = wage_mean,
    월급중앙값 = wage_mid,
    월급범위 = wage_range,
    월급사분위 = qnt
  )
  sal_list
  # Wage gap by gender: mean monthly wage per gender (NA wages ignored).
  wage_avg_per_gender <- tapply(
    salary$wage, salary$gender, mean,
    na.rm = TRUE
  )
  wage_avg_per_gender
  # 남      여        (male, female)
  # 2477332 1865823
  # reshape2 provides melt(). Install it only when it is missing, so the
  # script does not reinstall the package on every run.
  if (!requireNamespace("reshape2", quietly = TRUE)) {
    install.packages("reshape2")
  }
  library(reshape2)
  # ggplot2 provides ggplot(), aes() and the geom_*() layers used from here
  # on; the file called them without ever loading the package.
  library(ggplot2)
  # melt() turns the named tapply() result into a data frame with the
  # columns Var1 (the gender label) and value (the mean wage).
  temp <- melt(wage_avg_per_gender)
  temp
  # Bar chart of the mean wage per gender.
  ggplot(
    data = temp,
    aes(
      x = Var1, # category column created by melt()
      y = value,
      fill = Var1
    )
  ) +
    geom_bar(stat = "identity")
  # Wage gap by career bracket: mean monthly wage per bracket (NA ignored).
  # salary$career
  wage_avg_per_career <- tapply(
    salary$wage, salary$career, mean,
    na.rm = TRUE
  )
  wage_avg_per_career
  # Long-format data frame with columns Var1 (career bracket) and value.
  temp <- melt(wage_avg_per_career)
  temp
  # Bar chart of the mean wage per career bracket.
  ggplot(
    data = temp,
    aes(
      x = Var1, # category column created by melt()
      y = value,
      fill = Var1
    )
  ) +
    geom_bar(stat = "identity")
  # The same means drawn as a single blue line in polar coordinates.
  # `temp` is reused here; the original stored an identical copy in a variable
  # named `melt`, which shadowed the melt() function. The misspelled `colr`
  # parameter (ignored by ggplot2) is corrected to `colour`.
  # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
  # lines -- confirm the installed version before switching.
  ggplot(
    temp,
    aes(
      x = Var1,
      y = value,
      group = 1 # one group, so all brackets are joined by one line
    )
  ) +
    geom_line(
      colour = "blue",
      size = 2
    ) +
    coord_polar() +
    ylim(0, max(temp$value))
  
  # Lowest-paid group within each career bracket.
  # The figures below appear to be the printed mean wage per bracket
  # (wage_avg_per_career), kept for reference:
  # 1~3년미만   10년이상    1년미만
  # 1905012    2907119    1730835
  # 3~5년미만 5~10년미만
  # 2028015    2360463
  # Minimum and maximum wage per career bracket.
  tapply(
    salary$wage,
    salary$career,
    range,
    na.rm = TRUE
  )
  # $`1~3년미만`
  # [1] 1172399 2619221
  #
  # $`10년이상`
  # [1] 1685204 4064286
  #
  # $`1년미만`
  # [1] 1117605 2414345
  #
  # $`3~5년미만`
  # [1] 1245540 2827420
  #
  # $`5~10년미만`
  # [1] 1548036 3309231

  # Return the row(s) of `salary` holding the smallest non-missing wage inside
  # one career bracket. The minimum is computed from the data instead of being
  # copied from the printed output above, and the match is restricted to the
  # requested bracket.
  #   career_label: one value of salary$career (e.g. "1년미만").
  #   Returns a data frame; zero rows when the bracket has no wage data.
  lowest_paid_in <- function(career_label) {
    rows <- which(salary$career == career_label & !is.na(salary$wage))
    if (length(rows) == 0) {
      return(salary[0, ])
    }
    bracket_wage <- salary$wage[rows]
    salary[rows[bracket_wage == min(bracket_wage)], ]
  }

  year_1 <- lowest_paid_in("1년미만")
  year_1_3 <- lowest_paid_in("1~3년미만")
  year_3_5 <- lowest_paid_in("3~5년미만")
  year_5_10 <- lowest_paid_in("5~10년미만")
  year_10 <- lowest_paid_in("10년이상")

  # Ordered from the shortest to the longest career bracket.
  career_list <- list(
    year_1, year_1_3, year_3_5, year_5_10, year_10
  )
  career_list

  ## Answer 2
  # In most career brackets the lowest-paid group is women aged 60 or older.
  # The notable exception is the 10+ years bracket, where the lowest-paid
  # group is women in their early 20s (1,886 workers); despite 10 or more
  # years of experience they receive about 1.68 million won.
  
  # Part 3: standardise the wage.
  # scale() with its defaults centres the values on their mean and divides by
  # their standard deviation.
  wage_scale <- scale(salary$wage)
  head(wage_scale, 10)
  # [,1]
  # [1,] -1.28886999
  # [2,] -0.91757018
  # [3,] -0.38981924
  # [4,] -0.06340878
  # [5,]  0.37924689
  # [6,]  0.31343053
  # [7,]  0.28505815
  # [8,] -0.04016661
  # [9,] -0.13812959
  # [10,] -0.78222571
  ## The mean is 0 and the values are spread around 0.
  # Store the standardised wage as a plain numeric column named `scale`.
  # Assigning by name (rather than cbind()) keeps the step idempotent:
  # re-running it overwrites the column instead of appending a duplicate.
  salary$scale <- as.vector(wage_scale)
  str(salary)
  # Lollipop-style chart: one point per row at (standardised wage, age group),
  # joined to x = 0 by a horizontal segment. Columns are referenced by bare
  # name inside aes() (not salary$...), which ggplot2 resolves against the
  # `salary` data frame and which yields clean legend titles.
  g1 <- ggplot(salary, aes(x = scale, y = age))
  g2 <- geom_segment(aes(yend = age), xend = 0)
  g3 <- g1 + g2 +
    geom_point(
      size = 7,
      aes(color = gender, shape = career)
    ) +
    theme_minimal()

  g3
  ## Interpretation
  # Men aged 45-54 with 10 or more years of career are the highest earners.
  # In the 25-29 age group the gap between groups is small.
  # From age 45 upward the gap between groups is large.
  # Low wages appear mainly in the female groups.
  # High wages appear mainly in the male groups.
  
LS0tDQp0aXRsZTogIsDTsd2w3cL3utC8riINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCg0KYGBge3J9DQojILWlwMzFzSDBpLHUyK20wiC6r7z9sKrAxyC60Mb3uKYgx6XB2Mitx8+0wiCwzcC7IMDHuczH0bTZLg0KIyDHpcHYyK20wiC6r7z2v6G8rSC1pcDMxc3AxyDG8rHVwLsgu6mwxbOqDQojILqvvPa4piDA/MO8ILWlwMzFzcDHIMelwdjG7cL3t84gs6q0qbTCIMDbvvfAuyDG98fUx9G02S4NCiMgwMy3uLDUIMfPuOkguq+8/bCqwMcgxvKx1cDMIDDAzCC1x7DtILCqwMcNCiMgxtvB/MGktbUoutDG9ykgtsfH0SDAz8Gkx9jB+LTZLg0KIyBSv6G8rSC1pcDMxc24piDBpLHUyK0gx8+0wiDH1Lz2tMIgc2NhbGUoKcDMtNkuDQoNCg0KIyA8wNOx3bDdwvcgx9i80rimIMCnx9Egx/a788iyILrQvK6/ucGmPg0KICANCiAgIyMgvLq6sCDA07HdILzStea/oSC068fRIMXrsOi4piCxuMfPvcO/wC4NCiAgIyAxLiCx3r+pv6EgtOvH0SCx4rz6xeuw6LimILytvPrHz73Dv8AuDQogICMgMi4gs7Kz4Ly6urC/oSC1+7ilIMbysdXA07HdwLsguLe067HXt6HHwbfOILG4x/YNCiAgIyAzLiCwocDlILDtvNK15sDOIL+st8m067imILG4x8+9w7/ALg0KICAjIDQuILHXt+ywoyDA07HdsN3C97ChIMS/wfa0wiC/rLfJtOu4piCxuMfPvcO/wC4NCiAgIyBzdGVwIDENCiAgY3N2ID0gImh0dHBzOi8vd3d3LmRyb3Bib3guY29tL3MvOWdjaHE0bmJ0NjdscHh1L2V4YW1wbGVfc2FsYXJ5LmNzdj9kbD0xIg0KICBzYWxhcnkgPC0gcmVhZC5jc3YoY3N2LA0KICAgICAgICAgICAgICAgICAgICAgc3RyaW5nc0FzRmFjdG9yID0gRiwNCiAgICAgICAgICAgICAgICAgICAgIG5hID0gIi0iKQ0KICAjIHN0ZXAgMg0KICBzdHIoc2FsYXJ5KQ0KICBjb2xuYW1lcyhzYWxhcnkpDQogICMgWzFdICK/rLfJIiAgICAgICAgICAgICAgIA0KICAjIFsyXSAiv/mx3r+pvtcuLr/4LiIgICAgICANCiAgIyBbM10gIr+ssKPGr7qwsd6/qb7XLi6/+C4iDQogICMgWzRdICKx2bfOvcOwoy4uvcOwoy4iICAgIA0KICAjIFs1XSAisdm3zsDavPYuLrjtLiIgICAgICANCiAgIyBbNl0gIrDmt8KxuLrQIiAgICAgICAgICAgDQogICMgWzddICK8urqwIiAgDQogICMgc3RlcCAzIL+su+rAuyDHz7HiIMCnx9ggx9Gx27jtwLsgv7W+7rfOILqvyK8NCiAgY29sbmFtZXMoc2FsYXJ5KSA8LSBjKA0KICAgICJhZ2UiLCJ3YWdlIiwic3BlY2lhbF93YWdlIiwid29ya2luZ190aW1lIiwid29ya2VyX2NvdW50IiwiY2FyZWVyIiwiZ2VuZGVyIg0KICApDQogIGNvbG5hbWVzKHNhbGFyeSkNCiAgI3NhbGFyeSR3YWdlDQogICMgc3RlcCA0IDogsMu79rjxt8+/oSC/w7iuseIuDQogICMgc2FsYXJ5JGFnZSC4piDHz8H2IL7KtbW3zyDBtsShDQogICMgc2FsYXJ5IGRhdGFmcmFtZSDAuyC18Mb6xq6wqsC4t84gwfbBpA0KICAjIGRldGFjaChzYWxhcnkpDQogICMgYXR0YWNoKHNhbGFyeSkNCiAgIyBzdGVwIDUgOrHivPrF67DoIDo6IMbysdUsIMHfvtOwqiwgw9a687CqDQogICMgbWVhbiwgbWVkaWFu
LCBtb2RlDQogIHNhbGFyeSR3YWdlDQogIHdhZ2VfbWVhbiA8LSBtZWFuKHNhbGFyeSR3YWdlLCBuYS5ybSA9IFQpDQogIHdhZ2VfbWVhbiAgIyBbMV0gMjE3MTU3OA0KICAjIMHfvtOwqiBtZWRpYW4gDQogIHdhZ2VfbWlkIDwtIG1lZGlhbihzYWxhcnkkd2FnZSwgbmEucm0gPSBUKQ0KICB3YWdlX21pZA0KICAjILn8wKcgsbjHz7HiDQogIHdhZ2VfcmFuZ2UgPC0gcmFuZ2Uoc2FsYXJ5JHdhZ2UsIG5hLnJtID0gVCkNCiAgd2FnZV9yYW5nZSAjIDExMTc2MDUgNDA2NDI4Ng0KICAjIMPWsO3A07HdwLsgud60wiC757b3wMcgwaS6uA0KICBoaWdoZXN0X3dhZ2UgIDwtIHdoaWNoKHNhbGFyeSR3YWdlID09IDQwNjQyODYpDQogIHNhbGFyeVtoaWdoZXN0X3dhZ2UsXQ0KICAjIDS60MCnILG4x8+x4g0KICBxbnQgPC0gcXVhbnRpbGUoc2FsYXJ5JHdhZ2UsbmEucm09VCkNCiAgcW50DQogICMgc3RlcCA2ILiuvbrGrr+hILTjseINCiAgc2FsX2xpc3QgPC0gbGlzdCgNCiAgICDG8rHVv/mx3iA9IHdhZ2VfbWVhbiwNCiAgICC/+bHewd++07CqID0gd2FnZV9taWQsDQogICAgv/mx3rn8wKcgPSB3YWdlX3JhbmdlLA0KICAgIL/5sd6757rQwKcgPSBxbnQNCiAgKQ0KICBzYWxfbGlzdA0KICAjILy6urC/oSC1+7ilIMDTsd2w3cL3DQogIHdhZ2VfYXZnX3Blcl9nZW5kZXIgPC0gdGFwcGx5KA0KICAgIHNhbGFyeSR3YWdlLHNhbGFyeSRnZW5kZXIsbWVhbixuYS5ybT1UDQogICkNCiAgd2FnZV9hdmdfcGVyX2dlbmRlcg0KICAjILOyICAgICAgv6kgDQogICMgMjQ3NzMzMiAxODY1ODIzIA0KICAjIHJlc2hhcGUyDQogIGluc3RhbGwucGFja2FnZXMoInJlc2hhcGUyIikNCiAgbGlicmFyeShyZXNoYXBlMikNCiAgdGVtcCA8LSBtZWx0KHdhZ2VfYXZnX3Blcl9nZW5kZXIpDQogIHRlbXANCiAgZ2dwbG90KA0KICAgIGRhdGEgPSB0ZW1wLA0KICAgIGFlcygNCiAgICAgIHggPSBWYXIxLCAjIG1lbHS/oSCzu8DltcggeCCwqg0KICAgICAgeSA9IHZhbHVlLA0KICAgICAgZmlsbCA9IFZhcjENCiAgICApDQogICkrZ2VvbV9iYXIoDQogICAgc3RhdCA9ICJpZGVudGl0eSINCiAgKQ0KICAjIMS/uK6+7r+hILX7uKUgwNOx3bDdwvcNCiAgIyBzYWxhcnkkY2FyZWVyDQogIHdhZ2VfYXZnX3Blcl9jYXJlZXIgPC0gdGFwcGx5KA0KICAgIHNhbGFyeSR3YWdlLHNhbGFyeSRjYXJlZXIsbWVhbixuYS5ybT1UDQogICkNCiAgd2FnZV9hdmdfcGVyX2NhcmVlcg0KICB0ZW1wIDwtIG1lbHQod2FnZV9hdmdfcGVyX2NhcmVlcikNCiAgdGVtcA0KICBnZ3Bsb3QoDQogICAgZGF0YSA9IHRlbXAsDQogICAgYWVzKA0KICAgICAgeCA9IFZhcjEsICMgbWVsdL+hILO7wOW1yCB4ILCqDQogICAgICB5ID0gdmFsdWUsDQogICAgICBmaWxsID0gVmFyMQ0KICAgICkNCiAgKStnZW9tX2JhcigNCiAgICBzdGF0ID0gImlkZW50aXR5Ig0KICApDQogIG1lbHQgPC0gbWVsdCh3YWdlX2F2Z19wZXJfY2FyZWVyKQ0KICBnZ3Bsb3QoDQogICAgbWVsdCwNCiAgICBhZXMoDQogICAgICB4ID0g
VmFyMSwNCiAgICAgIHkgPSB2YWx1ZSwNCiAgICAgIGdyb3VwID0gMQ0KICAgICkNCiAgKStnZW9tX2xpbmUoDQogICAgY29sciA9ICdibHVlJywNCiAgICBzaXplID0gMg0KICApKyBjb29yZF9wb2xhcigpKw0KICAgIHlsaW0oMCxtYXgobWVsdCR2YWx1ZSkpDQogIA0KICAjILCiILDmt8K6sLfOIMGmwM8gwPuw1CC53rTCIL/5sd4gwf203A0KICAjIDF+M7Piucy4uCAgIDEws+LAzLvzICAgIDGz4rnMuLggDQogICMgMTkwNTAxMiAgICAyOTA3MTE5ICAgIDE3MzA4MzUgDQogICMgM341s+K5zLi4IDV+MTCz4rnMuLggDQogICMgMjAyODAxNSAgICAyMzYwNDYzDQogIHRhcHBseSgNCiAgICBzYWxhcnkkd2FnZSwNCiAgICBzYWxhcnkkY2FyZWVyLA0KICAgIHJhbmdlLA0KICAgIG5hLnJtID0gVA0KICApDQogICMgJGAxfjOz4rnMuLhgDQogICMgWzFdIDExNzIzOTkgMjYxOTIyMQ0KICAjIA0KICAjICRgMTCz4sDMu/NgDQogICMgWzFdIDE2ODUyMDQgNDA2NDI4Ng0KICAjIA0KICAjICRgMbPiucy4uGANCiAgIyBbMV0gMTExNzYwNSAyNDE0MzQ1DQogICMgDQogICMgJGAzfjWz4rnMuLhgDQogICMgWzFdIDEyNDU1NDAgMjgyNzQyMA0KICAjIA0KICAjICRgNX4xMLPiucy4uGANCiAgIyBbMV0gMTU0ODAzNiAzMzA5MjMxDQogIHllYXJfMSA8LSBzYWxhcnlbd2hpY2goc2FsYXJ5JHdhZ2UgPT0gMTExNzYwNSksXQ0KICB5ZWFyXzFfMyA8LSBzYWxhcnlbd2hpY2goc2FsYXJ5JHdhZ2UgPT0gMTE3MjM5OSksXQ0KICB5ZWFyXzNfNSA8LSBzYWxhcnlbd2hpY2goc2FsYXJ5JHdhZ2UgPT0gMTI0NTU0MCksXQ0KICB5ZWFyXzVfMTAgPC0gc2FsYXJ5W3doaWNoKHNhbGFyeSR3YWdlID09IDE1NDgwMzYpLF0NCiAgeWVhcl8xMCA8LSBzYWxhcnlbd2hpY2goc2FsYXJ5JHdhZ2UgPT0gMTY4NTIwNCksXQ0KICANCiAgY2FyZWVyX2xpc3QgPC0gbGlzdCgNCiAgICB5ZWFyXzEseWVhcl8xXzMseWVhcl8zXzUseWVhcl81XzEwLHllYXJfMTANCiAgKQ0KICBjYXJlZXJfbGlzdA0KICANCiAgIyMgMrn4tOQNCiAgIyCw5rfCurAgsKHA5SCzt8C6IL/5sd7AuyC53rTCIMH9tNzAuiC067rOutAgNjC068DMu/Mgv6nA2i4NCiAgIyDGr8DMwaHAuiAxMLPiwMy78yCw5rfCv6G8rSCwocDlILO3wLogv/mx3sC7ILnetMIgwf203MC6DQogICMgMjC06yDDyrndv6m8ug0KICAjIDE4ODa47S4gwMy16cC6IDEws+LAzLOqILDmt8LAuyC917DttbUgMTY4uLi/+MC7ILz2t8nH1A0KICANCiAgIyAgM7n4LiDHpcHYyK0gvcPFsLHiDQogIHdhZ2Vfc2NhbGUgPC0gc2NhbGUoc2FsYXJ5JHdhZ2UpDQogIGhlYWQod2FnZV9zY2FsZSwgMTApDQogICMgWywxXQ0KICAjIFsxLF0gLTEuMjg4ODY5OTkNCiAgIyBbMixdIC0wLjkxNzU3MDE4DQogICMgWzMsXSAtMC4zODk4MTkyNA0KICAjIFs0LF0gLTAuMDYzNDA4NzgNCiAgIyBbNSxdICAwLjM3OTI0Njg5DQogICMgWzYsXSAgMC4zMTM0MzA1Mw0KICAjIFs3LF0gIDAuMjg1MDU4MTUNCiAgIyBbOCxdIC0wLjA0MDE2NjYxDQogICMg
WzksXSAtMC4xMzgxMjk1OQ0KICAjIFsxMCxdIC0wLjc4MjIyNTcxDQogICMjIMbysdXAzCAwwMyw7SwgMMC7ILHiwdjAuLfOILrQu+q1yCCwqrXpwMwgwNa02Q0KICBzYWxhcnkgPC0gY2JpbmQoc2FsYXJ5LHNjYWxlID0gd2FnZV9zY2FsZSkNCiAgc3RyKHNhbGFyeSkNCiAgZzEgPC0gZ2dwbG90KHNhbGFyeSxhZXMoeD1zYWxhcnkkc2NhbGUseT1zYWxhcnkkYWdlKSkNCiAgZzIgPC0gZ2VvbV9zZWdtZW50KGFlcyh5ZW5kPXNhbGFyeSRhZ2UpLHhlbmQ9MCkNCiAgZzMgPC0gIGcxICsgZzIgKyBnZW9tX3BvaW50KA0KICAgIHNpemUgPSA3LA0KICAgIGFlcyhjb2xvcj1zYWxhcnkkZ2VuZGVyLHNoYXBlPXNhbGFyeSRjYXJlZXIpDQogICkrdGhlbWVfbWluaW1hbCgpDQogIA0KICBnMw0KICAjIyDH2LyuDQogICMgMTCz4sDMu/O1yCA0NX41NLy8ILOyvLrAzCCwocDlILDtvNK15sDaLg0KICAjIDI1IH4gMjm8vCCx17fswLogsN3C97ChIMWpwfYgvsq02Q0KICAjIDQ1vLwgwMy787rOxc20wiCx17fssKMgsN3C97ChIMWptNkNCiAgIyDA+sDTsd3AuiDB1rfOIL+pvLqx17fsv6G8rSCzqsW4s6202Q0KICAjILDtwNOx3cC6IMHWt84gs7K8urHXt+y/obytILOqxbizrbTZDQogIA0KYGBgDQoNCg0K