# salary
# (Title only. It was a bare symbol before, which R evaluates and fails on
# because no object called `salary` exists yet at this point.)

# Install ggplot2 only when it is missing, so re-running the script does not
# download and reinstall the package every time; then attach it for plotting.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
## 성별 임금 소득에 대한 통계를 구하시오.
# 1. 남녀성별에 따른 평균임금을 막대그래프로 구현
# 2. 가장 고소득인 연령대를 구하시오.
# 3. 그룹 간 임금격차가 커지는 연령대를 구하시오.
# 4. 그룹 간 임금격차가 커지는 연령대를 구하시오.
# Step 1: read the data
# The CSV is downloaded from the URL below. Cells containing "-" are read as
# NA. The argument is spelled out as `na.strings` instead of relying on
# partial matching of `na`, and FALSE is used instead of the reassignable `F`.
csv <- "https://www.dropbox.com/s/bdxpttb9mpr0kpt/example_salary.csv?dl=1"
salary <- read.csv(
  csv,
  stringsAsFactors = FALSE,
  na.strings = "-"
)
# Step 2: inspect the structure of the data frame
str(salary)
names(salary)
# Column names as read from the CSV, with the English name each one is
# given in step 3 (assigned by position):
# [1] "연령"                  -> age
# [2] "월급여액..원."         -> wage
# [3] "연간특별급여액..원."   -> special_wage
# [4] "근로시간..시간."       -> working_time
# [5] "근로자수..명."         -> worker_count
# [6] "경력구분"              -> career
# [7] "성별"                  -> gender

# Step 3: replace the Korean column names with English ones so the columns
# are easier to reference in the calculations that follow
names(salary) <- c(
  "age",
  "wage",
  "special_wage",
  "working_time",
  "worker_count",
  "career",
  "gender"
)
names(salary)
class(salary[["wage"]])
# Step 4: put the data frame on the search path
# attach(salary) would make the columns usable without the `salary$` prefix.
# It is left disabled; the code below references columns explicitly.
# attach(salary)

# Step 5: descriptive statistics (mean, median, mode)
head(salary)
class(salary$wage)
# wage <- as.numeric(wage)

# Mean monthly wage; NA rows are ignored.
wage_mean <- mean(salary$wage, na.rm = TRUE)
wage_mean # [1] 2171578

# Median monthly wage
wage_mid <- median(salary$wage, na.rm = TRUE)
wage_mid

# Range (minimum and maximum) of the monthly wage
wage_range <- range(salary$wage, na.rm = TRUE)
wage_range # [1] 1117605 4064286
# Rows of the group(s) earning the highest monthly wage.
# The maximum is computed from the data instead of being typed in from the
# printed range: a printed value is rounded for display and goes stale as soon
# as the data changes. which() drops the NA comparisons, and ties are kept.
highest_wage <- which(salary$wage == max(salary$wage, na.rm = TRUE))
salary[highest_wage, ]
# Quartiles of the monthly wage (quantile() defaults: 0%, 25%, 50%, 75%, 100%)
qnt <- quantile(salary$wage, na.rm = TRUE)
qnt

# Step 6: collect the summary statistics in one named list.
# The element names are Korean labels for, in order: mean wage, median wage,
# wage range, and wage quartiles.
sal_list <- list(
  평균월급 = wage_mean,
  월급중앙값 = wage_mid,
  월급범위 = wage_range,
  월급사분위 = qnt
)
sal_list
# Wage gap by gender: mean monthly wage per gender, ignoring NA wages
wage_avg_per_gender <- tapply(
  salary$wage, salary$gender, mean,
  na.rm = TRUE
)
wage_avg_per_gender
# Recorded output (남 = male, 여 = female):
#      남      여
# 2477332 1865823

# reshape2 supplies melt(), which turns the named tapply() result into a
# long data frame with columns Var1 (group label) and value (mean wage).
# Install it only when it is missing instead of on every run.
if (!requireNamespace("reshape2", quietly = TRUE)) {
  install.packages("reshape2")
}
library(reshape2)
temp <- melt(wage_avg_per_gender)

# Bar chart: one bar per gender, bar height = mean wage
ggplot(
  data = temp,
  aes(x = Var1, y = value, fill = Var1)
) +
  geom_bar(stat = "identity")
# Wage gap by career length
salary$career

# Mean monthly wage per career band, ignoring NA wages
wage_avg_per_careea <- tapply(
  salary$wage, salary$career, mean,
  na.rm = TRUE
)
wage_avg_per_careea

# Long format for plotting: Var1 is the column melt() creates for the group
# labels, value holds the mean wage.
temp <- melt(wage_avg_per_careea)

# Bar chart: one bar per career band, bar height = mean wage
ggplot(
  data = temp,
  aes(x = Var1, y = value, fill = Var1)
) +
  geom_bar(stat = "identity")
# Polar line chart of the mean monthly wage per career band.
#
# NOTE(review): the original melted the raw `salary$career` column, which
# yields no `Var1` column and a non-numeric `value`; the per-career mean wage
# is assumed to be the intended data, because the plot maps Var1/value exactly
# as the bar charts do. Confirm against the intended figure.
#
# The result is stored under its own name so it does not shadow the melt()
# function, and the layers are added to the ggplot() object rather than
# placed inside the ggplot() call, where they were being added to aes().
career_wage_long <- melt(
  tapply(salary$wage, salary$career, mean, na.rm = TRUE)
)
ggplot(
  career_wage_long,
  aes(x = Var1, y = value, group = 1)
) +
  geom_line(colour = "blue", size = 2) +
  coord_polar() +
  # Start the radial axis at 0 so the distances from the centre are comparable.
  ylim(0, max(career_wage_long$value, na.rm = TRUE))
# Lowest-paid group within each career band
# Recorded mean wage per career band (labels are the values of salary$career):
#  1~3년미만   10년이상    1년미만  3~5년미만 5~10년미만
#    1905012    2907119    1730835    2028015    2360463

# Minimum and maximum wage per career band
tapply(
  salary$wage,
  salary$career,
  range,
  na.rm = TRUE
)

# For every career band, keep the row(s) whose wage equals that band's
# minimum. The minimum is computed from the data instead of being typed in
# from the printed output above, so it stays correct when the data changes
# and does not depend on how the values were rounded for printing.
lowest_by_career <- lapply(
  split(salary, salary$career),
  function(band) {
    band[which(band$wage == min(band$wage, na.rm = TRUE)), ]
  }
)

# One variable per band, from least to most experience. The labels must match
# the values in salary$career; they are taken from the output recorded above.
year_1 <- lowest_by_career[["1년미만"]]
year_1_3 <- lowest_by_career[["1~3년미만"]]
year_3_5 <- lowest_by_career[["3~5년미만"]]
year_5_10 <- lowest_by_career[["5~10년미만"]]
year_10 <- lowest_by_career[["10년이상"]]

careea_list <- list(
  year_1, year_1_3, year_3_5, year_5_10, year_10
)
careea_list
## 2번 답
# 경력 별 가장 낮은 월급을 받는 집단은 대부분 60대 이상 여자
# 특이점은 10년 이상 경력에서 가장 낮은 월급을 받는 집단은
# 20대 초반 여성
# 1886명. 이들은 10년이나 경력을 쌓고도 168만원을 수령함.
# Task 3: standardise the monthly wage
# scale() centres the values on their mean and divides by their standard
# deviation; it returns a one-column matrix.
wage_scale <- scale(salary$wage)
head(wage_scale, 10)
# Recorded output:
#              [,1]
#  [1,] -1.28886999
#  [2,] -0.91757018
#  [3,] -0.38981924
#  [4,] -0.06340878
#  [5,]  0.37924689
#  [6,]  0.31343053
#  [7,]  0.28505815
#  [8,] -0.04016661
#  [9,] -0.13812959
# [10,] -0.78222571
# The mean is 0 and the values are spread around 0.

# Store the standardised wage as a plain numeric column. Assigning by name
# (instead of cbind) overwrites the column on a re-run rather than appending
# a second `scale` column.
salary$scale <- as.numeric(wage_scale)
str(salary)

# Lollipop chart: one point per row at (standardised wage, age group), joined
# to the zero line by a horizontal segment. Columns are referenced by bare
# name inside aes() so ggplot2 looks them up in the plot's data.
g1 <- ggplot(salary, aes(x = scale, y = age))
g2 <- geom_segment(aes(yend = age), xend = 0)
g3 <- g1 + g2 +
  geom_point(
    size = 7,
    aes(color = gender, shape = career)
  ) +
  theme_minimal()
g3
## 해석
# 10년 이상 된 45-54세 남성이 가장 고소득자.
# 25~29세 그룹은 격차가 크지 않다.
# 45세 이상부터는 그룹간 격차가 크다.
# 저임금은 주로 여성그룹에서 나타난다.
# 고임금은 주로 남성그룹에서 나타난다.
# 데이터 정규화는 변숫값의 분포를 표준화하는 것을 의미한다.
# 표준화는 변수에서 데이터의 평균을 빼거나
# 변수를 전체 데이터의 표준편차로 나누는 작업을 포함한다.
# 이렇게 하면 변숫값의 평균이 0이 되고 값의
# 퍼짐정도(분포) 또한 일정해진다.
# R에서 데이터를 정규화 하는 함수는 scale()이다.
# Worked example of scale() on a small vector: the mean of 1..5 is 3 and the
# standard deviation is sqrt(2.5), so scale(t) returns (t - 3) / sqrt(2.5).
t <- as.numeric(1:5)
t
mean(t)
sd(t)
scale(t)
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojc2FsYXJ5DQoNCmBgYHtyfQ0KaW5zdGFsbC5wYWNrYWdlcygiZ2dwbG90MiIpDQpsaWJyYXJ5KGdncGxvdDIpDQoNCg0KIyMg7ISx67OEIOyehOq4iCDshozrk53sl5Ag64yA7ZWcIO2GteqzhOulvCDqtaztlZjsi5zsmKQuDQojIDEuIOuCqOuFgOyEseuzhOyXkCDrlLDrpbgg7Y+J6reg7J6E6riI7J2EIOunieuMgOq3uOuemO2UhOuhnCDqtaztmIQNCiMgMi4g6rCA7J6lIOqzoOyGjOuTneyduCDsl7DroLnrjIDrpbwg6rWs7ZWY7Iuc7JikLg0KIyAzLiDqt7jro7kg6rCEIOyehOq4iOqyqeywqOqwgCDsu6Tsp4DripQg7Jew66C564yA66W8IOq1rO2VmOyLnOyYpC4NCiMgNC4g6re466O5IOqwhCDsnoTquIjqsqnssKjqsIAg7Luk7KeA64qUIOyXsOugueuMgOulvCDqtaztlZjsi5zsmKQuDQojIHN0ZXAgMSA6IOuNsOydtO2EsCDsnb3quLANCmNzdiA9ICJodHRwczovL3d3dy5kcm9wYm94LmNvbS9zL2JkeHB0dGI5bXByMGtwdC9leGFtcGxlX3NhbGFyeS5jc3Y/ZGw9MSINCnNhbGFyeSA8LSByZWFkLmNzdihjc3YsDQogICAgICAgICAgICAgICAgICAgc3RyaW5nc0FzRmFjdG9ycyA9IEYsDQogICAgICAgICAgICAgICAgICAgbmEgPSAiLSIpDQojIHN0ZXAgMiA6IOq1rOyhsCDrs7TquLANCnN0cihzYWxhcnkpDQpjb2xuYW1lcyhzYWxhcnkpDQojWzFdICLsl7DroLkiIGN0cmwrc2hpZnQrYw0KI1syXSAi7JuU6riJ7Jes7JWhLi7sm5AuIg0KI1szXSAi7Jew6rCE7Yq567OE6riJ7Jes7JWhLi7sm5AuIg0KI1s0XSAi6re866Gc7Iuc6rCELi7si5zqsIQuIg0KI1s1XSAi6re866Gc7J6Q7IiYLi7rqoUuIg0KI1s2XSAi6rK966Cl6rWs67aEIg0KI1s3XSAi7ISx67OEIg0KIyBzdGVwIDMg7Jew7IKw7J2EIO2VmOq4sCDsnITtlbQg7ZWc6riA66qF7J2EIOyYgeyWtOuhnCDrs4DtmZgNCmNvbG5hbWVzKHNhbGFyeSkgPC0gYygNCiAgImFnZSIsIndhZ2UiLCJzcGVjaWFsX3dhZ2UiLCJ3b3JraW5nX3RpbWUiLA0KICAid29ya2VyX2NvdW50IiwiY2FyZWVyIiwiZ2VuZGVyIg0KICApDQpjb2xuYW1lcyhzYWxhcnkpDQpjbGFzcyhzYWxhcnkkd2FnZSkNCiMgc3RlcCA0IOqygOyDiSDrqqnroZ3sl5Ag7Jis66as6riwDQojIHNhbGFyeSRhZ2Ug66W8IO2VmOyngCDslYrrj4TroZ0g7KGw7LmYDQojIHNhbGFyeSBkYXRhZnJhbWUg7J2EIOuUlO2PtO2KuOqwkuycvOuhnCDsp4DsoJUNCiMgYXR0YWNoKHNhbGFyeSkgIyDssqjrtoDtjIzsnbwNCiMgc3RlcCA1IDog6riw7Iig7Ya16rOEIDo6IO2Pieq3oCBtZWFuLCDspJHslZnqsJIgbWVkaWFuLCDstZzruYjqsJIgbW9kZQ0KaGVhZChzYWxhcnkpDQpjbGFzcyhzYWxhcnkkd2FnZSkNCiMgd2FnZSA8LSBhcy5udW1lcmljKHdhZ2UpDQp3YWdlX21lYW4gPC0gbWVhbihzYWxhcnkkd2FnZSwgbmEucm0gPSBUKQ0Kd2FnZV9tZWFuICMgWzFdIDIxNzE1NzgNCiMg7KSR7JWZ6rCSIG1lZGlhbg0Kd2FnZV9taWQg
PC0gbWVkaWFuKHNhbGFyeSR3YWdlLCBuYS5ybSA9IFQpDQp3YWdlX21pZA0KIyDrspTsnIQg6rWs7ZWY6riw67O0Z2UsIG5hLnJtID0gVCkNCndhZ2VfcmFuZ2UgPC0gcmFuZ2Uoc2FsYXJ5JHdhZ2UsIG5hLnJtID0gVCkNCndhZ2VfcmFuZ2UgIyBbMV0gMTExNzYwNSA0MDY0Mjg2DQojIOy1nOqzoCDsnoTquIjsnYQg67Cb64qUIOyCrOuejOydmCDsoJXrs7QNCmhpZ2hlc3Rfd2FnZSA8LSB3aGljaChzYWxhcnkkd2FnZSA9PSA0MDY0Mjg2KQ0Kc2FsYXJ5W2hpZ2hlc3Rfd2FnZSxdDQojIDTrtoTsnIQg6rWs7ZWY6riwDQpxbnQgPC0gcXVhbnRpbGUoc2FsYXJ5JHdhZ2UsbmEucm0gPSBUKQ0KcW50DQojIHN0ZXAgNiDrpqzsiqTtirjsl5Ag64u06riwDQpzYWxfbGlzdCA8LSBsaXN0KA0KICDtj4nqt6Dsm5TquIkgPSB3YWdlX21lYW4sDQogIOyblOq4ieykkeyVmeqwkiA9IHdhZ2VfbWlkLA0KICDsm5TquInrspTsnIQgPSB3YWdlX3JhbmdlLA0KICDsm5TquInsgqzrtoTsnIQgPSBxbnQNCikNCnNhbF9saXN0DQojIOyEseuzhOyXkCDrlLDrpbgg7J6E6riI6rKp7LCoDQp3YWdlX2F2Z19wZXJfZ2VuZGVyIDwtIHRhcHBseSgNCiAgc2FsYXJ5JHdhZ2Usc2FsYXJ5JGdlbmRlcixtZWFuLG5hLnJtPVQNCikNCndhZ2VfYXZnX3Blcl9nZW5kZXINCiMg64KoICAgICAg7JesIA0KIyAyNDc3MzMyIDE4NjU4MjMNCiMgcmVzaGFwZTINCmluc3RhbGwucGFja2FnZXMoInJlc2hhcGUyIikNCmxpYnJhcnkocmVzaGFwZTIpDQp0ZW1wIDwtIG1lbHQod2FnZV9hdmdfcGVyX2dlbmRlcikNCmdncGxvdCgNCiAgZGF0YSA9IHRlbXAsDQogIGFlcygNCiAgICB4ID0gVmFyMSwNCiAgICB5ID0gdmFsdWUsDQogICAgZmlsbCA9IFZhcjENCiAgKQ0KKStnZW9tX2JhcigNCiAgc3RhdCA9ICJpZGVudGl0eSINCikNCg0KIyDsu6TrpqzslrTsl5Ag65Sw66W4IOyehOq4iOqyqeywqA0Kc2FsYXJ5JGNhcmVlcg0Kd2FnZV9hdmdfcGVyX2NhcmVlYSA8LSB0YXBwbHkoDQogIHNhbGFyeSR3YWdlLCBzYWxhcnkkY2FyZWVyLCBtZWFuLCBuYS5ybSA9IFQNCikNCndhZ2VfYXZnX3Blcl9jYXJlZWENCnRlbXAgPC0gbWVsdCh3YWdlX2F2Z19wZXJfY2FyZWVhKQ0KZ2dwbG90KA0KICBkYXRhID0gdGVtcCwNCiAgYWVzKA0KICAgIHggPSBWYXIxLCAjIG1lbHTsl5Ag64K07J6l65CcIHgg6rCSDQogICAgeSA9IHZhbHVlLA0KICAgIGZpbGwgPSBWYXIxDQogICkNCikrZ2VvbV9iYXIoDQogIHN0YXQgPSAiaWRlbnRpdHkiDQopDQoNCg0KbWVsdCA8LSBtZWx0KHNhbGFyeSRjYXJlZXIpDQpnZ3Bsb3QoDQogIG1lbHQsDQogIGFlcygNCiAgICB4PVZhcjEsDQogICAgeT12YWx1ZSwNCiAgICBncm91cD0xDQogICkrZ2VvbV9saW5lKA0KICAgIGNvbCA9ICdibHVlJywNCiAgICBzaXplID0gMg0KICApK2Nvb3JkX3BvbGFyKCkrDQogICAgeWxpbSgwLG1heChtZWx0JHZhbHVlKSkNCikNCg0KIyDqsIEg6rK966Cl67OE66GcIOygnOydvCDsoIHqsowg67Cb64qUIOyblOq4iSDsp5Hri6gN
CiMgMX4z64WE66+466eMICAgMTDrhYTsnbTsg4EgICAgMeuFhOuvuOunjCAgM34164WE66+466eMIDV+MTDrhYTrr7jrp4wgDQojIDE5MDUwMTIgICAgMjkwNzExOSAgICAxNzMwODM1ICAgIDIwMjgwMTUgICAgMjM2MDQ2Mw0KdGFwcGx5KA0KICBzYWxhcnkkd2FnZSwNCiAgc2FsYXJ5JGNhcmVlciwNCiAgcmFuZ2UsDQogIG5hLnJtID0gVA0KKQ0KeWVhcl8xIDwtIHNhbGFyeVt3aGljaChzYWxhcnkkd2FnZSA9PSAxMTE3NjA1KSxdDQp5ZWFyXzFfMyA8LSBzYWxhcnlbd2hpY2goc2FsYXJ5JHdhZ2UgPT0gMTE3MjM5OSksXQ0KeWVhcl8zXzUgPC0gc2FsYXJ5W3doaWNoKHNhbGFyeSR3YWdlID09IDEyNDU1NDApLF0NCnllYXJfNV8xMCA8LSBzYWxhcnlbd2hpY2goc2FsYXJ5JHdhZ2UgPT0gMTU0ODAzNiksXQ0KeWVhcl8xMCA8LSBzYWxhcnlbd2hpY2goc2FsYXJ5JHdhZ2UgPT0gMTY4NTIwNCksXQ0KDQpjYXJlZWFfbGlzdCA8LSBsaXN0KA0KICB5ZWFyXzEseWVhcl8xXzMseWVhcl8zXzUseWVhcl81XzEwLHllYXJfMTANCikNCmNhcmVlYV9saXN0DQoNCiMjIDLrsogg64u1DQojIOqyveugpSDrs4Qg6rCA7J6lIOuCruydgCDsm5TquInsnYQg67Cb64qUIOynkeuLqOydgCDrjIDrtoDrtoQgNjDrjIAg7J207IOBIOyXrOyekA0KIyDtirnsnbTsoJDsnYAgMTDrhYQg7J207IOBIOqyveugpeyXkOyEnCDqsIDsnqUg64Ku7J2AIOyblOq4ieydhCDrsJvripQg7KeR64uo7J2ADQojIDIw64yAIOy0iOuwmCDsl6zshLENCiMgMTg4NuuqhS4g7J2065Ok7J2AIDEw64WE7J2064KYIOqyveugpeydhCDsjJPqs6Drj4QgMTY466eM7JuQ7J2EIOyImOugue2VqC4NCg0KIyAz67KILiDtkZzspIDtmZQg7Iuc7YKk6riwDQp3YWdlX3NjYWxlIDwtIHNjYWxlKHNhbGFyeSR3YWdlKQ0KaGVhZCh3YWdlX3NjYWxlLCAxMCkNCiMgWywxXQ0KIyBbMSxdIC0xLjI4ODg2OTk5DQojIFsyLF0gLTAuOTE3NTcwMTgNCiMgWzMsXSAtMC4zODk4MTkyNA0KIyBbNCxdIC0wLjA2MzQwODc4DQojIFs1LF0gIDAuMzc5MjQ2ODkNCiMgWzYsXSAgMC4zMTM0MzA1Mw0KIyBbNyxdICAwLjI4NTA1ODE1DQojIFs4LF0gLTAuMDQwMTY2NjENCiMgWzksXSAtMC4xMzgxMjk1OQ0KIyBbMTAsXSAtMC43ODIyMjU3MQ0KIyDtj4nqt6DsnbQgMOydtOqzoCwgMOydhCDquLDso73snLzroZwg67aE7IKw65CcIOqwkuuTpOydtCDsnojri6QuDQpzYWxhcnkgPC0gY2JpbmQoc2FsYXJ5LHNjYWxlPXdhZ2Vfc2NhbGUpDQpzdHIoc2FsYXJ5KQ0KZzEgPC0gZ2dwbG90KHNhbGFyeSxhZXMoeD1zYWxhcnkkc2NhbGUseT1zYWxhcnkkYWdlKSkNCmcyIDwtIGdlb21fc2VnbWVudChhZXMoeWVuZD1zYWxhcnkkYWdlKSx4ZW5kPTApDQpnMyA8LSBnMSArIGcyICsNCiAgZ2VvbV9wb2ludCgNCiAgICBzaXplID0gNywNCiAgICBhZXMoY29sb3I9c2FsYXJ5JGdlbmRlcixzaGFwZT1zYWxhcnkkY2FyZWVyKQ0KICApK3RoZW1lX21pbmltYWwoKQ0KZzMNCg0KIyMg7ZW07ISdDQojIDEw64WEIOydtOyDgSDr
kJwgNDUtNTTshLgg64Ko7ISx7J20IOqwgOyepSDqs6Dshozrk53snpAuDQojIDI1fjI57IS4IOq3uOujueydgCDqsqnssKjqsIAg7YGs7KeAIOyViuuLpC4NCiMgNDXshLgg7J207IOB67aA7YSw64qUIOq3uOujueqwhCDqsqnssKjqsIAg7YGs64ukLg0KIyDsoIDsnoTquIjsnYAg7KO866GcIOyXrOyEseq3uOujueyXkOyEnCDrgpjtg4Drgpzri6QuDQojIOqzoOyehOq4iOydgCDso7zroZwg64Ko7ISx6re466O57JeQ7IScIOuCmO2DgOuCnOuLpC4NCg0KIyDrjbDsnbTthLAg7KCV6rec7ZmU64qUIOuzgOyIq+qwkuydmCDrtoTtj6zrpbwg7ZGc7KSA7ZmU7ZWY64qUIOqyg+ydhCDsnZjrr7jtlZzri6QuDQojIO2RnOykgO2ZlOuKlCDrs4DsiJjsl5DshJwg642w7J207YSw7J2YIO2Pieq3oOydhCDrubzqsbDrgpgNCiMg67OA7IiY66W8IOyghOyytCDrjbDsnbTthLDsnZgg7ZGc7KSA7Y647LCo66GcIOuCmOuIhOuKlCDsnpHsl4XsnYQg7Y+s7ZWo7ZWc64ukLg0KIyDsnbTroIfqsowg7ZWY66m0IOuzgOyIq+qwkuydmCDtj4nqt6DsnbQgMOydtCDrkJjqs6Ag6rCS7J2YDQojIO2NvOynkOygleuPhCjrtoTtj6wpIOuYkO2VnCDsnbzsoJXtlbTsp4Tri6QuDQojIFLsl5DshJwg642w7J207YSw66W8IOygleq3nO2ZlCDtlZjripQg7ZWo7IiY64qUIHNjYWxlKCnsnbTri6QuDQoNCnQgPC0gYygxLDIsMyw0LDUpDQp0DQptZWFuKHQpDQpzZCh0KQ0Kc2NhbGUodCkNCg0KDQoNCg0KDQoNCg0KDQoNCg0KYGBgDQo=