What is (are) your main question(s)? What is your story? What does the final graphic show?
Explain where the data came from, what agency or company made it, how it is structured, what it shows, etc.
Describe and show how you cleaned and reshaped the data.
# Load the SPSS survey file as a data frame.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved constant.
raw_welfare <- read.spss(
  file = "koweps_hpwc17_2022_Beta1.sav",
  to.data.frame = TRUE
)
## Warning in read.spss(file = "koweps_hpwc17_2022_Beta1.sav", to.data.frame = T):
## koweps_hpwc17_2022_Beta1.sav: Compression bias (0) is not the usual value of
## 100
## Warning in read.spss(file = "koweps_hpwc17_2022_Beta1.sav", to.data.frame = T):
## koweps_hpwc17_2022_Beta1.sav: Very long string record(s) found (record type 7,
## subtype 14), each will be imported in consecutive separate variables
# Build the working table from raw_welfare (which itself stays untouched):
# keep only the survey columns used below, under readable names, and retain
# the rows whose `relation` code equals 10.
# NOTE(review): code 10 is presumably the household head - confirm against
# the survey codebook.
wel <- raw_welfare %>%
  select(
    code_region = h17_reg7,
    relation = h17_g2,
    birth = h17_g4,
    area = h1706_5,
    code_housingtype = h1706_3,
    houseprice = h1706_6,
    code_1stprepare = h1706_8,
    code_2stprepare = h1706_9,
    satisfaction = p1703_7
  ) %>%
  filter(relation == 10)
# Inspect the region code: its type, missing-value count, and first values.
class(wel[["code_region"]])
## [1] "numeric"
table(is.na(wel[["code_region"]]))
##
## FALSE
## 7865
head(wel[["code_region"]])
## [1] 1 1 1 1 2 1
# Lookup table that maps each of the seven region codes to its label.
list_region <- data.frame(
  code_region = 1:7,
  region = c(
    "서울",
    "수도권(인천/경기)",
    "부산/경남/울산",
    "대구/경북",
    "대전/충남",
    "강원/충북",
    "광주/전남/전북/제주도"
  )
)
list_region
## code_region region
## 1 1 서울
## 2 2 수도권(인천/경기)
## 3 3 부산/경남/울산
## 4 4 대구/경북
## 5 5 대전/충남
## 6 6 강원/충북
## 7 7 광주/전남/전북/제주도
# Attach the region label to every row, then spot-check the first rows.
wel <- wel %>%
  left_join(list_region, by = "code_region")
head(select(wel, code_region, region))
## code_region region
## 1 1 서울
## 2 1 서울
## 3 1 서울
## 4 1 서울
## 5 2 수도권(인천/경기)
## 6 1 서울
# Inspect the birth-year column: type and quartiles.
class(wel$birth)
## [1] "numeric"
summary(wel$birth)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1922 1943 1954 1957 1969 2003
# Histogram of birth years. `qplot()` was deprecated in ggplot2 3.4.0, so the
# plot is built with ggplot() + geom_histogram(). `bins = 30` spells out the
# bin count that stat_bin() previously fell back to with a message.
ggplot(data = wel, aes(x = birth)) +
  geom_histogram(bins = 30)
# Count missing birth years.
table(is.na(wel$birth))
##
## FALSE
## 7865
# Age relative to 2022, counting the birth year itself as age 1 (hence + 1).
wel$age <- 2022 - wel$birth + 1
summary(wel$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 20.00 54.00 69.00 66.23 80.00 101.00
# Histogram of ages. ggplot() + geom_histogram() replaces the deprecated
# qplot(); `bins = 30` keeps the bin count that was previously used by default.
ggplot(data = wel, aes(x = age)) +
  geom_histogram(bins = 30)
# Bucket each age into a decade label; ages of 80 and above share the label
# "old". Conditions are evaluated in order, so each one only needs an upper
# bound, and a missing age stays NA.
wel <- wel %>%
  mutate(
    age_group = case_when(
      age < 20 ~ "10s",
      age < 30 ~ "20s",
      age < 40 ~ "30s",
      age < 50 ~ "40s",
      age < 60 ~ "50s",
      age < 70 ~ "60s",
      age < 80 ~ "70s",
      age >= 80 ~ "old"
    )
  )
table(wel$age_group)
##
## 20s 30s 40s 50s 60s 70s old
## 149 412 863 1131 1519 1739 2052
# Bar chart of the number of rows in each age group.
wel %>%
  ggplot(aes(x = age_group)) +
  geom_bar()
## 3-2. MZ 파생변수 만들기
# Five-year bands covering ages 20-44 ("E"/"L" = first/second half of the
# decade); every other age gets NA.
# Each band states both of its bounds. The previous nested ifelse() checked
# the lower bound only on its first branch, so an age below 20 would have
# fallen through to `age < 30` and been labelled "L20s".
wel <- wel %>%
  mutate(
    MZ = case_when(
      age >= 20 & age < 25 ~ "E20s",
      age >= 25 & age < 30 ~ "L20s",
      age >= 30 & age < 35 ~ "E30s",
      age >= 35 & age < 40 ~ "L30s",
      age >= 40 & age < 45 ~ "E40s",
      TRUE ~ NA_character_
    )
  )
table(wel$MZ)
##
## E20s E30s E40s L20s L30s
## 19 188 393 130 224
# Bar chart of the bands in age order. `limits` is written out in full rather
# than relying on partial argument matching of `limit`.
wel %>%
  filter(!is.na(MZ)) %>%
  ggplot(aes(x = MZ)) +
  geom_bar() +
  scale_x_discrete(limits = c("E20s", "L20s", "E30s", "L30s", "E40s"))
# Inspect the dwelling-area column: type, first values, NA count, quartiles.
class(wel[["area"]])
## [1] "numeric"
head(wel[["area"]])
## [1] 92 33 73 92 46 59
table(is.na(wel[["area"]]))
##
## FALSE
## 7865
summary(wel[["area"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.00 56.00 79.00 77.95 99.00 327.00
# Inspect the housing-type code: type and first values.
class(wel$code_housingtype)
## [1] "numeric"
head(wel$code_housingtype)
## [1] 1 3 1 1 5 3
# Count missing housing-type codes. The original call used
# `wel$code_housetype`, a column that does not exist, so `is.na(NULL)`
# produced an empty table instead of an NA count.
table(is.na(wel$code_housingtype))
# Lookup table that maps each of the six housing-type codes to its label.
list_housingtype <- data.frame(
  code_housingtype = 1:6,
  housingtype = c(
    "자가",
    "전세",
    "보증부 월세",
    "월세(사글세)",
    "비가구원 명의 주택",
    "기타"
  )
)
list_housingtype
## code_housingtype housingtype
## 1 1 자가
## 2 2 전세
## 3 3 보증부 월세
## 4 4 월세(사글세)
## 5 5 비가구원 명의 주택
## 6 6 기타
# Attach the housing-type label to every row, then spot-check the first rows.
wel <- wel %>%
  left_join(list_housingtype, by = "code_housingtype")
head(select(wel, code_housingtype, housingtype))
## code_housingtype housingtype
## 1 1 자가
## 2 3 보증부 월세
## 3 1 자가
## 4 1 자가
## 5 5 비가구원 명의 주택
## 6 3 보증부 월세
# Inspect the housing-cost column: type, first values, NA count, quartiles.
class(wel[["houseprice"]])
## [1] "numeric"
head(wel[["houseprice"]])
## [1] 1e+05 5e+02 4e+04 7e+04 NA 6e+03
table(is.na(wel[["houseprice"]]))
##
## FALSE TRUE
## 6785 1080
summary(wel[["houseprice"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0 4000 12000 22493 30000 650000 1080
# Raise the scientific-notation penalty so values print in fixed notation.
options(scipen = 99)
head(wel[["houseprice"]])
## [1] 100000 500 40000 70000 NA 6000
# Inspect the first-ranked funding-source code.
class(wel[["code_1stprepare"]])
## [1] "numeric"
table(is.na(wel[["code_1stprepare"]]))
##
## FALSE TRUE
## 6785 1080
head(wel[["code_1stprepare"]])
## [1] 1 1 1 1 NA 3
# Inspect the second-ranked funding-source code.
class(wel[["code_2stprepare"]])
## [1] "numeric"
table(is.na(wel[["code_2stprepare"]]))
##
## FALSE TRUE
## 6785 1080
head(wel[["code_2stprepare"]])
## [1] 4 88 4 88 NA 88
# Lookup table for the first-ranked funding source (codes 1-5).
# The second label ends with a trailing space; it is kept exactly as is.
list_prepare1 <- data.frame(
  code_1stprepare = 1:5,
  prepare1 = c(
    "자기돈(상속인 경우 포함)",
    "무상으로 도움을 받음 ",
    "부모,형제,친척,친구 등으로 부터 빌림",
    "금융기관으로부터 빌림",
    "사채"
  )
)
# The second-ranked lookup holds the same codes and labels under its own
# column names, so it is derived from the first one by renaming.
list_prepare2 <- setNames(list_prepare1, c("code_2stprepare", "prepare2"))
list_prepare1
## code_1stprepare prepare1
## 1 1 자기돈(상속인 경우 포함)
## 2 2 무상으로 도움을 받음
## 3 3 부모,형제,친척,친구 등으로 부터 빌림
## 4 4 금융기관으로부터 빌림
## 5 5 사채
list_prepare2
## code_2stprepare prepare2
## 1 1 자기돈(상속인 경우 포함)
## 2 2 무상으로 도움을 받음
## 3 3 부모,형제,친척,친구 등으로 부터 빌림
## 4 4 금융기관으로부터 빌림
## 5 5 사채
# Attach both labels; codes without a lookup entry end up as NA labels.
wel <- wel %>%
  left_join(list_prepare1, by = "code_1stprepare") %>%
  left_join(list_prepare2, by = "code_2stprepare")
head(select(wel, code_1stprepare, code_2stprepare, prepare1, prepare2))
## code_1stprepare code_2stprepare prepare1
## 1 1 4 자기돈(상속인 경우 포함)
## 2 1 88 자기돈(상속인 경우 포함)
## 3 1 4 자기돈(상속인 경우 포함)
## 4 1 88 자기돈(상속인 경우 포함)
## 5 NA NA <NA>
## 6 3 88 부모,형제,친척,친구 등으로 부터 빌림
## prepare2
## 1 금융기관으로부터 빌림
## 2 <NA>
## 3 금융기관으로부터 빌림
## 4 <NA>
## 5 <NA>
## 6 <NA>
# Inspect the satisfaction score: type, value counts, NA count, quartiles.
# The original ran class() and the NA count twice in a row; each check now
# runs once.
class(wel$satisfaction)
## [1] "numeric"
table(wel$satisfaction)
##
## 1 2 3 4 5
## 57 473 1800 4808 417
table(is.na(wel$satisfaction))
##
## FALSE TRUE
## 7555 310
summary(wel$satisfaction)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 3.000 4.000 3.669 4.000 5.000 310
Describe and show how you analyzed the data.
## 1. 연령대/지역별 점유형태 분석
# Rows per (age_group, housingtype) pair, plus each pair's percentage share
# within its age group. The result stays grouped by age_group.
ageg_type <- count(wel, age_group, housingtype) %>%
  group_by(age_group) %>%
  mutate(pct = round(n / sum(n) * 100, 2))
ageg_type %>% head()
## # A tibble: 6 × 4
## # Groups: age_group [1]
## age_group housingtype n pct
## <chr> <chr> <int> <dbl>
## 1 20s 기타 15 10.1
## 2 20s 보증부 월세 75 50.3
## 3 20s 비가구원 명의 주택 18 12.1
## 4 20s 월세(사글세) 5 3.36
## 5 20s 자가 5 3.36
## 6 20s 전세 31 20.8
# Rows per (MZ, housingtype) pair, plus each pair's percentage share within
# its MZ band. Rows with a missing MZ form their own group here.
MZ_type <- count(wel, MZ, housingtype) %>%
  group_by(MZ) %>%
  mutate(pct = round(n / sum(n) * 100, 2))
MZ_type %>% head()
## # A tibble: 6 × 4
## # Groups: MZ [1]
## MZ housingtype n pct
## <chr> <chr> <int> <dbl>
## 1 E20s 기타 4 21.0
## 2 E20s 보증부 월세 7 36.8
## 3 E20s 비가구원 명의 주택 1 5.26
## 4 E20s 월세(사글세) 2 10.5
## 5 E20s 자가 1 5.26
## 6 E20s 전세 4 21.0
# Rows per (region, housingtype) pair, plus each pair's percentage share
# within its region.
region_type <- count(wel, region, housingtype) %>%
  group_by(region) %>%
  mutate(pct = round(n / sum(n) * 100, 2))
region_type %>% head()
## # A tibble: 6 × 4
## # Groups: region [1]
## region housingtype n pct
## <chr> <chr> <int> <dbl>
## 1 강원/충북 기타 13 1.82
## 2 강원/충북 보증부 월세 89 12.5
## 3 강원/충북 비가구원 명의 주택 90 12.6
## 4 강원/충북 월세(사글세) 18 2.52
## 5 강원/충북 자가 461 64.6
## 6 강원/충북 전세 43 6.02
# Same regional shares as region_type, restricted to rows that have an MZ
# band.
region_typeMZ <- wel %>%
  filter(!is.na(MZ)) %>%
  count(region, housingtype) %>%
  group_by(region) %>%
  mutate(pct = round(n / sum(n) * 100, 2))
region_typeMZ %>% head()
## # A tibble: 6 × 4
## # Groups: region [1]
## region housingtype n pct
## <chr> <chr> <int> <dbl>
## 1 강원/충북 기타 6 8.22
## 2 강원/충북 보증부 월세 19 26.0
## 3 강원/충북 비가구원 명의 주택 5 6.85
## 4 강원/충북 월세(사글세) 1 1.37
## 5 강원/충북 자가 23 31.5
## 6 강원/충북 전세 19 26.0
# Mean dwelling area for every single year of age (rows without an area are
# dropped first).
age_area <- filter(wel, !is.na(area)) %>%
  group_by(age) %>%
  summarise(mean_area = mean(area))
age_area %>% head()
## # A tibble: 6 × 2
## age mean_area
## <dbl> <dbl>
## 1 20 36
## 2 21 47.5
## 3 22 25
## 4 23 29.5
## 5 24 39.5
## 6 25 35.3
# Mean dwelling area per year of age, restricted to rows that have both an
# area and an MZ band.
MZ_area <- wel %>%
  filter(!is.na(area), !is.na(MZ)) %>%
  group_by(age) %>%
  summarise(mean_area = mean(area))
MZ_area %>% head()
## # A tibble: 6 × 2
## age mean_area
## <dbl> <dbl>
## 1 20 36
## 2 21 47.5
## 3 22 25
## 4 23 29.5
## 5 24 39.5
## 6 25 35.3
# Rows with a complete housing type, price, age, and MZ band.
age_price <- na.omit(select(wel, housingtype, houseprice, age, MZ))
head(age_price)
## housingtype houseprice age MZ
## 16 자가 100000 39 L30s
## 29 자가 68000 41 E40s
## 36 자가 17000 41 E40s
## 39 전세 35000 43 E40s
## 45 전세 23000 36 L30s
## 51 보증부 월세 500 40 E40s
# Mean price per housing type among those rows.
summarise(group_by(age_price, housingtype), mean_price = mean(houseprice))
## # A tibble: 3 × 2
## housingtype mean_price
## <chr> <dbl>
## 1 보증부 월세 2713.
## 2 자가 41941.
## 3 전세 16108.
# Price divided by dwelling area (rounded to 2 decimals) for rows that have
# both an MZ band and a price.
price_per_area <- wel %>%
  filter(!is.na(MZ), !is.na(houseprice)) %>%
  select(MZ, houseprice, area, housingtype) %>%
  mutate(price_per = round(houseprice / area, 2))
price_per_area %>% head()
## MZ houseprice area housingtype price_per
## 1 L30s 100000 79 자가 1265.82
## 2 E40s 68000 96 자가 708.33
## 3 E40s 17000 109 자가 155.96
## 4 E40s 35000 122 전세 286.89
## 5 L30s 23000 69 전세 333.33
## 6 E40s 500 20 보증부 월세 25.00
# Rows with both funding-source labels, a housing type, and an MZ band.
prepare <- na.omit(select(wel, prepare1, prepare2, housingtype, MZ))
prepare %>% head()
## prepare1 prepare2 housingtype
## 16 자기돈(상속인 경우 포함) 부모,형제,친척,친구 등으로 부터 빌림 자가
## 29 무상으로 도움을 받음 자기돈(상속인 경우 포함) 자가
## 36 금융기관으로부터 빌림 자기돈(상속인 경우 포함) 자가
## 39 자기돈(상속인 경우 포함) 금융기관으로부터 빌림 전세
## 45 자기돈(상속인 경우 포함) 무상으로 도움을 받음 전세
## 55 금융기관으로부터 빌림 자기돈(상속인 경우 포함) 전세
## MZ
## 16 L30s
## 29 E40s
## 36 E40s
## 39 E40s
## 45 L30s
## 55 E30s
# Count and percentage share of each first-ranked funding source.
per_prepare1 <- count(prepare, prepare1) %>%
  mutate(pct = round(n / sum(n) * 100, 1))
# Count and percentage share of each second-ranked funding source.
per_prepare2 <- count(prepare, prepare2) %>%
  mutate(pct = round(n / sum(n) * 100, 1))
per_prepare1 %>% head()
## prepare1 n pct
## 1 금융기관으로부터 빌림 221 44.0
## 2 무상으로 도움을 받음 16 3.2
## 3 부모,형제,친척,친구 등으로 부터 빌림 19 3.8
## 4 자기돈(상속인 경우 포함) 246 49.0
per_prepare2 %>% head()
## prepare2 n pct
## 1 금융기관으로부터 빌림 228 45.4
## 2 무상으로 도움을 받음 26 5.2
## 3 부모,형제,친척,친구 등으로 부터 빌림 20 4.0
## 4 자기돈(상속인 경우 포함) 228 45.4
# Complete funding-source rows with the MZ bands collapsed into decades.
# na.omit() has already removed rows with a missing MZ, so every remaining
# band that is not in the 20s or 30s is labelled "40s".
MZprepare <- wel %>%
  select(prepare1, prepare2, housingtype, MZ) %>%
  na.omit() %>%
  mutate(
    ageg = case_when(
      MZ %in% c("E20s", "L20s") ~ "20s",
      MZ %in% c("E30s", "L30s") ~ "30s",
      TRUE ~ "40s"
    )
  )
MZprepare %>% head()
## prepare1 prepare2 housingtype
## 16 자기돈(상속인 경우 포함) 부모,형제,친척,친구 등으로 부터 빌림 자가
## 29 무상으로 도움을 받음 자기돈(상속인 경우 포함) 자가
## 36 금융기관으로부터 빌림 자기돈(상속인 경우 포함) 자가
## 39 자기돈(상속인 경우 포함) 금융기관으로부터 빌림 전세
## 45 자기돈(상속인 경우 포함) 무상으로 도움을 받음 전세
## 55 금융기관으로부터 빌림 자기돈(상속인 경우 포함) 전세
## MZ ageg
## 16 L30s 30s
## 29 E40s 40s
## 36 E40s 40s
## 39 E40s 40s
## 45 L30s 30s
## 55 E30s 30s
# Count of each first-ranked funding source per decade, plus its percentage
# share within that decade. The result stays grouped by ageg.
per_MZprepare <- count(MZprepare, ageg, prepare1) %>%
  group_by(ageg) %>%
  mutate(pct = round(n / sum(n) * 100, 1))
per_MZprepare %>% head()
## # A tibble: 6 × 4
## # Groups: ageg [2]
## ageg prepare1 n pct
## <chr> <chr> <int> <dbl>
## 1 20s "금융기관으로부터 빌림" 24 64.9
## 2 20s "무상으로 도움을 받음 " 2 5.4
## 3 20s "부모,형제,친척,친구 등으로 부터 빌림" 5 13.5
## 4 20s "자기돈(상속인 경우 포함)" 6 16.2
## 5 30s "금융기관으로부터 빌림" 117 51.1
## 6 30s "무상으로 도움을 받음 " 12 5.2
# Stacked horizontal bars: share of each housingtype within every age group.
ageg_type %>%
  ggplot(aes(x = age_group, y = pct, fill = housingtype)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "전 연령대별 점유형태 비율",
    x = "전체 연령대",
    y = "비율",
    fill = "점유형태"
  ) +
  theme(
    plot.title = element_text(
      face = "bold", hjust = 0.5, size = 15, color = "black"
    )
  )
# Stacked horizontal bars: share of each housingtype within every MZ band,
# with the bands placed in age order. `limits` is written out in full rather
# than relying on partial argument matching of `limit`.
MZ_type %>%
  filter(!is.na(MZ)) %>%
  ggplot(aes(x = MZ, y = pct, fill = housingtype)) +
  geom_col() +
  coord_flip() +
  scale_x_discrete(limits = c("E20s", "L20s", "E30s", "L30s", "E40s")) +
  labs(x = "연령대", y = "비율", fill = "점유형태") +
  ggtitle("MZ세대 연령대별 점유형태 비율") +
  theme(
    plot.title = element_text(
      face = "bold", hjust = 0.5, size = 15, color = "black"
    )
  )
# Stacked horizontal bars: share of each housingtype per region, all rows.
region_type %>%
  ggplot(aes(x = region, y = pct, fill = housingtype)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "지역별 점유형태 비율 (전연령대)",
    x = "지역",
    y = "비율",
    fill = "점유형태"
  ) +
  theme(
    plot.title = element_text(
      face = "bold", hjust = 0.5, size = 15, color = "black"
    )
  )
# The same chart for the rows that have an MZ band.
region_typeMZ %>%
  ggplot(aes(x = region, y = pct, fill = housingtype)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "지역별 점유형태 비율 (MZ세대)",
    x = "지역",
    y = "비율",
    fill = "점유형태"
  ) +
  theme(
    plot.title = element_text(
      face = "bold", hjust = 0.5, size = 15, color = "black"
    )
  )
## 2. 연령별 주거면적/주택마련비용
# Interactive line chart of the mean dwelling area at every age.
p <- age_area %>%
  ggplot(aes(x = age, y = mean_area)) +
  geom_line() +
  labs(
    title = "전 연령별 주거면적 (단위: ㎡)",
    x = "나이",
    y = "주거면적"
  ) +
  theme(
    plot.title = element_text(
      face = "bold", hjust = 0.5, size = 15, color = "black"
    )
  )
ggplotly(p)
# The same chart for the MZ ages, with a smoothed trend line on top.
P <- MZ_area %>%
  ggplot(aes(x = age, y = mean_area)) +
  geom_line() +
  geom_smooth() +
  labs(
    title = "MZ연령별 주거면적 (단위: ㎡)",
    x = "나이",
    y = "주거면적"
  ) +
  theme(
    plot.title = element_text(
      face = "bold", hjust = 0.5, size = 15, color = "black"
    )
  )
ggplotly(P)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Interactive box plots of price per MZ band, one facet per housingtype.
# ylim() drops the observations above 75000 before the boxes are computed.
# `limits` is written out in full on both scales rather than relying on
# partial argument matching of `limit`.
q <- age_price %>%
  ggplot(aes(x = MZ, y = houseprice, fill = housingtype)) +
  geom_boxplot() +
  scale_x_discrete(limits = c("E20s", "L20s", "E30s", "L30s", "E40s")) +
  ylim(0, 75000) +
  labs(x = "연령대", y = "집값", fill = "점유형태") +
  scale_fill_discrete(limits = c("자가", "전세", "보증부 월세")) +
  ggtitle("MZ연령대별 주택비용 (전체) (단위: 만 원)") +
  theme(
    plot.title = element_text(
      face = "bold", hjust = 0.5, size = 15, color = "black"
    )
  ) +
  facet_wrap(~housingtype, ncol = 3)
ggplotly(q)
## Warning: Removed 40 rows containing non-finite values (`stat_boxplot()`).
# Three interactive box plots of price per MZ band, one per housingtype, each
# with its own y-axis cap. `limits` is written out in full rather than
# relying on partial argument matching of `limit`.

# Rows with housingtype "자가".
Q <- age_price %>%
  filter(housingtype == "자가") %>%
  ggplot(aes(x = MZ, y = houseprice)) +
  geom_boxplot() +
  ylim(0, 200000) +
  scale_x_discrete(limits = c("E20s", "L20s", "E30s", "L30s", "E40s")) +
  labs(x = "연령대", y = "주택가격") +
  ggtitle("MZ연령대별 집값 (자가) (단위: 만 원)") +
  theme(
    plot.title = element_text(
      face = "bold", hjust = 0.5, size = 15, color = "black"
    )
  )
ggplotly(Q)
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
# Rows with housingtype "전세".
R <- age_price %>%
  filter(housingtype == "전세") %>%
  ggplot(aes(x = MZ, y = houseprice)) +
  geom_boxplot() +
  scale_x_discrete(limits = c("E20s", "L20s", "E30s", "L30s", "E40s")) +
  ylim(0, 63000) +
  labs(x = "연령대", y = "전세금") +
  ggtitle("MZ연령대별 집값 (전세) (단위: 만 원)") +
  theme(
    plot.title = element_text(
      face = "bold", hjust = 0.5, size = 15, color = "black"
    )
  )
ggplotly(R)
# Rows with housingtype "보증부 월세".
Z <- age_price %>%
  filter(housingtype == "보증부 월세") %>%
  ggplot(aes(x = MZ, y = houseprice)) +
  geom_boxplot() +
  scale_x_discrete(limits = c("E20s", "L20s", "E30s", "L30s", "E40s")) +
  ylim(0, 35500) +
  labs(x = "연령대", y = "보증금") +
  ggtitle("MZ연령대별 집값 (보증부 월세) (단위: 만 원)") +
  theme(
    plot.title = element_text(
      face = "bold", hjust = 0.5, size = 15, color = "black"
    )
  )
ggplotly(Z)
# Interactive jittered scatter of price per unit of area by MZ band, one
# facet per housingtype; ylim() hides the points above 1500.
# `price_per` is houseprice / area, and the area plots in this report give
# area in ㎡, so the axis and title now say "per ㎡" instead of "per pyeong".
# NOTE(review): if per-pyeong figures were intended, convert the values
# instead of relabelling - confirm the unit of `area` against the codebook.
# `limits` is written out in full rather than relying on partial argument
# matching of `limit`.
pp <- price_per_area %>%
  ggplot(aes(x = MZ, y = price_per, color = housingtype)) +
  geom_jitter() +
  ylim(0, 1500) +
  scale_x_discrete(limits = c("E20s", "L20s", "E30s", "L30s", "E40s")) +
  labs(x = "연령대", y = "㎡당 가격", color = "점유형태") +
  ggtitle("MZ연령대별 ㎡당 주택마련비용 (단위: 만 원)") +
  theme(
    plot.title = element_text(
      face = "bold", hjust = 0.5, size = 15, color = "black"
    )
  ) +
  facet_wrap(~housingtype, ncol = 3)
ggplotly(pp)
# Pie chart of the first-ranked funding sources: a single stacked bar bent
# into polar coordinates, with each slice labelled by its percentage.
per_prepare1 %>%
  ggplot(aes(x = "", y = pct, fill = prepare1)) +
  geom_col() +
  theme_void() +
  coord_polar("y", start = 0) +
  geom_text(
    aes(label = paste0(round(pct, 1), "%")),
    position = position_stack(vjust = 0.5),
    color = "black",
    size = 2.5
  ) +
  labs(
    title = "주택구입비용/보증금 마련경로 (1순위)",
    fill = "1순위"
  ) +
  theme(
    plot.title = element_text(
      face = "bold", hjust = 0.5, size = 15, color = "black"
    )
  )
# The same pie for the second-ranked funding sources, with larger labels.
per_prepare2 %>%
  ggplot(aes(x = "", y = pct, fill = prepare2)) +
  geom_col() +
  theme_void() +
  coord_polar("y", start = 0) +
  geom_text(
    aes(label = paste0(round(pct, 1), "%")),
    position = position_stack(vjust = 0.5),
    color = "black",
    size = 4
  ) +
  labs(
    title = "주택구입비용/보증금 마련경로 (2순위)",
    fill = "2순위"
  ) +
  theme(
    plot.title = element_text(
      face = "bold", hjust = 0.5, size = 15, color = "black"
    )
  )
# One pie of first-ranked funding sources per decade, stacked in three rows.
# The original call was `facet_wrap(~ageg, , nrow = 3)`; the stray empty
# argument is removed. geom_col() is the direct form of
# geom_bar(stat = "identity") and is what the other bar charts here use.
ggplot(data = per_MZprepare, aes(x = "", y = pct, fill = prepare1)) +
  geom_col() +
  theme_void() +
  coord_polar("y", start = 0) +
  geom_text(
    aes(label = paste0(round(pct, 1), "%")),
    position = position_stack(vjust = 0.5),
    color = "black",
    size = 3
  ) +
  ggtitle("주택구입비용/보증금 마련경로 (MZ연령별)") +
  labs(fill = "마련경로") +
  theme(
    plot.title = element_text(
      face = "bold", hjust = 0.5, size = 15, color = "black"
    )
  ) +
  facet_wrap(~ageg, nrow = 3)
Describe and show how you created the first figure. Why did you choose this figure type?
In showing the figures that you created, describe why you designed it the way you did. Why did you choose those colors, fonts, and other design elements?