library(ggplot2)
## Warning: 패키지 'ggplot2'는 R 버전 4.2.3에서 작성되었습니다
# Load the `midwest` data set so the steps below can refer to it by name.
# NOTE(review): presumably this is the data set shipped with ggplot2 -- confirm.
data("midwest")
library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.2.3에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the county, state, poptotal and popasian columns.
select(midwest, county, state, poptotal, popasian)
## # A tibble: 437 × 4
## county state poptotal popasian
## <chr> <chr> <int> <int>
## 1 ADAMS IL 66090 249
## 2 ALEXANDER IL 10626 48
## 3 BOND IL 14991 16
## 4 BOONE IL 30806 150
## 5 BROWN IL 5836 5
## 6 BUREAU IL 35688 195
## 7 CALHOUN IL 5322 15
## 8 CARROLL IL 16805 61
## 9 CASS IL 13437 23
## 10 CHAMPAIGN IL 173025 8033
## # … with 427 more rows
# Rename popasian to asian and poptotal to total; column positions are kept.
rename(midwest, total = poptotal, asian = popasian)
## # A tibble: 437 × 28
## PID county state area total popde…¹ popwh…² popbl…³ popam…⁴ asian popot…⁵
## <int> <chr> <chr> <dbl> <int> <dbl> <int> <int> <int> <int> <int>
## 1 561 ADAMS IL 0.052 66090 1271. 63917 1702 98 249 124
## 2 562 ALEXA… IL 0.014 10626 759 7054 3496 19 48 9
## 3 563 BOND IL 0.022 14991 681. 14477 429 35 16 34
## 4 564 BOONE IL 0.017 30806 1812. 29344 127 46 150 1139
## 5 565 BROWN IL 0.018 5836 324. 5264 547 14 5 6
## 6 566 BUREAU IL 0.05 35688 714. 35157 50 65 195 221
## 7 567 CALHO… IL 0.017 5322 313. 5298 1 8 15 0
## 8 568 CARRO… IL 0.027 16805 622. 16519 111 30 61 84
## 9 569 CASS IL 0.024 13437 560. 13384 16 8 23 6
## 10 570 CHAMP… IL 0.058 173025 2983. 146506 16559 331 8033 1596
## # … with 427 more rows, 17 more variables: percwhite <dbl>, percblack <dbl>,
## # percamerindan <dbl>, percasian <dbl>, percother <dbl>, popadults <int>,
## # perchsd <dbl>, percollege <dbl>, percprof <dbl>, poppovertyknown <int>,
## # percpovertyknown <dbl>, percbelowpoverty <dbl>, percchildbelowpovert <dbl>,
## # percadultpoverty <dbl>, percelderlypoverty <dbl>, inmetro <int>,
## # category <chr>, and abbreviated variable names ¹popdensity, ²popwhite,
## # ³popblack, ⁴popamerindian, ⁵popother
# Add the derived variable pct_asian = asian / total * 100, i.e. the Asian
# population as a percentage of the total population.
mutate(
  rename(midwest, total = poptotal, asian = popasian),  # shorter column names
  pct_asian = asian / total * 100                       # derived variable
)
## # A tibble: 437 × 29
## PID county state area total popde…¹ popwh…² popbl…³ popam…⁴ asian popot…⁵
## <int> <chr> <chr> <dbl> <int> <dbl> <int> <int> <int> <int> <int>
## 1 561 ADAMS IL 0.052 66090 1271. 63917 1702 98 249 124
## 2 562 ALEXA… IL 0.014 10626 759 7054 3496 19 48 9
## 3 563 BOND IL 0.022 14991 681. 14477 429 35 16 34
## 4 564 BOONE IL 0.017 30806 1812. 29344 127 46 150 1139
## 5 565 BROWN IL 0.018 5836 324. 5264 547 14 5 6
## 6 566 BUREAU IL 0.05 35688 714. 35157 50 65 195 221
## 7 567 CALHO… IL 0.017 5322 313. 5298 1 8 15 0
## 8 568 CARRO… IL 0.027 16805 622. 16519 111 30 61 84
## 9 569 CASS IL 0.024 13437 560. 13384 16 8 23 6
## 10 570 CHAMP… IL 0.058 173025 2983. 146506 16559 331 8033 1596
## # … with 427 more rows, 18 more variables: percwhite <dbl>, percblack <dbl>,
## # percamerindan <dbl>, percasian <dbl>, percother <dbl>, popadults <int>,
## # perchsd <dbl>, percollege <dbl>, percprof <dbl>, poppovertyknown <int>,
## # percpovertyknown <dbl>, percbelowpoverty <dbl>, percchildbelowpovert <dbl>,
## # percadultpoverty <dbl>, percelderlypoverty <dbl>, inmetro <int>,
## # category <chr>, pct_asian <dbl>, and abbreviated variable names
## # ¹popdensity, ²popwhite, ³popblack, ⁴popamerindian, ⁵popother
# Label each row "large" when its pct_asian is above the mean pct_asian of all
# rows, and "small" otherwise. The exercise requires ifelse() for this step.
# NOTE(review): midwest already contains a `category` column, so this mutate()
# replaces that column instead of adding a new one -- confirm this is intended.
midwest_with_category <- mutate(
  rename(midwest, total = poptotal, asian = popasian),
  pct_asian = asian / total * 100,
  category = ifelse(pct_asian > mean(pct_asian), "large", "small")
)
# Preview the first rows of the result.
head(midwest_with_category)
## # A tibble: 6 × 29
## PID county state area total popde…¹ popwh…² popbl…³ popam…⁴ asian popot…⁵
## <int> <chr> <chr> <dbl> <int> <dbl> <int> <int> <int> <int> <int>
## 1 561 ADAMS IL 0.052 66090 1271. 63917 1702 98 249 124
## 2 562 ALEXAND… IL 0.014 10626 759 7054 3496 19 48 9
## 3 563 BOND IL 0.022 14991 681. 14477 429 35 16 34
## 4 564 BOONE IL 0.017 30806 1812. 29344 127 46 150 1139
## 5 565 BROWN IL 0.018 5836 324. 5264 547 14 5 6
## 6 566 BUREAU IL 0.05 35688 714. 35157 50 65 195 221
## # … with 18 more variables: percwhite <dbl>, percblack <dbl>,
## # percamerindan <dbl>, percasian <dbl>, percother <dbl>, popadults <int>,
## # perchsd <dbl>, percollege <dbl>, percprof <dbl>, poppovertyknown <int>,
## # percpovertyknown <dbl>, percbelowpoverty <dbl>, percchildbelowpovert <dbl>,
## # percadultpoverty <dbl>, percelderlypoverty <dbl>, inmetro <int>,
## # category <chr>, pct_asian <dbl>, and abbreviated variable names
## # ¹popdensity, ²popwhite, ³popblack, ⁴popamerindian, ⁵popother
# Count how many rows fall into each category label.
table(midwest_with_category[["category"]])
##
## large small
## 119 318