library(ggplot2)
## Warning: 패키지 'ggplot2'는 R 버전 4.2.3에서 작성되었습니다
# Load the `midwest` dataset (made available by the ggplot2 package attached
# above) into the workspace.
data(midwest)


library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.2.3에서 작성되었습니다
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep only the county, state, poptotal and popasian variables.
# The result is printed, not stored.
select(midwest, county, state, poptotal, popasian)
## # A tibble: 437 × 4
##    county    state poptotal popasian
##    <chr>     <chr>    <int>    <int>
##  1 ADAMS     IL       66090      249
##  2 ALEXANDER IL       10626       48
##  3 BOND      IL       14991       16
##  4 BOONE     IL       30806      150
##  5 BROWN     IL        5836        5
##  6 BUREAU    IL       35688      195
##  7 CALHOUN   IL        5322       15
##  8 CARROLL   IL       16805       61
##  9 CASS      IL       13437       23
## 10 CHAMPAIGN IL      173025     8033
## # … with 427 more rows
# Rename popasian to asian and poptotal to total.
# Column positions are unchanged; the result is printed, not stored.
rename(midwest, asian = popasian, total = poptotal)
## # A tibble: 437 × 28
##      PID county state  area  total popde…¹ popwh…² popbl…³ popam…⁴ asian popot…⁵
##    <int> <chr>  <chr> <dbl>  <int>   <dbl>   <int>   <int>   <int> <int>   <int>
##  1   561 ADAMS  IL    0.052  66090   1271.   63917    1702      98   249     124
##  2   562 ALEXA… IL    0.014  10626    759     7054    3496      19    48       9
##  3   563 BOND   IL    0.022  14991    681.   14477     429      35    16      34
##  4   564 BOONE  IL    0.017  30806   1812.   29344     127      46   150    1139
##  5   565 BROWN  IL    0.018   5836    324.    5264     547      14     5       6
##  6   566 BUREAU IL    0.05   35688    714.   35157      50      65   195     221
##  7   567 CALHO… IL    0.017   5322    313.    5298       1       8    15       0
##  8   568 CARRO… IL    0.027  16805    622.   16519     111      30    61      84
##  9   569 CASS   IL    0.024  13437    560.   13384      16       8    23       6
## 10   570 CHAMP… IL    0.058 173025   2983.  146506   16559     331  8033    1596
## # … with 427 more rows, 17 more variables: percwhite <dbl>, percblack <dbl>,
## #   percamerindan <dbl>, percasian <dbl>, percother <dbl>, popadults <int>,
## #   perchsd <dbl>, percollege <dbl>, percprof <dbl>, poppovertyknown <int>,
## #   percpovertyknown <dbl>, percbelowpoverty <dbl>, percchildbelowpovert <dbl>,
## #   percadultpoverty <dbl>, percelderlypoverty <dbl>, inmetro <int>,
## #   category <chr>, and abbreviated variable names ¹​popdensity, ²​popwhite,
## #   ³​popblack, ⁴​popamerindian, ⁵​popother
# Create the derived variable pct_asian = asian / total * 100
# (the Asian population as a percentage of the total population).
mutate(
  # Step 1: rename the source columns.
  rename(midwest, asian = popasian, total = poptotal),
  # Step 2: add the derived percentage column.
  pct_asian = asian / total * 100
)
## # A tibble: 437 × 29
##      PID county state  area  total popde…¹ popwh…² popbl…³ popam…⁴ asian popot…⁵
##    <int> <chr>  <chr> <dbl>  <int>   <dbl>   <int>   <int>   <int> <int>   <int>
##  1   561 ADAMS  IL    0.052  66090   1271.   63917    1702      98   249     124
##  2   562 ALEXA… IL    0.014  10626    759     7054    3496      19    48       9
##  3   563 BOND   IL    0.022  14991    681.   14477     429      35    16      34
##  4   564 BOONE  IL    0.017  30806   1812.   29344     127      46   150    1139
##  5   565 BROWN  IL    0.018   5836    324.    5264     547      14     5       6
##  6   566 BUREAU IL    0.05   35688    714.   35157      50      65   195     221
##  7   567 CALHO… IL    0.017   5322    313.    5298       1       8    15       0
##  8   568 CARRO… IL    0.027  16805    622.   16519     111      30    61      84
##  9   569 CASS   IL    0.024  13437    560.   13384      16       8    23       6
## 10   570 CHAMP… IL    0.058 173025   2983.  146506   16559     331  8033    1596
## # … with 427 more rows, 18 more variables: percwhite <dbl>, percblack <dbl>,
## #   percamerindan <dbl>, percasian <dbl>, percother <dbl>, popadults <int>,
## #   perchsd <dbl>, percollege <dbl>, percprof <dbl>, poppovertyknown <int>,
## #   percpovertyknown <dbl>, percbelowpoverty <dbl>, percchildbelowpovert <dbl>,
## #   percadultpoverty <dbl>, percelderlypoverty <dbl>, inmetro <int>,
## #   category <chr>, pct_asian <dbl>, and abbreviated variable names
## #   ¹​popdensity, ²​popwhite, ³​popblack, ⁴​popamerindian, ⁵​popother
# Label each county "large" when its pct_asian is above the mean of pct_asian
# across all rows, and "small" otherwise. The exercise requires ifelse().
#
# NOTE: `midwest` already contains a column named `category`; assigning to that
# name inside mutate() replaces the original values with the large/small label
# (which is why the result still has 29 columns rather than 30).
midwest_with_category <- mutate(
  rename(midwest, asian = popasian, total = poptotal),
  pct_asian = asian / total * 100,
  category = ifelse(pct_asian > mean(pct_asian), "large", "small")
)

# Preview the first rows of the labelled data (default number of rows).
midwest_with_category %>% head()
## # A tibble: 6 × 29
##     PID county   state  area total popde…¹ popwh…² popbl…³ popam…⁴ asian popot…⁵
##   <int> <chr>    <chr> <dbl> <int>   <dbl>   <int>   <int>   <int> <int>   <int>
## 1   561 ADAMS    IL    0.052 66090   1271.   63917    1702      98   249     124
## 2   562 ALEXAND… IL    0.014 10626    759     7054    3496      19    48       9
## 3   563 BOND     IL    0.022 14991    681.   14477     429      35    16      34
## 4   564 BOONE    IL    0.017 30806   1812.   29344     127      46   150    1139
## 5   565 BROWN    IL    0.018  5836    324.    5264     547      14     5       6
## 6   566 BUREAU   IL    0.05  35688    714.   35157      50      65   195     221
## # … with 18 more variables: percwhite <dbl>, percblack <dbl>,
## #   percamerindan <dbl>, percasian <dbl>, percother <dbl>, popadults <int>,
## #   perchsd <dbl>, percollege <dbl>, percprof <dbl>, poppovertyknown <int>,
## #   percpovertyknown <dbl>, percbelowpoverty <dbl>, percchildbelowpovert <dbl>,
## #   percadultpoverty <dbl>, percelderlypoverty <dbl>, inmetro <int>,
## #   category <chr>, pct_asian <dbl>, and abbreviated variable names
## #   ¹​popdensity, ²​popwhite, ³​popblack, ⁴​popamerindian, ⁵​popother
# Count how many counties fall into each large/small label.
table(midwest_with_category[["category"]])
## 
## large small 
##   119   318