library(dplyr)
library(ggplot2)
# Take the midwest dataset shipped with ggplot2 and coerce it to a plain
# data.frame for the exercises below.
midwest <- as.data.frame(x = ggplot2::midwest)
# Preview the first 6 rows
head(x = midwest, n = 6L)
## PID county state area poptotal popdensity popwhite popblack
## 1 561 ADAMS IL 0.052 66090 1270.9615 63917 1702
## 2 562 ALEXANDER IL 0.014 10626 759.0000 7054 3496
## 3 563 BOND IL 0.022 14991 681.4091 14477 429
## 4 564 BOONE IL 0.017 30806 1812.1176 29344 127
## 5 565 BROWN IL 0.018 5836 324.2222 5264 547
## 6 566 BUREAU IL 0.050 35688 713.7600 35157 50
## popamerindian popasian popother percwhite percblack percamerindan
## 1 98 249 124 96.71206 2.5752761 0.1482826
## 2 19 48 9 66.38434 32.9004329 0.1788067
## 3 35 16 34 96.57128 2.8617170 0.2334734
## 4 46 150 1139 95.25417 0.4122574 0.1493216
## 5 14 5 6 90.19877 9.3728581 0.2398903
## 6 65 195 221 98.51210 0.1401031 0.1821340
## percasian percother popadults perchsd percollege percprof
## 1 0.37675897 0.18762294 43298 75.10740 19.63139 4.355859
## 2 0.45172219 0.08469791 6724 59.72635 11.24331 2.870315
## 3 0.10673071 0.22680275 9669 69.33499 17.03382 4.488572
## 4 0.48691813 3.69733169 19272 75.47219 17.27895 4.197800
## 5 0.08567512 0.10281014 3979 68.86152 14.47600 3.367680
## 6 0.54640215 0.61925577 23444 76.62941 18.90462 3.275891
## poppovertyknown percpovertyknown percbelowpoverty percchildbelowpovert
## 1 63628 96.27478 13.151443 18.01172
## 2 10529 99.08714 32.244278 45.82651
## 3 14235 94.95697 12.068844 14.03606
## 4 30337 98.47757 7.209019 11.17954
## 5 4815 82.50514 13.520249 13.02289
## 6 35107 98.37200 10.399635 14.15882
## percadultpoverty percelderlypoverty inmetro category
## 1 11.009776 12.443812 0 AAR
## 2 27.385647 25.228976 0 LHR
## 3 10.852090 12.697410 0 AAR
## 4 5.536013 6.217047 1 ALU
## 5 11.143211 19.200000 0 AAR
## 6 8.179287 11.008586 0 AAR
# Compact overview of the midwest data: dimensions, column types, and the
# leading values of every column
utils::str(midwest)
## 'data.frame': 437 obs. of 28 variables:
## $ PID : int 561 562 563 564 565 566 567 568 569 570 ...
## $ county : chr "ADAMS" "ALEXANDER" "BOND" "BOONE" ...
## $ state : chr "IL" "IL" "IL" "IL" ...
## $ area : num 0.052 0.014 0.022 0.017 0.018 0.05 0.017 0.027 0.024 0.058 ...
## $ poptotal : int 66090 10626 14991 30806 5836 35688 5322 16805 13437 173025 ...
## $ popdensity : num 1271 759 681 1812 324 ...
## $ popwhite : int 63917 7054 14477 29344 5264 35157 5298 16519 13384 146506 ...
## $ popblack : int 1702 3496 429 127 547 50 1 111 16 16559 ...
## $ popamerindian : int 98 19 35 46 14 65 8 30 8 331 ...
## $ popasian : int 249 48 16 150 5 195 15 61 23 8033 ...
## $ popother : int 124 9 34 1139 6 221 0 84 6 1596 ...
## $ percwhite : num 96.7 66.4 96.6 95.3 90.2 ...
## $ percblack : num 2.575 32.9 2.862 0.412 9.373 ...
## $ percamerindan : num 0.148 0.179 0.233 0.149 0.24 ...
## $ percasian : num 0.3768 0.4517 0.1067 0.4869 0.0857 ...
## $ percother : num 0.1876 0.0847 0.2268 3.6973 0.1028 ...
## $ popadults : int 43298 6724 9669 19272 3979 23444 3583 11323 8825 95971 ...
## $ perchsd : num 75.1 59.7 69.3 75.5 68.9 ...
## $ percollege : num 19.6 11.2 17 17.3 14.5 ...
## $ percprof : num 4.36 2.87 4.49 4.2 3.37 ...
## $ poppovertyknown : int 63628 10529 14235 30337 4815 35107 5241 16455 13081 154934 ...
## $ percpovertyknown : num 96.3 99.1 95 98.5 82.5 ...
## $ percbelowpoverty : num 13.15 32.24 12.07 7.21 13.52 ...
## $ percchildbelowpovert: num 18 45.8 14 11.2 13 ...
## $ percadultpoverty : num 11.01 27.39 10.85 5.54 11.14 ...
## $ percelderlypoverty : num 12.44 25.23 12.7 6.22 19.2 ...
## $ inmetro : int 0 0 0 1 0 0 0 0 0 1 ...
## $ category : chr "AAR" "LHR" "AAR" "ALU" ...
popadults는 해당 지역의 성인 인구, poptotal은 전체 인구를 나타냅니다. midwest 데이터에 전체 인구 대비 미성년 인구 백분율 변수를 추가하세요.
PID | county | state | area | poptotal | popdensity | popwhite | popblack | popamerindian | popasian | popother | percwhite | percblack | percamerindan | percasian | percother | popadults | perchsd | percollege | percprof | poppovertyknown | percpovertyknown | percbelowpoverty | percchildbelowpovert | percadultpoverty | percelderlypoverty | inmetro | category | popkid |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
561 | ADAMS | IL | 0.052 | 66090 | 1270.9615 | 63917 | 1702 | 98 | 249 | 124 | 96.71206 | 2.5752761 | 0.1482826 | 0.3767590 | 0.1876229 | 43298 | 75.10740 | 19.63139 | 4.355859 | 63628 | 96.27478 | 13.151443 | 18.01172 | 11.009776 | 12.443812 | 0 | AAR | 34.48631 |
562 | ALEXANDER | IL | 0.014 | 10626 | 759.0000 | 7054 | 3496 | 19 | 48 | 9 | 66.38434 | 32.9004329 | 0.1788067 | 0.4517222 | 0.0846979 | 6724 | 59.72635 | 11.24331 | 2.870315 | 10529 | 99.08714 | 32.244278 | 45.82651 | 27.385647 | 25.228976 | 0 | LHR | 36.72125 |
563 | BOND | IL | 0.022 | 14991 | 681.4091 | 14477 | 429 | 35 | 16 | 34 | 96.57128 | 2.8617170 | 0.2334734 | 0.1067307 | 0.2268028 | 9669 | 69.33499 | 17.03382 | 4.488572 | 14235 | 94.95697 | 12.068844 | 14.03606 | 10.852090 | 12.697410 | 0 | AAR | 35.50130 |
564 | BOONE | IL | 0.017 | 30806 | 1812.1176 | 29344 | 127 | 46 | 150 | 1139 | 95.25417 | 0.4122574 | 0.1493216 | 0.4869181 | 3.6973317 | 19272 | 75.47219 | 17.27895 | 4.197800 | 30337 | 98.47757 | 7.209019 | 11.17954 | 5.536013 | 6.217047 | 1 | ALU | 37.44076 |
565 | BROWN | IL | 0.018 | 5836 | 324.2222 | 5264 | 547 | 14 | 5 | 6 | 90.19877 | 9.3728581 | 0.2398903 | 0.0856751 | 0.1028101 | 3979 | 68.86152 | 14.47600 | 3.367680 | 4815 | 82.50514 | 13.520249 | 13.02289 | 11.143211 | 19.200000 | 0 | AAR | 31.81974 |
566 | BUREAU | IL | 0.050 | 35688 | 713.7600 | 35157 | 50 | 65 | 195 | 221 | 98.51210 | 0.1401031 | 0.1821340 | 0.5464022 | 0.6192558 | 23444 | 76.62941 | 18.90462 | 3.275892 | 35107 | 98.37200 | 10.399635 | 14.15882 | 8.179287 | 11.008586 | 0 | AAR | 34.30845 |
미성년 인구 백분율이 가장 높은 상위 5개 county(지역)의 미성년 인구 백분율을 출력하세요.
# Add popkid: the percentage of each county's total population that is
# under-age. Minors are taken to be everyone who is not counted as an adult
# (poptotal - popadults). midwest1 is the base table that the later steps
# (midwest2, midwest3, midwest4) start from, so it must be defined before them.
midwest1 <- midwest %>%
  mutate(popkid = (poptotal - popadults) / poptotal * 100)

# Five counties with the highest percentage of minors.
# Swapping the order of select() and arrange() gives the same result.
midwest2 <- midwest1 %>%
  select(county, popkid) %>%
  arrange(desc(popkid)) %>%
  head(5)
# midwest2 already holds exactly five rows, so no extra head() is needed.
knitr::kable(midwest2)
분류표의 기준에 따라 미성년 비율 등급 변수를 추가하고, 각 등급에 몇 개의 지역이 있는지 알아보세요.
분류 | 기준 |
---|---|
large | 40% 이상 |
middle | 30% 이상 40% 미만 |
small | 30% 미만 |
# Label every county by its share of minors (popkid, in percent):
#   "large"  : 40 or more
#   "middle" : at least 30 but below 40
#   "small"  : below 30
# then count how many counties fall into each label.
midwest3 <- midwest1 %>%
  mutate(
    popkidclass = case_when(
      popkid >= 40 ~ "large",
      popkid >= 30 ~ "middle",
      popkid < 30 ~ "small"
    )
  ) %>%
  group_by(popkidclass) %>%
  summarise(n = n())
knitr::kable(head(midwest3))
popkidclass | n |
---|---|
large | 32 |
middle | 396 |
small | 9 |
popasian은 해당 지역의 아시아인 인구를 나타냅니다. 전체 인구 대비 아시아인 인구 백분율 변수를 추가하고 하위 10개 지역의 state(주), county(지역), 아시아인 인구 백분율을 출력하세요.
# Asian population as a percentage of each county's total population.
# Keep only the identifying columns plus the new percentage, ordered from the
# smallest share upwards, and show the ten lowest counties.
midwest4 <- midwest1 %>%
  mutate(popasiantotal = popasian / poptotal * 100) %>%
  select(state, county, popasiantotal) %>%
  arrange(popasiantotal)
knitr::kable(head(midwest4, n = 10))
state | county | popasiantotal |
---|---|---|
WI | MENOMINEE | 0.0000000 |
IN | BENTON | 0.0105921 |
IN | CARROLL | 0.0159498 |
OH | VINTON | 0.0270319 |
WI | IRON | 0.0325045 |
IL | SCOTT | 0.0531538 |
IN | CLAY | 0.0607165 |
MI | OSCODA | 0.0637592 |
OH | PERRY | 0.0665462 |
IL | PIATT | 0.0707486 |