1 Load libraries


library(dplyr)
library(ggplot2)

2 Read data


# Pull the `midwest` dataset that ships with ggplot2 and convert it to a plain
# base data.frame, so printing shows every column instead of a truncated view.
midwest <- as.data.frame(x = ggplot2::midwest)

# Preview the leading six rows to sanity-check the columns and their values.
head(x = midwest, n = 6L)
##   PID    county state  area poptotal popdensity popwhite popblack
## 1 561     ADAMS    IL 0.052    66090  1270.9615    63917     1702
## 2 562 ALEXANDER    IL 0.014    10626   759.0000     7054     3496
## 3 563      BOND    IL 0.022    14991   681.4091    14477      429
## 4 564     BOONE    IL 0.017    30806  1812.1176    29344      127
## 5 565     BROWN    IL 0.018     5836   324.2222     5264      547
## 6 566    BUREAU    IL 0.050    35688   713.7600    35157       50
##   popamerindian popasian popother percwhite  percblack percamerindan
## 1            98      249      124  96.71206  2.5752761     0.1482826
## 2            19       48        9  66.38434 32.9004329     0.1788067
## 3            35       16       34  96.57128  2.8617170     0.2334734
## 4            46      150     1139  95.25417  0.4122574     0.1493216
## 5            14        5        6  90.19877  9.3728581     0.2398903
## 6            65      195      221  98.51210  0.1401031     0.1821340
##    percasian  percother popadults  perchsd percollege percprof
## 1 0.37675897 0.18762294     43298 75.10740   19.63139 4.355859
## 2 0.45172219 0.08469791      6724 59.72635   11.24331 2.870315
## 3 0.10673071 0.22680275      9669 69.33499   17.03382 4.488572
## 4 0.48691813 3.69733169     19272 75.47219   17.27895 4.197800
## 5 0.08567512 0.10281014      3979 68.86152   14.47600 3.367680
## 6 0.54640215 0.61925577     23444 76.62941   18.90462 3.275891
##   poppovertyknown percpovertyknown percbelowpoverty percchildbelowpovert
## 1           63628         96.27478        13.151443             18.01172
## 2           10529         99.08714        32.244278             45.82651
## 3           14235         94.95697        12.068844             14.03606
## 4           30337         98.47757         7.209019             11.17954
## 5            4815         82.50514        13.520249             13.02289
## 6           35107         98.37200        10.399635             14.15882
##   percadultpoverty percelderlypoverty inmetro category
## 1        11.009776          12.443812       0      AAR
## 2        27.385647          25.228976       0      LHR
## 3        10.852090          12.697410       0      AAR
## 4         5.536013           6.217047       1      ALU
## 5        11.143211          19.200000       0      AAR
## 6         8.179287          11.008586       0      AAR
# Compact structural summary of midwest: row/column counts plus each column's
# name, type, and first few values.
utils::str(midwest)
## 'data.frame':    437 obs. of  28 variables:
##  $ PID                 : int  561 562 563 564 565 566 567 568 569 570 ...
##  $ county              : chr  "ADAMS" "ALEXANDER" "BOND" "BOONE" ...
##  $ state               : chr  "IL" "IL" "IL" "IL" ...
##  $ area                : num  0.052 0.014 0.022 0.017 0.018 0.05 0.017 0.027 0.024 0.058 ...
##  $ poptotal            : int  66090 10626 14991 30806 5836 35688 5322 16805 13437 173025 ...
##  $ popdensity          : num  1271 759 681 1812 324 ...
##  $ popwhite            : int  63917 7054 14477 29344 5264 35157 5298 16519 13384 146506 ...
##  $ popblack            : int  1702 3496 429 127 547 50 1 111 16 16559 ...
##  $ popamerindian       : int  98 19 35 46 14 65 8 30 8 331 ...
##  $ popasian            : int  249 48 16 150 5 195 15 61 23 8033 ...
##  $ popother            : int  124 9 34 1139 6 221 0 84 6 1596 ...
##  $ percwhite           : num  96.7 66.4 96.6 95.3 90.2 ...
##  $ percblack           : num  2.575 32.9 2.862 0.412 9.373 ...
##  $ percamerindan       : num  0.148 0.179 0.233 0.149 0.24 ...
##  $ percasian           : num  0.3768 0.4517 0.1067 0.4869 0.0857 ...
##  $ percother           : num  0.1876 0.0847 0.2268 3.6973 0.1028 ...
##  $ popadults           : int  43298 6724 9669 19272 3979 23444 3583 11323 8825 95971 ...
##  $ perchsd             : num  75.1 59.7 69.3 75.5 68.9 ...
##  $ percollege          : num  19.6 11.2 17 17.3 14.5 ...
##  $ percprof            : num  4.36 2.87 4.49 4.2 3.37 ...
##  $ poppovertyknown     : int  63628 10529 14235 30337 4815 35107 5241 16455 13081 154934 ...
##  $ percpovertyknown    : num  96.3 99.1 95 98.5 82.5 ...
##  $ percbelowpoverty    : num  13.15 32.24 12.07 7.21 13.52 ...
##  $ percchildbelowpovert: num  18 45.8 14 11.2 13 ...
##  $ percadultpoverty    : num  11.01 27.39 10.85 5.54 11.14 ...
##  $ percelderlypoverty  : num  12.44 25.23 12.7 6.22 19.2 ...
##  $ inmetro             : int  0 0 0 1 0 0 0 0 0 1 ...
##  $ category            : chr  "AAR" "LHR" "AAR" "ALU" ...

3 Data Preprocessing


3.1 Q1)

popadults는 해당 지역의 성인 인구, poptotal은 전체 인구를 나타냅니다. midwest 데이터에 전체 인구 대비 미성년 인구 백분율 변수를 추가하세요.

PID county state area poptotal popdensity popwhite popblack popamerindian popasian popother percwhite percblack percamerindan percasian percother popadults perchsd percollege percprof poppovertyknown percpovertyknown percbelowpoverty percchildbelowpovert percadultpoverty percelderlypoverty inmetro category popkid
561 ADAMS IL 0.052 66090 1270.9615 63917 1702 98 249 124 96.71206 2.5752761 0.1482826 0.3767590 0.1876229 43298 75.10740 19.63139 4.355859 63628 96.27478 13.151443 18.01172 11.009776 12.443812 0 AAR 34.48631
562 ALEXANDER IL 0.014 10626 759.0000 7054 3496 19 48 9 66.38434 32.9004329 0.1788067 0.4517222 0.0846979 6724 59.72635 11.24331 2.870315 10529 99.08714 32.244278 45.82651 27.385647 25.228976 0 LHR 36.72125
563 BOND IL 0.022 14991 681.4091 14477 429 35 16 34 96.57128 2.8617170 0.2334734 0.1067307 0.2268028 9669 69.33499 17.03382 4.488572 14235 94.95697 12.068844 14.03606 10.852090 12.697410 0 AAR 35.50130
564 BOONE IL 0.017 30806 1812.1176 29344 127 46 150 1139 95.25417 0.4122574 0.1493216 0.4869181 3.6973317 19272 75.47219 17.27895 4.197800 30337 98.47757 7.209019 11.17954 5.536013 6.217047 1 ALU 37.44076
565 BROWN IL 0.018 5836 324.2222 5264 547 14 5 6 90.19877 9.3728581 0.2398903 0.0856751 0.1028101 3979 68.86152 14.47600 3.367680 4815 82.50514 13.520249 13.02289 11.143211 19.200000 0 AAR 31.81974
566 BUREAU IL 0.050 35688 713.7600 35157 50 65 195 221 98.51210 0.1401031 0.1821340 0.5464022 0.6192558 23444 76.62941 18.90462 3.275892 35107 98.37200 10.399635 14.15882 8.179287 11.008586 0 AAR 34.30845

3.2 Q2)

미성년 인구 백분율이 가장 높은 상위 5개 county(지역)의 미성년 인구 백분율을 출력하세요.

# Q2: the five counties with the highest share of minors (popkid).
# NOTE(review): midwest1 is not defined in the visible code; presumably it is
# the Q1 result (midwest plus a popkid percentage column) -- confirm.
midwest2 <- midwest1 %>%
  arrange(desc(popkid)) %>%       # highest percentage first
  select(county, popkid) %>%      # sorting before or after select is equivalent
  head(n = 5)

# midwest2 already holds exactly five rows, so it is rendered as-is.
knitr::kable(midwest2)

3.3 Q3)

분류표의 기준에 따라 미성년 비율 등급 변수를 추가하고, 각 등급에 몇 개의 지역이 있는지 알아보세요.

분류 기준
large 40%이상
middle 30~40%미만
small 30%미만
# Q3: grade every county by its share of minors, then count counties per grade.
#   large  : popkid >= 40
#   middle : 30 <= popkid < 40
#   small  : popkid < 30
# NOTE(review): midwest1 is not defined in the visible code; presumably it is
# the Q1 result (midwest plus a popkid percentage column) -- confirm.
midwest3 <- midwest1 %>%
  mutate(
    popkidclass = case_when(
      popkid >= 40 ~ "large",
      popkid >= 30 ~ "middle",
      popkid < 30 ~ "small"     # explicit test so a missing popkid stays NA
    )
  ) %>%
  group_by(popkidclass) %>%
  summarise(n = n())

# One row per grade, so the whole summary fits without truncation.
knitr::kable(midwest3)
popkidclass n
large 32
middle 396
small 9

3.4 Q4)

popasian은 해당 지역의 아시아인 인구를 나타냅니다. 전체 인구 대비 아시아인 인구 백분율 변수를 추가하고 하위 10개 지역의 state(주), county(지역), 아시아인 인구 백분율을 출력하세요.

# Q4: Asian population as a percentage of total population, per county,
# ordered from the smallest share upward.
# NOTE(review): midwest1 is not defined in the visible code; presumably it is
# the Q1 result (midwest plus a popkid percentage column) -- confirm.
midwest4 <- midwest1 %>%
  mutate(popasiantotal = popasian / poptotal * 100) %>%
  select(state, county, popasiantotal) %>%
  arrange(popasiantotal)

# Render only the ten counties with the lowest Asian population share.
midwest4 %>%
  head(n = 10) %>%
  knitr::kable()
state county popasiantotal
WI MENOMINEE 0.0000000
IN BENTON 0.0105921
IN CARROLL 0.0159498
OH VINTON 0.0270319
WI IRON 0.0325045
IL SCOTT 0.0531538
IN CLAY 0.0607165
MI OSCODA 0.0637592
OH PERRY 0.0665462
IL PIATT 0.0707486