1.변수명바꾸기
# Copy the mpg dataset bundled with ggplot2 into a plain data.frame.
mpg <- as.data.frame(ggplot2::mpg)
# Attach dplyr; its rename() is used in the steps that follow.
library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Build a renamed copy of mpg (cty -> city, hwy -> highway); mpg itself is
# left unchanged.
mpg_new <- mpg %>%
  rename(city = cty, highway = hwy)
head(mpg_new)
## manufacturer model displ year cyl trans drv city highway fl class
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
## 3 audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
## 4 audi a4 2.0 2008 4 auto(av) f 21 30 p compact
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compact
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compact
2.교과서 123쪽 문제 수행하기
Q1.
midwest <- as.data.frame(ggplot2::midwest)
head(midwest)
## PID county state area poptotal popdensity popwhite popblack popamerindian
## 1 561 ADAMS IL 0.052 66090 1270.9615 63917 1702 98
## 2 562 ALEXANDER IL 0.014 10626 759.0000 7054 3496 19
## 3 563 BOND IL 0.022 14991 681.4091 14477 429 35
## 4 564 BOONE IL 0.017 30806 1812.1176 29344 127 46
## 5 565 BROWN IL 0.018 5836 324.2222 5264 547 14
## 6 566 BUREAU IL 0.050 35688 713.7600 35157 50 65
## popasian popother percwhite percblack percamerindan percasian percother
## 1 249 124 96.71206 2.5752761 0.1482826 0.37675897 0.18762294
## 2 48 9 66.38434 32.9004329 0.1788067 0.45172219 0.08469791
## 3 16 34 96.57128 2.8617170 0.2334734 0.10673071 0.22680275
## 4 150 1139 95.25417 0.4122574 0.1493216 0.48691813 3.69733169
## 5 5 6 90.19877 9.3728581 0.2398903 0.08567512 0.10281014
## 6 195 221 98.51210 0.1401031 0.1821340 0.54640215 0.61925577
## popadults perchsd percollege percprof poppovertyknown percpovertyknown
## 1 43298 75.10740 19.63139 4.355859 63628 96.27478
## 2 6724 59.72635 11.24331 2.870315 10529 99.08714
## 3 9669 69.33499 17.03382 4.488572 14235 94.95697
## 4 19272 75.47219 17.27895 4.197800 30337 98.47757
## 5 3979 68.86152 14.47600 3.367680 4815 82.50514
## 6 23444 76.62941 18.90462 3.275891 35107 98.37200
## percbelowpoverty percchildbelowpovert percadultpoverty percelderlypoverty
## 1 13.151443 18.01172 11.009776 12.443812
## 2 32.244278 45.82651 27.385647 25.228976
## 3 12.068844 14.03606 10.852090 12.697410
## 4 7.209019 11.17954 5.536013 6.217047
## 5 13.520249 13.02289 11.143211 19.200000
## 6 10.399635 14.15882 8.179287 11.008586
## inmetro category
## 1 0 AAR
## 2 0 LHR
## 3 0 AAR
## 4 1 ALU
## 5 0 AAR
## 6 0 AAR
tail(midwest)
## PID county state area poptotal popdensity popwhite popblack
## 432 3047 WASHINGTON WI 0.025 95328 3813.1200 94465 125
## 433 3048 WAUKESHA WI 0.034 304715 8962.2059 298313 1096
## 434 3049 WAUPACA WI 0.045 46104 1024.5333 45695 22
## 435 3050 WAUSHARA WI 0.037 19385 523.9189 19094 29
## 436 3051 WINNEBAGO WI 0.035 140320 4009.1429 136822 697
## 437 3052 WOOD WI 0.048 73605 1533.4375 72157 90
## popamerindian popasian popother percwhite percblack percamerindan percasian
## 432 208 337 193 99.09470 0.1311262 0.2181940 0.3535163
## 433 672 2699 1935 97.89902 0.3596804 0.2205339 0.8857457
## 434 125 92 170 99.11288 0.0477182 0.2711262 0.1995488
## 435 70 43 149 98.49884 0.1496002 0.3611040 0.2218210
## 436 685 1728 388 97.50713 0.4967218 0.4881699 1.2314709
## 437 481 722 155 98.03274 0.1222743 0.6534882 0.9809116
## percother popadults perchsd percollege percprof poppovertyknown
## 432 0.2024589 59583 81.34032 23.39090 4.014568 94143
## 433 0.6350196 195837 87.98899 35.39678 7.667090 299802
## 434 0.3687316 30109 72.13790 16.54987 3.138596 44412
## 435 0.7686355 13316 70.00601 15.06458 2.620907 19163
## 436 0.2765108 88960 80.61938 24.99550 5.659847 133950
## 437 0.2105835 46796 78.29515 21.66638 4.583725 72685
## percpovertyknown percbelowpoverty percchildbelowpovert percadultpoverty
## 432 98.75692 3.237628 4.069854 2.584500
## 433 98.38767 3.121060 3.785820 2.590061
## 434 96.33004 8.488697 10.071411 6.953799
## 435 98.85478 13.786985 20.050708 11.695784
## 436 95.46038 8.804031 10.592031 8.660587
## 437 98.75008 8.525831 11.162997 7.375656
## percelderlypoverty inmetro category
## 432 4.280889 1 HLU
## 433 4.085479 1 HLU
## 434 10.338641 0 AAR
## 435 11.804558 0 AAR
## 436 6.661094 1 HAU
## 437 7.882918 0 AAR
View(midwest)
dim(midwest)
## [1] 437 28
str(midwest)
## 'data.frame': 437 obs. of 28 variables:
## $ PID : int 561 562 563 564 565 566 567 568 569 570 ...
## $ county : chr "ADAMS" "ALEXANDER" "BOND" "BOONE" ...
## $ state : chr "IL" "IL" "IL" "IL" ...
## $ area : num 0.052 0.014 0.022 0.017 0.018 0.05 0.017 0.027 0.024 0.058 ...
## $ poptotal : int 66090 10626 14991 30806 5836 35688 5322 16805 13437 173025 ...
## $ popdensity : num 1271 759 681 1812 324 ...
## $ popwhite : int 63917 7054 14477 29344 5264 35157 5298 16519 13384 146506 ...
## $ popblack : int 1702 3496 429 127 547 50 1 111 16 16559 ...
## $ popamerindian : int 98 19 35 46 14 65 8 30 8 331 ...
## $ popasian : int 249 48 16 150 5 195 15 61 23 8033 ...
## $ popother : int 124 9 34 1139 6 221 0 84 6 1596 ...
## $ percwhite : num 96.7 66.4 96.6 95.3 90.2 ...
## $ percblack : num 2.575 32.9 2.862 0.412 9.373 ...
## $ percamerindan : num 0.148 0.179 0.233 0.149 0.24 ...
## $ percasian : num 0.3768 0.4517 0.1067 0.4869 0.0857 ...
## $ percother : num 0.1876 0.0847 0.2268 3.6973 0.1028 ...
## $ popadults : int 43298 6724 9669 19272 3979 23444 3583 11323 8825 95971 ...
## $ perchsd : num 75.1 59.7 69.3 75.5 68.9 ...
## $ percollege : num 19.6 11.2 17 17.3 14.5 ...
## $ percprof : num 4.36 2.87 4.49 4.2 3.37 ...
## $ poppovertyknown : int 63628 10529 14235 30337 4815 35107 5241 16455 13081 154934 ...
## $ percpovertyknown : num 96.3 99.1 95 98.5 82.5 ...
## $ percbelowpoverty : num 13.15 32.24 12.07 7.21 13.52 ...
## $ percchildbelowpovert: num 18 45.8 14 11.2 13 ...
## $ percadultpoverty : num 11.01 27.39 10.85 5.54 11.14 ...
## $ percelderlypoverty : num 12.44 25.23 12.7 6.22 19.2 ...
## $ inmetro : int 0 0 0 1 0 0 0 0 0 1 ...
## $ category : chr "AAR" "LHR" "AAR" "ALU" ...
summary(midwest)
## PID county state area
## Min. : 561 Length:437 Length:437 Min. :0.00500
## 1st Qu.: 670 Class :character Class :character 1st Qu.:0.02400
## Median :1221 Mode :character Mode :character Median :0.03000
## Mean :1437 Mean :0.03317
## 3rd Qu.:2059 3rd Qu.:0.03800
## Max. :3052 Max. :0.11000
## poptotal popdensity popwhite popblack
## Min. : 1701 Min. : 85.05 Min. : 416 Min. : 0
## 1st Qu.: 18840 1st Qu.: 622.41 1st Qu.: 18630 1st Qu.: 29
## Median : 35324 Median : 1156.21 Median : 34471 Median : 201
## Mean : 96130 Mean : 3097.74 Mean : 81840 Mean : 11024
## 3rd Qu.: 75651 3rd Qu.: 2330.00 3rd Qu.: 72968 3rd Qu.: 1291
## Max. :5105067 Max. :88018.40 Max. :3204947 Max. :1317147
## popamerindian popasian popother percwhite
## Min. : 4.0 Min. : 0 Min. : 0 Min. :10.69
## 1st Qu.: 44.0 1st Qu.: 35 1st Qu.: 20 1st Qu.:94.89
## Median : 94.0 Median : 102 Median : 66 Median :98.03
## Mean : 343.1 Mean : 1310 Mean : 1613 Mean :95.56
## 3rd Qu.: 288.0 3rd Qu.: 401 3rd Qu.: 345 3rd Qu.:99.07
## Max. :10289.0 Max. :188565 Max. :384119 Max. :99.82
## percblack percamerindan percasian percother
## Min. : 0.0000 Min. : 0.05623 Min. :0.0000 Min. :0.00000
## 1st Qu.: 0.1157 1st Qu.: 0.15793 1st Qu.:0.1737 1st Qu.:0.09102
## Median : 0.5390 Median : 0.21502 Median :0.2972 Median :0.17844
## Mean : 2.6763 Mean : 0.79894 Mean :0.4872 Mean :0.47906
## 3rd Qu.: 2.6014 3rd Qu.: 0.38362 3rd Qu.:0.5212 3rd Qu.:0.48050
## Max. :40.2100 Max. :89.17738 Max. :5.0705 Max. :7.52427
## popadults perchsd percollege percprof
## Min. : 1287 Min. :46.91 Min. : 7.336 Min. : 0.5203
## 1st Qu.: 12271 1st Qu.:71.33 1st Qu.:14.114 1st Qu.: 2.9980
## Median : 22188 Median :74.25 Median :16.798 Median : 3.8142
## Mean : 60973 Mean :73.97 Mean :18.273 Mean : 4.4473
## 3rd Qu.: 47541 3rd Qu.:77.20 3rd Qu.:20.550 3rd Qu.: 4.9493
## Max. :3291995 Max. :88.90 Max. :48.079 Max. :20.7913
## poppovertyknown percpovertyknown percbelowpoverty percchildbelowpovert
## Min. : 1696 Min. :80.90 Min. : 2.180 Min. : 1.919
## 1st Qu.: 18364 1st Qu.:96.89 1st Qu.: 9.199 1st Qu.:11.624
## Median : 33788 Median :98.17 Median :11.822 Median :15.270
## Mean : 93642 Mean :97.11 Mean :12.511 Mean :16.447
## 3rd Qu.: 72840 3rd Qu.:98.60 3rd Qu.:15.133 3rd Qu.:20.352
## Max. :5023523 Max. :99.86 Max. :48.691 Max. :64.308
## percadultpoverty percelderlypoverty inmetro category
## Min. : 1.938 Min. : 3.547 Min. :0.0000 Length:437
## 1st Qu.: 7.668 1st Qu.: 8.912 1st Qu.:0.0000 Class :character
## Median :10.008 Median :10.869 Median :0.0000 Mode :character
## Mean :10.919 Mean :11.389 Mean :0.3432
## 3rd Qu.:13.182 3rd Qu.:13.412 3rd Qu.:1.0000
## Max. :43.312 Max. :31.162 Max. :1.0000
Q2.
# Build a renamed copy of midwest (poptotal -> total, popasian -> asian);
# midwest itself is left unchanged.
library(dplyr)
midwest_new <- midwest %>%
  rename(total = poptotal, asian = popasian)
head(midwest_new)
## PID county state area total popdensity popwhite popblack popamerindian
## 1 561 ADAMS IL 0.052 66090 1270.9615 63917 1702 98
## 2 562 ALEXANDER IL 0.014 10626 759.0000 7054 3496 19
## 3 563 BOND IL 0.022 14991 681.4091 14477 429 35
## 4 564 BOONE IL 0.017 30806 1812.1176 29344 127 46
## 5 565 BROWN IL 0.018 5836 324.2222 5264 547 14
## 6 566 BUREAU IL 0.050 35688 713.7600 35157 50 65
## asian popother percwhite percblack percamerindan percasian percother
## 1 249 124 96.71206 2.5752761 0.1482826 0.37675897 0.18762294
## 2 48 9 66.38434 32.9004329 0.1788067 0.45172219 0.08469791
## 3 16 34 96.57128 2.8617170 0.2334734 0.10673071 0.22680275
## 4 150 1139 95.25417 0.4122574 0.1493216 0.48691813 3.69733169
## 5 5 6 90.19877 9.3728581 0.2398903 0.08567512 0.10281014
## 6 195 221 98.51210 0.1401031 0.1821340 0.54640215 0.61925577
## popadults perchsd percollege percprof poppovertyknown percpovertyknown
## 1 43298 75.10740 19.63139 4.355859 63628 96.27478
## 2 6724 59.72635 11.24331 2.870315 10529 99.08714
## 3 9669 69.33499 17.03382 4.488572 14235 94.95697
## 4 19272 75.47219 17.27895 4.197800 30337 98.47757
## 5 3979 68.86152 14.47600 3.367680 4815 82.50514
## 6 23444 76.62941 18.90462 3.275891 35107 98.37200
## percbelowpoverty percchildbelowpovert percadultpoverty percelderlypoverty
## 1 13.151443 18.01172 11.009776 12.443812
## 2 32.244278 45.82651 27.385647 25.228976
## 3 12.068844 14.03606 10.852090 12.697410
## 4 7.209019 11.17954 5.536013 6.217047
## 5 13.520249 13.02289 11.143211 19.200000
## 6 10.399635 14.15882 8.179287 11.008586
## inmetro category
## 1 0 AAR
## 2 0 LHR
## 3 0 AAR
## 4 1 ALU
## 5 0 AAR
## 6 0 AAR
Q3.
# ratio: the asian column as a percentage of the total column, row by row.
midwest_new$ratio <- with(midwest_new, asian / total * 100)
head(midwest_new)
## PID county state area total popdensity popwhite popblack popamerindian
## 1 561 ADAMS IL 0.052 66090 1270.9615 63917 1702 98
## 2 562 ALEXANDER IL 0.014 10626 759.0000 7054 3496 19
## 3 563 BOND IL 0.022 14991 681.4091 14477 429 35
## 4 564 BOONE IL 0.017 30806 1812.1176 29344 127 46
## 5 565 BROWN IL 0.018 5836 324.2222 5264 547 14
## 6 566 BUREAU IL 0.050 35688 713.7600 35157 50 65
## asian popother percwhite percblack percamerindan percasian percother
## 1 249 124 96.71206 2.5752761 0.1482826 0.37675897 0.18762294
## 2 48 9 66.38434 32.9004329 0.1788067 0.45172219 0.08469791
## 3 16 34 96.57128 2.8617170 0.2334734 0.10673071 0.22680275
## 4 150 1139 95.25417 0.4122574 0.1493216 0.48691813 3.69733169
## 5 5 6 90.19877 9.3728581 0.2398903 0.08567512 0.10281014
## 6 195 221 98.51210 0.1401031 0.1821340 0.54640215 0.61925577
## popadults perchsd percollege percprof poppovertyknown percpovertyknown
## 1 43298 75.10740 19.63139 4.355859 63628 96.27478
## 2 6724 59.72635 11.24331 2.870315 10529 99.08714
## 3 9669 69.33499 17.03382 4.488572 14235 94.95697
## 4 19272 75.47219 17.27895 4.197800 30337 98.47757
## 5 3979 68.86152 14.47600 3.367680 4815 82.50514
## 6 23444 76.62941 18.90462 3.275891 35107 98.37200
## percbelowpoverty percchildbelowpovert percadultpoverty percelderlypoverty
## 1 13.151443 18.01172 11.009776 12.443812
## 2 32.244278 45.82651 27.385647 25.228976
## 3 12.068844 14.03606 10.852090 12.697410
## 4 7.209019 11.17954 5.536013 6.217047
## 5 13.520249 13.02289 11.143211 19.200000
## 6 10.399635 14.15882 8.179287 11.008586
## inmetro category ratio
## 1 0 AAR 0.37675897
## 2 0 LHR 0.45172219
## 3 0 AAR 0.10673071
## 4 1 ALU 0.48691813
## 5 0 AAR 0.08567512
## 6 0 AAR 0.54640215
hist(midwest_new$ratio)

Q4.
mean(midwest_new$ratio)
## [1] 0.4872462
# Label each row "large" when its ratio exceeds the overall mean ratio, and
# "small" otherwise. The threshold is computed from the data instead of being
# pasted from the printed mean, which is rounded to 7 significant digits and
# would silently go stale if the data changed.
ratio_mean <- mean(midwest_new$ratio)
midwest_new$group <- ifelse(midwest_new$ratio > ratio_mean, "large", "small")
head(midwest_new)
## PID county state area total popdensity popwhite popblack popamerindian
## 1 561 ADAMS IL 0.052 66090 1270.9615 63917 1702 98
## 2 562 ALEXANDER IL 0.014 10626 759.0000 7054 3496 19
## 3 563 BOND IL 0.022 14991 681.4091 14477 429 35
## 4 564 BOONE IL 0.017 30806 1812.1176 29344 127 46
## 5 565 BROWN IL 0.018 5836 324.2222 5264 547 14
## 6 566 BUREAU IL 0.050 35688 713.7600 35157 50 65
## asian popother percwhite percblack percamerindan percasian percother
## 1 249 124 96.71206 2.5752761 0.1482826 0.37675897 0.18762294
## 2 48 9 66.38434 32.9004329 0.1788067 0.45172219 0.08469791
## 3 16 34 96.57128 2.8617170 0.2334734 0.10673071 0.22680275
## 4 150 1139 95.25417 0.4122574 0.1493216 0.48691813 3.69733169
## 5 5 6 90.19877 9.3728581 0.2398903 0.08567512 0.10281014
## 6 195 221 98.51210 0.1401031 0.1821340 0.54640215 0.61925577
## popadults perchsd percollege percprof poppovertyknown percpovertyknown
## 1 43298 75.10740 19.63139 4.355859 63628 96.27478
## 2 6724 59.72635 11.24331 2.870315 10529 99.08714
## 3 9669 69.33499 17.03382 4.488572 14235 94.95697
## 4 19272 75.47219 17.27895 4.197800 30337 98.47757
## 5 3979 68.86152 14.47600 3.367680 4815 82.50514
## 6 23444 76.62941 18.90462 3.275891 35107 98.37200
## percbelowpoverty percchildbelowpovert percadultpoverty percelderlypoverty
## 1 13.151443 18.01172 11.009776 12.443812
## 2 32.244278 45.82651 27.385647 25.228976
## 3 12.068844 14.03606 10.852090 12.697410
## 4 7.209019 11.17954 5.536013 6.217047
## 5 13.520249 13.02289 11.143211 19.200000
## 6 10.399635 14.15882 8.179287 11.008586
## inmetro category ratio group
## 1 0 AAR 0.37675897 small
## 2 0 LHR 0.45172219 small
## 3 0 AAR 0.10673071 small
## 4 1 ALU 0.48691813 small
## 5 0 AAR 0.08567512 small
## 6 0 AAR 0.54640215 large
Q5.
table(midwest_new$group)
##
## large small
## 119 318
library(ggplot2)
##
## 다음의 패키지를 부착합니다: 'ggplot2'
## The following objects are masked _by_ '.GlobalEnv':
##
## midwest, mpg
# Bar chart of how many rows fall in each group. ggplot() + geom_bar() is used
# in place of qplot(), which is deprecated as of ggplot2 3.4.0.
ggplot(midwest_new, aes(x = group)) +
  geom_bar()

3.엑셀파일 불러들여 데이터 살펴보고 과제 수행하기
Q1.
# Read the second sheet of the workbook; mlu_data keeps the raw import and
# mlu_data_new is the working copy modified in later steps.
library(readxl)
# NOTE(review): absolute, machine-specific path -- the script only runs where
# this exact file exists.
mlu_path <- "C:\\Users\\user\\Documents\\20220124\\mlu.xls"
mlu_data <- read_excel(path = mlu_path, sheet = 2)
mlu_data_new <- mlu_data
Q2.
dim(mlu_data_new)
## [1] 35 8
35개
Q3.
# Shorten two column names: utterances_mlu -> utterances, words_mlu -> words.
library(dplyr)
mlu_data_new <- mlu_data_new %>%
  rename(utterances = utterances_mlu, words = words_mlu)
head(mlu_data_new)
## # A tibble: 6 × 8
## File age utterances words DurationTime DurationSec Types_freq Token_freq
## <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 13_A0P0… A0 566 1290 "00:17:35" 1055 580 1346
## 2 21_A0P0… A0 565 1602 "00:20:44" 1244 737 1606
## 3 27_A0P0… A0 470 813 "00:12:07" 727 378 832
## 4 28_A0P0… A0 371 976 "00:11:53" 713 419 979
## 5 29_A0P0… A0 802 2239 "00:24:45" 1485 814 2253
## 6 2_A0P01… A0 563 1243 "00:12:06\"" NA 425 1263
Q4.
# mlu: the words column divided by the utterances column, row by row.
mlu_data_new$mlu <- with(mlu_data_new, words / utterances)
head(mlu_data_new)
## # A tibble: 6 × 9
## File age utterances words DurationTime DurationSec Types_freq Token_freq
## <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 13_A0P0… A0 566 1290 "00:17:35" 1055 580 1346
## 2 21_A0P0… A0 565 1602 "00:20:44" 1244 737 1606
## 3 27_A0P0… A0 470 813 "00:12:07" 727 378 832
## 4 28_A0P0… A0 371 976 "00:11:53" 713 419 979
## 5 29_A0P0… A0 802 2239 "00:24:45" 1485 814 2253
## 6 2_A0P01… A0 563 1243 "00:12:06\"" NA 425 1263
## # ℹ 1 more variable: mlu <dbl>
Q5.
summary(mlu_data_new)
## File age utterances words
## Length:35 Length:35 Min. :323.0 Min. : 813
## Class :character Class :character 1st Qu.:561.0 1st Qu.:1368
## Mode :character Mode :character Median :621.0 Median :1716
## Mean :631.8 Mean :1710
## 3rd Qu.:716.0 3rd Qu.:2060
## Max. :890.0 Max. :2766
##
## DurationTime DurationSec Types_freq Token_freq
## Length:35 Min. : 527 Min. : 378.0 Min. : 832
## Class :character 1st Qu.: 924 1st Qu.: 567.5 1st Qu.:1446
## Mode :character Median :1060 Median : 694.0 Median :1798
## Mean :1086 Mean : 669.1 Mean :1778
## 3rd Qu.:1246 3rd Qu.: 775.5 3rd Qu.:2134
## Max. :1762 Max. :1014.0 Max. :2827
## NA's :1
## mlu
## Min. :1.730
## 1st Qu.:2.447
## Median :2.745
## Mean :2.696
## 3rd Qu.:2.916
## Max. :3.476
##
평균:2.696, 1st Qu:2.447, 3rd Qu:2.916
Q6.
# Grade by mlu: "A" for >= 3.4, "B" for >= 2.9, "C" for >= 2.7, otherwise "D".
# findInterval() returns how many cut points are <= mlu (0 to 3); adding 1
# turns that count into an index into the label vector. NA stays NA.
grade_cuts <- c(2.7, 2.9, 3.4)
grade_labels <- c("D", "C", "B", "A")
mlu_data_new$grade <-
  grade_labels[findInterval(mlu_data_new$mlu, grade_cuts) + 1L]
head(mlu_data_new)
## # A tibble: 6 × 10
## File age utterances words DurationTime DurationSec Types_freq Token_freq
## <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 13_A0P0… A0 566 1290 "00:17:35" 1055 580 1346
## 2 21_A0P0… A0 565 1602 "00:20:44" 1244 737 1606
## 3 27_A0P0… A0 470 813 "00:12:07" 727 378 832
## 4 28_A0P0… A0 371 976 "00:11:53" 713 419 979
## 5 29_A0P0… A0 802 2239 "00:24:45" 1485 814 2253
## 6 2_A0P01… A0 563 1243 "00:12:06\"" NA 425 1263
## # ℹ 2 more variables: mlu <dbl>, grade <chr>
Q8.
# Scatter plot of mlu against age. ggplot() + geom_point() is used in place of
# qplot(), which is deprecated as of ggplot2 3.4.0.
ggplot(mlu_data_new, aes(x = age, y = mlu)) +
  geom_point()
