q.1 복사본 만들기
library(ggplot2)
# Copy the mpg dataset shipped with ggplot2 into a plain data.frame named mpg,
# so later steps modify this copy rather than the packaged data.
mpg <- as.data.frame(ggplot2::mpg)
library를 통해 ggplot2를 불러오고, 보기 쉽도록 ggplot2 내의 mpg 데이터를 데이터 프레임 형식으로 복사하였다.
q.2
library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Give the mileage columns descriptive names (new_name = old_name):
# cty becomes city and hwy becomes highway, in a single rename() call.
mpg <- rename(
  mpg,
  city = cty,
  highway = hwy
)
q.3
# Print the first rows of mpg; the column header should now show
# city and highway in place of cty and hwy.
head(mpg)
## manufacturer model displ year cyl trans drv city highway fl class
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
## 3 audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
## 4 audi a4 2.0 2008 4 auto(av) f 21 30 p compact
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compact
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compact
문제1 복사본 만들기
# Copy the midwest dataset shipped with ggplot2 into a plain data.frame
# named midwest; all later steps work on this copy.
midwest <- as.data.frame(ggplot2::midwest)
데이터 특징 확인하기 요약 통계량 확인하기
# Print per-column summary statistics (min, quartiles, mean, max for numeric
# columns; length/class/mode for character columns).
summary(midwest)
## PID county state area
## Min. : 561 Length:437 Length:437 Min. :0.00500
## 1st Qu.: 670 Class :character Class :character 1st Qu.:0.02400
## Median :1221 Mode :character Mode :character Median :0.03000
## Mean :1437 Mean :0.03317
## 3rd Qu.:2059 3rd Qu.:0.03800
## Max. :3052 Max. :0.11000
## poptotal popdensity popwhite popblack
## Min. : 1701 Min. : 85.05 Min. : 416 Min. : 0
## 1st Qu.: 18840 1st Qu.: 622.41 1st Qu.: 18630 1st Qu.: 29
## Median : 35324 Median : 1156.21 Median : 34471 Median : 201
## Mean : 96130 Mean : 3097.74 Mean : 81840 Mean : 11024
## 3rd Qu.: 75651 3rd Qu.: 2330.00 3rd Qu.: 72968 3rd Qu.: 1291
## Max. :5105067 Max. :88018.40 Max. :3204947 Max. :1317147
## popamerindian popasian popother percwhite
## Min. : 4.0 Min. : 0 Min. : 0 Min. :10.69
## 1st Qu.: 44.0 1st Qu.: 35 1st Qu.: 20 1st Qu.:94.89
## Median : 94.0 Median : 102 Median : 66 Median :98.03
## Mean : 343.1 Mean : 1310 Mean : 1613 Mean :95.56
## 3rd Qu.: 288.0 3rd Qu.: 401 3rd Qu.: 345 3rd Qu.:99.07
## Max. :10289.0 Max. :188565 Max. :384119 Max. :99.82
## percblack percamerindan percasian percother
## Min. : 0.0000 Min. : 0.05623 Min. :0.0000 Min. :0.00000
## 1st Qu.: 0.1157 1st Qu.: 0.15793 1st Qu.:0.1737 1st Qu.:0.09102
## Median : 0.5390 Median : 0.21502 Median :0.2972 Median :0.17844
## Mean : 2.6763 Mean : 0.79894 Mean :0.4872 Mean :0.47906
## 3rd Qu.: 2.6014 3rd Qu.: 0.38362 3rd Qu.:0.5212 3rd Qu.:0.48050
## Max. :40.2100 Max. :89.17738 Max. :5.0705 Max. :7.52427
## popadults perchsd percollege percprof
## Min. : 1287 Min. :46.91 Min. : 7.336 Min. : 0.5203
## 1st Qu.: 12271 1st Qu.:71.33 1st Qu.:14.114 1st Qu.: 2.9980
## Median : 22188 Median :74.25 Median :16.798 Median : 3.8142
## Mean : 60973 Mean :73.97 Mean :18.273 Mean : 4.4473
## 3rd Qu.: 47541 3rd Qu.:77.20 3rd Qu.:20.550 3rd Qu.: 4.9493
## Max. :3291995 Max. :88.90 Max. :48.079 Max. :20.7913
## poppovertyknown percpovertyknown percbelowpoverty percchildbelowpovert
## Min. : 1696 Min. :80.90 Min. : 2.180 Min. : 1.919
## 1st Qu.: 18364 1st Qu.:96.89 1st Qu.: 9.199 1st Qu.:11.624
## Median : 33788 Median :98.17 Median :11.822 Median :15.270
## Mean : 93642 Mean :97.11 Mean :12.511 Mean :16.447
## 3rd Qu.: 72840 3rd Qu.:98.60 3rd Qu.:15.133 3rd Qu.:20.352
## Max. :5023523 Max. :99.86 Max. :48.691 Max. :64.308
## percadultpoverty percelderlypoverty inmetro category
## Min. : 1.938 Min. : 3.547 Min. :0.0000 Length:437
## 1st Qu.: 7.668 1st Qu.: 8.912 1st Qu.:0.0000 Class :character
## Median :10.008 Median :10.869 Median :0.0000 Mode :character
## Mean :10.919 Mean :11.389 Mean :0.3432
## 3rd Qu.:13.182 3rd Qu.:13.412 3rd Qu.:1.0000
## Max. :43.312 Max. :31.162 Max. :1.0000
데이터를 확인하는 방법 중 summary를 통해 전반적인 통계량을 확인해보았다
문제 2
# Shorten the population column names (new_name = old_name):
# poptotal becomes total and popasian becomes asian.
midwest <- rename(
  midwest,
  total = poptotal,
  asian = popasian
)
문제 3 전체 인구 대비 아시아 인구 백분율은 (아시아 인구/전체 인구)*100으로 구합니다. 따라서 계산식은
# Asian population as a percentage of the total population, per row.
midwest$asiper <- with(midwest, asian / total * 100)
그래프로 그려보면
# Bar chart of the Asian-population percentage (asiper) for every county,
# with bars coloured by state.
# Columns are named bare inside aes() so ggplot2 looks them up in `midwest`.
# Writing midwest$county there triggers "Use of `midwest$county` is
# discouraged" warnings and labels the axes "midwest$county" etc.
# geom_col() draws bar heights straight from y, i.e. it is the same as
# geom_bar(stat = "identity").
ggplot(midwest, aes(x = county, y = asiper, fill = state)) +
  geom_col(position = "dodge")
의 그래프를 통해서 아시아인들의 분포를 볼 수 있다. 하지만 카운티(county)가 너무
많은 관계로 잘 안 보인다는 문제가 있었다.
# Same county-level bar chart of asiper, split into one panel per state.
# Both aes() and the facet formula use bare column names so they are looked
# up in `midwest`. Referring to midwest$state inside them bypasses the
# data argument and produces the "Use of `midwest$...` is discouraged"
# warnings.
ggplot(midwest, aes(x = county, y = asiper, fill = state)) +
  geom_col(position = "dodge") +
  facet_wrap(~state)
facet_wrap이란 함수를 이용해서 주마다의 아시아인 백분율 차이를
살펴보았다.
문제4 평균구하기
# Print the mean of asiper across all rows of midwest.
mean(midwest$asiper)
## [1] 0.4872462
조건붙이기
# Label each row "large" when its asiper is above the overall mean asiper,
# otherwise "small".
# The mean is computed here instead of pasting in the rounded value printed
# by mean(); the cutoff is then exact and stays correct if the data changes.
midwest$aspean <- ifelse(
  midwest$asiper > mean(midwest$asiper),
  "large",
  "small"
)
문제 5 빈도표 출력
# Frequency table: number of rows labelled "large" and "small".
table(midwest$aspean)
##
## large small
## 119 318
빈도 막대그래프 출력
# Bar chart of how many rows fall into each aspean label.
# qplot() was deprecated in ggplot2 3.4.0. The ggplot() + geom_bar() form
# below draws the same count-per-category bars without the deprecation
# warning.
ggplot(midwest, aes(x = aspean)) +
  geom_bar()
1
library(readxl)
# Read the second sheet of the workbook into mlu_data.
# NOTE(review): this absolute Windows path likely resolves only on the
# author's machine; a project-relative path would be portable. Confirm.
mlu_data <- read_excel(
  "C:/Users/Administrator/Documents/rmarkdown/mlu.xls",
  sheet = 2
)
2
# Count how many rows belong to each age group, keep the table, and show it.
age <- table(mlu_data$age)
print(age)
##
## A0 A1 A2
## 12 11 12
3
# Drop the "_mlu" suffix from the two count columns (new_name = old_name).
mlu_data <- rename(
  mlu_data,
  utterances = utterances_mlu,
  words = words_mlu
)
4
# Divide the utterances column by the words column, row by row, and store
# the ratio as mlu.
# NOTE(review): mean length of utterance is conventionally words divided by
# utterances; this computes the reciprocal. Confirm the intended direction
# against the assignment before changing it, since the grade thresholds and
# the printed output further down depend on the current values.
mlu_data$mlu <- mlu_data$utterances / mlu_data$words
5
# Print the five-number summary plus mean of mlu; these statistics are the
# cutoffs used for the grades assigned in the next step.
summary(mlu_data$mlu)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.2877 0.3429 0.3642 0.3789 0.4087 0.5781
6
# Grade every row by where its mlu sits relative to three cutoffs:
#   "A": above the 3rd quartile
#   "B": above the mean (but not above the 3rd quartile)
#   "C": above the 1st quartile (but not above the mean)
#   "D": everything else
# The cutoffs are computed from the data rather than copied from the rounded
# summary() printout, so rows close to a cutoff cannot be misgraded by
# rounding and the grades follow the data if it changes.
# names = FALSE keeps the quantile labels ("25%", "75%") off the result.
mlu_quartiles <- quantile(
  mlu_data$mlu,
  probs = c(0.25, 0.75),
  names = FALSE,
  na.rm = TRUE
)
mlu_mean <- mean(mlu_data$mlu, na.rm = TRUE)

# The third label was lowercase "c"; it is now "C" to match the other grades.
mlu_data$mlu_grade <- ifelse(
  mlu_data$mlu > mlu_quartiles[2], "A",
  ifelse(
    mlu_data$mlu > mlu_mean, "B",
    ifelse(mlu_data$mlu > mlu_quartiles[1], "C", "D")
  )
)
7
# Cross-tabulate age group (rows) against the raw mlu values (columns).
# NOTE(review): mlu is continuous, so almost every value gets its own column
# and each cell is 0 or 1. The mlu_grade column created in the previous step
# may have been the intended second variable; confirm against the assignment.
table(mlu_data$age,mlu_data$mlu)
##
## 0.287679425837321 0.298162729658793 0.299349240780911 0.314020344980097
## A0 0 0 0 0
## A1 0 0 0 0
## A2 1 1 1 1
##
## 0.330183106910809 0.330522765598651 0.335487077534791 0.336656891495601
## A0 0 0 0 1
## A1 1 0 0 0
## A2 0 1 1 0
##
## 0.341048653755314 0.344771241830065 0.350963914977756 0.352684144818976
## A0 0 1 0 1
## A1 0 0 0 0
## A2 1 0 1 0
##
## 0.354716981132075 0.355271565495208 0.356581017304667 0.358195623046003
## A0 1 0 0 1
## A1 0 0 1 0
## A2 0 1 0 0
##
## 0.362921348314607 0.364244741873805 0.371415566681839 0.37212449255751
## A0 0 0 0 0
## A1 0 1 1 0
## A2 1 0 0 1
##
## 0.37568058076225 0.37881679389313 0.380122950819672 0.381700054141852
## A0 0 0 1 1
## A1 1 1 0 0
## A2 0 0 0 0
##
## 0.393457117595049 0.405594405594406 0.411764705882353 0.416149068322981
## A0 0 0 0 0
## A1 1 0 1 1
## A2 0 1 0 0
##
## 0.424221453287197 0.438759689922481 0.452579034941764 0.452936444086887
## A0 0 1 0 1
## A1 1 0 1 0
## A2 0 0 0 0
##
## 0.469242902208202 0.485585585585586 0.578105781057811
## A0 1 1 1
## A1 0 0 0
## A2 0 0 0
문제가 잘못됐다고 하심
8
# Scatter plot of mlu for each age group.
# qplot() was deprecated in ggplot2 3.4.0; with both x and y supplied it drew
# points, so ggplot() + geom_point() is the direct replacement. Bare column
# names in aes() are looked up in mlu_data and give clean axis labels.
ggplot(mlu_data, aes(x = age, y = mlu)) +
  geom_point()