# Session setup.
# The earlier `rm(list = ls())` and `setwd("c:/R")` calls were removed:
# clearing the global environment does not reset loaded packages or options,
# and a hard-coded working directory ties the script to a single machine.
# Start R (or open the project) in the directory that holds the data files;
# getwd() below only reports where the relative paths will be resolved.
getwd()
## [1] "C:/R"
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(readxl)
# Read the 2019 Seoul air-pollution workbook (first sheet) and show its
# column names and types.
seoulair <- read_excel(path = "서울대기오염_2019.xlsx")
str(seoulair)
## tibble [9,491 x 8] (S3: tbl_df/tbl/data.frame)
## $ 날짜 : chr [1:9491] "전체" "2019-12-31" "2019-12-31" "2019-12-31" ...
## $ 측정소명 : chr [1:9491] "평균" "평균" "강남구" "강동구" ...
## $ 미세먼지 : num [1:9491] 42 26 22 27 31 29 36 22 25 23 ...
## $ 초미세먼지 : num [1:9491] 25 15 14 19 17 16 18 10 16 18 ...
## $ 오존 : num [1:9491] 0.025 0.022 0.025 0.019 0.022 0.022 0.026 0.019 0.022 0.019 ...
## $ 이산화질소
## NO2 (ppm): num [1:9491] 0.028 0.016 0.014 0.02 0.022 0.017 0.013 0.016 0.02 0.019 ...
## $ 일산화탄소
## CO (ppm) : num [1:9491] 0.5 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.3 0.4 ...
## $ 아황산가스
## SO2(ppm) : num [1:9491] 0.004 0.003 0.003 0.003 0.002 0.004 0.003 0.002 0.004 0.003 ...
# Keep only the date, station and the two particulate columns, giving them
# short English names in the same step (select() renames while selecting).
seoulair <- select(
  seoulair,
  date = "날짜",
  district = "측정소명",
  pm10 = "미세먼지",
  pm2.5 = "초미세먼지"
)
# Frequency of each date value, to spot non-date rows.
table(seoulair[["date"]])
##
## 2019-01-01 2019-01-02 2019-01-03 2019-01-04 2019-01-05 2019-01-06 2019-01-07
## 26 26 26 26 26 26 26
## 2019-01-08 2019-01-09 2019-01-10 2019-01-11 2019-01-12 2019-01-13 2019-01-14
## 26 26 26 26 26 26 26
## 2019-01-15 2019-01-16 2019-01-17 2019-01-18 2019-01-19 2019-01-20 2019-01-21
## 26 26 26 26 26 26 26
## 2019-01-22 2019-01-23 2019-01-24 2019-01-25 2019-01-26 2019-01-27 2019-01-28
## 26 26 26 26 26 26 26
## 2019-01-29 2019-01-30 2019-01-31 2019-02-01 2019-02-02 2019-02-03 2019-02-04
## 26 26 26 26 26 26 26
## 2019-02-05 2019-02-06 2019-02-07 2019-02-08 2019-02-09 2019-02-10 2019-02-11
## 26 26 26 26 26 26 26
## 2019-02-12 2019-02-13 2019-02-14 2019-02-15 2019-02-16 2019-02-17 2019-02-18
## 26 26 26 26 26 26 26
## 2019-02-19 2019-02-20 2019-02-21 2019-02-22 2019-02-23 2019-02-24 2019-02-25
## 26 26 26 26 26 26 26
## 2019-02-26 2019-02-27 2019-02-28 2019-03-01 2019-03-02 2019-03-03 2019-03-04
## 26 26 26 26 26 26 26
## 2019-03-05 2019-03-06 2019-03-07 2019-03-08 2019-03-09 2019-03-10 2019-03-11
## 26 26 26 26 26 26 26
## 2019-03-12 2019-03-13 2019-03-14 2019-03-15 2019-03-16 2019-03-17 2019-03-18
## 26 26 26 26 26 26 26
## 2019-03-19 2019-03-20 2019-03-21 2019-03-22 2019-03-23 2019-03-24 2019-03-25
## 26 26 26 26 26 26 26
## 2019-03-26 2019-03-27 2019-03-28 2019-03-29 2019-03-30 2019-03-31 2019-04-01
## 26 26 26 26 26 26 26
## 2019-04-02 2019-04-03 2019-04-04 2019-04-05 2019-04-06 2019-04-07 2019-04-08
## 26 26 26 26 26 26 26
## 2019-04-09 2019-04-10 2019-04-11 2019-04-12 2019-04-13 2019-04-14 2019-04-15
## 26 26 26 26 26 26 26
## 2019-04-16 2019-04-17 2019-04-18 2019-04-19 2019-04-20 2019-04-21 2019-04-22
## 26 26 26 26 26 26 26
## 2019-04-23 2019-04-24 2019-04-25 2019-04-26 2019-04-27 2019-04-28 2019-04-29
## 26 26 26 26 26 26 26
## 2019-04-30 2019-05-01 2019-05-02 2019-05-03 2019-05-04 2019-05-05 2019-05-06
## 26 26 26 26 26 26 26
## 2019-05-07 2019-05-08 2019-05-09 2019-05-10 2019-05-11 2019-05-12 2019-05-13
## 26 26 26 26 26 26 26
## 2019-05-14 2019-05-15 2019-05-16 2019-05-17 2019-05-18 2019-05-19 2019-05-20
## 26 26 26 26 26 26 26
## 2019-05-21 2019-05-22 2019-05-23 2019-05-24 2019-05-25 2019-05-26 2019-05-27
## 26 26 26 26 26 26 26
## 2019-05-28 2019-05-29 2019-05-30 2019-05-31 2019-06-01 2019-06-02 2019-06-03
## 26 26 26 26 26 26 26
## 2019-06-04 2019-06-05 2019-06-06 2019-06-07 2019-06-08 2019-06-09 2019-06-10
## 26 26 26 26 26 26 26
## 2019-06-11 2019-06-12 2019-06-13 2019-06-14 2019-06-15 2019-06-16 2019-06-17
## 26 26 26 26 26 26 26
## 2019-06-18 2019-06-19 2019-06-20 2019-06-21 2019-06-22 2019-06-23 2019-06-24
## 26 26 26 26 26 26 26
## 2019-06-25 2019-06-26 2019-06-27 2019-06-28 2019-06-29 2019-06-30 2019-07-01
## 26 26 26 26 26 26 26
## 2019-07-02 2019-07-03 2019-07-04 2019-07-05 2019-07-06 2019-07-07 2019-07-08
## 26 26 26 26 26 26 26
## 2019-07-09 2019-07-10 2019-07-11 2019-07-12 2019-07-13 2019-07-14 2019-07-15
## 26 26 26 26 26 26 26
## 2019-07-16 2019-07-17 2019-07-18 2019-07-19 2019-07-20 2019-07-21 2019-07-22
## 26 26 26 26 26 26 26
## 2019-07-23 2019-07-24 2019-07-25 2019-07-26 2019-07-27 2019-07-28 2019-07-29
## 26 26 26 26 26 26 26
## 2019-07-30 2019-07-31 2019-08-01 2019-08-02 2019-08-03 2019-08-04 2019-08-05
## 26 26 26 26 26 26 26
## 2019-08-06 2019-08-07 2019-08-08 2019-08-09 2019-08-10 2019-08-11 2019-08-12
## 26 26 26 26 26 26 26
## 2019-08-13 2019-08-14 2019-08-15 2019-08-16 2019-08-17 2019-08-18 2019-08-19
## 26 26 26 26 26 26 26
## 2019-08-20 2019-08-21 2019-08-22 2019-08-23 2019-08-24 2019-08-25 2019-08-26
## 26 26 26 26 26 26 26
## 2019-08-27 2019-08-28 2019-08-29 2019-08-30 2019-08-31 2019-09-01 2019-09-02
## 26 26 26 26 26 26 26
## 2019-09-03 2019-09-04 2019-09-05 2019-09-06 2019-09-07 2019-09-08 2019-09-09
## 26 26 26 26 26 26 26
## 2019-09-10 2019-09-11 2019-09-12 2019-09-13 2019-09-14 2019-09-15 2019-09-16
## 26 26 26 26 26 26 26
## 2019-09-17 2019-09-18 2019-09-19 2019-09-20 2019-09-21 2019-09-22 2019-09-23
## 26 26 26 26 26 26 26
## 2019-09-24 2019-09-25 2019-09-26 2019-09-27 2019-09-28 2019-09-29 2019-09-30
## 26 26 26 26 26 26 26
## 2019-10-01 2019-10-02 2019-10-03 2019-10-04 2019-10-05 2019-10-06 2019-10-07
## 26 26 26 26 26 26 26
## 2019-10-08 2019-10-09 2019-10-10 2019-10-11 2019-10-12 2019-10-13 2019-10-14
## 26 26 26 26 26 26 26
## 2019-10-15 2019-10-16 2019-10-17 2019-10-18 2019-10-19 2019-10-20 2019-10-21
## 26 26 26 26 26 26 26
## 2019-10-22 2019-10-23 2019-10-24 2019-10-25 2019-10-26 2019-10-27 2019-10-28
## 26 26 26 26 26 26 26
## 2019-10-29 2019-10-30 2019-10-31 2019-11-01 2019-11-02 2019-11-03 2019-11-04
## 26 26 26 26 26 26 26
## 2019-11-05 2019-11-06 2019-11-07 2019-11-08 2019-11-09 2019-11-10 2019-11-11
## 26 26 26 26 26 26 26
## 2019-11-12 2019-11-13 2019-11-14 2019-11-15 2019-11-16 2019-11-17 2019-11-18
## 26 26 26 26 26 26 26
## 2019-11-19 2019-11-20 2019-11-21 2019-11-22 2019-11-23 2019-11-24 2019-11-25
## 26 26 26 26 26 26 26
## 2019-11-26 2019-11-27 2019-11-28 2019-11-29 2019-11-30 2019-12-01 2019-12-02
## 26 26 26 26 26 26 26
## 2019-12-03 2019-12-04 2019-12-05 2019-12-06 2019-12-07 2019-12-08 2019-12-09
## 26 26 26 26 26 26 26
## 2019-12-10 2019-12-11 2019-12-12 2019-12-13 2019-12-14 2019-12-15 2019-12-16
## 26 26 26 26 26 26 26
## 2019-12-17 2019-12-18 2019-12-19 2019-12-20 2019-12-21 2019-12-22 2019-12-23
## 26 26 26 26 26 26 26
## 2019-12-24 2019-12-25 2019-12-26 2019-12-27 2019-12-28 2019-12-29 2019-12-30
## 26 26 26 26 26 26 26
## 2019-12-31 전체
## 26 1
# Same frequency check as a tibble: number of rows per date value.
count(seoulair, date)
## # A tibble: 366 x 2
## date n
## <chr> <int>
## 1 2019-01-01 26
## 2 2019-01-02 26
## 3 2019-01-03 26
## 4 2019-01-04 26
## 5 2019-01-05 26
## 6 2019-01-06 26
## 7 2019-01-07 26
## 8 2019-01-08 26
## 9 2019-01-09 26
## 10 2019-01-10 26
## # ... with 356 more rows
# Number of rows per measuring station (still includes the aggregate row label).
table(seoulair[["district"]])
##
## 강남구 강동구 강북구 강서구 관악구 광진구 구로구 금천구
## 365 365 365 365 365 365 365 365
## 노원구 도봉구 동대문구 동작구 마포구 서대문구 서초구 성동구
## 365 365 365 365 365 365 365 365
## 성북구 송파구 양천구 영등포구 용산구 은평구 종로구 중구
## 365 365 365 365 365 365 365 365
## 중랑구 평균
## 365 366
# Drop the aggregate rows: the "전체" (whole period) date row and every
# "평균" (average) station row, leaving only per-district daily records.
seoulair <- filter(seoulair, date != "전체", district != "평균")
# Re-check the date frequencies after filtering.
table(seoulair[["date"]])
##
## 2019-01-01 2019-01-02 2019-01-03 2019-01-04 2019-01-05 2019-01-06 2019-01-07
## 25 25 25 25 25 25 25
## 2019-01-08 2019-01-09 2019-01-10 2019-01-11 2019-01-12 2019-01-13 2019-01-14
## 25 25 25 25 25 25 25
## 2019-01-15 2019-01-16 2019-01-17 2019-01-18 2019-01-19 2019-01-20 2019-01-21
## 25 25 25 25 25 25 25
## 2019-01-22 2019-01-23 2019-01-24 2019-01-25 2019-01-26 2019-01-27 2019-01-28
## 25 25 25 25 25 25 25
## 2019-01-29 2019-01-30 2019-01-31 2019-02-01 2019-02-02 2019-02-03 2019-02-04
## 25 25 25 25 25 25 25
## 2019-02-05 2019-02-06 2019-02-07 2019-02-08 2019-02-09 2019-02-10 2019-02-11
## 25 25 25 25 25 25 25
## 2019-02-12 2019-02-13 2019-02-14 2019-02-15 2019-02-16 2019-02-17 2019-02-18
## 25 25 25 25 25 25 25
## 2019-02-19 2019-02-20 2019-02-21 2019-02-22 2019-02-23 2019-02-24 2019-02-25
## 25 25 25 25 25 25 25
## 2019-02-26 2019-02-27 2019-02-28 2019-03-01 2019-03-02 2019-03-03 2019-03-04
## 25 25 25 25 25 25 25
## 2019-03-05 2019-03-06 2019-03-07 2019-03-08 2019-03-09 2019-03-10 2019-03-11
## 25 25 25 25 25 25 25
## 2019-03-12 2019-03-13 2019-03-14 2019-03-15 2019-03-16 2019-03-17 2019-03-18
## 25 25 25 25 25 25 25
## 2019-03-19 2019-03-20 2019-03-21 2019-03-22 2019-03-23 2019-03-24 2019-03-25
## 25 25 25 25 25 25 25
## 2019-03-26 2019-03-27 2019-03-28 2019-03-29 2019-03-30 2019-03-31 2019-04-01
## 25 25 25 25 25 25 25
## 2019-04-02 2019-04-03 2019-04-04 2019-04-05 2019-04-06 2019-04-07 2019-04-08
## 25 25 25 25 25 25 25
## 2019-04-09 2019-04-10 2019-04-11 2019-04-12 2019-04-13 2019-04-14 2019-04-15
## 25 25 25 25 25 25 25
## 2019-04-16 2019-04-17 2019-04-18 2019-04-19 2019-04-20 2019-04-21 2019-04-22
## 25 25 25 25 25 25 25
## 2019-04-23 2019-04-24 2019-04-25 2019-04-26 2019-04-27 2019-04-28 2019-04-29
## 25 25 25 25 25 25 25
## 2019-04-30 2019-05-01 2019-05-02 2019-05-03 2019-05-04 2019-05-05 2019-05-06
## 25 25 25 25 25 25 25
## 2019-05-07 2019-05-08 2019-05-09 2019-05-10 2019-05-11 2019-05-12 2019-05-13
## 25 25 25 25 25 25 25
## 2019-05-14 2019-05-15 2019-05-16 2019-05-17 2019-05-18 2019-05-19 2019-05-20
## 25 25 25 25 25 25 25
## 2019-05-21 2019-05-22 2019-05-23 2019-05-24 2019-05-25 2019-05-26 2019-05-27
## 25 25 25 25 25 25 25
## 2019-05-28 2019-05-29 2019-05-30 2019-05-31 2019-06-01 2019-06-02 2019-06-03
## 25 25 25 25 25 25 25
## 2019-06-04 2019-06-05 2019-06-06 2019-06-07 2019-06-08 2019-06-09 2019-06-10
## 25 25 25 25 25 25 25
## 2019-06-11 2019-06-12 2019-06-13 2019-06-14 2019-06-15 2019-06-16 2019-06-17
## 25 25 25 25 25 25 25
## 2019-06-18 2019-06-19 2019-06-20 2019-06-21 2019-06-22 2019-06-23 2019-06-24
## 25 25 25 25 25 25 25
## 2019-06-25 2019-06-26 2019-06-27 2019-06-28 2019-06-29 2019-06-30 2019-07-01
## 25 25 25 25 25 25 25
## 2019-07-02 2019-07-03 2019-07-04 2019-07-05 2019-07-06 2019-07-07 2019-07-08
## 25 25 25 25 25 25 25
## 2019-07-09 2019-07-10 2019-07-11 2019-07-12 2019-07-13 2019-07-14 2019-07-15
## 25 25 25 25 25 25 25
## 2019-07-16 2019-07-17 2019-07-18 2019-07-19 2019-07-20 2019-07-21 2019-07-22
## 25 25 25 25 25 25 25
## 2019-07-23 2019-07-24 2019-07-25 2019-07-26 2019-07-27 2019-07-28 2019-07-29
## 25 25 25 25 25 25 25
## 2019-07-30 2019-07-31 2019-08-01 2019-08-02 2019-08-03 2019-08-04 2019-08-05
## 25 25 25 25 25 25 25
## 2019-08-06 2019-08-07 2019-08-08 2019-08-09 2019-08-10 2019-08-11 2019-08-12
## 25 25 25 25 25 25 25
## 2019-08-13 2019-08-14 2019-08-15 2019-08-16 2019-08-17 2019-08-18 2019-08-19
## 25 25 25 25 25 25 25
## 2019-08-20 2019-08-21 2019-08-22 2019-08-23 2019-08-24 2019-08-25 2019-08-26
## 25 25 25 25 25 25 25
## 2019-08-27 2019-08-28 2019-08-29 2019-08-30 2019-08-31 2019-09-01 2019-09-02
## 25 25 25 25 25 25 25
## 2019-09-03 2019-09-04 2019-09-05 2019-09-06 2019-09-07 2019-09-08 2019-09-09
## 25 25 25 25 25 25 25
## 2019-09-10 2019-09-11 2019-09-12 2019-09-13 2019-09-14 2019-09-15 2019-09-16
## 25 25 25 25 25 25 25
## 2019-09-17 2019-09-18 2019-09-19 2019-09-20 2019-09-21 2019-09-22 2019-09-23
## 25 25 25 25 25 25 25
## 2019-09-24 2019-09-25 2019-09-26 2019-09-27 2019-09-28 2019-09-29 2019-09-30
## 25 25 25 25 25 25 25
## 2019-10-01 2019-10-02 2019-10-03 2019-10-04 2019-10-05 2019-10-06 2019-10-07
## 25 25 25 25 25 25 25
## 2019-10-08 2019-10-09 2019-10-10 2019-10-11 2019-10-12 2019-10-13 2019-10-14
## 25 25 25 25 25 25 25
## 2019-10-15 2019-10-16 2019-10-17 2019-10-18 2019-10-19 2019-10-20 2019-10-21
## 25 25 25 25 25 25 25
## 2019-10-22 2019-10-23 2019-10-24 2019-10-25 2019-10-26 2019-10-27 2019-10-28
## 25 25 25 25 25 25 25
## 2019-10-29 2019-10-30 2019-10-31 2019-11-01 2019-11-02 2019-11-03 2019-11-04
## 25 25 25 25 25 25 25
## 2019-11-05 2019-11-06 2019-11-07 2019-11-08 2019-11-09 2019-11-10 2019-11-11
## 25 25 25 25 25 25 25
## 2019-11-12 2019-11-13 2019-11-14 2019-11-15 2019-11-16 2019-11-17 2019-11-18
## 25 25 25 25 25 25 25
## 2019-11-19 2019-11-20 2019-11-21 2019-11-22 2019-11-23 2019-11-24 2019-11-25
## 25 25 25 25 25 25 25
## 2019-11-26 2019-11-27 2019-11-28 2019-11-29 2019-11-30 2019-12-01 2019-12-02
## 25 25 25 25 25 25 25
## 2019-12-03 2019-12-04 2019-12-05 2019-12-06 2019-12-07 2019-12-08 2019-12-09
## 25 25 25 25 25 25 25
## 2019-12-10 2019-12-11 2019-12-12 2019-12-13 2019-12-14 2019-12-15 2019-12-16
## 25 25 25 25 25 25 25
## 2019-12-17 2019-12-18 2019-12-19 2019-12-20 2019-12-21 2019-12-22 2019-12-23
## 25 25 25 25 25 25 25
## 2019-12-24 2019-12-25 2019-12-26 2019-12-27 2019-12-28 2019-12-29 2019-12-30
## 25 25 25 25 25 25 25
## 2019-12-31
## 25
# Re-check the per-station row counts after the aggregate rows were removed.
table(seoulair[["district"]])
##
## 강남구 강동구 강북구 강서구 관악구 광진구 구로구 금천구
## 365 365 365 365 365 365 365 365
## 노원구 도봉구 동대문구 동작구 마포구 서대문구 서초구 성동구
## 365 365 365 365 365 365 365 365
## 성북구 송파구 양천구 영등포구 용산구 은평구 종로구 중구
## 365 365 365 365 365 365 365 365
## 중랑구
## 365
# Column-wise summary; the NA counts for pm10 / pm2.5 show up here.
seoulair %>% summary()
## date district pm10 pm2.5
## Length:9125 Length:9125 Min. : 3.00 Min. : 1.00
## Class :character Class :character 1st Qu.: 24.00 1st Qu.: 14.00
## Mode :character Mode :character Median : 36.00 Median : 21.00
## Mean : 41.76 Mean : 24.93
## 3rd Qu.: 52.00 3rd Qu.: 30.00
## Max. :228.00 Max. :153.00
## NA's :213 NA's :203
# Preview only (result is printed, not stored): extract the month part of the
# "YYYY-MM-DD" date string. The column is referenced through the data mask
# (`date`) rather than `seoulair$date`, so the expression always reads the
# data frame flowing through the pipe.
seoulair %>% mutate(month = substr(date, 6, 7))
## # A tibble: 9,125 x 5
## date district pm10 pm2.5 month
## <chr> <chr> <dbl> <dbl> <chr>
## 1 2019-12-31 강남구 22 14 12
## 2 2019-12-31 강동구 27 19 12
## 3 2019-12-31 강북구 31 17 12
## 4 2019-12-31 강서구 29 16 12
## 5 2019-12-31 관악구 36 18 12
## 6 2019-12-31 광진구 22 10 12
## 7 2019-12-31 구로구 25 16 12
## 8 2019-12-31 금천구 23 18 12
## 9 2019-12-31 노원구 22 17 12
## 10 2019-12-31 도봉구 19 12 12
## # ... with 9,115 more rows
# Store month and day as text slices of the "YYYY-MM-DD" date string.
seoulair <- seoulair %>%
  mutate(
    month = substr(date, 6, 7),
    day = substr(date, 9, 10)
  )
class(seoulair$month)
## [1] "character"
class(seoulair$day)
## [1] "character"
# Convert both slices to numbers, then label each row with a season:
# months 3-5 spring, 6-8 summer, 9-11 autumn, everything else winter.
seoulair <- seoulair %>%
  mutate(
    across(c(month, day), as.numeric),
    season = case_when(
      month %in% c(3, 4, 5) ~ "spring",
      month %in% c(6, 7, 8) ~ "summer",
      month %in% c(9, 10, 11) ~ "autumn",
      TRUE ~ "winter"
    )
  )
str(seoulair)
## tibble [9,125 x 7] (S3: tbl_df/tbl/data.frame)
## $ date : chr [1:9125] "2019-12-31" "2019-12-31" "2019-12-31" "2019-12-31" ...
## $ district: chr [1:9125] "강남구" "강동구" "강북구" "강서구" ...
## $ pm10 : num [1:9125] 22 27 31 29 36 22 25 23 22 19 ...
## $ pm2.5 : num [1:9125] 14 19 17 16 18 10 16 18 17 12 ...
## $ month : num [1:9125] 12 12 12 12 12 12 12 12 12 12 ...
## $ day : num [1:9125] 31 31 31 31 31 31 31 31 31 31 ...
## $ season : chr [1:9125] "winter" "winter" "winter" "winter" ...
# Overall mean PM10, ignoring missing readings. `TRUE` is spelled out because
# `T` is an ordinary variable that can be reassigned.
mean(seoulair$pm10, na.rm = TRUE)
## [1] 41.76167
# Row(s) holding the highest PM10 reading. Rows with missing pm10 compare as
# NA and are dropped by filter().
seoulair %>%
  filter(pm10 == max(pm10, na.rm = TRUE)) %>%
  select(date, district, pm10)
## # A tibble: 1 x 3
## date district pm10
## <chr> <chr> <dbl>
## 1 2019-03-05 강북구 228
# Row(s) holding the lowest PM10 reading (ties are all returned). Rows with
# missing pm10 compare as NA and are dropped by filter().
seoulair %>%
  filter(pm10 == min(pm10, na.rm = TRUE)) %>%
  select(date, district, pm10)
## # A tibble: 6 x 3
## date district pm10
## <chr> <chr> <dbl>
## 1 2019-10-03 노원구 3
## 2 2019-09-22 서초구 3
## 3 2019-09-22 용산구 3
## 4 2019-07-21 마포구 3
## 5 2019-07-11 중랑구 3
## 6 2019-04-10 동대문구 3
# Five districts with the lowest mean PM10 (missing readings removed first).
seoulair %>%
  drop_na(pm10) %>%
  group_by(district) %>%
  summarise(m = mean(pm10)) %>%
  arrange(m) %>%
  slice_head(n = 5)
## # A tibble: 5 x 2
## district m
## <chr> <dbl>
## 1 용산구 34.1
## 2 중랑구 37.3
## 3 중구 37.6
## 4 종로구 37.7
## 5 도봉구 38.0
# Five districts with the highest mean PM10 (missing readings removed first).
seoulair %>%
  drop_na(pm10) %>%
  group_by(district) %>%
  summarise(m = mean(pm10)) %>%
  arrange(desc(m)) %>%
  slice_head(n = 5)
## # A tibble: 5 x 2
## district m
## <chr> <dbl>
## 1 관악구 49.0
## 2 양천구 47.7
## 3 마포구 47.1
## 4 강서구 46.5
## 5 강북구 45.0
# Mean PM10 (m1) and PM2.5 (m2) per season, using only rows where both
# readings are present; sorted by ascending mean PM10.
seoulair %>%
  drop_na(pm10, pm2.5) %>%
  group_by(season) %>%
  summarise(
    m1 = mean(pm10),
    m2 = mean(pm2.5)
  ) %>%
  arrange(m1)
## # A tibble: 4 x 3
## season m1 m2
## <chr> <dbl> <dbl>
## 1 summer 26.3 18.1
## 2 autumn 31.1 15.7
## 3 spring 54.1 31.6
## 4 winter 54.7 33.7
# Share of district-day readings in each PM10 grade.
# Cutoffs follow the Korean PM10 grades: <=30 good, <=80 normal, <=150 bad,
# above that worse. The "normal" cutoff used to be 61 here (and 81 in the
# per-district block further down); both now use 80.
# NOTE: the recorded output that follows was produced with the old 61 cutoff;
# the "normal" and "bad" rows need to be regenerated ("good" and "worse" are
# unaffected by this change).
seoulair %>%
  filter(!is.na(pm10)) %>%
  mutate(pm_grade = case_when(
    pm10 <= 30 ~ "good",
    pm10 <= 80 ~ "normal",
    pm10 <= 150 ~ "bad",
    TRUE ~ "worse"
  )) %>%
  count(pm_grade) %>%
  mutate(pct = round(n / sum(n) * 100, 1)) %>%
  arrange(desc(n))
## # A tibble: 4 x 3
## pm_grade n pct
## <chr> <int> <dbl>
## 1 normal 3966 44.5
## 2 good 3412 38.3
## 3 bad 1453 16.3
## 4 worse 81 0.9
# Five districts with the largest share of "good" PM10 days.
# Cutoffs: <=30 good, <=80 normal, <=150 bad, above that worse. The "normal"
# cutoff was 81 here, which put a reading of exactly 81 in the wrong grade;
# only the "good" rows are reported, so the printed result does not change.
seoulair %>%
  filter(!is.na(pm10)) %>%
  mutate(pm_grade = case_when(
    pm10 <= 30 ~ "good",
    pm10 <= 80 ~ "normal",
    pm10 <= 150 ~ "bad",
    TRUE ~ "worse"
  )) %>%
  group_by(district, pm_grade) %>%
  # Keep the district grouping (stated explicitly, so summarise() no longer
  # prints its regrouping message): `total` below must be a per-district sum.
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(
    total = sum(n),
    pct = round(n / total * 100, 1)
  ) %>%
  filter(pm_grade == "good") %>%
  select(district, n, pct) %>%
  arrange(desc(pct)) %>%
  head(5)
## # A tibble: 5 x 3
## # Groups: district [5]
## district n pct
## <chr> <int> <dbl>
## 1 용산구 196 54
## 2 중구 169 46.3
## 3 중랑구 151 46.2
## 4 종로구 163 44.7
## 5 금천구 161 44.4
# PM10 over time, one line per district.
# `date` is stored as text, so it is converted to Date for a continuous time
# axis; left as character, ggplot treats every date as its own discrete group
# and draws a vertical line per day. `group = district` keeps each district's
# readings on a separate line.
# NOTE: the row count in the recorded missing-value warning below may differ
# after this change.
ggplot(
  data = seoulair,
  aes(x = as.Date(date), y = pm10, group = district)
) +
  geom_line()
## Warning: Removed 48 row(s) containing missing values (geom_path).
# Read the March 2022 subway card usage CSV, keeping text columns as
# character. `FALSE` is spelled out because `F` is an ordinary variable that
# can be reassigned.
# NOTE(review): the file's text encoding is not stated here; if the Korean
# labels come out garbled on another platform, set `fileEncoding` explicitly.
subway <- read.csv("CARD_SUBWAY_MONTH_202203.csv", stringsAsFactors = FALSE)
str(subway)
## 'data.frame': 18467 obs. of 6 variables:
## $ 사용일자 : int 20220301 20220301 20220301 20220301 20220301 20220301 20220301 20220301 20220301 20220301 ...
## $ 노선명 : chr "장항선" "장항선" "장항선" "안산선" ...
## $ 역명 : chr "배방" "온양온천" "신창(순천향대)" "오이도" ...
## $ 승차총승객수: int 593 2388 1065 4789 1892 2122 1360 1836 2211 1899 ...
## $ 하차총승객수: int 698 2517 1164 4668 1693 2228 1331 1663 2122 1814 ...
## $ 등록일자 : int 20220304 20220304 20220304 20220304 20220304 20220304 20220304 20220304 20220304 20220304 ...
# Drop the registration-date column, then give the remaining columns short
# English names.
subway <- subway %>%
  select(-"등록일자") %>%
  rename(
    date = "사용일자",
    line = "노선명",
    station = "역명",
    on_passenger = "승차총승객수",
    off_passenger = "하차총승객수"
  )
subway %>% head()
## date line station on_passenger off_passenger
## 1 20220301 장항선 배방 593 698
## 2 20220301 장항선 온양온천 2388 2517
## 3 20220301 장항선 신창(순천향대) 1065 1164
## 4 20220301 안산선 오이도 4789 4668
## 5 20220301 안산선 수리산 1892 1693
## 6 20220301 우이신설선 북한산우이 2122 2228
# Column-wise summary of the subway data.
subway %>% summary()
## date line station on_passenger
## Min. :20220301 Length:18467 Length:18467 Min. : 1
## 1st Qu.:20220308 Class :character Class :character 1st Qu.: 3078
## Median :20220316 Mode :character Mode :character Median : 6334
## Mean :20220316 Mean : 8852
## 3rd Qu.:20220324 3rd Qu.:11838
## Max. :20220331 Max. :80279
## off_passenger
## Min. : 0
## 1st Qu.: 2989
## Median : 6229
## Mean : 8823
## 3rd Qu.:11742
## Max. :78816
# Day of month as a two-character string: characters 7-8 of the yyyymmdd
# integer once it is turned into text.
subway[["day"]] <- substr(as.character(subway[["date"]]), 7, 8)
class(subway[["day"]])
## [1] "character"
# Number of rows per line name, to spot labels that should be merged.
table(subway[["line"]])
##
## 1호선 2호선 3호선 4호선 5호선
## 310 1550 1041 806 1736
## 6호선 7호선 8호선 9호선 9호선2~3단계
## 1172 1345 558 775 403
## 경강선 경부선 경원선 경의선 경인선
## 341 1209 904 811 620
## 경춘선 공항철도 1호선 과천선 분당선 수인선
## 589 434 248 1065 558
## 안산선 우이신설선 일산선 장항선 중앙선
## 403 403 318 217 651
# Fold the "9호선2~3단계" (Line 9, phases 2-3) label into "9호선" so Line 9 is
# counted as one line. The earlier comparison string started with "0" instead
# of "9", matched no rows and left the data unchanged.
# %in% is used so NA line values can never enter the subscript.
subway$line[subway$line %in% "9호선2~3단계"] <- "9호선"
# Number of rows per station name.
table(subway$station)
##
## 4.19민주묘지 가능
## 31 31
## 가락시장 가산디지털단지
## 62 62
## 가양 가오리
## 31 31
## 가좌 가천대
## 31 31
## 가평 간석
## 31 31
## 갈매 강남
## 31 31
## 강남구청 강동
## 62 31
## 강동구청 강매
## 31 31
## 강변(동서울터미널) 강일
## 31 31
## 강촌 개롱
## 31 31
## 개봉 개포동
## 31 31
## 개화 개화산
## 31 31
## 거여 건대입구
## 31 62
## 검암 경기광주
## 33 31
## 경마공원 경복궁(정부서울청사)
## 31 31
## 경찰병원 계양
## 31 34
## 고덕 고려대(종암)
## 31 31
## 고색 고속터미널
## 31 93
## 고잔 곡산
## 31 31
## 곤지암 공덕
## 31 124
## 공릉(서울과학기술대) 공항시장
## 31 31
## 공항화물청사 과천
## 31 31
## 관악 광나루(장신대)
## 31 31
## 광명 광명사거리
## 31 31
## 광운대 광화문(세종문화회관)
## 31 31
## 광흥창(서강) 교대(법원.검찰청)
## 31 62
## 구로 구로디지털단지
## 31 31
## 구룡 구리
## 31 31
## 구반포 구산
## 31 31
## 구성 구의(광진구청)
## 31 31
## 구일 구파발
## 31 31
## 국수 국회의사당
## 31 31
## 군자(능동) 군포
## 62 31
## 굴봉산 굴포천
## 31 4
## 굽은다리(강동구민회관앞) 금곡
## 31 31
## 금릉 금정
## 31 31
## 금천구청 금촌
## 31 31
## 금호 기흥
## 31 31
## 길동 길음
## 31 31
## 김유정 김포공항
## 31 93
## 까치산 까치울
## 31 5
## 낙성대(강감찬) 남구로
## 31 31
## 남동인더스파크 남부터미널(예술의전당)
## 31 31
## 남성 남영
## 31 31
## 남위례 남춘천
## 31 31
## 남태령 남한산성입구(성남법원.검찰청)
## 31 31
## 내방 노들
## 31 31
## 노량진 노원
## 62 62
## 녹번 녹사평(용산구청)
## 31 31
## 녹양 녹천
## 31 31
## 논현 능곡
## 31 31
## 단대오거리 달월
## 31 31
## 답십리 당고개
## 31 31
## 당산 당정
## 62 31
## 대곡 대공원
## 31 31
## 대림(구로구청) 대모산입구
## 62 31
## 대방 대성리
## 31 31
## 대야미 대청
## 31 31
## 대치 대화
## 31 31
## 대흥(서강대앞) 덕계
## 31 31
## 덕소 덕정
## 31 31
## 도곡 도농
## 62 31
## 도림천 도봉
## 31 31
## 도봉산 도심
## 62 31
## 도원 도화
## 31 31
## 독립문 독바위
## 31 31
## 독산 돌곶이
## 31 31
## 동대문 동대문역사문화공원(DDP)
## 62 93
## 동대입구 동두천
## 31 31
## 동두천중앙 동묘앞
## 31 62
## 동암 동인천
## 31 31
## 동작(현충원) 두정
## 62 31
## 둔촌동 둔촌오륜
## 31 31
## 등촌 디지털미디어시티
## 31 93
## 뚝섬 뚝섬유원지
## 31 31
## 마곡 마곡나루(서울식물원)
## 31 62
## 마두 마들
## 31 31
## 마석 마장
## 31 31
## 마천 마포
## 31 31
## 마포구청 망우
## 31 31
## 망원 망월사
## 31 31
## 망포 매교
## 31 31
## 매봉 매탄권선
## 31 31
## 먹골 면목
## 31 31
## 명동 명일
## 31 31
## 명학 모란
## 31 62
## 목동 몽촌토성(평화의문)
## 31 31
## 무악재 문래
## 31 31
## 문산 문정
## 31 31
## 미금 미사
## 31 31
## 미아(서울사이버대학) 미아사거리
## 31 31
## 반월 반포
## 31 31
## 발산 방배
## 31 31
## 방이 방학
## 31 31
## 방화 배방
## 31 31
## 백마 백석
## 31 31
## 백양리 백운
## 31 31
## 버티고개 범계
## 31 31
## 별내 병점
## 31 31
## 보라매 보문
## 31 62
## 보산 보정
## 31 31
## 복정 봉명
## 42 31
## 봉은사 봉천
## 31 31
## 봉화산(서울의료원) 부개
## 31 31
## 부발 부천
## 31 31
## 부천시청 부천종합운동장
## 8 2
## 부평 부평구청
## 31 2
## 북한산보국문 북한산우이
## 31 31
## 불광 사가정
## 62 31
## 사당 사릉
## 62 31
## 사리 사평
## 31 31
## 산본 산성
## 31 31
## 삼각지 삼동
## 62 31
## 삼산체육관 삼성(무역센터)
## 1 31
## 삼성중앙 삼송
## 31 31
## 삼양 삼양사거리
## 31 31
## 삼전 상갈
## 31 31
## 상계 상도
## 31 31
## 상동 상록수
## 9 31
## 상봉(시외버스터미널) 상수
## 62 31
## 상왕십리 상월곡(한국과학기술연구원)
## 31 31
## 상일동 상천
## 31 31
## 새절(신사) 샛강
## 31 31
## 서강대 서대문
## 31 31
## 서동탄 서빙고
## 31 31
## 서울대입구(관악구청) 서울숲
## 31 31
## 서울역 서정리
## 155 31
## 서초 서현
## 31 31
## 석계 석수
## 62 31
## 석촌 석촌고분
## 62 31
## 선릉 선바위
## 62 31
## 선유도 선정릉
## 31 62
## 성균관대 성수
## 31 31
## 성신여대입구(돈암) 성환
## 62 31
## 세류 세마
## 31 31
## 세종대왕릉 소래포구
## 31 31
## 소사 소요산
## 31 31
## 솔밭공원 솔샘
## 31 31
## 송내 송도
## 31 31
## 송정 송탄
## 31 31
## 송파 송파나루
## 31 31
## 수내 수락산
## 31 31
## 수리산 수색
## 31 31
## 수서 수원
## 62 62
## 수원시청 수유(강북구청)
## 31 31
## 수진 숙대입구(갈월)
## 31 31
## 숭실대입구(살피재) 숭의
## 31 31
## 시청 신갈
## 62 31
## 신금호 신길
## 31 62
## 신길온천 신내
## 31 40
## 신논현 신답
## 31 31
## 신당 신대방
## 62 31
## 신대방삼거리 신도림
## 31 62
## 신둔도예촌 신림
## 31 31
## 신목동 신반포
## 31 31
## 신방화 신사
## 31 31
## 신설동 신용산
## 93 31
## 신원 신이문
## 31 31
## 신정(은행정) 신정네거리
## 31 31
## 신중동 신창(순천향대)
## 5 31
## 신촌 신포
## 62 31
## 신풍 신흥
## 31 31
## 쌍문 쌍용(나사렛대)
## 31 31
## 아산 아신
## 31 31
## 아차산(어린이대공원후문) 아현
## 31 31
## 안국 안산
## 31 31
## 안암(고대병원앞) 안양
## 31 31
## 암사 압구정
## 31 31
## 압구정로데오 애오개
## 31 31
## 야당 야목
## 31 31
## 야탑 약수
## 31 62
## 양수 양원
## 31 31
## 양재(서초구청) 양정
## 31 31
## 양주 양천구청
## 31 31
## 양천향교 양평
## 31 62
## 어린이대공원(세종대) 어천
## 31 31
## 언주 여의나루
## 31 31
## 여의도 여주
## 62 31
## 역곡 역삼
## 31 31
## 역촌 연수
## 31 31
## 연신내 염창
## 47 31
## 영등포 영등포구청
## 31 62
## 영등포시장 영종
## 31 31
## 영통 오금
## 31 62
## 오류동 오리
## 31 31
## 오목교(목동운동장앞) 오목천
## 31 31
## 오빈 오산
## 31 31
## 오산대 오이도
## 31 31
## 옥수 온수(성공회대입구)
## 62 62
## 온양온천 올림픽공원(한국체대)
## 31 62
## 왕십리(성동구청) 외대앞
## 93 31
## 용답 용두(동대문구청)
## 31 31
## 용마산(용마폭포공원) 용문
## 31 31
## 용산 우장산
## 31 31
## 운길산 운서
## 31 31
## 운정 원당
## 31 31
## 원덕 원인재
## 31 31
## 원흥 월계
## 31 31
## 월곡(동덕여대) 월곶
## 31 31
## 월드컵경기장(성산) 월롱
## 31 31
## 을지로3가 을지로4가
## 62 62
## 을지로입구 응봉
## 31 31
## 응암 의왕
## 31 31
## 의정부 이대
## 31 31
## 이매 이수
## 62 31
## 이천 이촌(국립중앙박물관)
## 31 62
## 이태원 인덕원
## 31 31
## 인천 인천공항1터미널
## 62 31
## 인천공항2터미널 인천논현
## 31 31
## 인하대 일산
## 31 31
## 일원 임진강
## 31 31
## 잠실(송파구청) 잠실나루
## 62 31
## 잠실새내 잠원
## 31 31
## 장승배기 장암
## 31 31
## 장지 장한평
## 31 31
## 정릉 정발산
## 31 31
## 정부과천청사 정왕
## 31 31
## 정자 제기동
## 31 31
## 제물포 종각
## 31 31
## 종로3가 종로5가
## 93 31
## 종합운동장 주안
## 62 31
## 주엽 죽전
## 31 31
## 중계 중곡
## 31 31
## 중동 중랑
## 31 31
## 중앙 중앙보훈병원
## 31 31
## 중화 증미
## 31 31
## 증산(명지대앞) 지축
## 31 39
## 지평 지행
## 31 31
## 직산 진위
## 31 31
## 창동 창신
## 36 31
## 천마산 천안
## 31 31
## 천왕 천호(풍납토성)
## 31 62
## 철산 청구
## 31 62
## 청담 청라국제도시
## 31 31
## 청량리(서울시립대입구) 청명
## 62 31
## 청평 초월
## 31 31
## 초지 총신대입구(이수)
## 31 31
## 춘의 춘천
## 7 31
## 충무로 충정로(경기대입구)
## 49 62
## 탄현 탕정
## 31 31
## 태릉입구 태평
## 62 31
## 퇴계원 파주
## 31 31
## 판교 팔당
## 31 31
## 평내호평 평촌
## 31 31
## 평택 평택지제
## 31 31
## 풍산 하계
## 31 31
## 하남검단산 하남시청(덕풍·신장)
## 31 31
## 하남풍산 학동
## 31 31
## 학여울 한강진
## 31 31
## 한남 한대앞
## 31 31
## 한성대입구(삼선교) 한성백제
## 31 31
## 한양대 한티
## 31 31
## 합정 행당
## 62 31
## 행신 혜화
## 31 31
## 호구포 홍대입구
## 31 93
## 홍제 화계
## 31 31
## 화곡 화랑대(서울여대입구)
## 31 31
## 화서 화전
## 31 31
## 화정 회기
## 31 31
## 회룡 회현(남대문시장)
## 31 31
## 효창공원앞 흑석(중앙대입구)
## 62 31
# Total traffic per row: boardings plus alightings.
subway[["total_passenger"]] <- with(subway, on_passenger + off_passenger)
str(subway)
## 'data.frame': 18467 obs. of 7 variables:
## $ date : int 20220301 20220301 20220301 20220301 20220301 20220301 20220301 20220301 20220301 20220301 ...
## $ line : chr "장항선" "장항선" "장항선" "안산선" ...
## $ station : chr "배방" "온양온천" "신창(순천향대)" "오이도" ...
## $ on_passenger : int 593 2388 1065 4789 1892 2122 1360 1836 2211 1899 ...
## $ off_passenger : int 698 2517 1164 4668 1693 2228 1331 1663 2122 1814 ...
## $ day : chr "01" "01" "01" "01" ...
## $ total_passenger: int 1291 4905 2229 9457 3585 4350 2691 3499 4333 3713 ...