# Show the working directory the session starts in.
getwd()
## [1] "C:/Users/samsung/Desktop"
# Clear every object from the global environment before the analysis starts.
# The previous form, `rm=list(ls())`, was an assignment: it created a list
# named `rm` and removed nothing.
rm(list = ls())
# Switch to the folder that holds the input CSV, which is read later via a
# relative path.
# NOTE(review): hard-coded, machine-specific path; consider a project-relative
# path instead of changing the working directory.
setwd("c:/data")
library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the Seoul air-quality CSV; the file is decoded as EUC-KR.
df <- read.csv(
  file = "서울시.csv",
  fileEncoding = "euc-kr"
)
# Compact overview of every column: name, type and first values.
dplyr::glimpse(df)
## Rows: 14,390
## Columns: 8
## $ 측정월 <int> 202312, 202312, 202312, 202312, 202312, 202312, 20…
## $ 측정소명 <chr> "강남구", "강남대로", "강동구", "강변북로", "강북…
## $ 이산화질소농도.ppm. <dbl> 0.0357, 0.0394, 0.0372, 0.0378, 0.0264, 0.0403, 0.…
## $ 오존농도.ppm. <dbl> 0.0127, 0.0088, 0.0119, 0.0104, 0.0157, 0.0136, 0.…
## $ 일산화탄소농도.ppm. <dbl> 0.53, 0.90, 0.64, 0.52, 0.59, 0.70, 0.67, 0.68, 0.…
## $ 아황산가스.ppm. <dbl> 0.0034, 0.0033, 0.0032, 0.0037, 0.0027, 0.0037, 0.…
## $ 미세먼지..... <int> 39, 48, 41, 43, 35, 44, 47, 46, 33, 37, 45, 40, 36…
## $ 초미세먼지..... <int> 24, 28, 24, 28, 23, 23, 25, 26, 19, 23, 26, 26, 23…
colnames(df) # print the variable (column) names
## [1] "측정월" "측정소명" "이산화질소농도.ppm."
## [4] "오존농도.ppm." "일산화탄소농도.ppm." "아황산가스.ppm."
## [7] "미세먼지....." "초미세먼지....."
# Bind `rename` explicitly to dplyr's implementation
# (presumably a guard against masking by another attached package).
rename <- dplyr::rename
# Replace the original Korean headers with short English column names.
df <- rename(
  df,
  month = "측정월",
  region = "측정소명",
  no2 = "이산화질소농도.ppm.",
  ozone = "오존농도.ppm.",
  co = "일산화탄소농도.ppm.",
  s = "아황산가스.ppm.",
  pm10 = "미세먼지.....",
  pm2.5 = "초미세먼지....."
)
# Keep the eight renamed columns, in this order.
df <- select(df, month, region, no2, ozone, co, s, pm10, pm2.5)
# Verify the new names and column types.
glimpse(df)
## Rows: 14,390
## Columns: 8
## $ month <int> 202312, 202312, 202312, 202312, 202312, 202312, 202312, 202312,…
## $ region <chr> "강남구", "강남대로", "강동구", "강변북로", "강북구", "강서구",…
## $ no2 <dbl> 0.0357, 0.0394, 0.0372, 0.0378, 0.0264, 0.0403, 0.0406, 0.0385,…
## $ ozone <dbl> 0.0127, 0.0088, 0.0119, 0.0104, 0.0157, 0.0136, 0.0100, 0.0128,…
## $ co <dbl> 0.53, 0.90, 0.64, 0.52, 0.59, 0.70, 0.67, 0.68, 0.56, 0.76, 0.5…
## $ s <dbl> 0.0034, 0.0033, 0.0032, 0.0037, 0.0027, 0.0037, 0.0032, 0.0031,…
## $ pm10 <int> 39, 48, 41, 43, 35, 44, 47, 46, 33, 37, 45, 40, 36, 42, 37, 49,…
## $ pm2.5 <int> 24, 28, 24, 28, 23, 23, 25, 26, 19, 23, 26, 26, 23, 28, 25, 31,…
# Check for missing values: number of NAs in each column.
df %>%
  is.na() %>%
  colSums()
## month region no2 ozone co s pm10 pm2.5
## 0 0 474 736 499 758 2422 8620
# Remove all missing data: every row with an NA in any column is dropped.
df <- df %>% na.omit()
# Re-check the per-column NA counts after the removal.
df %>%
  is.na() %>%
  colSums()
## month region no2 ozone co s pm10 pm2.5
## 0 0 0 0 0 0 0 0
# Per-column summary statistics of the cleaned data.
df %>% summary()
## month region no2 ozone
## Min. :201201 Length:5755 Min. :0.00400 Min. :0.00400
## 1st Qu.:201602 Class :character 1st Qu.:0.02260 1st Qu.:0.01400
## Median :201902 Mode :character Median :0.03100 Median :0.02200
## Mean :201839 Mean :0.03144 Mean :0.02308
## 3rd Qu.:202108 3rd Qu.:0.03900 3rd Qu.:0.03045
## Max. :202312 Max. :0.09000 Max. :0.06500
## co s pm10 pm2.5
## Min. :0.1000 Min. :0.001000 Min. : 10.00 Min. : 5.00
## 1st Qu.:0.4000 1st Qu.:0.003000 1st Qu.: 31.00 1st Qu.:17.00
## Median :0.5000 Median :0.004000 Median : 41.00 Median :22.00
## Mean :0.5369 Mean :0.004186 Mean : 42.12 Mean :22.81
## 3rd Qu.:0.6000 3rd Qu.:0.005000 3rd Qu.: 51.00 3rd Qu.:28.00
## Max. :1.8000 Max. :0.011000 Max. :107.00 Max. :55.00
# Structure of the data frame after the NA rows were removed.
dplyr::glimpse(df)
## Rows: 5,755
## Columns: 8
## $ month <int> 202312, 202312, 202312, 202312, 202312, 202312, 202312, 202312,…
## $ region <chr> "강남구", "강남대로", "강동구", "강변북로", "강북구", "강서구",…
## $ no2 <dbl> 0.0357, 0.0394, 0.0372, 0.0378, 0.0264, 0.0403, 0.0406, 0.0385,…
## $ ozone <dbl> 0.0127, 0.0088, 0.0119, 0.0104, 0.0157, 0.0136, 0.0100, 0.0128,…
## $ co <dbl> 0.53, 0.90, 0.64, 0.52, 0.59, 0.70, 0.67, 0.68, 0.56, 0.76, 0.5…
## $ s <dbl> 0.0034, 0.0033, 0.0032, 0.0037, 0.0027, 0.0037, 0.0032, 0.0031,…
## $ pm10 <int> 39, 48, 41, 43, 35, 44, 47, 46, 33, 37, 45, 40, 36, 42, 37, 49,…
## $ pm2.5 <int> 24, 28, 24, 28, 23, 23, 25, 26, 19, 23, 26, 26, 23, 28, 25, 31,…
# Number of remaining rows for each value of `region`.
table(df[["region"]])
##
## 강남구 강남대로 강동구 강변북로 강북구 강서구
## 125 135 125 136 125 135
## 공항대로 관악구 관악산 광진구 구로구 금천구
## 135 124 57 136 125 125
## 남산 노원구 도봉구 도산대로 동대문구 동작구
## 62 126 125 144 126 126
## 동작대로 마포구 마포아트센터 북한산 서대문구 서울숲
## 136 125 56 62 125 56
## 서초구 성동구 성북구 세곡 송파구 시흥대로
## 125 126 125 62 125 62
## 신촌로 양천구 영등포구 영등포로 올림픽공원 용산구
## 138 125 125 136 56 125
## 은평구 자연사박물관 정릉로 종로 종로구 중구
## 125 48 134 137 137 125
## 중랑구 천호대로 청계천로 한강대로 항동 행주
## 125 137 137 136 61 62
## 홍릉로 홍지문 화랑로
## 138 38 133
# Row(s) whose pm10 equals the overall maximum pm10.
filter(df, pm10 == max(pm10))
## month region no2 ozone co s pm10 pm2.5
## 1 201502 공항대로 0.033 0.011 0.8 0.007 107 27
# Mean pm10 per region, sorted from highest to lowest; show the first five.
df %>%
  group_by(region) %>%
  summarise(m = mean(pm10)) %>%
  arrange(desc(m)) %>%
  head(n = 5)
## # A tibble: 5 × 2
## region m
## <chr> <dbl>
## 1 공항대로 52.0
## 2 한강대로 49.1
## 3 신촌로 48.6
## 4 동작대로 48.4
## 5 강남대로 48.2
# Rows whose pm2.5 equals the overall minimum, ordered by pm10 descending.
arrange(
  filter(df, pm2.5 == min(pm2.5)),
  desc(pm10)
)
## month region no2 ozone co s pm10 pm2.5
## 1 202109 도봉구 0.009 0.029 0.4 0.002 16 5
## 2 202009 동작대로 0.032 0.017 0.3 0.003 16 5
## 3 202109 강남대로 0.024 0.020 0.5 0.003 15 5
## 4 202109 서울숲 0.012 0.031 0.4 0.003 15 5
## 5 202109 관악산 0.005 0.042 0.3 0.003 13 5
## 6 202109 서초구 0.015 0.031 0.4 0.002 12 5
## 7 202109 자연사박물관 0.009 0.043 0.4 0.003 11 5
## 8 202109 올림픽공원 0.012 0.036 0.4 0.003 10 5